diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index c405c1f084..2f2d0d2f5e 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -2,7 +2,7 @@ name: Bug report about: Create a report to help us improve title: '' -labels: '' +labels: 'kind/bug, priority/p2' assignees: '' --- diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index bbcbbe7d61..d73d644481 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -2,7 +2,7 @@ name: Feature request about: Suggest an idea for this project title: '' -labels: '' +labels: 'kind/feature' assignees: '' --- diff --git a/.github/workflows/lint_pr.yml b/.github/workflows/lint_pr.yml new file mode 100644 index 0000000000..40c3dead00 --- /dev/null +++ b/.github/workflows/lint_pr.yml @@ -0,0 +1,24 @@ +name: lint-pr + +on: + pull_request_target: + types: + - opened + - edited + - synchronize + +jobs: + validate-title: + name: Validate PR title + runs-on: ubuntu-latest + steps: + - uses: amannn/action-semantic-pull-request@v4 + with: + # Must use uppercase + subjectPattern: ^(?=[A-Z]).+$ + subjectPatternError: | + The subject "{subject}" found in the pull request title "{title}" + didn't match the configured pattern. Please ensure that the subject + starts with an uppercase character. + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/master_only.yml b/.github/workflows/master_only.yml index 42f0383832..8bb4dafa34 100644 --- a/.github/workflows/master_only.yml +++ b/.github/workflows/master_only.yml @@ -120,11 +120,20 @@ jobs: run: pip install pip-tools - name: Install dependencies run: make install-python-ci-dependencies + - name: Setup Redis Cluster + run: | + docker pull vishnunair/docker-redis-cluster:latest + docker run -d -p 6001:6379 -p 6002:6380 -p 6003:6381 -p 6004:6382 -p 6005:6383 -p 6006:6384 --name redis-cluster vishnunair/docker-redis-cluster - name: Test python env: FEAST_SERVER_DOCKER_IMAGE_TAG: ${{ needs.build-lambda-docker-image.outputs.DOCKER_IMAGE_TAG }} FEAST_USAGE: "False" IS_TEST: "True" + SNOWFLAKE_CI_DEPLOYMENT: ${{ secrets.SNOWFLAKE_CI_DEPLOYMENT }} + SNOWFLAKE_CI_USER: ${{ secrets.SNOWFLAKE_CI_USER }} + SNOWFLAKE_CI_PASSWORD: ${{ secrets.SNOWFLAKE_CI_PASSWORD }} + SNOWFLAKE_CI_ROLE: ${{ secrets.SNOWFLAKE_CI_ROLE }} + SNOWFLAKE_CI_WAREHOUSE: ${{ secrets.SNOWFLAKE_CI_WAREHOUSE }} run: pytest -n 8 --cov=./ --cov-report=xml --verbose --color=yes sdk/python/tests --integration --durations=5 - name: Upload coverage to Codecov uses: codecov/codecov-action@v1 @@ -140,6 +149,11 @@ jobs: FEAST_SERVER_DOCKER_IMAGE_TAG: ${{ needs.build-lambda-docker-image.outputs.DOCKER_IMAGE_TAG }} FEAST_USAGE: "False" IS_TEST: "True" + SNOWFLAKE_CI_DEPLOYMENT: ${{ secrets.SNOWFLAKE_CI_DEPLOYMENT }} + SNOWFLAKE_CI_USER: ${{ secrets.SNOWFLAKE_CI_USER }} + SNOWFLAKE_CI_PASSWORD: ${{ secrets.SNOWFLAKE_CI_PASSWORD }} + SNOWFLAKE_CI_ROLE: ${{ secrets.SNOWFLAKE_CI_ROLE }} + SNOWFLAKE_CI_WAREHOUSE: ${{ secrets.SNOWFLAKE_CI_WAREHOUSE }} run: pytest --verbose --color=yes sdk/python/tests --integration --benchmark --benchmark-autosave --benchmark-save-data --durations=5 - name: Upload Benchmark Artifact to S3 run: aws s3 cp --recursive .benchmarks s3://feast-ci-pytest-benchmarks diff --git a/.github/workflows/pr_integration_tests.yml b/.github/workflows/pr_integration_tests.yml index 8a910f943c..000f9e9728 100644 --- a/.github/workflows/pr_integration_tests.yml 
+++ b/.github/workflows/pr_integration_tests.yml @@ -145,12 +145,21 @@ jobs: run: pip install pip-tools - name: Install dependencies run: make install-python-ci-dependencies + - name: Setup Redis Cluster + run: | + docker pull vishnunair/docker-redis-cluster:latest + docker run -d -p 6001:6379 -p 6002:6380 -p 6003:6381 -p 6004:6382 -p 6005:6383 -p 6006:6384 --name redis-cluster vishnunair/docker-redis-cluster - name: Test python if: ${{ always() }} # this will guarantee that step won't be canceled and resources won't leak env: FEAST_SERVER_DOCKER_IMAGE_TAG: ${{ needs.build-docker-image.outputs.DOCKER_IMAGE_TAG }} FEAST_USAGE: "False" IS_TEST: "True" + SNOWFLAKE_CI_DEPLOYMENT: ${{ secrets.SNOWFLAKE_CI_DEPLOYMENT }} + SNOWFLAKE_CI_USER: ${{ secrets.SNOWFLAKE_CI_USER }} + SNOWFLAKE_CI_PASSWORD: ${{ secrets.SNOWFLAKE_CI_PASSWORD }} + SNOWFLAKE_CI_ROLE: ${{ secrets.SNOWFLAKE_CI_ROLE }} + SNOWFLAKE_CI_WAREHOUSE: ${{ secrets.SNOWFLAKE_CI_WAREHOUSE }} run: pytest -n 8 --cov=./ --cov-report=xml --verbose --color=yes sdk/python/tests --integration --durations=5 - name: Upload coverage to Codecov uses: codecov/codecov-action@v1 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5ff1139acb..f0e5419af7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -103,7 +103,7 @@ jobs: runs-on: ubuntu-latest needs: get-version env: - HELM_VERSION: v2.17.0 + HELM_VERSION: v3.8.0 VERSION_WITHOUT_PREFIX: ${{ needs.get-version.outputs.version_without_prefix }} steps: - uses: actions/checkout@v2 @@ -144,5 +144,63 @@ jobs: python3 setup.py sdist bdist_wheel python3 -m twine upload --verbose dist/* - # TODO(adchia): publish java sdk once maven repo is updated - # See https://github.com/feast-dev/feast-java/blob/master/.github/workflows/release.yml#L104 \ No newline at end of file + publish-python-sdk-no-telemetry: + runs-on: ubuntu-latest + needs: get-version + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + container: python:3.7 + steps: + - uses: actions/checkout@v2 + - name: Install pip-tools + run: pip install pip-tools + - name: Install dependencies + run: make install-python-ci-dependencies PYTHON=3.7 + - name: Publish Python Package + env: + SETUPTOOLS_SCM_PRETEND_VERSION: ${{ needs.get-version.outputs.version_without_prefix }} + run: | + cd sdk/python + sed -i 's/DEFAULT_FEAST_USAGE_VALUE = "True"/DEFAULT_FEAST_USAGE_VALUE = "False"/g' feast/constants.py + sed -i 's/NAME = "feast"/NAME = "feast-no-telemetry"/g' setup.py + python3 -m pip install --user --upgrade setuptools wheel twine + python3 setup.py sdist bdist_wheel + python3 -m twine upload --verbose dist/* + + publish-java-sdk: + container: maven:3.6-jdk-11 + runs-on: ubuntu-latest + needs: get-version + steps: + - uses: actions/checkout@v2 + with: + submodules: 'true' + - name: Set up JDK 11 + uses: actions/setup-java@v1 + with: + java-version: '11' + java-package: jdk + architecture: x64 + - uses: actions/setup-python@v2 + with: + python-version: '3.7' + architecture: 'x64' + - uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-it-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-it-maven- + - name: Publish java sdk + env: + VERSION_WITHOUT_PREFIX: ${{ needs.get-version.outputs.version_without_prefix }} + GPG_PUBLIC_KEY: ${{ secrets.GPG_PUBLIC_KEY }} + GPG_PRIVATE_KEY: ${{ secrets.GPG_PRIVATE_KEY }} + MAVEN_SETTINGS: ${{ secrets.MAVEN_SETTINGS }} + run: | + echo -n "$GPG_PUBLIC_KEY" > /root/public-key + echo 
-n "$GPG_PRIVATE_KEY" > /root/private-key + mkdir -p /root/.m2/ + echo -n "$MAVEN_SETTINGS" > /root/.m2/settings.xml + infra/scripts/publish-java-sdk.sh --revision ${VERSION_WITHOUT_PREFIX} --gpg-key-import-dir /root diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 5de5971703..d9552e175e 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -42,6 +42,12 @@ jobs: - name: Install dependencies run: make install-python-ci-dependencies - name: Test Python + env: + SNOWFLAKE_CI_DEPLOYMENT: ${{ secrets.SNOWFLAKE_CI_DEPLOYMENT }} + SNOWFLAKE_CI_USER: ${{ secrets.SNOWFLAKE_CI_USER }} + SNOWFLAKE_CI_PASSWORD: ${{ secrets.SNOWFLAKE_CI_PASSWORD }} + SNOWFLAKE_CI_ROLE: ${{ secrets.SNOWFLAKE_CI_ROLE }} + SNOWFLAKE_CI_WAREHOUSE: ${{ secrets.SNOWFLAKE_CI_WAREHOUSE }} run: FEAST_USAGE=False pytest -n 8 --cov=./ --cov-report=xml --verbose --color=yes sdk/python/tests - name: Upload coverage to Codecov uses: codecov/codecov-action@v1 diff --git a/.prow.yaml b/.prow.yaml index b03a71a475..4c8372cc7c 100644 --- a/.prow.yaml +++ b/.prow.yaml @@ -1,102 +1,4 @@ -presubmits: -- name: test-core-and-ingestion - decorate: true - spec: - containers: - - image: maven:3.6-jdk-11 - command: ["infra/scripts/test-java-core-ingestion.sh"] - resources: - requests: - cpu: "2000m" - memory: "1536Mi" - skip_branches: - - ^v0\.(3|4)-branch$ - -- name: test-core-and-ingestion-java-8 - decorate: true - always_run: true - spec: - containers: - - image: maven:3.6-jdk-8 - command: ["infra/scripts/test-java-core-ingestion.sh"] - resources: - requests: - cpu: "2000m" - memory: "1536Mi" - branches: - - ^v0\.(3|4)-branch$ - -- name: test-serving - decorate: true - spec: - containers: - - image: maven:3.6-jdk-11 - command: ["infra/scripts/test-java-serving.sh"] - skip_branches: - - ^v0\.(3|4)-branch$ - -- name: test-serving-java-8 - decorate: true - always_run: true - spec: - containers: - - image: maven:3.6-jdk-8 - command: ["infra/scripts/test-java-serving.sh"] - branches: - - ^v0\.(3|4)-branch$ - -- name: test-java-sdk - decorate: true - spec: - containers: - - image: maven:3.6-jdk-11 - command: ["infra/scripts/test-java-sdk.sh"] - skip_branches: - - ^v0\.(3|4)-branch$ - -- name: test-java-sdk-java-8 - decorate: true - always_run: true - spec: - containers: - - image: maven:3.6-jdk-8 - command: ["infra/scripts/test-java-sdk.sh"] - branches: - - ^v0\.(3|4)-branch$ - -- name: test-golang-sdk - decorate: true - spec: - containers: - - image: golang:1.13 - command: ["infra/scripts/test-golang-sdk.sh"] - postsubmits: -- name: publish-python-sdk - decorate: true - spec: - containers: - - image: python:3 - command: - - sh - - -c - - | - make package-protos && make compile-protos-python && infra/scripts/publish-python-sdk.sh \ - --directory-path sdk/python --repository pypi - volumeMounts: - - name: pypirc - mountPath: /root/.pypirc - subPath: .pypirc - readOnly: true - volumes: - - name: pypirc - secret: - secretName: pypirc - branches: - # Filter on tags with semantic versioning, prefixed with "v" - # https://github.com/semver/semver/issues/232 - - ^v(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(-(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(\.(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*)?(\+[0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*)?$ - - name: publish-java-sdk decorate: true spec: @@ -128,31 +30,3 @@ postsubmits: branches: # Filter on tags with semantic versioning, prefixed with "v". 
- ^v(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(-(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(\.(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*)?(\+[0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*)?$ - -- name: publish-java-8-sdk - decorate: true - spec: - containers: - - image: maven:3.6-jdk-8 - command: - - bash - - -c - - infra/scripts/publish-java-sdk.sh --revision ${PULL_BASE_REF:1} - volumeMounts: - - name: gpg-keys - mountPath: /etc/gpg - readOnly: true - - name: maven-settings - mountPath: /root/.m2/settings.xml - subPath: settings.xml - readOnly: true - volumes: - - name: gpg-keys - secret: - secretName: gpg-keys - - name: maven-settings - secret: - secretName: maven-settings - branches: - # Filter on tags with semantic versioning, prefixed with "v". v0.3 and v0.4 only. - - ^v0\.(3|4)\.(0|[1-9]\d*)(-(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(\.(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*)?(\+[0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*)?$ diff --git a/.prow/config.yaml b/.prow/config.yaml index 3bc2d760a3..e633b2a895 100644 --- a/.prow/config.yaml +++ b/.prow/config.yaml @@ -64,6 +64,9 @@ tide: merge_method: feast-dev/feast: squash feast-dev/feast-spark: squash + merge_commit_template: + feast-dev/feast: + title: '{{.Title}}' blocker_label: merge-blocker squash_label: tide/squash diff --git a/.prow/plugins.yaml b/.prow/plugins.yaml index 0f89e07be5..0dbf0f4c72 100644 --- a/.prow/plugins.yaml +++ b/.prow/plugins.yaml @@ -34,6 +34,8 @@ config_updater: maps: .prow/config.yaml: name: config + .prow.yaml: + name: job-config external_plugins: feast-dev/feast: diff --git a/CHANGELOG.md b/CHANGELOG.md index 53514c5ad0..1aac35ddb4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,106 @@ # Changelog +## [v0.18.1](https://github.com/feast-dev/feast/tree/v0.18.1) (2022-02-15) + +[Full Changelog](https://github.com/feast-dev/feast/compare/v0.18.0...v0.18.1) + +**Fixed bugs:** + +- ODFVs raise a PerformanceWarning for very large sets of features [\#2293](https://github.com/feast-dev/feast/issues/2293) +- Don't require `snowflake` to always be installed [\#2309](https://github.com/feast-dev/feast/pull/2309) ([judahrand](https://github.com/judahrand)) +- podAnnotations Values in the feature-server chart [\#2304](https://github.com/feast-dev/feast/pull/2304) ([tpvasconcelos](https://github.com/tpvasconcelos)) +- Fixing the Java helm charts and adding a demo tutorial on how to use them [\#2298](https://github.com/feast-dev/feast/pull/2298) ([adchia](https://github.com/adchia)) +- avoid using transactions on OSS Redis [\#2296](https://github.com/feast-dev/feast/pull/2296) ([DvirDukhan](https://github.com/DvirDukhan)) +- Include infra objects in registry dump and fix Infra's from\_proto [\#2295](https://github.com/feast-dev/feast/pull/2295) ([adchia](https://github.com/adchia)) +- Expose snowflake credentials for unit testing [\#2288](https://github.com/feast-dev/feast/pull/2288) ([sfc-gh-madkins](https://github.com/sfc-gh-madkins)) +- Fix flaky tests \(test\_online\_store\_cleanup & test\_feature\_get\_online\_features\_types\_match\) [\#2276](https://github.com/feast-dev/feast/pull/2276) ([pyalex](https://github.com/pyalex)) + +**Merged pull requests:** + +- Remove old flag warning with the python feature server [\#2300](https://github.com/feast-dev/feast/pull/2300) ([adchia](https://github.com/adchia)) +- Use an OFFLINE schema for Snowflake offline store tests [\#2291](https://github.com/feast-dev/feast/pull/2291) ([sfc-gh-madkins](https://github.com/sfc-gh-madkins)) +- fix typos in markdown files 
[\#2289](https://github.com/feast-dev/feast/pull/2289) ([charliec443](https://github.com/charliec443)) +- Add -SNAPSHOT suffix to pom.xml version [\#2286](https://github.com/feast-dev/feast/pull/2286) ([tsotnet](https://github.com/tsotnet)) +- Update CONTRIBUTING.md [\#2282](https://github.com/feast-dev/feast/pull/2282) ([adchia](https://github.com/adchia)) + +## [v0.18.0](https://github.com/feast-dev/feast/tree/v0.18.0) (2022-02-05) + +[Full Changelog](https://github.com/feast-dev/feast/compare/v0.17.0...v0.18.0) + +**Implemented enhancements:** + +- Tutorial on validation of historical features [\#2277](https://github.com/feast-dev/feast/pull/2277) ([pyalex](https://github.com/pyalex)) +- Feast plan clean up [\#2256](https://github.com/feast-dev/feast/pull/2256) ([felixwang9817](https://github.com/felixwang9817)) +- Return `UNIX\_TIMESTAMP` as Python `datetime` [\#2244](https://github.com/feast-dev/feast/pull/2244) ([judahrand](https://github.com/judahrand)) +- Validating historical features against reference dataset with "great expectations" profiler [\#2243](https://github.com/feast-dev/feast/pull/2243) ([pyalex](https://github.com/pyalex)) +- Implement feature\_store.\_apply\_diffs to handle registry and infra diffs [\#2238](https://github.com/feast-dev/feast/pull/2238) ([felixwang9817](https://github.com/felixwang9817)) +- Compare Python objects instead of proto objects [\#2227](https://github.com/feast-dev/feast/pull/2227) ([felixwang9817](https://github.com/felixwang9817)) +- Modify feature\_store.plan to produce an InfraDiff [\#2211](https://github.com/feast-dev/feast/pull/2211) ([felixwang9817](https://github.com/felixwang9817)) +- Implement diff\_infra\_protos method for feast plan [\#2204](https://github.com/feast-dev/feast/pull/2204) ([felixwang9817](https://github.com/felixwang9817)) +- Persisting results of historical retrieval [\#2197](https://github.com/feast-dev/feast/pull/2197) ([pyalex](https://github.com/pyalex)) +- Merge feast-snowflake plugin into main repo with documentation [\#2193](https://github.com/feast-dev/feast/pull/2193) ([sfc-gh-madkins](https://github.com/sfc-gh-madkins)) +- Add InfraDiff class for feast plan [\#2190](https://github.com/feast-dev/feast/pull/2190) ([felixwang9817](https://github.com/felixwang9817)) +- Use FeatureViewProjection instead of FeatureView in ODFV [\#2186](https://github.com/feast-dev/feast/pull/2186) ([judahrand](https://github.com/judahrand)) + +**Fixed bugs:** + +- Set `created\_timestamp` and `last\_updated\_timestamp` fields [\#2266](https://github.com/feast-dev/feast/pull/2266) ([judahrand](https://github.com/judahrand)) +- Use `datetime.utcnow\(\)` to avoid timezone issues [\#2265](https://github.com/feast-dev/feast/pull/2265) ([judahrand](https://github.com/judahrand)) +- Fix Redis key serialization in java feature server [\#2264](https://github.com/feast-dev/feast/pull/2264) ([pyalex](https://github.com/pyalex)) +- modify registry.db s3 object initialization to work in S3 subdirectory with Java Feast Server [\#2259](https://github.com/feast-dev/feast/pull/2259) ([NalinGHub](https://github.com/NalinGHub)) +- Add snowflake environment variables to allow testing on snowflake infra [\#2258](https://github.com/feast-dev/feast/pull/2258) ([sfc-gh-madkins](https://github.com/sfc-gh-madkins)) +- Correct inconsistent dependency [\#2255](https://github.com/feast-dev/feast/pull/2255) ([judahrand](https://github.com/judahrand)) +- Fix for historical field mappings [\#2252](https://github.com/feast-dev/feast/pull/2252) 
([michelle-rascati-sp](https://github.com/michelle-rascati-sp)) +- Add backticks to left\_table\_query\_string [\#2250](https://github.com/feast-dev/feast/pull/2250) ([dmille](https://github.com/dmille)) +- Fix inference of BigQuery ARRAY types. [\#2245](https://github.com/feast-dev/feast/pull/2245) ([judahrand](https://github.com/judahrand)) +- Fix Redshift data creator [\#2242](https://github.com/feast-dev/feast/pull/2242) ([felixwang9817](https://github.com/felixwang9817)) +- Delete entity key from Redis only when all attached feature views are gone [\#2240](https://github.com/feast-dev/feast/pull/2240) ([pyalex](https://github.com/pyalex)) +- Tests for transformation service integration in java feature server [\#2236](https://github.com/feast-dev/feast/pull/2236) ([pyalex](https://github.com/pyalex)) +- Feature server helm chart produces invalid YAML [\#2234](https://github.com/feast-dev/feast/pull/2234) ([pyalex](https://github.com/pyalex)) +- Docker build fails for java feature server [\#2230](https://github.com/feast-dev/feast/pull/2230) ([pyalex](https://github.com/pyalex)) +- Fix ValueType.UNIX\_TIMESTAMP conversions [\#2219](https://github.com/feast-dev/feast/pull/2219) ([judahrand](https://github.com/judahrand)) +- Add on demand feature views deletion [\#2203](https://github.com/feast-dev/feast/pull/2203) ([corentinmarek](https://github.com/corentinmarek)) +- Compare only specs in integration tests [\#2200](https://github.com/feast-dev/feast/pull/2200) ([felixwang9817](https://github.com/felixwang9817)) +- Bump log4j-core from 2.17.0 to 2.17.1 in /java [\#2189](https://github.com/feast-dev/feast/pull/2189) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Support multiple application properties files \(incl from classpath\) [\#2187](https://github.com/feast-dev/feast/pull/2187) ([pyalex](https://github.com/pyalex)) +- Avoid requesting features from OnlineStore twice [\#2185](https://github.com/feast-dev/feast/pull/2185) ([judahrand](https://github.com/judahrand)) +- Speed up Datastore deletes by batch deletions with multithreading [\#2182](https://github.com/feast-dev/feast/pull/2182) ([ptoman-pa](https://github.com/ptoman-pa)) +- Fixes large payload runtime exception in Datastore \(issue 1633\) [\#2181](https://github.com/feast-dev/feast/pull/2181) ([ptoman-pa](https://github.com/ptoman-pa)) + +**Merged pull requests:** + +- Add link to community plugin for Spark offline store [\#2279](https://github.com/feast-dev/feast/pull/2279) ([adchia](https://github.com/adchia)) +- Fix broken links on documentation [\#2278](https://github.com/feast-dev/feast/pull/2278) ([adchia](https://github.com/adchia)) +- Publish alternative python package with FEAST\_USAGE=False by default [\#2275](https://github.com/feast-dev/feast/pull/2275) ([pyalex](https://github.com/pyalex)) +- Unify all helm charts versions [\#2274](https://github.com/feast-dev/feast/pull/2274) ([pyalex](https://github.com/pyalex)) +- Fix / update helm chart workflows to push the feast python server [\#2273](https://github.com/feast-dev/feast/pull/2273) ([adchia](https://github.com/adchia)) +- Update Feast Serving documentation with ways to run and debug locally [\#2272](https://github.com/feast-dev/feast/pull/2272) ([adchia](https://github.com/adchia)) +- Fix Snowflake docs [\#2270](https://github.com/feast-dev/feast/pull/2270) ([felixwang9817](https://github.com/felixwang9817)) +- Update local-feature-server.md [\#2269](https://github.com/feast-dev/feast/pull/2269) ([tsotnet](https://github.com/tsotnet)) +- Update 
docs to include Snowflake/DQM and removing unused docs from old versions of Feast [\#2268](https://github.com/feast-dev/feast/pull/2268) ([adchia](https://github.com/adchia)) +- Graduate Python feature server [\#2263](https://github.com/feast-dev/feast/pull/2263) ([felixwang9817](https://github.com/felixwang9817)) +- Fix benchmark tests at HEAD by passing in Snowflake secrets [\#2262](https://github.com/feast-dev/feast/pull/2262) ([adchia](https://github.com/adchia)) +- Refactor `pa\_to\_feast\_value\_type` [\#2246](https://github.com/feast-dev/feast/pull/2246) ([judahrand](https://github.com/judahrand)) +- Allow using pandas.StringDtype to support on-demand features with STRING type [\#2229](https://github.com/feast-dev/feast/pull/2229) ([pyalex](https://github.com/pyalex)) +- Bump jackson-databind from 2.10.1 to 2.10.5.1 in /java/common [\#2228](https://github.com/feast-dev/feast/pull/2228) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Split apply total parse repo [\#2226](https://github.com/feast-dev/feast/pull/2226) ([mickey-liu](https://github.com/mickey-liu)) +- Publish renamed java packages to maven central \(via Sonatype\) [\#2225](https://github.com/feast-dev/feast/pull/2225) ([pyalex](https://github.com/pyalex)) +- Make online store nullable [\#2224](https://github.com/feast-dev/feast/pull/2224) ([mirayyuce](https://github.com/mirayyuce)) +- Optimize `\_populate\_result\_rows\_from\_feature\_view` [\#2223](https://github.com/feast-dev/feast/pull/2223) ([judahrand](https://github.com/judahrand)) +- Update to newer `redis-py` [\#2221](https://github.com/feast-dev/feast/pull/2221) ([judahrand](https://github.com/judahrand)) +- Adding a local feature server test [\#2217](https://github.com/feast-dev/feast/pull/2217) ([adchia](https://github.com/adchia)) +- replace GetOnlineFeaturesResponse with GetOnlineFeaturesResponseV2 in… [\#2214](https://github.com/feast-dev/feast/pull/2214) ([tsotnet](https://github.com/tsotnet)) +- Updates to click==8.\* [\#2210](https://github.com/feast-dev/feast/pull/2210) ([diogommartins](https://github.com/diogommartins)) +- Bump protobuf-java from 3.12.2 to 3.16.1 in /java [\#2208](https://github.com/feast-dev/feast/pull/2208) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add default priority for bug reports [\#2207](https://github.com/feast-dev/feast/pull/2207) ([adchia](https://github.com/adchia)) +- Modify issue templates to automatically attach labels [\#2205](https://github.com/feast-dev/feast/pull/2205) ([adchia](https://github.com/adchia)) +- Python FeatureServer optimization [\#2202](https://github.com/feast-dev/feast/pull/2202) ([judahrand](https://github.com/judahrand)) +- Refactor all importer logic to belong in feast.importer [\#2199](https://github.com/feast-dev/feast/pull/2199) ([felixwang9817](https://github.com/felixwang9817)) +- Refactor `OnlineResponse.to\_dict\(\)` [\#2196](https://github.com/feast-dev/feast/pull/2196) ([judahrand](https://github.com/judahrand)) +- \[Java feature server\] Converge ServingService API to make Python and Java feature servers consistent [\#2166](https://github.com/feast-dev/feast/pull/2166) ([pyalex](https://github.com/pyalex)) +- Add a unit test for the tag\_proto\_objects method [\#2163](https://github.com/feast-dev/feast/pull/2163) ([achals](https://github.com/achals)) + + ## [v0.17.0](https://github.com/feast-dev/feast/tree/v0.17.0) (2021-12-31) [Full Changelog](https://github.com/feast-dev/feast/compare/v0.16.1...v0.17.0) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 
6918d7f1de..62e42d4df0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,15 +1,29 @@ # Development Guide: Main Feast Repository > Please see [Development Guide](https://docs.feast.dev/project/development-guide) for project level development instructions. -### Overview +## Overview This guide is targeted at developers looking to contribute to Feast components in the main Feast repository: - [Feast Python SDK / CLI](#feast-python-sdk-%2F-cli) +- [Feast Java Serving](#feast-java-serving) - [Feast Go Client](#feast-go-client) -- [Feast Terraform](#feast-terraform) ## Making a pull request +### Pull request checklist +A quick list of things to keep in mind as you're making changes: +- As you make changes + - Make your changes in a [forked repo](#forking-the-repo) (instead of making a branch on the main Feast repo) + - [Sign your commits](#signing-off-commits) as you go (to avoid DCO checks failing) + - [Rebase from master](#incorporating-upstream-changes-from-master) instead of using `git pull` on your PR branch + - Install [pre-commit hooks](#pre-commit-hooks) to ensure all the default linters / formatters are run when you push. +- When you make the PR + - Make a pull request from the forked repo you made + - Ensure you add a GitHub **label** (i.e. a kind tag to the PR (e.g. `kind/bug` or `kind/housekeeping`)) or else checks will fail. + - Ensure you leave a release note for any user facing changes in the PR. There is a field automatically generated in the PR request. You can write `NONE` in that field if there are no user facing changes. + - Please run tests locally before submitting a PR (e.g. for Python, the [local integration tests](#local-integration-tests)) + - Try to keep PRs smaller. This makes them easier to review. + ### Forking the repo Fork the Feast Github repo and clone your fork locally. Then make changes to a local branch to the fork. @@ -26,6 +40,9 @@ pre-commit install --hook-type pre-commit --hook-type pre-push 3. On push, the pre-commit hook will run. This runs `make format` and `make lint`. ### Signing off commits +> :warning: Warning: using the default integrations with IDEs like VSCode or IntelliJ will not sign commits. +> When you submit a PR, you'll have to re-sign commits to pass the DCO check. + Use git signoffs to sign your commits. See https://docs.github.com/en/github/authenticating-to-github/managing-commit-signature-verification for details @@ -50,7 +67,7 @@ Setting up your development environment for Feast Python SDK / CLI: 3. _Recommended:_ Create a virtual environment to isolate development dependencies to be installed ```sh # create & activate a virtual environment -python -v venv venv/ +python -m venv venv/ source venv/bin/activate ``` @@ -96,13 +113,27 @@ make test-python > - Ensure Feast Python SDK / CLI is not configured with configuration overrides (ie `~/.feast/config` should be empty). ### Integration Tests -To get tests running, you'll need to have GCP / AWS / Redis setup: +There are two sets of tests you can run: +1. Local integration tests (for faster development) +2. Full integration tests (requires cloud environment setups) + +#### Local integration tests +To get local integration tests running, you'll need to have Redis setup: Redis 1. Install Redis: [Quickstart](https://redis.io/topics/quickstart) 2. Run `redis-server` -GCP +Now run `make test-python-universal-local` + +#### Full integration tests +To test across clouds, on top of setting up Redis, you also need GCP / AWS / Snowflake setup. 
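(Editor's note: the CI jobs in this PR pass Snowflake credentials through `SNOWFLAKE_CI_*` environment variables. A minimal sketch of wiring the same variables up locally before running the full integration suite follows; it assumes the local test harness reads these same variable names, and the placeholder values are hypothetical.)

```sh
# Assumption: the local integration tests pick up the same SNOWFLAKE_CI_* variables
# that the GitHub Actions workflows in this PR set from repository secrets.
# Replace the placeholders with your own Snowflake account details.
export SNOWFLAKE_CI_DEPLOYMENT="<account_identifier>"
export SNOWFLAKE_CI_USER="<username>"
export SNOWFLAKE_CI_PASSWORD="<password>"
export SNOWFLAKE_CI_ROLE="<role>"
export SNOWFLAKE_CI_WAREHOUSE="<warehouse>"

# Then run the full integration suite, as described below.
make test-python-integration
```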
+ +> Note: you can manually control what tests are run today by inspecting +> [RepoConfiguration](https://github.com/feast-dev/feast/blob/master/sdk/python/tests/integration/feature_repos/repo_configuration.py) +> and commenting out tests that are added to `DEFAULT_FULL_REPO_CONFIGS` + +**GCP** 1. Install the [Cloud SDK](https://cloud.google.com/sdk/docs/install). 2. Then run login to gcloud: ``` @@ -111,15 +142,19 @@ GCP ``` 3. Export `GCLOUD_PROJECT=[your project]` to your .zshrc -AWS +**AWS** 1. TODO(adchia): flesh out setting up AWS login (or create helper script) 2. Modify `RedshiftDataSourceCreator` to use your credentials -Then run `make test-python-integration`. Note that for GCP / AWS, this will create new temporary tables / datasets. +**Snowflake** +- See https://signup.snowflake.com/ -## Feast Go Client -:warning: Feast Go Client will move to its own standalone repository in the future. +Then run `make test-python-integration`. Note that for Snowflake / GCP / AWS, this will create new temporary tables / datasets. + +## Feast Java Serving +See [Java contributing guide](java/CONTRIBUTING.md) +## Feast Go Client ### Environment Setup Setting up your development environment for Feast Go SDK: @@ -152,14 +187,4 @@ go vet Unit tests for the Feast Go Client can be run as follows: ```sh go test -``` - -## Feast on Kubernetes -:warning: Feast Terraform will move to its own standalone repository in the future. - -See the deployment guide of the respective cloud providers for how to work with these deployments: -- [Helm Deployment on Kubernetes](https://docs.feast.dev/feast-on-kubernetes/getting-started/install-feast/kubernetes-with-helm) -- [Terraform Deployment on Amazon EKS](https://docs.feast.dev/feast-on-kubernetes/getting-started/install-feast/kubernetes-amazon-eks-with-terraform) -- [Terraform Deployment on Azure AKS](https://docs.feast.dev/feast-on-kubernetes/getting-started/install-feast/kubernetes-azure-aks-with-terraform) -- [Terraform Deployment on Google Cloud GKE](https://docs.feast.dev/feast-on-kubernetes/getting-started/install-feast/google-cloud-gke-with-terraform) -- [Kustomize Deployment on IBM Cloud IKS or OpenShift](https://docs.feast.dev/feast-on-kubernetes/getting-started/install-feast/ibm-cloud-iks-with-kustomize) +``` \ No newline at end of file diff --git a/README.md b/README.md index 649bb909fa..ed40cd52cb 100644 --- a/README.md +++ b/README.md @@ -23,9 +23,9 @@ Feast is an open source feature store for machine learning. Feast is the fastest Please see our [documentation](https://docs.feast.dev/) for more information about the project. ## 📐 Architecture - +![](docs/assets/feast-marchitecture.png) -The above architecture is the minimal Feast deployment. Want to run the full Feast on GCP/AWS? Click [here](https://docs.feast.dev/how-to-guides/feast-gcp-aws). +The above architecture is the minimal Feast deployment. Want to run the full Feast on Snowflake/GCP/AWS? Click [here](https://docs.feast.dev/how-to-guides/feast-snowflake-gcp-aws). ## 🐣 Getting Started @@ -136,25 +136,27 @@ The list below contains the functionality that contributors are planning to deve * Want to speak to a Feast contributor? We are more than happy to jump on a call. Please schedule a time using [Calendly](https://calendly.com/d/x2ry-g5bb/meet-with-feast-team). 
* **Data Sources** + * [x] [Snowflake source](https://docs.feast.dev/reference/data-sources/snowflake) * [x] [Redshift source](https://docs.feast.dev/reference/data-sources/redshift) * [x] [BigQuery source](https://docs.feast.dev/reference/data-sources/bigquery) * [x] [Parquet file source](https://docs.feast.dev/reference/data-sources/file) * [x] [Synapse source (community plugin)](https://github.com/Azure/feast-azure) * [x] [Hive (community plugin)](https://github.com/baineng/feast-hive) * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) - * [x] Kafka source (with [push support into the online store](reference/alpha-stream-ingestion.md)) - * [x] [Snowflake source (community plugin)](https://github.com/sfc-gh-madkins/feast-snowflake) + * [x] [Spark (community plugin)](https://github.com/Adyen/feast-spark-offline-store) + * [x] Kafka source (with [push support into the online store](https://docs.feast.dev/reference/alpha-stream-ingestion)) * [ ] HTTP source * **Offline Stores** + * [x] [Snowflake](https://docs.feast.dev/reference/offline-stores/snowflake) * [x] [Redshift](https://docs.feast.dev/reference/offline-stores/redshift) * [x] [BigQuery](https://docs.feast.dev/reference/offline-stores/bigquery) * [x] [Synapse (community plugin)](https://github.com/Azure/feast-azure) * [x] [Hive (community plugin)](https://github.com/baineng/feast-hive) * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) + * [x] [Trino (community plugin)](https://github.com/Shopify/feast-trino) + * [x] [Spark (community plugin)](https://github.com/Adyen/feast-spark-offline-store) * [x] [In-memory / Pandas](https://docs.feast.dev/reference/offline-stores/file) * [x] [Custom offline store support](https://docs.feast.dev/how-to-guides/adding-a-new-offline-store) - * [x] [Snowflake (community plugin)](https://github.com/sfc-gh-madkins/feast-snowflake) - * [x] [Trino (communiuty plugin)](https://github.com/Shopify/feast-trino) * **Online Stores** * [x] [DynamoDB](https://docs.feast.dev/reference/online-stores/dynamodb) * [x] [Redis](https://docs.feast.dev/reference/online-stores/redis) @@ -189,7 +191,7 @@ The list below contains the functionality that contributors are planning to deve * [ ] Delete API * [ ] Feature Logging (for training) * **Data Quality Management (See [RFC](https://docs.google.com/document/d/110F72d4NTv80p35wDSONxhhPBqWRwbZXG4f9mNEMd98/edit))** - * [ ] Data profiling and validation (Great Expectations) (Planned for Q1 2022) + * [x] Data profiling and validation (Great Expectations) * [ ] Metric production * [ ] Training-serving skew detection * [ ] Drift detection @@ -197,10 +199,10 @@ The list below contains the functionality that contributors are planning to deve * [x] Python SDK for browsing feature registry * [x] CLI for browsing feature registry * [x] Model-centric feature tracking (feature services) + * [x] Amundsen integration (see [Feast extractor](https://github.com/amundsen-io/amundsen/blob/main/databuilder/databuilder/extractor/feast_extractor.py)) * [ ] REST API for browsing feature registry * [ ] Feast Web UI * [ ] Feature versioning - * [ ] Amundsen integration ## 🎓 Important Resources @@ -208,7 +210,7 @@ The list below contains the functionality that contributors are planning to deve Please refer to the official documentation at [Documentation](https://docs.feast.dev/) * [Quickstart](https://docs.feast.dev/getting-started/quickstart) * [Tutorials](https://docs.feast.dev/tutorials/tutorials-overview) - * [Running Feast with 
GCP/AWS](https://docs.feast.dev/how-to-guides/feast-gcp-aws) + * [Running Feast with Snowflake/GCP/AWS](https://docs.feast.dev/how-to-guides/feast-snowflake-gcp-aws) * [Change Log](https://github.com/feast-dev/feast/blob/master/CHANGELOG.md) * [Slack (#Feast)](https://slack.feast.dev/) diff --git a/docs/README.md b/docs/README.md index 1a76adbde3..f8b9af3c32 100644 --- a/docs/README.md +++ b/docs/README.md @@ -4,7 +4,7 @@ Feast (**Fea**ture **St**ore) is an operational data system for managing and serving machine learning features to models in production. Feast is able to serve feature data to models from a low-latency online store (for real-time prediction) or from an offline store (for scale-out batch scoring or model training). -![](.gitbook/assets/feast-marchitecture-211014.png) +![](assets/feast-marchitecture.png) ## Problems Feast Solves @@ -30,7 +30,7 @@ Feast addresses this problem by introducing feature reuse through a centralized **Feature discovery:** We also aim for Feast to include a first-class user interface for exploring and discovering entities and features. -**‌Feature validation:** We additionally aim for Feast to improve support for statistics generation of feature data and subsequent validation of these statistics. Current support is limited. +**Feature validation:** We additionally aim for Feast to improve support for statistics generation of feature data and subsequent validation of these statistics. Current support is limited. ## What Feast is not @@ -52,6 +52,6 @@ Explore the following resources to get started with Feast: * [Concepts](getting-started/concepts/) describes all important Feast API concepts * [Architecture](getting-started/architecture-and-components/) describes Feast's overall architecture. * [Tutorials](tutorials/tutorials-overview.md) shows full examples of using Feast in machine learning applications. -* [Running Feast with GCP/AWS](how-to-guides/feast-gcp-aws/) provides a more in-depth guide to using Feast. +* [Running Feast with Snowflake/GCP/AWS](how-to-guides/feast-snowflake-gcp-aws/) provides a more in-depth guide to using Feast. * [Reference](reference/feast-cli-commands.md) contains detailed API and design documents. * [Contributing](project/contributing.md) contains resources for anyone who wants to contribute to Feast. 
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 987a432ac9..deec3e9eed 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -16,10 +16,11 @@ * [Feature service](getting-started/concepts/feature-service.md) * [Feature retrieval](getting-started/concepts/feature-retrieval.md) * [Point-in-time joins](getting-started/concepts/point-in-time-joins.md) + * [Dataset](getting-started/concepts/dataset.md) * [Architecture](getting-started/architecture-and-components/README.md) * [Overview](getting-started/architecture-and-components/overview.md) * [Feature repository](getting-started/architecture-and-components/feature-repository.md) - * [Registry](getting-started/architecture-and-components/untitled.md) + * [Registry](getting-started/architecture-and-components/registry.md) * [Offline store](getting-started/architecture-and-components/offline-store.md) * [Online store](getting-started/architecture-and-components/online-store.md) * [Provider](getting-started/architecture-and-components/provider.md) @@ -32,17 +33,20 @@ * [Driver ranking](tutorials/driver-ranking-with-feast.md) * [Fraud detection on GCP](tutorials/fraud-detection.md) * [Real-time credit scoring on AWS](tutorials/real-time-credit-scoring-on-aws.md) +* [Driver stats on Snowflake](tutorials/driver-stats-on-snowflake.md) +* [Validating historical features with Great Expectations](tutorials/validating-historical-features.md) ## How-to Guides -* [Running Feast with GCP/AWS](how-to-guides/feast-gcp-aws/README.md) - * [Install Feast](how-to-guides/feast-gcp-aws/install-feast.md) - * [Create a feature repository](how-to-guides/feast-gcp-aws/create-a-feature-repository.md) - * [Deploy a feature store](how-to-guides/feast-gcp-aws/deploy-a-feature-store.md) - * [Build a training dataset](how-to-guides/feast-gcp-aws/build-a-training-dataset.md) - * [Load data into the online store](how-to-guides/feast-gcp-aws/load-data-into-the-online-store.md) - * [Read features from the online store](how-to-guides/feast-gcp-aws/read-features-from-the-online-store.md) +* [Running Feast with Snowflake/GCP/AWS](how-to-guides/feast-snowflake-gcp-aws/README.md) + * [Install Feast](how-to-guides/feast-snowflake-gcp-aws/install-feast.md) + * [Create a feature repository](how-to-guides/feast-snowflake-gcp-aws/create-a-feature-repository.md) + * [Deploy a feature store](how-to-guides/feast-snowflake-gcp-aws/deploy-a-feature-store.md) + * [Build a training dataset](how-to-guides/feast-snowflake-gcp-aws/build-a-training-dataset.md) + * [Load data into the online store](how-to-guides/feast-snowflake-gcp-aws/load-data-into-the-online-store.md) + * [Read features from the online store](how-to-guides/feast-snowflake-gcp-aws/read-features-from-the-online-store.md) * [Running Feast in production](how-to-guides/running-feast-in-production.md) +* [Deploying a Java feature server on Kubernetes](how-to-guides/fetching-java-features-k8s.md) * [Upgrading from Feast 0.9](https://docs.google.com/document/u/1/d/1AOsr\_baczuARjCpmZgVd8mCqTF4AZ49OEyU4Cn-uTT0/edit) * [Adding a custom provider](how-to-guides/creating-a-custom-provider.md) * [Adding a new online store](how-to-guides/adding-support-for-a-new-online-store.md) @@ -53,10 +57,12 @@ * [Data sources](reference/data-sources/README.md) * [File](reference/data-sources/file.md) + * [Snowflake](reference/data-sources/snowflake.md) * [BigQuery](reference/data-sources/bigquery.md) * [Redshift](reference/data-sources/redshift.md) * [Offline stores](reference/offline-stores/README.md) * [File](reference/offline-stores/file.md) + * 
[Snowflake](reference/offline-stores/snowflake.md) * [BigQuery](reference/offline-stores/bigquery.md) * [Redshift](reference/offline-stores/redshift.md) * [Online stores](reference/online-stores/README.md) @@ -71,9 +77,11 @@ * [Feature repository](reference/feature-repository/README.md) * [feature\_store.yaml](reference/feature-repository/feature-store-yaml.md) * [.feastignore](reference/feature-repository/feast-ignore.md) +* [Feature servers](reference/feature-servers/README.md) + * [Local feature server](reference/feature-servers/local-feature-server.md) +* [\[Alpha\] Data quality monitoring](reference/dqm.md) * [\[Alpha\] On demand feature view](reference/alpha-on-demand-feature-view.md) * [\[Alpha\] Stream ingestion](reference/alpha-stream-ingestion.md) -* [\[Alpha\] Local feature server](reference/feature-server.md) * [\[Alpha\] AWS Lambda feature server](reference/alpha-aws-lambda-feature-server.md) * [Feast CLI reference](reference/feast-cli-commands.md) * [Python API reference](http://rtd.feast.dev) diff --git a/docs/advanced/audit-logging.md b/docs/advanced/audit-logging.md deleted file mode 100644 index 1870a687bd..0000000000 --- a/docs/advanced/audit-logging.md +++ /dev/null @@ -1,132 +0,0 @@ -# Audit Logging - -{% hint style="warning" %} -This page applies to Feast 0.7. The content may be out of date for Feast 0.8+ -{% endhint %} - -## Introduction - -Feast provides audit logging functionality in order to debug problems and to trace the lineage of events. - -## Audit Log Types - -Audit Logs produced by Feast come in three favors: - -| Audit Log Type | Description | -| :--- | :--- | -| Message Audit Log | Logs service calls that can be used to track Feast request handling. Currently only gRPC request/response is supported. Enabling Message Audit Logs can be resource intensive and significantly increase latency, as such is not recommended on Online Serving. | -| Transition Audit Log | Logs transitions in status in resources managed by Feast \(ie an Ingestion Job becoming RUNNING\). | -| Action Audit Log | Logs actions performed on a specific resource managed by Feast \(ie an Ingestion Job is aborted\). | - -## Configuration - -| Audit Log Type | Description | -| :--- | :--- | -| Message Audit Log | Enabled when both `feast.logging.audit.enabled` and `feast.logging.audit.messageLogging.enabled` is set to `true` | -| Transition Audit Log | Enabled when `feast.logging.audit.enabled` is set to `true` | -| Action Audit Log | Enabled when `feast.logging.audit.enabled` is set to `true` | - -## JSON Format - -Audit Logs produced by Feast are written to the console similar to normal logs but in a structured, machine parsable JSON. Example of a Message Audit Log JSON entry produced: - -```text -{ - "message": { - "logType": "FeastAuditLogEntry", - "kind": "MESSAGE", - "statusCode": "OK", - "request": { - "filter": { - "project": "dummy", - } - }, - "application": "Feast", - "response": {}, - "method": "ListFeatureTables", - "identity": "105960238928959148073", - "service": "CoreService", - "component": "feast-core", - "id": "45329ea9-0d48-46c5-b659-4604f6193711", - "version": "0.10.0-SNAPSHOT" - }, - "hostname": "feast.core" - "timestamp": "2020-10-20T04:45:24Z", - "severity": "INFO", -} -``` - -## Log Entry Schema - -Fields common to all Audit Log Types: - -| Field | Description | -| :--- | :--- | -| `logType` | Log Type. Always set to `FeastAuditLogEntry`. Useful for filtering out Feast audit logs. | -| `application` | Application. Always set to `Feast`. 
| -| `component` | Feast Component producing the Audit Log. Set to `feast-core` for Feast Core and `feast-serving` for Feast Serving. Use to filtering out Audit Logs by component. | -| `version` | Version of Feast producing this Audit Log. Use to filtering out Audit Logs by version. | - -Fields in Message Audit Log Type - -| Field | Description | -| :--- | :--- | -| `id` | Generated UUID that uniquely identifies the service call. | -| `service` | Name of the Service that handled the service call. | -| `method` | Name of the Method that handled the service call. Useful for filtering Audit Logs by method \(ie `ApplyFeatureTable` calls\) | -| `request` | Full request submitted by client in the service call as JSON. | -| `response` | Full response returned to client by the service after handling the service call as JSON. | -| `identity` | Identity of the client making the service call as an user Id. Only set when Authentication is enabled. | -| `statusCode` | The status code returned by the service handling the service call \(ie `OK` if service call handled without error\). | - -Fields in Action Audit Log Type - -| Field | Description | -| :--- | :--- | -| `action` | Name of the action taken on the resource. | -| `resource.type` | Type of resource of which the action was taken on \(i.e `FeatureTable`\) | -| resource.id | Identifier specifying the specific resource of which the action was taken on. | - -Fields in Transition Audit Log Type - -| Field | Description | -| :--- | :--- | -| `status` | The new status that the resource transitioned to | -| `resource.type` | Type of resource of which the transition occurred \(i.e `FeatureTable`\) | -| `resource.id` | Identifier specifying the specific resource of which the transition occurred. | - -## Log Forwarder - -Feast currently only supports forwarding Request/Response \(Message Audit Log Type\) logs to an external fluentD service with `feast.**` Fluentd tag. - -### Request/Response Log Example - -```text -{ - "id": "45329ea9-0d48-46c5-b659-4604f6193711", - "service": "CoreService" - "status_code": "OK", - "identity": "105960238928959148073", - "method": "ListProjects", - "request": {}, - "response": { - "projects": [ - "default", "project1", "project2" - ] - } - "release_name": 506.457.14.512 -} -``` - -### Configuration - -The Fluentd Log Forwarder configured with the with the following configuration options in `application.yml`: - -| Settings | Description | -| :--- | :--- | -| `feast.logging.audit.messageLogging.destination` | `fluentd` | -| `feast.logging.audit.messageLogging.fluentdHost` | `localhost` | -| `feast.logging.audit.messageLogging.fluentdPort` | `24224` | - -When using Fluentd as the Log forwarder, a Feast `release_name` can be logged instead of the IP address \(eg. IP of Kubernetes pod deployment\), by setting an environment variable `RELEASE_NAME` when deploying Feast. - diff --git a/docs/advanced/metrics.md b/docs/advanced/metrics.md deleted file mode 100644 index 5ea69f883f..0000000000 --- a/docs/advanced/metrics.md +++ /dev/null @@ -1,59 +0,0 @@ -# Metrics - -{% hint style="warning" %} -This page applies to Feast 0.7. 
The content may be out of date for Feast 0.8+ -{% endhint %} - -## Overview - -Feast Components export metrics that can provide insight into Feast behavior: - -* [Feast Ingestion Jobs can be configured to push metrics into StatsD](metrics.md#pushing-ingestion-metrics-to-statsd) -* [Prometheus can be configured to scrape metrics from Feast Core and Serving.](metrics.md#exporting-feast-metrics-to-prometheus) - -See the [Metrics Reference ](../reference/metrics-reference.md)for documentation on metrics are exported by Feast. - -{% hint style="info" %} -Feast Job Controller currently does not export any metrics on its own. However its `application.yml` is used to configure metrics export for ingestion jobs. -{% endhint %} - -## Pushing Ingestion Metrics to StatsD - -### **Feast Ingestion Job** - -Feast Ingestion Job can be configured to push Ingestion metrics to a StatsD instance. Metrics export to StatsD for Ingestion Job is configured in Job Controller's `application.yml` under `feast.jobs.metrics` - -```yaml - feast: - jobs: - metrics: - # Enables Statd metrics export if true. - enabled: true - type: statsd - # Host and port of the StatsD instance to export to. - host: localhost - port: 9125 -``` - -{% hint style="info" %} -If you need Ingestion Metrics in Prometheus or some other metrics backend, use a metrics forwarder to forward Ingestion Metrics from StatsD to the metrics backend of choice. \(ie Use [`prometheus-statsd-exporter`](https://github.com/prometheus/statsd_exporter) to forward metrics to Prometheus\). -{% endhint %} - -## Exporting Feast Metrics to Prometheus - -### **Feast Core and Serving** - -Feast Core and Serving exports metrics to a Prometheus instance via Prometheus scraping its `/metrics` endpoint. Metrics export to Prometheus for Core and Serving can be configured via their corresponding `application.yml` - -```yaml -server: - # Configures the port where metrics are exposed via /metrics for Prometheus to scrape. - port: 8081 -``` - -[Direct Prometheus](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) to scrape directly from Core and Serving's `/metrics` endpoint. - -## Further Reading - -See the [Metrics Reference ](../reference/metrics-reference.md)for documentation on metrics are exported by Feast. - diff --git a/docs/advanced/security.md b/docs/advanced/security.md deleted file mode 100644 index 769260074f..0000000000 --- a/docs/advanced/security.md +++ /dev/null @@ -1,480 +0,0 @@ ---- -description: 'Secure Feast with SSL/TLS, Authentication and Authorization.' ---- - -# Security - -{% hint style="warning" %} -This page applies to Feast 0.7. The content may be out of date for Feast 0.8+ -{% endhint %} - -### Overview - -![Overview of Feast's Security Methods.](../.gitbook/assets/untitled-25-1-%20%282%29%20%282%29%20%282%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%281%29%20%281%29.jpg) - -Feast supports the following security methods: - -* [SSL/TLS on messaging between Feast Core, Feast Online Serving and Feast SDKs.](security.md#2-ssl-tls) -* [Authentication to Feast Core and Serving based on Open ID Connect ID tokens.](security.md#3-authentication) -* [Authorization based on project membership and delegating authorization grants to external Authorization Server.](security.md#4-authorization) - -[Important considerations when integrating Authentication/Authorization](security.md#5-authentication-and-authorization). 
- -### **SSL/TLS** - -Feast supports SSL/TLS encrypted inter-service communication among Feast Core, Feast Online Serving, and Feast SDKs. - -#### Configuring SSL/TLS on Feast Core and Feast Serving - -The following properties configure SSL/TLS. These properties are located in their corresponding `application.yml`files: - -| Configuration Property | Description | -| :--- | :--- | -| `grpc.server.security.enabled` | Enables SSL/TLS functionality if `true` | -| `grpc.server.security.certificateChain` | Provide the path to certificate chain. | -| `grpc.server.security.privateKey` | Provide the to private key. | - -> Read more on enabling SSL/TLS in the[ gRPC starter docs.](https://yidongnan.github.io/grpc-spring-boot-starter/en/server/security.html#enable-transport-layer-security) - -#### Configuring SSL/TLS on Python SDK/CLI - -To enable SSL/TLS in the [Feast Python SDK](https://api.docs.feast.dev/python/#feast.client.Client) or [Feast CLI](../getting-started/connect-to-feast/feast-cli.md), set the config options via `feast config`: - -| Configuration Option | Description | -| :--- | :--- | -| `core_enable_ssl` | Enables SSL/TLS functionality on connections to Feast core if `true` | -| `serving_enable_ssl` | Enables SSL/TLS functionality on connections to Feast Online Serving if `true` | -| `core_server_ssl_cert` | Optional. Specifies the path of the root certificate used to verify Core Service's identity. If omitted, uses system certificates. | -| `serving_server_ssl_cert` | Optional. Specifies the path of the root certificate used to verify Serving Service's identity. If omitted, uses system certificates. | - -{% hint style="info" %} -The Python SDK automatically uses SSL/TLS when connecting to Feast Core and Feast Online Serving via port 443. -{% endhint %} - -#### Configuring SSL/TLS on Go SDK - -Configure SSL/TLS on the [Go SDK](https://godoc.org/github.com/feast-dev/feast/sdk/go) by passing configuration via `SecurityConfig`: - -```go -cli, err := feast.NewSecureGrpcClient("localhost", 6566, feast.SecurityConfig{ - EnableTLS: true, - TLSCertPath: "/path/to/cert.pem", -})Option -``` - -| Config Option | Description | -| :--- | :--- | -| `EnableTLS` | Enables SSL/TLS functionality when connecting to Feast if `true` | -| `TLSCertPath` | Optional. Provides the path of the root certificate used to verify Feast Service's identity. If omitted, uses system certificates. | - -#### Configuring SSL/TLS on **Java** SDK - -Configure SSL/TLS on the [Feast Java SDK](https://javadoc.io/doc/dev.feast/feast-sdk) by passing configuration via `SecurityConfig`: - -```java -FeastClient client = FeastClient.createSecure("localhost", 6566, - SecurityConfig.newBuilder() - .setTLSEnabled(true) - .setCertificatePath(Optional.of("/path/to/cert.pem")) - .build()); -``` - -| Config Option | Description | -| :--- | :--- | -| `setTLSEnabled()` | Enables SSL/TLS functionality when connecting to Feast if `true` | -| `setCertificatesPath()` | Optional. Set the path of the root certificate used to verify Feast Service's identity. If omitted, uses system certificates. | - -### **Authentication** - -{% hint style="warning" %} -To prevent man in the middle attacks, we recommend that SSL/TLS be implemented prior to authentication. -{% endhint %} - -Authentication can be implemented to identify and validate client requests to Feast Core and Feast Online Serving. 
Currently, Feast uses[ ](https://auth0.com/docs/protocols/openid-connect-protocol)[Open ID Connect \(OIDC\)](https://auth0.com/docs/protocols/openid-connect-protocol) ID tokens \(i.e. [Google Open ID Connect](https://developers.google.com/identity/protocols/oauth2/openid-connect)\) to authenticate client requests. - -#### Configuring Authentication in Feast Core and Feast Online Serving - -Authentication can be configured for Feast Core and Feast Online Serving via properties in their corresponding `application.yml` files: - -| Configuration Property | Description | -| :--- | :--- | -| `feast.security.authentication.enabled` | Enables Authentication functionality if `true` | -| `feast.security.authentication.provider` | Authentication Provider type. Currently only supports `jwt` | -| `feast.security.authentication.option.jwkEndpointURI` | HTTPS URL used by Feast to retrieved the [JWK](https://tools.ietf.org/html/rfc7517) used to verify OIDC ID tokens. | - -{% hint style="info" %} -`jwkEndpointURI`is set to retrieve Google's OIDC JWK by default, allowing OIDC ID tokens issued by Google to be used for authentication. -{% endhint %} - -Behind the scenes, Feast Core and Feast Online Serving authenticate by: - -* Extracting the OIDC ID token `TOKEN`from gRPC metadata submitted with request: - -```text -('authorization', 'Bearer: TOKEN') -``` - -* Validates token's authenticity using the JWK retrieved from the `jwkEndpointURI` - -#### **Authenticating Serving with Feast Core** - -Feast Online Serving communicates with Feast Core during normal operation. When both authentication and authorization are enabled on Feast Core, Feast Online Serving is forced to authenticate its requests to Feast Core. Otherwise, Feast Online Serving produces an Authentication failure error when connecting to Feast Core. - - Properties used to configure Serving authentication via `application.yml`: - -| Configuration Property | Description | -| :--- | :--- | -| `feast.core-authentication.enabled` | Requires Feast Online Serving to authenticate when communicating with Feast Core. | -| `feast.core-authentication.provider` | Selects provider Feast Online Serving uses to retrieve credentials then used to authenticate requests to Feast Core. Valid providers are `google` and `oauth`. | - -{% tabs %} -{% tab title="Google Provider" %} -Google Provider automatically extracts the credential from the credential JSON file. - -* Set [`GOOGLE_APPLICATION_CREDENTIALS` environment variable](https://cloud.google.com/docs/authentication/getting-started#setting_the_environment_variable) to the path of the credential in the JSON file. -{% endtab %} - -{% tab title="OAuth Provider" %} -OAuth Provider makes an OAuth [client credentials](https://auth0.com/docs/flows/call-your-api-using-the-client-credentials-flow) request to obtain the credential. OAuth requires the following options to be set at `feast.security.core-authentication.options.`: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-| Configuration Property | Description |
-| :--- | :--- |
-| `oauth_url` | Target URL receiving the client-credentials request. |
-| `grant_type` | OAuth grant type. Set as `client_credentials`. |
-| `client_id` | Client Id used in the client-credentials request. |
-| `client_secret` | Client secret used in the client-credentials request. |
-| `audience` | Target audience of the credential. Set to the host URL of Feast Core \(i.e. `https://localhost` if Feast Core listens on `localhost`\). |
-| `jwkEndpointURI` | HTTPS URL used to retrieve a JWK that can be used to decode the credential. |
-{% endtab %} -{% endtabs %} - -#### **Enabling Authentication in Python SDK/CLI** - -Configure the [Feast Python SDK](https://api.docs.feast.dev/python/) and [Feast CLI](../getting-started/connect-to-feast/feast-cli.md) to use authentication via `feast config`: - -```python -$ feast config set enable_auth true -``` - -| Configuration Option | Description | -| :--- | :--- | -| `enable_auth` | Enables authentication functionality if set to `true`. | -| `auth_provider` | Use an authentication provider to obtain a credential for authentication. Currently supports `google` and `oauth`. | -| `auth_token` | Manually specify a static token for use in authentication. Overrules `auth_provider` if both are set. | - -{% tabs %} -{% tab title="Google Provider" %} -Google Provider automatically finds and uses Google Credentials to authenticate requests: - -* Google Provider automatically uses established credentials for authenticating requests if you are already authenticated with the `gcloud` CLI via: - -```text -$ gcloud auth application-default login -``` - -* Alternatively Google Provider can be configured to use the credentials in the JSON file via`GOOGLE_APPLICATION_CREDENTIALS` environmental variable \([Google Cloud Authentication documentation](https://cloud.google.com/docs/authentication/getting-started)\): - -```bash -$ export GOOGLE_APPLICATION_CREDENTIALS="path/to/key.json" -``` -{% endtab %} - -{% tab title="OAuth Provider" %} -OAuth Provider makes an OAuth [client credentials](https://auth0.com/docs/flows/call-your-api-using-the-client-credentials-flow) request to obtain the credential/token used to authenticate Feast requests. The OAuth provider requires the following config options to be set via `feast config`: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-| Configuration Property | Description |
-| :--- | :--- |
-| `oauth_token_request_url` | Target URL receiving the client-credentials request. |
-| `oauth_grant_type` | OAuth grant type. Set as `client_credentials`. |
-| `oauth_client_id` | Client Id used in the client-credentials request. |
-| `oauth_client_secret` | Client secret used in the client-credentials request. |
-| `oauth_audience` | Target audience of the credential. Set to the host URL of the target Service \(`https://localhost` if the Service listens on `localhost`\). |
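-For example, assuming an OAuth identity provider is available, these options could be set with `feast config` roughly as follows \(a sketch only; the URLs, client Id, and secret are placeholders\):
-
-```bash
-# Sketch: substitute your own provider URL, audience, client Id, and secret.
-$ feast config set enable_auth true
-$ feast config set auth_provider oauth
-$ feast config set oauth_grant_type client_credentials
-$ feast config set oauth_client_id some_id
-$ feast config set oauth_client_secret some_secret
-$ feast config set oauth_audience https://localhost
-$ feast config set oauth_token_request_url https://oauth.endpoint/auth
-```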
-{% endtab %} -{% endtabs %} - -#### **Enabling Authentication in Go SDK** - -Configure the [Feast Java SDK](https://javadoc.io/doc/dev.feast/feast-sdk/latest/com/gojek/feast/package-summary.html) to use authentication by specifying the credential via `SecurityConfig`: - -```go -// error handling omitted. -// Use Google Credential as provider. -cred, _ := feast.NewGoogleCredential("localhost:6566") -cli, _ := feast.NewSecureGrpcClient("localhost", 6566, feast.SecurityConfig{ - // Specify the credential to provide tokens for Feast Authentication. - Credential: cred, -}) -``` - -{% tabs %} -{% tab title="Google Credential" %} -Google Credential uses Service Account credentials JSON file set via`GOOGLE_APPLICATION_CREDENTIALS` environmental variable \([Google Cloud Authentication documentation](https://cloud.google.com/docs/authentication/getting-started)\) to obtain tokens for Authenticating Feast requests: - -* Exporting `GOOGLE_APPLICATION_CREDENTIALS` - -```bash -$ export GOOGLE_APPLICATION_CREDENTIALS="path/to/key.json" -``` - -* Create a Google Credential with target audience. - -```go -cred, _ := feast.NewGoogleCredential("localhost:6566") -``` - -> Target audience of the credential should be set to host URL of target Service. \(ie `https://localhost` if Service listens on `localhost`\): -{% endtab %} - -{% tab title="OAuth Credential" %} -OAuth Credential makes an OAuth [client credentials](https://auth0.com/docs/flows/call-your-api-using-the-client-credentials-flow) request to obtain the credential/token used to authenticate Feast requests: - -* Create OAuth Credential with parameters: - -```go -cred := feast.NewOAuthCredential("localhost:6566", "client_id", "secret", "https://oauth.endpoint/auth") -``` - - - - - - - - - - - - - - - - - - - - - - - - - - -
-| Parameter | Description |
-| :--- | :--- |
-| `audience` | Target audience of the credential. Set to the host URL of the target Service \(`https://localhost` if the Service listens on `localhost`\). |
-| `clientId` | Client Id used in the client-credentials request. |
-| `clientSecret` | Client secret used in the client-credentials request. |
-| `endpointURL` | Target URL to make the client-credentials request to. |
-{% endtab %} -{% endtabs %} - -#### **Enabling Authentication in Java SDK** - -Configure the [Feast Java SDK](https://javadoc.io/doc/dev.feast/feast-sdk/latest/com/gojek/feast/package-summary.html) to use authentication by setting credentials via `SecurityConfig`: - -```java -// Use GoogleAuthCredential as provider. -CallCredentials credentials = new GoogleAuthCredentials( - Map.of("audience", "localhost:6566")); - -FeastClient client = FeastClient.createSecure("localhost", 6566, - SecurityConfig.newBuilder() - // Specify the credentials to provide tokens for Feast Authentication. - .setCredentials(Optional.of(creds)) - .build()); -``` - -{% tabs %} -{% tab title="GoogleAuthCredentials" %} -GoogleAuthCredentials uses Service Account credentials JSON file set via`GOOGLE_APPLICATION_CREDENTIALS` environmental variable \([Google Cloud authentication documentation](https://cloud.google.com/docs/authentication/getting-started)\) to obtain tokens for Authenticating Feast requests: - -* Exporting `GOOGLE_APPLICATION_CREDENTIALS` - -```bash -$ export GOOGLE_APPLICATION_CREDENTIALS="path/to/key.json" -``` - -* Create a Google Credential with target audience. - -```java -CallCredentials credentials = new GoogleAuthCredentials( - Map.of("audience", "localhost:6566")); -``` - -> Target audience of the credentials should be set to host URL of target Service. \(ie `https://localhost` if Service listens on `localhost`\): -{% endtab %} - -{% tab title="OAuthCredentials" %} -OAuthCredentials makes an OAuth [client credentials](https://auth0.com/docs/flows/call-your-api-using-the-client-credentials-flow) request to obtain the credential/token used to authenticate Feast requests: - -* Create OAuthCredentials with parameters: - -```java -CallCredentials credentials = new OAuthCredentials(Map.of( - "audience": "localhost:6566", - "grant_type", "client_credentials", - "client_id", "some_id", - "client_id", "secret", - "oauth_url", "https://oauth.endpoint/auth", - "jwkEndpointURI", "https://jwk.endpoint/jwk")); -``` - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-| Parameter | Description |
-| :--- | :--- |
-| `audience` | Target audience of the credential. Set to the host URL of the target Service \(`https://localhost` if the Service listens on `localhost`\). |
-| `grant_type` | OAuth grant type. Set as `client_credentials`. |
-| `client_id` | Client Id used in the client-credentials request. |
-| `client_secret` | Client secret used in the client-credentials request. |
-| `oauth_url` | Target URL to make the client-credentials request to obtain the credential. |
-| `jwkEndpointURI` | HTTPS URL used to retrieve a JWK that can be used to decode the credential. |
-{% endtab %} -{% endtabs %} - -### Authorization - -{% hint style="info" %} -Authorization requires that authentication be configured to obtain a user identity for use in authorizing requests. -{% endhint %} - -Authorization provides access control to FeatureTables and/or Features based on project membership. Users who are members of a project are authorized to: - -* Create and/or Update a Feature Table in the Project. -* Retrieve Feature Values for Features in that Project. - -#### **Authorization API/Server** - -![Feast Authorization Flow](../.gitbook/assets/rsz_untitled23%20%282%29%20%282%29%20%282%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29.jpg) - -Feast delegates Authorization grants to an external Authorization Server that implements the [Authorization Open API specification](https://github.com/feast-dev/feast/blob/master/common/src/main/resources/api.yaml). - -* Feast checks whether a user is authorized to make a request by making a `checkAccessRequest` to the Authorization Server. -* The Authorization Server should return a `AuthorizationResult` with whether the user is allowed to make the request. - -Authorization can be configured for Feast Core and Feast Online Serving via properties in their corresponding `application.yml` - -| Configuration Property | Description | -| :--- | :--- | -| `feast.security.authorization.enabled` | Enables authorization functionality if `true`. | -| `feast.security.authorization.provider` | Authentication Provider type. Currently only supports `http` | -| `feast.security.authorization.option.authorizationUrl` | URL endpoint of Authorization Server to make check access requests to. | -| `feast.security.authorization.option.subjectClaim` | Optional. Name of the claim of the to extract from the ID Token to include in the check access request as Subject. | - -{% hint style="info" %} -This example of the [Authorization Server with Keto](https://github.com/feast-dev/feast-keto-auth-server) can be used as a reference implementation for implementing an Authorization Server that Feast supports. -{% endhint %} - -### **Authentication & Authorization** - -When using Authentication & Authorization, consider: - -* Enabling Authentication without Authorization makes authentication **optional**. You can still send unauthenticated requests. -* Enabling Authorization forces all requests to be authenticated. Requests that are not authenticated are **dropped.** - - - diff --git a/docs/advanced/troubleshooting.md b/docs/advanced/troubleshooting.md deleted file mode 100644 index 1060466d30..0000000000 --- a/docs/advanced/troubleshooting.md +++ /dev/null @@ -1,136 +0,0 @@ -# Troubleshooting - -{% hint style="warning" %} -This page applies to Feast 0.7. The content may be out of date for Feast 0.8+ -{% endhint %} - -If at any point in time you cannot resolve a problem, please see the [Community](../community.md) section for reaching out to the Feast community. - -### How can I verify that all services are operational? - -#### Docker Compose - -The containers should be in an `up` state: - -```text -docker ps -``` - -#### Google Kubernetes Engine - -All services should either be in a `RUNNING` state or `COMPLETED`state: - -```text -kubectl get pods -``` - -### How can I verify that I can connect to all services? - -First locate the the host and port of the Feast Services. 
- -#### **Docker Compose \(from inside the docker network\)** - -You will probably need to connect using the hostnames of services and standard Feast ports: - -```bash -export FEAST_CORE_URL=core:6565 -export FEAST_ONLINE_SERVING_URL=online_serving:6566 -export FEAST_HISTORICAL_SERVING_URL=historical_serving:6567 -export FEAST_JOBCONTROLLER_URL=jobcontroller:6570 -``` - -#### **Docker Compose \(from outside the docker network\)** - -You will probably need to connect using `localhost` and standard ports: - -```bash -export FEAST_CORE_URL=localhost:6565 -export FEAST_ONLINE_SERVING_URL=localhost:6566 -export FEAST_HISTORICAL_SERVING_URL=localhost:6567 -export FEAST_JOBCONTROLLER_URL=localhost:6570 -``` - -#### **Google Kubernetes Engine \(GKE\)** - -You will need to find the external IP of one of the nodes as well as the NodePorts. Please make sure that your firewall is open for these ports: - -```bash -export FEAST_IP=$(kubectl describe nodes | grep ExternalIP | awk '{print $2}' | head -n 1) -export FEAST_CORE_URL=${FEAST_IP}:32090 -export FEAST_ONLINE_SERVING_URL=${FEAST_IP}:32091 -export FEAST_HISTORICAL_SERVING_URL=${FEAST_IP}:32092 -``` - -`netcat`, `telnet`, or even `curl` can be used to test whether all services are available and ports are open, but `grpc_cli` is the most powerful. It can be installed from [here](https://github.com/grpc/grpc/blob/master/doc/command_line_tool.md). - -#### Testing Connectivity From Feast Services: - -Use `grpc_cli` to test connetivity by listing the gRPC methods exposed by Feast services: - -```bash -grpc_cli ls ${FEAST_CORE_URL} feast.core.CoreService -``` - -```bash -grpc_cli ls ${FEAST_JOBCONTROLLER_URL} feast.core.JobControllerService -``` - -```bash -grpc_cli ls ${FEAST_HISTORICAL_SERVING_URL} feast.serving.ServingService -``` - -```bash -grpc_cli ls ${FEAST_ONLINE_SERVING_URL} feast.serving.ServingService -``` - -### How can I print logs from the Feast Services? - -Feast will typically have three services that you need to monitor if something goes wrong. - -* Feast Core -* Feast Job Controller -* Feast Serving \(Online\) -* Feast Serving \(Batch\) - -In order to print the logs from these services, please run the commands below. 
- -#### Docker Compose - -Use `docker-compose logs` to obtain Feast component logs: - -```text - docker logs -f feast_core_1 -``` - -```text - docker logs -f feast_jobcontroller_1 -``` - -```text -docker logs -f feast_historical_serving_1 -``` - -```text -docker logs -f feast_online_serving_1 -``` - -#### Google Kubernetes Engine - -Use `kubectl logs` to obtain Feast component logs: - -```text -kubectl logs $(kubectl get pods | grep feast-core | awk '{print $1}') -``` - -```text -kubectl logs $(kubectl get pods | grep feast-jobcontroller | awk '{print $1}') -``` - -```text -kubectl logs $(kubectl get pods | grep feast-serving-batch | awk '{print $1}') -``` - -```text -kubectl logs $(kubectl get pods | grep feast-serving-online | awk '{print $1}') -``` - diff --git a/docs/advanced/upgrading.md b/docs/advanced/upgrading.md deleted file mode 100644 index 3c7b95d544..0000000000 --- a/docs/advanced/upgrading.md +++ /dev/null @@ -1,113 +0,0 @@ -# Upgrading Feast - -### Migration from v0.6 to v0.7 - -#### Feast Core Validation changes - -In v0.7, Feast Core no longer accepts starting with number \(0-9\) and using dash in names for: - -* Project -* Feature Set -* Entities -* Features - -Migrate all project, feature sets, entities, feature names: - -* with ‘-’ by recreating them with '-' replace with '\_' -* recreate any names with a number \(0-9\) as the first letter to one without. - -Feast now prevents feature sets from being applied if no store is subscribed to that Feature Set. - -* Ensure that a store is configured to subscribe to the Feature Set before applying the Feature Set. - -#### Feast Core's Job Coordinator is now Feast Job Controller - -In v0.7, Feast Core's Job Coordinator has been decoupled from Feast Core and runs as a separate Feast Job Controller application. See its [Configuration reference](../reference/configuration-reference.md#2-feast-core-serving-and-job-controller) for how to configure Feast Job Controller. - -**Ingestion Job API** - -In v0.7, the following changes are made to the Ingestion Job API: - -* Changed List Ingestion Job API to return list of `FeatureSetReference` instead of list of FeatureSet in response. -* Moved `ListIngestionJobs`, `StopIngestionJob`, `RestartIngestionJob` calls from `CoreService` to `JobControllerService`. -* Python SDK/CLI: Added new [Job Controller client ](https://github.com/feast-dev/feast/blob/master/sdk/python/feast/contrib/job_controller/client.py)and `jobcontroller_url` config option. - -Users of the Ingestion Job API via gRPC should migrate by: - -* Add new client to connect to Job Controller endpoint to call `JobControllerService` and call `ListIngestionJobs`, `StopIngestionJob`, `RestartIngestionJob` from new client. -* Migrate code to accept feature references instead of feature sets returned in `ListIngestionJobs` response. - -Users of Ingestion Job via Python SDK \(ie `feast ingest-jobs list` or `client.stop_ingest_job()` etc.\) should migrate by: - -* `ingest_job()`methods only: Create a new separate [Job Controller client](https://github.com/feast-dev/feast/blob/master/sdk/python/feast/contrib/job_controller/client.py) to connect to the job controller and call `ingest_job()` methods using the new client. -* Configure the Feast Job Controller endpoint url via `jobcontroller_url` config option. 
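-For example, the Job Controller endpoint could be set via `feast config` along these lines \(a sketch; `localhost:6570` is a placeholder matching the default Job Controller port used elsewhere in these docs\):
-
-```bash
-# Sketch: point the Python SDK/CLI at the Feast Job Controller endpoint.
-$ feast config set jobcontroller_url localhost:6570
-```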
- -#### Configuration Properties Changes - -* Rename `feast.jobs.consolidate-jobs-per-source property` to `feast.jobs.controller.consolidate-jobs-per-sources` -* Rename`feast.security.authorization.options.subjectClaim` to `feast.security.authentication.options.subjectClaim` -* Rename `feast.logging.audit.messageLoggingEnabled` to `feast.audit.messageLogging.enabled` - -### Migration from v0.5 to v0.6 - -#### Database schema - -In Release 0.6 we introduced [Flyway](https://flywaydb.org/) to handle schema migrations in PostgreSQL. Flyway is integrated into `core` and for now on all migrations will be run automatically on `core` start. It uses table `flyway_schema_history` in the same database \(also created automatically\) to keep track of already applied migrations. So no specific maintenance should be needed. - -If you already have existing deployment of feast 0.5 - Flyway will detect existing tables and omit first baseline migration. - -After `core` started you should have `flyway_schema_history` look like this - -```text ->> select version, description, script, checksum from flyway_schema_history - -version | description | script | checksum ---------+-----------------------------------------+-----------------------------------------+------------ - 1 | << Flyway Baseline >> | << Flyway Baseline >> | - 2 | RELEASE 0.6 Generalizing Source AND ... | V2__RELEASE_0.6_Generalizing_Source_... | 1537500232 -``` - -In this release next major schema changes were done: - -* Source is not shared between FeatureSets anymore. It's changed to 1:1 relation - - and source's primary key is now auto-incremented number. - -* Due to generalization of Source `sources.topics` & `sources.bootstrap_servers` columns were deprecated. - - They will be replaced with `sources.config`. Data migration handled by code when respected Source is used. - - `topics` and `bootstrap_servers` will be deleted in the next release. - -* Job \(table `jobs`\) is no longer connected to `Source` \(table `sources`\) since it uses consolidated source for optimization purposes. - - All data required by Job would be embedded in its table. - -New Models \(tables\): - -* feature\_statistics - -Minor changes: - -* FeatureSet has new column version \(see [proto](https://github.com/feast-dev/feast/blob/master/protos/feast/core/FeatureSet.proto) for details\) -* Connecting table `jobs_feature_sets` in many-to-many relation between jobs & feature sets - - has now `version` and `delivery_status`. - -### Migration from v0.4 to v0.6 - -#### Database - -For all versions earlier than 0.5 seamless migration is not feasible due to earlier breaking changes and creation of new database will be required. - -Since database will be empty - first \(baseline\) migration would be applied: - -```text ->> select version, description, script, checksum from flyway_schema_history - -version | description | script | checksum ---------+-----------------------------------------+-----------------------------------------+------------ - 1 | Baseline | V1__Baseline.sql | 1091472110 - 2 | RELEASE 0.6 Generalizing Source AND ... | V2__RELEASE_0.6_Generalizing_Source_... 
| 1537500232 -``` - diff --git a/docs/architecture.md b/docs/architecture.md deleted file mode 100644 index a2dc5cd6a8..0000000000 --- a/docs/architecture.md +++ /dev/null @@ -1,2 +0,0 @@ -# Architecture - diff --git a/docs/architecture.png b/docs/architecture.png deleted file mode 100644 index 6d56a62360..0000000000 Binary files a/docs/architecture.png and /dev/null differ diff --git a/docs/assets/arch.png b/docs/assets/arch.png deleted file mode 100644 index bc655b60f3..0000000000 Binary files a/docs/assets/arch.png and /dev/null differ diff --git a/docs/assets/feast-components-overview.png b/docs/assets/feast-components-overview.png deleted file mode 100644 index 1f69bb7ed8..0000000000 Binary files a/docs/assets/feast-components-overview.png and /dev/null differ diff --git a/docs/assets/feast-marchitecture.png b/docs/assets/feast-marchitecture.png new file mode 100644 index 0000000000..0a7b044b09 Binary files /dev/null and b/docs/assets/feast-marchitecture.png differ diff --git a/docs/assets/statistics-sources (1).png b/docs/assets/statistics-sources (1).png deleted file mode 100644 index 02be233968..0000000000 Binary files a/docs/assets/statistics-sources (1).png and /dev/null differ diff --git a/docs/assets/statistics-sources (2).png b/docs/assets/statistics-sources (2).png deleted file mode 100644 index 02be233968..0000000000 Binary files a/docs/assets/statistics-sources (2).png and /dev/null differ diff --git a/docs/assets/statistics-sources (3).png b/docs/assets/statistics-sources (3).png deleted file mode 100644 index 02be233968..0000000000 Binary files a/docs/assets/statistics-sources (3).png and /dev/null differ diff --git a/docs/assets/statistics-sources (4).png b/docs/assets/statistics-sources (4).png deleted file mode 100644 index 02be233968..0000000000 Binary files a/docs/assets/statistics-sources (4).png and /dev/null differ diff --git a/docs/assets/statistics-sources.png b/docs/assets/statistics-sources.png deleted file mode 100644 index 02be233968..0000000000 Binary files a/docs/assets/statistics-sources.png and /dev/null differ diff --git a/docs/build-a-training-dataset.md b/docs/build-a-training-dataset.md deleted file mode 100644 index eff44fdf9c..0000000000 --- a/docs/build-a-training-dataset.md +++ /dev/null @@ -1,2 +0,0 @@ -# Build a training dataset - diff --git a/docs/create-a-feature-repository.md b/docs/create-a-feature-repository.md deleted file mode 100644 index 5f781f0651..0000000000 --- a/docs/create-a-feature-repository.md +++ /dev/null @@ -1,2 +0,0 @@ -# Create a feature repository - diff --git a/docs/deploy-a-feature-store.md b/docs/deploy-a-feature-store.md deleted file mode 100644 index 0447b0ffbf..0000000000 --- a/docs/deploy-a-feature-store.md +++ /dev/null @@ -1,2 +0,0 @@ -# Deploy a feature store - diff --git a/docs/docs/.gitbook/assets/basic-architecture-diagram.svg b/docs/docs/.gitbook/assets/basic-architecture-diagram.svg deleted file mode 100644 index b707f49046..0000000000 --- a/docs/docs/.gitbook/assets/basic-architecture-diagram.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/docs/docs/.gitbook/assets/feast-docs-overview-diagram-2.svg b/docs/docs/.gitbook/assets/feast-docs-overview-diagram-2.svg deleted file mode 100644 index 7f30963ec7..0000000000 --- a/docs/docs/.gitbook/assets/feast-docs-overview-diagram-2.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/docs/entities.md b/docs/entities.md deleted file mode 100644 index dadeac1cac..0000000000 --- a/docs/entities.md +++ /dev/null @@ -1,2 
+0,0 @@ -# Entities - diff --git a/docs/feast-on-kubernetes/advanced-1/README.md b/docs/feast-on-kubernetes/advanced-1/README.md deleted file mode 100644 index 0fb91367c2..0000000000 --- a/docs/feast-on-kubernetes/advanced-1/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# Advanced - diff --git a/docs/feast-on-kubernetes/advanced-1/audit-logging.md b/docs/feast-on-kubernetes/advanced-1/audit-logging.md deleted file mode 100644 index 1870a687bd..0000000000 --- a/docs/feast-on-kubernetes/advanced-1/audit-logging.md +++ /dev/null @@ -1,132 +0,0 @@ -# Audit Logging - -{% hint style="warning" %} -This page applies to Feast 0.7. The content may be out of date for Feast 0.8+ -{% endhint %} - -## Introduction - -Feast provides audit logging functionality in order to debug problems and to trace the lineage of events. - -## Audit Log Types - -Audit Logs produced by Feast come in three favors: - -| Audit Log Type | Description | -| :--- | :--- | -| Message Audit Log | Logs service calls that can be used to track Feast request handling. Currently only gRPC request/response is supported. Enabling Message Audit Logs can be resource intensive and significantly increase latency, as such is not recommended on Online Serving. | -| Transition Audit Log | Logs transitions in status in resources managed by Feast \(ie an Ingestion Job becoming RUNNING\). | -| Action Audit Log | Logs actions performed on a specific resource managed by Feast \(ie an Ingestion Job is aborted\). | - -## Configuration - -| Audit Log Type | Description | -| :--- | :--- | -| Message Audit Log | Enabled when both `feast.logging.audit.enabled` and `feast.logging.audit.messageLogging.enabled` is set to `true` | -| Transition Audit Log | Enabled when `feast.logging.audit.enabled` is set to `true` | -| Action Audit Log | Enabled when `feast.logging.audit.enabled` is set to `true` | - -## JSON Format - -Audit Logs produced by Feast are written to the console similar to normal logs but in a structured, machine parsable JSON. Example of a Message Audit Log JSON entry produced: - -```text -{ - "message": { - "logType": "FeastAuditLogEntry", - "kind": "MESSAGE", - "statusCode": "OK", - "request": { - "filter": { - "project": "dummy", - } - }, - "application": "Feast", - "response": {}, - "method": "ListFeatureTables", - "identity": "105960238928959148073", - "service": "CoreService", - "component": "feast-core", - "id": "45329ea9-0d48-46c5-b659-4604f6193711", - "version": "0.10.0-SNAPSHOT" - }, - "hostname": "feast.core" - "timestamp": "2020-10-20T04:45:24Z", - "severity": "INFO", -} -``` - -## Log Entry Schema - -Fields common to all Audit Log Types: - -| Field | Description | -| :--- | :--- | -| `logType` | Log Type. Always set to `FeastAuditLogEntry`. Useful for filtering out Feast audit logs. | -| `application` | Application. Always set to `Feast`. | -| `component` | Feast Component producing the Audit Log. Set to `feast-core` for Feast Core and `feast-serving` for Feast Serving. Use to filtering out Audit Logs by component. | -| `version` | Version of Feast producing this Audit Log. Use to filtering out Audit Logs by version. | - -Fields in Message Audit Log Type - -| Field | Description | -| :--- | :--- | -| `id` | Generated UUID that uniquely identifies the service call. | -| `service` | Name of the Service that handled the service call. | -| `method` | Name of the Method that handled the service call. 
Useful for filtering Audit Logs by method \(ie `ApplyFeatureTable` calls\) | -| `request` | Full request submitted by client in the service call as JSON. | -| `response` | Full response returned to client by the service after handling the service call as JSON. | -| `identity` | Identity of the client making the service call as an user Id. Only set when Authentication is enabled. | -| `statusCode` | The status code returned by the service handling the service call \(ie `OK` if service call handled without error\). | - -Fields in Action Audit Log Type - -| Field | Description | -| :--- | :--- | -| `action` | Name of the action taken on the resource. | -| `resource.type` | Type of resource of which the action was taken on \(i.e `FeatureTable`\) | -| resource.id | Identifier specifying the specific resource of which the action was taken on. | - -Fields in Transition Audit Log Type - -| Field | Description | -| :--- | :--- | -| `status` | The new status that the resource transitioned to | -| `resource.type` | Type of resource of which the transition occurred \(i.e `FeatureTable`\) | -| `resource.id` | Identifier specifying the specific resource of which the transition occurred. | - -## Log Forwarder - -Feast currently only supports forwarding Request/Response \(Message Audit Log Type\) logs to an external fluentD service with `feast.**` Fluentd tag. - -### Request/Response Log Example - -```text -{ - "id": "45329ea9-0d48-46c5-b659-4604f6193711", - "service": "CoreService" - "status_code": "OK", - "identity": "105960238928959148073", - "method": "ListProjects", - "request": {}, - "response": { - "projects": [ - "default", "project1", "project2" - ] - } - "release_name": 506.457.14.512 -} -``` - -### Configuration - -The Fluentd Log Forwarder configured with the with the following configuration options in `application.yml`: - -| Settings | Description | -| :--- | :--- | -| `feast.logging.audit.messageLogging.destination` | `fluentd` | -| `feast.logging.audit.messageLogging.fluentdHost` | `localhost` | -| `feast.logging.audit.messageLogging.fluentdPort` | `24224` | - -When using Fluentd as the Log forwarder, a Feast `release_name` can be logged instead of the IP address \(eg. IP of Kubernetes pod deployment\), by setting an environment variable `RELEASE_NAME` when deploying Feast. - diff --git a/docs/feast-on-kubernetes/advanced-1/metrics.md b/docs/feast-on-kubernetes/advanced-1/metrics.md deleted file mode 100644 index 43f7b973b6..0000000000 --- a/docs/feast-on-kubernetes/advanced-1/metrics.md +++ /dev/null @@ -1,59 +0,0 @@ -# Metrics - -{% hint style="warning" %} -This page applies to Feast 0.7. The content may be out of date for Feast 0.8+ -{% endhint %} - -## Overview - -Feast Components export metrics that can provide insight into Feast behavior: - -* [Feast Ingestion Jobs can be configured to push metrics into StatsD](metrics.md#pushing-ingestion-metrics-to-statsd) -* [Prometheus can be configured to scrape metrics from Feast Core and Serving.](metrics.md#exporting-feast-metrics-to-prometheus) - -See the [Metrics Reference ](../reference-1/metrics-reference.md)for documentation on metrics are exported by Feast. - -{% hint style="info" %} -Feast Job Controller currently does not export any metrics on its own. However its `application.yml` is used to configure metrics export for ingestion jobs. -{% endhint %} - -## Pushing Ingestion Metrics to StatsD - -### **Feast Ingestion Job** - -Feast Ingestion Job can be configured to push Ingestion metrics to a StatsD instance. 
Metrics export to StatsD for Ingestion Job is configured in Job Controller's `application.yml` under `feast.jobs.metrics` - -```yaml - feast: - jobs: - metrics: - # Enables Statd metrics export if true. - enabled: true - type: statsd - # Host and port of the StatsD instance to export to. - host: localhost - port: 9125 -``` - -{% hint style="info" %} -If you need Ingestion Metrics in Prometheus or some other metrics backend, use a metrics forwarder to forward Ingestion Metrics from StatsD to the metrics backend of choice. \(ie Use [`prometheus-statsd-exporter`](https://github.com/prometheus/statsd_exporter) to forward metrics to Prometheus\). -{% endhint %} - -## Exporting Feast Metrics to Prometheus - -### **Feast Core and Serving** - -Feast Core and Serving exports metrics to a Prometheus instance via Prometheus scraping its `/metrics` endpoint. Metrics export to Prometheus for Core and Serving can be configured via their corresponding `application.yml` - -```yaml -server: - # Configures the port where metrics are exposed via /metrics for Prometheus to scrape. - port: 8081 -``` - -[Direct Prometheus](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) to scrape directly from Core and Serving's `/metrics` endpoint. - -## Further Reading - -See the [Metrics Reference ](../reference-1/metrics-reference.md)for documentation on metrics are exported by Feast. - diff --git a/docs/feast-on-kubernetes/advanced-1/security.md b/docs/feast-on-kubernetes/advanced-1/security.md deleted file mode 100644 index b6e42afd73..0000000000 --- a/docs/feast-on-kubernetes/advanced-1/security.md +++ /dev/null @@ -1,480 +0,0 @@ ---- -description: 'Secure Feast with SSL/TLS, Authentication and Authorization.' ---- - -# Security - -{% hint style="warning" %} -This page applies to Feast 0.7. The content may be out of date for Feast 0.8+ -{% endhint %} - -### Overview - -![Overview of Feast's Security Methods.](../../.gitbook/assets/untitled-25-1-%20%282%29%20%282%29%20%282%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%281%29%20%284%29.jpg) - -Feast supports the following security methods: - -* [SSL/TLS on messaging between Feast Core, Feast Online Serving and Feast SDKs.](security.md#2-ssl-tls) -* [Authentication to Feast Core and Serving based on Open ID Connect ID tokens.](security.md#3-authentication) -* [Authorization based on project membership and delegating authorization grants to external Authorization Server.](security.md#4-authorization) - -[Important considerations when integrating Authentication/Authorization](security.md#5-authentication-and-authorization). - -### **SSL/TLS** - -Feast supports SSL/TLS encrypted inter-service communication among Feast Core, Feast Online Serving, and Feast SDKs. - -#### Configuring SSL/TLS on Feast Core and Feast Serving - -The following properties configure SSL/TLS. These properties are located in their corresponding `application.yml`files: - -| Configuration Property | Description | -| :--- | :--- | -| `grpc.server.security.enabled` | Enables SSL/TLS functionality if `true` | -| `grpc.server.security.certificateChain` | Provide the path to certificate chain. | -| `grpc.server.security.privateKey` | Provide the to private key. 
| - -> Read more on enabling SSL/TLS in the[ gRPC starter docs.](https://yidongnan.github.io/grpc-spring-boot-starter/en/server/security.html#enable-transport-layer-security) - -#### Configuring SSL/TLS on Python SDK/CLI - -To enable SSL/TLS in the [Feast Python SDK](https://api.docs.feast.dev/python/#feast.client.Client) or [Feast CLI](../getting-started/connect-to-feast/feast-cli.md), set the config options via `feast config`: - -| Configuration Option | Description | -| :--- | :--- | -| `core_enable_ssl` | Enables SSL/TLS functionality on connections to Feast core if `true` | -| `serving_enable_ssl` | Enables SSL/TLS functionality on connections to Feast Online Serving if `true` | -| `core_server_ssl_cert` | Optional. Specifies the path of the root certificate used to verify Core Service's identity. If omitted, uses system certificates. | -| `serving_server_ssl_cert` | Optional. Specifies the path of the root certificate used to verify Serving Service's identity. If omitted, uses system certificates. | - -{% hint style="info" %} -The Python SDK automatically uses SSL/TLS when connecting to Feast Core and Feast Online Serving via port 443. -{% endhint %} - -#### Configuring SSL/TLS on Go SDK - -Configure SSL/TLS on the [Go SDK](https://godoc.org/github.com/feast-dev/feast/sdk/go) by passing configuration via `SecurityConfig`: - -```go -cli, err := feast.NewSecureGrpcClient("localhost", 6566, feast.SecurityConfig{ - EnableTLS: true, - TLSCertPath: "/path/to/cert.pem", -})Option -``` - -| Config Option | Description | -| :--- | :--- | -| `EnableTLS` | Enables SSL/TLS functionality when connecting to Feast if `true` | -| `TLSCertPath` | Optional. Provides the path of the root certificate used to verify Feast Service's identity. If omitted, uses system certificates. | - -#### Configuring SSL/TLS on **Java** SDK - -Configure SSL/TLS on the [Feast Java SDK](https://javadoc.io/doc/dev.feast/feast-sdk) by passing configuration via `SecurityConfig`: - -```java -FeastClient client = FeastClient.createSecure("localhost", 6566, - SecurityConfig.newBuilder() - .setTLSEnabled(true) - .setCertificatePath(Optional.of("/path/to/cert.pem")) - .build()); -``` - -| Config Option | Description | -| :--- | :--- | -| `setTLSEnabled()` | Enables SSL/TLS functionality when connecting to Feast if `true` | -| `setCertificatesPath()` | Optional. Set the path of the root certificate used to verify Feast Service's identity. If omitted, uses system certificates. | - -### **Authentication** - -{% hint style="warning" %} -To prevent man in the middle attacks, we recommend that SSL/TLS be implemented prior to authentication. -{% endhint %} - -Authentication can be implemented to identify and validate client requests to Feast Core and Feast Online Serving. Currently, Feast uses[ ](https://auth0.com/docs/protocols/openid-connect-protocol)[Open ID Connect \(OIDC\)](https://auth0.com/docs/protocols/openid-connect-protocol) ID tokens \(i.e. [Google Open ID Connect](https://developers.google.com/identity/protocols/oauth2/openid-connect)\) to authenticate client requests. - -#### Configuring Authentication in Feast Core and Feast Online Serving - -Authentication can be configured for Feast Core and Feast Online Serving via properties in their corresponding `application.yml` files: - -| Configuration Property | Description | -| :--- | :--- | -| `feast.security.authentication.enabled` | Enables Authentication functionality if `true` | -| `feast.security.authentication.provider` | Authentication Provider type. 
Currently only supports `jwt` | -| `feast.security.authentication.option.jwkEndpointURI` | HTTPS URL used by Feast to retrieved the [JWK](https://tools.ietf.org/html/rfc7517) used to verify OIDC ID tokens. | - -{% hint style="info" %} -`jwkEndpointURI`is set to retrieve Google's OIDC JWK by default, allowing OIDC ID tokens issued by Google to be used for authentication. -{% endhint %} - -Behind the scenes, Feast Core and Feast Online Serving authenticate by: - -* Extracting the OIDC ID token `TOKEN`from gRPC metadata submitted with request: - -```text -('authorization', 'Bearer: TOKEN') -``` - -* Validates token's authenticity using the JWK retrieved from the `jwkEndpointURI` - -#### **Authenticating Serving with Feast Core** - -Feast Online Serving communicates with Feast Core during normal operation. When both authentication and authorization are enabled on Feast Core, Feast Online Serving is forced to authenticate its requests to Feast Core. Otherwise, Feast Online Serving produces an Authentication failure error when connecting to Feast Core. - - Properties used to configure Serving authentication via `application.yml`: - -| Configuration Property | Description | -| :--- | :--- | -| `feast.core-authentication.enabled` | Requires Feast Online Serving to authenticate when communicating with Feast Core. | -| `feast.core-authentication.provider` | Selects provider Feast Online Serving uses to retrieve credentials then used to authenticate requests to Feast Core. Valid providers are `google` and `oauth`. | - -{% tabs %} -{% tab title="Google Provider" %} -Google Provider automatically extracts the credential from the credential JSON file. - -* Set [`GOOGLE_APPLICATION_CREDENTIALS` environment variable](https://cloud.google.com/docs/authentication/getting-started#setting_the_environment_variable) to the path of the credential in the JSON file. -{% endtab %} - -{% tab title="OAuth Provider" %} -OAuth Provider makes an OAuth [client credentials](https://auth0.com/docs/flows/call-your-api-using-the-client-credentials-flow) request to obtain the credential. OAuth requires the following options to be set at `feast.security.core-authentication.options.`: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-| Configuration Property | Description |
-| :--- | :--- |
-| `oauth_url` | Target URL receiving the client-credentials request. |
-| `grant_type` | OAuth grant type. Set as `client_credentials`. |
-| `client_id` | Client Id used in the client-credentials request. |
-| `client_secret` | Client secret used in the client-credentials request. |
-| `audience` | Target audience of the credential. Set to the host URL of Feast Core \(i.e. `https://localhost` if Feast Core listens on `localhost`\). |
-| `jwkEndpointURI` | HTTPS URL used to retrieve a JWK that can be used to decode the credential. |
-{% endtab %} -{% endtabs %} - -#### **Enabling Authentication in Python SDK/CLI** - -Configure the [Feast Python SDK](https://api.docs.feast.dev/python/) and [Feast CLI](../getting-started/connect-to-feast/feast-cli.md) to use authentication via `feast config`: - -```python -$ feast config set enable_auth true -``` - -| Configuration Option | Description | -| :--- | :--- | -| `enable_auth` | Enables authentication functionality if set to `true`. | -| `auth_provider` | Use an authentication provider to obtain a credential for authentication. Currently supports `google` and `oauth`. | -| `auth_token` | Manually specify a static token for use in authentication. Overrules `auth_provider` if both are set. | - -{% tabs %} -{% tab title="Google Provider" %} -Google Provider automatically finds and uses Google Credentials to authenticate requests: - -* Google Provider automatically uses established credentials for authenticating requests if you are already authenticated with the `gcloud` CLI via: - -```text -$ gcloud auth application-default login -``` - -* Alternatively Google Provider can be configured to use the credentials in the JSON file via`GOOGLE_APPLICATION_CREDENTIALS` environmental variable \([Google Cloud Authentication documentation](https://cloud.google.com/docs/authentication/getting-started)\): - -```bash -$ export GOOGLE_APPLICATION_CREDENTIALS="path/to/key.json" -``` -{% endtab %} - -{% tab title="OAuth Provider" %} -OAuth Provider makes an OAuth [client credentials](https://auth0.com/docs/flows/call-your-api-using-the-client-credentials-flow) request to obtain the credential/token used to authenticate Feast requests. The OAuth provider requires the following config options to be set via `feast config`: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-| Configuration Property | Description |
-| :--- | :--- |
-| `oauth_token_request_url` | Target URL receiving the client-credentials request. |
-| `oauth_grant_type` | OAuth grant type. Set as `client_credentials`. |
-| `oauth_client_id` | Client Id used in the client-credentials request. |
-| `oauth_client_secret` | Client secret used in the client-credentials request. |
-| `oauth_audience` | Target audience of the credential. Set to the host URL of the target Service \(`https://localhost` if the Service listens on `localhost`\). |
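-For example, assuming an OAuth identity provider is available, these options could be set with `feast config` roughly as follows \(a sketch only; the URLs, client Id, and secret are placeholders\):
-
-```bash
-# Sketch: substitute your own provider URL, audience, client Id, and secret.
-$ feast config set enable_auth true
-$ feast config set auth_provider oauth
-$ feast config set oauth_grant_type client_credentials
-$ feast config set oauth_client_id some_id
-$ feast config set oauth_client_secret some_secret
-$ feast config set oauth_audience https://localhost
-$ feast config set oauth_token_request_url https://oauth.endpoint/auth
-```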
-{% endtab %} -{% endtabs %} - -#### **Enabling Authentication in Go SDK** - -Configure the [Feast Java SDK](https://javadoc.io/doc/dev.feast/feast-sdk/latest/com/gojek/feast/package-summary.html) to use authentication by specifying the credential via `SecurityConfig`: - -```go -// error handling omitted. -// Use Google Credential as provider. -cred, _ := feast.NewGoogleCredential("localhost:6566") -cli, _ := feast.NewSecureGrpcClient("localhost", 6566, feast.SecurityConfig{ - // Specify the credential to provide tokens for Feast Authentication. - Credential: cred, -}) -``` - -{% tabs %} -{% tab title="Google Credential" %} -Google Credential uses Service Account credentials JSON file set via`GOOGLE_APPLICATION_CREDENTIALS` environmental variable \([Google Cloud Authentication documentation](https://cloud.google.com/docs/authentication/getting-started)\) to obtain tokens for Authenticating Feast requests: - -* Exporting `GOOGLE_APPLICATION_CREDENTIALS` - -```bash -$ export GOOGLE_APPLICATION_CREDENTIALS="path/to/key.json" -``` - -* Create a Google Credential with target audience. - -```go -cred, _ := feast.NewGoogleCredential("localhost:6566") -``` - -> Target audience of the credential should be set to host URL of target Service. \(ie `https://localhost` if Service listens on `localhost`\): -{% endtab %} - -{% tab title="OAuth Credential" %} -OAuth Credential makes an OAuth [client credentials](https://auth0.com/docs/flows/call-your-api-using-the-client-credentials-flow) request to obtain the credential/token used to authenticate Feast requests: - -* Create OAuth Credential with parameters: - -```go -cred := feast.NewOAuthCredential("localhost:6566", "client_id", "secret", "https://oauth.endpoint/auth") -``` - - - - - - - - - - - - - - - - - - - - - - - - - - -
-| Parameter | Description |
-| :--- | :--- |
-| `audience` | Target audience of the credential. Set to the host URL of the target Service \(`https://localhost` if the Service listens on `localhost`\). |
-| `clientId` | Client Id used in the client-credentials request. |
-| `clientSecret` | Client secret used in the client-credentials request. |
-| `endpointURL` | Target URL to make the client-credentials request to. |
-{% endtab %} -{% endtabs %} - -#### **Enabling Authentication in Java SDK** - -Configure the [Feast Java SDK](https://javadoc.io/doc/dev.feast/feast-sdk/latest/com/gojek/feast/package-summary.html) to use authentication by setting credentials via `SecurityConfig`: - -```java -// Use GoogleAuthCredential as provider. -CallCredentials credentials = new GoogleAuthCredentials( - Map.of("audience", "localhost:6566")); - -FeastClient client = FeastClient.createSecure("localhost", 6566, - SecurityConfig.newBuilder() - // Specify the credentials to provide tokens for Feast Authentication. - .setCredentials(Optional.of(creds)) - .build()); -``` - -{% tabs %} -{% tab title="GoogleAuthCredentials" %} -GoogleAuthCredentials uses Service Account credentials JSON file set via`GOOGLE_APPLICATION_CREDENTIALS` environmental variable \([Google Cloud authentication documentation](https://cloud.google.com/docs/authentication/getting-started)\) to obtain tokens for Authenticating Feast requests: - -* Exporting `GOOGLE_APPLICATION_CREDENTIALS` - -```bash -$ export GOOGLE_APPLICATION_CREDENTIALS="path/to/key.json" -``` - -* Create a Google Credential with target audience. - -```java -CallCredentials credentials = new GoogleAuthCredentials( - Map.of("audience", "localhost:6566")); -``` - -> Target audience of the credentials should be set to host URL of target Service. \(ie `https://localhost` if Service listens on `localhost`\): -{% endtab %} - -{% tab title="OAuthCredentials" %} -OAuthCredentials makes an OAuth [client credentials](https://auth0.com/docs/flows/call-your-api-using-the-client-credentials-flow) request to obtain the credential/token used to authenticate Feast requests: - -* Create OAuthCredentials with parameters: - -```java -CallCredentials credentials = new OAuthCredentials(Map.of( - "audience": "localhost:6566", - "grant_type", "client_credentials", - "client_id", "some_id", - "client_id", "secret", - "oauth_url", "https://oauth.endpoint/auth", - "jwkEndpointURI", "https://jwk.endpoint/jwk")); -``` - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-| Parameter | Description |
-| :--- | :--- |
-| `audience` | Target audience of the credential. Set to the host URL of the target Service \(`https://localhost` if the Service listens on `localhost`\). |
-| `grant_type` | OAuth grant type. Set as `client_credentials`. |
-| `client_id` | Client Id used in the client-credentials request. |
-| `client_secret` | Client secret used in the client-credentials request. |
-| `oauth_url` | Target URL to make the client-credentials request to obtain the credential. |
-| `jwkEndpointURI` | HTTPS URL used to retrieve a JWK that can be used to decode the credential. |
-{% endtab %} -{% endtabs %} - -### Authorization - -{% hint style="info" %} -Authorization requires that authentication be configured to obtain a user identity for use in authorizing requests. -{% endhint %} - -Authorization provides access control to FeatureTables and/or Features based on project membership. Users who are members of a project are authorized to: - -* Create and/or Update a Feature Table in the Project. -* Retrieve Feature Values for Features in that Project. - -#### **Authorization API/Server** - -![Feast Authorization Flow](../../.gitbook/assets/rsz_untitled23%20%282%29%20%282%29%20%282%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29.jpg) - -Feast delegates Authorization grants to an external Authorization Server that implements the [Authorization Open API specification](https://github.com/feast-dev/feast/blob/master/common/src/main/resources/api.yaml). - -* Feast checks whether a user is authorized to make a request by making a `checkAccessRequest` to the Authorization Server. -* The Authorization Server should return a `AuthorizationResult` with whether the user is allowed to make the request. - -Authorization can be configured for Feast Core and Feast Online Serving via properties in their corresponding `application.yml` - -| Configuration Property | Description | -| :--- | :--- | -| `feast.security.authorization.enabled` | Enables authorization functionality if `true`. | -| `feast.security.authorization.provider` | Authentication Provider type. Currently only supports `http` | -| `feast.security.authorization.option.authorizationUrl` | URL endpoint of Authorization Server to make check access requests to. | -| `feast.security.authorization.option.subjectClaim` | Optional. Name of the claim of the to extract from the ID Token to include in the check access request as Subject. | - -{% hint style="info" %} -This example of the [Authorization Server with Keto](https://github.com/feast-dev/feast-keto-auth-server) can be used as a reference implementation for implementing an Authorization Server that Feast supports. -{% endhint %} - -### **Authentication & Authorization** - -When using Authentication & Authorization, consider: - -* Enabling Authentication without Authorization makes authentication **optional**. You can still send unauthenticated requests. -* Enabling Authorization forces all requests to be authenticated. Requests that are not authenticated are **dropped.** - - - diff --git a/docs/feast-on-kubernetes/advanced-1/troubleshooting.md b/docs/feast-on-kubernetes/advanced-1/troubleshooting.md deleted file mode 100644 index 7b0224abe3..0000000000 --- a/docs/feast-on-kubernetes/advanced-1/troubleshooting.md +++ /dev/null @@ -1,136 +0,0 @@ -# Troubleshooting - -{% hint style="warning" %} -This page applies to Feast 0.7. The content may be out of date for Feast 0.8+ -{% endhint %} - -If at any point in time you cannot resolve a problem, please see the [Community](../../community.md) section for reaching out to the Feast community. - -### How can I verify that all services are operational? - -#### Docker Compose - -The containers should be in an `up` state: - -```text -docker ps -``` - -#### Google Kubernetes Engine - -All services should either be in a `RUNNING` state or `COMPLETED`state: - -```text -kubectl get pods -``` - -### How can I verify that I can connect to all services? - -First locate the the host and port of the Feast Services. 
- -#### **Docker Compose \(from inside the docker network\)** - -You will probably need to connect using the hostnames of services and standard Feast ports: - -```bash -export FEAST_CORE_URL=core:6565 -export FEAST_ONLINE_SERVING_URL=online_serving:6566 -export FEAST_HISTORICAL_SERVING_URL=historical_serving:6567 -export FEAST_JOBCONTROLLER_URL=jobcontroller:6570 -``` - -#### **Docker Compose \(from outside the docker network\)** - -You will probably need to connect using `localhost` and standard ports: - -```bash -export FEAST_CORE_URL=localhost:6565 -export FEAST_ONLINE_SERVING_URL=localhost:6566 -export FEAST_HISTORICAL_SERVING_URL=localhost:6567 -export FEAST_JOBCONTROLLER_URL=localhost:6570 -``` - -#### **Google Kubernetes Engine \(GKE\)** - -You will need to find the external IP of one of the nodes as well as the NodePorts. Please make sure that your firewall is open for these ports: - -```bash -export FEAST_IP=$(kubectl describe nodes | grep ExternalIP | awk '{print $2}' | head -n 1) -export FEAST_CORE_URL=${FEAST_IP}:32090 -export FEAST_ONLINE_SERVING_URL=${FEAST_IP}:32091 -export FEAST_HISTORICAL_SERVING_URL=${FEAST_IP}:32092 -``` - -`netcat`, `telnet`, or even `curl` can be used to test whether all services are available and ports are open, but `grpc_cli` is the most powerful. It can be installed from [here](https://github.com/grpc/grpc/blob/master/doc/command_line_tool.md). - -#### Testing Connectivity From Feast Services: - -Use `grpc_cli` to test connetivity by listing the gRPC methods exposed by Feast services: - -```bash -grpc_cli ls ${FEAST_CORE_URL} feast.core.CoreService -``` - -```bash -grpc_cli ls ${FEAST_JOBCONTROLLER_URL} feast.core.JobControllerService -``` - -```bash -grpc_cli ls ${FEAST_HISTORICAL_SERVING_URL} feast.serving.ServingService -``` - -```bash -grpc_cli ls ${FEAST_ONLINE_SERVING_URL} feast.serving.ServingService -``` - -### How can I print logs from the Feast Services? - -Feast will typically have three services that you need to monitor if something goes wrong. - -* Feast Core -* Feast Job Controller -* Feast Serving \(Online\) -* Feast Serving \(Batch\) - -In order to print the logs from these services, please run the commands below. 
- -#### Docker Compose - -Use `docker-compose logs` to obtain Feast component logs: - -```text - docker logs -f feast_core_1 -``` - -```text - docker logs -f feast_jobcontroller_1 -``` - -```text -docker logs -f feast_historical_serving_1 -``` - -```text -docker logs -f feast_online_serving_1 -``` - -#### Google Kubernetes Engine - -Use `kubectl logs` to obtain Feast component logs: - -```text -kubectl logs $(kubectl get pods | grep feast-core | awk '{print $1}') -``` - -```text -kubectl logs $(kubectl get pods | grep feast-jobcontroller | awk '{print $1}') -``` - -```text -kubectl logs $(kubectl get pods | grep feast-serving-batch | awk '{print $1}') -``` - -```text -kubectl logs $(kubectl get pods | grep feast-serving-online | awk '{print $1}') -``` - diff --git a/docs/feast-on-kubernetes/advanced-1/upgrading.md b/docs/feast-on-kubernetes/advanced-1/upgrading.md deleted file mode 100644 index 7e61d3518b..0000000000 --- a/docs/feast-on-kubernetes/advanced-1/upgrading.md +++ /dev/null @@ -1,113 +0,0 @@ -# Upgrading Feast - -### Migration from v0.6 to v0.7 - -#### Feast Core Validation changes - -In v0.7, Feast Core no longer accepts starting with number \(0-9\) and using dash in names for: - -* Project -* Feature Set -* Entities -* Features - -Migrate all project, feature sets, entities, feature names: - -* with ‘-’ by recreating them with '-' replace with '\_' -* recreate any names with a number \(0-9\) as the first letter to one without. - -Feast now prevents feature sets from being applied if no store is subscribed to that Feature Set. - -* Ensure that a store is configured to subscribe to the Feature Set before applying the Feature Set. - -#### Feast Core's Job Coordinator is now Feast Job Controller - -In v0.7, Feast Core's Job Coordinator has been decoupled from Feast Core and runs as a separate Feast Job Controller application. See its [Configuration reference](../reference-1/configuration-reference.md#2-feast-core-serving-and-job-controller) for how to configure Feast Job Controller. - -**Ingestion Job API** - -In v0.7, the following changes are made to the Ingestion Job API: - -* Changed List Ingestion Job API to return list of `FeatureSetReference` instead of list of FeatureSet in response. -* Moved `ListIngestionJobs`, `StopIngestionJob`, `RestartIngestionJob` calls from `CoreService` to `JobControllerService`. -* Python SDK/CLI: Added new [Job Controller client ](https://github.com/feast-dev/feast/blob/master/sdk/python/feast/contrib/job_controller/client.py)and `jobcontroller_url` config option. - -Users of the Ingestion Job API via gRPC should migrate by: - -* Add new client to connect to Job Controller endpoint to call `JobControllerService` and call `ListIngestionJobs`, `StopIngestionJob`, `RestartIngestionJob` from new client. -* Migrate code to accept feature references instead of feature sets returned in `ListIngestionJobs` response. - -Users of Ingestion Job via Python SDK \(ie `feast ingest-jobs list` or `client.stop_ingest_job()` etc.\) should migrate by: - -* `ingest_job()`methods only: Create a new separate [Job Controller client](https://github.com/feast-dev/feast/blob/master/sdk/python/feast/contrib/job_controller/client.py) to connect to the job controller and call `ingest_job()` methods using the new client. -* Configure the Feast Job Controller endpoint url via `jobcontroller_url` config option. 
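-For example, the Job Controller endpoint could be set via `feast config` along these lines \(a sketch; `localhost:6570` is a placeholder matching the default Job Controller port used elsewhere in these docs\):
-
-```bash
-# Sketch: point the Python SDK/CLI at the Feast Job Controller endpoint.
-$ feast config set jobcontroller_url localhost:6570
-```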
- -#### Configuration Properties Changes - -* Rename `feast.jobs.consolidate-jobs-per-source property` to `feast.jobs.controller.consolidate-jobs-per-sources` -* Rename`feast.security.authorization.options.subjectClaim` to `feast.security.authentication.options.subjectClaim` -* Rename `feast.logging.audit.messageLoggingEnabled` to `feast.audit.messageLogging.enabled` - -### Migration from v0.5 to v0.6 - -#### Database schema - -In Release 0.6 we introduced [Flyway](https://flywaydb.org/) to handle schema migrations in PostgreSQL. Flyway is integrated into `core` and for now on all migrations will be run automatically on `core` start. It uses table `flyway_schema_history` in the same database \(also created automatically\) to keep track of already applied migrations. So no specific maintenance should be needed. - -If you already have existing deployment of feast 0.5 - Flyway will detect existing tables and omit first baseline migration. - -After `core` started you should have `flyway_schema_history` look like this - -```text ->> select version, description, script, checksum from flyway_schema_history - -version | description | script | checksum ---------+-----------------------------------------+-----------------------------------------+------------ - 1 | << Flyway Baseline >> | << Flyway Baseline >> | - 2 | RELEASE 0.6 Generalizing Source AND ... | V2__RELEASE_0.6_Generalizing_Source_... | 1537500232 -``` - -In this release next major schema changes were done: - -* Source is not shared between FeatureSets anymore. It's changed to 1:1 relation - - and source's primary key is now auto-incremented number. - -* Due to generalization of Source `sources.topics` & `sources.bootstrap_servers` columns were deprecated. - - They will be replaced with `sources.config`. Data migration handled by code when respected Source is used. - - `topics` and `bootstrap_servers` will be deleted in the next release. - -* Job \(table `jobs`\) is no longer connected to `Source` \(table `sources`\) since it uses consolidated source for optimization purposes. - - All data required by Job would be embedded in its table. - -New Models \(tables\): - -* feature\_statistics - -Minor changes: - -* FeatureSet has new column version \(see [proto](https://github.com/feast-dev/feast/blob/master/protos/feast/core/FeatureSet.proto) for details\) -* Connecting table `jobs_feature_sets` in many-to-many relation between jobs & feature sets - - has now `version` and `delivery_status`. - -### Migration from v0.4 to v0.6 - -#### Database - -For all versions earlier than 0.5 seamless migration is not feasible due to earlier breaking changes and creation of new database will be required. - -Since database will be empty - first \(baseline\) migration would be applied: - -```text ->> select version, description, script, checksum from flyway_schema_history - -version | description | script | checksum ---------+-----------------------------------------+-----------------------------------------+------------ - 1 | Baseline | V1__Baseline.sql | 1091472110 - 2 | RELEASE 0.6 Generalizing Source AND ... | V2__RELEASE_0.6_Generalizing_Source_... 
| 1537500232 -``` - diff --git a/docs/feast-on-kubernetes/concepts/README.md b/docs/feast-on-kubernetes/concepts/README.md deleted file mode 100644 index e834417d3f..0000000000 --- a/docs/feast-on-kubernetes/concepts/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# Concepts - diff --git a/docs/feast-on-kubernetes/concepts/architecture.md b/docs/feast-on-kubernetes/concepts/architecture.md deleted file mode 100644 index f4cf23eb95..0000000000 --- a/docs/feast-on-kubernetes/concepts/architecture.md +++ /dev/null @@ -1,51 +0,0 @@ -# Architecture - -![](../../.gitbook/assets/image%20%286%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%282%29%20%281%29%20%282%29.png) - -## Sequence description - -1. **Log Raw Events:** Production backend applications are configured to emit internal state changes as events to a stream. -2. **Create Stream Features:** Stream processing systems like Flink, Spark, and Beam are used to transform and refine events and to produce features that are logged back to the stream. -3. **Log Streaming Features:** Both raw and refined events are logged into a data lake or batch storage location. -4. **Create Batch Features:** ELT/ETL systems like Spark and SQL are used to transform data in the batch store. -5. **Define and Ingest Features:** The Feast user defines [feature tables](feature-tables.md) based on the features available in batch and streaming sources and publish these definitions to Feast Core. -6. **Poll Feature Definitions:** The Feast Job Service polls for new or changed feature definitions. -7. **Start Ingestion Jobs:** Every new feature table definition results in a new ingestion job being provisioned \(see limitations\). -8. **Batch Ingestion:** Batch ingestion jobs are short-lived jobs that load data from batch sources into either an offline or online store \(see limitations\). -9. **Stream Ingestion:** Streaming ingestion jobs are long-lived jobs that load data from stream sources into online stores. A stream source and batch source on a feature table must have the same features/fields. -10. **Model Training:** A model training pipeline is launched. It uses the Feast Python SDK to retrieve a training dataset and trains a model. -11. **Get Historical Features:** Feast exports a point-in-time correct training dataset based on the list of features and entity DataFrame provided by the model training pipeline. -12. **Deploy Model:** The trained model binary \(and list of features\) are deployed into a model serving system. -13. **Get Prediction:** A backend system makes a request for a prediction from the model serving service. -14. **Retrieve Online Features:** The model serving service makes a request to the Feast Online Serving service for online features using a Feast SDK. -15. **Return Prediction:** The model serving service makes a prediction using the returned features and returns the outcome. - -{% hint style="warning" %} -Limitations - -* Only Redis is supported for online storage. -* Batch ingestion jobs must be triggered from your own scheduler like Airflow. Streaming ingestion jobs are automatically launched by the Feast Job Service. -{% endhint %} - -## Components: - -A complete Feast deployment contains the following components: - -* **Feast Core:** Acts as the central registry for feature and entity definitions in Feast. -* **Feast Job Service:** Manages data processing jobs that load data from sources into stores, and jobs that export training datasets. 
-* **Feast Serving:** Provides low-latency access to feature values in an online store. -* **Feast Python SDK CLI:** The primary user facing SDK. Used to: - * Manage feature definitions with Feast Core. - * Launch jobs through the Feast Job Service. - * Retrieve training datasets. - * Retrieve online features. -* **Online Store:** The online store is a database that stores only the latest feature values for each entity. The online store can be populated by either batch ingestion jobs \(in the case the user has no streaming source\), or can be populated by a streaming ingestion job from a streaming source. Feast Online Serving looks up feature values from the online store. -* **Offline Store:** The offline store persists batch data that has been ingested into Feast. This data is used for producing training datasets. -* **Feast Spark SDK:** A Spark specific Feast SDK. Allows teams to use Spark for loading features into an online store and for building training datasets over offline sources. - -Please see the [configuration reference](../reference-1/configuration-reference.md#overview) for more details on configuring these components. - -{% hint style="info" %} -Java and Go Clients are also available for online feature retrieval. See [API Reference](../reference-1/api/). -{% endhint %} - diff --git a/docs/feast-on-kubernetes/concepts/entities.md b/docs/feast-on-kubernetes/concepts/entities.md deleted file mode 100644 index e8134cf142..0000000000 --- a/docs/feast-on-kubernetes/concepts/entities.md +++ /dev/null @@ -1,64 +0,0 @@ -# Entities - -## Overview - -An entity is any domain object that can be modeled and about which information can be stored. Entities are usually recognizable concepts, either concrete or abstract, such as persons, places, things, or events. - -Examples of entities in the context of ride-hailing and food delivery: `customer`, `order`, `driver`, `restaurant`, `dish`, `area`. - -Entities are important in the context of feature stores since features are always properties of a specific entity. For example, we could have a feature `total_trips_24h` for driver `D011234` with a feature value of `11`. - -Feast uses entities in the following way: - -* Entities serve as the keys used to look up features for producing training datasets and online feature values. -* Entities serve as a natural grouping of features in a feature table. A feature table must belong to an entity \(which could be a composite entity\) - -## Structure of an Entity - -When creating an entity specification, consider the following fields: - -* **Name**: Name of the entity -* **Description**: Description of the entity -* **Value Type**: Value type of the entity. Feast will attempt to coerce entity columns in your data sources into this type. 
-* **Labels**: Labels are maps that allow users to attach their own metadata to entities - -A valid entity specification is shown below: - -```python -customer = Entity( - name="customer_id", - description="Customer id for ride customer", - value_type=ValueType.INT64, - labels={} -) -``` - -## Working with an Entity - -### Creating an Entity: - -```python -# Create a customer entity -customer_entity = Entity(name="customer_id", description="ID of car customer") -client.apply(customer_entity) -``` - -### Updating an Entity: - -```python -# Update a customer entity -customer_entity = client.get_entity("customer_id") -customer_entity.description = "ID of bike customer" -client.apply(customer_entity) -``` - -Permitted changes include: - -* The entity's description and labels - -The following changes are not permitted: - -* Project -* Name of an entity -* Type - diff --git a/docs/feast-on-kubernetes/concepts/feature-tables.md b/docs/feast-on-kubernetes/concepts/feature-tables.md deleted file mode 100644 index 5b5c0efc56..0000000000 --- a/docs/feast-on-kubernetes/concepts/feature-tables.md +++ /dev/null @@ -1,122 +0,0 @@ -# Feature Tables - -## Overview - -Feature tables are both a schema and a logical means of grouping features, data [sources](sources.md), and other related metadata. - -Feature tables serve the following purposes: - -* Feature tables are a means for defining the location and properties of data [sources](sources.md). -* Feature tables are used to create within Feast a database-level structure for the storage of feature values. -* The data sources described within feature tables allow Feast to find and ingest feature data into stores within Feast. -* Feature tables ensure data is efficiently stored during [ingestion](../user-guide/define-and-ingest-features.md) by providing a grouping mechanism of features values that occur on the same event timestamp. - -{% hint style="info" %} -Feast does not yet apply feature transformations. Transformations are currently expected to happen before data is ingested into Feast. The data sources described within feature tables should reference feature values in their already transformed form. -{% endhint %} - -### Features - -A feature is an individual measurable property observed on an entity. For example the amount of transactions \(feature\) a customer \(entity\) has completed. Features are used for both model training and scoring \(batch, online\). - -Features are defined as part of feature tables. Since Feast does not apply transformations, a feature is basically a schema that only contains a name and a type: - -```python -avg_daily_ride = Feature("average_daily_rides", ValueType.FLOAT) -``` - -Visit [FeatureSpec](https://api.docs.feast.dev/grpc/feast.core.pb.html#FeatureSpecV2) for the complete feature specification API. - -## Structure of a Feature Table - -Feature tables contain the following fields: - -* **Name:** Name of feature table. This name must be unique within a project. -* **Entities:** List of [entities](entities.md) to associate with the features defined in this feature table. Entities are used as lookup keys when retrieving features from a feature table. -* **Features:** List of features within a feature table. -* **Labels:** Labels are arbitrary key-value properties that can be defined by users. -* **Max age:** Max age affect the retrieval of features from a feature table. Age is measured as the duration of time between the event timestamp of a feature and the lookup time on an [entity key]() used to retrieve the feature. 
Feature values outside max age will be returned as unset values. Max age allows for eviction of keys from online stores and limits the amount of historical scanning required for historical feature values during retrieval. -* **Batch Source:** The batch data source from which Feast will ingest feature values into stores. This can either be used to back-fill stores before switching over to a streaming source, or it can be used as the primary source of data for a feature table. Visit [Sources](sources.md) to learn more about batch sources. -* **Stream Source:** The streaming data source from which you can ingest streaming feature values into Feast. Streaming sources must be paired with a batch source containing the same feature values. A streaming source is only used to populate online stores. The batch equivalent source that is paired with a streaming source is used during the generation of historical feature datasets. Visit [Sources](sources.md) to learn more about stream sources. - -Here is a ride-hailing example of a valid feature table specification: - -{% tabs %} -{% tab title="driver\_trips\_feature\_table.py" %} -```python -from feast import BigQuerySource, FeatureTable, Feature, ValueType -from google.protobuf.duration_pb2 import Duration - -driver_ft = FeatureTable( - name="driver_trips", - entities=["driver_id"], - features=[ - Feature("average_daily_rides", ValueType.FLOAT), - Feature("rating", ValueType.FLOAT) - ], - max_age=Duration(seconds=3600), - labels={ - "team": "driver_matching" - }, - batch_source=BigQuerySource( - table_ref="gcp_project:bq_dataset.bq_table", - event_timestamp_column="datetime", - created_timestamp_column="timestamp", - field_mapping={ - "rating": "driver_rating" - } - ) -) -``` -{% endtab %} -{% endtabs %} - -By default, Feast assumes that features specified in the feature-table specification corresponds one-to-one to the fields found in the sources. All features defined in a feature table should be available in the defined sources. - -Field mappings can be used to map features defined in Feast to fields as they occur in data sources. - -In the example feature-specification table above, we use field mappings to ensure the feature named `rating` in the batch source is mapped to the field named `driver_rating`. - -## Working with a Feature Table - -#### Creating a Feature Table - -```python -driver_ft = FeatureTable(...) -client.apply(driver_ft) -``` - -#### Updating a Feature Table - -```python -driver_ft = FeatureTable() - -client.apply(driver_ft) - -driver_ft.labels = {"team": "marketplace"} - -client.apply(driver_ft) -``` - -#### Feast currently supports the following changes to feature tables: - -* Adding new features. -* Removing features. -* Updating source, max age, and labels. - -{% hint style="warning" %} -Deleted features are archived, rather than removed completely. Importantly, new features cannot use the names of these deleted features. -{% endhint %} - -#### Feast currently does not support the following changes to feature tables: - -* Changes to the project or name of a feature table. -* Changes to entities related to a feature table. -* Changes to names and types of existing features. - -#### Deleting a Feature Table - -{% hint style="danger" %} -Feast currently does not support the deletion of feature tables. 
-{% endhint %} - diff --git a/docs/feast-on-kubernetes/concepts/overview.md b/docs/feast-on-kubernetes/concepts/overview.md deleted file mode 100644 index 461510984b..0000000000 --- a/docs/feast-on-kubernetes/concepts/overview.md +++ /dev/null @@ -1,21 +0,0 @@ -# Overview - -### Concepts - -[Entities](entities.md) are objects in an organization like customers, transactions, and drivers, products, etc. - -[Sources](sources.md) are external sources of data where feature data can be found. - -[Feature Tables](feature-tables.md) are objects that define logical groupings of features, data sources, and other related metadata. - -### Concept Hierarchy - -![](../../.gitbook/assets/image%20%284%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%283%29%20%283%29%20%283%29%20%283%29%20%281%29.png) - -Feast contains the following core concepts: - -* **Projects:** Serve as a top level namespace for all Feast resources. Each project is a completely independent environment in Feast. Users can only work in a single project at a time. -* **Entities:** Entities are the objects in an organization on which features occur. They map to your business domain \(users, products, transactions, locations\). -* **Feature Tables:** Defines a group of features that occur on a specific entity. -* **Features:** Individual feature within a feature table. - diff --git a/docs/feast-on-kubernetes/concepts/sources.md b/docs/feast-on-kubernetes/concepts/sources.md deleted file mode 100644 index 65595d94a9..0000000000 --- a/docs/feast-on-kubernetes/concepts/sources.md +++ /dev/null @@ -1,90 +0,0 @@ -# Sources - -### Overview - -Sources are descriptions of external feature data and are registered to Feast as part of [feature tables](feature-tables.md). Once registered, Feast can ingest feature data from these sources into stores. - -Currently, Feast supports the following source types: - -#### Batch Source - -* File \(as in Spark\): Parquet \(only\). -* BigQuery - -#### Stream Source - -* Kafka -* Kinesis - -The following encodings are supported on streams - -* Avro -* Protobuf - -### Structure of a Source - -For both batch and stream sources, the following configurations are necessary: - -* **Event timestamp column**: Name of column containing timestamp when event data occurred. Used during point-in-time join of feature values to [entity timestamps](). -* **Created timestamp column**: Name of column containing timestamp when data is created. Used to deduplicate data when multiple copies of the same [entity key]() is ingested. 
- -Example data source specifications: - -{% tabs %} -{% tab title="batch\_sources.py" %} -```python -from feast import FileSource -from feast.data_format import ParquetFormat - -batch_file_source = FileSource( - file_format=ParquetFormat(), - file_url="file:///feast/customer.parquet", - event_timestamp_column="event_timestamp", - created_timestamp_column="created_timestamp", -) -``` -{% endtab %} - -{% tab title="stream\_sources.py" %} -```python -from feast import KafkaSource -from feast.data_format import ProtoFormat - -stream_kafka_source = KafkaSource( - bootstrap_servers="localhost:9094", - message_format=ProtoFormat(class_path="class.path"), - topic="driver_trips", - event_timestamp_column="event_timestamp", - created_timestamp_column="created_timestamp", -) -``` -{% endtab %} -{% endtabs %} - -The [Feast Python API documentation](https://api.docs.feast.dev/python/) provides more information about options to specify for the above sources. - -### Working with a Source - -#### Creating a Source - -Sources are defined as part of [feature tables](feature-tables.md): - -```python -batch_bigquery_source = BigQuerySource( - table_ref="gcp_project:bq_dataset.bq_table", - event_timestamp_column="event_timestamp", - created_timestamp_column="created_timestamp", -) - -stream_kinesis_source = KinesisSource( - bootstrap_servers="localhost:9094", - record_format=ProtoFormat(class_path="class.path"), - region="us-east-1", - stream_name="driver_trips", - event_timestamp_column="event_timestamp", - created_timestamp_column="created_timestamp", -) -``` - -Feast ensures that the source complies with the schema of the feature table. These specified data sources can then be included inside a feature table specification and registered to Feast Core. - diff --git a/docs/feast-on-kubernetes/concepts/stores.md b/docs/feast-on-kubernetes/concepts/stores.md deleted file mode 100644 index 59deac0a6a..0000000000 --- a/docs/feast-on-kubernetes/concepts/stores.md +++ /dev/null @@ -1,20 +0,0 @@ -# Stores - -In Feast, a store is a database that is populated with feature data that will ultimately be served to models. - -## Offline \(Historical\) Store - -The offline store maintains historical copies of feature values. These features are grouped and stored in feature tables. During retrieval of historical data, features are queries from these feature tables in order to produce training datasets. - -## Online Store - -The online store maintains only the latest values for a specific feature. - -* Feature values are stored based on their [entity keys]() -* Feast currently supports Redis as an online store. -* Online stores are meant for very high throughput writes from ingestion jobs and very low latency access to features during online serving. - -{% hint style="info" %} -Feast only supports a single online store in production -{% endhint %} - diff --git a/docs/feast-on-kubernetes/getting-started/README.md b/docs/feast-on-kubernetes/getting-started/README.md deleted file mode 100644 index b9423182fe..0000000000 --- a/docs/feast-on-kubernetes/getting-started/README.md +++ /dev/null @@ -1,24 +0,0 @@ -# Getting started - -{% hint style="danger" %} -Feast on Kubernetes is only supported using Feast 0.9 \(and below\). We are working to add support for Feast on Kubernetes with the latest release of Feast. Please see our [roadmap](../../roadmap.md) for more details. 
-{% endhint %} - -### Install Feast - -If you would like to deploy a new installation of Feast, click on [Install Feast](install-feast/) - -{% page-ref page="install-feast/" %} - -### Connect to Feast - -If you would like to connect to an existing Feast deployment, click on [Connect to Feast](connect-to-feast/) - -{% page-ref page="connect-to-feast/" %} - -### Learn Feast - -If you would like to learn more about Feast, click on [Learn Feast](learn-feast.md) - -{% page-ref page="learn-feast.md" %} - diff --git a/docs/feast-on-kubernetes/getting-started/connect-to-feast/README.md b/docs/feast-on-kubernetes/getting-started/connect-to-feast/README.md deleted file mode 100644 index 4333359f90..0000000000 --- a/docs/feast-on-kubernetes/getting-started/connect-to-feast/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# Connect to Feast - -### Feast Python SDK - -The Feast Python SDK is used as a library to interact with a Feast deployment. - -* Define, register, and manage entities and features -* Ingest data into Feast -* Build and retrieve training datasets -* Retrieve online features - -{% page-ref page="python-sdk.md" %} - -### Feast CLI - -The Feast CLI is a command line implementation of the Feast Python SDK. - -* Define, register, and manage entities and features from the terminal -* Ingest data into Feast -* Manage ingestion jobs - -{% page-ref page="feast-cli.md" %} - -### Online Serving Clients - -The following clients can be used to retrieve online feature values: - -* [Feast Python SDK](https://api.docs.feast.dev/python/) -* [Feast Go SDK](https://godoc.org/github.com/feast-dev/feast/sdk/go) -* [Feast Java SDK](https://javadoc.io/doc/dev.feast/feast-sdk) - diff --git a/docs/feast-on-kubernetes/getting-started/connect-to-feast/feast-cli.md b/docs/feast-on-kubernetes/getting-started/connect-to-feast/feast-cli.md deleted file mode 100644 index 47471b8471..0000000000 --- a/docs/feast-on-kubernetes/getting-started/connect-to-feast/feast-cli.md +++ /dev/null @@ -1,37 +0,0 @@ -# Feast CLI - -Install the Feast CLI using pip: - -```bash -pip install feast==0.9.* -``` - -Configure the CLI to connect to your Feast Core deployment: - -```text -feast config set core_url your.feast.deployment -``` - -{% hint style="info" %} -By default, all configuration is stored in `~/.feast/config` -{% endhint %} - -The CLI is a wrapper around the [Feast Python SDK](python-sdk.md): - -```aspnet -$ feast - -Usage: feast [OPTIONS] COMMAND [ARGS]... - -Options: - --help Show this message and exit. 
- -Commands: - config View and edit Feast properties - entities Create and manage entities - feature-tables Create and manage feature tables - jobs Create and manage jobs - projects Create and manage projects - version Displays version and connectivity information -``` - diff --git a/docs/feast-on-kubernetes/getting-started/connect-to-feast/python-sdk.md b/docs/feast-on-kubernetes/getting-started/connect-to-feast/python-sdk.md deleted file mode 100644 index 3e7c86880e..0000000000 --- a/docs/feast-on-kubernetes/getting-started/connect-to-feast/python-sdk.md +++ /dev/null @@ -1,20 +0,0 @@ -# Python SDK - -Install the [Feast Python SDK](https://api.docs.feast.dev/python/) using pip: - -```bash -pip install feast==0.9.* -``` - -Connect to an existing Feast Core deployment: - -```python -from feast import Client - -# Connect to an existing Feast Core deployment -client = Client(core_url='feast.example.com:6565') - -# Ensure that your client is connected by printing out some feature tables -client.list_feature_tables() -``` - diff --git a/docs/feast-on-kubernetes/getting-started/install-feast/README.md b/docs/feast-on-kubernetes/getting-started/install-feast/README.md deleted file mode 100644 index 0b77ab431a..0000000000 --- a/docs/feast-on-kubernetes/getting-started/install-feast/README.md +++ /dev/null @@ -1,40 +0,0 @@ -# Install Feast - -A production deployment of Feast is deployed using Kubernetes. - -## Kubernetes \(with Helm\) - -This guide installs Feast into an existing Kubernetes cluster using Helm. The installation is not specific to any cloud platform or environment, but requires Kubernetes and Helm. - -{% page-ref page="kubernetes-with-helm.md" %} - -## Amazon EKS \(with Terraform\) - -This guide installs Feast into an AWS environment using Terraform. The Terraform script is opinionated and intended to allow you to start quickly. - -{% page-ref page="kubernetes-amazon-eks-with-terraform.md" %} - -## Azure AKS \(with Helm\) - -This guide installs Feast into an Azure AKS environment with Helm. - -{% page-ref page="kubernetes-azure-aks-with-helm.md" %} - -## Azure AKS \(with Terraform\) - -This guide installs Feast into an Azure environment using Terraform. The Terraform script is opinionated and intended to allow you to start quickly. - -{% page-ref page="kubernetes-azure-aks-with-terraform.md" %} - -## Google Cloud GKE \(with Terraform\) - -This guide installs Feast into a Google Cloud environment using Terraform. The Terraform script is opinionated and intended to allow you to start quickly. - -{% page-ref page="google-cloud-gke-with-terraform.md" %} - -## IBM Cloud Kubernetes Service \(IKS\) and Red Hat OpenShift \(using Kustomize\) - -This guide installs Feast into an existing [IBM Cloud Kubernetes Service](https://www.ibm.com/cloud/kubernetes-service) or [Red Hat OpenShift on IBM Cloud](https://www.ibm.com/cloud/openshift) using Kustomize. - -{% page-ref page="ibm-cloud-iks-with-kustomize.md" %} - diff --git a/docs/feast-on-kubernetes/getting-started/install-feast/google-cloud-gke-with-terraform.md b/docs/feast-on-kubernetes/getting-started/install-feast/google-cloud-gke-with-terraform.md deleted file mode 100644 index a3252cf0bb..0000000000 --- a/docs/feast-on-kubernetes/getting-started/install-feast/google-cloud-gke-with-terraform.md +++ /dev/null @@ -1,52 +0,0 @@ -# Google Cloud GKE \(with Terraform\) - -### Overview - -This guide installs Feast on GKE using our [reference Terraform configuration](https://github.com/feast-dev/feast/tree/master/infra/terraform/gcp). 
- -{% hint style="info" %} -The Terraform configuration used here is a greenfield installation that neither assumes anything about, nor integrates with, existing resources in your GCP account. The Terraform configuration presents an easy way to get started, but you may want to customize this set up before using Feast in production. -{% endhint %} - -This Terraform configuration creates the following resources: - -* GKE cluster -* Feast services running on GKE -* Google Memorystore \(Redis\) as online store -* Dataproc cluster -* Kafka running on GKE, exposed to the dataproc cluster via internal load balancer - -### 1. Requirements - -* Install [Terraform](https://www.terraform.io/) > = 0.12 \(tested with 0.13.3\) -* Install [Helm](https://helm.sh/docs/intro/install/) \(tested with v3.3.4\) -* GCP [authentication](https://cloud.google.com/docs/authentication) and sufficient [privilege](https://cloud.google.com/iam/docs/understanding-roles) to create the resources listed above. - -### 2. Configure Terraform - -Create a `.tfvars` file under`feast/infra/terraform/gcp`. Name the file. In our example, we use `my_feast.tfvars`. You can see the full list of configuration variables in `variables.tf`. Sample configurations are provided below: - -{% code title="my\_feast.tfvars" %} -```typescript -gcp_project_name = "kf-feast" -name_prefix = "feast-0-8" -region = "asia-east1" -gke_machine_type = "n1-standard-2" -network = "default" -subnetwork = "default" -dataproc_staging_bucket = "feast-dataproc" -``` -{% endcode %} - -### 3. Apply - -After completing the configuration, initialize Terraform and apply: - -```bash -$ cd feast/infra/terraform/gcp -$ terraform init -$ terraform apply -var-file=my_feast.tfvars -``` - - - diff --git a/docs/feast-on-kubernetes/getting-started/install-feast/ibm-cloud-iks-with-kustomize.md b/docs/feast-on-kubernetes/getting-started/install-feast/ibm-cloud-iks-with-kustomize.md deleted file mode 100644 index 0abca57b6d..0000000000 --- a/docs/feast-on-kubernetes/getting-started/install-feast/ibm-cloud-iks-with-kustomize.md +++ /dev/null @@ -1,193 +0,0 @@ -# IBM Cloud Kubernetes Service \(IKS\) and Red Hat OpenShift \(with Kustomize\) - -## Overview - -This guide installs Feast on an existing IBM Cloud Kubernetes cluster or Red Hat OpenShift on IBM Cloud , and ensures the following services are running: - -* Feast Core -* Feast Online Serving -* Postgres -* Redis -* Kafka \(Optional\) -* Feast Jupyter \(Optional\) -* Prometheus \(Optional\) - -## 1. Prerequisites - -1. [IBM Cloud Kubernetes Service](https://www.ibm.com/cloud/kubernetes-service) or [Red Hat OpenShift on IBM Cloud](https://www.ibm.com/cloud/openshift) -2. Install [Kubectl](https://cloud.ibm.com/docs/containers?topic=containers-cs_cli_install#kubectl) that matches the major.minor versions of your IKS or Install the [OpenShift CLI](https://cloud.ibm.com/docs/openshift?topic=openshift-openshift-cli#cli_oc) that matches your local operating system and OpenShift cluster version. -3. Install [Helm 3](https://helm.sh/) -4. Install [Kustomize](https://kubectl.docs.kubernetes.io/installation/kustomize/) - -## 2. Preparation - -### IBM Cloud Block Storage Setup \(IKS only\) - -:warning: If you have Red Hat OpenShift Cluster on IBM Cloud skip to this [section](ibm-cloud-iks-with-kustomize.md#Security-Context-Constraint-Setup). 
- -By default, IBM Cloud Kubernetes cluster uses [IBM Cloud File Storage](https://www.ibm.com/cloud/file-storage) based on NFS as the default storage class, and non-root users do not have write permission on the volume mount path for NFS-backed storage. Some common container images in Feast, such as Redis, Postgres, and Kafka specify a non-root user to access the mount path in the images. When containers are deployed using these images, the containers fail to start due to insufficient permissions of the non-root user creating folders on the mount path. - -[IBM Cloud Block Storage](https://www.ibm.com/cloud/block-storage) allows for the creation of raw storage volumes and provides faster performance without the permission restriction of NFS-backed storage - -Therefore, to deploy Feast we need to set up [IBM Cloud Block Storage](https://cloud.ibm.com/docs/containers?topic=containers-block_storage#install_block) as the default storage class so that you can have all the functionalities working and get the best experience from Feast. - -1. [Follow the instructions](https://helm.sh/docs/intro/install/) to install the Helm version 3 client on your local machine. -2. Add the IBM Cloud Helm chart repository to the cluster where you want to use the IBM Cloud Block Storage plug-in. - - ```text - helm repo add iks-charts https://icr.io/helm/iks-charts - helm repo update - ``` - -3. Install the IBM Cloud Block Storage plug-in. When you install the plug-in, pre-defined block storage classes are added to your cluster. - - ```text - helm install v2.0.2 iks-charts/ibmcloud-block-storage-plugin -n kube-system - ``` - - Example output: - - ```text - NAME: v2.0.2 - LAST DEPLOYED: Fri Feb 5 12:29:50 2021 - NAMESPACE: kube-system - STATUS: deployed - REVISION: 1 - NOTES: - Thank you for installing: ibmcloud-block-storage-plugin. Your release is named: v2.0.2 - ... - ``` - -4. Verify that all block storage plugin pods are in a "Running" state. - - ```text - kubectl get pods -n kube-system | grep ibmcloud-block-storage - ``` - -5. Verify that the storage classes for Block Storage were added to your cluster. - - ```text - kubectl get storageclasses | grep ibmc-block - ``` - -6. Set the Block Storage as the default storageclass. - - ```text - kubectl patch storageclass ibmc-block-gold -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}' - kubectl patch storageclass ibmc-file-gold -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}' - - # Check the default storageclass is block storage - kubectl get storageclass | grep \(default\) - ``` - - Example output: - - ```text - ibmc-block-gold (default) ibm.io/ibmc-block 65s - ``` - - **Security Context Constraint Setup \(OpenShift only\)** - -By default, in OpenShift, all pods or containers will use the [Restricted SCC](https://docs.openshift.com/container-platform/4.6/authentication/managing-security-context-constraints.html) which limits the UIDs pods can run with, causing the Feast installation to fail. To overcome this, you can allow Feast pods to run with any UID by executing the following: - -```text -oc adm policy add-scc-to-user anyuid -z default,kf-feast-kafka -n feast -``` - -## 3. Installation - -Install Feast using kustomize. The pods may take a few minutes to initialize. 
- -```bash -git clone https://github.com/kubeflow/manifests -cd manifests/contrib/feast/ -kustomize build feast/base | kubectl apply -n feast -f - -``` - -### Optional: Enable Feast Jupyter and Kafka - -You may optionally enable the Feast Jupyter component which contains code examples to demonstrate Feast. Some examples require Kafka to stream real time features to the Feast online serving. To enable, edit the following properties in the `values.yaml` under the `manifests/contrib/feast` folder: - -```text -kafka.enabled: true -feast-jupyter.enabled: true -``` - -Then regenerate the resource manifests and deploy: - -```text -make feast/base -kustomize build feast/base | kubectl apply -n feast -f - -``` - -## 4. Use Feast Jupyter Notebook Server to connect to Feast - -After all the pods are in a `RUNNING` state, port-forward to the Jupyter Notebook Server in the cluster: - -```bash -kubectl port-forward \ -$(kubectl get pod -l app=feast-jupyter -o custom-columns=:metadata.name) 8888:8888 -n feast -``` - -```text -Forwarding from 127.0.0.1:8888 -> 8888 -Forwarding from [::1]:8888 -> 8888 -``` - -You can now connect to the bundled Jupyter Notebook Server at `localhost:8888` and follow the example Jupyter notebook. - -{% embed url="http://localhost:8888/tree?" caption="" %} - -## 5. Uninstall Feast - -```text -kustomize build feast/base | kubectl delete -n feast -f - -``` - -## 6. Troubleshooting - -When running the minimal\_ride\_hailing\_example Jupyter Notebook example the following errors may occur: - -1. When running `job = client.get_historical_features(...)`: - - ```text - KeyError: 'historical_feature_output_location' - ``` - - or - - ```text - KeyError: 'spark_staging_location' - ``` - - Add the following environment variable: - - ```text - os.environ["FEAST_HISTORICAL_FEATURE_OUTPUT_LOCATION"] = "file:///home/jovyan/historical_feature_output" - os.environ["FEAST_SPARK_STAGING_LOCATION"] = "file:///home/jovyan/test_data" - ``` - -2. When running `job.get_status()` - - ```text - - ``` - - Add the following environment variable: - - ```text - os.environ["FEAST_REDIS_HOST"] = "feast-release-redis-master" - ``` - -3. When running `job = client.start_stream_to_online_ingestion(...)` - - ```text - org.apache.kafka.vendor.common.KafkaException: Failed to construct kafka consumer - ``` - - Add the following environment variable: - - ```text - os.environ["DEMO_KAFKA_BROKERS"] = "feast-release-kafka:9092" - ``` - diff --git a/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-amazon-eks-with-terraform.md b/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-amazon-eks-with-terraform.md deleted file mode 100644 index d03d7fb863..0000000000 --- a/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-amazon-eks-with-terraform.md +++ /dev/null @@ -1,68 +0,0 @@ -# Amazon EKS \(with Terraform\) - -### Overview - -This guide installs Feast on AWS using our [reference Terraform configuration](https://github.com/feast-dev/feast/tree/master/infra/terraform/aws). - -{% hint style="info" %} -The Terraform configuration used here is a greenfield installation that neither assumes anything about, nor integrates with, existing resources in your AWS account. The Terraform configuration presents an easy way to get started, but you may want to customize this set up before using Feast in production. 
-{% endhint %} - -This Terraform configuration creates the following resources: - -* Kubernetes cluster on Amazon EKS \(3x r3.large nodes\) -* Kafka managed by Amazon MSK \(2x kafka.t3.small nodes\) -* Postgres database for Feast metadata, using serverless Aurora \(min capacity: 2\) -* Redis cluster, using Amazon Elasticache \(1x cache.t2.micro\) -* Amazon EMR cluster to run Spark \(3x spot m4.xlarge\) -* Staging S3 bucket to store temporary data - -![](../../../.gitbook/assets/feast-on-aws-3-%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%283%29.png) - -### 1. Requirements - -* Create an AWS account and [configure credentials locally](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html) -* Install [Terraform](https://www.terraform.io/) > = 0.12 \(tested with 0.13.3\) -* Install [Helm](https://helm.sh/docs/intro/install/) \(tested with v3.3.4\) - -### 2. Configure Terraform - -Create a `.tfvars` file under`feast/infra/terraform/aws`. Name the file. In our example, we use `my_feast.tfvars`. You can see the full list of configuration variables in `variables.tf`. At a minimum, you need to set `name_prefix` and an AWS region: - -{% code title="my\_feast.tfvars" %} -```typescript -name_prefix = "my-feast" -region = "us-east-1" -``` -{% endcode %} - -### 3. Apply - -After completing the configuration, initialize Terraform and apply: - -```bash -$ cd feast/infra/terraform/aws -$ terraform init -$ terraform apply -var-file=my_feast.tfvars -``` - -Starting may take a minute. A kubectl configuration file is also created in this directory, and the file's name will start with `kubeconfig_` and end with a random suffix. - -### 4. Connect to Feast using Jupyter - -After all pods are running, connect to the Jupyter Notebook Server running in the cluster. - -To connect to the remote Feast server you just created, forward a port from the remote k8s cluster to your local machine. Replace `kubeconfig_XXXXXXX` below with the kubeconfig file name Terraform generates for you. - -```bash -KUBECONFIG=kubeconfig_XXXXXXX kubectl port-forward \ -$(kubectl get pod -o custom-columns=:metadata.name | grep jupyter) 8888:8888 -``` - -```text -Forwarding from 127.0.0.1:8888 -> 8888 -Forwarding from [::1]:8888 -> 8888 -``` - -You can now connect to the bundled Jupyter Notebook Server at `localhost:8888` and follow the example Jupyter notebook. - diff --git a/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-azure-aks-with-helm.md b/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-azure-aks-with-helm.md deleted file mode 100644 index 39dcdbd700..0000000000 --- a/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-azure-aks-with-helm.md +++ /dev/null @@ -1,139 +0,0 @@ -# Azure AKS \(with Helm\) - -## Overview - -This guide installs Feast on Azure Kubernetes cluster \(known as AKS\), and ensures the following services are running: - -* Feast Core -* Feast Online Serving -* Postgres -* Redis -* Spark -* Kafka -* Feast Jupyter \(Optional\) -* Prometheus \(Optional\) - -## 1. Requirements - -1. Install and configure [Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli) -2. Install and configure [Kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) -3. Install [Helm 3](https://helm.sh/) - -## 2. Preparation - -Create an AKS cluster with Azure CLI. 
The detailed steps can be found [here](https://docs.microsoft.com/en-us/azure/aks/kubernetes-walkthrough), and a high-level walk through includes: - -```bash -az group create --name myResourceGroup --location eastus -az acr create --resource-group myResourceGroup --name feast-AKS-ACR --sku Basic -az aks create -g myResourceGroup -n feast-AKS --location eastus --attach-acr feast-AKS-ACR --generate-ssh-keys - -az aks install-cli -az aks get-credentials --resource-group myResourceGroup --name feast-AKS -``` - -Add the Feast Helm repository and download the latest charts: - -```bash -helm version # make sure you have the latest Helm installed -helm repo add feast-charts https://feast-helm-charts.storage.googleapis.com -helm repo update -``` - -Feast includes a Helm chart that installs all necessary components to run Feast Core, Feast Online Serving, and an example Jupyter notebook. - -Feast Core requires Postgres to run, which requires a secret to be set on Kubernetes: - -```bash -kubectl create secret generic feast-postgresql --from-literal=postgresql-password=password -``` - -## 3. Feast installation - -Install Feast using Helm. The pods may take a few minutes to initialize. - -```bash -helm install feast-release feast-charts/feast -``` - -## 4. Spark operator installation - -Follow the documentation [to install Spark operator on Kubernetes ](https://github.com/GoogleCloudPlatform/spark-on-k8s-operator), and Feast documentation to [configure Spark roles](../../reference-1/feast-and-spark.md) - -```bash -helm repo add spark-operator https://googlecloudplatform.github.io/spark-on-k8s-operator -helm install my-release spark-operator/spark-operator --set serviceAccounts.spark.name=spark --set image.tag=v1beta2-1.1.2-2.4.5 -``` - -and ensure the service account used by Feast has permissions to manage Spark Application resources. This depends on your k8s setup, but typically you'd need to configure a Role and a RoleBinding like the one below: - -```text -cat < -rules: -- apiGroups: ["sparkoperator.k8s.io"] - resources: ["sparkapplications"] - verbs: ["create", "delete", "deletecollection", "get", "list", "update", "watch", "patch"] ---- -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: RoleBinding -metadata: - name: use-spark-operator - namespace: -roleRef: - kind: Role - name: use-spark-operator - apiGroup: rbac.authorization.k8s.io -subjects: - - kind: ServiceAccount - name: default -EOF -``` - -## 5. Use Jupyter to connect to Feast - -After all the pods are in a `RUNNING` state, port-forward to the Jupyter Notebook Server in the cluster: - -```bash -kubectl port-forward \ -$(kubectl get pod -o custom-columns=:metadata.name | grep jupyter) 8888:8888 -``` - -```text -Forwarding from 127.0.0.1:8888 -> 8888 -Forwarding from [::1]:8888 -> 8888 -``` - -You can now connect to the bundled Jupyter Notebook Server at `localhost:8888` and follow the example Jupyter notebook. - -{% embed url="http://localhost:8888/tree?" caption="" %} - -## 6. 
Environment variables - -If you are running the [Minimal Ride Hailing Example](https://github.com/feast-dev/feast/blob/master/examples/minimal/minimal_ride_hailing.ipynb), you may want to make sure the following environment variables are correctly set: - -```text -demo_data_location = "wasbs://@.blob.core.windows.net/" -os.environ["FEAST_AZURE_BLOB_ACCOUNT_NAME"] = "" -os.environ["FEAST_AZURE_BLOB_ACCOUNT_ACCESS_KEY"] = -os.environ["FEAST_HISTORICAL_FEATURE_OUTPUT_LOCATION"] = "wasbs://@.blob.core.windows.net/out/" -os.environ["FEAST_SPARK_STAGING_LOCATION"] = "wasbs://@.blob.core.windows.net/artifacts/" -os.environ["FEAST_SPARK_LAUNCHER"] = "k8s" -os.environ["FEAST_SPARK_K8S_NAMESPACE"] = "default" -os.environ["FEAST_HISTORICAL_FEATURE_OUTPUT_FORMAT"] = "parquet" -os.environ["FEAST_REDIS_HOST"] = "feast-release-redis-master.default.svc.cluster.local" -os.environ["DEMO_KAFKA_BROKERS"] = "feast-release-kafka.default.svc.cluster.local:9092" -``` - -## 7. Further Reading - -* [Feast Concepts](../../concepts/overview.md) -* [Feast Examples/Tutorials](https://github.com/feast-dev/feast/tree/master/examples) -* [Feast Helm Chart Documentation](https://github.com/feast-dev/feast/blob/master/infra/charts/feast/README.md) -* [Configuring Feast components](../../reference-1/configuration-reference.md) -* [Feast and Spark](../../reference-1/feast-and-spark.md) - diff --git a/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-azure-aks-with-terraform.md b/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-azure-aks-with-terraform.md deleted file mode 100644 index 71dd15908d..0000000000 --- a/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-azure-aks-with-terraform.md +++ /dev/null @@ -1,63 +0,0 @@ -# Azure AKS \(with Terraform\) - -## Overview - -This guide installs Feast on Azure using our [reference Terraform configuration](https://github.com/feast-dev/feast/tree/master/infra/terraform/azure). - -{% hint style="info" %} -The Terraform configuration used here is a greenfield installation that neither assumes anything about, nor integrates with, existing resources in your Azure account. The Terraform configuration presents an easy way to get started, but you may want to customize this set up before using Feast in production. -{% endhint %} - -This Terraform configuration creates the following resources: - -* Kubernetes cluster on Azure AKS -* Kafka managed by HDInsight -* Postgres database for Feast metadata, running as a pod on AKS -* Redis cluster, using Azure Cache for Redis -* [spark-on-k8s-operator](https://github.com/GoogleCloudPlatform/spark-on-k8s-operator) to run Spark -* Staging Azure blob storage container to store temporary data - -## 1. Requirements - -* Create an Azure account and [configure credentials locally](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli) -* Install [Terraform](https://www.terraform.io/) \(tested with 0.13.5\) -* Install [Helm](https://helm.sh/docs/intro/install/) \(tested with v3.4.2\) - -## 2. Configure Terraform - -Create a `.tfvars` file under`feast/infra/terraform/azure`. Name the file. In our example, we use `my_feast.tfvars`. You can see the full list of configuration variables in `variables.tf`. At a minimum, you need to set `name_prefix` and `resource_group`: - -{% code title="my\_feast.tfvars" %} -```typescript -name_prefix = "feast" -resource_group = "Feast" # pre-existing resource group -``` -{% endcode %} - -## 3. 
Apply - -After completing the configuration, initialize Terraform and apply: - -```bash -$ cd feast/infra/terraform/azure -$ terraform init -$ terraform apply -var-file=my_feast.tfvars -``` - -## 4. Connect to Feast using Jupyter - -After all pods are running, connect to the Jupyter Notebook Server running in the cluster. - -To connect to the remote Feast server you just created, forward a port from the remote k8s cluster to your local machine. - -```bash -kubectl port-forward $(kubectl get pod -o custom-columns=:metadata.name | grep jupyter) 8888:8888 -``` - -```text -Forwarding from 127.0.0.1:8888 -> 8888 -Forwarding from [::1]:8888 -> 8888 -``` - -You can now connect to the bundled Jupyter Notebook Server at `localhost:8888` and follow the example Jupyter notebook. - diff --git a/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-with-helm.md b/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-with-helm.md deleted file mode 100644 index 032554d120..0000000000 --- a/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-with-helm.md +++ /dev/null @@ -1,69 +0,0 @@ -# Kubernetes \(with Helm\) - -## Overview - -This guide installs Feast on an existing Kubernetes cluster, and ensures the following services are running: - -* Feast Core -* Feast Online Serving -* Postgres -* Redis -* Feast Jupyter \(Optional\) -* Prometheus \(Optional\) - -## 1. Requirements - -1. Install and configure [Kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) -2. Install [Helm 3](https://helm.sh/) - -## 2. Preparation - -Add the Feast Helm repository and download the latest charts: - -```text -helm repo add feast-charts https://feast-helm-charts.storage.googleapis.com -helm repo update -``` - -Feast includes a Helm chart that installs all necessary components to run Feast Core, Feast Online Serving, and an example Jupyter notebook. - -Feast Core requires Postgres to run, which requires a secret to be set on Kubernetes: - -```bash -kubectl create secret generic feast-postgresql --from-literal=postgresql-password=password -``` - -## 3. Installation - -Install Feast using Helm. The pods may take a few minutes to initialize. - -```bash -helm install feast-release feast-charts/feast -``` - -## 4. Use Jupyter to connect to Feast - -After all the pods are in a `RUNNING` state, port-forward to the Jupyter Notebook Server in the cluster: - -```bash -kubectl port-forward \ -$(kubectl get pod -l app=feast-jupyter -o custom-columns=:metadata.name) 8888:8888 -``` - -```text -Forwarding from 127.0.0.1:8888 -> 8888 -Forwarding from [::1]:8888 -> 8888 -``` - -You can now connect to the bundled Jupyter Notebook Server at `localhost:8888` and follow the example Jupyter notebook. - -{% embed url="http://localhost:8888/tree?" caption="" %} - -## 5. 
Further Reading - -* [Feast Concepts](../../concepts/overview.md) -* [Feast Examples/Tutorials](https://github.com/feast-dev/feast/tree/master/examples) -* [Feast Helm Chart Documentation](https://github.com/feast-dev/feast/blob/master/infra/charts/feast/README.md) -* [Configuring Feast components](../../reference-1/configuration-reference.md) -* [Feast and Spark](../../reference-1/feast-and-spark.md) - diff --git a/docs/feast-on-kubernetes/getting-started/install-feast/quickstart.md b/docs/feast-on-kubernetes/getting-started/install-feast/quickstart.md deleted file mode 100644 index b5e50d193c..0000000000 --- a/docs/feast-on-kubernetes/getting-started/install-feast/quickstart.md +++ /dev/null @@ -1,91 +0,0 @@ -# Docker Compose - -{% hint style="success" %} -This guide is meant for exploratory purposes only. It allows users to run Feast locally using Docker Compose instead of Kubernetes. The goal of this guide is for users to be able to quickly try out the full Feast stack without needing to deploy to Kubernetes. It is not meant for production use. -{% endhint %} - -## Overview - -This guide shows you how to deploy Feast using [Docker Compose](https://docs.docker.com/get-started/). Docker Compose allows you to explore the functionality provided by Feast while requiring only minimal infrastructure. - -This guide includes the following containerized components: - -* [A complete Feast deployment](../../concepts/architecture.md) - * Feast Core with Postgres - * Feast Online Serving with Redis. - * Feast Job Service -* A Jupyter Notebook Server with built in Feast example\(s\). For demo purposes only. -* A Kafka cluster for testing streaming ingestion. For demo purposes only. - -## Get Feast - -Clone the latest stable version of Feast from the [Feast repository](https://github.com/feast-dev/feast/): - -```text -git clone https://github.com/feast-dev/feast.git -cd feast/infra/docker-compose -``` - -Create a new configuration file: - -```text -cp .env.sample .env -``` - -## Start Feast - -Start Feast with Docker Compose: - -```text -docker-compose pull && docker-compose up -d -``` - -Wait until all all containers are in a running state: - -```text -docker-compose ps -``` - -## Try our example\(s\) - -You can now connect to the bundled Jupyter Notebook Server running at `localhost:8888` and follow the example Jupyter notebook. - -{% embed url="http://localhost:8888/tree?" caption="" %} - -## Troubleshooting - -### Open ports - -Please ensure that the following ports are available on your host machine: - -* `6565` -* `6566` -* `8888` -* `9094` -* `5432` - -If a port conflict cannot be resolved, you can modify the port mappings in the provided [docker-compose.yml](https://github.com/feast-dev/feast/tree/master/infra/docker-compose) file to use different ports on the host. - -### Containers are restarting or unavailable - -If some of the containers continue to restart, or you are unable to access a service, inspect the logs using the following command: - -```javascript -docker-compose logs -f -t -``` - -If you are unable to resolve the problem, visit [GitHub](https://github.com/feast-dev/feast/issues) to create an issue. - -## Configuration - -The Feast Docker Compose setup can be configured by modifying properties in your `.env` file. - -### Accessing Google Cloud Storage \(GCP\) - -To access Google Cloud Storage as a data source, the Docker Compose installation requires access to a GCP service account. 
- -* Create a new [service account](https://cloud.google.com/iam/docs/creating-managing-service-accounts) and save a JSON key. -* Grant the service account access to your bucket\(s\). -* Copy the service account to the path you have configured in `.env` under `GCP_SERVICE_ACCOUNT`. -* Restart your Docker Compose setup of Feast. - diff --git a/docs/feast-on-kubernetes/getting-started/learn-feast.md b/docs/feast-on-kubernetes/getting-started/learn-feast.md deleted file mode 100644 index 983799ca9b..0000000000 --- a/docs/feast-on-kubernetes/getting-started/learn-feast.md +++ /dev/null @@ -1,15 +0,0 @@ -# Learn Feast - -Explore the following resources to learn more about Feast: - -* [Concepts](../../) describes all important Feast API concepts. -* [User guide](../user-guide/define-and-ingest-features.md) provides guidance on completing Feast workflows. -* [Examples](https://github.com/feast-dev/feast/tree/master/examples) contains Jupyter notebooks that you can run on your Feast deployment. -* [Advanced](../advanced-1/troubleshooting.md) contains information about both advanced and operational aspects of Feast. -* [Reference](../reference-1/api/) contains detailed API and design documents for advanced users. -* [Contributing](../../contributing/contributing.md) contains resources for anyone who wants to contribute to Feast. - -{% hint style="info" %} -The best way to learn Feast is to use it. Jump over to our [Quickstart](install-feast/quickstart.md) guide to have one of our examples running in no time at all! -{% endhint %} - diff --git a/docs/feast-on-kubernetes/reference-1/README.md b/docs/feast-on-kubernetes/reference-1/README.md deleted file mode 100644 index 02577ad8e3..0000000000 --- a/docs/feast-on-kubernetes/reference-1/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# Reference - diff --git a/docs/feast-on-kubernetes/reference-1/api/README.md b/docs/feast-on-kubernetes/reference-1/api/README.md deleted file mode 100644 index cd75f5bf88..0000000000 --- a/docs/feast-on-kubernetes/reference-1/api/README.md +++ /dev/null @@ -1,17 +0,0 @@ -# API Reference - -Please see the following API specific reference documentation: - -* [Feast Core gRPC API](https://api.docs.feast.dev/grpc/feast/core/coreservice.pb.html): This is the gRPC API used by Feast Core. This API contains RPCs for creating and managing feature sets, stores, projects, and jobs. -* [Feast Serving gRPC API](https://api.docs.feast.dev/grpc/feast/serving/servingservice.pb.html): This is the gRPC API used by Feast Serving. It contains RPCs used for the retrieval of online feature data or historical feature data. -* [Feast gRPC Types](https://api.docs.feast.dev/grpc/feast/types/value.pb): These are the gRPC types used by both Feast Core, Feast Serving, and the Go, Java, and Python clients. -* [Go Client SDK](https://godoc.org/github.com/feast-dev/feast/sdk/go): The Go library used for the retrieval of online features from Feast. -* [Java Client SDK](https://javadoc.io/doc/dev.feast/feast-sdk): The Java library used for the retrieval of online features from Feast. -* [Python SDK](https://api.docs.feast.dev/python/): This is the complete reference to the Feast Python SDK. The SDK is used to manage feature sets, features, jobs, projects, and entities. It can also be used to retrieve training datasets or online features from Feast Serving. - -## Community Contributions - -The following community provided SDKs are available: - -* [Node.js SDK](https://github.com/MichaelHirn/feast-client/): A Node.js SDK written in TypeScript. 
The SDK can be used to manage feature sets, features, jobs, projects, and entities. - diff --git a/docs/feast-on-kubernetes/reference-1/configuration-reference.md b/docs/feast-on-kubernetes/reference-1/configuration-reference.md deleted file mode 100644 index 6f9a97dabf..0000000000 --- a/docs/feast-on-kubernetes/reference-1/configuration-reference.md +++ /dev/null @@ -1,132 +0,0 @@ -# Configuration Reference - -## Overview - -This reference describes how to configure Feast components: - -* [Feast Core and Feast Online Serving](configuration-reference.md#2-feast-core-serving-and-job-controller) -* [Feast CLI and Feast Python SDK](configuration-reference.md#3-feast-cli-and-feast-python-sdk) -* [Feast Go and Feast Java SDK](configuration-reference.md#4-feast-java-and-go-sdk) - -## 1. Feast Core and Feast Online Serving - -Available configuration properties for Feast Core and Feast Online Serving can be referenced from the corresponding `application.yml` of each component: - -| Component | Configuration Reference | -| :--- | :--- | -| Core | [core/src/main/resources/application.yml](https://github.com/feast-dev/feast-java/blob/master/core/src/main/resources/application.yml) | -| Serving \(Online\) | [serving/src/main/resources/application.yml](https://github.com/feast-dev/feast-java/blob/master/serving/src/main/resources/application.yml) | - -Configuration properties for Feast Core and Feast Online Serving are defined depending on Feast is deployed: - -* [Docker Compose deployment](configuration-reference.md#docker-compose-deployment) - Feast is deployed with Docker Compose. -* [Kubernetes deployment](configuration-reference.md#kubernetes-deployment) - Feast is deployed with Kubernetes. -* [Direct Configuration](configuration-reference.md#direct-configuration) - Feast is built and run from source code. - -## Docker Compose Deployment - -For each Feast component deployed using Docker Compose, configuration properties from `application.yml` can be set at: - -| Component | Configuration Path | -| :--- | :--- | -| Core | `infra/docker-compose/core/core.yml` | -| Online Serving | `infra/docker-compose/serving/online-serving.yml` | - -## Kubernetes Deployment - -The Kubernetes Feast Deployment is configured using `values.yaml` in the [Helm chart](https://github.com/feast-dev/feast-helm-charts) included with Feast: - -```yaml -# values.yaml -feast-core: - enabled: true # whether to deploy the feast-core subchart to deploy Feast Core. - # feast-core subchart specific config. - gcpServiceAccount: - enabled: true - # .... -``` - -A reference of the sub-chart-specific configuration can found in its `values.yml`: - -* [feast-core](https://github.com/feast-dev/feast-java/tree/master/infra/charts/feast-core) -* [feast-serving](https://github.com/feast-dev/feast-java/tree/master/infra/charts/feast-serving) - -Configuration properties can be set via `application-override.yaml` for each component in `values.yaml`: - -```yaml -# values.yaml -feast-core: - # .... - application-override.yaml: - # application.yml config properties for Feast Core. - # ... -``` - -Visit the [Helm chart](https://github.com/feast-dev/feast-helm-charts) included with Feast to learn more about configuration. 
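As a concrete, illustrative instance of the override mechanism described above, the sketch below sets a single Feast Core `application.yml` property, `feast.audit.messageLogging.enabled`, through the subchart's `application-override.yaml` block. The nesting simply mirrors the property path; the chosen property and value are examples only, not a recommended configuration.

```yaml
# values.yaml (sketch): overriding one Feast Core application.yml property
# via the feast-core subchart's application-override.yaml block.
# The property path follows feast.audit.messageLogging.enabled; the value
# shown here is illustrative only.
feast-core:
  enabled: true
  application-override.yaml:
    feast:
      audit:
        messageLogging:
          enabled: false
```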
- -## Direct Configuration - -If Feast is built and running from source, configuration properties can be set directly in the Feast component's `application.yml`: - -| Component | Configuration Path | -| :--- | :--- | -| Core | [core/src/main/resources/application.yml](https://github.com/feast-dev/feast-java/blob/master/core/src/main/resources/application.yml) | -| Serving \(Online\) | [serving/src/main/resources/application.yml](https://github.com/feast-dev/feast-java/blob/master/serving/src/main/resources/application.yml) | - -## 2. Feast CLI and Feast Python SDK - -Configuration options for both the [Feast CLI](../getting-started/connect-to-feast/feast-cli.md) and [Feast Python SDK](https://api.docs.feast.dev/python/) can be defined in the following locations, in order of precedence: - -**1. Command line arguments or initialized arguments:** Passing parameters to the Feast CLI or instantiating the Feast Client object with specific parameters will take precedence above other parameters. - -```bash -# Set option as command line arguments. -feast config set core_url "localhost:6565" -``` - -```python -# Pass options as initialized arguments. -client = Client( - core_url="localhost:6565", - project="default" -) -``` - -**2. Environmental variables:** Environmental variables can be set to provide configuration options. They must be prefixed with `FEAST_`. For example `FEAST_CORE_URL`. - -```bash -FEAST_CORE_URL=my_feast:6565 FEAST_PROJECT=default feast projects list -``` - -**3. Configuration file:** Options with the lowest precedence are configured in the Feast configuration file. Feast looks for or creates this configuration file in `~/.feast/config` if it does not already exist. All options must be defined in the `[general]` section of this file. - -```text -[general] -project = default -core_url = localhost:6565 -``` - -Visit the [available configuration parameters](https://api.docs.feast.dev/python/#module-feast.constants) for Feast Python SDK and Feast CLI to learn more. - -## 3. Feast Java and Go SDK - -The [Feast Java SDK](https://javadoc.io/doc/dev.feast/feast-sdk/latest/com/gojek/feast/package-summary.html) and [Feast Go SDK](https://godoc.org/github.com/feast-dev/feast/sdk/go) are configured via arguments passed when instantiating the respective Clients: - -### Go SDK - -```go -// configure serving host and port. -cli := feast.NewGrpcClient("localhost", 6566) -``` - -Visit the[ Feast Go SDK API reference](https://godoc.org/github.com/feast-dev/feast/sdk/go) to learn more about available configuration parameters. - -### Java SDK - -```java -// configure serving host and port. -client = FeastClient.create(servingHost, servingPort); -``` - -Visit the [Feast Java SDK API reference](https://javadoc.io/doc/dev.feast/feast-sdk/latest/com/gojek/feast/package-summary.html) to learn more about available configuration parameters. - diff --git a/docs/feast-on-kubernetes/reference-1/feast-and-spark.md b/docs/feast-on-kubernetes/reference-1/feast-and-spark.md deleted file mode 100644 index be05f177ae..0000000000 --- a/docs/feast-on-kubernetes/reference-1/feast-and-spark.md +++ /dev/null @@ -1,83 +0,0 @@ ---- -description: Configuring Feast to use Spark for ingestion. ---- - -# Feast and Spark - -Feast relies on Spark to ingest data from the offline store to the online store, streaming ingestion, and running queries to retrieve historical data from the offline store. Feast supports several Spark deployment options. - -## Option 1. 
Use Kubernetes Operator for Apache Spark - -To install the Spark on K8s Operator - -```bash -helm repo add spark-operator \ - https://googlecloudplatform.github.io/spark-on-k8s-operator - -helm install my-release spark-operator/spark-operator \ - --set serviceAccounts.spark.name=spark -``` - -Currently Feast is tested using `v1beta2-1.1.2-2.4.5`version of the operator image. To configure Feast to use it, set the following options in Feast config: - -| Feast Setting | Value | -| :--- | :--- | -| `SPARK_LAUNCHER` | `"k8s"` | -| `SPARK_STAGING_LOCATION` | S3/GCS/Azure Blob Storage URL to use as a staging location, must be readable and writable by Feast. For S3, use `s3a://` prefix here. Ex.: `s3a://some-bucket/some-prefix/artifacts/` | -| `HISTORICAL_FEATURE_OUTPUT_LOCATION` | S3/GCS/Azure Blob Storage URL used to store results of historical retrieval queries, must be readable and writable by Feast. For S3, use `s3a://` prefix here. Ex.: `s3a://some-bucket/some-prefix/out/` | -| `SPARK_K8S_NAMESPACE` | Only needs to be set if you are customizing the spark-on-k8s-operator. The name of the Kubernetes namespace to run Spark jobs in. This should match the value of `sparkJobNamespace` set on spark-on-k8s-operator Helm chart. Typically this is also the namespace Feast itself will run in. | -| `SPARK_K8S_JOB_TEMPLATE_PATH` | Only needs to be set if you are customizing the Spark job template. Local file path with the template of the SparkApplication resource. No prefix required. Ex.: `/home/jovyan/work/sparkapp-template.yaml`. An example template is [here](https://github.com/feast-dev/feast/blob/4059a21dc4eba9cd27b2d5b0fabe476c07a8b3bd/sdk/python/feast/pyspark/launchers/k8s/k8s_utils.py#L280-L317) and the spec is defined in the [k8s-operator User Guide](https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/blob/master/docs/user-guide.md). | - -Lastly, make sure that the service account used by Feast has permissions to manage Spark Application resources. This depends on your k8s setup, but typically you'd need to configure a Role and a RoleBinding like the one below: - -```text -cat < - - - Limitation - Motivation - - - - - Features names and entity names cannot overlap in feature table definitions - Features and entities become columns in historical stores which may cause - conflicts - - - -

-The following field names are reserved in feature tables:
-
-* `event_timestamp`
-* `datetime`
-* `created_timestamp`
-* `ingestion_id`
-* `job_id`
- - These keywords are used for column names when persisting metadata in historical - stores - - - - -### Ingestion - -| Limitation | Motivation | -| :--- | :--- | -| Once data has been ingested into Feast, there is currently no way to delete the data without manually going to the database and deleting it. However, during retrieval only the latest rows will be returned for a specific key \(`event_timestamp`, `entity`\) based on its `created_timestamp`. | This functionality simply doesn't exist yet as a Feast API | - -### Storage - -| Limitation | Motivation | -| :--- | :--- | -| Feast does not support offline storage in Feast 0.8 | As part of our re-architecture of Feast, we moved from GCP to cloud-agnostic deployments. Developing offline storage support that is available in all cloud environments is a pending action. | - diff --git a/docs/feast-on-kubernetes/reference-1/metrics-reference.md b/docs/feast-on-kubernetes/reference-1/metrics-reference.md deleted file mode 100644 index 78f94bc390..0000000000 --- a/docs/feast-on-kubernetes/reference-1/metrics-reference.md +++ /dev/null @@ -1,178 +0,0 @@ -# Metrics Reference - -{% hint style="warning" %} -This page applies to Feast 0.7. The content may be out of date for Feast 0.8+ -{% endhint %} - -Reference of the metrics that each Feast component exports: - -* [Feast Core](metrics-reference.md#feast-core) -* [Feast Serving](metrics-reference.md#feast-serving) -* [Feast Ingestion Job](metrics-reference.md#feast-ingestion-job) - -For how to configure Feast to export Metrics, see the [Metrics user guide.](../advanced-1/metrics.md) - -## Feast Core - -**Exported Metrics** - -Feast Core exports the following metrics: - -| Metrics | Description | Tags | -| :--- | :--- | :--- | -| `feast_core_request_latency_seconds` | Feast Core's latency in serving Requests in Seconds. | `service`, `method`, `status_code` | -| `feast_core_feature_set_total` | No. of Feature Sets registered with Feast Core. | None | -| `feast_core_store_total` | No. of Stores registered with Feast Core. | None | -| `feast_core_max_memory_bytes` | Max amount of memory the Java virtual machine will attempt to use. | None | -| `feast_core_total_memory_bytes` | Total amount of memory in the Java virtual machine | None | -| `feast_core_free_memory_bytes` | Total amount of free memory in the Java virtual machine. | None | -| `feast_core_gc_collection_seconds` | Time spent in a given JVM garbage collector in seconds. | None | - -**Metric Tags** - -Exported Feast Core metrics may be filtered by the following tags/keys - -| Tag | Description | -| :--- | :--- | -| `service` | Name of the Service that request is made to. Should be set to `CoreService` | -| `method` | Name of the Method that the request is calling. \(ie `ListFeatureSets`\) | -| `status_code` | Status code returned as a result of handling the requests \(ie `OK`\). Can be used to find request failures. | - -## Feast Serving - -**Exported Metrics** - -Feast Serving exports the following metrics: - -| Metric | Description | Tags | -| :--- | :--- | :--- | -| `feast_serving_request_latency_seconds` | Feast Serving's latency in serving Requests in Seconds. | `method` | -| `feast_serving_request_feature_count` | No. of requests retrieving a Feature from Feast Serving. | `project`, `feature_name` | -| `feast_serving_not_found_feature_count` | No. 
of requests retrieving a Feature has resulted in a [`NOT_FOUND` field status.](../user-guide/getting-training-features.md#online-field-statuses) | `project`, `feature_name` | -| `feast_serving_stale_feature_count` | No. of requests retrieving a Feature resulted in a [`OUTSIDE_MAX_AGE` field status.](../user-guide/getting-training-features.md#online-field-statuses) | `project`, `feature_name` | -| `feast_serving_grpc_request_count` | Total gRPC requests served. | `method` | - -**Metric Tags** - -Exported Feast Serving metrics may be filtered by the following tags/keys - -| Tag | Description | -| :--- | :--- | -| `method` | Name of the Method that the request is calling. \(ie `ListFeatureSets`\) | -| `status_code` | Status code returned as a result of handling the requests \(ie `OK`\). Can be used to find request failures. | -| `project` | Name of the project that the FeatureSet of the Feature retrieved belongs to. | -| `feature_name` | Name of the Feature being retrieved. | - -## Feast Ingestion Job - -Feast Ingestion computes both metrics an statistics on [data ingestion.](../user-guide/define-and-ingest-features.md) Make sure you familar with data ingestion concepts before proceeding. - -**Metrics Namespace** - -Metrics are computed at two stages of the Feature Row's/Feature Value's life cycle when being processed by the Ingestion Job: - -* `Inflight`- Prior to writing data to stores, but after successful validation of data. -* `WriteToStoreSucess`- After a successful store write. - -Metrics processed by each staged will be tagged with `metrics_namespace` to the stage where the metric was computed. - -**Metrics Bucketing** - -Metrics with a `{BUCKET}` are computed on a 60 second window/bucket. Suffix with the following to select the bucket to use: - -* `min` - minimum value. -* `max` - maximum value. -* `mean`- mean value. -* `percentile_90`- 90 percentile. -* `percentile_95`- 95 percentile. -* `percentile_99`- 99 percentile. - -**Exported Metrics** - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-| Metric | Description | Tags |
-| :--- | :--- | :--- |
-| `feast_ingestion_feature_row_lag_ms_{BUCKET}` | Lag time in milliseconds between succeeding ingested Feature Rows. | `feast_store`, `feast_project_name`, `feast_featureSet_name`, `ingestion_job_name`, `metrics_namespace` |
-| `feast_ingestion_feature_value_lag_ms_{BUCKET}` | Lag time in milliseconds between succeeding ingested values for each Feature. | `feast_store`, `feast_project_name`, `feast_featureSet_name`, `feast_feature_name`, `ingestion_job_name`, `metrics_namespace` |
-| `feast_ingestion_feature_value_{BUCKET}` | Last ingested value for each Feature. | `feast_store`, `feast_project_name`, `feast_feature_name`, `feast_featureSet_name`, `ingestion_job_name`, `metrics_namespace` |
-| `feast_ingestion_feature_row_ingested_count` | No. of ingested Feature Rows. | `feast_store`, `feast_project_name`, `feast_featureSet_name`, `ingestion_job_name`, `metrics_namespace` |
-| `feast_ingestion_feature_value_missing_count` | No. of times an ingested Feature Row did not provide a value for a Feature. | `feast_store`, `feast_project_name`, `feast_featureSet_name`, `feast_feature_name`, `ingestion_job_name`, `metrics_namespace` |
-| `feast_ingestion_deadletter_row_count` | No. of Feature Rows that the Ingestion Job did not successfully write to the store. | `feast_store`, `feast_project_name`, `feast_featureSet_name`, `ingestion_job_name` |
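
As a rough illustration of how the bucket suffixes combine with the metric names above, the snippet below assembles a bucketed metric name and queries it from a Prometheus HTTP API. The helper function, the server URL, and the label values are assumptions made for the example and are not part of Feast.

```python
import requests

def bucketed_metric(base: str, bucket: str) -> str:
    """Append one of the bucket suffixes listed above (min, max, mean, percentile_90, ...)."""
    return f"{base}_{bucket}"

# Hypothetical Prometheus endpoint that scrapes the Ingestion Job metrics.
query = (
    bucketed_metric("feast_ingestion_feature_row_lag_ms", "percentile_99")
    + '{feast_project_name="default",metrics_namespace="Inflight"}'
)
resp = requests.get("http://prometheus:9090/api/v1/query", params={"query": query})
print(resp.json())
```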
- -**Metric Tags** - -Exported Feast Ingestion Job metrics may be filtered by the following tags/keys - -| Tag | Description | -| :--- | :--- | -| `feast_store` | Name of the target store the Ingestion Job is writing to. | -| `feast_project_name` | Name of the project that the ingested FeatureSet belongs to. | -| `feast_featureSet_name` | Name of the Feature Set being ingested. | -| `feast_feature_name` | Name of the Feature being ingested. | -| `ingestion_job_name` | Name of the Ingestion Job performing data ingestion. Typically this is set to the Id of the Ingestion Job. | -| `metrics_namespace` | Stage where metrics where computed. Either `Inflight` or `WriteToStoreSuccess` | - diff --git a/docs/feast-on-kubernetes/tutorials-1/README.md b/docs/feast-on-kubernetes/tutorials-1/README.md deleted file mode 100644 index 84ce15b788..0000000000 --- a/docs/feast-on-kubernetes/tutorials-1/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# Tutorials - diff --git a/docs/feast-on-kubernetes/user-guide/README.md b/docs/feast-on-kubernetes/user-guide/README.md deleted file mode 100644 index be02a73372..0000000000 --- a/docs/feast-on-kubernetes/user-guide/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# User guide - diff --git a/docs/feast-on-kubernetes/user-guide/define-and-ingest-features.md b/docs/feast-on-kubernetes/user-guide/define-and-ingest-features.md deleted file mode 100644 index 5a7e7288ec..0000000000 --- a/docs/feast-on-kubernetes/user-guide/define-and-ingest-features.md +++ /dev/null @@ -1,52 +0,0 @@ -# Define and ingest features - -In order to retrieve features for both training and serving, Feast requires data being ingested into its offline and online stores. - -Users are expected to already have either a batch or stream source with data stored in it, ready to be ingested into Feast. Once a feature table \(with the corresponding sources\) has been registered with Feast, it is possible to load data from this source into stores. - -The following depicts an example ingestion flow from a data source to the online store. - -## Batch Source to Online Store - -```python -from feast import Client -from datetime import datetime, timedelta - -client = Client(core_url="localhost:6565") -driver_ft = client.get_feature_table("driver_trips") - -# Initialize date ranges -today = datetime.now() -yesterday = today - timedelta(1) - -# Launches a short-lived job that ingests data over the provided date range. -client.start_offline_to_online_ingestion( - driver_ft, yesterday, today -) -``` - -## Stream Source to Online Store - -```python -from feast import Client -from datetime import datetime, timedelta - -client = Client(core_url="localhost:6565") -driver_ft = client.get_feature_table("driver_trips") - -# Launches a long running streaming ingestion job -client.start_stream_to_online_ingestion(driver_ft) -``` - -## Batch Source to Offline Store - -{% hint style="danger" %} -Not supported in Feast 0.8 -{% endhint %} - -## Stream Source to Offline Store - -{% hint style="danger" %} -Not supported in Feast 0.8 -{% endhint %} - diff --git a/docs/feast-on-kubernetes/user-guide/getting-online-features.md b/docs/feast-on-kubernetes/user-guide/getting-online-features.md deleted file mode 100644 index c16dc08a01..0000000000 --- a/docs/feast-on-kubernetes/user-guide/getting-online-features.md +++ /dev/null @@ -1,54 +0,0 @@ -# Getting online features - -Feast provides an API through which online feature values can be retrieved. 
This allows teams to look up feature values at low latency in production during model serving, in order to make online predictions. - -{% hint style="info" %} -Online stores only maintain the current state of features, i.e latest feature values. No historical data is stored or served. -{% endhint %} - -```python -from feast import Client - -online_client = Client( - core_url="localhost:6565", - serving_url="localhost:6566", -) - -entity_rows = [ - {"driver_id": 1001}, - {"driver_id": 1002}, -] - -# Features in format -feature_refs = [ - "driver_trips:average_daily_rides", - "driver_trips:maximum_daily_rides", - "driver_trips:rating", -] - -response = online_client.get_online_features( - feature_refs=feature_refs, # Contains only feature references - entity_rows=entity_rows, # Contains only entities (driver ids) -) - -# Print features in dictionary format -response_dict = response.to_dict() -print(response_dict) -``` - -The online store must be populated through [ingestion jobs](define-and-ingest-features.md#batch-source-to-online-store) prior to being used for online serving. - -Feast Serving provides a [gRPC API](https://api.docs.feast.dev/grpc/feast.serving.pb.html) that is backed by [Redis](https://redis.io/). We have native clients in [Python](https://api.docs.feast.dev/python/), [Go](https://godoc.org/github.com/gojek/feast/sdk/go), and [Java](https://javadoc.io/doc/dev.feast). - -### Online Field Statuses - -Feast also returns status codes when retrieving features from the Feast Serving API. These status code give useful insight into the quality of data being served. - -| Status | Meaning | -| :--- | :--- | -| NOT\_FOUND | The feature value was not found in the online store. This might mean that no feature value was ingested for this feature. | -| NULL\_VALUE | A entity key was successfully found but no feature values had been set. This status code should not occur during normal operation. | -| OUTSIDE\_MAX\_AGE | The age of the feature row in the online store \(in terms of its event timestamp\) has exceeded the maximum age defined within the feature table. | -| PRESENT | The feature values have been found and are within the maximum age. | -| UNKNOWN | Indicates a system failure. | - diff --git a/docs/feast-on-kubernetes/user-guide/getting-training-features.md b/docs/feast-on-kubernetes/user-guide/getting-training-features.md deleted file mode 100644 index e0f52a8cd9..0000000000 --- a/docs/feast-on-kubernetes/user-guide/getting-training-features.md +++ /dev/null @@ -1,72 +0,0 @@ -# Getting training features - -Feast provides a historical retrieval interface for exporting feature data in order to train machine learning models. Essentially, users are able to enrich their data with features from any feature tables. - -### Retrieving historical features - -Below is an example of the process required to produce a training dataset: - -```python -# Feature references with target feature -features = [ - "driver_trips:average_daily_rides", - "driver_trips:maximum_daily_rides", - "driver_trips:rating", - "driver_trips:rating:trip_completed", -] - -# Define entity source -entity_source = FileSource( - "event_timestamp", - ParquetFormat(), - "gs://some-bucket/customer" -) - -# Retrieve historical dataset from Feast. -historical_feature_retrieval_job = client.get_historical_features( - features=features, - entity_rows=entity_source -) - -output_file_uri = historical_feature_retrieval_job.get_output_file_uri() -``` - -#### 1. 
Define feature references - -[Feature references]() define the specific features that will be retrieved from Feast. These features can come from multiple feature tables. The only requirement is that the feature tables that make up the feature references have the same entity \(or composite entity\). - -**2. Define an entity dataframe** - -Feast needs to join feature values onto specific entities at specific points in time. Thus, it is necessary to provide an [entity dataframe]() as part of the `get_historical_features` method. In the example above we are defining an entity source. This source is an external file that provides Feast with the entity dataframe. - -**3. Launch historical retrieval job** - -Once the feature references and an entity source are defined, it is possible to call `get_historical_features()`. This method launches a job that extracts features from the sources defined in the provided feature tables, joins them onto the provided entity source, and returns a reference to the training dataset that is produced. - -Please see the [Feast SDK](https://api.docs.feast.dev/python) for more details. - -### Point-in-time Joins - -Feast always joins features onto entity data in a point-in-time correct way. The process can be described through an example. - -In the example below there are two tables \(or dataframes\): - -* The dataframe on the left is the [entity dataframe]() that contains timestamps, entities, and the target variable \(trip\_completed\). This dataframe is provided to Feast through an entity source. -* The dataframe on the right contains driver features. This dataframe is represented in Feast through a feature table and its accompanying data source\(s\). - -The user would like to have the driver features joined onto the entity dataframe to produce a training dataset that contains both the target \(trip\_completed\) and features \(average\_daily\_rides, maximum\_daily\_rides, rating\). This dataset will then be used to train their model. - -![](../../.gitbook/assets/point_in_time_join%20%281%29%20%282%29%20%282%29%20%283%29%20%283%29%20%283%29%20%283%29%20%282%29.png) - -Feast is able to intelligently join feature data with different timestamps to a single entity dataframe. It does this through a point-in-time join as follows: - -1. Feast loads the entity dataframe and all feature tables \(driver dataframe\) into the same location. This can either be a database or in memory. -2. For each [entity row]() in the [entity dataframe](getting-online-features.md), Feast tries to find feature values in each feature table to join to it. Feast extracts the timestamp and entity key of each row in the entity dataframe and scans backward through the feature table until it finds a matching entity key. -3. If the event timestamp of the matching entity key within the driver feature table is within the maximum age configured for the feature table, then the features at that entity key are joined onto the entity dataframe. If the event timestamp is outside of the maximum age, then only null values are returned. -4. If multiple entity keys are found with the same event timestamp, then they are deduplicated by the created timestamp, with newer values taking precedence. -5. Feast repeats this joining process for all feature tables and returns the resulting dataset. 
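
To make the join mechanics concrete, here is a minimal sketch of steps 2–4 using `pandas.merge_asof` on toy data. It is an illustration only, not Feast's actual implementation; the entity key (`driver_id`), the feature and label columns, and the two-hour maximum age are all assumptions made for the example.

```python
import pandas as pd

# Toy entity dataframe: entity keys, event timestamps, and the target label.
entity_df = pd.DataFrame({
    "driver_id": [1001, 1002],
    "event_timestamp": pd.to_datetime(["2021-04-12 10:00", "2021-04-12 08:00"]),
    "trip_completed": [1, 0],
})

# Toy feature table data for the same entity.
driver_df = pd.DataFrame({
    "driver_id": [1001, 1001, 1002],
    "event_timestamp": pd.to_datetime(["2021-04-12 08:00", "2021-04-12 09:30", "2021-04-11 23:00"]),
    "created_timestamp": pd.to_datetime(["2021-04-12 08:01", "2021-04-12 09:31", "2021-04-11 23:01"]),
    "avg_daily_rides": [15.0, 16.0, 9.0],
})

# Step 4: deduplicate rows sharing an event timestamp, keeping the newest created_timestamp.
driver_df = (
    driver_df.sort_values(["event_timestamp", "created_timestamp"])
             .drop_duplicates(["driver_id", "event_timestamp"], keep="last")
)

# Steps 2-3: for each entity row, scan backwards for the latest feature row
# whose event timestamp falls within the maximum age (assumed here to be 2 hours).
joined = pd.merge_asof(
    entity_df.sort_values("event_timestamp"),
    driver_df.sort_values("event_timestamp"),
    on="event_timestamp",
    by="driver_id",
    direction="backward",
    tolerance=pd.Timedelta("2h"),  # feature rows older than the max age yield nulls
)
print(joined)
```

Running the sketch shows both outcomes described above: the first entity row picks up the freshest feature value within the assumed maximum age, while the second falls outside it and comes back with a null feature value.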
- -{% hint style="info" %} -Point-in-time correct joins attempts to prevent the occurrence of feature leakage by trying to recreate the state of the world at a single point in time, instead of joining features based on exact timestamps only. -{% endhint %} - diff --git a/docs/feast-on-kubernetes/user-guide/overview.md b/docs/feast-on-kubernetes/user-guide/overview.md deleted file mode 100644 index 5f36792479..0000000000 --- a/docs/feast-on-kubernetes/user-guide/overview.md +++ /dev/null @@ -1,32 +0,0 @@ -# Overview - -### Using Feast - -Feast development happens through three key workflows: - -1. [Define and load feature data into Feast](define-and-ingest-features.md) -2. [Retrieve historical features for training models](getting-training-features.md) -3. [Retrieve online features for serving models](getting-online-features.md) - -### Defining feature tables and ingesting data into Feast - -Feature creators model the data within their organization into Feast through the definition of [feature tables](../concepts/feature-tables.md) that contain [data sources](../concepts/sources.md). Feature tables are both a schema and a means of identifying data sources for features, and allow Feast to know how to interpret your data, and where to find it. - -After registering a feature table with Feast, users can trigger an ingestion from their data source into Feast. This loads feature values from an upstream data source into Feast stores through ingestion jobs. - -Visit [feature tables](../concepts/feature-tables.md#overview) to learn more about them. - -{% page-ref page="define-and-ingest-features.md" %} - -### Retrieving historical features for training - -In order to generate a training dataset it is necessary to provide both an [entity dataframe ]()and feature references through the[ Feast SDK](https://api.docs.feast.dev/python/) to retrieve historical features. For historical serving, Feast requires that you provide the entities and timestamps for the corresponding feature data. Feast produces a point-in-time correct dataset using the requested features. These features can be requested from an unlimited number of feature sets. - -{% page-ref page="getting-training-features.md" %} - -### Retrieving online features for online serving - -Online retrieval uses feature references through the [Feast Online Serving API](https://api.docs.feast.dev/grpc/feast.serving.pb.html) to retrieve online features. Online serving allows for very low latency requests to feature data at very high throughput. 
- -{% page-ref page="getting-online-features.md" %} - diff --git a/docs/feature-views.md b/docs/feature-views.md deleted file mode 100644 index 235b828835..0000000000 --- a/docs/feature-views.md +++ /dev/null @@ -1,2 +0,0 @@ -# Feature Views - diff --git a/docs/getting-started/architecture-and-components/untitled.md b/docs/getting-started/architecture-and-components/registry.md similarity index 100% rename from docs/getting-started/architecture-and-components/untitled.md rename to docs/getting-started/architecture-and-components/registry.md diff --git a/docs/getting-started/concepts/README.md b/docs/getting-started/concepts/README.md index 99ff586186..7ad0115a72 100644 --- a/docs/getting-started/concepts/README.md +++ b/docs/getting-started/concepts/README.md @@ -14,3 +14,4 @@ {% page-ref page="point-in-time-joins.md" %} +{% page-ref page="dataset.md" %} diff --git a/docs/getting-started/concepts/dataset.md b/docs/getting-started/concepts/dataset.md new file mode 100644 index 0000000000..59f7168905 --- /dev/null +++ b/docs/getting-started/concepts/dataset.md @@ -0,0 +1,50 @@ +# Dataset + +Feast datasets allow for conveniently saving dataframes that include both features and entities to be subsequently used for data analysis and model training. +[Data Quality Monitoring](https://docs.google.com/document/d/110F72d4NTv80p35wDSONxhhPBqWRwbZXG4f9mNEMd98) was the primary motivation for creating dataset concept. + +Dataset's metadata is stored in the Feast registry and raw data (features, entities, additional input keys and timestamp) is stored in the [offline store](../architecture-and-components/offline-store.md). + +Dataset can be created from: +1. Results of historical retrieval +2. [planned] Logging request (including input for [on demand transformation](../../reference/alpha-on-demand-feature-view.md)) and response during feature serving +3. [planned] Logging features during writing to online store (from batch source or stream) + + +### Creating Saved Dataset from Historical Retrieval + +To create a saved dataset from historical features for later retrieval or analysis, a user needs to call `get_historical_features` method first and then pass the returned retrieval job to `create_saved_dataset` method. +`create_saved_dataset` will trigger provided retrieval job (by calling `.persist()` on it) to store the data using specified `storage`. +Storage type must be the same as globally configured offline store (eg, it's impossible to persist data to Redshift with BigQuery source). +`create_saved_dataset` will also create SavedDataset object with all related metadata and will write it to the registry. + +```python +from feast import FeatureStore +from feast.infra.offline_stores.bigquery_source import SavedDatasetBigQueryStorage + +store = FeatureStore() + +historical_job = store.get_historical_features( + features=["driver:avg_trip"], + entity_df=..., +) + +dataset = store.create_saved_dataset( + from_=historical_job, + name='my_training_dataset', + storage=SavedDatasetBigQueryStorage(table_ref='..my_training_dataset'), + tags={'author': 'oleksii'} +) + +dataset.to_df() +``` + +Saved dataset can be later retrieved using `get_saved_dataset` method: +```python +dataset = store.get_saved_dataset('my_training_dataset') +dataset.to_df() +``` + +--- + +Check out our [tutorial on validating historical features](../../tutorials/validating-historical-features.md) to see how this concept can be applied in real-world use case. 
\ No newline at end of file diff --git a/docs/getting-started/connect-to-feast/README.md b/docs/getting-started/connect-to-feast/README.md deleted file mode 100644 index 4333359f90..0000000000 --- a/docs/getting-started/connect-to-feast/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# Connect to Feast - -### Feast Python SDK - -The Feast Python SDK is used as a library to interact with a Feast deployment. - -* Define, register, and manage entities and features -* Ingest data into Feast -* Build and retrieve training datasets -* Retrieve online features - -{% page-ref page="python-sdk.md" %} - -### Feast CLI - -The Feast CLI is a command line implementation of the Feast Python SDK. - -* Define, register, and manage entities and features from the terminal -* Ingest data into Feast -* Manage ingestion jobs - -{% page-ref page="feast-cli.md" %} - -### Online Serving Clients - -The following clients can be used to retrieve online feature values: - -* [Feast Python SDK](https://api.docs.feast.dev/python/) -* [Feast Go SDK](https://godoc.org/github.com/feast-dev/feast/sdk/go) -* [Feast Java SDK](https://javadoc.io/doc/dev.feast/feast-sdk) - diff --git a/docs/getting-started/connect-to-feast/feast-cli.md b/docs/getting-started/connect-to-feast/feast-cli.md deleted file mode 100644 index d15414f360..0000000000 --- a/docs/getting-started/connect-to-feast/feast-cli.md +++ /dev/null @@ -1,37 +0,0 @@ -# Feast CLI - -Install the Feast CLI using pip: - -```bash -pip install feast -``` - -Configure the CLI to connect to your Feast Core deployment: - -```text -feast config set core_url your.feast.deployment -``` - -{% hint style="info" %} -By default, all configuration is stored in `~/.feast/config` -{% endhint %} - -The CLI is a wrapper around the [Feast Python SDK](python-sdk.md): - -```aspnet -$ feast - -Usage: feast [OPTIONS] COMMAND [ARGS]... - -Options: - --help Show this message and exit. - -Commands: - config View and edit Feast properties - entities Create and manage entities - feature-tables Create and manage feature tables - jobs Create and manage jobs - projects Create and manage projects - version Displays version and connectivity information -``` - diff --git a/docs/getting-started/connect-to-feast/python-sdk.md b/docs/getting-started/connect-to-feast/python-sdk.md deleted file mode 100644 index bf31bd3849..0000000000 --- a/docs/getting-started/connect-to-feast/python-sdk.md +++ /dev/null @@ -1,20 +0,0 @@ -# Python SDK - -Install the [Feast Python SDK](https://api.docs.feast.dev/python/) using pip: - -```bash -pip install feast -``` - -Connect to an existing Feast Core deployment: - -```python -from feast import Client - -# Connect to an existing Feast Core deployment -client = Client(core_url='feast.example.com:6565') - -# Ensure that your client is connected by printing out some feature tables -client.list_feature_tables() -``` - diff --git a/docs/getting-started/install-feast/README.md b/docs/getting-started/install-feast/README.md deleted file mode 100644 index 6c1dd80134..0000000000 --- a/docs/getting-started/install-feast/README.md +++ /dev/null @@ -1,33 +0,0 @@ -# Install Feast - -{% hint style="success" %} -_Would you prefer a lighter-weight, pip-install, no-Kubernetes deployment of Feast?_ The Feast maintainers are currently building a new deployment experience for Feast. If you have thoughts on Feast's deployment, [chat with the maintainers](https://calendly.com/d/gc29-y88c/feast-chat-w-willem-and-jay) to learn more and provide feedback. 
-{% endhint %} - -A production deployment of Feast is deployed using Kubernetes. - -## Kubernetes \(with Helm\) - -This guide installs Feast into an existing Kubernetes cluster using Helm. The installation is not specific to any cloud platform or environment, but requires Kubernetes and Helm. - -## Amazon EKS \(with Terraform\) - -This guide installs Feast into an AWS environment using Terraform. The Terraform script is opinionated and intended to allow you to start quickly. - -## Azure AKS \(with Helm\) - -This guide installs Feast into an Azure AKS environment with Helm. - -## Azure AKS \(with Terraform\) - -This guide installs Feast into an Azure environment using Terraform. The Terraform script is opinionated and intended to allow you to start quickly. - -## Google Cloud GKE \(with Terraform\) - -This guide installs Feast into a Google Cloud environment using Terraform. The Terraform script is opinionated and intended to allow you to start quickly. - -## IBM Cloud Kubernetes Service \(IKS\) and Red Hat OpenShift \(using Kustomize\) - -This guide installs Feast into an existing [IBM Cloud Kubernetes Service](https://www.ibm.com/cloud/kubernetes-service) or [Red Hat OpenShift on IBM Cloud](https://www.ibm.com/cloud/openshift) using Kustomize. - -{% page-ref page="ibm-cloud-iks-with-kustomize.md" %} diff --git a/docs/getting-started/install-feast/google-cloud-gke-with-terraform.md b/docs/getting-started/install-feast/google-cloud-gke-with-terraform.md deleted file mode 100644 index a3252cf0bb..0000000000 --- a/docs/getting-started/install-feast/google-cloud-gke-with-terraform.md +++ /dev/null @@ -1,52 +0,0 @@ -# Google Cloud GKE \(with Terraform\) - -### Overview - -This guide installs Feast on GKE using our [reference Terraform configuration](https://github.com/feast-dev/feast/tree/master/infra/terraform/gcp). - -{% hint style="info" %} -The Terraform configuration used here is a greenfield installation that neither assumes anything about, nor integrates with, existing resources in your GCP account. The Terraform configuration presents an easy way to get started, but you may want to customize this set up before using Feast in production. -{% endhint %} - -This Terraform configuration creates the following resources: - -* GKE cluster -* Feast services running on GKE -* Google Memorystore \(Redis\) as online store -* Dataproc cluster -* Kafka running on GKE, exposed to the dataproc cluster via internal load balancer - -### 1. Requirements - -* Install [Terraform](https://www.terraform.io/) > = 0.12 \(tested with 0.13.3\) -* Install [Helm](https://helm.sh/docs/intro/install/) \(tested with v3.3.4\) -* GCP [authentication](https://cloud.google.com/docs/authentication) and sufficient [privilege](https://cloud.google.com/iam/docs/understanding-roles) to create the resources listed above. - -### 2. Configure Terraform - -Create a `.tfvars` file under`feast/infra/terraform/gcp`. Name the file. In our example, we use `my_feast.tfvars`. You can see the full list of configuration variables in `variables.tf`. Sample configurations are provided below: - -{% code title="my\_feast.tfvars" %} -```typescript -gcp_project_name = "kf-feast" -name_prefix = "feast-0-8" -region = "asia-east1" -gke_machine_type = "n1-standard-2" -network = "default" -subnetwork = "default" -dataproc_staging_bucket = "feast-dataproc" -``` -{% endcode %} - -### 3. 
Apply - -After completing the configuration, initialize Terraform and apply: - -```bash -$ cd feast/infra/terraform/gcp -$ terraform init -$ terraform apply -var-file=my_feast.tfvars -``` - - - diff --git a/docs/getting-started/install-feast/ibm-cloud-iks-with-kustomize.md b/docs/getting-started/install-feast/ibm-cloud-iks-with-kustomize.md deleted file mode 100644 index 817d4dbe14..0000000000 --- a/docs/getting-started/install-feast/ibm-cloud-iks-with-kustomize.md +++ /dev/null @@ -1,185 +0,0 @@ -# IBM Cloud Kubernetes Service and Red Hat OpenShift \(with Kustomize\) - -## Overview - -This guide installs Feast on an existing IBM Cloud Kubernetes cluster or Red Hat OpenShift on IBM Cloud , and ensures the following services are running: - -* Feast Core -* Feast Online Serving -* Postgres -* Redis -* Kafka \(Optional\) -* Feast Jupyter \(Optional\) -* Prometheus \(Optional\) - -## 1. Prerequisites - -1. [IBM Cloud Kubernetes Service](https://www.ibm.com/cloud/kubernetes-service) or [Red Hat OpenShift on IBM Cloud](https://www.ibm.com/cloud/openshift) -2. Install [Kubectl](https://cloud.ibm.com/docs/containers?topic=containers-cs_cli_install#kubectl) that matches the major.minor versions of your IKS or Install the [OpenShift CLI](https://cloud.ibm.com/docs/openshift?topic=openshift-openshift-cli#cli_oc) that matches your local operating system and OpenShift cluster version. -3. Install [Helm 3](https://helm.sh/) -4. Install [Kustomize](https://kubectl.docs.kubernetes.io/installation/kustomize/) - -## 2. Preparation -### IBM Cloud Block Storage Setup (IKS only) - -:warning: If you have Red Hat OpenShift Cluster on IBM Cloud skip to this [section](#Security-Context-Constraint-Setup). - -By default, IBM Cloud Kubernetes cluster uses [IBM Cloud File Storage](https://www.ibm.com/cloud/file-storage) based on NFS as the default storage class, and non-root users do not have write permission on the volume mount path for NFS-backed storage. Some common container images in Feast, such as Redis, Postgres, and Kafka specify a non-root user to access the mount path in the images. When containers are deployed using these images, the containers fail to start due to insufficient permissions of the non-root user creating folders on the mount path. - -[IBM Cloud Block Storage](https://www.ibm.com/cloud/block-storage) allows for the creation of raw storage volumes and provides faster performance without the permission restriction of NFS-backed storage - -Therefore, to deploy Feast we need to set up [IBM Cloud Block Storage](https://cloud.ibm.com/docs/containers?topic=containers-block_storage#install_block) as the default storage class so that you can have all the functionalities working and get the best experience from Feast. - -1. [Follow the instructions](https://helm.sh/docs/intro/install/) to install the Helm version 3 client on your local machine. -2. Add the IBM Cloud Helm chart repository to the cluster where you want to use the IBM Cloud Block Storage plug-in. - - ```text - helm repo add iks-charts https://icr.io/helm/iks-charts - helm repo update - ``` - -3. Install the IBM Cloud Block Storage plug-in. When you install the plug-in, pre-defined block storage classes are added to your cluster. - - ```text - helm install v2.0.2 iks-charts/ibmcloud-block-storage-plugin -n kube-system - ``` - - Example output: - - ```text - NAME: v2.0.2 - LAST DEPLOYED: Fri Feb 5 12:29:50 2021 - NAMESPACE: kube-system - STATUS: deployed - REVISION: 1 - NOTES: - Thank you for installing: ibmcloud-block-storage-plugin. 
Your release is named: v2.0.2 - ... - ``` - -4. Verify that all block storage plugin pods are in a "Running" state. - - ```text - kubectl get pods -n kube-system | grep ibmcloud-block-storage - ``` - -5. Verify that the storage classes for Block Storage were added to your cluster. - - ```text - kubectl get storageclasses | grep ibmc-block - ``` - -6. Set the Block Storage as the default storageclass. - - ```text - kubectl patch storageclass ibmc-block-gold -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}' - kubectl patch storageclass ibmc-file-gold -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}' - - # Check the default storageclass is block storage - kubectl get storageclass | grep \(default\) - ``` - - Example output: - - ```text - ibmc-block-gold (default) ibm.io/ibmc-block 65s - ``` -### Security Context Constraint Setup - -By default, in OpenShift, all pods or containers will use the [Restricted SCC](https://docs.openshift.com/container-platform/4.6/authentication/managing-security-context-constraints.html) which limits the UIDs pods can run with, causing the Feast installation to fail. To overcome this, you can allow Feast pods to run with any UID by executing the following: - -```text -oc adm policy add-scc-to-user anyuid -z default,kf-feast-kafka -n feast -``` -## 3. Installation - -Install Feast using kustomize. The pods may take a few minutes to initialize. - -```bash -git clone https://github.com/kubeflow/manifests -cd manifests/contrib/feast/ -kustomize build feast/base | kubectl apply -n feast -f - -``` -### Optional: Enable Feast Jupyter and Kafka - -You may optionally enable the Feast Jupyter component which contains code examples to demonstrate Feast. Some examples require Kafka to stream real time features to the Feast online serving. To enable, edit the following properties in the `values.yaml` under the `manifests/contrib/feast` folder: -``` -kafka.enabled: true -feast-jupyter.enabled: true -``` - -Then regenerate the resource manifests and deploy: -``` -make feast/base -kustomize build feast/base | kubectl apply -n feast -f - -``` - -## 4. Use Feast Jupyter to connect to Feast - -After all the pods are in a `RUNNING` state, port-forward to the Jupyter Notebook Server in the cluster: - -```bash -kubectl port-forward \ -$(kubectl get pod -l app=feast-jupyter -o custom-columns=:metadata.name) 8888:8888 -n feast -``` - -```text -Forwarding from 127.0.0.1:8888 -> 8888 -Forwarding from [::1]:8888 -> 8888 -``` - -You can now connect to the bundled Jupyter Notebook Server at `localhost:8888` and follow the example Jupyter notebook. - -{% embed url="http://localhost:8888/tree?" caption="" %} - -## 5. Uninstall Feast -```text -kustomize build feast/base | kubectl delete -n feast -f - -``` -## 6. Troubleshooting - -When running the minimal\_ride\_hailing\_example Jupyter Notebook example the following errors may occur: - -1. When running `job = client.get_historical_features(...)`: - - ```text - KeyError: 'historical_feature_output_location' - ``` - - or - - ```text - KeyError: 'spark_staging_location' - ``` - - Add the following environment variable: - - ```text - os.environ["FEAST_HISTORICAL_FEATURE_OUTPUT_LOCATION"] = "file:///home/jovyan/historical_feature_output" - os.environ["FEAST_SPARK_STAGING_LOCATION"] = "file:///home/jovyan/test_data" - ``` - -2. 
When running `job.get_status()` - - ```text - - ``` - - Add the following environment variable: - - ```text - os.environ["FEAST_REDIS_HOST"] = "feast-release-redis-master" - ``` - -3. When running `job = client.start_stream_to_online_ingestion(...)` - - ```text - org.apache.kafka.vendor.common.KafkaException: Failed to construct kafka consumer - ``` - - Add the following environment variable: - - ```text - os.environ["DEMO_KAFKA_BROKERS"] = "feast-release-kafka:9092" - ``` - diff --git a/docs/getting-started/install-feast/kubernetes-amazon-eks-with-terraform.md b/docs/getting-started/install-feast/kubernetes-amazon-eks-with-terraform.md deleted file mode 100644 index 99ff4a8e81..0000000000 --- a/docs/getting-started/install-feast/kubernetes-amazon-eks-with-terraform.md +++ /dev/null @@ -1,68 +0,0 @@ -# Amazon EKS \(with Terraform\) - -### Overview - -This guide installs Feast on AWS using our [reference Terraform configuration](https://github.com/feast-dev/feast/tree/master/infra/terraform/aws). - -{% hint style="info" %} -The Terraform configuration used here is a greenfield installation that neither assumes anything about, nor integrates with, existing resources in your AWS account. The Terraform configuration presents an easy way to get started, but you may want to customize this set up before using Feast in production. -{% endhint %} - -This Terraform configuration creates the following resources: - -* Kubernetes cluster on Amazon EKS \(3x r3.large nodes\) -* Kafka managed by Amazon MSK \(2x kafka.t3.small nodes\) -* Postgres database for Feast metadata, using serverless Aurora \(min capacity: 2\) -* Redis cluster, using Amazon Elasticache \(1x cache.t2.micro\) -* Amazon EMR cluster to run Spark \(3x spot m4.xlarge\) -* Staging S3 bucket to store temporary data - -![](../../.gitbook/assets/feast-on-aws-3-%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%281%29.png) - -### 1. Requirements - -* Create an AWS account and [configure credentials locally](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html) -* Install [Terraform](https://www.terraform.io/) > = 0.12 \(tested with 0.13.3\) -* Install [Helm](https://helm.sh/docs/intro/install/) \(tested with v3.3.4\) - -### 2. Configure Terraform - -Create a `.tfvars` file under`feast/infra/terraform/aws`. Name the file. In our example, we use `my_feast.tfvars`. You can see the full list of configuration variables in `variables.tf`. At a minimum, you need to set `name_prefix` and an AWS region: - -{% code title="my\_feast.tfvars" %} -```typescript -name_prefix = "my-feast" -region = "us-east-1" -``` -{% endcode %} - -### 3. Apply - -After completing the configuration, initialize Terraform and apply: - -```bash -$ cd feast/infra/terraform/aws -$ terraform init -$ terraform apply -var-file=my_feast.tfvars -``` - -Starting may take a minute. A kubectl configuration file is also created in this directory, and the file's name will start with `kubeconfig_` and end with a random suffix. - -### 4. Connect to Feast using Jupyter - -After all pods are running, connect to the Jupyter Notebook Server running in the cluster. - -To connect to the remote Feast server you just created, forward a port from the remote k8s cluster to your local machine. Replace `kubeconfig_XXXXXXX` below with the kubeconfig file name Terraform generates for you. 
- -```bash -KUBECONFIG=kubeconfig_XXXXXXX kubectl port-forward \ -$(kubectl get pod -o custom-columns=:metadata.name | grep jupyter) 8888:8888 -``` - -```text -Forwarding from 127.0.0.1:8888 -> 8888 -Forwarding from [::1]:8888 -> 8888 -``` - -You can now connect to the bundled Jupyter Notebook Server at `localhost:8888` and follow the example Jupyter notebook. - diff --git a/docs/getting-started/install-feast/kubernetes-azure-aks-with-helm.md b/docs/getting-started/install-feast/kubernetes-azure-aks-with-helm.md deleted file mode 100644 index 66ba73ef23..0000000000 --- a/docs/getting-started/install-feast/kubernetes-azure-aks-with-helm.md +++ /dev/null @@ -1,139 +0,0 @@ -# Azure AKS \(with Helm\) - -## Overview - -This guide installs Feast on Azure Kubernetes cluster \(known as AKS\), and ensures the following services are running: - -* Feast Core -* Feast Online Serving -* Postgres -* Redis -* Spark -* Kafka -* Feast Jupyter \(Optional\) -* Prometheus \(Optional\) - -## 1. Requirements - -1. Install and configure [Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli) -2. Install and configure [Kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) -3. Install [Helm 3](https://helm.sh/) - -## 2. Preparation - -Create an AKS cluster with Azure CLI. The detailed steps can be found [here](https://docs.microsoft.com/en-us/azure/aks/kubernetes-walkthrough), and a high-level walk through includes: - -```bash -az group create --name myResourceGroup --location eastus -az acr create --resource-group myResourceGroup --name feast-AKS-ACR --sku Basic -az aks create -g myResourceGroup -n feast-AKS --location eastus --attach-acr feast-AKS-ACR --generate-ssh-keys - -az aks install-cli -az aks get-credentials --resource-group myResourceGroup --name feast-AKS -``` - -Add the Feast Helm repository and download the latest charts: - -```bash -helm version # make sure you have the latest Helm installed -helm repo add feast-charts https://feast-helm-charts.storage.googleapis.com -helm repo update -``` - -Feast includes a Helm chart that installs all necessary components to run Feast Core, Feast Online Serving, and an example Jupyter notebook. - -Feast Core requires Postgres to run, which requires a secret to be set on Kubernetes: - -```bash -kubectl create secret generic feast-postgresql --from-literal=postgresql-password=password -``` - -## 3. Feast installation - -Install Feast using Helm. The pods may take a few minutes to initialize. - -```bash -helm install feast-release feast-charts/feast -``` - -## 4. Spark operator installation - -Follow the documentation [to install Spark operator on Kubernetes ](https://github.com/GoogleCloudPlatform/spark-on-k8s-operator), and Feast documentation to [configure Spark roles](../../reference/feast-and-spark.md) - -```bash -helm repo add spark-operator https://googlecloudplatform.github.io/spark-on-k8s-operator -helm install my-release spark-operator/spark-operator --set serviceAccounts.spark.name=spark --set image.tag=v1beta2-1.1.2-2.4.5 -``` - -and ensure the service account used by Feast has permissions to manage Spark Application resources. 
This depends on your k8s setup, but typically you'd need to configure a Role and a RoleBinding like the one below: - -```text -cat < -rules: -- apiGroups: ["sparkoperator.k8s.io"] - resources: ["sparkapplications"] - verbs: ["create", "delete", "deletecollection", "get", "list", "update", "watch", "patch"] ---- -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: RoleBinding -metadata: - name: use-spark-operator - namespace: -roleRef: - kind: Role - name: use-spark-operator - apiGroup: rbac.authorization.k8s.io -subjects: - - kind: ServiceAccount - name: default -EOF -``` - -## 5. Use Jupyter to connect to Feast - -After all the pods are in a `RUNNING` state, port-forward to the Jupyter Notebook Server in the cluster: - -```bash -kubectl port-forward \ -$(kubectl get pod -o custom-columns=:metadata.name | grep jupyter) 8888:8888 -``` - -```text -Forwarding from 127.0.0.1:8888 -> 8888 -Forwarding from [::1]:8888 -> 8888 -``` - -You can now connect to the bundled Jupyter Notebook Server at `localhost:8888` and follow the example Jupyter notebook. - -{% embed url="http://localhost:8888/tree?" caption="" %} - -## 6. Environment variables - -If you are running the [Minimal Ride Hailing Example](https://github.com/feast-dev/feast/blob/master/examples/minimal/minimal_ride_hailing.ipynb), you may want to make sure the following environment variables are correctly set: - -```text -demo_data_location = "wasbs://@.blob.core.windows.net/" -os.environ["FEAST_AZURE_BLOB_ACCOUNT_NAME"] = "" -os.environ["FEAST_AZURE_BLOB_ACCOUNT_ACCESS_KEY"] = -os.environ["FEAST_HISTORICAL_FEATURE_OUTPUT_LOCATION"] = "wasbs://@.blob.core.windows.net/out/" -os.environ["FEAST_SPARK_STAGING_LOCATION"] = "wasbs://@.blob.core.windows.net/artifacts/" -os.environ["FEAST_SPARK_LAUNCHER"] = "k8s" -os.environ["FEAST_SPARK_K8S_NAMESPACE"] = "default" -os.environ["FEAST_HISTORICAL_FEATURE_OUTPUT_FORMAT"] = "parquet" -os.environ["FEAST_REDIS_HOST"] = "feast-release-redis-master.default.svc.cluster.local" -os.environ["DEMO_KAFKA_BROKERS"] = "feast-release-kafka.default.svc.cluster.local:9092" -``` - -## 7. Further Reading - -* [Feast Concepts](../../concepts/overview.md) -* [Feast Examples/Tutorials](https://github.com/feast-dev/feast/tree/master/examples) -* [Feast Helm Chart Documentation](https://github.com/feast-dev/feast/blob/master/infra/charts/feast/README.md) -* [Configuring Feast components](../../reference/configuration-reference.md) -* [Feast and Spark](../../reference/feast-and-spark.md) - diff --git a/docs/getting-started/install-feast/kubernetes-azure-aks-with-terraform.md b/docs/getting-started/install-feast/kubernetes-azure-aks-with-terraform.md deleted file mode 100644 index 71dd15908d..0000000000 --- a/docs/getting-started/install-feast/kubernetes-azure-aks-with-terraform.md +++ /dev/null @@ -1,63 +0,0 @@ -# Azure AKS \(with Terraform\) - -## Overview - -This guide installs Feast on Azure using our [reference Terraform configuration](https://github.com/feast-dev/feast/tree/master/infra/terraform/azure). - -{% hint style="info" %} -The Terraform configuration used here is a greenfield installation that neither assumes anything about, nor integrates with, existing resources in your Azure account. The Terraform configuration presents an easy way to get started, but you may want to customize this set up before using Feast in production. 
-{% endhint %} - -This Terraform configuration creates the following resources: - -* Kubernetes cluster on Azure AKS -* Kafka managed by HDInsight -* Postgres database for Feast metadata, running as a pod on AKS -* Redis cluster, using Azure Cache for Redis -* [spark-on-k8s-operator](https://github.com/GoogleCloudPlatform/spark-on-k8s-operator) to run Spark -* Staging Azure blob storage container to store temporary data - -## 1. Requirements - -* Create an Azure account and [configure credentials locally](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli) -* Install [Terraform](https://www.terraform.io/) \(tested with 0.13.5\) -* Install [Helm](https://helm.sh/docs/intro/install/) \(tested with v3.4.2\) - -## 2. Configure Terraform - -Create a `.tfvars` file under`feast/infra/terraform/azure`. Name the file. In our example, we use `my_feast.tfvars`. You can see the full list of configuration variables in `variables.tf`. At a minimum, you need to set `name_prefix` and `resource_group`: - -{% code title="my\_feast.tfvars" %} -```typescript -name_prefix = "feast" -resource_group = "Feast" # pre-existing resource group -``` -{% endcode %} - -## 3. Apply - -After completing the configuration, initialize Terraform and apply: - -```bash -$ cd feast/infra/terraform/azure -$ terraform init -$ terraform apply -var-file=my_feast.tfvars -``` - -## 4. Connect to Feast using Jupyter - -After all pods are running, connect to the Jupyter Notebook Server running in the cluster. - -To connect to the remote Feast server you just created, forward a port from the remote k8s cluster to your local machine. - -```bash -kubectl port-forward $(kubectl get pod -o custom-columns=:metadata.name | grep jupyter) 8888:8888 -``` - -```text -Forwarding from 127.0.0.1:8888 -> 8888 -Forwarding from [::1]:8888 -> 8888 -``` - -You can now connect to the bundled Jupyter Notebook Server at `localhost:8888` and follow the example Jupyter notebook. - diff --git a/docs/getting-started/install-feast/kubernetes-with-helm.md b/docs/getting-started/install-feast/kubernetes-with-helm.md deleted file mode 100644 index f31d666ba9..0000000000 --- a/docs/getting-started/install-feast/kubernetes-with-helm.md +++ /dev/null @@ -1,69 +0,0 @@ -# Kubernetes \(with Helm\) - -## Overview - -This guide installs Feast on an existing Kubernetes cluster, and ensures the following services are running: - -* Feast Core -* Feast Online Serving -* Postgres -* Redis -* Feast Jupyter \(Optional\) -* Prometheus \(Optional\) - -## 1. Requirements - -1. Install and configure [Kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) -2. Install [Helm 3](https://helm.sh/) - -## 2. Preparation - -Add the Feast Helm repository and download the latest charts: - -```text -helm repo add feast-charts https://feast-helm-charts.storage.googleapis.com -helm repo update -``` - -Feast includes a Helm chart that installs all necessary components to run Feast Core, Feast Online Serving, and an example Jupyter notebook. - -Feast Core requires Postgres to run, which requires a secret to be set on Kubernetes: - -```bash -kubectl create secret generic feast-postgresql --from-literal=postgresql-password=password -``` - -## 3. Installation - -Install Feast using Helm. The pods may take a few minutes to initialize. - -```bash -helm install feast-release feast-charts/feast -``` - -## 4. 
Use Jupyter to connect to Feast - -After all the pods are in a `RUNNING` state, port-forward to the Jupyter Notebook Server in the cluster: - -```bash -kubectl port-forward \ -$(kubectl get pod -l app=feast-jupyter -o custom-columns=:metadata.name) 8888:8888 -``` - -```text -Forwarding from 127.0.0.1:8888 -> 8888 -Forwarding from [::1]:8888 -> 8888 -``` - -You can now connect to the bundled Jupyter Notebook Server at `localhost:8888` and follow the example Jupyter notebook. - -{% embed url="http://localhost:8888/tree?" caption="" %} - -## 5. Further Reading - -* [Feast Concepts](../../concepts/overview.md) -* [Feast Examples/Tutorials](https://github.com/feast-dev/feast/tree/master/examples) -* [Feast Helm Chart Documentation](https://github.com/feast-dev/feast/blob/master/infra/charts/feast/README.md) -* [Configuring Feast components](../../reference/configuration-reference.md) -* [Feast and Spark](../../reference/feast-and-spark.md) - diff --git a/docs/getting-started/learn-feast.md b/docs/getting-started/learn-feast.md deleted file mode 100644 index 10f2eb6d29..0000000000 --- a/docs/getting-started/learn-feast.md +++ /dev/null @@ -1,15 +0,0 @@ -# Learn Feast - -Explore the following resources to learn more about Feast: - -* [Concepts](../) describes all important Feast API concepts. -* [User guide](../user-guide/define-and-ingest-features.md) provides guidance on completing Feast workflows. -* [Examples](https://github.com/feast-dev/feast/tree/master/examples) contains Jupyter notebooks that you can run on your Feast deployment. -* [Advanced](../advanced/troubleshooting.md) contains information about both advanced and operational aspects of Feast. -* [Reference](../reference/api/) contains detailed API and design documents for advanced users. -* [Contributing](../contributing/contributing.md) contains resources for anyone who wants to contribute to Feast. - -{% hint style="info" %} -The best way to learn Feast is to use it. Jump over to our [Quickstart](../quickstart.md) guide to have one of our examples running in no time at all! -{% endhint %} - diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index f93a3aa714..c067513d31 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -17,7 +17,7 @@ In this tutorial, we use feature stores to generate training data and power onli 1. **Training-serving skew and complex data joins:** Feature values often exist across multiple tables. Joining these datasets can be complicated, slow, and error-prone. * Feast joins these tables with battle-tested logic that ensures _point-in-time_ correctness so future feature values do not leak to models. - * _\*Upcoming_: Feast alerts users to offline / online skew with data quality monitoring. + * Feast alerts users to offline / online skew with data quality monitoring 2. **Online feature availability:** At inference time, models often need access to features that aren't readily available and need to be precomputed from other datasources. * Feast manages deployment to a variety of online stores (e.g. DynamoDB, Redis, Google Cloud Datastore) and ensures necessary features are consistently _available_ and _freshly computed_ at inference time. 3. **Feature reusability and model versioning:** Different teams within an organization are often unable to reuse features across projects, resulting in duplicate feature creation logic. Models have data dependencies that need to be versioned, for example when running A/B tests on model versions. 
@@ -28,7 +28,7 @@ In this tutorial, we use feature stores to generate training data and power onli Install the Feast SDK and CLI using pip: -* In this tutorial, we focus on a local deployment. For a more in-depth guide on how to use Feast with GCP or AWS deployments, see [Running Feast with GCP/AWS](../how-to-guides/feast-gcp-aws/) +* In this tutorial, we focus on a local deployment. For a more in-depth guide on how to use Feast with Snowflake / GCP / AWS deployments, see [Running Feast with Snowflake/GCP/AWS](../how-to-guides/feast-snowflake-gcp-aws/) {% tabs %} {% tab title="Bash" %} @@ -123,11 +123,14 @@ The key line defining the overall architecture of the feature store is the **pro Valid values for `provider` in `feature_store.yaml` are: -* local: use file source / SQLite -* gcp: use BigQuery / Google Cloud Datastore -* aws: use Redshift / DynamoDB +* local: use file source with SQLite/Redis +* gcp: use BigQuery/Snowflake with Google Cloud Datastore/Redis +* aws: use Redshift/Snowflake with DynamoDB/Redis + +Note that there are many other sources Feast works with, including Azure, Hive, Trino, and PostgreSQL via community plugins. See [Third party integrations](../getting-started/third-party-integrations.md) for all supported datasources. + +A custom setup can also be made by following [adding a custom provider](../how-to-guides/creating-a-custom-provider.md). -To use a custom provider, see [adding a custom provider](../how-to-guides/creating-a-custom-provider.md). There are also several plugins maintained by the community: [Azure](https://github.com/Azure/feast-azure), [Postgres](https://github.com/nossrannug/feast-postgres), and [Hive](https://github.com/baineng/feast-hive). Note that the choice of provider gives sensible defaults but does not enforce those choices; for example, if you choose the AWS provider, you can use [Redis](../reference/online-stores/redis.md) as an online store alongside Redshift as an offline store. ## Step 3: Register feature definitions and deploy your feature store @@ -345,5 +348,5 @@ pprint(feature_vector) * Read the [Concepts](concepts/) page to understand the Feast data model. * Read the [Architecture](architecture-and-components/) page. * Check out our [Tutorials](../tutorials/tutorials-overview.md) section for more examples on how to use Feast. -* Follow our [Running Feast with GCP/AWS](../how-to-guides/feast-gcp-aws/) guide for a more in-depth tutorial on using Feast. +* Follow our [Running Feast with Snowflake/GCP/AWS](../how-to-guides/feast-snowflake-gcp-aws/) guide for a more in-depth tutorial on using Feast. * Join other Feast users and contributors in [Slack](https://slack.feast.dev) and become part of the community! diff --git a/docs/getting-started/third-party-integrations.md b/docs/getting-started/third-party-integrations.md index 31b6acdc88..c085d6d0ae 100644 --- a/docs/getting-started/third-party-integrations.md +++ b/docs/getting-started/third-party-integrations.md @@ -13,27 +13,29 @@ Don't see your offline store or online store of choice here? 
Check out our guide ### **Data Sources** +* [x] [Snowflake source](https://docs.feast.dev/reference/data-sources/snowflake) * [x] [Redshift source](https://docs.feast.dev/reference/data-sources/redshift) * [x] [BigQuery source](https://docs.feast.dev/reference/data-sources/bigquery) * [x] [Parquet file source](https://docs.feast.dev/reference/data-sources/file) * [x] [Synapse source (community plugin)](https://github.com/Azure/feast-azure) * [x] [Hive (community plugin)](https://github.com/baineng/feast-hive) * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) +* [x] [Spark (community plugin)](https://github.com/Adyen/feast-spark-offline-store) * [x] Kafka source (with [push support into the online store](https://docs.feast.dev/reference/alpha-stream-ingestion)) -* [x] [Snowflake source (community plugin)](https://github.com/sfc-gh-madkins/feast-snowflake) * [ ] HTTP source ### Offline Stores +* [x] [Snowflake](https://docs.feast.dev/reference/offline-stores/snowflake) * [x] [Redshift](https://docs.feast.dev/reference/offline-stores/redshift) * [x] [BigQuery](https://docs.feast.dev/reference/offline-stores/bigquery) * [x] [Synapse (community plugin)](https://github.com/Azure/feast-azure) * [x] [Hive (community plugin)](https://github.com/baineng/feast-hive) * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) +* [x] [Trino (community plugin)](https://github.com/Shopify/feast-trino) +* [x] [Spark (community plugin)](https://github.com/Adyen/feast-spark-offline-store) * [x] [In-memory / Pandas](https://docs.feast.dev/reference/offline-stores/file) * [x] [Custom offline store support](https://docs.feast.dev/how-to-guides/adding-a-new-offline-store) -* [x] [Snowflake source (community plugin)](https://github.com/sfc-gh-madkins/feast-snowflake) -* [x] [Trino (communiuty plugin)](https://github.com/Shopify/feast-trino) ### Online Stores @@ -59,7 +61,7 @@ Don't see your offline store or online store of choice here? Check out our guide In order for a plugin integration to be highlighted on this page, it must meet the following requirements: -1. The plugin must have tests. Ideally it would use the Feast universal tests (see this [guide](broken-reference) for an example), but custom tests are fine. +1. The plugin must have tests. Ideally it would use the Feast universal tests (see this [guide](../how-to-guides/adding-or-reusing-tests.md) for an example), but custom tests are fine. 2. The plugin must have some basic documentation on how it should be used. 3. The author must work with a maintainer to pass a basic code review (e.g. to ensure that the implementation roughly matches the core Feast implementations). diff --git a/docs/how-to-guides/adding-or-reusing-tests.md b/docs/how-to-guides/adding-or-reusing-tests.md index 1730abe209..5a29342d6e 100644 --- a/docs/how-to-guides/adding-or-reusing-tests.md +++ b/docs/how-to-guides/adding-or-reusing-tests.md @@ -79,7 +79,6 @@ def test_historical_features(environment, universal_data_sources, full_feature_n datasets["global"], datasets["entity"], ) - # ... 
more test code customer_fv, driver_fv, driver_odfv, order_fv, global_fv = ( @@ -93,7 +92,7 @@ def test_historical_features(environment, universal_data_sources, full_feature_n feature_service = FeatureService( "convrate_plus100", features=[ - feature_views["driver"][["conv_rate"]], + feature_views["driver"][["conv_rate"]], feature_views["driver_odfv"] ], ) @@ -112,7 +111,6 @@ def test_historical_features(environment, universal_data_sources, full_feature_n ] ) store.apply(feast_objects) - # ... more test code job_from_df = store.get_historical_features( @@ -132,13 +130,11 @@ def test_historical_features(environment, universal_data_sources, full_feature_n full_feature_names=full_feature_names, ) actual_df_from_df_entities = job_from_df.to_df() - # ... more test code assert_frame_equal( expected_df, actual_df_from_df_entities, check_dtype=False, ) - # ... more test code ``` {% endtab %} @@ -186,6 +182,24 @@ def your_test(environment: Environment): your_fv = driver_feature_view(data_source) entity = driver(value_type=ValueType.UNKNOWN) fs.apply([fv, entity]) - + # ... run test ``` + +### Running your own redis cluster for testing + +* Install redis on your computer. If you are a mac user, you should be able to `brew install redis`. + * Running `redis-server --help` and `redis-cli --help` should show corresponding help menus. +* Run `cd scripts/create-cluster` and run `./create-cluster start` then `./create-cluster create` to start the server. You should see output that looks like this: +~~~~ +Starting 6001 +Starting 6002 +Starting 6003 +Starting 6004 +Starting 6005 +Starting 6006 +~~~~ +* You should be able to run the integration tests and have the redis cluster tests pass. +* If you would like to run your own redis cluster, you can run the above commands with your own specified ports and connect to the newly configured cluster. +* To stop the cluster, run `./create-cluster stop` and then `./create-cluster clean`. + diff --git a/docs/how-to-guides/feast-gcp-aws/README.md b/docs/how-to-guides/feast-snowflake-gcp-aws/README.md similarity index 88% rename from docs/how-to-guides/feast-gcp-aws/README.md rename to docs/how-to-guides/feast-snowflake-gcp-aws/README.md index d120eab314..753650080b 100644 --- a/docs/how-to-guides/feast-gcp-aws/README.md +++ b/docs/how-to-guides/feast-snowflake-gcp-aws/README.md @@ -1,4 +1,4 @@ -# Running Feast with GCP/AWS +# Running Feast with Snowflake/GCP/AWS {% page-ref page="install-feast.md" %} diff --git a/docs/how-to-guides/feast-gcp-aws/build-a-training-dataset.md b/docs/how-to-guides/feast-snowflake-gcp-aws/build-a-training-dataset.md similarity index 100% rename from docs/how-to-guides/feast-gcp-aws/build-a-training-dataset.md rename to docs/how-to-guides/feast-snowflake-gcp-aws/build-a-training-dataset.md diff --git a/docs/how-to-guides/feast-gcp-aws/create-a-feature-repository.md b/docs/how-to-guides/feast-snowflake-gcp-aws/create-a-feature-repository.md similarity index 84% rename from docs/how-to-guides/feast-gcp-aws/create-a-feature-repository.md rename to docs/how-to-guides/feast-snowflake-gcp-aws/create-a-feature-repository.md index 1add0a92e8..8754bc051a 100644 --- a/docs/how-to-guides/feast-gcp-aws/create-a-feature-repository.md +++ b/docs/how-to-guides/feast-snowflake-gcp-aws/create-a-feature-repository.md @@ -13,6 +13,21 @@ Creating a new Feast repository in /<...>/tiny_pika. ``` {% endtab %} +{% tabs %} +{% tab title="Snowflake template" %} +```bash +feast init -t snowflake +Snowflake Deployment URL: ... +Snowflake User Name: ... 
+Snowflake Password: ... +Snowflake Role Name: ... +Snowflake Warehouse Name: ... +Snowflake Database Name: ... + +Creating a new Feast repository in /<...>/tiny_pika. +``` +{% endtab %} + {% tab title="GCP template" %} ```text feast init -t gcp @@ -30,7 +45,7 @@ Redshift Database Name: ... Redshift User Name: ... Redshift S3 Staging Location (s3://*): ... Redshift IAM Role for S3 (arn:aws:iam::*:role/*): ... -Should I upload example data to Redshift (overwriting 'feast_driver_hourly_stats' table)? (Y/n): +Should I upload example data to Redshift (overwriting 'feast_driver_hourly_stats' table)? (Y/n): Creating a new Feast repository in /<...>/tiny_pika. ``` @@ -63,4 +78,3 @@ You can now use this feature repository for development. You can try the followi * Run `feast apply` to apply these definitions to Feast. * Edit the example feature definitions in `example.py` and run `feast apply` again to change feature definitions. * Initialize a git repository in the same directory and checking the feature repository into version control. - diff --git a/docs/how-to-guides/feast-gcp-aws/deploy-a-feature-store.md b/docs/how-to-guides/feast-snowflake-gcp-aws/deploy-a-feature-store.md similarity index 100% rename from docs/how-to-guides/feast-gcp-aws/deploy-a-feature-store.md rename to docs/how-to-guides/feast-snowflake-gcp-aws/deploy-a-feature-store.md diff --git a/docs/how-to-guides/feast-gcp-aws/install-feast.md b/docs/how-to-guides/feast-snowflake-gcp-aws/install-feast.md similarity index 80% rename from docs/how-to-guides/feast-gcp-aws/install-feast.md rename to docs/how-to-guides/feast-snowflake-gcp-aws/install-feast.md index 019231be09..26d95c6117 100644 --- a/docs/how-to-guides/feast-gcp-aws/install-feast.md +++ b/docs/how-to-guides/feast-snowflake-gcp-aws/install-feast.md @@ -6,6 +6,12 @@ Install Feast using [pip](https://pip.pypa.io): pip install feast ``` +Install Feast with Snowflake dependencies (required when using Snowflake): + +``` +pip install 'feast[snowflake]' +``` + Install Feast with GCP dependencies (required when using BigQuery or Firestore): ``` diff --git a/docs/how-to-guides/feast-gcp-aws/load-data-into-the-online-store.md b/docs/how-to-guides/feast-snowflake-gcp-aws/load-data-into-the-online-store.md similarity index 100% rename from docs/how-to-guides/feast-gcp-aws/load-data-into-the-online-store.md rename to docs/how-to-guides/feast-snowflake-gcp-aws/load-data-into-the-online-store.md diff --git a/docs/how-to-guides/feast-gcp-aws/read-features-from-the-online-store.md b/docs/how-to-guides/feast-snowflake-gcp-aws/read-features-from-the-online-store.md similarity index 100% rename from docs/how-to-guides/feast-gcp-aws/read-features-from-the-online-store.md rename to docs/how-to-guides/feast-snowflake-gcp-aws/read-features-from-the-online-store.md diff --git a/docs/how-to-guides/fetching-java-features-k8s.md b/docs/how-to-guides/fetching-java-features-k8s.md new file mode 100644 index 0000000000..1aa6abd52b --- /dev/null +++ b/docs/how-to-guides/fetching-java-features-k8s.md @@ -0,0 +1,15 @@ +# How to set up a Java feature server + +This tutorial guides you on how to: + +* Define features and data sources in Feast using the Feast CLI +* Materialize features to a Redis cluster deployed on Kubernetes. +* Deploy a Feast Java feature server into a Kubernetes cluster using the Feast helm charts +* Retrieve features using the gRPC API exposed by the Feast Java server + +Try it and let us know what you think! 
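The materialization step in the list above is driven from the Python SDK rather than from the Java server itself. As a rough, illustrative sketch only (the repository path and time window below are placeholders, and the Redis cluster connection would live in the repo's `feature_store.yaml`, which is not shown here), the calls look like this:

```python
from datetime import datetime, timedelta

from feast import FeatureStore

# Assumes a feature repository whose feature_store.yaml points the online
# store at the Redis cluster that the Java feature server will read from.
store = FeatureStore(repo_path=".")

# Load the latest feature values from the offline store into the online store.
store.materialize_incremental(end_date=datetime.now())

# Or backfill an explicit window instead:
store.materialize(
    start_date=datetime.now() - timedelta(days=7),
    end_date=datetime.now(),
)
```

The guide linked below walks through the full flow, including the Helm deployment and gRPC retrieval.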
+ +| ![](../.gitbook/assets/github-mark-32px.png)[ View guide in Github](../../examples/java-demo/README.md) | +|:--------------------------------------------------------------------------------------------------------| + + diff --git a/docs/load-data-into-the-online-store.md b/docs/load-data-into-the-online-store.md deleted file mode 100644 index 48bfb27fc4..0000000000 --- a/docs/load-data-into-the-online-store.md +++ /dev/null @@ -1,2 +0,0 @@ -# Load data into the online store - diff --git a/docs/project/release-process.md b/docs/project/release-process.md index 8ecd55a63f..af573c92c7 100644 --- a/docs/project/release-process.md +++ b/docs/project/release-process.md @@ -22,7 +22,6 @@ For Feast maintainers, these are the concrete steps for making a new release. 2. Add the change log by applying the change log commit created in step 2. 3. Check that versions are updated with `env TARGET_MERGE_BRANCH=master make lint-versions` 7. Create a [GitHub release](https://github.com/feast-dev/feast/releases) which includes a summary of im~~p~~ortant changes as well as any artifacts associated with the release. Make sure to include the same change log as added in [CHANGELOG.md](../../CHANGELOG.md). Use `Feast vX.Y.Z` as the title. -8. Update the[ Upgrade Guide](broken-reference) to include the action required instructions for users to upgrade to this new release. Instructions should include a migration for each breaking change made to this release. When a tag that matches a Semantic Version string is pushed, CI will automatically build and push the relevant artifacts to their repositories or package managers (docker images, Python wheels, etc). JVM artifacts are promoted from Sonatype OSSRH to Maven Central, but it sometimes takes some time for them to be available. The `sdk/go/v tag` is required to version the Go SDK go module so that users can go get a specific tagged release of the Go SDK. diff --git a/docs/read-features-from-the-online-store.md b/docs/read-features-from-the-online-store.md deleted file mode 100644 index db082897a2..0000000000 --- a/docs/read-features-from-the-online-store.md +++ /dev/null @@ -1,2 +0,0 @@ -# Read features from the online store - diff --git a/docs/reference/api.md b/docs/reference/api.md deleted file mode 100644 index 16467bb2dc..0000000000 --- a/docs/reference/api.md +++ /dev/null @@ -1,17 +0,0 @@ -# API Reference - -Please see the following API specific reference documentation: - -* [Feast Core gRPC API](https://api.docs.feast.dev/grpc/feast.core.pb.html): This is the gRPC API used by Feast Core. Feast Core has a dual function of schema registry and job manager. This API contains RPCs for creating and managing feature sets, stores, projects, and jobs. -* [Feast Serving gRPC API](https://api.docs.feast.dev/grpc/feast.serving.pb.html): This is the gRPC API used by Feast Serving. It contains RPCs used for the retrieval of online feature data or historical feature data. -* [Feast gRPC Types](https://api.docs.feast.dev/grpc/feast.types.pb.html): These are the gRPC types used by both Feast Core, Feast Serving, and the Go, Java, and Python clients. -* [Go Client SDK](https://godoc.org/github.com/feast-dev/feast/sdk/go): The Go library used for the retrieval of online features from Feast. -* [Java Client SDK](https://javadoc.io/doc/dev.feast/feast-sdk): The Java library used for the retrieval of online features from Feast. -* [Python SDK](https://api.docs.feast.dev/python/): This is the complete reference to the Feast Python SDK. 
The SDK is used to manage feature sets, features, jobs, projects, and entities. It can also be used to retrieve training datasets or online features from Feast Serving. - -## Community Contributions - -The following community provided SDKs are available: - -* [Node.js SDK](https://github.com/MichaelHirn/feast-client/): A Node.js SDK written in TypeScript. The SDK can be used to manage feature sets, features, jobs, projects, and entities. - diff --git a/docs/reference/api/README.md b/docs/reference/api/README.md deleted file mode 100644 index cd75f5bf88..0000000000 --- a/docs/reference/api/README.md +++ /dev/null @@ -1,17 +0,0 @@ -# API Reference - -Please see the following API specific reference documentation: - -* [Feast Core gRPC API](https://api.docs.feast.dev/grpc/feast/core/coreservice.pb.html): This is the gRPC API used by Feast Core. This API contains RPCs for creating and managing feature sets, stores, projects, and jobs. -* [Feast Serving gRPC API](https://api.docs.feast.dev/grpc/feast/serving/servingservice.pb.html): This is the gRPC API used by Feast Serving. It contains RPCs used for the retrieval of online feature data or historical feature data. -* [Feast gRPC Types](https://api.docs.feast.dev/grpc/feast/types/value.pb): These are the gRPC types used by both Feast Core, Feast Serving, and the Go, Java, and Python clients. -* [Go Client SDK](https://godoc.org/github.com/feast-dev/feast/sdk/go): The Go library used for the retrieval of online features from Feast. -* [Java Client SDK](https://javadoc.io/doc/dev.feast/feast-sdk): The Java library used for the retrieval of online features from Feast. -* [Python SDK](https://api.docs.feast.dev/python/): This is the complete reference to the Feast Python SDK. The SDK is used to manage feature sets, features, jobs, projects, and entities. It can also be used to retrieve training datasets or online features from Feast Serving. - -## Community Contributions - -The following community provided SDKs are available: - -* [Node.js SDK](https://github.com/MichaelHirn/feast-client/): A Node.js SDK written in TypeScript. The SDK can be used to manage feature sets, features, jobs, projects, and entities. - diff --git a/docs/reference/configuration-reference.md b/docs/reference/configuration-reference.md deleted file mode 100644 index 6f9a97dabf..0000000000 --- a/docs/reference/configuration-reference.md +++ /dev/null @@ -1,132 +0,0 @@ -# Configuration Reference - -## Overview - -This reference describes how to configure Feast components: - -* [Feast Core and Feast Online Serving](configuration-reference.md#2-feast-core-serving-and-job-controller) -* [Feast CLI and Feast Python SDK](configuration-reference.md#3-feast-cli-and-feast-python-sdk) -* [Feast Go and Feast Java SDK](configuration-reference.md#4-feast-java-and-go-sdk) - -## 1. 
Feast Core and Feast Online Serving - -Available configuration properties for Feast Core and Feast Online Serving can be referenced from the corresponding `application.yml` of each component: - -| Component | Configuration Reference | -| :--- | :--- | -| Core | [core/src/main/resources/application.yml](https://github.com/feast-dev/feast-java/blob/master/core/src/main/resources/application.yml) | -| Serving \(Online\) | [serving/src/main/resources/application.yml](https://github.com/feast-dev/feast-java/blob/master/serving/src/main/resources/application.yml) | - -Configuration properties for Feast Core and Feast Online Serving are defined depending on Feast is deployed: - -* [Docker Compose deployment](configuration-reference.md#docker-compose-deployment) - Feast is deployed with Docker Compose. -* [Kubernetes deployment](configuration-reference.md#kubernetes-deployment) - Feast is deployed with Kubernetes. -* [Direct Configuration](configuration-reference.md#direct-configuration) - Feast is built and run from source code. - -## Docker Compose Deployment - -For each Feast component deployed using Docker Compose, configuration properties from `application.yml` can be set at: - -| Component | Configuration Path | -| :--- | :--- | -| Core | `infra/docker-compose/core/core.yml` | -| Online Serving | `infra/docker-compose/serving/online-serving.yml` | - -## Kubernetes Deployment - -The Kubernetes Feast Deployment is configured using `values.yaml` in the [Helm chart](https://github.com/feast-dev/feast-helm-charts) included with Feast: - -```yaml -# values.yaml -feast-core: - enabled: true # whether to deploy the feast-core subchart to deploy Feast Core. - # feast-core subchart specific config. - gcpServiceAccount: - enabled: true - # .... -``` - -A reference of the sub-chart-specific configuration can found in its `values.yml`: - -* [feast-core](https://github.com/feast-dev/feast-java/tree/master/infra/charts/feast-core) -* [feast-serving](https://github.com/feast-dev/feast-java/tree/master/infra/charts/feast-serving) - -Configuration properties can be set via `application-override.yaml` for each component in `values.yaml`: - -```yaml -# values.yaml -feast-core: - # .... - application-override.yaml: - # application.yml config properties for Feast Core. - # ... -``` - -Visit the [Helm chart](https://github.com/feast-dev/feast-helm-charts) included with Feast to learn more about configuration. - -## Direct Configuration - -If Feast is built and running from source, configuration properties can be set directly in the Feast component's `application.yml`: - -| Component | Configuration Path | -| :--- | :--- | -| Core | [core/src/main/resources/application.yml](https://github.com/feast-dev/feast-java/blob/master/core/src/main/resources/application.yml) | -| Serving \(Online\) | [serving/src/main/resources/application.yml](https://github.com/feast-dev/feast-java/blob/master/serving/src/main/resources/application.yml) | - -## 2. Feast CLI and Feast Python SDK - -Configuration options for both the [Feast CLI](../getting-started/connect-to-feast/feast-cli.md) and [Feast Python SDK](https://api.docs.feast.dev/python/) can be defined in the following locations, in order of precedence: - -**1. Command line arguments or initialized arguments:** Passing parameters to the Feast CLI or instantiating the Feast Client object with specific parameters will take precedence above other parameters. - -```bash -# Set option as command line arguments. 
-feast config set core_url "localhost:6565" -``` - -```python -# Pass options as initialized arguments. -client = Client( - core_url="localhost:6565", - project="default" -) -``` - -**2. Environmental variables:** Environmental variables can be set to provide configuration options. They must be prefixed with `FEAST_`. For example `FEAST_CORE_URL`. - -```bash -FEAST_CORE_URL=my_feast:6565 FEAST_PROJECT=default feast projects list -``` - -**3. Configuration file:** Options with the lowest precedence are configured in the Feast configuration file. Feast looks for or creates this configuration file in `~/.feast/config` if it does not already exist. All options must be defined in the `[general]` section of this file. - -```text -[general] -project = default -core_url = localhost:6565 -``` - -Visit the [available configuration parameters](https://api.docs.feast.dev/python/#module-feast.constants) for Feast Python SDK and Feast CLI to learn more. - -## 3. Feast Java and Go SDK - -The [Feast Java SDK](https://javadoc.io/doc/dev.feast/feast-sdk/latest/com/gojek/feast/package-summary.html) and [Feast Go SDK](https://godoc.org/github.com/feast-dev/feast/sdk/go) are configured via arguments passed when instantiating the respective Clients: - -### Go SDK - -```go -// configure serving host and port. -cli := feast.NewGrpcClient("localhost", 6566) -``` - -Visit the[ Feast Go SDK API reference](https://godoc.org/github.com/feast-dev/feast/sdk/go) to learn more about available configuration parameters. - -### Java SDK - -```java -// configure serving host and port. -client = FeastClient.create(servingHost, servingPort); -``` - -Visit the [Feast Java SDK API reference](https://javadoc.io/doc/dev.feast/feast-sdk/latest/com/gojek/feast/package-summary.html) to learn more about available configuration parameters. - diff --git a/docs/reference/data-sources/README.md b/docs/reference/data-sources/README.md index 6732fc16a0..fc6e136a9c 100644 --- a/docs/reference/data-sources/README.md +++ b/docs/reference/data-sources/README.md @@ -4,7 +4,8 @@ Please see [Data Source](../../getting-started/concepts/feature-view.md#data-sou {% page-ref page="file.md" %} +{% page-ref page="snowflake.md" %} + {% page-ref page="bigquery.md" %} {% page-ref page="redshift.md" %} - diff --git a/docs/reference/data-sources/snowflake.md b/docs/reference/data-sources/snowflake.md new file mode 100644 index 0000000000..0f5304b6cd --- /dev/null +++ b/docs/reference/data-sources/snowflake.md @@ -0,0 +1,44 @@ +# Snowflake + +## Description + +Snowflake data sources allow for the retrieval of historical feature values from Snowflake for building training datasets as well as materializing features into an online store. + +* Either a table reference or a SQL query can be provided. + +## Examples + +Using a table reference + +```python +from feast import SnowflakeSource + +my_snowflake_source = SnowflakeSource( + database="FEAST", + schema="PUBLIC", + table="FEATURE_TABLE", +) +``` + +Using a query + +```python +from feast import SnowflakeSource + +my_snowflake_source = SnowflakeSource( + query=""" + SELECT + timestamp_column AS "ts", + "created", + "f1", + "f2" + FROM + `FEAST.PUBLIC.FEATURE_TABLE` + """, +) +``` + +One thing to remember is how Snowflake handles table and column name conventions. +You can read more about quote identifiers [here](https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html) + +Configuration options are available [here](https://rtd.feast.dev/en/latest/index.html#feast.data_source.SnowflakeSource). 
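To show how a source like the ones above is typically consumed, here is a minimal, illustrative sketch of attaching a table-based `SnowflakeSource` to a feature view. The entity, feature names, timestamp column, and TTL are hypothetical, and the `Feature`/`FeatureView` style simply follows the examples used elsewhere in these docs.

```python
from google.protobuf.duration_pb2 import Duration

from feast import Entity, Feature, FeatureView, SnowflakeSource
from feast.value_type import ValueType

# Hypothetical source: the FEAST.PUBLIC.FEATURE_TABLE referenced above,
# assuming it has a "ts" event timestamp column.
driver_source = SnowflakeSource(
    database="FEAST",
    schema="PUBLIC",
    table="FEATURE_TABLE",
    event_timestamp_column="ts",
)

driver = Entity(name="driver", join_key="driver_id")

driver_fv = FeatureView(
    name="driver_features",
    entities=["driver"],
    features=[
        Feature("f1", ValueType.DOUBLE),
        Feature("f2", ValueType.DOUBLE),
    ],
    ttl=Duration(seconds=86400),
    batch_source=driver_source,
)
```

Remember that, per the note above, unquoted Snowflake identifiers are stored upper-case, which is why the database, schema, and table names here are written in caps.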
diff --git a/docs/reference/dqm.md b/docs/reference/dqm.md new file mode 100644 index 0000000000..5a02413e53 --- /dev/null +++ b/docs/reference/dqm.md @@ -0,0 +1,77 @@
+# Data Quality Monitoring
+
+Data Quality Monitoring (DQM) is a Feast module that helps users validate their data against a user-curated set of rules.
+Validation can be applied during:
+* Historical retrieval (training dataset generation)
+* [planned] Writing features into an online store
+* [planned] Reading features from an online store
+
+Its goal is to address several complex data problems, namely:
+* Data consistency - new training datasets can be significantly different from previous datasets. This might require a change in model architecture.
+* Issues/bugs in the upstream pipeline - bugs in upstream pipelines can cause invalid values to overwrite existing valid values in an online store.
+* Training/serving skew - distribution shift can significantly decrease the performance of the model.
+
+> To monitor data quality, we check that the characteristics of the tested dataset (aka the tested dataset's profile) are "equivalent" to the characteristics of the reference dataset.
+> How exactly profile equivalency should be measured is up to the user.
+
+### Overview
+
+The validation process consists of the following steps:
+1. The user prepares a reference dataset (currently only [saved datasets](../getting-started/concepts/dataset.md) from historical retrieval are supported).
+2. The user defines a profiler function, which produces a profile from a given dataset (currently only profilers based on [Great Expectations](https://docs.greatexpectations.io) are allowed).
+3. The tested dataset is validated against the reference dataset, with the reference dataset and the profiler passed as parameters.
+
+### Preparations
+Feast with Great Expectations support can be installed via
+```shell
+pip install 'feast[ge]'
+```
+
+### Dataset profile
+Currently, Feast supports only [Great Expectations'](https://greatexpectations.io/) [ExpectationSuite](https://legacy.docs.greatexpectations.io/en/latest/autoapi/great_expectations/core/expectation_suite/index.html#great_expectations.core.expectation_suite.ExpectationSuite)
+as the dataset profile. Hence, the user needs to define a function (profiler) that receives a dataset and returns an [ExpectationSuite](https://legacy.docs.greatexpectations.io/en/latest/autoapi/great_expectations/core/expectation_suite/index.html#great_expectations.core.expectation_suite.ExpectationSuite).
+
+Great Expectations supports automatic profiling as well as manually specifying expectations:
+```python
+from great_expectations.dataset import Dataset
+from great_expectations.core.expectation_suite import ExpectationSuite
+
+from feast.dqm.profilers.ge_profiler import ge_profiler
+
+@ge_profiler
+def automatic_profiler(dataset: Dataset) -> ExpectationSuite:
+    from great_expectations.profile.user_configurable_profiler import UserConfigurableProfiler
+
+    return UserConfigurableProfiler(
+        profile_dataset=dataset,
+        ignored_columns=['conv_rate'],
+        value_set_threshold='few'
+    ).build_suite()
+```
+However, in our experience the capabilities of the automatic profiler are quite limited, so we recommend crafting your own expectations:
+```python
+@ge_profiler
+def manual_profiler(dataset: Dataset) -> ExpectationSuite:
+    dataset.expect_column_max_to_be_between("column", 1, 2)
+    return dataset.get_expectation_suite()
+```
+
+
+### Validating Training Dataset
+During retrieval of historical features, `validation_reference` can be passed as a parameter to the `.to_df(validation_reference=...)` or `.to_arrow(validation_reference=...)` methods of RetrievalJob.
+If the parameter is provided, Feast will run validation once the dataset is materialized. If validation succeeds, the materialized dataset is returned.
+Otherwise, a `feast.dqm.errors.ValidationFailed` exception is raised, containing the details of all expectations that did not pass.
+
+```python
+from feast import FeatureStore
+
+fs = FeatureStore(".")
+
+job = fs.get_historical_features(...)
+job.to_df(
+    validation_reference=fs
+    .get_saved_dataset("my_reference_dataset")
+    .as_reference(profiler=manual_profiler)
+)
+```
diff --git a/docs/reference/feast-and-spark.md b/docs/reference/feast-and-spark.md deleted file mode 100644 index be05f177ae..0000000000 --- a/docs/reference/feast-and-spark.md +++ /dev/null @@ -1,83 +0,0 @@
----
-description: Configuring Feast to use Spark for ingestion.
----
-
-# Feast and Spark
-
-Feast relies on Spark to ingest data from the offline store to the online store, streaming ingestion, and running queries to retrieve historical data from the offline store. Feast supports several Spark deployment options.
-
-## Option 1. Use Kubernetes Operator for Apache Spark
-
-To install the Spark on K8s Operator
-
-```bash
-helm repo add spark-operator \
-  https://googlecloudplatform.github.io/spark-on-k8s-operator
-
-helm install my-release spark-operator/spark-operator \
-  --set serviceAccounts.spark.name=spark
-```
-
-Currently Feast is tested using `v1beta2-1.1.2-2.4.5` version of the operator image. To configure Feast to use it, set the following options in Feast config:
-
-| Feast Setting | Value |
-| :--- | :--- |
-| `SPARK_LAUNCHER` | `"k8s"` |
-| `SPARK_STAGING_LOCATION` | S3/GCS/Azure Blob Storage URL to use as a staging location, must be readable and writable by Feast. For S3, use `s3a://` prefix here. Ex.: `s3a://some-bucket/some-prefix/artifacts/` |
-| `HISTORICAL_FEATURE_OUTPUT_LOCATION` | S3/GCS/Azure Blob Storage URL used to store results of historical retrieval queries, must be readable and writable by Feast. For S3, use `s3a://` prefix here. Ex.: `s3a://some-bucket/some-prefix/out/` |
-| `SPARK_K8S_NAMESPACE` | Only needs to be set if you are customizing the spark-on-k8s-operator. The name of the Kubernetes namespace to run Spark jobs in. This should match the value of `sparkJobNamespace` set on spark-on-k8s-operator Helm chart. Typically this is also the namespace Feast itself will run in. |
-| `SPARK_K8S_JOB_TEMPLATE_PATH` | Only needs to be set if you are customizing the Spark job template. Local file path with the template of the SparkApplication resource. No prefix required. Ex.: `/home/jovyan/work/sparkapp-template.yaml`. An example template is [here](https://github.com/feast-dev/feast/blob/4059a21dc4eba9cd27b2d5b0fabe476c07a8b3bd/sdk/python/feast/pyspark/launchers/k8s/k8s_utils.py#L280-L317) and the spec is defined in the [k8s-operator User Guide](https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/blob/master/docs/user-guide.md). |
-
-Lastly, make sure that the service account used by Feast has permissions to manage Spark Application resources.
This depends on your k8s setup, but typically you'd need to configure a Role and a RoleBinding like the one below:
-```text
-cat <
-
-| Limitation | Motivation |
-| :--- | :--- |
-| Features names and entity names cannot overlap in feature table definitions | Features and entities become columns in historical stores which may cause conflicts |
-| The following field names are reserved in feature tables: `event_timestamp`, `datetime`, `created_timestamp`, `ingestion_id`, `job_id` |
- - These keywords are used for column names when persisting metadata in historical - stores - - - - -### Ingestion - -| Limitation | Motivation | -| :--- | :--- | -| Once data has been ingested into Feast, there is currently no way to delete the data without manually going to the database and deleting it. However, during retrieval only the latest rows will be returned for a specific key \(`event_timestamp`, `entity`\) based on its `created_timestamp`. | This functionality simply doesn't exist yet as a Feast API | - -### Storage - -| Limitation | Motivation | -| :--- | :--- | -| Feast does not support offline storage in Feast 0.8 | As part of our re-architecture of Feast, we moved from GCP to cloud-agnostic deployments. Developing offline storage support that is available in all cloud environments is a pending action. | - diff --git a/docs/reference/metrics-reference.md b/docs/reference/metrics-reference.md deleted file mode 100644 index 34c97c7be6..0000000000 --- a/docs/reference/metrics-reference.md +++ /dev/null @@ -1,178 +0,0 @@ -# Metrics Reference - -{% hint style="warning" %} -This page applies to Feast 0.7. The content may be out of date for Feast 0.8+ -{% endhint %} - -Reference of the metrics that each Feast component exports: - -* [Feast Core](metrics-reference.md#feast-core) -* [Feast Serving](metrics-reference.md#feast-serving) -* [Feast Ingestion Job](metrics-reference.md#feast-ingestion-job) - -For how to configure Feast to export Metrics, see the [Metrics user guide.](../advanced/metrics.md) - -## Feast Core - -**Exported Metrics** - -Feast Core exports the following metrics: - -| Metrics | Description | Tags | -| :--- | :--- | :--- | -| `feast_core_request_latency_seconds` | Feast Core's latency in serving Requests in Seconds. | `service`, `method`, `status_code` | -| `feast_core_feature_set_total` | No. of Feature Sets registered with Feast Core. | None | -| `feast_core_store_total` | No. of Stores registered with Feast Core. | None | -| `feast_core_max_memory_bytes` | Max amount of memory the Java virtual machine will attempt to use. | None | -| `feast_core_total_memory_bytes` | Total amount of memory in the Java virtual machine | None | -| `feast_core_free_memory_bytes` | Total amount of free memory in the Java virtual machine. | None | -| `feast_core_gc_collection_seconds` | Time spent in a given JVM garbage collector in seconds. | None | - -**Metric Tags** - -Exported Feast Core metrics may be filtered by the following tags/keys - -| Tag | Description | -| :--- | :--- | -| `service` | Name of the Service that request is made to. Should be set to `CoreService` | -| `method` | Name of the Method that the request is calling. \(ie `ListFeatureSets`\) | -| `status_code` | Status code returned as a result of handling the requests \(ie `OK`\). Can be used to find request failures. | - -## Feast Serving - -**Exported Metrics** - -Feast Serving exports the following metrics: - -| Metric | Description | Tags | -| :--- | :--- | :--- | -| `feast_serving_request_latency_seconds` | Feast Serving's latency in serving Requests in Seconds. | `method` | -| `feast_serving_request_feature_count` | No. of requests retrieving a Feature from Feast Serving. | `project`, `feature_name` | -| `feast_serving_not_found_feature_count` | No. of requests retrieving a Feature has resulted in a [`NOT_FOUND` field status.](../user-guide/getting-training-features.md#online-field-statuses) | `project`, `feature_name` | -| `feast_serving_stale_feature_count` | No. 
of requests retrieving a Feature resulted in a [`OUTSIDE_MAX_AGE` field status.](../user-guide/getting-training-features.md#online-field-statuses) | `project`, `feature_name` |
-| `feast_serving_grpc_request_count` | Total gRPC requests served. | `method` |
-
-**Metric Tags**
-
-Exported Feast Serving metrics may be filtered by the following tags/keys
-
-| Tag | Description |
-| :--- | :--- |
-| `method` | Name of the Method that the request is calling. \(ie `ListFeatureSets`\) |
-| `status_code` | Status code returned as a result of handling the requests \(ie `OK`\). Can be used to find request failures. |
-| `project` | Name of the project that the FeatureSet of the Feature retrieved belongs to. |
-| `feature_name` | Name of the Feature being retrieved. |
-
-## Feast Ingestion Job
-
-Feast Ingestion computes both metrics and statistics on [data ingestion.](../user-guide/define-and-ingest-features.md) Make sure you are familiar with data ingestion concepts before proceeding.
-
-**Metrics Namespace**
-
-Metrics are computed at two stages of the Feature Row's/Feature Value's life cycle when being processed by the Ingestion Job:
-
-* `Inflight`- Prior to writing data to stores, but after successful validation of data.
-* `WriteToStoreSuccess`- After a successful store write.
-
-Metrics processed at each stage will be tagged with `metrics_namespace` set to the stage where the metric was computed.
-
-**Metrics Bucketing**
-
-Metrics with a `{BUCKET}` are computed on a 60 second window/bucket. Suffix with the following to select the bucket to use:
-
-* `min` - minimum value.
-* `max` - maximum value.
-* `mean`- mean value.
-* `percentile_90`- 90th percentile.
-* `percentile_95`- 95th percentile.
-* `percentile_99`- 99th percentile.
-
-**Exported Metrics**
-
-| Metric | Description | Tags |
-| :--- | :--- | :--- |
-| `feast_ingestion_feature_row_lag_ms_{BUCKET}` | Lag time in milliseconds between succeeding ingested Feature Rows. | `feast_store`, `feast_project_name`, `feast_featureSet_name`, `ingestion_job_name`, `metrics_namespace` |
-| `feast_ingestion_feature_value_lag_ms_{BUCKET}` | Lag time in milliseconds between succeeding ingested values for each Feature. | `feast_store`, `feast_project_name`, `feast_featureSet_name`, `feast_feature_name`, `ingestion_job_name`, `metrics_namespace` |
-| `feast_ingestion_feature_value_{BUCKET}` | Last value for each Feature. | `feast_store`, `feast_project_name`, `feast_feature_name`, `feast_featureSet_name`, `ingestion_job_name`, `metrics_namespace` |
-| `feast_ingestion_feature_row_ingested_count` | No. of Ingested Feature Rows | `feast_store`, `feast_project_name`, `feast_featureSet_name`, `ingestion_job_name`, `metrics_namespace` |
-| `feast_ingestion_feature_value_missing_count` | No. of times an ingested Feature Row did not provide a value for the Feature. | `feast_store`, `feast_project_name`, `feast_featureSet_name`, `feast_feature_name`, `ingestion_job_name`, `metrics_namespace` |
-| `feast_ingestion_deadletter_row_count` | No. of Feature Rows that the Ingestion Job did not successfully write to store. | `feast_store`, `feast_project_name`, `feast_featureSet_name`, `ingestion_job_name` |
- -**Metric Tags** - -Exported Feast Ingestion Job metrics may be filtered by the following tags/keys - -| Tag | Description | -| :--- | :--- | -| `feast_store` | Name of the target store the Ingestion Job is writing to. | -| `feast_project_name` | Name of the project that the ingested FeatureSet belongs to. | -| `feast_featureSet_name` | Name of the Feature Set being ingested. | -| `feast_feature_name` | Name of the Feature being ingested. | -| `ingestion_job_name` | Name of the Ingestion Job performing data ingestion. Typically this is set to the Id of the Ingestion Job. | -| `metrics_namespace` | Stage where metrics where computed. Either `Inflight` or `WriteToStoreSuccess` | - diff --git a/docs/reference/offline-stores/README.md b/docs/reference/offline-stores/README.md index 1260fe8b29..141a34d03b 100644 --- a/docs/reference/offline-stores/README.md +++ b/docs/reference/offline-stores/README.md @@ -4,7 +4,8 @@ Please see [Offline Store](../../getting-started/architecture-and-components/off {% page-ref page="file.md" %} +{% page-ref page="snowflake.md" %} + {% page-ref page="bigquery.md" %} {% page-ref page="redshift.md" %} - diff --git a/docs/reference/offline-stores/snowflake.md b/docs/reference/offline-stores/snowflake.md new file mode 100644 index 0000000000..aa006b43bb --- /dev/null +++ b/docs/reference/offline-stores/snowflake.md @@ -0,0 +1,34 @@ +# Snowflake + +## Description + +The Snowflake offline store provides support for reading [SnowflakeSources](../data-sources/snowflake.md). + +* Snowflake tables and views are allowed as sources. +* All joins happen within Snowflake. +* Entity dataframes can be provided as a SQL query or can be provided as a Pandas dataframe. Pandas dataframes will be uploaded to Snowflake in order to complete join operations. +* A `SnowflakeRetrievalJob` is returned when calling `get_historical_features()`. + * This allows you to call + * `to_snowflake` to save the dataset into Snowflake + * `to_sql` to get the SQL query that would execute on `to_df` + * `to_arrow_chunks` to get the result in batches ([Snowflake python connector docs](https://docs.snowflake.com/en/user-guide/python-connector-api.html#get_result_batches)) + +## Example + +{% code title="feature_store.yaml" %} +```yaml +project: my_feature_repo +registry: data/registry.db +provider: local +offline_store: + type: snowflake.offline + account: snowflake_deployment.us-east-1 + user: user_login + password: user_password + role: sysadmin + warehouse: demo_wh + database: FEAST +``` +{% endcode %} + +Configuration options are available in [SnowflakeOfflineStoreConfig](https://github.com/feast-dev/feast/blob/master/sdk/python/feast/infra/offline_stores/snowflake.py#L56). diff --git a/docs/reference/offline-stores/untitled.md b/docs/reference/offline-stores/untitled.md deleted file mode 100644 index 8ffa566a70..0000000000 --- a/docs/reference/offline-stores/untitled.md +++ /dev/null @@ -1,26 +0,0 @@ -# BigQuery - -### Description - -The BigQuery offline store provides support for reading [BigQuerySources](../data-sources/bigquery.md). - -* BigQuery tables and views are allowed as sources. -* All joins happen within BigQuery. -* Entity dataframes can be provided as a SQL query or can be provided as a Pandas dataframe. Pandas dataframes will be uploaded to BigQuery in order to complete join operations. 
-* A [BigQueryRetrievalJob](https://github.com/feast-dev/feast/blob/c50a36ec1ad5b8d81c6f773c23204db7c7a7d218/sdk/python/feast/infra/offline_stores/bigquery.py#L210) is returned when calling `get_historical_features()`. - -### Example - -{% code title="feature\_store.yaml" %} -```yaml -project: my_feature_repo -registry: gs://my-bucket/data/registry.db -provider: gcp -offline_store: - type: bigquery - dataset: feast_bq_dataset -``` -{% endcode %} - -Configuration options are available [here](https://rtd.feast.dev/en/latest/#feast.repo_config.BigQueryOfflineStoreConfig). - diff --git a/docs/reference/online-stores/README.md b/docs/reference/online-stores/README.md index aadcc0eb65..2c2902bc57 100644 --- a/docs/reference/online-stores/README.md +++ b/docs/reference/online-stores/README.md @@ -9,4 +9,3 @@ Please see [Online Store](../../getting-started/architecture-and-components/onli {% page-ref page="datastore.md" %} {% page-ref page="dynamodb.md" %} - diff --git a/docs/reference/providers/README.md b/docs/reference/providers/README.md index 7eb992d5ac..dc52d92726 100644 --- a/docs/reference/providers/README.md +++ b/docs/reference/providers/README.md @@ -7,4 +7,3 @@ Please see [Provider](../../getting-started/architecture-and-components/provider {% page-ref page="google-cloud-platform.md" %} {% page-ref page="amazon-web-services.md" %} - diff --git a/docs/reference/repository-config.md b/docs/reference/repository-config.md deleted file mode 100644 index 128d773071..0000000000 --- a/docs/reference/repository-config.md +++ /dev/null @@ -1,2 +0,0 @@ -# Repository Config - diff --git a/docs/reference/telemetry.md b/docs/reference/telemetry.md deleted file mode 100644 index f8f7678764..0000000000 --- a/docs/reference/telemetry.md +++ /dev/null @@ -1,12 +0,0 @@ -# Telemetry - -### How telemetry is used - -The Feast project logs anonymous usage statistics and errors in order to inform our planning. Several client methods are tracked, beginning in Feast 0.9. Users are assigned a UUID which is sent along with the name of the method, the Feast version, the OS \(using `sys.platform`\), and the current time. - -The [source code](https://github.com/feast-dev/feast/blob/master/sdk/python/feast/telemetry.py) is available here. - -### How to disable telemetry - -Set the environment variable `FEAST_TELEMETRY` to `False`. - diff --git a/docs/repository-config.md b/docs/repository-config.md deleted file mode 100644 index 128d773071..0000000000 --- a/docs/repository-config.md +++ /dev/null @@ -1,2 +0,0 @@ -# Repository Config - diff --git a/docs/roadmap.md b/docs/roadmap.md index 723bfba82a..03ea32a4b2 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -8,25 +8,27 @@ The list below contains the functionality that contributors are planning to deve * Want to speak to a Feast contributor? We are more than happy to jump on a call. Please schedule a time using [Calendly](https://calendly.com/d/x2ry-g5bb/meet-with-feast-team). 
* **Data Sources** + * [x] [Snowflake source](https://docs.feast.dev/reference/data-sources/snowflake) * [x] [Redshift source](https://docs.feast.dev/reference/data-sources/redshift) * [x] [BigQuery source](https://docs.feast.dev/reference/data-sources/bigquery) * [x] [Parquet file source](https://docs.feast.dev/reference/data-sources/file) * [x] [Synapse source (community plugin)](https://github.com/Azure/feast-azure) * [x] [Hive (community plugin)](https://github.com/baineng/feast-hive) * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) - * [x] Kafka source (with [push support into the online store](reference/alpha-stream-ingestion.md)) - * [x] [Snowflake source (community plugin)](https://github.com/sfc-gh-madkins/feast-snowflake) + * [x] [Spark (community plugin)](https://github.com/Adyen/feast-spark-offline-store) + * [x] Kafka source (with [push support into the online store](https://docs.feast.dev/reference/alpha-stream-ingestion)) * [ ] HTTP source * **Offline Stores** + * [x] [Snowflake](https://docs.feast.dev/reference/offline-stores/snowflake) * [x] [Redshift](https://docs.feast.dev/reference/offline-stores/redshift) * [x] [BigQuery](https://docs.feast.dev/reference/offline-stores/bigquery) * [x] [Synapse (community plugin)](https://github.com/Azure/feast-azure) * [x] [Hive (community plugin)](https://github.com/baineng/feast-hive) * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) + * [x] [Trino (community plugin)](https://github.com/Shopify/feast-trino) + * [x] [Spark (community plugin)](https://github.com/Adyen/feast-spark-offline-store) * [x] [In-memory / Pandas](https://docs.feast.dev/reference/offline-stores/file) * [x] [Custom offline store support](https://docs.feast.dev/how-to-guides/adding-a-new-offline-store) - * [x] [Snowflake (community plugin)](https://github.com/sfc-gh-madkins/feast-snowflake) - * [x] [Trino (communiuty plugin)](https://github.com/Shopify/feast-trino) * **Online Stores** * [x] [DynamoDB](https://docs.feast.dev/reference/online-stores/dynamodb) * [x] [Redis](https://docs.feast.dev/reference/online-stores/redis) @@ -61,7 +63,7 @@ The list below contains the functionality that contributors are planning to deve * [ ] Delete API * [ ] Feature Logging (for training) * **Data Quality Management (See [RFC](https://docs.google.com/document/d/110F72d4NTv80p35wDSONxhhPBqWRwbZXG4f9mNEMd98/edit))** - * [ ] Data profiling and validation (Great Expectations) (Planned for Q1 2022) + * [x] Data profiling and validation (Great Expectations) * [ ] Metric production * [ ] Training-serving skew detection * [ ] Drift detection @@ -69,7 +71,7 @@ The list below contains the functionality that contributors are planning to deve * [x] Python SDK for browsing feature registry * [x] CLI for browsing feature registry * [x] Model-centric feature tracking (feature services) + * [x] Amundsen integration (see [Feast extractor](https://github.com/amundsen-io/amundsen/blob/main/databuilder/databuilder/extractor/feast_extractor.py)) * [ ] REST API for browsing feature registry * [ ] Feast Web UI * [ ] Feature versioning - * [ ] Amundsen integration diff --git a/docs/sources.md b/docs/sources.md deleted file mode 100644 index a76d395d09..0000000000 --- a/docs/sources.md +++ /dev/null @@ -1,2 +0,0 @@ -# Sources - diff --git a/docs/specs/offline_store_format.md b/docs/specs/offline_store_format.md index 6826c50190..ac829dd52f 100644 --- a/docs/specs/offline_store_format.md +++ b/docs/specs/offline_store_format.md @@ -7,8 +7,8 
@@ One of the design goals of Feast is being able to plug seamlessly into existing Feast provides first class support for the following data warehouses (DWH) to store feature data offline out of the box: * [BigQuery](https://cloud.google.com/bigquery) -* [Snowflake](https://www.snowflake.com/) (Coming Soon) -* [Redshift](https://aws.amazon.com/redshift/) (Coming Soon) +* [Snowflake](https://www.snowflake.com/) +* [Redshift](https://aws.amazon.com/redshift/) The integration between Feast and the DWH is highly configurable, but at the same time there are some non-configurable implications and assumptions that Feast imposes on table schemas and mapping between database-native types and Feast type system. This is what this document is about. @@ -28,14 +28,14 @@ Feature data is stored in tables in the DWH. There is one DWH table per Feast Fe ## Type mappings #### Pandas types -Here's how Feast types map to Pandas types for Feast APIs that take in or return a Pandas dataframe: +Here's how Feast types map to Pandas types for Feast APIs that take in or return a Pandas dataframe: | Feast Type | Pandas Type | |-------------|--| | Event Timestamp | `datetime64[ns]` | | BYTES | `bytes` | | STRING | `str` , `category`| -| INT32 | `int32`, `uint32` | +| INT32 | `int16`, `uint16`, `int32`, `uint32` | | INT64 | `int64`, `uint64` | | UNIX_TIMESTAMP | `datetime64[ns]`, `datetime64[ns, tz]` | | DOUBLE | `float64` | @@ -80,3 +80,17 @@ Here's how Feast types map to BigQuery types when using BigQuery for offline sto | BOOL\_LIST | `ARRAY`| Values that are not specified by the table above will cause an error on conversion. + +#### Snowflake Types +Here's how Feast types map to Snowflake types when using Snowflake for offline storage +See source here: +https://docs.snowflake.com/en/user-guide/python-connector-pandas.html#snowflake-to-pandas-data-mapping + +| Feast Type | Snowflake Python Type | +|-------------|--| +| Event Timestamp | `DATETIME64[NS]` | +| UNIX_TIMESTAMP | `DATETIME64[NS]` | +| STRING | `STR` | +| INT32 | `INT8 / UINT8 / INT16 / UINT16 / INT32 / UINT32` | +| INT64 | `INT64 / UINT64` | +| DOUBLE | `FLOAT64` | diff --git a/docs/tutorials/driver-stats-on-snowflake.md b/docs/tutorials/driver-stats-on-snowflake.md new file mode 100644 index 0000000000..94ac109c94 --- /dev/null +++ b/docs/tutorials/driver-stats-on-snowflake.md @@ -0,0 +1,130 @@ +--- +description: >- + Initial demonstration of Snowflake as an offline store with Feast, using the Snowflake demo template. +--- + +# Drivers stats on Snowflake + +In the steps below, we will set up a sample Feast project that leverages Snowflake +as an offline store. + +Starting with data in a Snowflake table, we will register that table to the feature store and define features associated with the columns in that table. From there, we will generate historical training data based on those feature definitions and then materialize the latest feature values into the online store. Lastly, we will retrieve the materialized feature values. + +Our template will generate new data containing driver statistics. From there, we will show you code snippets that will call to the offline store for generating training datasets, and then the code for calling the online store to serve you the latest feature values to serve models in production. 
+ +## Snowflake Offline Store Example + +#### Install feast-snowflake + +```shell +pip install 'feast[snowflake]' +``` + +#### Get a Snowflake Trial Account (Optional) + +[Snowflake Trial Account](http://trial.snowflake.com) + +#### Create a feature repository + +```shell +feast init -t snowflake {feature_repo_name} +Snowflake Deployment URL (exclude .snowflakecomputing.com): +Snowflake User Name:: +Snowflake Password:: +Snowflake Role Name (Case Sensitive):: +Snowflake Warehouse Name (Case Sensitive):: +Snowflake Database Name (Case Sensitive):: +Should I upload example data to Snowflake (overwrite table)? [Y/n]: Y +cd {feature_repo_name} +``` + +The following files will automatically be created in your project folder: + +* feature_store.yaml -- This is your main configuration file +* driver_repo.py -- This is your main feature definition file +* test.py -- This is a file to test your feature store configuration + +#### Inspect `feature_store.yaml` + +Here you will see the information that you entered. This template will use Snowflake as an offline store and SQLite as the online store. The main thing to remember is by default, Snowflake objects have ALL CAPS names unless lower case was specified. + +{% code title="feature_store.yaml" %} +```yaml +project: ... +registry: ... +provider: local +offline_store: + type: snowflake.offline + account: SNOWFLAKE_DEPLOYMENT_URL #drop .snowflakecomputing.com + user: USERNAME + password: PASSWORD + role: ROLE_NAME #case sensitive + warehouse: WAREHOUSE_NAME #case sensitive + database: DATABASE_NAME #case cap sensitive +``` +{% endcode %} + +#### Run our test python script `test.py` + +```shell +python test.py +``` + +## What we did in `test.py` + +#### Initialize our Feature Store +{% code title="test.py" %} +```python +from datetime import datetime, timedelta + +import pandas as pd +from driver_repo import driver, driver_stats_fv + +from feast import FeatureStore + +fs = FeatureStore(repo_path=".") + +fs.apply([driver, driver_stats_fv]) +``` +{% endcode %} + +#### Create a dummy training dataframe, then call our offline store to add additional columns +{% code title="test.py" %} +```python +entity_df = pd.DataFrame( + { + "event_timestamp": [ + pd.Timestamp(dt, unit="ms", tz="UTC").round("ms") + for dt in pd.date_range( + start=datetime.now() - timedelta(days=3), + end=datetime.now(), + periods=3, + ) + ], + "driver_id": [1001, 1002, 1003], + } +) + +features = ["driver_hourly_stats:conv_rate", "driver_hourly_stats:acc_rate"] + +training_df = fs.get_historical_features( + features=features, entity_df=entity_df +).to_df() +``` +{% endcode %} + +#### Materialize the latest feature values into our online store +{% code title="test.py" %} +```python +fs.materialize_incremental(end_date=datetime.now()) +``` +{% endcode %} + +#### Retrieve the latest values from our online store based on our entity key +{% code title="test.py" %} +```python +online_features = fs.get_online_features( + features=features, entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}], +).to_dict() +``` +{% endcode %} diff --git a/docs/tutorials/tutorials-overview.md b/docs/tutorials/tutorials-overview.md index a523f9b38e..32e64071b0 100644 --- a/docs/tutorials/tutorials-overview.md +++ b/docs/tutorials/tutorials-overview.md @@ -8,3 +8,6 @@ These Feast tutorials showcase how to use Feast to simplify end to end model tra {% page-ref page="real-time-credit-scoring-on-aws.md" %} +{% page-ref page="driver-stats-on-snowflake.md" %} + +{% page-ref page="validating-historical-features.md" 
%} diff --git a/docs/tutorials/validating-historical-features.md b/docs/tutorials/validating-historical-features.md new file mode 100644 index 0000000000..19ae4ef434 --- /dev/null +++ b/docs/tutorials/validating-historical-features.md @@ -0,0 +1,915 @@ +# Validating historical features with Great Expectations + +In this tutorial, we will use the public dataset of Chicago taxi trips to present the data validation capabilities of Feast. +- The original dataset is stored in BigQuery and consists of raw data for each taxi trip (one row per trip) since 2013. +- We will generate several training datasets (aka historical features in Feast) for different periods and evaluate expectations made on one dataset against another. + +Types of features we're ingesting and generating: +- Features that aggregate raw data with daily intervals (e.g., trips per day, average fare or speed for a specific day, etc.). +- Features using SQL while pulling data from BigQuery (like total trip time or total miles travelled). +- Features calculated on the fly when requested using Feast's on-demand transformations. + +Our plan: + +0. Prepare environment +1. Pull data from BigQuery (optional) +2. Declare & apply features and feature views in Feast +3. Generate reference dataset +4. Develop & test profiler function +5. Run validation on a different dataset using the reference dataset & profiler + + +> The original notebook and datasets for this tutorial can be found on [GitHub](https://github.com/feast-dev/dqm-tutorial). + +### 0. Setup + +Install the Feast Python SDK and Great Expectations: + + +```python +!pip install 'feast[ge]' +``` + + +### 1. Dataset preparation (Optional) + +**You can skip this step if you don't have a GCP account. Please use the parquet files that come with this tutorial instead.** + + +```python +!pip install google-cloud-bigquery +``` + + +```python +import pyarrow.parquet + +from google.cloud.bigquery import Client +``` + + +```python +bq_client = Client(project='kf-feast') +``` + +Running some basic aggregations while pulling data from BigQuery, grouping by taxi_id and day: + + +```python +data_query = """SELECT + taxi_id, + TIMESTAMP_TRUNC(trip_start_timestamp, DAY) as day, + SUM(trip_miles) as total_miles_travelled, + SUM(trip_seconds) as total_trip_seconds, + SUM(fare) as total_earned, + COUNT(*) as trip_count +FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips` +WHERE + trip_miles > 0 AND trip_seconds > 60 AND + trip_start_timestamp BETWEEN '2019-01-01' and '2020-12-31' AND + trip_total < 1000 +GROUP BY taxi_id, TIMESTAMP_TRUNC(trip_start_timestamp, DAY)""" +``` + + +```python +driver_stats_table = bq_client.query(data_query).to_arrow() + +# Storing the resulting dataset into a parquet file +pyarrow.parquet.write_table(driver_stats_table, "trips_stats.parquet") +``` + + +```python +def entities_query(year): + return f"""SELECT + distinct taxi_id +FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips` +WHERE + trip_miles > 0 AND trip_seconds > 0 AND + trip_start_timestamp BETWEEN '{year}-01-01' and '{year}-12-31' +""" +``` + + +```python +entities_2019_table = bq_client.query(entities_query(2019)).to_arrow() + +# Storing entities (taxi ids) into a parquet file +pyarrow.parquet.write_table(entities_2019_table, "entities.parquet") +``` + + +## 2.
Declaring features + + +```python +import pyarrow.parquet +import pandas as pd + +from feast import Feature, FeatureView, Entity, FeatureStore +from feast.value_type import ValueType +from feast.data_format import ParquetFormat +from feast.on_demand_feature_view import on_demand_feature_view +from feast.infra.offline_stores.file_source import FileSource +from feast.infra.offline_stores.file import SavedDatasetFileStorage + +from google.protobuf.duration_pb2 import Duration +``` + + +```python +batch_source = FileSource( + event_timestamp_column="day", + path="trips_stats.parquet", # using the parquet file that we created in the previous step + file_format=ParquetFormat() +) +``` + + +```python +taxi_entity = Entity(name='taxi', join_key='taxi_id') +``` + + +```python +trips_stats_fv = FeatureView( + name='trip_stats', + entities=['taxi'], + features=[ + Feature("total_miles_travelled", ValueType.DOUBLE), + Feature("total_trip_seconds", ValueType.DOUBLE), + Feature("total_earned", ValueType.DOUBLE), + Feature("trip_count", ValueType.INT64), + + ], + ttl=Duration(seconds=86400), + batch_source=batch_source, +) +``` + +*Read more about feature views in [Feast docs](https://docs.feast.dev/getting-started/concepts/feature-view)* + + +```python +@on_demand_feature_view( + features=[ + Feature("avg_fare", ValueType.DOUBLE), + Feature("avg_speed", ValueType.DOUBLE), + Feature("avg_trip_seconds", ValueType.DOUBLE), + Feature("earned_per_hour", ValueType.DOUBLE), + ], + inputs={ + "stats": trips_stats_fv + } +) +def on_demand_stats(inp): + out = pd.DataFrame() + out["avg_fare"] = inp["total_earned"] / inp["trip_count"] + out["avg_speed"] = 3600 * inp["total_miles_travelled"] / inp["total_trip_seconds"] + out["avg_trip_seconds"] = inp["total_trip_seconds"] / inp["trip_count"] + out["earned_per_hour"] = 3600 * inp["total_earned"] / inp["total_trip_seconds"] + return out +``` + +*Read more about on-demand feature views [here](https://docs.feast.dev/reference/alpha-on-demand-feature-view)* + + +```python +store = FeatureStore(".") # using the feature_store.yaml stored in the same directory +``` + + +```python +store.apply([taxi_entity, trips_stats_fv, on_demand_stats]) # writing to the registry +``` + + +## 3. Generating training (reference) dataset + + +```python +taxi_ids = pyarrow.parquet.read_table("entities.parquet").to_pandas() +``` + +Generating a range of timestamps with daily frequency: + + +```python +timestamps = pd.DataFrame() +timestamps["event_timestamp"] = pd.date_range("2019-06-01", "2019-07-01", freq='D') +``` + +A cross merge (aka relational multiplication) produces an entity dataframe with each taxi_id repeated for each timestamp: + + +```python +entity_df = pd.merge(taxi_ids, timestamps, how='cross') +entity_df +``` +
+| | taxi_id | event_timestamp |
+|---|---|---|
+| 0 | 91d5288487e87c5917b813ba6f75ab1c3a9749af906a2d... | 2019-06-01 |
+| 1 | 91d5288487e87c5917b813ba6f75ab1c3a9749af906a2d... | 2019-06-02 |
+| 2 | 91d5288487e87c5917b813ba6f75ab1c3a9749af906a2d... | 2019-06-03 |
+| 3 | 91d5288487e87c5917b813ba6f75ab1c3a9749af906a2d... | 2019-06-04 |
+| 4 | 91d5288487e87c5917b813ba6f75ab1c3a9749af906a2d... | 2019-06-05 |
+| ... | ... | ... |
+| 156979 | 7ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf... | 2019-06-27 |
+| 156980 | 7ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf... | 2019-06-28 |
+| 156981 | 7ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf... | 2019-06-29 |
+| 156982 | 7ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf... | 2019-06-30 |
+| 156983 | 7ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf... | 2019-07-01 |
+
+*156984 rows × 2 columns*
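+
+As a quick optional sanity check on the cross merge, the resulting entity dataframe should contain one row per (taxi id, timestamp) pair (a sketch, assuming `taxi_ids` and `timestamps` as built above):
+
+```python
+# Optional: a cross merge yields len(taxi_ids) * len(timestamps) rows
+assert len(entity_df) == len(taxi_ids) * len(timestamps)  # 156984 rows here
+```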
+ + + +Retrieving historical features for the resulting entity dataframe and persisting the output as a saved dataset: + + +```python +job = store.get_historical_features( + entity_df=entity_df, + features=[ + "trip_stats:total_miles_travelled", + "trip_stats:total_trip_seconds", + "trip_stats:total_earned", + "trip_stats:trip_count", + "on_demand_stats:avg_fare", + "on_demand_stats:avg_trip_seconds", + "on_demand_stats:avg_speed", + "on_demand_stats:earned_per_hour", + ] +) + +store.create_saved_dataset( + from_=job, + name='my_training_ds', + storage=SavedDatasetFileStorage(path='my_training_ds.parquet') +) +``` + +```python +<SavedDataset(name = my_training_ds, ..., full_feature_names = False, tags = {}, _retrieval_job = ..., min_event_timestamp = 2019-06-01 00:00:00, max_event_timestamp = 2019-07-01 00:00:00)> +``` + + +## 4. Developing a dataset profiler + +A dataset profiler is a function that accepts a dataset and generates a set of its characteristics. These characteristics will then be used to evaluate (validate) the next datasets. + +**Important: datasets are not compared to each other! +Feast uses a reference dataset and a profiler function to generate a reference profile. +This profile will then be used during validation of the tested dataset.** + + +```python +import numpy as np + +from feast.dqm.profilers.ge_profiler import ge_profiler + +from great_expectations.core.expectation_suite import ExpectationSuite +from great_expectations.dataset import PandasDataset +``` + + +Loading the saved dataset first and exploring the data: + + +```python +ds = store.get_saved_dataset('my_training_ds') +ds.to_df() +```
+*Output (preview truncated): a dataframe with columns `total_earned`, `avg_trip_seconds`, `taxi_id`, `total_miles_travelled`, `trip_count`, `earned_per_hour`, `event_timestamp`, `total_trip_seconds`, `avg_fare`, `avg_speed` (156984 rows × 10 columns). Rows where no feature values were found for a given taxi and day contain NaN.*
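+
+Taxi/day combinations with no recorded trips come back with NaN feature values, as noted in the preview above. Before profiling, it can be useful to gauge this sparsity with plain pandas (an optional check, not part of the original notebook):
+
+```python
+# Optional: fraction of rows in the saved dataset with no trip features at all
+df = ds.to_df()
+print(df["trip_count"].isna().mean())
+```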
+ + + + +Feast uses [Great Expectations](https://docs.greatexpectations.io/docs/) as a validation engine and [ExpectationSuite](https://legacy.docs.greatexpectations.io/en/latest/autoapi/great_expectations/core/expectation_suite/index.html#great_expectations.core.expectation_suite.ExpectationSuite) as a dataset's profile. Hence, we need to develop a function that will generate an ExpectationSuite. This function will receive an instance of [PandasDataset](https://legacy.docs.greatexpectations.io/en/latest/autoapi/great_expectations/dataset/index.html?highlight=pandasdataset#great_expectations.dataset.PandasDataset) (a wrapper around pandas.DataFrame) so we can utilize both the Pandas DataFrame API and some helper functions from PandasDataset during profiling. + + +```python +DELTA = 0.1 # controlling allowed window in fraction of the value on scale [0, 1] + +@ge_profiler +def stats_profiler(ds: PandasDataset) -> ExpectationSuite: + # simple checks on data consistency + ds.expect_column_values_to_be_between( + "avg_speed", + min_value=0, + max_value=60, + mostly=0.99 # allow some outliers + ) + + ds.expect_column_values_to_be_between( + "total_miles_travelled", + min_value=0, + max_value=500, + mostly=0.99 # allow some outliers + ) + + # expectation of means based on observed values + observed_mean = ds.trip_count.mean() + ds.expect_column_mean_to_be_between("trip_count", + min_value=observed_mean * (1 - DELTA), + max_value=observed_mean * (1 + DELTA)) + + observed_mean = ds.earned_per_hour.mean() + ds.expect_column_mean_to_be_between("earned_per_hour", + min_value=observed_mean * (1 - DELTA), + max_value=observed_mean * (1 + DELTA)) + + + # expectation of quantiles + qs = [0.5, 0.75, 0.9, 0.95] + observed_quantiles = ds.avg_fare.quantile(qs) + + ds.expect_column_quantile_values_to_be_between( + "avg_fare", + quantile_ranges={ + "quantiles": qs, + "value_ranges": [[None, max_value] for max_value in observed_quantiles] + }) + + return ds.get_expectation_suite() +``` + +Testing our profiler function: + + +```python +ds.get_profile(profiler=stats_profiler) +``` + 02/02/2022 02:43:47 PM INFO: 5 expectation(s) included in expectation_suite. result_format settings filtered. + + + + +**Verify that all expectations that we coded in our profiler are present here. Otherwise (if some expectations are missing), it means that they failed to pass on the reference dataset (failing silently is the default behavior of Great Expectations).** + +Now we can create a validation reference from the dataset and the profiler function: + + +```python +validation_reference = ds.as_reference(profiler=stats_profiler) +``` + +and test it against our existing retrieval job: + + +```python +_ = job.to_df(validation_reference=validation_reference) +``` + + 02/02/2022 02:43:52 PM INFO: 5 expectation(s) included in expectation_suite. result_format settings filtered. + 02/02/2022 02:43:53 PM INFO: Validating data_asset_name None with expectation_suite_name default + + +Validation successfully passed, as no exceptions were raised. + + +### 5. Validating new historical retrieval + +Creating new timestamps for Dec 2020: + + +```python +from feast.dqm.errors import ValidationFailed +``` + + +```python +timestamps = pd.DataFrame() +timestamps["event_timestamp"] = pd.date_range("2020-12-01", "2020-12-07", freq='D') +``` + + +```python +entity_df = pd.merge(taxi_ids, timestamps, how='cross') +entity_df +```
+| | taxi_id | event_timestamp |
+|---|---|---|
+| 0 | 91d5288487e87c5917b813ba6f75ab1c3a9749af906a2d... | 2020-12-01 |
+| 1 | 91d5288487e87c5917b813ba6f75ab1c3a9749af906a2d... | 2020-12-02 |
+| 2 | 91d5288487e87c5917b813ba6f75ab1c3a9749af906a2d... | 2020-12-03 |
+| 3 | 91d5288487e87c5917b813ba6f75ab1c3a9749af906a2d... | 2020-12-04 |
+| 4 | 91d5288487e87c5917b813ba6f75ab1c3a9749af906a2d... | 2020-12-05 |
+| ... | ... | ... |
+| 35443 | 7ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf... | 2020-12-03 |
+| 35444 | 7ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf... | 2020-12-04 |
+| 35445 | 7ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf... | 2020-12-05 |
+| 35446 | 7ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf... | 2020-12-06 |
+| 35447 | 7ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf... | 2020-12-07 |
+
+*35448 rows × 2 columns*
+ + +```python +job = store.get_historical_features( + entity_df=entity_df, + features=[ + "trip_stats:total_miles_travelled", + "trip_stats:total_trip_seconds", + "trip_stats:total_earned", + "trip_stats:trip_count", + "on_demand_stats:avg_fare", + "on_demand_stats:avg_trip_seconds", + "on_demand_stats:avg_speed", + "on_demand_stats:earned_per_hour", + ] +) +``` + +Execute retrieval job with validation reference: + + +```python +try: + df = job.to_df(validation_reference=validation_reference) +except ValidationFailed as exc: + print(exc.validation_report) +``` + + 02/02/2022 02:43:58 PM INFO: 5 expectation(s) included in expectation_suite. result_format settings filtered. + 02/02/2022 02:43:59 PM INFO: Validating data_asset_name None with expectation_suite_name default + + [ + { + "expectation_config": { + "expectation_type": "expect_column_mean_to_be_between", + "kwargs": { + "column": "trip_count", + "min_value": 10.387244591346153, + "max_value": 12.695521167200855, + "result_format": "COMPLETE" + }, + "meta": {} + }, + "meta": {}, + "result": { + "observed_value": 6.692920555429092, + "element_count": 35448, + "missing_count": 31055, + "missing_percent": 87.6071992778154 + }, + "exception_info": { + "raised_exception": false, + "exception_message": null, + "exception_traceback": null + }, + "success": false + }, + { + "expectation_config": { + "expectation_type": "expect_column_mean_to_be_between", + "kwargs": { + "column": "earned_per_hour", + "min_value": 52.320624975640214, + "max_value": 63.94743052578249, + "result_format": "COMPLETE" + }, + "meta": {} + }, + "meta": {}, + "result": { + "observed_value": 68.99268345164135, + "element_count": 35448, + "missing_count": 31055, + "missing_percent": 87.6071992778154 + }, + "exception_info": { + "raised_exception": false, + "exception_message": null, + "exception_traceback": null + }, + "success": false + }, + { + "expectation_config": { + "expectation_type": "expect_column_quantile_values_to_be_between", + "kwargs": { + "column": "avg_fare", + "quantile_ranges": { + "quantiles": [ + 0.5, + 0.75, + 0.9, + 0.95 + ], + "value_ranges": [ + [ + null, + 16.4 + ], + [ + null, + 26.229166666666668 + ], + [ + null, + 36.4375 + ], + [ + null, + 42.0 + ] + ] + }, + "result_format": "COMPLETE" + }, + "meta": {} + }, + "meta": {}, + "result": { + "observed_value": { + "quantiles": [ + 0.5, + 0.75, + 0.9, + 0.95 + ], + "values": [ + 19.5, + 28.1, + 38.0, + 44.125 + ] + }, + "element_count": 35448, + "missing_count": 31055, + "missing_percent": 87.6071992778154, + "details": { + "success_details": [ + false, + false, + false, + false + ] + } + }, + "exception_info": { + "raised_exception": false, + "exception_message": null, + "exception_traceback": null + }, + "success": false + } + ] + + +Validation failed since several expectations didn't pass: +* Trip count (mean) decreased more than 10% (which is expected when comparing Dec 2020 vs June 2019) +* Average Fare increased - all quantiles are higher than expected +* Earn per hour (mean) increased more than 10% (most probably due to increased fare) + diff --git a/docs/user-guide/define-and-ingest-features.md b/docs/user-guide/define-and-ingest-features.md deleted file mode 100644 index d55fcb1d85..0000000000 --- a/docs/user-guide/define-and-ingest-features.md +++ /dev/null @@ -1,56 +0,0 @@ -# Define and ingest features - -In order to retrieve features for both training and serving, Feast requires data being ingested into its offline and online stores. 
- -{% hint style="warning" %} -Feast 0.8 does not have an offline store. Only Online storage support exists currently. Feast 0.9 will have offline storage support. In Feast 0.8, historical data is retrieved directly from batch sources. -{% endhint %} - -Users are expected to already have either a batch or stream source with data stored in it, ready to be ingested into Feast. Once a feature table \(with the corresponding sources\) has been registered with Feast, it is possible to load data from this source into stores. - -The following depicts an example ingestion flow from a data source to the online store. - -### Batch Source to Online Store - -```python -from feast import Client -from datetime import datetime, timedelta - -client = Client(core_url="localhost:6565") -driver_ft = client.get_feature_table("driver_trips") - -# Initialize date ranges -today = datetime.now() -yesterday = today - timedelta(1) - -# Launches a short-lived job that ingests data over the provided date range. -client.start_offline_to_online_ingestion( - driver_ft, yesterday, today -) -``` - -### Stream Source to Online Store - -```python -from feast import Client -from datetime import datetime, timedelta - -client = Client(core_url="localhost:6565") -driver_ft = client.get_feature_table("driver_trips") - -# Launches a long running streaming ingestion job -client.start_stream_to_online_ingestion(driver_ft) -``` - -### Batch Source to Offline Store - -{% hint style="danger" %} -Not supported in Feast 0.8 -{% endhint %} - -### Stream Source to Offline Store - -{% hint style="danger" %} -Not supported in Feast 0.8 -{% endhint %} - diff --git a/docs/user-guide/getting-online-features.md b/docs/user-guide/getting-online-features.md deleted file mode 100644 index c16dc08a01..0000000000 --- a/docs/user-guide/getting-online-features.md +++ /dev/null @@ -1,54 +0,0 @@ -# Getting online features - -Feast provides an API through which online feature values can be retrieved. This allows teams to look up feature values at low latency in production during model serving, in order to make online predictions. - -{% hint style="info" %} -Online stores only maintain the current state of features, i.e latest feature values. No historical data is stored or served. -{% endhint %} - -```python -from feast import Client - -online_client = Client( - core_url="localhost:6565", - serving_url="localhost:6566", -) - -entity_rows = [ - {"driver_id": 1001}, - {"driver_id": 1002}, -] - -# Features in format -feature_refs = [ - "driver_trips:average_daily_rides", - "driver_trips:maximum_daily_rides", - "driver_trips:rating", -] - -response = online_client.get_online_features( - feature_refs=feature_refs, # Contains only feature references - entity_rows=entity_rows, # Contains only entities (driver ids) -) - -# Print features in dictionary format -response_dict = response.to_dict() -print(response_dict) -``` - -The online store must be populated through [ingestion jobs](define-and-ingest-features.md#batch-source-to-online-store) prior to being used for online serving. - -Feast Serving provides a [gRPC API](https://api.docs.feast.dev/grpc/feast.serving.pb.html) that is backed by [Redis](https://redis.io/). We have native clients in [Python](https://api.docs.feast.dev/python/), [Go](https://godoc.org/github.com/gojek/feast/sdk/go), and [Java](https://javadoc.io/doc/dev.feast). - -### Online Field Statuses - -Feast also returns status codes when retrieving features from the Feast Serving API. 
These status code give useful insight into the quality of data being served. - -| Status | Meaning | -| :--- | :--- | -| NOT\_FOUND | The feature value was not found in the online store. This might mean that no feature value was ingested for this feature. | -| NULL\_VALUE | A entity key was successfully found but no feature values had been set. This status code should not occur during normal operation. | -| OUTSIDE\_MAX\_AGE | The age of the feature row in the online store \(in terms of its event timestamp\) has exceeded the maximum age defined within the feature table. | -| PRESENT | The feature values have been found and are within the maximum age. | -| UNKNOWN | Indicates a system failure. | - diff --git a/docs/user-guide/getting-training-features.md b/docs/user-guide/getting-training-features.md deleted file mode 100644 index b9d0b050f2..0000000000 --- a/docs/user-guide/getting-training-features.md +++ /dev/null @@ -1,72 +0,0 @@ -# Getting training features - -Feast provides a historical retrieval interface for exporting feature data in order to train machine learning models. Essentially, users are able to enrich their data with features from any feature tables. - -### Retrieving historical features - -Below is an example of the process required to produce a training dataset: - -```python -# Feature references with target feature -features = [ - "driver_trips:average_daily_rides", - "driver_trips:maximum_daily_rides", - "driver_trips:rating", - "driver_trips:rating:trip_completed", -] - -# Define entity source -entity_source = FileSource( - "event_timestamp", - ParquetFormat(), - "gs://some-bucket/customer" -) - -# Retrieve historical dataset from Feast. -historical_feature_retrieval_job = client.get_historical_features( - features=features, - entity_rows=entity_source -) - -output_file_uri = historical_feature_retrieval_job.get_output_file_uri() -``` - -#### 1. Define feature references - -[Feature references](../concepts/glossary.md#feature-references) define the specific features that will be retrieved from Feast. These features can come from multiple feature tables. The only requirement is that the feature tables that make up the feature references have the same entity \(or composite entity\). - -**2. Define an entity dataframe** - -Feast needs to join feature values onto specific entities at specific points in time. Thus, it is necessary to provide an [entity dataframe](../concepts/glossary.md#entity-dataframe) as part of the `get_historical_features` method. In the example above we are defining an entity source. This source is an external file that provides Feast with the entity dataframe. - -**3. Launch historical retrieval job** - -Once the feature references and an entity source are defined, it is possible to call `get_historical_features()`. This method launches a job that extracts features from the sources defined in the provided feature tables, joins them onto the provided entity source, and returns a reference to the training dataset that is produced. - -Please see the [Feast SDK](https://api.docs.feast.dev/python) for more details. - -### Point-in-time Joins - -Feast always joins features onto entity data in a point-in-time correct way. The process can be described through an example. - -In the example below there are two tables \(or dataframes\): - -* The dataframe on the left is the [entity dataframe](../concepts/glossary.md#entity-dataframe) that contains timestamps, entities, and the target variable \(trip\_completed\). 
This dataframe is provided to Feast through an entity source. -* The dataframe on the right contains driver features. This dataframe is represented in Feast through a feature table and its accompanying data source\(s\). - -The user would like to have the driver features joined onto the entity dataframe to produce a training dataset that contains both the target \(trip\_completed\) and features \(average\_daily\_rides, maximum\_daily\_rides, rating\). This dataset will then be used to train their model. - -![](../.gitbook/assets/point_in_time_join%20%281%29%20%282%29%20%282%29%20%283%29%20%283%29%20%283%29%20%283%29%20%281%29.png) - -Feast is able to intelligently join feature data with different timestamps to a single entity dataframe. It does this through a point-in-time join as follows: - -1. Feast loads the entity dataframe and all feature tables \(driver dataframe\) into the same location. This can either be a database or in memory. -2. For each [entity row](../concepts/glossary.md#entity-rows) in the [entity dataframe](getting-online-features.md), Feast tries to find feature values in each feature table to join to it. Feast extracts the timestamp and entity key of each row in the entity dataframe and scans backward through the feature table until it finds a matching entity key. -3. If the event timestamp of the matching entity key within the driver feature table is within the maximum age configured for the feature table, then the features at that entity key are joined onto the entity dataframe. If the event timestamp is outside of the maximum age, then only null values are returned. -4. If multiple entity keys are found with the same event timestamp, then they are deduplicated by the created timestamp, with newer values taking precedence. -5. Feast repeats this joining process for all feature tables and returns the resulting dataset. - -{% hint style="info" %} -Point-in-time correct joins attempts to prevent the occurrence of feature leakage by trying to recreate the state of the world at a single point in time, instead of joining features based on exact timestamps only. -{% endhint %} - diff --git a/docs/user-guide/overview.md b/docs/user-guide/overview.md deleted file mode 100644 index 2d6eb9981b..0000000000 --- a/docs/user-guide/overview.md +++ /dev/null @@ -1,32 +0,0 @@ -# Overview - -### Using Feast - -Feast development happens through three key workflows: - -1. [Define and load feature data into Feast](define-and-ingest-features.md) -2. [Retrieve historical features for training models](getting-training-features.md) -3. [Retrieve online features for serving models](getting-online-features.md) - -### Defining feature tables and ingesting data into Feast - -Feature creators model the data within their organization into Feast through the definition of [feature tables](../concepts/feature-tables.md) that contain [data sources](../concepts/sources.md). Feature tables are both a schema and a means of identifying data sources for features, and allow Feast to know how to interpret your data, and where to find it. - -After registering a feature table with Feast, users can trigger an ingestion from their data source into Feast. This loads feature values from an upstream data source into Feast stores through ingestion jobs. - -Visit [feature tables](../concepts/feature-tables.md#overview) to learn more about them. 
- -{% page-ref page="define-and-ingest-features.md" %} - -### Retrieving historical features for training - -In order to generate a training dataset it is necessary to provide both an [entity dataframe ](../concepts/glossary.md#entity-dataframe)and feature references through the[ Feast SDK](https://api.docs.feast.dev/python/) to retrieve historical features. For historical serving, Feast requires that you provide the entities and timestamps for the corresponding feature data. Feast produces a point-in-time correct dataset using the requested features. These features can be requested from an unlimited number of feature sets. - -{% page-ref page="getting-training-features.md" %} - -### Retrieving online features for online serving - -Online retrieval uses feature references through the [Feast Online Serving API](https://api.docs.feast.dev/grpc/feast.serving.pb.html) to retrieve online features. Online serving allows for very low latency requests to feature data at very high throughput. - -{% page-ref page="getting-online-features.md" %} - diff --git a/examples/java-demo/README.md b/examples/java-demo/README.md new file mode 100644 index 0000000000..b908bb7625 --- /dev/null +++ b/examples/java-demo/README.md @@ -0,0 +1,162 @@ + +# Running Feast Java Server with Redis & calling with python (with registry in GCP) + +For this tutorial, we setup Feast with Redis, using the Feast CLI to register and materialize features, and then retrieving via a Feast Java server deployed in Kubernetes via a gRPC call. +> :point_right: for tips on how to run and debug this locally without using Kubernetes, see [java/serving/README.md](https://github.com/feast-dev/feast/blob/master/java/serving/README.md) + +## First, let's setup a Redis cluster +1. Start minikube (`minikube start`) +2. Use helm to install a default Redis cluster + ```bash + helm repo add bitnami https://charts.bitnami.com/bitnami + helm repo update + helm install my-redis bitnami/redis + ``` + ![](redis-screenshot.png) +3. Port forward Redis so we can materialize features to it + + ```bash + kubectl port-forward --namespace default svc/my-redis-master 6379:6379 + ``` +4. Get your Redis password using the command (pasted below for convenience). We'll need this to tell Feast how to communicate with the cluster. + + ```bash + export REDIS_PASSWORD=$(kubectl get secret --namespace default my-redis -o jsonpath="{.data.redis-password}" | base64 --decode) + echo $REDIS_PASSWORD + ``` + +## Next, we setup a local Feast repo +1. Install Feast with Redis dependencies `pip install "feast[redis]"` +2. Make a bucket in GCS (or S3) +3. The feature repo is already setup here, so you just need to swap in your GCS bucket and Redis credentials. + We need to modify the `feature_store.yaml`, which has two fields for you to replace: + ```yaml + registry: gs://[YOUR BUCKET]/demo-repo/registry.db + project: feast_java_demo + provider: gcp + online_store: + type: redis + connection_string: localhost:6379,password=[YOUR PASSWORD] + offline_store: + type: file + flags: + alpha_features: true + on_demand_transforms: true + ``` +4. Run `feast apply` to apply your local features to the remote registry +5. Materialize features to the online store: + ```bash + CURRENT_TIME=$(date -u +"%Y-%m-%dT%H:%M:%S") + feast materialize-incremental $CURRENT_TIME + ``` + +## Now let's setup the Feast Server +1. Add the gcp-auth addon to mount GCP credentials: + ```bash + minikube addons enable gcp-auth + ``` +3. 
Add Feast's Java feature server chart repo + ```bash + helm repo add feast-charts https://feast-helm-charts.storage.googleapis.com + helm repo update + ``` +4. Modify the application-override.yaml file to have your credentials + bucket location: + ```yaml + feature-server: + application-override.yaml: + enabled: true + feast: + activeStore: online + stores: + - name: online + type: REDIS + config: + host: my-redis-master + port: 6379 + password: [YOUR PASSWORD] + global: + registry: + path: gs://[YOUR BUCKET]/demo-repo/registry.db + cache_ttl_seconds: 60 + project: feast_java_demo + ``` +5. Install the Feast helm chart: `helm install feast-release feast-charts/feast --values application-override.yaml` +6. (Optional): check logs of the server to make sure it’s working + ```bash + kubectl logs svc/feast-release-feature-server + ``` +7. Port forward to expose the grpc endpoint: + ```bash + kubectl port-forward svc/feast-release-feature-server 6566:6566 + ``` +8. Make a gRPC call: + - Python example + ```bash + python test.py + ``` + - gRPC cli: + + ```bash + grpc_cli call localhost:6566 GetOnlineFeatures ' + features { + val: "driver_hourly_stats:conv_rate" + val: "driver_hourly_stats:acc_rate" + } + entities { + key: "driver_id" + value { + val { + int64_val: 1001 + } + val { + int64_val: 1002 + } + } + }' + ``` + + - Response: + + ```bash + connecting to localhost:6566 + metadata { + feature_names { + val: "driver_hourly_stats:conv_rate" + val: "driver_hourly_stats:acc_rate" + } + } + results { + values { + float_val: 0.812357187 + } + values { + float_val: 0.379484832 + } + statuses: PRESENT + statuses: PRESENT + event_timestamps { + seconds: 1631725200 + } + event_timestamps { + seconds: 1631725200 + } + } + results { + values { + float_val: 0.840873241 + } + values { + float_val: 0.151376978 + } + statuses: PRESENT + statuses: PRESENT + event_timestamps { + seconds: 1631725200 + } + event_timestamps { + seconds: 1631725200 + } + } + Rpc succeeded with OK status + + ``` \ No newline at end of file diff --git a/examples/java-demo/feature_repo/__init__.py b/examples/java-demo/feature_repo/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/examples/java-demo/feature_repo/application-override.yaml b/examples/java-demo/feature_repo/application-override.yaml new file mode 100644 index 0000000000..dbdeda4c04 --- /dev/null +++ b/examples/java-demo/feature_repo/application-override.yaml @@ -0,0 +1,17 @@ +feature-server: + application-override.yaml: + enabled: true + feast: + activeStore: online + stores: + - name: online + type: REDIS + config: + host: my-redis-master + port: 6379 + password: [YOUR PASSWORD] +global: + registry: + path: gs://[YOUR BUCKET]/demo-repo/registry.db + cache_ttl_seconds: 60 + project: feast_java_demo diff --git a/examples/java-demo/feature_repo/data/driver_stats_with_string.parquet b/examples/java-demo/feature_repo/data/driver_stats_with_string.parquet new file mode 100644 index 0000000000..83b8c31aa5 Binary files /dev/null and b/examples/java-demo/feature_repo/data/driver_stats_with_string.parquet differ diff --git a/examples/java-demo/feature_repo/driver_repo.py b/examples/java-demo/feature_repo/driver_repo.py new file mode 100644 index 0000000000..233593ff02 --- /dev/null +++ b/examples/java-demo/feature_repo/driver_repo.py @@ -0,0 +1,61 @@ +import pandas as pd +from feast import Entity, Feature, FeatureView, FileSource, ValueType +from feast.data_source import RequestDataSource +from feast.on_demand_feature_view import on_demand_feature_view 
+from feast.request_feature_view import RequestFeatureView +from google.protobuf.duration_pb2 import Duration + +driver_hourly_stats = FileSource( + path="data/driver_stats_with_string.parquet", + event_timestamp_column="event_timestamp", + created_timestamp_column="created", +) +driver = Entity(name="driver_id", value_type=ValueType.INT64, description="driver id",) +driver_hourly_stats_view = FeatureView( + name="driver_hourly_stats", + entities=["driver_id"], + ttl=Duration(seconds=86400000), + features=[ + Feature(name="conv_rate", dtype=ValueType.FLOAT), + Feature(name="acc_rate", dtype=ValueType.FLOAT), + Feature(name="avg_daily_trips", dtype=ValueType.INT64), + Feature(name="string_feature", dtype=ValueType.STRING), + ], + online=True, + batch_source=driver_hourly_stats, + tags={}, +) + +# Define a request data source which encodes features / information only +# available at request time (e.g. part of the user initiated HTTP request) +input_request = RequestDataSource( + name="vals_to_add", + schema={"val_to_add": ValueType.INT64, "val_to_add_2": ValueType.INT64}, +) + +# Define an on demand feature view which can generate new features based on +# existing feature views and RequestDataSource features +@on_demand_feature_view( + inputs={ + "driver_hourly_stats": driver_hourly_stats_view, + "vals_to_add": input_request, + }, + features=[ + Feature(name="conv_rate_plus_val1", dtype=ValueType.DOUBLE), + Feature(name="conv_rate_plus_val2", dtype=ValueType.DOUBLE), + ], +) +def transformed_conv_rate(inputs: pd.DataFrame) -> pd.DataFrame: + df = pd.DataFrame() + df["conv_rate_plus_val1"] = inputs["conv_rate"] + inputs["val_to_add"] + df["conv_rate_plus_val2"] = inputs["conv_rate"] + inputs["val_to_add_2"] + return df + + +# Define request feature view +driver_age_request_fv = RequestFeatureView( + name="driver_age", + request_data_source=RequestDataSource( + name="driver_age", schema={"driver_age": ValueType.INT64,} + ), +) diff --git a/examples/java-demo/feature_repo/feature_store.yaml b/examples/java-demo/feature_repo/feature_store.yaml new file mode 100644 index 0000000000..91c65b512a --- /dev/null +++ b/examples/java-demo/feature_repo/feature_store.yaml @@ -0,0 +1,11 @@ +registry: gs://[YOUR BUCKET]/demo-repo/registry.db +project: feast_java_demo +provider: gcp +online_store: + type: redis + connection_string: localhost:6379,password=[YOUR PASSWORD] +offline_store: + type: file +flags: + alpha_features: true + on_demand_transforms: true diff --git a/examples/java-demo/feature_repo/test.py b/examples/java-demo/feature_repo/test.py new file mode 100644 index 0000000000..f73883019d --- /dev/null +++ b/examples/java-demo/feature_repo/test.py @@ -0,0 +1,28 @@ +import grpc +from feast.protos.feast.serving.ServingService_pb2 import ( + FeatureList, + GetOnlineFeaturesRequest, +) +from feast.protos.feast.serving.ServingService_pb2_grpc import ServingServiceStub +from feast.protos.feast.types.Value_pb2 import RepeatedValue, Value + + +# Sample logic to fetch from a local gRPC java server deployed at 6566 +def fetch_java(): + channel = grpc.insecure_channel("localhost:6566") + stub = ServingServiceStub(channel) + feature_refs = FeatureList(val=["driver_hourly_stats:conv_rate"]) + entity_rows = { + "driver_id": RepeatedValue( + val=[Value(int64_val=driver_id) for driver_id in range(1001, 1003)] + ) + } + + print( + stub.GetOnlineFeatures( + GetOnlineFeaturesRequest(features=feature_refs, entities=entity_rows,) + ) + ) + +if __name__ == "__main__": + fetch_java() diff --git 
a/examples/java-demo/redis-screenshot.png b/examples/java-demo/redis-screenshot.png new file mode 100644 index 0000000000..489deb699d Binary files /dev/null and b/examples/java-demo/redis-screenshot.png differ diff --git a/examples/quickstart/quickstart.ipynb b/examples/quickstart/quickstart.ipynb index 3b148137ef..3679fcc778 100644 --- a/examples/quickstart/quickstart.ipynb +++ b/examples/quickstart/quickstart.ipynb @@ -27,12 +27,12 @@ "In this tutorial, we use feature stores to generate training data and power online model inference for a ride-sharing driver satisfaction prediction model. Feast addresses several common issues in this flow:\n", "1. **Training-serving skew and complex data joins:** Feature values often exist across multiple tables. Joining these datasets can be complicated, slow, and error-prone.\n", " - Feast joins these tables with battle-tested logic that ensures *point-in-time* correctness so future feature values do not leak to models.\n", - " - **Upcoming*: Feast alerts users to offline / online skew with data quality monitoring. \n", + " - Feast alerts users to offline / online skew with data quality monitoring. \n", "2. **Online feature availability:** At inference time, models often need access to features that aren't readily available and need to be precomputed from other datasources. \n", " - Feast manages deployment to a variety of online stores (e.g. DynamoDB, Redis, Google Cloud Datastore) and ensures necessary features are consistently *available* and *freshly computed* at inference time.\n", "3. **Feature reusability and model versioning:** Different teams within an organization are often unable to reuse features across projects, resulting in duplicate feature creation logic. Models have data dependencies that need to be versioned, for example when running A/B tests on model versions.\n", " - Feast enables discovery of and collaboration on previously used features and enables versioning of sets of features (via *feature services*). \n", - " - **Upcoming*: Feast enables feature transformation so users can re-use transformation logic across online / offline usecases and across models.\n", + " - Feast enables feature transformation so users can re-use transformation logic across online / offline usecases and across models.\n", "\n", "We will:\n", "- Deploy a local feature store with a Parquet file offline store and Sqlite online store.\n", @@ -188,11 +188,13 @@ "\n", "Valid values for `provider` in `feature_store.yaml` are:\n", "\n", - "* local: use file source / SQLite\n", - "* gcp: use BigQuery / Google Cloud Datastore\n", - "* aws: use Redshift / DynamoDB\n", + "* local: use file source with SQLite/Redis\n", + "* gcp: use BigQuery/Snowflake with Google Cloud Datastore/Redis\n", + "* aws: use Redshift/Snowflake with DynamoDB/Redis\n", "\n", - "A custom setup (e.g. using the built-in support for Redis) can be made by following https://docs.feast.dev/v/master/how-to-guides/creating-a-custom-provider" + "Note that there are many other sources Feast works with, including Azure, Hive, Trino, and PostgreSQL via community plugins. 
See https://docs.feast.dev/getting-started/third-party-integrations for all supported datasources.", + "\n", + "A custom setup can also be made by following https://docs.feast.dev/v/master/how-to-guides/creating-a-custom-provider" ] }, { @@ -794,7 +796,7 @@ "\n", "- Read the [Concepts](https://docs.feast.dev/getting-started/concepts/) page to understand the Feast data model and architecture.\n", "- Check out our [Tutorials](https://docs.feast.dev/tutorials/tutorials-overview) section for more examples on how to use Feast.\n", - "- Follow our [Running Feast with GCP/AWS](https://docs.feast.dev/how-to-guides/feast-gcp-aws) guide for a more in-depth tutorial on using Feast.\n", + "- Follow our [Running Feast with Snowflake/GCP/AWS](https://docs.feast.dev/how-to-guides/feast-snowflake-gcp-aws) guide for a more in-depth tutorial on using Feast.\n", "- Join other Feast users and contributors in [Slack](https://slack.feast.dev/) and become part of the community!" ] } diff --git a/go.mod b/go.mod index f4a1455056..109666b762 100644 --- a/go.mod +++ b/go.mod @@ -25,8 +25,8 @@ require ( go.opencensus.io v0.22.3 // indirect golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 // indirect golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect - golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d - golang.org/x/tools v0.1.7 // indirect + golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f + golang.org/x/tools v0.1.8 // indirect google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 // indirect google.golang.org/grpc v1.29.1 google.golang.org/protobuf v1.27.1 // indirect diff --git a/go.sum b/go.sum index 5e87ccf6db..8b0c2677f3 100644 --- a/go.sum +++ b/go.sum @@ -345,6 +345,7 @@ github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.opencensus.io v0.21.0 h1:mU6zScU4U1YAFPHEHYk+3JC4SY7JxgkqS10ZOSyksNg= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= @@ -386,6 +387,7 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.5.1/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= golang.org/x/net v0.0.0-20170114055629-f2499483f923/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d h1:g9qWBGx4puODJTMVyoPrpoxPFgVGd+z1DZwjfRu4d0I= @@ -415,6 +417,7 @@ golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/ golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net 
v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -454,6 +457,7 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= @@ -464,6 +468,7 @@ golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/time v0.0.0-20161028155119-f51c12702a4d/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -525,6 +530,8 @@ golang.org/x/tools v0.0.0-20201124005743-911501bfb504 h1:jOKV2ysikH1GANB7t2Lotmh golang.org/x/tools v0.0.0-20201124005743-911501bfb504/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.7 h1:6j8CgantCy3yc8JGBqkDLMKWqZ0RDU2g1HVgacojGWQ= golang.org/x/tools v0.1.7/go.mod h1:LGqMHiF4EqQNHR1JncWGqT5BVaXmza+X+BDGol+dOxo= +golang.org/x/tools v0.1.8 h1:P1HhGGuLW4aAclzjtmJdf0mJOjVUZUzOTqkAkWL+l6w= +golang.org/x/tools v0.1.8/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= diff --git a/infra/charts/feast-python-server/Chart.yaml b/infra/charts/feast-python-server/Chart.yaml index fc20d180bc..c71d0ae78b 100644 --- a/infra/charts/feast-python-server/Chart.yaml +++ b/infra/charts/feast-python-server/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: feast-python-server description: Feast Feature Server in Python type: application -version: 0.1.0 +version: 0.18.1 keywords: - machine learning - big data diff --git a/infra/charts/feast-python-server/README.md b/infra/charts/feast-python-server/README.md index b8516bc6dc..6639046f2c 100644 --- a/infra/charts/feast-python-server/README.md +++ b/infra/charts/feast-python-server/README.md @@ -1,6 +1,6 @@ # feast-python-server -![Version: 
0.1.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) +![Version: 0.18.1](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) Feast Feature Server in Python @@ -56,16 +56,4 @@ RUN pip install pip --upgrade RUN pip install feast COPY feature_store.yaml /feature_store.yaml -``` - -Make sure that you have enabled the flags for the python server. Example `feature_store.yaml`: -``` -project: feature_repo -registry: data/registry.db -provider: local -online_store: - path: data/online_store.db -flags: - alpha_features: true - python_feature_server: true ``` \ No newline at end of file diff --git a/infra/charts/feast/Chart.yaml b/infra/charts/feast/Chart.yaml index 4dd16aa906..b8cc79e241 100644 --- a/infra/charts/feast/Chart.yaml +++ b/infra/charts/feast/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v1 description: Feature store for machine learning name: feast -version: 0.101.0 +version: 0.18.1 keywords: - machine learning - big data diff --git a/infra/charts/feast/README.md b/infra/charts/feast/README.md index b8411cc9f7..bcde076865 100644 --- a/infra/charts/feast/README.md +++ b/infra/charts/feast/README.md @@ -10,7 +10,7 @@ This repo contains Helm charts for Feast components that are being installed on ## Chart: Feast -Feature store for machine learning Current chart version is `0.101.0` +Feature store for machine learning Current chart version is `0.18.1` ## Installation @@ -56,9 +56,9 @@ For more details, please see: https://docs.feast.dev/how-to-guides/running-feast | Repository | Name | Version | |------------|------|---------| -| https://charts.helm.sh/stable | redis | 10.5.6 | -| https://feast-helm-charts.storage.googleapis.com | feature-server(feature-server) | 0.101.0 | -| https://feast-helm-charts.storage.googleapis.com | transformation-service(transformation-service) | 0.101.0 | +| https://charts.helm.sh/stable | redis | 10.5.6 | +| https://feast-helm-charts.storage.googleapis.com | feature-server(feature-server) | 0.18.1 | +| https://feast-helm-charts.storage.googleapis.com | transformation-service(transformation-service) | 0.18.1 | ## Values diff --git a/infra/charts/feast/charts/feature-server/Chart.yaml b/infra/charts/feast/charts/feature-server/Chart.yaml index f0336cee2f..2c0155919e 100644 --- a/infra/charts/feast/charts/feature-server/Chart.yaml +++ b/infra/charts/feast/charts/feature-server/Chart.yaml @@ -1,8 +1,8 @@ apiVersion: v1 description: "Feast Feature Server: Online feature serving service for Feast" name: feature-server -version: 0.100.4 -appVersion: v0.15.0 +version: 0.18.1 +appVersion: v0.18.1 keywords: - machine learning - big data diff --git a/infra/charts/feast/charts/feature-server/README.md b/infra/charts/feast/charts/feature-server/README.md index 773f03af5e..7ce4e67822 100644 --- a/infra/charts/feast/charts/feature-server/README.md +++ b/infra/charts/feast/charts/feature-server/README.md @@ -1,6 +1,6 @@ # feature-server -![Version: 0.100.4](https://img.shields.io/badge/Version-0.100.4-informational?style=flat-square) ![AppVersion: v0.15.0](https://img.shields.io/badge/AppVersion-v0.15.0-informational?style=flat-square) +![Version: 0.18.1](https://img.shields.io/badge/Version-0.18.1-informational?style=flat-square) ![AppVersion: v0.18.1](https://img.shields.io/badge/AppVersion-v0.18.1-informational?style=flat-square) 
Feast Feature Server: Online feature serving service for Feast @@ -8,62 +8,63 @@ Feast Feature Server: Online feature serving service for Feast ## Values -| Key | Type | Default | Description | -|-----|------|---------|-------------| -| "application-generated.yaml".enabled | bool | `true` | Flag to include Helm generated configuration. Please set `application-override.yaml` to override this configuration. | -| "application-override.yaml" | object | `{"enabled":true}` | Configuration to override the default [application.yaml](https://github.com/feast-dev/feast/blob/master/java/serving/src/main/resources/application.yml). Will be created as a ConfigMap. `application-override.yaml` has a higher precedence than `application-secret.yaml` | -| "application-secret.yaml" | object | `{"enabled":true}` | Configuration to override the default [application.yaml](https://github.com/feast-dev/feast/blob/master/java/serving/src/main/resources/application.yml). Will be created as a Secret. `application-override.yaml` has a higher precedence than `application-secret.yaml`. It is recommended to either set `application-override.yaml` or `application-secret.yaml` only to simplify config management. | -| "application.yaml".enabled | bool | `true` | Flag to include the default [configuration](https://github.com/feast-dev/feast/blob/master/java/serving/src/main/resources/application.yml). Please set `application-override.yaml` to override this configuration. | -| envOverrides | object | `{}` | Extra environment variables to set | -| image.pullPolicy | string | `"IfNotPresent"` | Image pull policy | -| image.repository | string | `"feastdev/feature-server-java"` | Docker image for Feature Server repository | -| image.tag | string | `"0.17.0"` | Image tag | -| ingress.grpc.annotations | object | `{}` | Extra annotations for the ingress | -| ingress.grpc.auth.enabled | bool | `false` | Flag to enable auth | -| ingress.grpc.class | string | `"nginx"` | Which ingress controller to use | -| ingress.grpc.enabled | bool | `false` | Flag to create an ingress resource for the service | -| ingress.grpc.hosts | list | `[]` | List of hostnames to match when routing requests | -| ingress.grpc.https.enabled | bool | `true` | Flag to enable HTTPS | -| ingress.grpc.https.secretNames | object | `{}` | Map of hostname to TLS secret name | -| ingress.grpc.whitelist | string | `""` | Allowed client IP source ranges | -| ingress.http.annotations | object | `{}` | Extra annotations for the ingress | +| Key | Type | Default | Description | +|-----|------|-------------------------------------------------------|-------------| +| "application-generated.yaml".enabled | bool | `true` | Flag to include Helm generated configuration. Please set `application-override.yaml` to override this configuration. | +| "application-override.yaml" | object | `{"enabled":true}` | Configuration to override the default [application.yaml](https://github.com/feast-dev/feast/blob/master/java/serving/src/main/resources/application.yml). Will be created as a ConfigMap. `application-override.yaml` has a higher precedence than `application-secret.yaml` | +| "application-secret.yaml" | object | `{"enabled":true}` | Configuration to override the default [application.yaml](https://github.com/feast-dev/feast/blob/master/java/serving/src/main/resources/application.yml). Will be created as a Secret. `application-override.yaml` has a higher precedence than `application-secret.yaml`. 
It is recommended to either set `application-override.yaml` or `application-secret.yaml` only to simplify config management. | +| "application.yaml".enabled | bool | `true` | Flag to include the default [configuration](https://github.com/feast-dev/feast/blob/master/java/serving/src/main/resources/application.yml). Please set `application-override.yaml` to override this configuration. | +| envOverrides | object | `{}` | Extra environment variables to set | +| image.pullPolicy | string | `"IfNotPresent"` | Image pull policy | +| image.repository | string | `"feastdev/feature-server-java"` | Docker image for Feature Server repository | +| image.tag | string | `"0.18.1"` | Image tag | +| ingress.grpc.annotations | object | `{}` | Extra annotations for the ingress | +| ingress.grpc.auth.enabled | bool | `false` | Flag to enable auth | +| ingress.grpc.class | string | `"nginx"` | Which ingress controller to use | +| ingress.grpc.enabled | bool | `false` | Flag to create an ingress resource for the service | +| ingress.grpc.hosts | list | `[]` | List of hostnames to match when routing requests | +| ingress.grpc.https.enabled | bool | `true` | Flag to enable HTTPS | +| ingress.grpc.https.secretNames | object | `{}` | Map of hostname to TLS secret name | +| ingress.grpc.whitelist | string | `""` | Allowed client IP source ranges | +| ingress.http.annotations | object | `{}` | Extra annotations for the ingress | | ingress.http.auth.authUrl | string | `"http://auth-server.auth-ns.svc.cluster.local/auth"` | URL to an existing authentication service | -| ingress.http.auth.enabled | bool | `false` | Flag to enable auth | -| ingress.http.class | string | `"nginx"` | Which ingress controller to use | -| ingress.http.enabled | bool | `false` | Flag to create an ingress resource for the service | -| ingress.http.hosts | list | `[]` | List of hostnames to match when routing requests | -| ingress.http.https.enabled | bool | `true` | Flag to enable HTTPS | -| ingress.http.https.secretNames | object | `{}` | Map of hostname to TLS secret name | -| ingress.http.whitelist | string | `""` | Allowed client IP source ranges | -| javaOpts | string | `nil` | [JVM options](https://docs.oracle.com/cd/E22289_01/html/821-1274/configuring-the-default-jvm-and-java-arguments.html). For better performance, it is advised to set the min and max heap:
`-Xms2048m -Xmx2048m` | -| livenessProbe.enabled | bool | `true` | Flag to enabled the probe | -| livenessProbe.failureThreshold | int | `5` | Min consecutive failures for the probe to be considered failed | -| livenessProbe.initialDelaySeconds | int | `60` | Delay before the probe is initiated | -| livenessProbe.periodSeconds | int | `10` | How often to perform the probe | -| livenessProbe.successThreshold | int | `1` | Min consecutive success for the probe to be considered successful | -| livenessProbe.timeoutSeconds | int | `5` | When the probe times out | -| logLevel | string | `"WARN"` | Default log level, use either one of `DEBUG`, `INFO`, `WARN` or `ERROR` | -| logType | string | `"Console"` | Log format, either `JSON` or `Console` | -| nodeSelector | object | `{}` | Node labels for pod assignment | -| podLabels | object | `{}` | Labels to be added to Feast Serving pods | -| readinessProbe.enabled | bool | `true` | Flag to enabled the probe | -| readinessProbe.failureThreshold | int | `5` | Min consecutive failures for the probe to be considered failed | -| readinessProbe.initialDelaySeconds | int | `15` | Delay before the probe is initiated | -| readinessProbe.periodSeconds | int | `10` | How often to perform the probe | -| readinessProbe.successThreshold | int | `1` | Min consecutive success for the probe to be considered successful | -| readinessProbe.timeoutSeconds | int | `10` | When the probe times out | -| replicaCount | int | `1` | Number of pods that will be created | -| resources | object | `{}` | CPU/memory [resource requests/limit](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#resource-requests-and-limits-of-pod-and-container) | -| secrets | list | `[]` | List of Kubernetes secrets to be mounted. These secrets will be mounted on /etc/secrets/. | -| service.grpc.nodePort | string | `nil` | Port number that each cluster node will listen to | -| service.grpc.port | int | `6566` | Service port for GRPC requests | -| service.grpc.targetPort | int | `6566` | Container port serving GRPC requests | -| service.http.nodePort | string | `nil` | Port number that each cluster node will listen to | -| service.http.port | int | `80` | Service port for HTTP requests | -| service.http.targetPort | int | `8080` | Container port serving HTTP requests and Prometheus metrics | -| service.type | string | `"ClusterIP"` | Kubernetes service type | -| transformationService.host | string | `""` | | -| transformationService.port | int | `6566` | | +| ingress.http.auth.enabled | bool | `false` | Flag to enable auth | +| ingress.http.class | string | `"nginx"` | Which ingress controller to use | +| ingress.http.enabled | bool | `false` | Flag to create an ingress resource for the service | +| ingress.http.hosts | list | `[]` | List of hostnames to match when routing requests | +| ingress.http.https.enabled | bool | `true` | Flag to enable HTTPS | +| ingress.http.https.secretNames | object | `{}` | Map of hostname to TLS secret name | +| ingress.http.whitelist | string | `""` | Allowed client IP source ranges | +| javaOpts | string | `nil` | [JVM options](https://docs.oracle.com/cd/E22289_01/html/821-1274/configuring-the-default-jvm-and-java-arguments.html). For better performance, it is advised to set the min and max heap:
`-Xms2048m -Xmx2048m` | +| livenessProbe.enabled | bool | `true` | Flag to enabled the probe | +| livenessProbe.failureThreshold | int | `5` | Min consecutive failures for the probe to be considered failed | +| livenessProbe.initialDelaySeconds | int | `60` | Delay before the probe is initiated | +| livenessProbe.periodSeconds | int | `10` | How often to perform the probe | +| livenessProbe.successThreshold | int | `1` | Min consecutive success for the probe to be considered successful | +| livenessProbe.timeoutSeconds | int | `5` | When the probe times out | +| logLevel | string | `"WARN"` | Default log level, use either one of `DEBUG`, `INFO`, `WARN` or `ERROR` | +| logType | string | `"Console"` | Log format, either `JSON` or `Console` | +| nodeSelector | object | `{}` | Node labels for pod assignment | +| podAnnotations | object | `{}` | Annotations to be added to Feast Serving pods | +| podLabels | object | `{}` | Labels to be added to Feast Serving pods | +| readinessProbe.enabled | bool | `true` | Flag to enabled the probe | +| readinessProbe.failureThreshold | int | `5` | Min consecutive failures for the probe to be considered failed | +| readinessProbe.initialDelaySeconds | int | `15` | Delay before the probe is initiated | +| readinessProbe.periodSeconds | int | `10` | How often to perform the probe | +| readinessProbe.successThreshold | int | `1` | Min consecutive success for the probe to be considered successful | +| readinessProbe.timeoutSeconds | int | `10` | When the probe times out | +| replicaCount | int | `1` | Number of pods that will be created | +| resources | object | `{}` | CPU/memory [resource requests/limit](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#resource-requests-and-limits-of-pod-and-container) | +| secrets | list | `[]` | List of Kubernetes secrets to be mounted. These secrets will be mounted on /etc/secrets/. 
| +| service.grpc.nodePort | string | `nil` | Port number that each cluster node will listen to | +| service.grpc.port | int | `6566` | Service port for GRPC requests | +| service.grpc.targetPort | int | `6566` | Container port serving GRPC requests | +| service.http.nodePort | string | `nil` | Port number that each cluster node will listen to | +| service.http.port | int | `80` | Service port for HTTP requests | +| service.http.targetPort | int | `8080` | Container port serving HTTP requests and Prometheus metrics | +| service.type | string | `"ClusterIP"` | Kubernetes service type | +| transformationService.host | string | `""` | | +| transformationService.port | int | `6566` | | ---------------------------------------------- Autogenerated from chart metadata using [helm-docs v1.5.0](https://github.com/norwoodj/helm-docs/releases/v1.5.0) diff --git a/infra/charts/feast/charts/feature-server/templates/configmap.yaml b/infra/charts/feast/charts/feature-server/templates/configmap.yaml index c4bdd5a664..fbf2633e8e 100644 --- a/infra/charts/feast/charts/feature-server/templates/configmap.yaml +++ b/infra/charts/feast/charts/feature-server/templates/configmap.yaml @@ -14,26 +14,40 @@ data: {{- if index .Values "application-generated.yaml" "enabled" }} feast: registry: {{ .Values.global.registry.path }} - registry-refresh-interval: {{ .Values.global.registry.cache_ttl_seconds }} + registryRefreshInterval: {{ .Values.global.registry.cache_ttl_seconds }} {{- if .Values.transformationService.host }} - transformation-service-endpoint: {{ .Values.transformationService.host}}:{{ .Values.transformationService.port }} + transformationServiceEndpoint: {{ .Values.transformationService.host}}:{{ .Values.transformationService.port }} {{- else }} - transformation-service-endpoint: {{ .Release.Name }}-transformation-service:{{ .Values.transformationService.port }} + transformationServiceEndpoint: {{ .Release.Name }}-transformation-service:{{ .Values.transformationService.port }} {{- end }} - active_store: online + activeStore: online stores: - name: online type: REDIS config: host: {{ .Release.Name }}-redis-master port: 6379 - - server: - port: {{ .Values.service.http.targetPort }} + rest: + server: + port: {{ .Values.service.http.targetPort }} + grpc: + server: + port: {{ .Values.service.grpc.targetPort }} {{- end }} application-override.yaml: | {{- if index .Values "application-override.yaml" "enabled" }} -{{- toYaml (index .Values "application-override.yaml") | nindent 4 }} + {{- if index .Values "application-override.yaml" "feast" }} + feast: {{- toYaml (index .Values "application-override.yaml" "feast") | nindent 6 }} + registry: {{ .Values.global.registry.path }} + registryRefreshInterval: {{ .Values.global.registry.cache_ttl_seconds }} + project: {{ .Values.global.project }} + {{- end }} + {{- if index .Values "application-override.yaml" "rest" }} + rest: {{- toYaml (index .Values "application-override.yaml" "rest") | nindent 6 }} + {{- end }} + {{- if index .Values "application-override.yaml" "grpc" }} + grpc: {{- toYaml (index .Values "application-override.yaml" "grpc") | nindent 6 }} + {{- end }} {{- end }} diff --git a/infra/charts/feast/charts/feature-server/templates/deployment.yaml b/infra/charts/feast/charts/feature-server/templates/deployment.yaml index ad0529978d..1d1bc40029 100644 --- a/infra/charts/feast/charts/feature-server/templates/deployment.yaml +++ b/infra/charts/feast/charts/feature-server/templates/deployment.yaml @@ -21,6 +21,9 @@ spec: annotations: checksum/configmap: {{ include 
(print $.Template.BasePath "/configmap.yaml") . | sha256sum }} checksum/secret: {{ include (print $.Template.BasePath "/secret.yaml") . | sha256sum }} + {{- if .Values.podAnnotations }} + {{ toYaml .Values.podAnnotations | nindent 8 }} + {{- end }} labels: app: {{ template "feature-server.name" . }} component: serving @@ -89,7 +92,7 @@ spec: - java - -jar - /opt/feast/feast-serving.jar - - {{- if index .Values "application.yaml" "enabled" -}} + - {{ if index .Values "application.yaml" "enabled" -}} classpath:/application.yml {{- end }} {{- if index .Values "application-generated.yaml" "enabled" -}} diff --git a/infra/charts/feast/charts/feature-server/templates/secret.yaml b/infra/charts/feast/charts/feature-server/templates/secret.yaml index d821f8e6f1..b6aa88c258 100644 --- a/infra/charts/feast/charts/feature-server/templates/secret.yaml +++ b/infra/charts/feast/charts/feature-server/templates/secret.yaml @@ -12,4 +12,12 @@ metadata: type: Opaque stringData: application-secret.yaml: | -{{- toYaml (index .Values "application-secret.yaml") | nindent 4 }} + {{- if index .Values "application-secret.yaml" "feast" }} + feast: {{- toYaml (index .Values "application-secret.yaml" "feast") | nindent 6 }} + {{- end }} + {{- if index .Values "application-secret.yaml" "rest" }} + rest: {{- toYaml (index .Values "application-secret.yaml" "rest") | nindent 6 }} + {{- end }} + {{- if index .Values "application-secret.yaml" "grpc" }} + grpc: {{- toYaml (index .Values "application-secret.yaml" "grpc") | nindent 6 }} + {{- end }} diff --git a/infra/charts/feast/charts/feature-server/values.yaml b/infra/charts/feast/charts/feature-server/values.yaml index 92de49763c..c3fc43cef8 100644 --- a/infra/charts/feast/charts/feature-server/values.yaml +++ b/infra/charts/feast/charts/feature-server/values.yaml @@ -5,7 +5,7 @@ image: # image.repository -- Docker image for Feature Server repository repository: feastdev/feature-server-java # image.tag -- Image tag - tag: 0.17.0 + tag: 0.18.1 # image.pullPolicy -- Image pull policy pullPolicy: IfNotPresent @@ -25,7 +25,7 @@ application-generated.yaml: # "application-secret.yaml" -- Configuration to override the default [application.yaml](https://github.com/feast-dev/feast/blob/master/java/serving/src/main/resources/application.yml). Will be created as a Secret. `application-override.yaml` has a higher precedence than `application-secret.yaml`. It is recommended to either set `application-override.yaml` or `application-secret.yaml` only to simplify config management. application-secret.yaml: - enabled: true + enabled: false # "application-override.yaml" -- Configuration to override the default [application.yaml](https://github.com/feast-dev/feast/blob/master/java/serving/src/main/resources/application.yml). Will be created as a ConfigMap. `application-override.yaml` has a higher precedence than `application-secret.yaml` application-override.yaml: @@ -140,5 +140,8 @@ envOverrides: {} # secrets -- List of Kubernetes secrets to be mounted. These secrets will be mounted on /etc/secrets/. 
secrets: [] +# podAnnotations -- Annotations to be added to Feast Serving pods +podAnnotations: {} + # podLabels -- Labels to be added to Feast Serving pods podLabels: {} diff --git a/infra/charts/feast/charts/transformation-service/Chart.yaml b/infra/charts/feast/charts/transformation-service/Chart.yaml index 2760aa93fd..434850ca8b 100644 --- a/infra/charts/feast/charts/transformation-service/Chart.yaml +++ b/infra/charts/feast/charts/transformation-service/Chart.yaml @@ -1,8 +1,8 @@ apiVersion: v1 description: "Transformation service: to compute on-demand features" name: transformation-service -version: 0.100.4 -appVersion: v0.15.0 +version: 0.18.1 +appVersion: v0.18.1 keywords: - machine learning - big data diff --git a/infra/charts/feast/charts/transformation-service/README.md b/infra/charts/feast/charts/transformation-service/README.md index 8089c1572b..20b39443d6 100644 --- a/infra/charts/feast/charts/transformation-service/README.md +++ b/infra/charts/feast/charts/transformation-service/README.md @@ -1,6 +1,6 @@ # transformation-service -![Version: 0.100.4](https://img.shields.io/badge/Version-0.100.4-informational?style=flat-square) ![AppVersion: v0.15.0](https://img.shields.io/badge/AppVersion-v0.15.0-informational?style=flat-square) +![Version: 0.18.1](https://img.shields.io/badge/Version-0.18.1-informational?style=flat-square) ![AppVersion: v0.18.1](https://img.shields.io/badge/AppVersion-v0.18.1-informational?style=flat-square) Transformation service: to compute on-demand features @@ -8,20 +8,20 @@ Transformation service: to compute on-demand features ## Values -| Key | Type | Default | Description | -|-----|------|---------|-------------| -| envOverrides | object | `{}` | Extra environment variables to set | -| image.pullPolicy | string | `"IfNotPresent"` | Image pull policy | +| Key | Type | Default | Description | +|-----|------|--------------------------------------------|-------------| +| envOverrides | object | `{}` | Extra environment variables to set | +| image.pullPolicy | string | `"IfNotPresent"` | Image pull policy | | image.repository | string | `"feastdev/feature-transformation-server"` | Docker image for Transformation Server repository | -| image.tag | string | `"0.17.0"` | Image tag | -| nodeSelector | object | `{}` | Node labels for pod assignment | -| podLabels | object | `{}` | Labels to be added to Feast Serving pods | -| replicaCount | int | `1` | Number of pods that will be created | -| resources | object | `{}` | CPU/memory [resource requests/limit](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#resource-requests-and-limits-of-pod-and-container) | -| service.grpc.nodePort | string | `nil` | Port number that each cluster node will listen to | -| service.grpc.port | int | `6566` | Service port for GRPC requests | -| service.grpc.targetPort | int | `6566` | Container port serving GRPC requests | -| service.type | string | `"ClusterIP"` | Kubernetes service type | +| image.tag | string | `"0.18.1"` | Image tag | +| nodeSelector | object | `{}` | Node labels for pod assignment | +| podLabels | object | `{}` | Labels to be added to Feast Serving pods | +| replicaCount | int | `1` | Number of pods that will be created | +| resources | object | `{}` | CPU/memory [resource requests/limit](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#resource-requests-and-limits-of-pod-and-container) | +| service.grpc.nodePort | string | `nil` | Port number that each cluster node will listen to | +| 
service.grpc.port | int | `6566` | Service port for GRPC requests | +| service.grpc.targetPort | int | `6566` | Container port serving GRPC requests | +| service.type | string | `"ClusterIP"` | Kubernetes service type | ---------------------------------------------- Autogenerated from chart metadata using [helm-docs v1.5.0](https://github.com/norwoodj/helm-docs/releases/v1.5.0) diff --git a/infra/charts/feast/charts/transformation-service/config/feature_store.yaml b/infra/charts/feast/charts/transformation-service/config/feature_store.yaml index 234471fb96..555e93a306 100644 --- a/infra/charts/feast/charts/transformation-service/config/feature_store.yaml +++ b/infra/charts/feast/charts/transformation-service/config/feature_store.yaml @@ -5,5 +5,4 @@ provider: local project: {{ .Values.global.project }} flags: on_demand_transforms: true - python_feature_server: true alpha_features: true \ No newline at end of file diff --git a/infra/charts/feast/charts/transformation-service/values.yaml b/infra/charts/feast/charts/transformation-service/values.yaml index 7babb5f6b6..9c22dafdee 100644 --- a/infra/charts/feast/charts/transformation-service/values.yaml +++ b/infra/charts/feast/charts/transformation-service/values.yaml @@ -5,7 +5,7 @@ image: # image.repository -- Docker image for Transformation Server repository repository: feastdev/feature-transformation-server # image.tag -- Image tag - tag: 0.17.0 + tag: 0.18.1 # image.pullPolicy -- Image pull policy pullPolicy: IfNotPresent diff --git a/infra/charts/feast/requirements.yaml b/infra/charts/feast/requirements.yaml index a1ccdde0f3..96f78a6ee6 100644 --- a/infra/charts/feast/requirements.yaml +++ b/infra/charts/feast/requirements.yaml @@ -1,12 +1,12 @@ dependencies: - name: feature-server alias: feature-server - version: 0.101.0 + version: 0.18.1 condition: feature-server.enabled repository: https://feast-helm-charts.storage.googleapis.com - name: transformation-service alias: transformation-service - version: 0.101.0 + version: 0.18.1 condition: transformation-service.enabled repository: https://feast-helm-charts.storage.googleapis.com - name: redis diff --git a/infra/scripts/create-cluster.sh b/infra/scripts/create-cluster.sh new file mode 100755 index 0000000000..24b961b977 --- /dev/null +++ b/infra/scripts/create-cluster.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash +# Settings +# Make sure you run "brew install redis" + +# BIN_PATH="/opt/homebrew/bin" +REDIS_CLI=`which redis-cli` +REDIS_SERVER=`which redis-server` +CLUSTER_HOST=127.0.0.1 +# Creates a cluster at ports 6001-6006 with 3 masters 6001-6003 and 3 slaves 6004-6006 +PORT=${2:-6000} +TIMEOUT=2000 +NODES=6 +REPLICAS=1 +PROTECTED_MODE=yes +ADDITIONAL_OPTIONS="" + +if [ -a config.sh ] +then + source "config.sh" +fi + +# Computed vars +ENDPORT=$((PORT+NODES)) + +if [ "$1" == "start" ] +then + while [ $((PORT < ENDPORT)) != "0" ]; do + PORT=$((PORT+1)) + echo "Starting $PORT" + $REDIS_SERVER --port $PORT --protected-mode $PROTECTED_MODE --cluster-enabled yes --cluster-config-file nodes-${PORT}.conf --cluster-node-timeout $TIMEOUT --appendonly yes --appendfilename appendonly-${PORT}.aof --dbfilename dump-${PORT}.rdb --logfile ${PORT}.log --daemonize yes ${ADDITIONAL_OPTIONS} + done + exit 0 +fi + +if [ "$1" == "create" ] +then + HOSTS="" + while [ $((PORT < ENDPORT)) != "0" ]; do + PORT=$((PORT+1)) + HOSTS="$HOSTS $CLUSTER_HOST:$PORT" + done + OPT_ARG="" + if [ "$2" == "-f" ]; then + OPT_ARG="--cluster-yes" + fi + $REDIS_CLI --cluster create $HOSTS --cluster-replicas $REPLICAS $OPT_ARG + exit 
0 +fi + +if [ "$1" == "stop" ] +then + while [ $((PORT < ENDPORT)) != "0" ]; do + PORT=$((PORT+1)) + echo "Stopping $PORT" + $REDIS_CLI -p $PORT shutdown nosave + done + exit 0 +fi + +if [ "$1" == "watch" ] +then + PORT=$((PORT+1)) + while [ 1 ]; do + clear + date + $REDIS_CLI -p $PORT cluster nodes | head -30 + sleep 1 + done + exit 0 +fi + +if [ "$1" == "clean" ] +then + echo "Cleaning *.log" + rm -rf *.log + echo "Cleaning appendonly-*" + rm -rf appendonly-* + echo "Cleaning dump-*.rdb" + rm -rf dump-*.rdb + echo "Cleaning nodes-*.conf" + rm -rf nodes-*.conf + exit 0 +fi + +if [ "$1" == "clean-logs" ] +then + echo "Cleaning *.log" + rm -rf *.log + exit 0 +fi + +echo "Usage: $0 [start|create|stop|watch|clean|clean-logs|call]" +echo "start [PORT] -- Launch Redis Cluster instances." +echo "create [PORT] [-f] -- Create a cluster using redis-cli --cluster create." +echo "stop [PORT] -- Stop Redis Cluster instances." +echo "watch [PORT] -- Show CLUSTER NODES output (first 30 lines) of first node." +echo "clean -- Remove all instances data, logs, configs." +echo "clean-logs -- Remove just instances logs." diff --git a/infra/scripts/helm/install-helm.sh b/infra/scripts/helm/install-helm.sh index 3686f9dfdb..a5073289df 100755 --- a/infra/scripts/helm/install-helm.sh +++ b/infra/scripts/helm/install-helm.sh @@ -6,5 +6,4 @@ readonly STABLE_REPO_URL=https://charts.helm.sh/stable readonly INCUBATOR_REPO_URL=https://charts.helm.sh/incubator curl -s "https://get.helm.sh/helm-${HELM_VERSION}-linux-amd64.tar.gz" | tar -C /tmp -xz sudo mv /tmp/linux-amd64/helm /usr/bin/helm -helm init --client-only -helm repo add incubator "$INCUBATOR_REPO_URL" \ No newline at end of file +helm repo add incubator "$INCUBATOR_REPO_URL" diff --git a/infra/scripts/helm/push-helm-charts.sh b/infra/scripts/helm/push-helm-charts.sh index 74961b196a..08753adb3c 100755 --- a/infra/scripts/helm/push-helm-charts.sh +++ b/infra/scripts/helm/push-helm-charts.sh @@ -10,13 +10,14 @@ fi bucket=gs://feast-helm-charts repo_url=https://feast-helm-charts.storage.googleapis.com/ -helm plugin install https://github.com/hayorov/helm-gcs.git --version 0.2.2 || true +helm plugin install https://github.com/hayorov/helm-gcs.git --version 0.3.18 || true helm repo add feast-helm-chart-repo $bucket -mkdir -p feast -cp -R * feast/ || true - +cd infra/charts helm package feast +helm package feast-python-server -helm gcs push --public feast-${1}.tgz feast-helm-chart-repo --force \ No newline at end of file +helm gcs push --public feast-${1}.tgz feast-helm-chart-repo --force +helm gcs push --public feast-python-server-${1}.tgz feast-helm-chart-repo --force +rm -f ./*.tgz \ No newline at end of file diff --git a/infra/scripts/helm/validate-helm-chart-versions.sh b/infra/scripts/helm/validate-helm-chart-versions.sh index 8d0b2941f5..8a6b464cbb 100755 --- a/infra/scripts/helm/validate-helm-chart-versions.sh +++ b/infra/scripts/helm/validate-helm-chart-versions.sh @@ -3,7 +3,7 @@ set -e # Amount of file locations that need to be bumped in unison when versions increment -UNIQUE_VERSIONS_COUNT=4 +UNIQUE_VERSIONS_COUNT=18 if [ $# -ne 1 ]; then echo "Please provide a single semver version (without a \"v\" prefix) to test the repository against, e.g 0.99.0" diff --git a/infra/scripts/publish-java-sdk.sh b/infra/scripts/publish-java-sdk.sh index ed00799e84..68174db17a 100755 --- a/infra/scripts/publish-java-sdk.sh +++ b/infra/scripts/publish-java-sdk.sh @@ -69,4 +69,4 @@ gpg --import --batch --yes $GPG_KEY_IMPORT_DIR/private-key echo 
"============================================================" echo "Deploying Java SDK with revision: $REVISION" echo "============================================================" -mvn -f java/pom.xml --projects datatypes/java,sdk/java -Drevision=$REVISION --batch-mode clean deploy +mvn -f java/pom.xml --projects .,datatypes,sdk -Drevision=$REVISION --batch-mode clean deploy diff --git a/infra/templates/README.md.jinja2 b/infra/templates/README.md.jinja2 index a9277bb070..7d08c0d36f 100644 --- a/infra/templates/README.md.jinja2 +++ b/infra/templates/README.md.jinja2 @@ -21,9 +21,9 @@ Feast is an open source feature store for machine learning. Feast is the fastest Please see our [documentation](https://docs.feast.dev/) for more information about the project. ## 📐 Architecture - +![](docs/assets/feast-marchitecture.png) -The above architecture is the minimal Feast deployment. Want to run the full Feast on GCP/AWS? Click [here](https://docs.feast.dev/how-to-guides/feast-gcp-aws). +The above architecture is the minimal Feast deployment. Want to run the full Feast on Snowflake/GCP/AWS? Click [here](https://docs.feast.dev/how-to-guides/feast-snowflake-gcp-aws). ## 🐣 Getting Started @@ -133,7 +133,7 @@ pprint(feature_vector) Please refer to the official documentation at [Documentation](https://docs.feast.dev/) * [Quickstart](https://docs.feast.dev/getting-started/quickstart) * [Tutorials](https://docs.feast.dev/tutorials/tutorials-overview) - * [Running Feast with GCP/AWS](https://docs.feast.dev/how-to-guides/feast-gcp-aws) + * [Running Feast with Snowflake/GCP/AWS](https://docs.feast.dev/how-to-guides/feast-snowflake-gcp-aws) * [Change Log](https://github.com/feast-dev/feast/blob/master/CHANGELOG.md) * [Slack (#Feast)](https://slack.feast.dev/) diff --git a/java/CONTRIBUTING.md b/java/CONTRIBUTING.md index 1694b3f33f..86eacfef41 100644 --- a/java/CONTRIBUTING.md +++ b/java/CONTRIBUTING.md @@ -5,7 +5,6 @@ ### Overview This guide is targeted at developers looking to contribute to Feast components in the feast-java Repository: -- [Feast Core](#feast-core) - [Feast Serving](#feast-serving) - [Feast Java Client](#feast-java-client) @@ -15,11 +14,14 @@ the feast-java Repository: #### Common Setup Common Environment Setup for all feast-java Feast components: -1. . Ensure following development tools are installed: -- Java SE Development Kit 11, Maven 3.6, `make` + +Ensure following development tools are installed: +- Java SE Development Kit 11 +- Maven 3.6 +- `make` #### Code Style -feast-java's codebase conforms to the [Google Java Style Guide](https://google.github.io/styleguide/javaguide.html). +Feast's Java codebase conforms to the [Google Java Style Guide](https://google.github.io/styleguide/javaguide.html). Automatically format the code to conform the style guide by: @@ -59,82 +61,8 @@ Specifically, proto-generated code is not indexed by IntelliJ. To fix this, navi - target/generated-sources/protobuf/java - target/generated-sources/annotations - -## Feast Core -### Environment Setup -Setting up your development environment for Feast Core: -1. Complete the feast-java [Common Setup](#common-setup) -2. Boot up a PostgreSQL instance (version 11 and above). 
Example of doing so via Docker: -```sh -# spawn a PostgreSQL instance as a Docker container running in the background -docker run \ - --rm -it -d \ - --name postgres \ - -e POSTGRES_DB=postgres \ - -e POSTGRES_USER=postgres \ - -e POSTGRES_PASSWORD=password \ - -p 5432:5432 postgres:12-alpine -``` - -### Configuration -Feast Core is configured using it's [application.yml](https://docs.feast.dev/reference/configuration-reference#1-feast-core-and-feast-online-serving). - -### Building and Running -1. Build / Compile Feast Core with Maven to produce an executable Feast Core JAR -```sh -mvn package -pl core --also-make -Dmaven.test.skip=true -``` - -2. Run Feast Core using the built JAR: -```sh -# where X.X.X is the version of the Feast Core JAR built -java -jar core/target/feast-core-X.X.X-exec.jar -``` - -### Unit / Integration Tests -Unit & Integration Tests can be used to verify functionality: -```sh -# run unit tests -mvn test -pl core --also-make -# run integration tests -mvn verify -pl core --also-make -``` - ## Feast Serving -### Environment Setup -Setting up your development environment for Feast Serving: -1. Complete the feast-java [Common Setup](#common-setup) -2. Boot up a Redis instance (version 5.x). Example of doing so via Docker: -```sh -docker run --name redis --rm -it -d -p 6379:6379 redis:5-alpine -``` - -> Feast Serving requires a running Feast Core instance to retrieve Feature metadata -> in order to serve features. See the [Feast Core section](#feast-core) for -> how to get a Feast Core instance running. - -### Configuration -Feast Serving is configured using it's [application.yml](https://docs.feast.dev/reference/configuration-reference#1-feast-core-and-feast-online-serving). - -### Building and Running -1. Build / Compile Feast Serving with Maven to produce an executable Feast Serving JAR -```sh -mvn package -pl serving --also-make -Dmaven.test.skip=true - -2. Run Feast Serving using the built JAR: -```sh -# where X.X.X is the version of the Feast serving JAR built -java -jar serving/target/feast-serving-X.X.X-exec.jar -``` - -### Unit / Integration Tests -Unit & Integration Tests can be used to verify functionality: -```sh -# run unit tests -mvn test -pl serving --also-make -# run integration tests -mvn verify -pl serving --also-make -``` +See instructions [here](serving/README.md) for developing. ## Feast Java Client ### Environment Setup @@ -144,9 +72,6 @@ Setting up your development environment for Feast Java SDK: > Feast Java Client is a Java Client for retrieving Features from a running Feast Serving instance. > See the [Feast Serving Section](#feast-serving) section for how to get a Feast Serving instance running. -### Configuration -Feast Java Client is [configured as code](https://docs.feast.dev/v/master/reference/configuration-reference#4-feast-java-and-go-sdk) - ### Building 1. Build / Compile Feast Java Client with Maven: diff --git a/java/README.md b/java/README.md index 8d6141faa8..ff5a1b8553 100644 --- a/java/README.md +++ b/java/README.md @@ -1,5 +1,4 @@ # Feast Java components -[![complete](https://github.com/feast-dev/feast-java/actions/workflows/complete.yml/badge.svg)](https://github.com/feast-dev/feast-java/actions/workflows/complete.yml) ### Overview @@ -19,4 +18,4 @@ Guides on Contributing: - [Development Guide for feast-java (this repository)](CONTRIBUTING.md) ### Installing using Helm -Please see the Helm charts in [charts](https://github.com/feast-dev/feast-helm-charts). 
+Please see the Helm charts in [infra/charts/feast](../infra/charts/feast). diff --git a/java/common/pom.xml b/java/common/pom.xml index 0c5651876e..e5a648a7f9 100644 --- a/java/common/pom.xml +++ b/java/common/pom.xml @@ -33,13 +33,14 @@ dev.feast - datatypes-java + feast-datatypes ${project.version} compile com.google.protobuf protobuf-java-util + ${protobuf.version} @@ -52,75 +53,34 @@ org.projectlombok lombok + ${lombok.version} com.google.auto.value auto-value-annotations + ${auto.value.version} com.google.code.gson gson + ${gson.version} io.gsonfire gson-fire + ${gson.fire.version} com.fasterxml.jackson.core jackson-databind + 2.10.5.1 com.fasterxml.jackson.datatype jackson-datatype-jsr310 - - - - - org.springframework - spring-context-support - - - net.devh - grpc-server-spring-boot-starter - - - org.springframework.boot - spring-boot-starter-logging - - - - - org.springframework.boot - spring-boot-starter-data-jpa - - - org.springframework.boot - spring-boot-starter-web - - - org.hibernate.validator - hibernate-validator - 6.1.5.Final - - - - - org.springframework.security - spring-security-core - - - org.springframework.security - spring-security-config - - - org.springframework.security - spring-security-oauth2-resource-server - - - org.springframework.security - spring-security-oauth2-jose + 2.10.1 @@ -134,7 +94,6 @@ 0.3.1 - javax.xml.bind jaxb-api @@ -156,6 +115,7 @@ org.hamcrest hamcrest-library test + ${hamcrest.version} @@ -163,28 +123,12 @@ junit 4.13.2 - - org.springframework - spring-test - test - org.mockito mockito-core ${mockito.version} test - - org.springframework.boot - spring-boot-starter-test - test - - - org.junit.vintage - junit-vintage-engine - - - @@ -206,6 +150,13 @@ -Xms2048m -Xmx2048m -Djdk.net.URLClassPath.disableClassPathURLCheck=true + + org.sonatype.plugins + nexus-staging-maven-plugin + + true + + diff --git a/java/common/src/main/java/feast/common/logging/AuditLogger.java b/java/common/src/main/java/feast/common/logging/AuditLogger.java index 5f70fbfc97..f3538a794b 100644 --- a/java/common/src/main/java/feast/common/logging/AuditLogger.java +++ b/java/common/src/main/java/feast/common/logging/AuditLogger.java @@ -32,26 +32,23 @@ import org.slf4j.Marker; import org.slf4j.MarkerFactory; import org.slf4j.event.Level; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.info.BuildProperties; -import org.springframework.stereotype.Component; @Slf4j -@Component public class AuditLogger { private static final String FLUENTD_DESTINATION = "fluentd"; private static final Marker AUDIT_MARKER = MarkerFactory.getMarker("AUDIT_MARK"); private static FluentLogger fluentLogger; private static AuditLogProperties properties; - private static BuildProperties buildProperties; + private static String artifact; + private static String version; - @Autowired - public AuditLogger(LoggingProperties loggingProperties, BuildProperties buildProperties) { + public AuditLogger(LoggingProperties loggingProperties, String artifact, String version) { // Spring runs this constructor when creating the AuditLogger bean, // which allows us to populate the AuditLogger class with dependencies. 
// This allows us to use the dependencies in the AuditLogger's static methods AuditLogger.properties = loggingProperties.getAudit(); - AuditLogger.buildProperties = buildProperties; + AuditLogger.artifact = artifact; + AuditLogger.version = version; if (AuditLogger.properties.getMessageLogging() != null && AuditLogger.properties.getMessageLogging().isEnabled()) { AuditLogger.fluentLogger = @@ -69,12 +66,7 @@ public AuditLogger(LoggingProperties loggingProperties, BuildProperties buildPro * @param entryBuilder with all fields set except instance. */ public static void logMessage(Level level, MessageAuditLogEntry.Builder entryBuilder) { - log( - level, - entryBuilder - .setComponent(buildProperties.getArtifact()) - .setVersion(buildProperties.getVersion()) - .build()); + log(level, entryBuilder.setComponent(artifact).setVersion(version).build()); } /** @@ -90,10 +82,7 @@ public static void logAction( log( level, ActionAuditLogEntry.of( - buildProperties.getArtifact(), - buildProperties.getArtifact(), - LogResource.of(resourceType, resourceId), - action)); + artifact, version, LogResource.of(resourceType, resourceId), action)); } /** @@ -109,10 +98,7 @@ public static void logTransition( log( level, TransitionAuditLogEntry.of( - buildProperties.getArtifact(), - buildProperties.getArtifact(), - LogResource.of(resourceType, resourceId), - status)); + artifact, version, LogResource.of(resourceType, resourceId), status)); } /** diff --git a/java/common/src/main/java/feast/common/logging/interceptors/GrpcMessageInterceptor.java b/java/common/src/main/java/feast/common/logging/interceptors/GrpcMessageInterceptor.java index ffd7c6b954..661642a89a 100644 --- a/java/common/src/main/java/feast/common/logging/interceptors/GrpcMessageInterceptor.java +++ b/java/common/src/main/java/feast/common/logging/interceptors/GrpcMessageInterceptor.java @@ -30,10 +30,6 @@ import io.grpc.ServerInterceptor; import io.grpc.Status; import org.slf4j.event.Level; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.security.core.Authentication; -import org.springframework.security.core.context.SecurityContextHolder; -import org.springframework.stereotype.Component; /** * GrpcMessageInterceptor intercepts a GRPC calls to log handling of GRPC messages to the Audit Log. @@ -41,7 +37,6 @@ * name and assumed authenticated identity (if authentication is enabled). NOTE: * GrpcMessageInterceptor assumes that all service calls are unary (ie single request/response). */ -@Component public class GrpcMessageInterceptor implements ServerInterceptor { private LoggingProperties loggingProperties; @@ -50,7 +45,6 @@ public class GrpcMessageInterceptor implements ServerInterceptor { * * @param loggingProperties properties used to configure logging interceptor. */ - @Autowired public GrpcMessageInterceptor(LoggingProperties loggingProperties) { this.loggingProperties = loggingProperties; } @@ -80,9 +74,7 @@ public Listener interceptCall( entryBuilder.setMethod(fullMethodName.substring(fullMethodName.indexOf("/") + 1)); // Attempt Extract current authenticated identity. - Authentication authentication = SecurityContextHolder.getContext().getAuthentication(); - String identity = (authentication != null) ? 
getIdentity(authentication) : ""; - entryBuilder.setIdentity(identity); + entryBuilder.setIdentity(""); // Register forwarding call to intercept outgoing response and log to audit log call = @@ -115,13 +107,4 @@ public void onMessage(ReqT message) { } }; } - - /** - * Extract current authenticated identity from given {@link Authentication}. Extracts subject - * claim if specified in AuthorizationProperties, otherwise returns authentication subject. - */ - private String getIdentity(Authentication authentication) { - // use subject claim as identity if set in security authorization properties - return authentication.getName(); - } } diff --git a/java/common/src/test/java/feast/common/logging/entry/AuditLogEntryTest.java b/java/common/src/test/java/feast/common/logging/entry/AuditLogEntryTest.java index 0c96ee9c56..bc3dcbcf74 100644 --- a/java/common/src/test/java/feast/common/logging/entry/AuditLogEntryTest.java +++ b/java/common/src/test/java/feast/common/logging/entry/AuditLogEntryTest.java @@ -21,11 +21,12 @@ import com.google.gson.JsonObject; import com.google.gson.JsonParser; +import com.google.protobuf.Timestamp; import feast.common.logging.entry.LogResource.ResourceType; +import feast.proto.serving.ServingAPIProto; import feast.proto.serving.ServingAPIProto.FeatureReferenceV2; import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2; import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse.FieldValues; import feast.proto.types.ValueProto.Value; import io.grpc.Status; import java.util.Arrays; @@ -50,15 +51,24 @@ public List getTestAuditLogs() { GetOnlineFeaturesResponse responseSpec = GetOnlineFeaturesResponse.newBuilder() - .addAllFieldValues( + .setMetadata( + ServingAPIProto.GetOnlineFeaturesResponseMetadata.newBuilder() + .setFeatureNames( + ServingAPIProto.FeatureList.newBuilder() + .addAllVal( + Arrays.asList( + "featuretable_1:feature_1", "featuretable_1:feature2")))) + .addAllResults( Arrays.asList( - FieldValues.newBuilder() - .putFields( - "featuretable_1:feature_1", Value.newBuilder().setInt32Val(32).build()) + GetOnlineFeaturesResponse.FeatureVector.newBuilder() + .addValues(Value.newBuilder().setInt32Val(32).build()) + .addStatuses(ServingAPIProto.FieldStatus.PRESENT) + .addEventTimestamps(Timestamp.newBuilder().build()) .build(), - FieldValues.newBuilder() - .putFields( - "featuretable_1:feature2", Value.newBuilder().setInt32Val(64).build()) + GetOnlineFeaturesResponse.FeatureVector.newBuilder() + .addValues(Value.newBuilder().setInt32Val(64).build()) + .addStatuses(ServingAPIProto.FieldStatus.PRESENT) + .addEventTimestamps(Timestamp.newBuilder().build()) .build())) .build(); diff --git a/java/datatypes/java/README.md b/java/datatypes/README.md similarity index 100% rename from java/datatypes/java/README.md rename to java/datatypes/README.md diff --git a/java/datatypes/java/src/main/proto/feast b/java/datatypes/java/src/main/proto/feast deleted file mode 120000 index 53364e5f45..0000000000 --- a/java/datatypes/java/src/main/proto/feast +++ /dev/null @@ -1 +0,0 @@ -../../../../../../protos/feast \ No newline at end of file diff --git a/java/datatypes/java/pom.xml b/java/datatypes/pom.xml similarity index 83% rename from java/datatypes/java/pom.xml rename to java/datatypes/pom.xml index fe6c380a10..a4f1932ada 100644 --- a/java/datatypes/java/pom.xml +++ b/java/datatypes/pom.xml @@ -27,16 +27,16 @@ definitions included in the package. 
- 11 - 11 + 8 + 8 - datatypes-java + feast-datatypes dev.feast feast-parent ${revision} - ../.. + ../ @@ -75,6 +75,13 @@ + + org.sonatype.plugins + nexus-staging-maven-plugin + + false + + @@ -83,29 +90,34 @@ com.google.guava guava + ${guava.version} com.google.protobuf protobuf-java + ${protobuf.version} io.grpc grpc-core + ${grpc.version} io.grpc grpc-protobuf + ${grpc.version} io.grpc grpc-services + ${grpc.version} io.grpc grpc-stub + ${grpc.version} - javax.annotation javax.annotation-api diff --git a/java/datatypes/src/main/proto/feast b/java/datatypes/src/main/proto/feast new file mode 120000 index 0000000000..463e4045de --- /dev/null +++ b/java/datatypes/src/main/proto/feast @@ -0,0 +1 @@ +../../../../../protos/feast \ No newline at end of file diff --git a/java/docs/coverage/java/pom.xml b/java/docs/coverage/pom.xml similarity index 96% rename from java/docs/coverage/java/pom.xml rename to java/docs/coverage/pom.xml index 5f79422496..f6e08909ee 100644 --- a/java/docs/coverage/java/pom.xml +++ b/java/docs/coverage/pom.xml @@ -30,7 +30,7 @@ dev.feast feast-parent ${revision} - ../../.. + ../.. Feast Coverage Java @@ -61,7 +61,7 @@ dev.feast - feast-sdk + feast-serving-client ${project.version} diff --git a/java/infra/docker/feature-server/Dockerfile b/java/infra/docker/feature-server/Dockerfile index a07d3301b2..dbd8c91472 100644 --- a/java/infra/docker/feature-server/Dockerfile +++ b/java/infra/docker/feature-server/Dockerfile @@ -7,14 +7,14 @@ FROM maven:3.6-jdk-11 as builder WORKDIR /build COPY java/pom.xml . -COPY java/datatypes/java/pom.xml datatypes/java/pom.xml +COPY java/datatypes/pom.xml datatypes/pom.xml COPY java/common/pom.xml common/pom.xml COPY java/serving/pom.xml serving/pom.xml COPY java/storage/api/pom.xml storage/api/pom.xml COPY java/storage/connectors/pom.xml storage/connectors/pom.xml COPY java/storage/connectors/redis/pom.xml storage/connectors/redis/pom.xml -COPY java/sdk/java/pom.xml sdk/java/pom.xml -COPY java/docs/coverage/java/pom.xml docs/coverage/java/pom.xml +COPY java/sdk/pom.xml sdk/pom.xml +COPY java/docs/coverage/pom.xml docs/coverage/pom.xml # Setting Maven repository .m2 directory relative to /build folder gives the # user to optionally use cached repository when building the image by copying @@ -24,7 +24,7 @@ COPY java/pom.xml .m2/* .m2/ RUN mvn dependency:go-offline -DexcludeGroupIds:dev.feast 2>/dev/null || true COPY java/ . 
-COPY protos/feast datatypes/java/src/main/proto/feast +COPY protos/feast datatypes/src/main/proto/feast ARG VERSION=dev RUN mvn --also-make --projects serving -Drevision=$VERSION \ diff --git a/java/pom.xml b/java/pom.xml index 38f037431c..013e2c9701 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -28,17 +28,17 @@ pom - datatypes/java + datatypes storage/api storage/connectors serving - sdk/java - docs/coverage/java + sdk + docs/coverage common - 0.15.2-SNAPSHOT + 0.18.2-SNAPSHOT https://github.com/feast-dev/feast UTF-8 @@ -46,11 +46,7 @@ 1.30.2 3.12.2 - 3.12.2 - 2.3.1.RELEASE - 5.2.7.RELEASE - 5.3.0.RELEASE - 2.9.0.RELEASE + 3.16.1 1.111.1 0.8.0 1.9.10 @@ -62,7 +58,6 @@ 2.17.1 2.9.9 2.0.2 - 2.5.0.RELEASE 1.18.12 1.8.4 2.8.6 @@ -72,9 +67,9 @@ 2.3.1 1.3.2 2.0.1.Final - 2.8.0 0.20.0 1.6.6 + 29.0-jre - - org.apache.commons - commons-lang3 - ${commons.lang3.version} - - - - com.google.inject - guice - 5.0.1 - - - - - com.google.cloud - google-cloud-bigquery - ${com.google.cloud.version} - - - com.google.cloud - google-cloud-storage - ${com.google.cloud.version} - - - - - com.google.cloud - google-cloud-nio - 0.83.0-alpha - - - - io.opencensus - opencensus-api - ${opencensus.version} - - - io.opencensus - opencensus-contrib-grpc-util - ${opencensus.version} - - - io.opencensus - opencensus-contrib-http-util - ${opencensus.version} - - - - - io.grpc - grpc-core - ${grpc.version} - - - io.grpc - grpc-api - ${grpc.version} - - - io.grpc - grpc-context - ${grpc.version} - - - io.grpc - grpc-all - ${grpc.version} - - - io.grpc - grpc-okhttp - ${grpc.version} - - - io.grpc - grpc-auth - ${grpc.version} - - - io.grpc - grpc-grpclb - ${grpc.version} - - - io.grpc - grpc-alts - ${grpc.version} - - - io.grpc - grpc-netty - ${grpc.version} - - - io.grpc - grpc-netty-shaded - ${grpc.version} - - - io.grpc - grpc-protobuf - ${grpc.version} - - - io.grpc - grpc-services - ${grpc.version} - - - io.grpc - grpc-stub - ${grpc.version} - - - io.grpc - grpc-testing - ${grpc.version} - test - - - - - org.apache.arrow - arrow-java-root - 5.0.0 - pom - - - - - org.apache.arrow - arrow-vector - 5.0.0 - - - - - org.apache.arrow - arrow-memory - 5.0.0 - pom - - - - - org.apache.arrow - arrow-memory-netty - 5.0.0 - runtime - - - - - net.devh - grpc-server-spring-boot-starter - ${grpc.spring.boot.starter.version} - - - - - io.prometheus - simpleclient - ${io.prometheus.version} - - - io.prometheus - simpleclient_servlet - ${io.prometheus.version} - - - - - org.springframework.security - spring-security-core - ${spring.security.version} - - - org.springframework.security - spring-security-config - ${spring.security.version} - - - org.springframework.security - spring-security-oauth2-resource-server - ${spring.security.version} - - - org.springframework.security - spring-security-oauth2-jose - ${spring.security.version} - - - com.google.auth - google-auth-library-oauth2-http - ${google.auth.library.oauth2.http.version} - - - - - joda-time - joda-time - ${joda.time.version} - - - com.datadoghq - java-dogstatsd-client - 2.6.1 - - - com.google.guava - guava - 29.0-jre - - - com.google.protobuf - protobuf-java - ${protobuf.version} - - - com.google.protobuf - protobuf-java-util - ${protobuf.version} - - - org.projectlombok - lombok - ${lombok.version} - provided - - - com.google.auto.value - auto-value-annotations - ${auto.value.version} - - - com.google.auto.value - auto-value - ${auto.value.version} - - - com.google.code.gson - gson - ${gson.version} - - - io.gsonfire - gson-fire - ${gson.fire.version} - - - - 
com.github.kstyrc - embedded-redis - 0.6 - test - - - - - - net.bytebuddy - byte-buddy - ${byte-buddy.version} - - - org.mockito - mockito-core - ${mockito.version} - test - - - org.springframework.boot - spring-boot-starter-web - ${spring.boot.version} - - - org.springframework.boot - spring-boot-starter-logging - - - - - org.apache.logging.log4j - log4j-api - ${log4jVersion} - - - org.apache.logging.log4j - log4j-core - ${log4jVersion} - - - org.apache.logging.log4j - log4j-jul - ${log4jVersion} - - - org.apache.logging.log4j - log4j-web - ${log4jVersion} - org.apache.logging.log4j log4j-slf4j-impl @@ -462,26 +156,6 @@ 1.7.30 - - - - org.springframework.boot - spring-boot-dependencies - ${spring.boot.version} - pom - import - - - com.squareup.okio - okio - 1.17.2 - javax.xml.bind jaxb-api @@ -497,6 +171,19 @@ validation-api ${javax.validation.version} + + + org.junit.platform + junit-platform-engine + 1.8.2 + test + + + org.junit.platform + junit-platform-commons + 1.8.2 + test + @@ -535,7 +222,7 @@ - feast.proto.*:io.grpc.*:org.tensorflow.* + io.grpc.*:org.tensorflow.* @@ -693,22 +380,6 @@ - - org.springframework.boot - spring-boot-maven-plugin - - - true - - - - build-info - - build-info - - - - org.sonatype.plugins @@ -720,6 +391,7 @@ https://oss.sonatype.org/ true + true @@ -137,6 +142,13 @@ org.jacoco jacoco-maven-plugin + + org.sonatype.plugins + nexus-staging-maven-plugin + + false + + diff --git a/java/sdk/java/src/main/java/dev/feast/FeastClient.java b/java/sdk/src/main/java/dev/feast/FeastClient.java similarity index 98% rename from java/sdk/java/src/main/java/dev/feast/FeastClient.java rename to java/sdk/src/main/java/dev/feast/FeastClient.java index e9aaab151a..c10a76ecf8 100644 --- a/java/sdk/java/src/main/java/dev/feast/FeastClient.java +++ b/java/sdk/src/main/java/dev/feast/FeastClient.java @@ -21,7 +21,7 @@ import feast.proto.serving.ServingAPIProto.GetFeastServingInfoRequest; import feast.proto.serving.ServingAPIProto.GetFeastServingInfoResponse; import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequest; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponseV2; +import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse; import feast.proto.serving.ServingServiceGrpc; import feast.proto.serving.ServingServiceGrpc.ServingServiceBlockingStub; import feast.proto.types.ValueProto; @@ -129,7 +129,7 @@ public List getOnlineFeatures(List featureRefs, List entities) requestBuilder.putAllEntities(getEntityValuesMap(entities)); - GetOnlineFeaturesResponseV2 response = stub.getOnlineFeatures(requestBuilder.build()); + GetOnlineFeaturesResponse response = stub.getOnlineFeatures(requestBuilder.build()); List results = Lists.newArrayList(); if (response.getResultsCount() == 0) { diff --git a/java/sdk/java/src/main/java/dev/feast/RequestUtil.java b/java/sdk/src/main/java/dev/feast/RequestUtil.java similarity index 100% rename from java/sdk/java/src/main/java/dev/feast/RequestUtil.java rename to java/sdk/src/main/java/dev/feast/RequestUtil.java diff --git a/java/sdk/java/src/main/java/dev/feast/Row.java b/java/sdk/src/main/java/dev/feast/Row.java similarity index 100% rename from java/sdk/java/src/main/java/dev/feast/Row.java rename to java/sdk/src/main/java/dev/feast/Row.java diff --git a/java/sdk/java/src/main/java/dev/feast/SecurityConfig.java b/java/sdk/src/main/java/dev/feast/SecurityConfig.java similarity index 100% rename from java/sdk/java/src/main/java/dev/feast/SecurityConfig.java rename to java/sdk/src/main/java/dev/feast/SecurityConfig.java 
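
For orientation, the SDK change above swaps `GetOnlineFeaturesResponseV2` for `GetOnlineFeaturesResponse`, whose results are parallel `FeatureVector`s instead of per-row `FieldValues`. Below is a minimal sketch of building and reading that shape, using only the proto builders that already appear in this diff; the feature name, value, and timestamp are illustrative, not taken from any real deployment.

```java
import com.google.protobuf.Timestamp;
import feast.proto.serving.ServingAPIProto;
import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse;
import feast.proto.types.ValueProto.Value;

public class ResponseShapeSketch {
  public static void main(String[] args) {
    // Build a response the same way the updated tests do: feature names in metadata,
    // one FeatureVector per feature with parallel values/statuses/event timestamps.
    GetOnlineFeaturesResponse response =
        GetOnlineFeaturesResponse.newBuilder()
            .setMetadata(
                ServingAPIProto.GetOnlineFeaturesResponseMetadata.newBuilder()
                    .setFeatureNames(
                        ServingAPIProto.FeatureList.newBuilder()
                            .addVal("driver_hourly_stats:conv_rate")))
            .addResults(
                GetOnlineFeaturesResponse.FeatureVector.newBuilder()
                    .addValues(Value.newBuilder().setDoubleVal(0.81))
                    .addStatuses(ServingAPIProto.FieldStatus.PRESENT)
                    .addEventTimestamps(Timestamp.newBuilder().setSeconds(1631725200)))
            .build();

    // Read it back: the i-th vector corresponds to the i-th feature name, and the
    // entries inside a vector line up with the entity rows of the request.
    String featureName = response.getMetadata().getFeatureNames().getVal(0);
    GetOnlineFeaturesResponse.FeatureVector vector = response.getResults(0);
    System.out.println(
        featureName + " = " + vector.getValues(0).getDoubleVal()
            + " (" + vector.getStatuses(0) + ")");
  }
}
```

In other words, the new layout is feature-major: `metadata.feature_names` lists the requested features, each `FeatureVector` holds one feature's values across entity rows, and values, statuses, and event timestamps within a vector are index-aligned.
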
diff --git a/java/sdk/java/src/test/java/dev/feast/FeastClientTest.java b/java/sdk/src/test/java/dev/feast/FeastClientTest.java similarity index 93% rename from java/sdk/java/src/test/java/dev/feast/FeastClientTest.java rename to java/sdk/src/test/java/dev/feast/FeastClientTest.java index 3de5142a85..1dfb9989c9 100644 --- a/java/sdk/java/src/test/java/dev/feast/FeastClientTest.java +++ b/java/sdk/src/test/java/dev/feast/FeastClientTest.java @@ -24,7 +24,7 @@ import feast.proto.serving.ServingAPIProto; import feast.proto.serving.ServingAPIProto.FieldStatus; import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequest; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponseV2; +import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse; import feast.proto.serving.ServingServiceGrpc.ServingServiceImplBase; import feast.proto.types.ValueProto; import feast.proto.types.ValueProto.Value; @@ -57,7 +57,7 @@ public class FeastClientTest { @Override public void getOnlineFeatures( GetOnlineFeaturesRequest request, - StreamObserver responseObserver) { + StreamObserver responseObserver) { if (!request.equals(FeastClientTest.getFakeRequest())) { responseObserver.onError(Status.FAILED_PRECONDITION.asRuntimeException()); } @@ -137,22 +137,22 @@ private static GetOnlineFeaturesRequest getFakeRequest() { .build(); } - private static GetOnlineFeaturesResponseV2 getFakeResponse() { - return GetOnlineFeaturesResponseV2.newBuilder() + private static GetOnlineFeaturesResponse getFakeResponse() { + return GetOnlineFeaturesResponse.newBuilder() .addResults( - GetOnlineFeaturesResponseV2.FeatureVector.newBuilder() + GetOnlineFeaturesResponse.FeatureVector.newBuilder() .addValues(strValue("david")) .addStatuses(FieldStatus.PRESENT) .addEventTimestamps(Timestamp.newBuilder()) .build()) .addResults( - GetOnlineFeaturesResponseV2.FeatureVector.newBuilder() + GetOnlineFeaturesResponse.FeatureVector.newBuilder() .addValues(intValue(3)) .addStatuses(FieldStatus.PRESENT) .addEventTimestamps(Timestamp.newBuilder()) .build()) .addResults( - GetOnlineFeaturesResponseV2.FeatureVector.newBuilder() + GetOnlineFeaturesResponse.FeatureVector.newBuilder() .addValues(Value.newBuilder().build()) .addStatuses(FieldStatus.NULL_VALUE) .addEventTimestamps(Timestamp.newBuilder()) diff --git a/java/sdk/java/src/test/java/dev/feast/RequestUtilTest.java b/java/sdk/src/test/java/dev/feast/RequestUtilTest.java similarity index 96% rename from java/sdk/java/src/test/java/dev/feast/RequestUtilTest.java rename to java/sdk/src/test/java/dev/feast/RequestUtilTest.java index 21fb145b24..e5684ecd18 100644 --- a/java/sdk/java/src/test/java/dev/feast/RequestUtilTest.java +++ b/java/sdk/src/test/java/dev/feast/RequestUtilTest.java @@ -21,7 +21,6 @@ import com.google.common.collect.ImmutableList; import com.google.protobuf.TextFormat; -import feast.common.models.Feature; import feast.proto.serving.ServingAPIProto.FeatureReferenceV2; import java.util.Arrays; import java.util.Comparator; @@ -68,7 +67,9 @@ void renderFeatureRef_ShouldReturnFeatureRefString( List expected, List input) { input = input.stream().map(ref -> ref.toBuilder().build()).collect(Collectors.toList()); List actual = - input.stream().map(ref -> Feature.getFeatureReference(ref)).collect(Collectors.toList()); + input.stream() + .map(ref -> String.format("%s:%s", ref.getFeatureViewName(), ref.getFeatureName())) + .collect(Collectors.toList()); assertEquals(expected.size(), actual.size()); for (int i = 0; i < expected.size(); i++) { 
assertEquals(expected.get(i), actual.get(i)); diff --git a/java/serving/README.md b/java/serving/README.md index cce8c7d6e2..5ac7194924 100644 --- a/java/serving/README.md +++ b/java/serving/README.md @@ -1,101 +1,139 @@ -### Getting Started Guide for Feast Serving Developers +## Getting Started Guide for Feast Serving Developers -Pre-requisites: +### Overview +This guide is targeted at developers looking to contribute to Feast Serving: +- [Building and running Feast Serving locally](#building-and-running-feast-serving-locally) + +### Pre-requisites: - [Maven](https://maven.apache.org/install.html) build tool version 3.6.x -- A running Feast Core instance -- A running Store instance e.g. local Redis Store instance +- A Feast feature repo (e.g. https://github.com/feast-dev/feast-demo) +- A running Store instance e.g. local Redis instance with `redis-server` -From the Feast project root directory, run the following Maven command to start Feast Serving gRPC service running on port 6566 locally: +### Building and running Feast Serving locally: +From the Feast GitHub root, run: -```bash -# Assumptions: -# - Local Feast Core is running on localhost:6565 -# Uses configuration from serving/src/main/resources/application.yml -mvn -pl serving spring-boot:run -Dspring-boot.run.arguments=\ ---feast.core-host=localhost,\ ---feast.core-port=6565 -``` +1. `mvn -f java/pom.xml install -Dmaven.test.skip=true` +2. Package an executable jar for serving: `mvn -f java/serving/pom.xml package -Dmaven.test.skip=true` +3. Make a file called `application-override.yaml` that specifies your Feast repo project and registry path: + 1. Note if you have a remote registry, you can specify that too (e.g. `gs://...`) + ```yaml + feast: + project: feast_demo + registry: /Users/[your username]/GitHub/feast-demo/feature_repo/data/registry.db + ``` + 2. An example of if you're using Redis with a remote registry: + ```yaml + feast: + project: feast_java_demo + registry: gs://[YOUR BUCKET]/demo-repo/registry.db + activeStore: online + stores: + - name: online + type: REDIS + config: + host: localhost + port: 6379 + password: [YOUR PASSWORD] + ``` +4. Run the jar with dependencies that was built from Maven (note the version might vary): + ``` + java \ + -Xms1g \ + -Xmx4g \ + -jar java/serving/target/feast-serving-0.17.1-SNAPSHOT-jar-with-dependencies.jar \ + classpath:/application.yml,file:./application-override.yaml + ``` +5. Now you have a Feast Serving gRPC service running on port 6566 locally! 
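
Besides `grpc_cli`, the Java client in this repository (`java/sdk`) can exercise the same locally running server. A rough sketch under the assumptions of this guide (the `feast-demo` repo, a `driver_id` entity, and the `driver_hourly_stats` features); method names follow the `dev.feast` client touched elsewhere in this diff, but treat this as a sketch rather than a tested snippet:

```java
import dev.feast.FeastClient;
import dev.feast.Row;
import java.util.Arrays;
import java.util.List;

public class LocalServingSmokeTest {
  public static void main(String[] args) throws Exception {
    // Points at the server started in step 4; adjust feature refs and entity keys
    // to whatever your feature repo actually defines.
    FeastClient client = FeastClient.create("localhost", 6566);
    List<Row> rows =
        client.getOnlineFeatures(
            Arrays.asList("driver_hourly_stats:conv_rate", "driver_hourly_stats:acc_rate"),
            Arrays.asList(
                Row.create().set("driver_id", 1001L),
                Row.create().set("driver_id", 1002L)));
    rows.forEach(row -> System.out.println(row.getFields()));
    client.close();
  }
}
```

The printed values should mirror the `grpc_cli` output shown in the next section.
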
+### Running test queries If you have [grpc_cli](https://github.com/grpc/grpc/blob/master/doc/command_line_tool.md) installed, you can check that Feast Serving is running ``` grpc_cli ls localhost:6566 -grpc_cli call localhost:6566 GetFeastServingVersion '' -grpc_cli call localhost:6566 GetFeastServingType '' ``` +An example of fetching features ```bash -grpc_cli call localhost:6565 ApplyFeatureSet ' -feature_set { - name: "driver" - entities { - name: "driver_id" - value_type: STRING - } - features { - name: "city" - value_type: STRING - } - features { - name: "booking_completed_count" - value_type: INT64 - } - source { - type: KAFKA - kafka_source_config { - bootstrap_servers: "localhost:9092" +grpc_cli call localhost:6566 GetOnlineFeatures ' +features { + val: "driver_hourly_stats:conv_rate" + val: "driver_hourly_stats:acc_rate" +} +entities { + key: "driver_id" + value { + val { + int64_val: 1001 + } + val { + int64_val: 1002 } } } ' - -grpc_cli call localhost:6565 GetFeatureSets ' -filter { - feature_set_name: "driver" +``` +Example output: +``` +connecting to localhost:6566 +metadata { + feature_names { + val: "driver_hourly_stats:conv_rate" + val: "driver_hourly_stats:acc_rate" + } } -' - -grpc_cli call localhost:6566 GetBatchFeatures ' -feature_sets { - name: "driver" - feature_names: "booking_completed_count" - max_age { - seconds: 86400 +results { + values { + float_val: 0.812357187 + } + values { + float_val: 0.379484832 + } + statuses: PRESENT + statuses: PRESENT + event_timestamps { + seconds: 1631725200 + } + event_timestamps { + seconds: 1631725200 } } -entity_dataset { - entity_names: "driver_id" - entity_dataset_rows { - entity_timestamp { - seconds: 1569873954 - } +results { + values { + float_val: 0.840873241 + } + values { + float_val: 0.151376978 + } + statuses: PRESENT + statuses: PRESENT + event_timestamps { + seconds: 1631725200 + } + event_timestamps { + seconds: 1631725200 } } -' +Rpc succeeded with OK status ``` +### Debugging Feast Serving +You can debug this like any other Java executable. 
Swap the java command above with: ``` -python3 < org.apache.maven.plugins maven-jar-plugin + 3.2.2 @@ -87,10 +88,15 @@ dev.feast - datatypes-java + feast-datatypes ${project.version} + + dev.feast + feast-common + ${project.version} + dev.feast @@ -119,38 +125,50 @@ org.slf4j slf4j-simple + 1.7.30 org.apache.logging.log4j log4j-web + ${log4jVersion} io.grpc grpc-services + ${grpc.version} io.grpc grpc-stub + ${grpc.version} + + + io.grpc + grpc-netty-shaded + ${grpc.version} com.google.protobuf protobuf-java-util + ${protobuf.version} com.google.guava guava + ${guava.version} joda-time joda-time + ${joda.time.version} @@ -198,7 +216,7 @@ com.google.auto.value auto-value-annotations - 1.6.6 + ${auto.value.version} @@ -231,11 +249,13 @@ io.grpc grpc-testing + ${grpc.version} org.mockito mockito-core + ${mockito.version} test @@ -281,11 +301,19 @@ com.fasterxml.jackson.dataformat jackson-dataformat-yaml + 2.11.0 + + + + com.fasterxml.jackson.core + jackson-annotations + 2.12.2 com.github.kstyrc embedded-redis + 0.6 test @@ -340,6 +368,13 @@ false + + org.sonatype.plugins + nexus-staging-maven-plugin + + true + + diff --git a/java/serving/src/main/java/feast/serving/config/ApplicationProperties.java b/java/serving/src/main/java/feast/serving/config/ApplicationProperties.java index 2e2448ca90..791c871e59 100644 --- a/java/serving/src/main/java/feast/serving/config/ApplicationProperties.java +++ b/java/serving/src/main/java/feast/serving/config/ApplicationProperties.java @@ -147,38 +147,46 @@ public TracingProperties getTracing() { public LoggingProperties getLogging() { return logging; } - } - private FeastProperties feast; + private String gcpProject; - public void setFeast(FeastProperties feast) { - this.feast = feast; - } + public String getGcpProject() { + return gcpProject; + } - public FeastProperties getFeast() { - return feast; - } + public void setGcpProject(String gcpProject) { + this.gcpProject = gcpProject; + } - private String gcpProject; + public void setAwsRegion(String awsRegion) { + this.awsRegion = awsRegion; + } - public String getGcpProject() { - return gcpProject; - } + private String awsRegion; - public void setAwsRegion(String awsRegion) { - this.awsRegion = awsRegion; - } + public String getAwsRegion() { + return awsRegion; + } + + private String transformationServiceEndpoint; - private String awsRegion; + public String getTransformationServiceEndpoint() { + return transformationServiceEndpoint; + } - public String getAwsRegion() { - return awsRegion; + public void setTransformationServiceEndpoint(String transformationServiceEndpoint) { + this.transformationServiceEndpoint = transformationServiceEndpoint; + } } - private String transformationServiceEndpoint; + private FeastProperties feast; - public String getTransformationServiceEndpoint() { - return transformationServiceEndpoint; + public void setFeast(FeastProperties feast) { + this.feast = feast; + } + + public FeastProperties getFeast() { + return feast; } /** Store configuration class for database that this Feast Serving uses. 
*/ @@ -263,6 +271,10 @@ public static class Server { public int getPort() { return port; } + + public void setPort(int port) { + this.port = port; + } } public static class GrpcServer { @@ -271,6 +283,10 @@ public static class GrpcServer { public Server getServer() { return server; } + + public void setServer(Server server) { + this.server = server; + } } public static class RestServer { @@ -279,6 +295,10 @@ public static class RestServer { public Server getServer() { return server; } + + public void setServer(Server server) { + this.server = server; + } } private GrpcServer grpc; @@ -288,10 +308,18 @@ public GrpcServer getGrpc() { return grpc; } + public void setGrpc(GrpcServer grpc) { + this.grpc = grpc; + } + public RestServer getRest() { return rest; } + public void setRest(RestServer rest) { + this.rest = rest; + } + public enum StoreType { REDIS, REDIS_CLUSTER; diff --git a/java/serving/src/main/java/feast/serving/config/RegistryConfig.java b/java/serving/src/main/java/feast/serving/config/RegistryConfig.java index d23ab374d8..3e7cbe3f1f 100644 --- a/java/serving/src/main/java/feast/serving/config/RegistryConfig.java +++ b/java/serving/src/main/java/feast/serving/config/RegistryConfig.java @@ -31,7 +31,7 @@ public class RegistryConfig extends AbstractModule { @Provides Storage googleStorage(ApplicationProperties applicationProperties) { return StorageOptions.newBuilder() - .setProjectId(applicationProperties.getGcpProject()) + .setProjectId(applicationProperties.getFeast().getGcpProject()) .build() .getService(); } @@ -39,7 +39,7 @@ Storage googleStorage(ApplicationProperties applicationProperties) { @Provides public AmazonS3 awsStorage(ApplicationProperties applicationProperties) { return AmazonS3ClientBuilder.standard() - .withRegion(applicationProperties.getAwsRegion()) + .withRegion(applicationProperties.getFeast().getAwsRegion()) .build(); } diff --git a/java/serving/src/main/java/feast/serving/config/ServerModule.java b/java/serving/src/main/java/feast/serving/config/ServerModule.java index cb3a18cf95..5428306f2b 100644 --- a/java/serving/src/main/java/feast/serving/config/ServerModule.java +++ b/java/serving/src/main/java/feast/serving/config/ServerModule.java @@ -18,9 +18,12 @@ import com.google.inject.AbstractModule; import com.google.inject.Provides; +import feast.serving.controller.HealthServiceController; import feast.serving.grpc.OnlineServingGrpcServiceV2; +import feast.serving.service.ServingServiceV2; import io.grpc.Server; import io.grpc.ServerBuilder; +import io.grpc.health.v1.HealthGrpc; import io.grpc.protobuf.services.ProtoReflectionService; import io.opentracing.contrib.grpc.TracingServerInterceptor; @@ -35,13 +38,20 @@ protected void configure() { public Server provideGrpcServer( ApplicationProperties applicationProperties, OnlineServingGrpcServiceV2 onlineServingGrpcServiceV2, - TracingServerInterceptor tracingServerInterceptor) { + TracingServerInterceptor tracingServerInterceptor, + HealthGrpc.HealthImplBase healthImplBase) { ServerBuilder serverBuilder = ServerBuilder.forPort(applicationProperties.getGrpc().getServer().getPort()); serverBuilder .addService(ProtoReflectionService.newInstance()) - .addService(tracingServerInterceptor.intercept(onlineServingGrpcServiceV2)); + .addService(tracingServerInterceptor.intercept(onlineServingGrpcServiceV2)) + .addService(healthImplBase); return serverBuilder.build(); } + + @Provides + public HealthGrpc.HealthImplBase healthService(ServingServiceV2 servingServiceV2) { + return new 
HealthServiceController(servingServiceV2); + } } diff --git a/java/serving/src/main/java/feast/serving/config/ServingApiConfiguration.java b/java/serving/src/main/java/feast/serving/config/ServingApiConfiguration.java deleted file mode 100644 index ce4fe13437..0000000000 --- a/java/serving/src/main/java/feast/serving/config/ServingApiConfiguration.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package feast.serving.config; - -import java.util.List; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.Configuration; -import org.springframework.http.converter.HttpMessageConverter; -import org.springframework.http.converter.protobuf.ProtobufJsonFormatHttpMessageConverter; -import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; - -@Configuration -public class ServingApiConfiguration implements WebMvcConfigurer { - @Autowired private ProtobufJsonFormatHttpMessageConverter protobufConverter; - - @Bean - ProtobufJsonFormatHttpMessageConverter protobufHttpMessageConverter() { - return new ProtobufJsonFormatHttpMessageConverter(); - } - - @Override - public void configureMessageConverters(List> converters) { - converters.add(protobufConverter); - } -} diff --git a/java/serving/src/main/java/feast/serving/config/ServingServiceConfigV2.java b/java/serving/src/main/java/feast/serving/config/ServingServiceConfigV2.java index d3fe1ba116..4ea0692ccd 100644 --- a/java/serving/src/main/java/feast/serving/config/ServingServiceConfigV2.java +++ b/java/serving/src/main/java/feast/serving/config/ServingServiceConfigV2.java @@ -68,10 +68,10 @@ public ServingServiceV2 registryBasedServingServiceV2( log.info("Working Directory = " + System.getProperty("user.dir")); - final String transformationServiceEndpoint = - applicationProperties.getTransformationServiceEndpoint(); final OnlineTransformationService onlineTransformationService = - new OnlineTransformationService(transformationServiceEndpoint, registryRepository); + new OnlineTransformationService( + applicationProperties.getFeast().getTransformationServiceEndpoint(), + registryRepository); servingService = new OnlineServingServiceV2( diff --git a/java/serving/src/main/java/feast/serving/config/WebSecurityConfig.java b/java/serving/src/main/java/feast/serving/config/WebSecurityConfig.java deleted file mode 100644 index 04d3f4b5af..0000000000 --- a/java/serving/src/main/java/feast/serving/config/WebSecurityConfig.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2020 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package feast.serving.config; - -import org.springframework.context.annotation.Configuration; -import org.springframework.security.config.annotation.web.builders.HttpSecurity; -import org.springframework.security.config.annotation.web.configuration.EnableWebSecurity; -import org.springframework.security.config.annotation.web.configuration.WebSecurityConfigurerAdapter; - -/** - * WebSecurityConfig disables auto configuration of Spring HTTP Security and allows security methods - * to be overridden - */ -@Configuration -@EnableWebSecurity -public class WebSecurityConfig extends WebSecurityConfigurerAdapter { - - /** - * Allows for custom web security rules to be applied. - * - * @param http {@link HttpSecurity} for configuring web based security - * @throws Exception exception - */ - @Override - protected void configure(HttpSecurity http) throws Exception { - - // Bypasses security/authentication for the following paths - http.authorizeRequests() - .antMatchers("/actuator/**", "/metrics/**") - .permitAll() - .anyRequest() - .authenticated() - .and() - .csrf() - .disable(); - } -} diff --git a/java/serving/src/main/java/feast/serving/controller/HealthServiceController.java b/java/serving/src/main/java/feast/serving/controller/HealthServiceController.java index ef675d4c15..2f98ae032f 100644 --- a/java/serving/src/main/java/feast/serving/controller/HealthServiceController.java +++ b/java/serving/src/main/java/feast/serving/controller/HealthServiceController.java @@ -16,24 +16,20 @@ */ package feast.serving.controller; +import com.google.inject.Inject; import feast.proto.serving.ServingAPIProto.GetFeastServingInfoRequest; -import feast.serving.interceptors.GrpcMonitoringInterceptor; import feast.serving.service.ServingServiceV2; import io.grpc.health.v1.HealthGrpc.HealthImplBase; import io.grpc.health.v1.HealthProto.HealthCheckRequest; import io.grpc.health.v1.HealthProto.HealthCheckResponse; import io.grpc.health.v1.HealthProto.ServingStatus; import io.grpc.stub.StreamObserver; -import net.devh.boot.grpc.server.service.GrpcService; -import org.springframework.beans.factory.annotation.Autowired; // Reference: https://github.com/grpc/grpc/blob/master/doc/health-checking.md - -@GrpcService(interceptors = {GrpcMonitoringInterceptor.class}) public class HealthServiceController extends HealthImplBase { private final ServingServiceV2 servingService; - @Autowired + @Inject public HealthServiceController(final ServingServiceV2 servingService) { this.servingService = servingService; } diff --git a/java/serving/src/main/java/feast/serving/controller/ServingServiceGRpcController.java b/java/serving/src/main/java/feast/serving/controller/ServingServiceGRpcController.java deleted file mode 100644 index 0f4ef7b5ae..0000000000 --- a/java/serving/src/main/java/feast/serving/controller/ServingServiceGRpcController.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package feast.serving.controller; - -import feast.proto.serving.ServingAPIProto; -import feast.proto.serving.ServingAPIProto.GetFeastServingInfoRequest; -import feast.proto.serving.ServingAPIProto.GetFeastServingInfoResponse; -import feast.proto.serving.ServingServiceGrpc.ServingServiceImplBase; -import feast.serving.config.ApplicationProperties; -import feast.serving.exception.SpecRetrievalException; -import feast.serving.service.ServingServiceV2; -import feast.serving.util.RequestHelper; -import io.grpc.Status; -import io.grpc.stub.StreamObserver; -import io.opentracing.Span; -import io.opentracing.Tracer; -import org.slf4j.Logger; - -public class ServingServiceGRpcController extends ServingServiceImplBase { - - private static final Logger log = - org.slf4j.LoggerFactory.getLogger(ServingServiceGRpcController.class); - private final ServingServiceV2 servingServiceV2; - private final String version; - private final Tracer tracer; - - public ServingServiceGRpcController( - ServingServiceV2 servingServiceV2, - ApplicationProperties applicationProperties, - Tracer tracer) { - this.servingServiceV2 = servingServiceV2; - this.version = applicationProperties.getFeast().getVersion(); - this.tracer = tracer; - } - - @Override - public void getFeastServingInfo( - GetFeastServingInfoRequest request, - StreamObserver responseObserver) { - GetFeastServingInfoResponse feastServingInfo = servingServiceV2.getFeastServingInfo(request); - feastServingInfo = feastServingInfo.toBuilder().setVersion(version).build(); - responseObserver.onNext(feastServingInfo); - responseObserver.onCompleted(); - } - - @Override - public void getOnlineFeatures( - ServingAPIProto.GetOnlineFeaturesRequest request, - StreamObserver responseObserver) { - try { - // authorize for the project in request object. 
- RequestHelper.validateOnlineRequest(request); - Span span = tracer.buildSpan("getOnlineFeaturesV2").start(); - ServingAPIProto.GetOnlineFeaturesResponseV2 onlineFeatures = - servingServiceV2.getOnlineFeatures(request); - if (span != null) { - span.finish(); - } - - responseObserver.onNext(onlineFeatures); - responseObserver.onCompleted(); - } catch (SpecRetrievalException e) { - log.error("Failed to retrieve specs from Registry", e); - responseObserver.onError( - Status.NOT_FOUND.withDescription(e.getMessage()).withCause(e).asException()); - } catch (Exception e) { - log.warn("Failed to get Online Features", e); - responseObserver.onError( - Status.INTERNAL.withDescription(e.getMessage()).withCause(e).asRuntimeException()); - } - } -} diff --git a/java/serving/src/main/java/feast/serving/controller/ServingServiceRestController.java b/java/serving/src/main/java/feast/serving/controller/ServingServiceRestController.java deleted file mode 100644 index fe8f13d8bc..0000000000 --- a/java/serving/src/main/java/feast/serving/controller/ServingServiceRestController.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package feast.serving.controller; - -import static feast.serving.util.mappers.ResponseJSONMapper.mapGetOnlineFeaturesResponse; - -import feast.proto.serving.ServingAPIProto; -import feast.proto.serving.ServingAPIProto.GetFeastServingInfoRequest; -import feast.proto.serving.ServingAPIProto.GetFeastServingInfoResponse; -import feast.serving.config.ApplicationProperties; -import feast.serving.service.ServingServiceV2; -import feast.serving.util.RequestHelper; -import java.util.List; -import java.util.Map; -import org.springframework.web.bind.annotation.RequestBody; -import org.springframework.web.bind.annotation.RequestMapping; - -public class ServingServiceRestController { - - private final ServingServiceV2 servingService; - private final String version; - - public ServingServiceRestController( - ServingServiceV2 servingService, ApplicationProperties applicationProperties) { - this.servingService = servingService; - this.version = applicationProperties.getFeast().getVersion(); - } - - @RequestMapping(value = "/api/v1/info", produces = "application/json") - public GetFeastServingInfoResponse getInfo() { - GetFeastServingInfoResponse feastServingInfo = - servingService.getFeastServingInfo(GetFeastServingInfoRequest.getDefaultInstance()); - return feastServingInfo.toBuilder().setVersion(version).build(); - } - - @RequestMapping( - value = "/api/v1/features/online", - produces = "application/json", - consumes = "application/json") - public List> getOnlineFeatures( - @RequestBody ServingAPIProto.GetOnlineFeaturesRequest request) { - RequestHelper.validateOnlineRequest(request); - ServingAPIProto.GetOnlineFeaturesResponseV2 onlineFeatures = - servingService.getOnlineFeatures(request); - return mapGetOnlineFeaturesResponse(onlineFeatures); - } -} diff --git 
a/java/serving/src/main/java/feast/serving/grpc/OnlineServingGrpcServiceV2.java b/java/serving/src/main/java/feast/serving/grpc/OnlineServingGrpcServiceV2.java index f3a35d1d0f..fe024404f3 100644 --- a/java/serving/src/main/java/feast/serving/grpc/OnlineServingGrpcServiceV2.java +++ b/java/serving/src/main/java/feast/serving/grpc/OnlineServingGrpcServiceV2.java @@ -19,11 +19,15 @@ import feast.proto.serving.ServingAPIProto; import feast.proto.serving.ServingServiceGrpc; import feast.serving.service.ServingServiceV2; +import io.grpc.Status; import io.grpc.stub.StreamObserver; import javax.inject.Inject; +import org.slf4j.Logger; public class OnlineServingGrpcServiceV2 extends ServingServiceGrpc.ServingServiceImplBase { private final ServingServiceV2 servingServiceV2; + private static final Logger log = + org.slf4j.LoggerFactory.getLogger(OnlineServingGrpcServiceV2.class); @Inject OnlineServingGrpcServiceV2(ServingServiceV2 servingServiceV2) { @@ -34,15 +38,27 @@ public class OnlineServingGrpcServiceV2 extends ServingServiceGrpc.ServingServic public void getFeastServingInfo( ServingAPIProto.GetFeastServingInfoRequest request, StreamObserver responseObserver) { - responseObserver.onNext(this.servingServiceV2.getFeastServingInfo(request)); - responseObserver.onCompleted(); + try { + responseObserver.onNext(this.servingServiceV2.getFeastServingInfo(request)); + responseObserver.onCompleted(); + } catch (RuntimeException e) { + log.warn("Failed to get Serving Info", e); + responseObserver.onError( + Status.INTERNAL.withDescription(e.getMessage()).withCause(e).asRuntimeException()); + } } @Override public void getOnlineFeatures( ServingAPIProto.GetOnlineFeaturesRequest request, - StreamObserver responseObserver) { - responseObserver.onNext(this.servingServiceV2.getOnlineFeatures(request)); - responseObserver.onCompleted(); + StreamObserver responseObserver) { + try { + responseObserver.onNext(this.servingServiceV2.getOnlineFeatures(request)); + responseObserver.onCompleted(); + } catch (RuntimeException e) { + log.warn("Failed to get Online Features", e); + responseObserver.onError( + Status.INTERNAL.withDescription(e.getMessage()).withCause(e).asRuntimeException()); + } } } diff --git a/java/serving/src/main/java/feast/serving/modules/ServerModule.java b/java/serving/src/main/java/feast/serving/modules/ServerModule.java deleted file mode 100644 index 29d1f57432..0000000000 --- a/java/serving/src/main/java/feast/serving/modules/ServerModule.java +++ /dev/null @@ -1,19 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2021 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package feast.serving.modules; - -public class ServerModule {} diff --git a/java/serving/src/main/java/feast/serving/registry/S3RegistryFile.java b/java/serving/src/main/java/feast/serving/registry/S3RegistryFile.java index 486e2ca39c..4b122a5de0 100644 --- a/java/serving/src/main/java/feast/serving/registry/S3RegistryFile.java +++ b/java/serving/src/main/java/feast/serving/registry/S3RegistryFile.java @@ -33,7 +33,8 @@ public S3RegistryFile(AmazonS3 s3Client, String url) { this.s3Client = s3Client; String[] split = url.replace("s3://", "").split("/"); - this.s3Object = this.s3Client.getObject(split[0], split[1]); + String objectPath = String.join("/", java.util.Arrays.copyOfRange(split, 1, split.length)); + this.s3Object = this.s3Client.getObject(split[0], objectPath); } @Override diff --git a/java/serving/src/main/java/feast/serving/service/OnlineServingServiceV2.java b/java/serving/src/main/java/feast/serving/service/OnlineServingServiceV2.java index 5774dc361a..f4e330fbf7 100644 --- a/java/serving/src/main/java/feast/serving/service/OnlineServingServiceV2.java +++ b/java/serving/src/main/java/feast/serving/service/OnlineServingServiceV2.java @@ -72,7 +72,7 @@ public GetFeastServingInfoResponse getFeastServingInfo( } @Override - public ServingAPIProto.GetOnlineFeaturesResponseV2 getOnlineFeatures( + public ServingAPIProto.GetOnlineFeaturesResponse getOnlineFeatures( ServingAPIProto.GetOnlineFeaturesRequest request) { // Split all feature references into non-ODFV (e.g. batch and stream) references and ODFV. List allFeatureReferences = getFeaturesList(request); @@ -91,9 +91,8 @@ public ServingAPIProto.GetOnlineFeaturesResponseV2 getOnlineFeatures( // Pair from extractRequestDataFeatureNamesAndOnDemandFeatureInputs. // Currently, we can retrieve context variables directly from GetOnlineFeaturesRequest. List onDemandFeatureInputs = - this.onlineTransformationService - .extractRequestDataFeatureNamesAndOnDemandFeatureInputs(onDemandFeatureReferences) - .getRight(); + this.onlineTransformationService.extractOnDemandFeaturesDependencies( + onDemandFeatureReferences); // Add on demand feature inputs to list of feature references to retrieve. 
for (FeatureReferenceV2 onDemandFeatureInput : onDemandFeatureInputs) { @@ -132,8 +131,8 @@ public ServingAPIProto.GetOnlineFeaturesResponseV2 getOnlineFeatures( Span postProcessingSpan = tracer.buildSpan("postProcessing").start(); - ServingAPIProto.GetOnlineFeaturesResponseV2.Builder responseBuilder = - ServingAPIProto.GetOnlineFeaturesResponseV2.newBuilder(); + ServingAPIProto.GetOnlineFeaturesResponse.Builder responseBuilder = + ServingAPIProto.GetOnlineFeaturesResponse.newBuilder(); Timestamp now = Timestamp.newBuilder().setSeconds(System.currentTimeMillis() / 1000).build(); Timestamp nullTimestamp = Timestamp.newBuilder().build(); @@ -147,7 +146,7 @@ public ServingAPIProto.GetOnlineFeaturesResponseV2 getOnlineFeatures( Duration maxAge = this.registryRepository.getMaxAge(featureReference); - ServingAPIProto.GetOnlineFeaturesResponseV2.FeatureVector.Builder vectorBuilder = + ServingAPIProto.GetOnlineFeaturesResponse.FeatureVector.Builder vectorBuilder = responseBuilder.addResultsBuilder(); for (int rowIdx = 0; rowIdx < features.size(); rowIdx++) { @@ -262,7 +261,7 @@ private void populateOnDemandFeatures( List retrievedFeatureReferences, ServingAPIProto.GetOnlineFeaturesRequest request, List> features, - ServingAPIProto.GetOnlineFeaturesResponseV2.Builder responseBuilder) { + ServingAPIProto.GetOnlineFeaturesResponse.Builder responseBuilder) { List>> onDemandContext = request.getRequestContextMap().entrySet().stream() @@ -284,7 +283,12 @@ private void populateOnDemandFeatures( valueList.add(features.get(rowIdx).get(featureIdx).getFeatureValue(valueType)); } - onDemandContext.add(Pair.of(Feature.getFeatureReference(featureReference), valueList)); + onDemandContext.add( + Pair.of( + String.format( + "%s__%s", + featureReference.getFeatureViewName(), featureReference.getFeatureName()), + valueList)); } // Serialize the augmented values. 
ValueType transformationInput = @@ -383,7 +387,7 @@ private void populateHistogramMetrics( */ private void populateCountMetrics( FeatureReferenceV2 featureRef, - ServingAPIProto.GetOnlineFeaturesResponseV2.FeatureVectorOrBuilder featureVector) { + ServingAPIProto.GetOnlineFeaturesResponse.FeatureVectorOrBuilder featureVector) { String featureRefString = Feature.getFeatureReference(featureRef); featureVector .getStatusesList() diff --git a/java/serving/src/main/java/feast/serving/service/OnlineTransformationService.java b/java/serving/src/main/java/feast/serving/service/OnlineTransformationService.java index bfe717aa96..d1df763f6e 100644 --- a/java/serving/src/main/java/feast/serving/service/OnlineTransformationService.java +++ b/java/serving/src/main/java/feast/serving/service/OnlineTransformationService.java @@ -19,11 +19,7 @@ import com.google.common.collect.Lists; import com.google.protobuf.ByteString; import com.google.protobuf.Timestamp; -import feast.common.models.Feature; -import feast.proto.core.DataSourceProto; -import feast.proto.core.FeatureProto; -import feast.proto.core.FeatureViewProto; -import feast.proto.core.OnDemandFeatureViewProto; +import feast.proto.core.*; import feast.proto.serving.ServingAPIProto; import feast.proto.serving.TransformationServiceAPIProto.TransformFeaturesRequest; import feast.proto.serving.TransformationServiceAPIProto.TransformFeaturesResponse; @@ -34,6 +30,7 @@ import io.grpc.ManagedChannel; import io.grpc.ManagedChannelBuilder; import io.grpc.Status; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.channels.Channels; import java.util.*; @@ -47,9 +44,7 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; -import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; -import org.apache.tomcat.util.http.fileupload.ByteArrayOutputStream; import org.slf4j.Logger; public class OnlineTransformationService implements TransformationService { @@ -77,17 +72,18 @@ public OnlineTransformationService( @Override public TransformFeaturesResponse transformFeatures( TransformFeaturesRequest transformFeaturesRequest) { + if (this.stub == null) { + throw new RuntimeException( + "Transformation service endpoint must be configured to enable this functionality."); + } return this.stub.transformFeatures(transformFeaturesRequest); } /** {@inheritDoc} */ @Override - public Pair, List> - extractRequestDataFeatureNamesAndOnDemandFeatureInputs( - List onDemandFeatureReferences) { - Set requestDataFeatureNames = new HashSet(); - List onDemandFeatureInputs = - new ArrayList(); + public List extractOnDemandFeaturesDependencies( + List onDemandFeatureReferences) { + List onDemandFeatureInputs = new ArrayList<>(); for (ServingAPIProto.FeatureReferenceV2 featureReference : onDemandFeatureReferences) { OnDemandFeatureViewProto.OnDemandFeatureViewSpec onDemandFeatureViewSpec = this.registryRepository.getOnDemandFeatureViewSpec(featureReference); @@ -98,11 +94,20 @@ public TransformFeaturesResponse transformFeatures( OnDemandFeatureViewProto.OnDemandInput.InputCase inputCase = input.getInputCase(); switch (inputCase) { case REQUEST_DATA_SOURCE: - DataSourceProto.DataSource requestDataSource = input.getRequestDataSource(); - DataSourceProto.DataSource.RequestDataOptions requestDataOptions = - requestDataSource.getRequestDataOptions(); - Set requestDataNames = 
requestDataOptions.getSchemaMap().keySet(); - requestDataFeatureNames.addAll(requestDataNames); + // Do nothing. The value should be provided as dedicated request parameter + break; + case FEATURE_VIEW_PROJECTION: + FeatureReferenceProto.FeatureViewProjection projection = + input.getFeatureViewProjection(); + for (FeatureProto.FeatureSpecV2 featureSpec : projection.getFeatureColumnsList()) { + String featureName = featureSpec.getName(); + ServingAPIProto.FeatureReferenceV2 onDemandFeatureInput = + ServingAPIProto.FeatureReferenceV2.newBuilder() + .setFeatureViewName(projection.getFeatureViewName()) + .setFeatureName(featureName) + .build(); + onDemandFeatureInputs.add(onDemandFeatureInput); + } break; case FEATURE_VIEW: FeatureViewProto.FeatureView featureView = input.getFeatureView(); @@ -126,61 +131,7 @@ public TransformFeaturesResponse transformFeatures( } } } - Pair, List> pair = - new ImmutablePair, List>( - requestDataFeatureNames, onDemandFeatureInputs); - return pair; - } - - /** {@inheritDoc} */ - public Pair< - List, - Map>> - separateEntityRows( - Set requestDataFeatureNames, ServingAPIProto.GetOnlineFeaturesRequestV2 request) { - // Separate entity rows into entity data and request feature data. - List entityRows = - new ArrayList(); - Map> requestDataFeatures = - new HashMap>(); - - for (ServingAPIProto.GetOnlineFeaturesRequestV2.EntityRow entityRow : - request.getEntityRowsList()) { - Map fieldsMap = new HashMap(); - - for (Map.Entry entry : entityRow.getFieldsMap().entrySet()) { - String key = entry.getKey(); - ValueProto.Value value = entry.getValue(); - - if (requestDataFeatureNames.contains(key)) { - if (!requestDataFeatures.containsKey(key)) { - requestDataFeatures.put(key, new ArrayList()); - } - requestDataFeatures.get(key).add(value); - } else { - fieldsMap.put(key, value); - } - } - - // Construct new entity row containing the extracted entity data, if necessary. 
- if (!fieldsMap.isEmpty()) { - ServingAPIProto.GetOnlineFeaturesRequestV2.EntityRow newEntityRow = - ServingAPIProto.GetOnlineFeaturesRequestV2.EntityRow.newBuilder() - .setTimestamp(entityRow.getTimestamp()) - .putAllFields(fieldsMap) - .build(); - entityRows.add(newEntityRow); - } - } - - Pair< - List, - Map>> - pair = - new ImmutablePair< - List, - Map>>(entityRows, requestDataFeatures); - return pair; + return onDemandFeatureInputs; } /** {@inheritDoc} */ @@ -189,7 +140,7 @@ public void processTransformFeaturesResponse( transformFeaturesResponse, String onDemandFeatureViewName, Set onDemandFeatureStringReferences, - ServingAPIProto.GetOnlineFeaturesResponseV2.Builder responseBuilder) { + ServingAPIProto.GetOnlineFeaturesResponse.Builder responseBuilder) { try { BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); ArrowFileReader reader = @@ -208,7 +159,7 @@ public void processTransformFeaturesResponse( for (Field field : responseFields) { String columnName = field.getName(); - String fullFeatureName = onDemandFeatureViewName + ":" + columnName; + String fullFeatureName = columnName.replace("__", ":"); ArrowType columnType = field.getType(); // The response will contain all features for the specified ODFV, so we @@ -219,7 +170,7 @@ public void processTransformFeaturesResponse( FieldVector fieldVector = readBatch.getVector(field); int valueCount = fieldVector.getValueCount(); - ServingAPIProto.GetOnlineFeaturesResponseV2.FeatureVector.Builder vectorBuilder = + ServingAPIProto.GetOnlineFeaturesResponse.FeatureVector.Builder vectorBuilder = responseBuilder.addResultsBuilder(); List valueList = Lists.newArrayListWithExpectedSize(valueCount); @@ -306,7 +257,7 @@ public ValueType serializeValuesIntoArrowIPC(List> columnEntry : values) { // The Python FTS does not expect full feature names, so we extract the feature name. - String columnName = Feature.getFeatureName(columnEntry.getKey()); + String columnName = columnEntry.getKey(); List columnValues = columnEntry.getValue(); FieldVector column; @@ -332,14 +283,14 @@ public ValueType serializeValuesIntoArrowIPC(ListThis request is fulfilled synchronously. * - * @return {@link feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponseV2} with list of - * {@link feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponseV2.FeatureVector}. + * @return {@link feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse} with list of + * {@link feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse.FeatureVector}. 
*/ - ServingAPIProto.GetOnlineFeaturesResponseV2 getOnlineFeatures( + ServingAPIProto.GetOnlineFeaturesResponse getOnlineFeatures( ServingAPIProto.GetOnlineFeaturesRequest getFeaturesRequest); } diff --git a/java/serving/src/main/java/feast/serving/service/TransformationService.java b/java/serving/src/main/java/feast/serving/service/TransformationService.java index 36cce43e0d..e993e76e0a 100644 --- a/java/serving/src/main/java/feast/serving/service/TransformationService.java +++ b/java/serving/src/main/java/feast/serving/service/TransformationService.java @@ -17,13 +17,11 @@ package feast.serving.service; import feast.proto.serving.ServingAPIProto; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2; import feast.proto.serving.TransformationServiceAPIProto.TransformFeaturesRequest; import feast.proto.serving.TransformationServiceAPIProto.TransformFeaturesResponse; import feast.proto.serving.TransformationServiceAPIProto.ValueType; import feast.proto.types.ValueProto; import java.util.List; -import java.util.Map; import java.util.Set; import org.apache.commons.lang3.tuple.Pair; @@ -37,27 +35,13 @@ public interface TransformationService { TransformFeaturesResponse transformFeatures(TransformFeaturesRequest transformFeaturesRequest); /** - * Extract the set of request data feature names and the list of on demand feature inputs from a - * list of ODFV references. + * Extract the list of on demand feature inputs from a list of ODFV references. * * @param onDemandFeatureReferences list of ODFV references to be parsed - * @return a pair containing the set of request data feature names and list of on demand feature - * inputs + * @return list of on demand feature inputs */ - Pair, List> - extractRequestDataFeatureNamesAndOnDemandFeatureInputs( - List onDemandFeatureReferences); - - /** - * Separate the entity rows of a request into entity data and request feature data. - * - * @param requestDataFeatureNames set of feature names for the request data - * @param request the GetOnlineFeaturesRequestV2 containing the entity rows - * @return a pair containing the set of request data feature names and list of on demand feature - * inputs - */ - Pair, Map>> - separateEntityRows(Set requestDataFeatureNames, GetOnlineFeaturesRequestV2 request); + List extractOnDemandFeaturesDependencies( + List onDemandFeatureReferences); /** * Process a response from the feature transformation server by augmenting the given lists of @@ -66,13 +50,13 @@ public interface TransformationService { * @param transformFeaturesResponse response to be processed * @param onDemandFeatureViewName name of ODFV to which the response corresponds * @param onDemandFeatureStringReferences set of all ODFV references that should be kept - * @param responseBuilder {@link ServingAPIProto.GetOnlineFeaturesResponseV2.Builder} + * @param responseBuilder {@link ServingAPIProto.GetOnlineFeaturesResponse.Builder} */ void processTransformFeaturesResponse( TransformFeaturesResponse transformFeaturesResponse, String onDemandFeatureViewName, Set onDemandFeatureStringReferences, - ServingAPIProto.GetOnlineFeaturesResponseV2.Builder responseBuilder); + ServingAPIProto.GetOnlineFeaturesResponse.Builder responseBuilder); /** * Serialize data into Arrow IPC format, to be sent to the Python feature transformation server. 
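To make the contract with the Python transformation service easier to follow: on-demand feature inputs are flattened to `featureview__feature` column names before Arrow serialization, and column names coming back from the transformation are mapped to the usual `featureview:feature` references. The sketch below only restates those two mappings; the helper class and method names are hypothetical, since the patch applies both conversions inline in `OnlineServingServiceV2` and `OnlineTransformationService` rather than through a shared helper.

```java
// Illustrative only: the column-naming convention this change uses when exchanging
// on-demand feature data with the Python transformation service.
public final class OdfvColumnNameConvention {

  // Java -> Python: "driver_hourly_stats" + "conv_rate" becomes "driver_hourly_stats__conv_rate",
  // matching the String.format("%s__%s", ...) call in OnlineServingServiceV2.
  static String toTransformationColumn(String featureViewName, String featureName) {
    return String.format("%s__%s", featureViewName, featureName);
  }

  // Python -> Java: Arrow column names are turned back into full feature references,
  // matching columnName.replace("__", ":") in OnlineTransformationService.
  static String toFullFeatureName(String transformationColumnName) {
    return transformationColumnName.replace("__", ":");
  }

  public static void main(String[] args) {
    String column = toTransformationColumn("driver_hourly_stats", "conv_rate");
    System.out.println(column);                    // driver_hourly_stats__conv_rate
    System.out.println(toFullFeatureName(column)); // driver_hourly_stats:conv_rate
  }
}
```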
diff --git a/java/serving/src/main/java/feast/serving/util/mappers/ResponseJSONMapper.java b/java/serving/src/main/java/feast/serving/util/mappers/ResponseJSONMapper.java index 1e82bf864c..3ab9f43c34 100644 --- a/java/serving/src/main/java/feast/serving/util/mappers/ResponseJSONMapper.java +++ b/java/serving/src/main/java/feast/serving/util/mappers/ResponseJSONMapper.java @@ -26,14 +26,14 @@ public class ResponseJSONMapper { public static List> mapGetOnlineFeaturesResponse( - ServingAPIProto.GetOnlineFeaturesResponseV2 response) { + ServingAPIProto.GetOnlineFeaturesResponse response) { return response.getResultsList().stream() .map(fieldValues -> convertFieldValuesToMap(fieldValues)) .collect(Collectors.toList()); } private static Map convertFieldValuesToMap( - ServingAPIProto.GetOnlineFeaturesResponseV2.FeatureVector vec) { + ServingAPIProto.GetOnlineFeaturesResponse.FeatureVector vec) { return Map.of( "values", vec.getValuesList().stream() diff --git a/java/serving/src/test/java/feast/serving/it/ServingBaseTests.java b/java/serving/src/test/java/feast/serving/it/ServingBaseTests.java index 4d4272324e..c610d7df6b 100644 --- a/java/serving/src/test/java/feast/serving/it/ServingBaseTests.java +++ b/java/serving/src/test/java/feast/serving/it/ServingBaseTests.java @@ -74,7 +74,7 @@ private static RegistryProto.Registry readLocalRegistry() { @Test public void shouldGetOnlineFeatures() { - ServingAPIProto.GetOnlineFeaturesResponseV2 featureResponse = + ServingAPIProto.GetOnlineFeaturesResponse featureResponse = servingStub.getOnlineFeatures(buildOnlineRequest(1005)); assertEquals(2, featureResponse.getResultsCount()); @@ -96,7 +96,7 @@ public void shouldGetOnlineFeatures() { @Test public void shouldGetOnlineFeaturesWithOutsideMaxAgeStatus() { - ServingAPIProto.GetOnlineFeaturesResponseV2 featureResponse = + ServingAPIProto.GetOnlineFeaturesResponse featureResponse = servingStub.getOnlineFeatures(buildOnlineRequest(1001)); assertEquals(2, featureResponse.getResultsCount()); @@ -113,7 +113,7 @@ public void shouldGetOnlineFeaturesWithOutsideMaxAgeStatus() { @Test public void shouldGetOnlineFeaturesWithNotFoundStatus() { - ServingAPIProto.GetOnlineFeaturesResponseV2 featureResponse = + ServingAPIProto.GetOnlineFeaturesResponse featureResponse = servingStub.getOnlineFeatures(buildOnlineRequest(-1)); assertEquals(2, featureResponse.getResultsCount()); @@ -157,5 +157,28 @@ public void shouldRefreshRegistryAndServeNewFeatures() throws InterruptedExcepti equalTo(3)); } + /** https://github.com/feast-dev/feast/issues/2253 */ + @Test + public void shouldGetOnlineFeaturesWithStringEntity() { + Map entityRows = + ImmutableMap.of( + "entity", + ValueProto.RepeatedValue.newBuilder() + .addVal(DataGenerator.createStrValue("key-1")) + .build()); + + ImmutableList featureReferences = + ImmutableList.of("feature_view_0:feature_0", "feature_view_0:feature_1"); + + ServingAPIProto.GetOnlineFeaturesRequest req = + TestUtils.createOnlineFeatureRequest(featureReferences, entityRows); + + ServingAPIProto.GetOnlineFeaturesResponse resp = servingStub.getOnlineFeatures(req); + + for (final int featureIdx : List.of(0, 1)) { + assertEquals(FieldStatus.PRESENT, resp.getResults(featureIdx).getStatuses(0)); + } + } + abstract void updateRegistryFile(RegistryProto.Registry registry); } diff --git a/java/serving/src/test/java/feast/serving/it/ServingBenchmarkIT.java b/java/serving/src/test/java/feast/serving/it/ServingBenchmarkIT.java index 43eed2fa33..1d77c2e4f7 100644 --- 
a/java/serving/src/test/java/feast/serving/it/ServingBenchmarkIT.java +++ b/java/serving/src/test/java/feast/serving/it/ServingBenchmarkIT.java @@ -51,7 +51,8 @@ protected ServingAPIProto.GetOnlineFeaturesRequest buildOnlineRequest( int rowsCount, int featuresCount) { List entities = IntStream.range(0, rowsCount) - .mapToObj(i -> DataGenerator.createInt64Value(rand.nextInt(1000))) + .mapToObj( + i -> DataGenerator.createStrValue(String.format("key-%s", rand.nextInt(1000)))) .collect(Collectors.toList()); List featureReferences = diff --git a/java/serving/src/test/java/feast/serving/it/ServingEnvironment.java b/java/serving/src/test/java/feast/serving/it/ServingEnvironment.java index 0c622d7c42..c00dc7b1f3 100644 --- a/java/serving/src/test/java/feast/serving/it/ServingEnvironment.java +++ b/java/serving/src/test/java/feast/serving/it/ServingEnvironment.java @@ -16,23 +16,23 @@ */ package feast.serving.it; +import static org.hamcrest.CoreMatchers.*; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.greaterThan; + import com.google.inject.*; import com.google.inject.Module; import com.google.inject.util.Modules; import feast.proto.serving.ServingServiceGrpc; -import feast.serving.config.ApplicationProperties; -import feast.serving.config.InstrumentationConfig; -import feast.serving.config.RegistryConfig; -import feast.serving.config.ServingServiceConfigV2; +import feast.serving.config.*; import feast.serving.grpc.OnlineServingGrpcServiceV2; import io.grpc.ManagedChannel; +import io.grpc.ManagedChannelBuilder; import io.grpc.Server; -import io.grpc.inprocess.InProcessChannelBuilder; -import io.grpc.inprocess.InProcessServerBuilder; -import io.grpc.protobuf.services.ProtoReflectionService; import io.grpc.util.MutableHandlerRegistry; import java.io.File; -import java.time.Duration; +import java.io.IOException; +import java.net.ServerSocket; import java.util.concurrent.TimeUnit; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; @@ -53,17 +53,16 @@ abstract class ServingEnvironment { Server server; MutableHandlerRegistry serviceRegistry; + static int serverPort = getFreePort(); + @BeforeAll static void globalSetup() { environment = new DockerComposeContainer( new File("src/test/resources/docker-compose/docker-compose-redis-it.yml")) .withExposedService("redis", 6379) - .withOptions() - .waitingFor( - "materialize", - Wait.forLogMessage(".*Materialization finished.*\\n", 1) - .withStartupTimeout(Duration.ofMinutes(5))); + .withExposedService("feast", 8080) + .waitingFor("feast", Wait.forListeningPort()); environment.start(); } @@ -74,7 +73,6 @@ static void globalTeardown() { @BeforeEach public void envSetUp() throws Exception { - AbstractModule appPropertiesModule = new AbstractModule() { @Override @@ -85,9 +83,15 @@ protected void configure() { @Provides ApplicationProperties applicationProperties() { final ApplicationProperties p = new ApplicationProperties(); - p.setAwsRegion("us-east-1"); + + ApplicationProperties.GrpcServer grpcServer = new ApplicationProperties.GrpcServer(); + ApplicationProperties.Server server = new ApplicationProperties.Server(); + server.setPort(serverPort); + grpcServer.setServer(server); + p.setGrpc(grpcServer); final ApplicationProperties.FeastProperties feastProperties = createFeastProperties(); + feastProperties.setAwsRegion("us-east-1"); p.setFeast(feastProperties); final ApplicationProperties.TracingProperties tracingProperties = @@ -112,22 +116,13 @@ ApplicationProperties 
applicationProperties() { new ServingServiceConfigV2(), registryConfig, new InstrumentationConfig(), - appPropertiesModule); + appPropertiesModule, + new ServerModule()); - OnlineServingGrpcServiceV2 onlineServingGrpcServiceV2 = - injector.getInstance(OnlineServingGrpcServiceV2.class); - - serverName = InProcessServerBuilder.generateName(); - - server = - InProcessServerBuilder.forName(serverName) - .fallbackHandlerRegistry(serviceRegistry) - .addService(onlineServingGrpcServiceV2) - .addService(ProtoReflectionService.newInstance()) - .build(); + server = injector.getInstance(Server.class); server.start(); - channel = InProcessChannelBuilder.forName(serverName).usePlaintext().directExecutor().build(); + channel = ManagedChannelBuilder.forAddress("localhost", serverPort).usePlaintext().build(); servingStub = ServingServiceGrpc.newBlockingStub(channel) @@ -149,6 +144,10 @@ public void envTeardown() throws Exception { channel.shutdownNow(); server.shutdownNow(); } + + server = null; + channel = null; + servingStub = null; } abstract ApplicationProperties.FeastProperties createFeastProperties(); @@ -156,4 +155,18 @@ public void envTeardown() throws Exception { AbstractModule registryConfig() { return null; } + + private static int getFreePort() { + ServerSocket serverSocket; + try { + serverSocket = new ServerSocket(0); + } catch (IOException e) { + throw new RuntimeException("Couldn't allocate port"); + } + + assertThat(serverSocket, is(notNullValue())); + assertThat(serverSocket.getLocalPort(), greaterThan(0)); + + return serverSocket.getLocalPort(); + } } diff --git a/java/serving/src/test/java/feast/serving/it/TestUtils.java b/java/serving/src/test/java/feast/serving/it/TestUtils.java index 71d15e4a89..867fa4afb0 100644 --- a/java/serving/src/test/java/feast/serving/it/TestUtils.java +++ b/java/serving/src/test/java/feast/serving/it/TestUtils.java @@ -38,17 +38,33 @@ public static ServingServiceGrpc.ServingServiceBlockingStub getServingServiceStu public static GetOnlineFeaturesRequest createOnlineFeatureRequest( List featureReferences, Map entityRows) { + return createOnlineFeatureRequest(featureReferences, entityRows, new HashMap<>()); + } + + public static GetOnlineFeaturesRequest createOnlineFeatureRequest( + List featureReferences, + Map entityRows, + Map requestContext) { return GetOnlineFeaturesRequest.newBuilder() .setFeatures(ServingAPIProto.FeatureList.newBuilder().addAllVal(featureReferences)) .putAllEntities(entityRows) + .putAllRequestContext(requestContext) .build(); } public static GetOnlineFeaturesRequest createOnlineFeatureRequest( String featureService, Map entityRows) { + return createOnlineFeatureRequest(featureService, entityRows, new HashMap<>()); + } + + public static GetOnlineFeaturesRequest createOnlineFeatureRequest( + String featureService, + Map entityRows, + Map requestContext) { return GetOnlineFeaturesRequest.newBuilder() .setFeatureService(featureService) .putAllEntities(entityRows) + .putAllRequestContext(requestContext) .build(); } diff --git a/java/serving/src/test/java/feast/serving/it/TransformationServiceIT.java b/java/serving/src/test/java/feast/serving/it/TransformationServiceIT.java new file mode 100644 index 0000000000..102d851528 --- /dev/null +++ b/java/serving/src/test/java/feast/serving/it/TransformationServiceIT.java @@ -0,0 +1,103 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2022 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with 
the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.serving.it; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import feast.proto.serving.ServingAPIProto; +import feast.proto.types.ValueProto; +import feast.serving.config.ApplicationProperties; +import feast.serving.util.DataGenerator; +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.Test; + +public class TransformationServiceIT extends ServingEnvironment { + @Override + ApplicationProperties.FeastProperties createFeastProperties() { + ApplicationProperties.FeastProperties feastProperties = + TestUtils.createBasicFeastProperties( + environment.getServiceHost("redis", 6379), environment.getServicePort("redis", 6379)); + feastProperties.setTransformationServiceEndpoint( + String.format( + "%s:%d", + environment.getServiceHost("feast", 8080), environment.getServicePort("feast", 8080))); + return feastProperties; + } + + private ServingAPIProto.GetOnlineFeaturesRequest buildOnlineRequest( + int driverId, boolean transformedFeaturesOnly) { + Map entityRows = + ImmutableMap.of( + "driver_id", + ValueProto.RepeatedValue.newBuilder() + .addVal(DataGenerator.createInt64Value(driverId)) + .build()); + + Map requestContext = + ImmutableMap.of( + "val_to_add", + ValueProto.RepeatedValue.newBuilder().addVal(DataGenerator.createInt64Value(3)).build(), + "val_to_add_2", + ValueProto.RepeatedValue.newBuilder() + .addVal(DataGenerator.createInt64Value(5)) + .build()); + + List featureReferences = + Lists.newArrayList( + "transformed_conv_rate:conv_rate_plus_val1", + "transformed_conv_rate:conv_rate_plus_val2"); + + if (!transformedFeaturesOnly) { + featureReferences.add("driver_hourly_stats:conv_rate"); + } + + return TestUtils.createOnlineFeatureRequest(featureReferences, entityRows, requestContext); + } + + @Test + public void shouldCalculateOnDemandFeatures() { + ServingAPIProto.GetOnlineFeaturesResponse featureResponse = + servingStub.getOnlineFeatures(buildOnlineRequest(1005, false)); + + for (int featureIdx : List.of(0, 1, 2)) { + assertEquals( + List.of(ServingAPIProto.FieldStatus.PRESENT), + featureResponse.getResults(featureIdx).getStatusesList()); + } + + // conv_rate + assertEquals(0.5, featureResponse.getResults(0).getValues(0).getDoubleVal(), 0.0001); + // conv_rate + val_to_add (3.0) + assertEquals(3.5, featureResponse.getResults(1).getValues(0).getDoubleVal(), 0.0001); + // conv_rate + val_to_add_2 (5.0) + assertEquals(5.5, featureResponse.getResults(2).getValues(0).getDoubleVal(), 0.0001); + } + + @Test + public void shouldCorrectlyFetchDependantFeatures() { + ServingAPIProto.GetOnlineFeaturesResponse featureResponse = + servingStub.getOnlineFeatures(buildOnlineRequest(1005, true)); + + // conv_rate + val_to_add (3.0) + assertEquals(3.5, featureResponse.getResults(0).getValues(0).getDoubleVal(), 0.0001); + // conv_rate + val_to_add_2 (5.0) + assertEquals(5.5, featureResponse.getResults(1).getValues(0).getDoubleVal(), 0.0001); + } +} diff --git 
a/java/serving/src/test/java/feast/serving/service/OnlineServingServiceTest.java b/java/serving/src/test/java/feast/serving/service/OnlineServingServiceTest.java index 4234e9dce3..64d2e20c9b 100644 --- a/java/serving/src/test/java/feast/serving/service/OnlineServingServiceTest.java +++ b/java/serving/src/test/java/feast/serving/service/OnlineServingServiceTest.java @@ -30,7 +30,7 @@ import feast.proto.core.FeatureViewProto; import feast.proto.serving.ServingAPIProto; import feast.proto.serving.ServingAPIProto.FieldStatus; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponseV2; +import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse; import feast.proto.types.ValueProto; import feast.serving.registry.Registry; import feast.serving.registry.RegistryRepository; @@ -173,10 +173,10 @@ public void shouldReturnResponseWithValuesAndMetadataIfKeysPresent() { when(tracer.buildSpan(ArgumentMatchers.any())).thenReturn(Mockito.mock(SpanBuilder.class)); - GetOnlineFeaturesResponseV2 expected = - GetOnlineFeaturesResponseV2.newBuilder() + GetOnlineFeaturesResponse expected = + GetOnlineFeaturesResponse.newBuilder() .addResults( - GetOnlineFeaturesResponseV2.FeatureVector.newBuilder() + GetOnlineFeaturesResponse.FeatureVector.newBuilder() .addValues(createStrValue("1")) .addValues(createStrValue("3")) .addStatuses(FieldStatus.PRESENT) @@ -184,7 +184,7 @@ public void shouldReturnResponseWithValuesAndMetadataIfKeysPresent() { .addEventTimestamps(now) .addEventTimestamps(now)) .addResults( - GetOnlineFeaturesResponseV2.FeatureVector.newBuilder() + GetOnlineFeaturesResponse.FeatureVector.newBuilder() .addValues(createStrValue("2")) .addValues(createStrValue("4")) .addStatuses(FieldStatus.PRESENT) @@ -198,7 +198,7 @@ public void shouldReturnResponseWithValuesAndMetadataIfKeysPresent() { .addVal("featureview_1:feature_1") .addVal("featureview_1:feature_2"))) .build(); - ServingAPIProto.GetOnlineFeaturesResponseV2 actual = + ServingAPIProto.GetOnlineFeaturesResponse actual = onlineServingServiceV2.getOnlineFeatures(request); assertThat(actual, equalTo(expected)); } @@ -240,10 +240,10 @@ public void shouldReturnResponseWithUnsetValuesAndMetadataIfKeysNotPresent() { when(tracer.buildSpan(ArgumentMatchers.any())).thenReturn(Mockito.mock(SpanBuilder.class)); - GetOnlineFeaturesResponseV2 expected = - GetOnlineFeaturesResponseV2.newBuilder() + GetOnlineFeaturesResponse expected = + GetOnlineFeaturesResponse.newBuilder() .addResults( - GetOnlineFeaturesResponseV2.FeatureVector.newBuilder() + GetOnlineFeaturesResponse.FeatureVector.newBuilder() .addValues(createStrValue("1")) .addValues(createEmptyValue()) .addStatuses(FieldStatus.PRESENT) @@ -251,7 +251,7 @@ public void shouldReturnResponseWithUnsetValuesAndMetadataIfKeysNotPresent() { .addEventTimestamps(now) .addEventTimestamps(Timestamp.newBuilder().build())) .addResults( - GetOnlineFeaturesResponseV2.FeatureVector.newBuilder() + GetOnlineFeaturesResponse.FeatureVector.newBuilder() .addValues(createStrValue("2")) .addValues(createStrValue("5")) .addStatuses(FieldStatus.PRESENT) @@ -265,7 +265,7 @@ public void shouldReturnResponseWithUnsetValuesAndMetadataIfKeysNotPresent() { .addVal("featureview_1:feature_1") .addVal("featureview_1:feature_2"))) .build(); - GetOnlineFeaturesResponseV2 actual = onlineServingServiceV2.getOnlineFeatures(request); + GetOnlineFeaturesResponse actual = onlineServingServiceV2.getOnlineFeatures(request); assertThat(actual, equalTo(expected)); } @@ -317,10 +317,10 @@ public void 
shouldReturnResponseWithValuesAndMetadataIfMaxAgeIsExceeded() { when(tracer.buildSpan(ArgumentMatchers.any())).thenReturn(Mockito.mock(SpanBuilder.class)); - GetOnlineFeaturesResponseV2 expected = - GetOnlineFeaturesResponseV2.newBuilder() + GetOnlineFeaturesResponse expected = + GetOnlineFeaturesResponse.newBuilder() .addResults( - GetOnlineFeaturesResponseV2.FeatureVector.newBuilder() + GetOnlineFeaturesResponse.FeatureVector.newBuilder() .addValues(createStrValue("6")) .addValues(createStrValue("6")) .addStatuses(FieldStatus.OUTSIDE_MAX_AGE) @@ -328,7 +328,7 @@ public void shouldReturnResponseWithValuesAndMetadataIfMaxAgeIsExceeded() { .addEventTimestamps(Timestamp.newBuilder().setSeconds(1).build()) .addEventTimestamps(Timestamp.newBuilder().setSeconds(1).build())) .addResults( - GetOnlineFeaturesResponseV2.FeatureVector.newBuilder() + GetOnlineFeaturesResponse.FeatureVector.newBuilder() .addValues(createStrValue("2")) .addValues(createStrValue("2")) .addStatuses(FieldStatus.PRESENT) @@ -342,7 +342,7 @@ public void shouldReturnResponseWithValuesAndMetadataIfMaxAgeIsExceeded() { .addVal("featureview_1:feature_1") .addVal("featureview_1:feature_2"))) .build(); - GetOnlineFeaturesResponseV2 actual = onlineServingServiceV2.getOnlineFeatures(request); + GetOnlineFeaturesResponse actual = onlineServingServiceV2.getOnlineFeatures(request); assertThat(actual, equalTo(expected)); } diff --git a/java/serving/src/test/resources/docker-compose/docker-compose-redis-it.yml b/java/serving/src/test/resources/docker-compose/docker-compose-redis-it.yml index 22e054f8b1..13835e07d4 100644 --- a/java/serving/src/test/resources/docker-compose/docker-compose-redis-it.yml +++ b/java/serving/src/test/resources/docker-compose/docker-compose-redis-it.yml @@ -5,8 +5,10 @@ services: image: redis:6.2 ports: - "6379:6379" - materialize: + feast: build: feast10 + ports: + - "8080:8080" links: - redis diff --git a/java/serving/src/test/resources/docker-compose/feast10/Dockerfile b/java/serving/src/test/resources/docker-compose/feast10/Dockerfile index bde9f11592..df14bb592b 100644 --- a/java/serving/src/test/resources/docker-compose/feast10/Dockerfile +++ b/java/serving/src/test/resources/docker-compose/feast10/Dockerfile @@ -5,6 +5,11 @@ WORKDIR /usr/src/ COPY requirements.txt ./ RUN pip install --no-cache-dir -r requirements.txt +RUN git clone https://github.com/feast-dev/feast.git /root/feast +RUN cd /root/feast/sdk/python && pip install -e '.[redis]' + +WORKDIR /app COPY . . 
+EXPOSE 8080 -CMD [ "python", "./materialize.py" ] +CMD ["/bin/sh", "-c", "python materialize.py && feast serve_transformations --port 8080"] diff --git a/java/serving/src/test/resources/docker-compose/feast10/definitions.py b/java/serving/src/test/resources/docker-compose/feast10/definitions.py new file mode 100644 index 0000000000..c7ed6c9619 --- /dev/null +++ b/java/serving/src/test/resources/docker-compose/feast10/definitions.py @@ -0,0 +1,97 @@ +import pandas as pd + +from google.protobuf.duration_pb2 import Duration + +from feast.value_type import ValueType +from feast.feature import Feature +from feast.feature_view import FeatureView +from feast.entity import Entity +from feast.feature_service import FeatureService +from feast.on_demand_feature_view import RequestDataSource, on_demand_feature_view +from feast import FileSource + + +file_path = "driver_stats.parquet" +driver_hourly_stats = FileSource( + path=file_path, + event_timestamp_column="event_timestamp", + created_timestamp_column="created", +) + +# Define an entity for the driver. You can think of entity as a primary key used to +# fetch features. +driver = Entity(name="driver_id", value_type=ValueType.INT64, description="driver id",) + +# Our parquet files contain sample data that includes a driver_id column, timestamps and +# three feature column. Here we define a Feature View that will allow us to serve this +# data to our model online. +driver_hourly_stats_view = FeatureView( + name="driver_hourly_stats", + entities=["driver_id"], + ttl=Duration(seconds=86400 * 7), + features=[ + Feature(name="conv_rate", dtype=ValueType.DOUBLE), + Feature(name="acc_rate", dtype=ValueType.FLOAT), + Feature(name="avg_daily_trips", dtype=ValueType.INT64), + ], + online=True, + batch_source=driver_hourly_stats, + tags={}, +) + + +input_request = RequestDataSource( + name="vals_to_add", + schema={ + "val_to_add": ValueType.INT64, + "val_to_add_2": ValueType.INT64 + } +) + + +@on_demand_feature_view( + inputs={ + 'driver_hourly_stats': driver_hourly_stats_view, + 'vals_to_add': input_request + }, + features=[ + Feature(name='conv_rate_plus_val1', dtype=ValueType.DOUBLE), + Feature(name='conv_rate_plus_val2', dtype=ValueType.DOUBLE) + ] +) +def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame: + df = pd.DataFrame() + df['conv_rate_plus_val1'] = (features_df['conv_rate'] + features_df['val_to_add']) + df['conv_rate_plus_val2'] = (features_df['conv_rate'] + features_df['val_to_add_2']) + return df + + +generated_data_source = FileSource( + path="benchmark_data.parquet", + event_timestamp_column="event_timestamp", +) + +entity = Entity( + name="entity", + value_type=ValueType.STRING, +) + +benchmark_feature_views = [ + FeatureView( + name=f"feature_view_{i}", + entities=["entity"], + ttl=Duration(seconds=86400), + features=[ + Feature(name=f"feature_{10 * i + j}", dtype=ValueType.INT64) + for j in range(10) + ], + online=True, + batch_source=generated_data_source, + ) + for i in range(25) +] + +benchmark_feature_service = FeatureService( + name=f"benchmark_feature_service", + features=benchmark_feature_views, +) diff --git a/java/serving/src/test/resources/docker-compose/feast10/materialize.py b/java/serving/src/test/resources/docker-compose/feast10/materialize.py index ca4cc98db2..8389d8527b 100644 --- a/java/serving/src/test/resources/docker-compose/feast10/materialize.py +++ b/java/serving/src/test/resources/docker-compose/feast10/materialize.py @@ -1,12 +1,12 @@ -# This is an example feature definition file - import pandas as 
pd import numpy as np -from google.protobuf.duration_pb2 import Duration - from datetime import datetime, timedelta -from feast import Entity, Feature, FeatureView, FileSource, ValueType, FeatureService, FeatureStore +from feast import FeatureStore + +from definitions import driver_hourly_stats_view, driver, entity,\ + benchmark_feature_service, benchmark_feature_views, transformed_conv_rate + print("Running materialize.py") @@ -23,89 +23,39 @@ # some of rows are beyond 7 days to test OUTSIDE_MAX_AGE status df["event_timestamp"] = start + pd.Series(np.arange(0, 10)).map(lambda days: timedelta(days=days)) -df.to_parquet("driver_stats.parquet") - -# Read data from parquet files. Parquet is convenient for local development mode. For +# Store data in parquet files. Parquet is convenient for local development mode. For # production, you can use your favorite DWH, such as BigQuery. See Feast documentation # for more info. -file_path = "driver_stats.parquet" -driver_hourly_stats = FileSource( - path=file_path, - event_timestamp_column="event_timestamp", - created_timestamp_column="created", -) - -# Define an entity for the driver. You can think of entity as a primary key used to -# fetch features. -driver = Entity(name="driver_id", value_type=ValueType.INT64, description="driver id",) - -# Our parquet files contain sample data that includes a driver_id column, timestamps and -# three feature column. Here we define a Feature View that will allow us to serve this -# data to our model online. -driver_hourly_stats_view = FeatureView( - name="driver_hourly_stats", - entities=["driver_id"], - ttl=Duration(seconds=86400 * 7), - features=[ - Feature(name="conv_rate", dtype=ValueType.DOUBLE), - Feature(name="acc_rate", dtype=ValueType.FLOAT), - Feature(name="avg_daily_trips", dtype=ValueType.INT64), - ], - online=True, - batch_source=driver_hourly_stats, - tags={}, -) +df.to_parquet("driver_stats.parquet") # For Benchmarks -# Please read more in Feast RFC-031 (link https://docs.google.com/document/d/12UuvTQnTTCJhdRgy6h10zSbInNGSyEJkIxpOcgOen1I/edit) +# Please read more in Feast RFC-031 +# (link https://docs.google.com/document/d/12UuvTQnTTCJhdRgy6h10zSbInNGSyEJkIxpOcgOen1I/edit) # about this benchmark setup -def generate_data(num_rows: int, num_features: int, key_space: int, destination: str) -> pd.DataFrame: +def generate_data(num_rows: int, num_features: int, destination: str) -> pd.DataFrame: features = [f"feature_{i}" for i in range(num_features)] columns = ["entity", "event_timestamp"] + features df = pd.DataFrame(0, index=np.arange(num_rows), columns=columns) df["event_timestamp"] = datetime.utcnow() - for column in ["entity"] + features: - df[column] = np.random.randint(1, key_space, num_rows) + for column in features: + df[column] = np.random.randint(1, num_rows, num_rows) + + df["entity"] = "key-" + \ + pd.Series(np.arange(1, num_rows + 1)).astype(pd.StringDtype()) df.to_parquet(destination) -generate_data(10**3, 250, 10**3, "benchmark_data.parquet") - -generated_data_source = FileSource( - path="benchmark_data.parquet", - event_timestamp_column="event_timestamp", -) - -entity = Entity( - name="entity", - value_type=ValueType.INT64, -) - -benchmark_feature_views = [ - FeatureView( - name=f"feature_view_{i}", - entities=["entity"], - ttl=Duration(seconds=86400), - features=[ - Feature(name=f"feature_{10 * i + j}", dtype=ValueType.INT64) - for j in range(10) - ], - online=True, - batch_source=generated_data_source, - ) - for i in range(25) -] - -benchmark_feature_service = FeatureService( - 
name=f"benchmark_feature_service", - features=benchmark_feature_views, -) + +generate_data(10**3, 250, "benchmark_data.parquet") fs = FeatureStore(".") -fs.apply([driver_hourly_stats_view, driver, - entity, benchmark_feature_service, *benchmark_feature_views]) +fs.apply([driver_hourly_stats_view, + transformed_conv_rate, + driver, + entity, benchmark_feature_service, + *benchmark_feature_views]) now = datetime.now() fs.materialize(start, now) diff --git a/java/serving/src/test/resources/docker-compose/feast10/registry.db b/java/serving/src/test/resources/docker-compose/feast10/registry.db index 4590c5800a..746934e3d0 100644 Binary files a/java/serving/src/test/resources/docker-compose/feast10/registry.db and b/java/serving/src/test/resources/docker-compose/feast10/registry.db differ diff --git a/java/serving/src/test/resources/docker-compose/feast10/requirements.txt b/java/serving/src/test/resources/docker-compose/feast10/requirements.txt index 447f126392..94e4771de2 100644 --- a/java/serving/src/test/resources/docker-compose/feast10/requirements.txt +++ b/java/serving/src/test/resources/docker-compose/feast10/requirements.txt @@ -1,5 +1,3 @@ -feast[redis]>=0.13,<1 - # for source generation pyarrow==6.0.0 diff --git a/java/storage/api/pom.xml b/java/storage/api/pom.xml index 583bcd0640..90f656e281 100644 --- a/java/storage/api/pom.xml +++ b/java/storage/api/pom.xml @@ -32,16 +32,10 @@ dev.feast - datatypes-java + feast-datatypes ${project.version} - - - - - - com.google.auto.value auto-value-annotations diff --git a/java/storage/connectors/pom.xml b/java/storage/connectors/pom.xml index e896910e73..11e32a154c 100644 --- a/java/storage/connectors/pom.xml +++ b/java/storage/connectors/pom.xml @@ -41,7 +41,7 @@ dev.feast - datatypes-java + feast-datatypes ${project.version} diff --git a/java/storage/connectors/redis/pom.xml b/java/storage/connectors/redis/pom.xml index 7b0c944a66..ce25f41da6 100644 --- a/java/storage/connectors/redis/pom.xml +++ b/java/storage/connectors/redis/pom.xml @@ -48,6 +48,7 @@ com.google.guava guava + ${guava.version} @@ -61,6 +62,7 @@ com.github.kstyrc embedded-redis + 0.6 test @@ -68,12 +70,14 @@ org.hamcrest hamcrest-core test + ${hamcrest.version} org.hamcrest hamcrest-library test + ${hamcrest.version} @@ -93,7 +97,7 @@ org.slf4j slf4j-simple - 1.7.30 + 1.7.32 test diff --git a/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/EntityKeySerializerV2.java b/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/EntityKeySerializerV2.java index 922a09d3f5..3e9ab7e8ab 100644 --- a/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/EntityKeySerializerV2.java +++ b/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/EntityKeySerializerV2.java @@ -16,16 +16,14 @@ */ package feast.storage.connectors.redis.retriever; -import com.google.common.primitives.UnsignedBytes; import com.google.protobuf.ProtocolStringList; import feast.proto.storage.RedisProto; import feast.proto.types.ValueProto; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; +import java.util.*; +import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.tuple.Pair; // This is derived from @@ -48,70 +46,52 @@ public byte[] serialize(RedisProto.RedisKeyV2 entityKey) { } tuples.sort(Comparator.comparing(Pair::getLeft)); - ByteBuffer 
stringBytes = ByteBuffer.allocate(Integer.BYTES); - stringBytes.order(ByteOrder.LITTLE_ENDIAN); - stringBytes.putInt(ValueProto.ValueType.Enum.STRING.getNumber()); - for (Pair pair : tuples) { - for (final byte b : stringBytes.array()) { - buffer.add(b); - } - for (final byte b : pair.getLeft().getBytes(StandardCharsets.UTF_8)) { - buffer.add(b); - } + buffer.addAll(encodeInteger(ValueProto.ValueType.Enum.STRING.getNumber())); + buffer.addAll(encodeString(pair.getLeft())); } for (Pair pair : tuples) { final ValueProto.Value val = pair.getRight(); switch (val.getValCase()) { case STRING_VAL: - buffer.add(UnsignedBytes.checkedCast(ValueProto.ValueType.Enum.STRING.getNumber())); - buffer.add( - UnsignedBytes.checkedCast( - val.getStringVal().getBytes(StandardCharsets.UTF_8).length)); - for (final byte b : val.getStringVal().getBytes(StandardCharsets.UTF_8)) { - buffer.add(b); - } + String stringVal = val.getStringVal(); + + buffer.addAll(encodeInteger(ValueProto.ValueType.Enum.STRING.getNumber())); + buffer.addAll(encodeInteger(stringVal.length())); + buffer.addAll(encodeString(stringVal)); + break; case BYTES_VAL: - buffer.add(UnsignedBytes.checkedCast(ValueProto.ValueType.Enum.BYTES.getNumber())); - for (final byte b : val.getBytesVal().toByteArray()) { - buffer.add(b); - } + byte[] bytes = val.getBytesVal().toByteArray(); + + buffer.addAll(encodeInteger(ValueProto.ValueType.Enum.BYTES.getNumber())); + buffer.addAll(encodeInteger(bytes.length)); + buffer.addAll(encodeBytes(bytes)); + break; case INT32_VAL: - ByteBuffer int32ByteBuffer = - ByteBuffer.allocate(Integer.BYTES + Integer.BYTES + Integer.BYTES); - int32ByteBuffer.order(ByteOrder.LITTLE_ENDIAN); - int32ByteBuffer.putInt(ValueProto.ValueType.Enum.INT32.getNumber()); - int32ByteBuffer.putInt(Integer.BYTES); - int32ByteBuffer.putInt(val.getInt32Val()); - for (final byte b : int32ByteBuffer.array()) { - buffer.add(b); - } + buffer.addAll(encodeInteger(ValueProto.ValueType.Enum.INT32.getNumber())); + buffer.addAll(encodeInteger(Integer.BYTES)); + buffer.addAll(encodeInteger(val.getInt32Val())); + break; case INT64_VAL: - ByteBuffer int64ByteBuffer = - ByteBuffer.allocate(Integer.BYTES + Integer.BYTES + Integer.BYTES); - int64ByteBuffer.order(ByteOrder.LITTLE_ENDIAN); - int64ByteBuffer.putInt(ValueProto.ValueType.Enum.INT64.getNumber()); - int64ByteBuffer.putInt(Integer.BYTES); + buffer.addAll(encodeInteger(ValueProto.ValueType.Enum.INT64.getNumber())); + buffer.addAll(encodeInteger(Integer.BYTES)); /* This is super dumb - but in https://github.com/feast-dev/feast/blob/dcae1606f53028ce5413567fb8b66f92cfef0f8e/sdk/python/feast/infra/key_encoding_utils.py#L9 we use `struct.pack(" encodeBytes(byte[] toByteArray) { + return Arrays.asList(ArrayUtils.toObject(toByteArray)); + } + + private List encodeInteger(Integer value) { + ByteBuffer buffer = ByteBuffer.allocate(Integer.BYTES); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.putInt(value); + + return Arrays.asList(ArrayUtils.toObject(buffer.array())); + } + + private List encodeString(String value) { + byte[] stringBytes = value.getBytes(StandardCharsets.UTF_8); + return encodeBytes(stringBytes); + } } diff --git a/protos/feast/core/DataSource.proto b/protos/feast/core/DataSource.proto index ee5c6939d7..41bba6443f 100644 --- a/protos/feast/core/DataSource.proto +++ b/protos/feast/core/DataSource.proto @@ -32,19 +32,22 @@ message DataSource { reserved 6 to 10; // Type of Data Source. 
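The refactored EntityKeySerializerV2 above only reorganizes the byte layout into encodeInteger/encodeString/encodeBytes helpers; the layout itself still has to mirror the little-endian packing done by the Python SDK that the Java comment points at. Below is a minimal Python sketch of that layout, not the Feast implementation itself; the type tag numbers and the 4-byte packing of INT64 values are illustrative assumptions:

    import struct

    # Assumed stand-ins for feast.types.ValueType enum numbers (illustrative only).
    STRING_TYPE = 2
    INT64_TYPE = 4

    def encode_entity_key(join_keys, int64_values):
        """Sketch of the key layout: sorted join keys, then typed, length-prefixed values."""
        buf = b""
        for key in sorted(join_keys):
            # 4-byte little-endian type tag followed by the raw UTF-8 key name,
            # matching encodeInteger(...) + encodeString(...) on the Java side.
            buf += struct.pack("<I", STRING_TYPE) + key.encode("utf8")
        for value in int64_values:
            # Type tag, 4-byte length, then the value packed into 4 bytes, mirroring the
            # quirk the Java comment calls out (INT64 values written with a 4-byte length).
            buf += struct.pack("<I", INT64_TYPE) + struct.pack("<I", 4) + struct.pack("<l", value)
        return buf

    print(encode_entity_key(["driver_id"], [1001]).hex())
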
+ // Next available id: 9 enum SourceType { INVALID = 0; BATCH_FILE = 1; + BATCH_SNOWFLAKE = 8; BATCH_BIGQUERY = 2; + BATCH_REDSHIFT = 5; STREAM_KAFKA = 3; STREAM_KINESIS = 4; - BATCH_REDSHIFT = 5; CUSTOM_SOURCE = 6; REQUEST_SOURCE = 7; + } SourceType type = 1; - // Defines mapping between fields in the sourced data + // Defines mapping between fields in the sourced data // and fields in parent FeatureTable. map<string, string> field_mapping = 2; @@ -128,6 +131,22 @@ message DataSource { string schema = 3; } + // Defines options for DataSource that sources features from a Snowflake Query + message SnowflakeOptions { + // Snowflake table name + string table = 1; + + // SQL query that returns a table containing feature data. Must contain an event_timestamp column, and respective + // entity columns + string query = 2; + + // Snowflake schema name + string schema = 3; + + // Snowflake database name + string database = 4; + } + // Defines configuration for custom third-party data sources. message CustomSourceOptions { // Serialized configuration information for the data source. The implementer of the custom data source is @@ -153,5 +172,6 @@ message DataSource { RedshiftOptions redshift_options = 15; RequestDataOptions request_data_options = 18; CustomSourceOptions custom_options = 16; + SnowflakeOptions snowflake_options = 19; } } diff --git a/protos/feast/core/FeatureService.proto b/protos/feast/core/FeatureService.proto index 952b30eb0a..4aaa0d5f06 100644 --- a/protos/feast/core/FeatureService.proto +++ b/protos/feast/core/FeatureService.proto @@ -32,6 +32,9 @@ message FeatureServiceSpec { // Description of the feature service. string description = 5; + + // Owner of the feature service. + string owner = 6; } diff --git a/protos/feast/core/OnDemandFeatureView.proto b/protos/feast/core/OnDemandFeatureView.proto index 31fe90a9ba..58feff5bfd 100644 --- a/protos/feast/core/OnDemandFeatureView.proto +++ b/protos/feast/core/OnDemandFeatureView.proto @@ -55,6 +55,9 @@ message OnDemandFeatureViewSpec { message OnDemandFeatureViewMeta { // Time where this Feature View is created google.protobuf.Timestamp created_timestamp = 1; + + // Time where this Feature View is last updated + google.protobuf.Timestamp last_updated_timestamp = 2; } message OnDemandInput { diff --git a/protos/feast/core/Registry.proto b/protos/feast/core/Registry.proto index 912fa1b90a..3deeb97238 100644 --- a/protos/feast/core/Registry.proto +++ b/protos/feast/core/Registry.proto @@ -28,6 +28,7 @@ import "feast/core/FeatureView.proto"; import "feast/core/InfraObject.proto"; import "feast/core/OnDemandFeatureView.proto"; import "feast/core/RequestFeatureView.proto"; +import "feast/core/SavedDataset.proto"; import "google/protobuf/timestamp.proto"; message Registry { @@ -37,6 +38,7 @@ message Registry { repeated OnDemandFeatureView on_demand_feature_views = 8; repeated RequestFeatureView request_feature_views = 9; repeated FeatureService feature_services = 7; + repeated SavedDataset saved_datasets = 11; Infra infra = 10; string registry_schema_version = 3; // to support migrations; incremented when schema is changed diff --git a/protos/feast/core/SavedDataset.proto b/protos/feast/core/SavedDataset.proto new file mode 100644 index 0000000000..ebd2e56d35 --- /dev/null +++ b/protos/feast/core/SavedDataset.proto @@ -0,0 +1,77 @@ +// +// Copyright 2021 The Feast Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + + +syntax = "proto3"; + +package feast.core; +option java_package = "feast.proto.core"; +option java_outer_classname = "SavedDatasetProto"; +option go_package = "github.com/feast-dev/feast/sdk/go/protos/feast/core"; + +import "google/protobuf/timestamp.proto"; +import "feast/core/FeatureViewProjection.proto"; +import "feast/core/DataSource.proto"; + +message SavedDatasetSpec { + // Name of the dataset. Must be unique since it's possible to overwrite dataset by name + string name = 1; + + // Name of Feast project that this Dataset belongs to. + string project = 2; + + // list of feature references with format ":" + repeated string features = 3; + + // entity columns + request columns from all feature views used during retrieval + repeated string join_keys = 4; + + // Whether full feature names are used in stored data + bool full_feature_names = 5; + + SavedDatasetStorage storage = 6; + + // User defined metadata + map tags = 7; +} + +message SavedDatasetStorage { + oneof kind { + DataSource.FileOptions file_storage = 4; + DataSource.BigQueryOptions bigquery_storage = 5; + DataSource.RedshiftOptions redshift_storage = 6; + DataSource.SnowflakeOptions snowflake_storage = 7; + } +} + +message SavedDatasetMeta { + // Time when this saved dataset is created + google.protobuf.Timestamp created_timestamp = 1; + + // Time when this saved dataset is last updated + google.protobuf.Timestamp last_updated_timestamp = 2; + + // Min timestamp in the dataset (needed for retrieval) + google.protobuf.Timestamp min_event_timestamp = 3; + + // Max timestamp in the dataset (needed for retrieval) + google.protobuf.Timestamp max_event_timestamp = 4; +} + +message SavedDataset { + SavedDatasetSpec spec = 1; + SavedDatasetMeta meta = 2; +} diff --git a/protos/feast/core/ValidationProfile.proto b/protos/feast/core/ValidationProfile.proto new file mode 100644 index 0000000000..31c4e150a0 --- /dev/null +++ b/protos/feast/core/ValidationProfile.proto @@ -0,0 +1,48 @@ +// +// Copyright 2021 The Feast Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
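The SavedDataset messages above only describe persisted metadata; creating one is expected to happen through the Python SDK. A hypothetical sketch of that flow, assuming an SDK API along the lines of FeatureStore.create_saved_dataset and a SavedDatasetFileStorage wrapper corresponding to the file_storage option (both names are assumptions, not taken from this diff), run inside an applied feature repository:

    from datetime import datetime, timedelta

    import pandas as pd

    from feast import FeatureStore
    from feast.infra.offline_stores.file_source import SavedDatasetFileStorage

    store = FeatureStore(repo_path=".")

    entity_df = pd.DataFrame(
        {
            "driver_id": [1001, 1002],
            "event_timestamp": [datetime.utcnow() - timedelta(days=1)] * 2,
        }
    )

    # Historical retrieval produces the rows that the saved dataset will pin down.
    job = store.get_historical_features(
        entity_df=entity_df,
        features=["driver_hourly_stats:conv_rate", "driver_hourly_stats:acc_rate"],
    )

    # Persisting the result is what SavedDatasetSpec/SavedDatasetStorage describe;
    # here the file_storage variant (DataSource.FileOptions) backs the dataset.
    dataset = store.create_saved_dataset(
        from_=job,
        name="driver_stats_dataset",
        storage=SavedDatasetFileStorage(path="driver_stats_dataset.parquet"),
    )
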
+// + + +syntax = "proto3"; + +package feast.core; +option java_package = "feast.proto.core"; +option java_outer_classname = "ValidationProfile"; +option go_package = "github.com/feast-dev/feast/sdk/go/protos/feast/core"; + +import "google/protobuf/timestamp.proto"; +import "feast/core/SavedDataset.proto"; + +message GEValidationProfiler { + message UserDefinedProfiler { + // The python-syntax function body (serialized by dill) + bytes body = 1; + } + + UserDefinedProfiler profiler = 1; +} + +message GEValidationProfile { + // JSON-serialized ExpectationSuite object + bytes expectation_suite = 1; +} + +message ValidationReference { + SavedDataset dataset = 1; + + oneof profiler { + GEValidationProfiler ge_profiler = 2; + } +} diff --git a/protos/feast/serving/ServingService.proto b/protos/feast/serving/ServingService.proto index 7d45e61a5e..6c551a97ba 100644 --- a/protos/feast/serving/ServingService.proto +++ b/protos/feast/serving/ServingService.proto @@ -30,7 +30,7 @@ service ServingService { rpc GetFeastServingInfo (GetFeastServingInfoRequest) returns (GetFeastServingInfoResponse); // Get online features synchronously. - rpc GetOnlineFeatures (GetOnlineFeaturesRequest) returns (GetOnlineFeaturesResponseV2); + rpc GetOnlineFeatures (GetOnlineFeaturesRequest) returns (GetOnlineFeaturesResponse); } message GetFeastServingInfoRequest {} @@ -95,19 +95,6 @@ message GetOnlineFeaturesRequest { } message GetOnlineFeaturesResponse { - // Feature values retrieved from feast. - repeated FieldValues field_values = 1; - - message FieldValues { - // Map of feature or entity name to feature/entity values. - // Timestamps are not returned in this response. - map fields = 1; - // Map of feature or entity name to feature/entity statuses/metadata. - map statuses = 2; - } -} - -message GetOnlineFeaturesResponseV2 { GetOnlineFeaturesResponseMetadata metadata = 1; // Length of "results" array should match length of requested features. diff --git a/sdk/go/client_test.go b/sdk/go/client_test.go index 95be34af73..cb15f66654 100644 --- a/sdk/go/client_test.go +++ b/sdk/go/client_test.go @@ -33,8 +33,8 @@ func TestGetOnlineFeatures(t *testing.T) { Project: "driver_project", }, want: OnlineFeaturesResponse{ - RawResponse: &serving.GetOnlineFeaturesResponseV2{ - Results: []*serving.GetOnlineFeaturesResponseV2_FeatureVector{ + RawResponse: &serving.GetOnlineFeaturesResponse{ + Results: []*serving.GetOnlineFeaturesResponse_FeatureVector{ { Values: []*types.Value{Int64Val(1)}, Statuses: []serving.FieldStatus{ diff --git a/sdk/go/mocks/serving_mock.go b/sdk/go/mocks/serving_mock.go index 57ee0c1ea4..038d49f5e5 100644 --- a/sdk/go/mocks/serving_mock.go +++ b/sdk/go/mocks/serving_mock.go @@ -57,14 +57,14 @@ func (mr *MockServingServiceClientMockRecorder) GetFeastServingInfo(arg0, arg1 i } // GetOnlineFeaturesV2 mocks base method -func (m *MockServingServiceClient) GetOnlineFeatures(arg0 context.Context, arg1 *serving.GetOnlineFeaturesRequest, arg2 ...grpc.CallOption) (*serving.GetOnlineFeaturesResponseV2, error) { +func (m *MockServingServiceClient) GetOnlineFeatures(arg0 context.Context, arg1 *serving.GetOnlineFeaturesRequest, arg2 ...grpc.CallOption) (*serving.GetOnlineFeaturesResponse, error) { m.ctrl.T.Helper() varargs := []interface{}{arg0, arg1} for _, a := range arg2 { varargs = append(varargs, a) } ret := m.ctrl.Call(m, "GetOnlineFeatures", varargs...) 
- ret0, _ := ret[0].(*serving.GetOnlineFeaturesResponseV2) + ret0, _ := ret[0].(*serving.GetOnlineFeaturesResponse) ret1, _ := ret[1].(error) return ret0, ret1 } diff --git a/sdk/go/protos/feast/core/DataFormat.pb.go b/sdk/go/protos/feast/core/DataFormat.pb.go index 6745171c90..64c4ec8071 100644 --- a/sdk/go/protos/feast/core/DataFormat.pb.go +++ b/sdk/go/protos/feast/core/DataFormat.pb.go @@ -17,7 +17,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v3.17.3 +// protoc v3.19.4 // source: feast/core/DataFormat.proto package core diff --git a/sdk/go/protos/feast/core/DataSource.pb.go b/sdk/go/protos/feast/core/DataSource.pb.go index 83f0bc6736..d0d42c66de 100644 --- a/sdk/go/protos/feast/core/DataSource.pb.go +++ b/sdk/go/protos/feast/core/DataSource.pb.go @@ -17,7 +17,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v3.17.3 +// protoc v3.19.4 // source: feast/core/DataSource.proto package core @@ -38,17 +38,19 @@ const ( ) // Type of Data Source. +// Next available id: 9 type DataSource_SourceType int32 const ( - DataSource_INVALID DataSource_SourceType = 0 - DataSource_BATCH_FILE DataSource_SourceType = 1 - DataSource_BATCH_BIGQUERY DataSource_SourceType = 2 - DataSource_STREAM_KAFKA DataSource_SourceType = 3 - DataSource_STREAM_KINESIS DataSource_SourceType = 4 - DataSource_BATCH_REDSHIFT DataSource_SourceType = 5 - DataSource_CUSTOM_SOURCE DataSource_SourceType = 6 - DataSource_REQUEST_SOURCE DataSource_SourceType = 7 + DataSource_INVALID DataSource_SourceType = 0 + DataSource_BATCH_FILE DataSource_SourceType = 1 + DataSource_BATCH_SNOWFLAKE DataSource_SourceType = 8 + DataSource_BATCH_BIGQUERY DataSource_SourceType = 2 + DataSource_BATCH_REDSHIFT DataSource_SourceType = 5 + DataSource_STREAM_KAFKA DataSource_SourceType = 3 + DataSource_STREAM_KINESIS DataSource_SourceType = 4 + DataSource_CUSTOM_SOURCE DataSource_SourceType = 6 + DataSource_REQUEST_SOURCE DataSource_SourceType = 7 ) // Enum value maps for DataSource_SourceType. 
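The new BATCH_SNOWFLAKE enum value and SnowflakeOptions oneof only define the wire format; on the registration side the corresponding declaration would look roughly like the sketch below, assuming the Python SDK exposes a SnowflakeSource whose arguments map onto the new options (database, schema, and table or query). The connection details are placeholders:

    from feast import SnowflakeSource

    driver_stats_snowflake = SnowflakeSource(
        database="FEAST",                # -> SnowflakeOptions.database
        schema="PUBLIC",                 # -> SnowflakeOptions.schema
        table="DRIVER_HOURLY_STATS",     # -> SnowflakeOptions.table
        event_timestamp_column="event_timestamp",
        created_timestamp_column="created",
    )

    # A query can stand in for the table, filling SnowflakeOptions.query instead:
    driver_stats_query = SnowflakeSource(
        database="FEAST",
        schema="PUBLIC",
        query="SELECT * FROM DRIVER_HOURLY_STATS",
        event_timestamp_column="event_timestamp",
    )
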
@@ -56,22 +58,24 @@ var ( DataSource_SourceType_name = map[int32]string{ 0: "INVALID", 1: "BATCH_FILE", + 8: "BATCH_SNOWFLAKE", 2: "BATCH_BIGQUERY", + 5: "BATCH_REDSHIFT", 3: "STREAM_KAFKA", 4: "STREAM_KINESIS", - 5: "BATCH_REDSHIFT", 6: "CUSTOM_SOURCE", 7: "REQUEST_SOURCE", } DataSource_SourceType_value = map[string]int32{ - "INVALID": 0, - "BATCH_FILE": 1, - "BATCH_BIGQUERY": 2, - "STREAM_KAFKA": 3, - "STREAM_KINESIS": 4, - "BATCH_REDSHIFT": 5, - "CUSTOM_SOURCE": 6, - "REQUEST_SOURCE": 7, + "INVALID": 0, + "BATCH_FILE": 1, + "BATCH_SNOWFLAKE": 8, + "BATCH_BIGQUERY": 2, + "BATCH_REDSHIFT": 5, + "STREAM_KAFKA": 3, + "STREAM_KINESIS": 4, + "CUSTOM_SOURCE": 6, + "REQUEST_SOURCE": 7, } ) @@ -132,6 +136,7 @@ type DataSource struct { // *DataSource_RedshiftOptions_ // *DataSource_RequestDataOptions_ // *DataSource_CustomOptions + // *DataSource_SnowflakeOptions_ Options isDataSource_Options `protobuf_oneof:"options"` } @@ -265,6 +270,13 @@ func (x *DataSource) GetCustomOptions() *DataSource_CustomSourceOptions { return nil } +func (x *DataSource) GetSnowflakeOptions() *DataSource_SnowflakeOptions { + if x, ok := x.GetOptions().(*DataSource_SnowflakeOptions_); ok { + return x.SnowflakeOptions + } + return nil +} + type isDataSource_Options interface { isDataSource_Options() } @@ -297,6 +309,10 @@ type DataSource_CustomOptions struct { CustomOptions *DataSource_CustomSourceOptions `protobuf:"bytes,16,opt,name=custom_options,json=customOptions,proto3,oneof"` } +type DataSource_SnowflakeOptions_ struct { + SnowflakeOptions *DataSource_SnowflakeOptions `protobuf:"bytes,19,opt,name=snowflake_options,json=snowflakeOptions,proto3,oneof"` +} + func (*DataSource_FileOptions_) isDataSource_Options() {} func (*DataSource_BigqueryOptions) isDataSource_Options() {} @@ -311,6 +327,8 @@ func (*DataSource_RequestDataOptions_) isDataSource_Options() {} func (*DataSource_CustomOptions) isDataSource_Options() {} +func (*DataSource_SnowflakeOptions_) isDataSource_Options() {} + // Defines options for DataSource that sources features from a file type DataSource_FileOptions struct { state protoimpl.MessageState @@ -646,6 +664,83 @@ func (x *DataSource_RedshiftOptions) GetSchema() string { return "" } +// Defines options for DataSource that sources features from a Snowflake Query +type DataSource_SnowflakeOptions struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Snowflake table name + Table string `protobuf:"bytes,1,opt,name=table,proto3" json:"table,omitempty"` + // SQL query that returns a table containing feature data. 
Must contain an event_timestamp column, and respective + // entity columns + Query string `protobuf:"bytes,2,opt,name=query,proto3" json:"query,omitempty"` + // Snowflake schema name + Schema string `protobuf:"bytes,3,opt,name=schema,proto3" json:"schema,omitempty"` + // Snowflake schema name + Database string `protobuf:"bytes,4,opt,name=database,proto3" json:"database,omitempty"` +} + +func (x *DataSource_SnowflakeOptions) Reset() { + *x = DataSource_SnowflakeOptions{} + if protoimpl.UnsafeEnabled { + mi := &file_feast_core_DataSource_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *DataSource_SnowflakeOptions) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DataSource_SnowflakeOptions) ProtoMessage() {} + +func (x *DataSource_SnowflakeOptions) ProtoReflect() protoreflect.Message { + mi := &file_feast_core_DataSource_proto_msgTypes[7] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DataSource_SnowflakeOptions.ProtoReflect.Descriptor instead. +func (*DataSource_SnowflakeOptions) Descriptor() ([]byte, []int) { + return file_feast_core_DataSource_proto_rawDescGZIP(), []int{0, 6} +} + +func (x *DataSource_SnowflakeOptions) GetTable() string { + if x != nil { + return x.Table + } + return "" +} + +func (x *DataSource_SnowflakeOptions) GetQuery() string { + if x != nil { + return x.Query + } + return "" +} + +func (x *DataSource_SnowflakeOptions) GetSchema() string { + if x != nil { + return x.Schema + } + return "" +} + +func (x *DataSource_SnowflakeOptions) GetDatabase() string { + if x != nil { + return x.Database + } + return "" +} + // Defines configuration for custom third-party data sources. type DataSource_CustomSourceOptions struct { state protoimpl.MessageState @@ -660,7 +755,7 @@ type DataSource_CustomSourceOptions struct { func (x *DataSource_CustomSourceOptions) Reset() { *x = DataSource_CustomSourceOptions{} if protoimpl.UnsafeEnabled { - mi := &file_feast_core_DataSource_proto_msgTypes[7] + mi := &file_feast_core_DataSource_proto_msgTypes[8] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -673,7 +768,7 @@ func (x *DataSource_CustomSourceOptions) String() string { func (*DataSource_CustomSourceOptions) ProtoMessage() {} func (x *DataSource_CustomSourceOptions) ProtoReflect() protoreflect.Message { - mi := &file_feast_core_DataSource_proto_msgTypes[7] + mi := &file_feast_core_DataSource_proto_msgTypes[8] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -686,7 +781,7 @@ func (x *DataSource_CustomSourceOptions) ProtoReflect() protoreflect.Message { // Deprecated: Use DataSource_CustomSourceOptions.ProtoReflect.Descriptor instead. 
func (*DataSource_CustomSourceOptions) Descriptor() ([]byte, []int) { - return file_feast_core_DataSource_proto_rawDescGZIP(), []int{0, 6} + return file_feast_core_DataSource_proto_rawDescGZIP(), []int{0, 7} } func (x *DataSource_CustomSourceOptions) GetConfiguration() []byte { @@ -711,7 +806,7 @@ type DataSource_RequestDataOptions struct { func (x *DataSource_RequestDataOptions) Reset() { *x = DataSource_RequestDataOptions{} if protoimpl.UnsafeEnabled { - mi := &file_feast_core_DataSource_proto_msgTypes[8] + mi := &file_feast_core_DataSource_proto_msgTypes[9] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -724,7 +819,7 @@ func (x *DataSource_RequestDataOptions) String() string { func (*DataSource_RequestDataOptions) ProtoMessage() {} func (x *DataSource_RequestDataOptions) ProtoReflect() protoreflect.Message { - mi := &file_feast_core_DataSource_proto_msgTypes[8] + mi := &file_feast_core_DataSource_proto_msgTypes[9] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -737,7 +832,7 @@ func (x *DataSource_RequestDataOptions) ProtoReflect() protoreflect.Message { // Deprecated: Use DataSource_RequestDataOptions.ProtoReflect.Descriptor instead. func (*DataSource_RequestDataOptions) Descriptor() ([]byte, []int) { - return file_feast_core_DataSource_proto_rawDescGZIP(), []int{0, 7} + return file_feast_core_DataSource_proto_rawDescGZIP(), []int{0, 8} } func (x *DataSource_RequestDataOptions) GetName() string { @@ -763,7 +858,7 @@ var file_feast_core_DataSource_proto_rawDesc = []byte{ 0x2f, 0x63, 0x6f, 0x72, 0x65, 0x2f, 0x44, 0x61, 0x74, 0x61, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x17, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2f, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, - 0x85, 0x10, 0x0a, 0x0a, 0x44, 0x61, 0x74, 0x61, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x12, 0x35, + 0xe6, 0x11, 0x0a, 0x0a, 0x44, 0x61, 0x74, 0x61, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x12, 0x35, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x21, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x44, 0x61, 0x74, 0x61, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x2e, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x54, 0x79, 0x70, 0x65, 0x52, @@ -822,82 +917,96 @@ var file_feast_core_DataSource_proto_rawDesc = []byte{ 0x72, 0x65, 0x2e, 0x44, 0x61, 0x74, 0x61, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x2e, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x48, 0x00, 0x52, 0x0d, 0x63, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x4f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x73, 0x1a, 0x3f, 0x0a, 0x11, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x4d, 0x61, 0x70, 0x70, 0x69, - 0x6e, 0x67, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, - 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, - 0x02, 0x38, 0x01, 0x1a, 0x93, 0x01, 0x0a, 0x0b, 0x46, 0x69, 0x6c, 0x65, 0x4f, 0x70, 0x74, 0x69, - 0x6f, 0x6e, 0x73, 0x12, 0x37, 0x0a, 0x0b, 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x66, 0x6f, 0x72, 0x6d, - 0x61, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, - 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x46, 0x69, 0x6c, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, - 0x52, 0x0a, 
0x66, 0x69, 0x6c, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x12, 0x19, 0x0a, 0x08, - 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x75, 0x72, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, - 0x66, 0x69, 0x6c, 0x65, 0x55, 0x72, 0x6c, 0x12, 0x30, 0x0a, 0x14, 0x73, 0x33, 0x5f, 0x65, 0x6e, - 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x6f, 0x76, 0x65, 0x72, 0x72, 0x69, 0x64, 0x65, 0x18, - 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x12, 0x73, 0x33, 0x45, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, - 0x74, 0x4f, 0x76, 0x65, 0x72, 0x72, 0x69, 0x64, 0x65, 0x1a, 0x44, 0x0a, 0x0f, 0x42, 0x69, 0x67, - 0x51, 0x75, 0x65, 0x72, 0x79, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x1b, 0x0a, 0x09, - 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x72, 0x65, 0x66, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x08, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x52, 0x65, 0x66, 0x12, 0x14, 0x0a, 0x05, 0x71, 0x75, 0x65, - 0x72, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x71, 0x75, 0x65, 0x72, 0x79, 0x1a, - 0x92, 0x01, 0x0a, 0x0c, 0x4b, 0x61, 0x66, 0x6b, 0x61, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x12, 0x2b, 0x0a, 0x11, 0x62, 0x6f, 0x6f, 0x74, 0x73, 0x74, 0x72, 0x61, 0x70, 0x5f, 0x73, 0x65, - 0x72, 0x76, 0x65, 0x72, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x10, 0x62, 0x6f, 0x6f, - 0x74, 0x73, 0x74, 0x72, 0x61, 0x70, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x73, 0x12, 0x14, 0x0a, - 0x05, 0x74, 0x6f, 0x70, 0x69, 0x63, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x74, 0x6f, - 0x70, 0x69, 0x63, 0x12, 0x3f, 0x0a, 0x0e, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x5f, 0x66, - 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x66, 0x65, - 0x61, 0x73, 0x74, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x46, - 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x52, 0x0d, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, - 0x72, 0x6d, 0x61, 0x74, 0x1a, 0x88, 0x01, 0x0a, 0x0e, 0x4b, 0x69, 0x6e, 0x65, 0x73, 0x69, 0x73, - 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x72, 0x65, 0x67, 0x69, 0x6f, - 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x72, 0x65, 0x67, 0x69, 0x6f, 0x6e, 0x12, - 0x1f, 0x0a, 0x0b, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x4e, 0x61, 0x6d, 0x65, - 0x12, 0x3d, 0x0a, 0x0d, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, - 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, - 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x46, 0x6f, 0x72, 0x6d, 0x61, - 0x74, 0x52, 0x0c, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x1a, - 0x55, 0x0a, 0x0f, 0x52, 0x65, 0x64, 0x73, 0x68, 0x69, 0x66, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x12, 0x56, 0x0a, 0x11, 0x73, 0x6e, 0x6f, 0x77, 0x66, 0x6c, 0x61, 0x6b, 0x65, 0x5f, + 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x13, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x27, 0x2e, + 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x44, 0x61, 0x74, 0x61, 0x53, + 0x6f, 0x75, 0x72, 0x63, 0x65, 0x2e, 0x53, 0x6e, 0x6f, 0x77, 0x66, 0x6c, 0x61, 0x6b, 0x65, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x48, 0x00, 0x52, 0x10, 0x73, 0x6e, 0x6f, 0x77, 0x66, 0x6c, + 0x61, 0x6b, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x3f, 0x0a, 0x11, 0x46, 0x69, + 0x65, 0x6c, 0x64, 0x4d, 0x61, 0x70, 0x70, 0x69, 0x6e, 0x67, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, + 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 
0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, + 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x93, 0x01, 0x0a, 0x0b, + 0x46, 0x69, 0x6c, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x37, 0x0a, 0x0b, 0x66, + 0x69, 0x6c, 0x65, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x16, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x46, 0x69, + 0x6c, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x52, 0x0a, 0x66, 0x69, 0x6c, 0x65, 0x46, 0x6f, + 0x72, 0x6d, 0x61, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x75, 0x72, 0x6c, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x66, 0x69, 0x6c, 0x65, 0x55, 0x72, 0x6c, 0x12, + 0x30, 0x0a, 0x14, 0x73, 0x33, 0x5f, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x6f, + 0x76, 0x65, 0x72, 0x72, 0x69, 0x64, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x12, 0x73, + 0x33, 0x45, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x4f, 0x76, 0x65, 0x72, 0x72, 0x69, 0x64, + 0x65, 0x1a, 0x44, 0x0a, 0x0f, 0x42, 0x69, 0x67, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4f, 0x70, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x72, 0x65, + 0x66, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x52, 0x65, + 0x66, 0x12, 0x14, 0x0a, 0x05, 0x71, 0x75, 0x65, 0x72, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x05, 0x71, 0x75, 0x65, 0x72, 0x79, 0x1a, 0x92, 0x01, 0x0a, 0x0c, 0x4b, 0x61, 0x66, 0x6b, + 0x61, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x2b, 0x0a, 0x11, 0x62, 0x6f, 0x6f, 0x74, + 0x73, 0x74, 0x72, 0x61, 0x70, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x73, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x10, 0x62, 0x6f, 0x6f, 0x74, 0x73, 0x74, 0x72, 0x61, 0x70, 0x53, 0x65, + 0x72, 0x76, 0x65, 0x72, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x70, 0x69, 0x63, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x74, 0x6f, 0x70, 0x69, 0x63, 0x12, 0x3f, 0x0a, 0x0e, 0x6d, + 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x63, 0x6f, 0x72, 0x65, + 0x2e, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x52, 0x0d, 0x6d, + 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x1a, 0x88, 0x01, 0x0a, + 0x0e, 0x4b, 0x69, 0x6e, 0x65, 0x73, 0x69, 0x73, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, + 0x16, 0x0a, 0x06, 0x72, 0x65, 0x67, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x06, 0x72, 0x65, 0x67, 0x69, 0x6f, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x73, 0x74, 0x72, 0x65, 0x61, + 0x6d, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x73, 0x74, + 0x72, 0x65, 0x61, 0x6d, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x3d, 0x0a, 0x0d, 0x72, 0x65, 0x63, 0x6f, + 0x72, 0x64, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x18, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x53, 0x74, 0x72, + 0x65, 0x61, 0x6d, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x52, 0x0c, 0x72, 0x65, 0x63, 0x6f, 0x72, + 0x64, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x1a, 0x55, 0x0a, 0x0f, 0x52, 0x65, 0x64, 0x73, 0x68, + 0x69, 0x66, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x61, + 0x62, 0x6c, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 
0x05, 0x74, 0x61, 0x62, 0x6c, 0x65, + 0x12, 0x14, 0x0a, 0x05, 0x71, 0x75, 0x65, 0x72, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x05, 0x71, 0x75, 0x65, 0x72, 0x79, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x1a, 0x72, + 0x0a, 0x10, 0x53, 0x6e, 0x6f, 0x77, 0x66, 0x6c, 0x61, 0x6b, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x71, 0x75, 0x65, 0x72, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x71, 0x75, 0x65, 0x72, 0x79, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, - 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x1a, 0x3b, 0x0a, 0x13, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, - 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x24, 0x0a, - 0x0d, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0d, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, - 0x69, 0x6f, 0x6e, 0x1a, 0xcf, 0x01, 0x0a, 0x12, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x44, - 0x61, 0x74, 0x61, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, - 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x4d, - 0x0a, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x35, - 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x44, 0x61, 0x74, 0x61, - 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x2e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x44, 0x61, - 0x74, 0x61, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, - 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x1a, 0x56, 0x0a, - 0x0b, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, - 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x31, - 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1b, 0x2e, - 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2e, 0x56, 0x61, 0x6c, 0x75, - 0x65, 0x54, 0x79, 0x70, 0x65, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, - 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x9e, 0x01, 0x0a, 0x0a, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, - 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x49, 0x4e, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x10, - 0x00, 0x12, 0x0e, 0x0a, 0x0a, 0x42, 0x41, 0x54, 0x43, 0x48, 0x5f, 0x46, 0x49, 0x4c, 0x45, 0x10, - 0x01, 0x12, 0x12, 0x0a, 0x0e, 0x42, 0x41, 0x54, 0x43, 0x48, 0x5f, 0x42, 0x49, 0x47, 0x51, 0x55, - 0x45, 0x52, 0x59, 0x10, 0x02, 0x12, 0x10, 0x0a, 0x0c, 0x53, 0x54, 0x52, 0x45, 0x41, 0x4d, 0x5f, - 0x4b, 0x41, 0x46, 0x4b, 0x41, 0x10, 0x03, 0x12, 0x12, 0x0a, 0x0e, 0x53, 0x54, 0x52, 0x45, 0x41, - 0x4d, 0x5f, 0x4b, 0x49, 0x4e, 0x45, 0x53, 0x49, 0x53, 0x10, 0x04, 0x12, 0x12, 0x0a, 0x0e, 0x42, - 0x41, 0x54, 0x43, 0x48, 0x5f, 0x52, 0x45, 0x44, 0x53, 0x48, 0x49, 0x46, 0x54, 0x10, 0x05, 0x12, - 0x11, 0x0a, 0x0d, 0x43, 0x55, 0x53, 0x54, 0x4f, 0x4d, 0x5f, 0x53, 0x4f, 0x55, 0x52, 0x43, 0x45, - 0x10, 0x06, 0x12, 0x12, 0x0a, 0x0e, 0x52, 0x45, 0x51, 0x55, 0x45, 0x53, 0x54, 0x5f, 0x53, 0x4f, - 0x55, 0x52, 0x43, 0x45, 0x10, 0x07, 0x42, 0x09, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 
0x6e, - 0x73, 0x4a, 0x04, 0x08, 0x06, 0x10, 0x0b, 0x42, 0x58, 0x0a, 0x10, 0x66, 0x65, 0x61, 0x73, 0x74, - 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x42, 0x0f, 0x44, 0x61, 0x74, - 0x61, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x5a, 0x33, 0x67, 0x69, - 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2d, 0x64, - 0x65, 0x76, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2f, 0x73, 0x64, 0x6b, 0x2f, 0x67, 0x6f, 0x2f, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2f, 0x63, 0x6f, 0x72, - 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x12, 0x1a, 0x0a, 0x08, 0x64, 0x61, 0x74, 0x61, 0x62, 0x61, + 0x73, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x64, 0x61, 0x74, 0x61, 0x62, 0x61, + 0x73, 0x65, 0x1a, 0x3b, 0x0a, 0x13, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x53, 0x6f, 0x75, 0x72, + 0x63, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x24, 0x0a, 0x0d, 0x63, 0x6f, 0x6e, + 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, + 0x52, 0x0d, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x1a, + 0xcf, 0x01, 0x0a, 0x12, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x44, 0x61, 0x74, 0x61, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x4d, 0x0a, 0x06, 0x73, 0x63, + 0x68, 0x65, 0x6d, 0x61, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x35, 0x2e, 0x66, 0x65, 0x61, + 0x73, 0x74, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x44, 0x61, 0x74, 0x61, 0x53, 0x6f, 0x75, 0x72, + 0x63, 0x65, 0x2e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x44, 0x61, 0x74, 0x61, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x45, 0x6e, 0x74, 0x72, + 0x79, 0x52, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x1a, 0x56, 0x0a, 0x0b, 0x53, 0x63, 0x68, + 0x65, 0x6d, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x31, 0x0a, 0x05, 0x76, 0x61, + 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1b, 0x2e, 0x66, 0x65, 0x61, 0x73, + 0x74, 0x2e, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2e, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x54, 0x79, 0x70, + 0x65, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, + 0x01, 0x22, 0xb3, 0x01, 0x0a, 0x0a, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x54, 0x79, 0x70, 0x65, + 0x12, 0x0b, 0x0a, 0x07, 0x49, 0x4e, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x10, 0x00, 0x12, 0x0e, 0x0a, + 0x0a, 0x42, 0x41, 0x54, 0x43, 0x48, 0x5f, 0x46, 0x49, 0x4c, 0x45, 0x10, 0x01, 0x12, 0x13, 0x0a, + 0x0f, 0x42, 0x41, 0x54, 0x43, 0x48, 0x5f, 0x53, 0x4e, 0x4f, 0x57, 0x46, 0x4c, 0x41, 0x4b, 0x45, + 0x10, 0x08, 0x12, 0x12, 0x0a, 0x0e, 0x42, 0x41, 0x54, 0x43, 0x48, 0x5f, 0x42, 0x49, 0x47, 0x51, + 0x55, 0x45, 0x52, 0x59, 0x10, 0x02, 0x12, 0x12, 0x0a, 0x0e, 0x42, 0x41, 0x54, 0x43, 0x48, 0x5f, + 0x52, 0x45, 0x44, 0x53, 0x48, 0x49, 0x46, 0x54, 0x10, 0x05, 0x12, 0x10, 0x0a, 0x0c, 0x53, 0x54, + 0x52, 0x45, 0x41, 0x4d, 0x5f, 0x4b, 0x41, 0x46, 0x4b, 0x41, 0x10, 0x03, 0x12, 0x12, 0x0a, 0x0e, + 0x53, 0x54, 0x52, 0x45, 0x41, 0x4d, 0x5f, 0x4b, 0x49, 0x4e, 0x45, 0x53, 0x49, 0x53, 0x10, 0x04, + 0x12, 0x11, 0x0a, 0x0d, 0x43, 0x55, 0x53, 0x54, 0x4f, 0x4d, 0x5f, 0x53, 0x4f, 0x55, 0x52, 0x43, + 0x45, 0x10, 0x06, 0x12, 0x12, 0x0a, 0x0e, 0x52, 0x45, 0x51, 
0x55, 0x45, 0x53, 0x54, 0x5f, 0x53, + 0x4f, 0x55, 0x52, 0x43, 0x45, 0x10, 0x07, 0x42, 0x09, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x4a, 0x04, 0x08, 0x06, 0x10, 0x0b, 0x42, 0x58, 0x0a, 0x10, 0x66, 0x65, 0x61, 0x73, + 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x42, 0x0f, 0x44, 0x61, + 0x74, 0x61, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x5a, 0x33, 0x67, + 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2d, + 0x64, 0x65, 0x76, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2f, 0x73, 0x64, 0x6b, 0x2f, 0x67, 0x6f, + 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2f, 0x63, 0x6f, + 0x72, 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -913,7 +1022,7 @@ func file_feast_core_DataSource_proto_rawDescGZIP() []byte { } var file_feast_core_DataSource_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_feast_core_DataSource_proto_msgTypes = make([]protoimpl.MessageInfo, 10) +var file_feast_core_DataSource_proto_msgTypes = make([]protoimpl.MessageInfo, 11) var file_feast_core_DataSource_proto_goTypes = []interface{}{ (DataSource_SourceType)(0), // 0: feast.core.DataSource.SourceType (*DataSource)(nil), // 1: feast.core.DataSource @@ -923,12 +1032,13 @@ var file_feast_core_DataSource_proto_goTypes = []interface{}{ (*DataSource_KafkaOptions)(nil), // 5: feast.core.DataSource.KafkaOptions (*DataSource_KinesisOptions)(nil), // 6: feast.core.DataSource.KinesisOptions (*DataSource_RedshiftOptions)(nil), // 7: feast.core.DataSource.RedshiftOptions - (*DataSource_CustomSourceOptions)(nil), // 8: feast.core.DataSource.CustomSourceOptions - (*DataSource_RequestDataOptions)(nil), // 9: feast.core.DataSource.RequestDataOptions - nil, // 10: feast.core.DataSource.RequestDataOptions.SchemaEntry - (*FileFormat)(nil), // 11: feast.core.FileFormat - (*StreamFormat)(nil), // 12: feast.core.StreamFormat - (types.ValueType_Enum)(0), // 13: feast.types.ValueType.Enum + (*DataSource_SnowflakeOptions)(nil), // 8: feast.core.DataSource.SnowflakeOptions + (*DataSource_CustomSourceOptions)(nil), // 9: feast.core.DataSource.CustomSourceOptions + (*DataSource_RequestDataOptions)(nil), // 10: feast.core.DataSource.RequestDataOptions + nil, // 11: feast.core.DataSource.RequestDataOptions.SchemaEntry + (*FileFormat)(nil), // 12: feast.core.FileFormat + (*StreamFormat)(nil), // 13: feast.core.StreamFormat + (types.ValueType_Enum)(0), // 14: feast.types.ValueType.Enum } var file_feast_core_DataSource_proto_depIdxs = []int32{ 0, // 0: feast.core.DataSource.type:type_name -> feast.core.DataSource.SourceType @@ -938,18 +1048,19 @@ var file_feast_core_DataSource_proto_depIdxs = []int32{ 5, // 4: feast.core.DataSource.kafka_options:type_name -> feast.core.DataSource.KafkaOptions 6, // 5: feast.core.DataSource.kinesis_options:type_name -> feast.core.DataSource.KinesisOptions 7, // 6: feast.core.DataSource.redshift_options:type_name -> feast.core.DataSource.RedshiftOptions - 9, // 7: feast.core.DataSource.request_data_options:type_name -> feast.core.DataSource.RequestDataOptions - 8, // 8: feast.core.DataSource.custom_options:type_name -> feast.core.DataSource.CustomSourceOptions - 11, // 9: feast.core.DataSource.FileOptions.file_format:type_name -> feast.core.FileFormat - 12, // 10: feast.core.DataSource.KafkaOptions.message_format:type_name -> feast.core.StreamFormat - 12, // 11: feast.core.DataSource.KinesisOptions.record_format:type_name -> feast.core.StreamFormat - 10, // 12: 
feast.core.DataSource.RequestDataOptions.schema:type_name -> feast.core.DataSource.RequestDataOptions.SchemaEntry - 13, // 13: feast.core.DataSource.RequestDataOptions.SchemaEntry.value:type_name -> feast.types.ValueType.Enum - 14, // [14:14] is the sub-list for method output_type - 14, // [14:14] is the sub-list for method input_type - 14, // [14:14] is the sub-list for extension type_name - 14, // [14:14] is the sub-list for extension extendee - 0, // [0:14] is the sub-list for field type_name + 10, // 7: feast.core.DataSource.request_data_options:type_name -> feast.core.DataSource.RequestDataOptions + 9, // 8: feast.core.DataSource.custom_options:type_name -> feast.core.DataSource.CustomSourceOptions + 8, // 9: feast.core.DataSource.snowflake_options:type_name -> feast.core.DataSource.SnowflakeOptions + 12, // 10: feast.core.DataSource.FileOptions.file_format:type_name -> feast.core.FileFormat + 13, // 11: feast.core.DataSource.KafkaOptions.message_format:type_name -> feast.core.StreamFormat + 13, // 12: feast.core.DataSource.KinesisOptions.record_format:type_name -> feast.core.StreamFormat + 11, // 13: feast.core.DataSource.RequestDataOptions.schema:type_name -> feast.core.DataSource.RequestDataOptions.SchemaEntry + 14, // 14: feast.core.DataSource.RequestDataOptions.SchemaEntry.value:type_name -> feast.types.ValueType.Enum + 15, // [15:15] is the sub-list for method output_type + 15, // [15:15] is the sub-list for method input_type + 15, // [15:15] is the sub-list for extension type_name + 15, // [15:15] is the sub-list for extension extendee + 0, // [0:15] is the sub-list for field type_name } func init() { file_feast_core_DataSource_proto_init() } @@ -1032,7 +1143,7 @@ func file_feast_core_DataSource_proto_init() { } } file_feast_core_DataSource_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*DataSource_CustomSourceOptions); i { + switch v := v.(*DataSource_SnowflakeOptions); i { case 0: return &v.state case 1: @@ -1044,6 +1155,18 @@ func file_feast_core_DataSource_proto_init() { } } file_feast_core_DataSource_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*DataSource_CustomSourceOptions); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_feast_core_DataSource_proto_msgTypes[9].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*DataSource_RequestDataOptions); i { case 0: return &v.state @@ -1064,6 +1187,7 @@ func file_feast_core_DataSource_proto_init() { (*DataSource_RedshiftOptions_)(nil), (*DataSource_RequestDataOptions_)(nil), (*DataSource_CustomOptions)(nil), + (*DataSource_SnowflakeOptions_)(nil), } type x struct{} out := protoimpl.TypeBuilder{ @@ -1071,7 +1195,7 @@ func file_feast_core_DataSource_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_feast_core_DataSource_proto_rawDesc, NumEnums: 1, - NumMessages: 10, + NumMessages: 11, NumExtensions: 0, NumServices: 0, }, diff --git a/sdk/go/protos/feast/core/Entity.pb.go b/sdk/go/protos/feast/core/Entity.pb.go index 87f5b45164..245f724e0a 100644 --- a/sdk/go/protos/feast/core/Entity.pb.go +++ b/sdk/go/protos/feast/core/Entity.pb.go @@ -17,7 +17,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: // protoc-gen-go v1.27.1 -// protoc v3.17.3 +// protoc v3.19.4 // source: feast/core/Entity.proto package core diff --git a/sdk/go/protos/feast/core/Feature.pb.go b/sdk/go/protos/feast/core/Feature.pb.go index 50515a822b..a30fafb9d3 100644 --- a/sdk/go/protos/feast/core/Feature.pb.go +++ b/sdk/go/protos/feast/core/Feature.pb.go @@ -17,7 +17,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v3.17.3 +// protoc v3.19.4 // source: feast/core/Feature.proto package core diff --git a/sdk/go/protos/feast/core/FeatureTable.pb.go b/sdk/go/protos/feast/core/FeatureTable.pb.go index 0fc3feb0ca..144d46d8e2 100644 --- a/sdk/go/protos/feast/core/FeatureTable.pb.go +++ b/sdk/go/protos/feast/core/FeatureTable.pb.go @@ -17,7 +17,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v3.17.3 +// protoc v3.19.4 // source: feast/core/FeatureTable.proto package core diff --git a/sdk/go/protos/feast/core/Store.pb.go b/sdk/go/protos/feast/core/Store.pb.go index 26e5a5918f..c56a4ede6d 100644 --- a/sdk/go/protos/feast/core/Store.pb.go +++ b/sdk/go/protos/feast/core/Store.pb.go @@ -17,7 +17,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v3.17.3 +// protoc v3.19.4 // source: feast/core/Store.proto package core diff --git a/sdk/go/protos/feast/serving/ServingService.pb.go b/sdk/go/protos/feast/serving/ServingService.pb.go index 68e771a31b..3527c6688e 100644 --- a/sdk/go/protos/feast/serving/ServingService.pb.go +++ b/sdk/go/protos/feast/serving/ServingService.pb.go @@ -16,7 +16,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v3.17.3 +// protoc v3.19.4 // source: feast/serving/ServingService.proto package serving @@ -380,7 +380,10 @@ type GetOnlineFeaturesRequest struct { // A map of entity name -> list of values Entities map[string]*types.RepeatedValue `protobuf:"bytes,3,rep,name=entities,proto3" json:"entities,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` FullFeatureNames bool `protobuf:"varint,4,opt,name=full_feature_names,json=fullFeatureNames,proto3" json:"full_feature_names,omitempty"` - RequestContext map[string]*types.RepeatedValue `protobuf:"bytes,5,rep,name=request_context,json=requestContext,proto3" json:"request_context,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + // Context for OnDemand Feature Transformation + // (was moved to dedicated parameter to avoid unnecessary separation logic on serving side) + // A map of variable name -> list of values + RequestContext map[string]*types.RepeatedValue `protobuf:"bytes,5,rep,name=request_context,json=requestContext,proto3" json:"request_context,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` } func (x *GetOnlineFeaturesRequest) Reset() { @@ -478,8 +481,10 @@ type GetOnlineFeaturesResponse struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // Feature values retrieved from feast. - FieldValues []*GetOnlineFeaturesResponse_FieldValues `protobuf:"bytes,1,rep,name=field_values,json=fieldValues,proto3" json:"field_values,omitempty"` + Metadata *GetOnlineFeaturesResponseMetadata `protobuf:"bytes,1,opt,name=metadata,proto3" json:"metadata,omitempty"` + // Length of "results" array should match length of requested features. 
+ // We also preserve the same order of features here as in metadata.feature_names + Results []*GetOnlineFeaturesResponse_FeatureVector `protobuf:"bytes,2,rep,name=results,proto3" json:"results,omitempty"` } func (x *GetOnlineFeaturesResponse) Reset() { @@ -514,62 +519,14 @@ func (*GetOnlineFeaturesResponse) Descriptor() ([]byte, []int) { return file_feast_serving_ServingService_proto_rawDescGZIP(), []int{6} } -func (x *GetOnlineFeaturesResponse) GetFieldValues() []*GetOnlineFeaturesResponse_FieldValues { - if x != nil { - return x.FieldValues - } - return nil -} - -type GetOnlineFeaturesResponseV2 struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Metadata *GetOnlineFeaturesResponseMetadata `protobuf:"bytes,1,opt,name=metadata,proto3" json:"metadata,omitempty"` - Results []*GetOnlineFeaturesResponseV2_FeatureVector `protobuf:"bytes,2,rep,name=results,proto3" json:"results,omitempty"` -} - -func (x *GetOnlineFeaturesResponseV2) Reset() { - *x = GetOnlineFeaturesResponseV2{} - if protoimpl.UnsafeEnabled { - mi := &file_feast_serving_ServingService_proto_msgTypes[7] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *GetOnlineFeaturesResponseV2) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*GetOnlineFeaturesResponseV2) ProtoMessage() {} - -func (x *GetOnlineFeaturesResponseV2) ProtoReflect() protoreflect.Message { - mi := &file_feast_serving_ServingService_proto_msgTypes[7] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use GetOnlineFeaturesResponseV2.ProtoReflect.Descriptor instead. -func (*GetOnlineFeaturesResponseV2) Descriptor() ([]byte, []int) { - return file_feast_serving_ServingService_proto_rawDescGZIP(), []int{7} -} - -func (x *GetOnlineFeaturesResponseV2) GetMetadata() *GetOnlineFeaturesResponseMetadata { +func (x *GetOnlineFeaturesResponse) GetMetadata() *GetOnlineFeaturesResponseMetadata { if x != nil { return x.Metadata } return nil } -func (x *GetOnlineFeaturesResponseV2) GetResults() []*GetOnlineFeaturesResponseV2_FeatureVector { +func (x *GetOnlineFeaturesResponse) GetResults() []*GetOnlineFeaturesResponse_FeatureVector { if x != nil { return x.Results } @@ -587,7 +544,7 @@ type GetOnlineFeaturesResponseMetadata struct { func (x *GetOnlineFeaturesResponseMetadata) Reset() { *x = GetOnlineFeaturesResponseMetadata{} if protoimpl.UnsafeEnabled { - mi := &file_feast_serving_ServingService_proto_msgTypes[8] + mi := &file_feast_serving_ServingService_proto_msgTypes[7] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -600,7 +557,7 @@ func (x *GetOnlineFeaturesResponseMetadata) String() string { func (*GetOnlineFeaturesResponseMetadata) ProtoMessage() {} func (x *GetOnlineFeaturesResponseMetadata) ProtoReflect() protoreflect.Message { - mi := &file_feast_serving_ServingService_proto_msgTypes[8] + mi := &file_feast_serving_ServingService_proto_msgTypes[7] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -613,7 +570,7 @@ func (x *GetOnlineFeaturesResponseMetadata) ProtoReflect() protoreflect.Message // Deprecated: Use GetOnlineFeaturesResponseMetadata.ProtoReflect.Descriptor instead. 
func (*GetOnlineFeaturesResponseMetadata) Descriptor() ([]byte, []int) { - return file_feast_serving_ServingService_proto_rawDescGZIP(), []int{8} + return file_feast_serving_ServingService_proto_rawDescGZIP(), []int{7} } func (x *GetOnlineFeaturesResponseMetadata) GetFeatureNames() *FeatureList { @@ -638,7 +595,7 @@ type GetOnlineFeaturesRequestV2_EntityRow struct { func (x *GetOnlineFeaturesRequestV2_EntityRow) Reset() { *x = GetOnlineFeaturesRequestV2_EntityRow{} if protoimpl.UnsafeEnabled { - mi := &file_feast_serving_ServingService_proto_msgTypes[9] + mi := &file_feast_serving_ServingService_proto_msgTypes[8] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -651,7 +608,7 @@ func (x *GetOnlineFeaturesRequestV2_EntityRow) String() string { func (*GetOnlineFeaturesRequestV2_EntityRow) ProtoMessage() {} func (x *GetOnlineFeaturesRequestV2_EntityRow) ProtoReflect() protoreflect.Message { - mi := &file_feast_serving_ServingService_proto_msgTypes[9] + mi := &file_feast_serving_ServingService_proto_msgTypes[8] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -681,65 +638,7 @@ func (x *GetOnlineFeaturesRequestV2_EntityRow) GetFields() map[string]*types.Val return nil } -type GetOnlineFeaturesResponse_FieldValues struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - // Map of feature or entity name to feature/entity values. - // Timestamps are not returned in this response. - Fields map[string]*types.Value `protobuf:"bytes,1,rep,name=fields,proto3" json:"fields,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` - // Map of feature or entity name to feature/entity statuses/metadata. - Statuses map[string]FieldStatus `protobuf:"bytes,2,rep,name=statuses,proto3" json:"statuses,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"varint,2,opt,name=value,proto3,enum=feast.serving.FieldStatus"` -} - -func (x *GetOnlineFeaturesResponse_FieldValues) Reset() { - *x = GetOnlineFeaturesResponse_FieldValues{} - if protoimpl.UnsafeEnabled { - mi := &file_feast_serving_ServingService_proto_msgTypes[13] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *GetOnlineFeaturesResponse_FieldValues) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*GetOnlineFeaturesResponse_FieldValues) ProtoMessage() {} - -func (x *GetOnlineFeaturesResponse_FieldValues) ProtoReflect() protoreflect.Message { - mi := &file_feast_serving_ServingService_proto_msgTypes[13] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use GetOnlineFeaturesResponse_FieldValues.ProtoReflect.Descriptor instead. 
-func (*GetOnlineFeaturesResponse_FieldValues) Descriptor() ([]byte, []int) { - return file_feast_serving_ServingService_proto_rawDescGZIP(), []int{6, 0} -} - -func (x *GetOnlineFeaturesResponse_FieldValues) GetFields() map[string]*types.Value { - if x != nil { - return x.Fields - } - return nil -} - -func (x *GetOnlineFeaturesResponse_FieldValues) GetStatuses() map[string]FieldStatus { - if x != nil { - return x.Statuses - } - return nil -} - -type GetOnlineFeaturesResponseV2_FeatureVector struct { +type GetOnlineFeaturesResponse_FeatureVector struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields @@ -749,23 +648,23 @@ type GetOnlineFeaturesResponseV2_FeatureVector struct { EventTimestamps []*timestamppb.Timestamp `protobuf:"bytes,3,rep,name=event_timestamps,json=eventTimestamps,proto3" json:"event_timestamps,omitempty"` } -func (x *GetOnlineFeaturesResponseV2_FeatureVector) Reset() { - *x = GetOnlineFeaturesResponseV2_FeatureVector{} +func (x *GetOnlineFeaturesResponse_FeatureVector) Reset() { + *x = GetOnlineFeaturesResponse_FeatureVector{} if protoimpl.UnsafeEnabled { - mi := &file_feast_serving_ServingService_proto_msgTypes[16] + mi := &file_feast_serving_ServingService_proto_msgTypes[12] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetOnlineFeaturesResponseV2_FeatureVector) String() string { +func (x *GetOnlineFeaturesResponse_FeatureVector) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetOnlineFeaturesResponseV2_FeatureVector) ProtoMessage() {} +func (*GetOnlineFeaturesResponse_FeatureVector) ProtoMessage() {} -func (x *GetOnlineFeaturesResponseV2_FeatureVector) ProtoReflect() protoreflect.Message { - mi := &file_feast_serving_ServingService_proto_msgTypes[16] +func (x *GetOnlineFeaturesResponse_FeatureVector) ProtoReflect() protoreflect.Message { + mi := &file_feast_serving_ServingService_proto_msgTypes[12] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -776,26 +675,26 @@ func (x *GetOnlineFeaturesResponseV2_FeatureVector) ProtoReflect() protoreflect. return mi.MessageOf(x) } -// Deprecated: Use GetOnlineFeaturesResponseV2_FeatureVector.ProtoReflect.Descriptor instead. -func (*GetOnlineFeaturesResponseV2_FeatureVector) Descriptor() ([]byte, []int) { - return file_feast_serving_ServingService_proto_rawDescGZIP(), []int{7, 0} +// Deprecated: Use GetOnlineFeaturesResponse_FeatureVector.ProtoReflect.Descriptor instead. 
+func (*GetOnlineFeaturesResponse_FeatureVector) Descriptor() ([]byte, []int) { + return file_feast_serving_ServingService_proto_rawDescGZIP(), []int{6, 0} } -func (x *GetOnlineFeaturesResponseV2_FeatureVector) GetValues() []*types.Value { +func (x *GetOnlineFeaturesResponse_FeatureVector) GetValues() []*types.Value { if x != nil { return x.Values } return nil } -func (x *GetOnlineFeaturesResponseV2_FeatureVector) GetStatuses() []FieldStatus { +func (x *GetOnlineFeaturesResponse_FeatureVector) GetStatuses() []FieldStatus { if x != nil { return x.Statuses } return nil } -func (x *GetOnlineFeaturesResponseV2_FeatureVector) GetEventTimestamps() []*timestamppb.Timestamp { +func (x *GetOnlineFeaturesResponse_FeatureVector) GetEventTimestamps() []*timestamppb.Timestamp { if x != nil { return x.EventTimestamps } @@ -888,94 +787,63 @@ var file_feast_serving_ServingService_proto_rawDesc = []byte{ 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2e, 0x52, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x42, 0x06, 0x0a, 0x04, 0x6b, 0x69, 0x6e, - 0x64, 0x22, 0xe6, 0x03, 0x0a, 0x19, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, + 0x64, 0x22, 0xf8, 0x02, 0x0a, 0x19, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, - 0x57, 0x0a, 0x0c, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, - 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x34, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, + 0x4c, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x30, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, + 0x67, 0x2e, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, + 0x72, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x4d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x50, 0x0a, + 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x36, + 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, + 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, + 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, + 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x1a, + 0xba, 0x01, 0x0a, 0x0d, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x56, 0x65, 0x63, 0x74, 0x6f, + 0x72, 0x12, 0x2a, 0x0a, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x12, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2e, + 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x12, 0x36, 0x0a, + 0x08, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0e, 0x32, + 0x1a, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, + 0x46, 0x69, 0x65, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x08, 0x73, 0x74, 0x61, + 0x74, 0x75, 0x73, 0x65, 0x73, 0x12, 0x45, 0x0a, 0x10, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x5f, 0x74, + 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x1a, 0x2e, 
0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, + 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x0f, 0x65, 0x76, 0x65, + 0x6e, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x73, 0x22, 0x64, 0x0a, 0x21, + 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, + 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x12, 0x3f, 0x0a, 0x0d, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x5f, 0x6e, 0x61, 0x6d, + 0x65, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, + 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, + 0x4c, 0x69, 0x73, 0x74, 0x52, 0x0c, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x4e, 0x61, 0x6d, + 0x65, 0x73, 0x2a, 0x5b, 0x0a, 0x0b, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x12, 0x0b, 0x0a, 0x07, 0x49, 0x4e, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x10, 0x00, 0x12, 0x0b, + 0x0a, 0x07, 0x50, 0x52, 0x45, 0x53, 0x45, 0x4e, 0x54, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x4e, + 0x55, 0x4c, 0x4c, 0x5f, 0x56, 0x41, 0x4c, 0x55, 0x45, 0x10, 0x02, 0x12, 0x0d, 0x0a, 0x09, 0x4e, + 0x4f, 0x54, 0x5f, 0x46, 0x4f, 0x55, 0x4e, 0x44, 0x10, 0x03, 0x12, 0x13, 0x0a, 0x0f, 0x4f, 0x55, + 0x54, 0x53, 0x49, 0x44, 0x45, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x41, 0x47, 0x45, 0x10, 0x04, 0x32, + 0xe6, 0x01, 0x0a, 0x0e, 0x53, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x53, 0x65, 0x72, 0x76, 0x69, + 0x63, 0x65, 0x12, 0x6c, 0x0a, 0x13, 0x47, 0x65, 0x74, 0x46, 0x65, 0x61, 0x73, 0x74, 0x53, 0x65, + 0x72, 0x76, 0x69, 0x6e, 0x67, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x29, 0x2e, 0x66, 0x65, 0x61, 0x73, + 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, 0x46, 0x65, 0x61, + 0x73, 0x74, 0x53, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2a, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, + 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, 0x46, 0x65, 0x61, 0x73, 0x74, 0x53, 0x65, 0x72, + 0x76, 0x69, 0x6e, 0x67, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x66, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, + 0x74, 0x75, 0x72, 0x65, 0x73, 0x12, 0x27, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, - 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, - 0x46, 0x69, 0x65, 0x6c, 0x64, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x52, 0x0b, 0x66, 0x69, 0x65, - 0x6c, 0x64, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x1a, 0xef, 0x02, 0x0a, 0x0b, 0x46, 0x69, 0x65, - 0x6c, 0x64, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x12, 0x58, 0x0a, 0x06, 0x66, 0x69, 0x65, 0x6c, - 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x40, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, - 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, - 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, - 0x73, 0x65, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x2e, 0x46, - 0x69, 0x65, 0x6c, 0x64, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x06, 0x66, 0x69, 0x65, 0x6c, - 0x64, 0x73, 0x12, 0x5e, 0x0a, 0x08, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x65, 0x73, 0x18, 0x02, - 0x20, 0x03, 0x28, 0x0b, 0x32, 0x42, 
0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, - 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, - 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x46, - 0x69, 0x65, 0x6c, 0x64, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, - 0x73, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x08, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x65, 0x73, 0x1a, 0x4d, 0x0a, 0x0b, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x45, 0x6e, 0x74, 0x72, - 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, - 0x6b, 0x65, 0x79, 0x12, 0x28, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x74, 0x79, 0x70, 0x65, 0x73, - 0x2e, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, - 0x01, 0x1a, 0x57, 0x0a, 0x0d, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x65, 0x73, 0x45, 0x6e, 0x74, - 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x03, 0x6b, 0x65, 0x79, 0x12, 0x30, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x0e, 0x32, 0x1a, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, - 0x69, 0x6e, 0x67, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, - 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xfc, 0x02, 0x0a, 0x1b, 0x47, + 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x28, + 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, - 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x56, 0x32, 0x12, 0x4c, 0x0a, 0x08, 0x6d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x30, 0x2e, 0x66, - 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, - 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, 0x65, - 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x52, 0x08, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x52, 0x0a, 0x07, 0x72, 0x65, 0x73, 0x75, - 0x6c, 0x74, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x38, 0x2e, 0x66, 0x65, 0x61, 0x73, - 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, - 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, - 0x6e, 0x73, 0x65, 0x56, 0x32, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x56, 0x65, 0x63, - 0x74, 0x6f, 0x72, 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x1a, 0xba, 0x01, 0x0a, - 0x0d, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x12, 0x2a, - 0x0a, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, - 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2e, 0x56, 0x61, 0x6c, - 0x75, 0x65, 0x52, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x12, 0x36, 0x0a, 0x08, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0e, 0x32, 0x1a, 0x2e, 0x66, - 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x46, 0x69, 0x65, - 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x08, 
0x73, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x65, 0x73, 0x12, 0x45, 0x0a, 0x10, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, - 0x73, 0x74, 0x61, 0x6d, 0x70, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, - 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, - 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x0f, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, - 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x73, 0x22, 0x64, 0x0a, 0x21, 0x47, 0x65, 0x74, - 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, 0x65, - 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x3f, - 0x0a, 0x0d, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, - 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x4c, 0x69, 0x73, - 0x74, 0x52, 0x0c, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x73, 0x2a, - 0x5b, 0x0a, 0x0b, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x0b, - 0x0a, 0x07, 0x49, 0x4e, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x10, 0x00, 0x12, 0x0b, 0x0a, 0x07, 0x50, - 0x52, 0x45, 0x53, 0x45, 0x4e, 0x54, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x4e, 0x55, 0x4c, 0x4c, - 0x5f, 0x56, 0x41, 0x4c, 0x55, 0x45, 0x10, 0x02, 0x12, 0x0d, 0x0a, 0x09, 0x4e, 0x4f, 0x54, 0x5f, - 0x46, 0x4f, 0x55, 0x4e, 0x44, 0x10, 0x03, 0x12, 0x13, 0x0a, 0x0f, 0x4f, 0x55, 0x54, 0x53, 0x49, - 0x44, 0x45, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x41, 0x47, 0x45, 0x10, 0x04, 0x32, 0xe8, 0x01, 0x0a, - 0x0e, 0x53, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, - 0x6c, 0x0a, 0x13, 0x47, 0x65, 0x74, 0x46, 0x65, 0x61, 0x73, 0x74, 0x53, 0x65, 0x72, 0x76, 0x69, - 0x6e, 0x67, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x29, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, - 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, 0x46, 0x65, 0x61, 0x73, 0x74, 0x53, - 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x1a, 0x2a, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, - 0x67, 0x2e, 0x47, 0x65, 0x74, 0x46, 0x65, 0x61, 0x73, 0x74, 0x53, 0x65, 0x72, 0x76, 0x69, 0x6e, - 0x67, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x68, 0x0a, - 0x11, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, - 0x65, 0x73, 0x12, 0x27, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, - 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, - 0x75, 0x72, 0x65, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2a, 0x2e, 0x66, 0x65, - 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, 0x4f, - 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, 0x65, 0x73, - 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x56, 0x32, 0x42, 0x5e, 0x0a, 0x13, 0x66, 0x65, 0x61, 0x73, 0x74, - 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x42, 0x0f, - 0x53, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x41, 0x50, 0x49, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x5a, - 0x36, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x66, 0x65, 0x61, 0x73, - 0x74, 0x2d, 0x64, 0x65, 0x76, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2f, 0x73, 0x64, 
0x6b, 0x2f, - 0x67, 0x6f, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2f, - 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x42, 0x5e, 0x0a, 0x13, 0x66, 0x65, 0x61, 0x73, + 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x42, + 0x0f, 0x53, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x41, 0x50, 0x49, 0x50, 0x72, 0x6f, 0x74, 0x6f, + 0x5a, 0x36, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x66, 0x65, 0x61, + 0x73, 0x74, 0x2d, 0x64, 0x65, 0x76, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2f, 0x73, 0x64, 0x6b, + 0x2f, 0x67, 0x6f, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, + 0x2f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -991,7 +859,7 @@ func file_feast_serving_ServingService_proto_rawDescGZIP() []byte { } var file_feast_serving_ServingService_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_feast_serving_ServingService_proto_msgTypes = make([]protoimpl.MessageInfo, 17) +var file_feast_serving_ServingService_proto_msgTypes = make([]protoimpl.MessageInfo, 13) var file_feast_serving_ServingService_proto_goTypes = []interface{}{ (FieldStatus)(0), // 0: feast.serving.FieldStatus (*GetFeastServingInfoRequest)(nil), // 1: feast.serving.GetFeastServingInfoRequest @@ -1001,51 +869,42 @@ var file_feast_serving_ServingService_proto_goTypes = []interface{}{ (*FeatureList)(nil), // 5: feast.serving.FeatureList (*GetOnlineFeaturesRequest)(nil), // 6: feast.serving.GetOnlineFeaturesRequest (*GetOnlineFeaturesResponse)(nil), // 7: feast.serving.GetOnlineFeaturesResponse - (*GetOnlineFeaturesResponseV2)(nil), // 8: feast.serving.GetOnlineFeaturesResponseV2 - (*GetOnlineFeaturesResponseMetadata)(nil), // 9: feast.serving.GetOnlineFeaturesResponseMetadata - (*GetOnlineFeaturesRequestV2_EntityRow)(nil), // 10: feast.serving.GetOnlineFeaturesRequestV2.EntityRow - nil, // 11: feast.serving.GetOnlineFeaturesRequestV2.EntityRow.FieldsEntry - nil, // 12: feast.serving.GetOnlineFeaturesRequest.EntitiesEntry - nil, // 13: feast.serving.GetOnlineFeaturesRequest.RequestContextEntry - (*GetOnlineFeaturesResponse_FieldValues)(nil), // 14: feast.serving.GetOnlineFeaturesResponse.FieldValues - nil, // 15: feast.serving.GetOnlineFeaturesResponse.FieldValues.FieldsEntry - nil, // 16: feast.serving.GetOnlineFeaturesResponse.FieldValues.StatusesEntry - (*GetOnlineFeaturesResponseV2_FeatureVector)(nil), // 17: feast.serving.GetOnlineFeaturesResponseV2.FeatureVector - (*timestamppb.Timestamp)(nil), // 18: google.protobuf.Timestamp - (*types.Value)(nil), // 19: feast.types.Value - (*types.RepeatedValue)(nil), // 20: feast.types.RepeatedValue + (*GetOnlineFeaturesResponseMetadata)(nil), // 8: feast.serving.GetOnlineFeaturesResponseMetadata + (*GetOnlineFeaturesRequestV2_EntityRow)(nil), // 9: feast.serving.GetOnlineFeaturesRequestV2.EntityRow + nil, // 10: feast.serving.GetOnlineFeaturesRequestV2.EntityRow.FieldsEntry + nil, // 11: feast.serving.GetOnlineFeaturesRequest.EntitiesEntry + nil, // 12: feast.serving.GetOnlineFeaturesRequest.RequestContextEntry + (*GetOnlineFeaturesResponse_FeatureVector)(nil), // 13: feast.serving.GetOnlineFeaturesResponse.FeatureVector + (*timestamppb.Timestamp)(nil), // 14: google.protobuf.Timestamp + (*types.Value)(nil), // 15: feast.types.Value + (*types.RepeatedValue)(nil), // 16: feast.types.RepeatedValue } var 
file_feast_serving_ServingService_proto_depIdxs = []int32{ 3, // 0: feast.serving.GetOnlineFeaturesRequestV2.features:type_name -> feast.serving.FeatureReferenceV2 - 10, // 1: feast.serving.GetOnlineFeaturesRequestV2.entity_rows:type_name -> feast.serving.GetOnlineFeaturesRequestV2.EntityRow + 9, // 1: feast.serving.GetOnlineFeaturesRequestV2.entity_rows:type_name -> feast.serving.GetOnlineFeaturesRequestV2.EntityRow 5, // 2: feast.serving.GetOnlineFeaturesRequest.features:type_name -> feast.serving.FeatureList - 12, // 3: feast.serving.GetOnlineFeaturesRequest.entities:type_name -> feast.serving.GetOnlineFeaturesRequest.EntitiesEntry - 13, // 4: feast.serving.GetOnlineFeaturesRequest.request_context:type_name -> feast.serving.GetOnlineFeaturesRequest.RequestContextEntry - 14, // 5: feast.serving.GetOnlineFeaturesResponse.field_values:type_name -> feast.serving.GetOnlineFeaturesResponse.FieldValues - 9, // 6: feast.serving.GetOnlineFeaturesResponseV2.metadata:type_name -> feast.serving.GetOnlineFeaturesResponseMetadata - 17, // 7: feast.serving.GetOnlineFeaturesResponseV2.results:type_name -> feast.serving.GetOnlineFeaturesResponseV2.FeatureVector - 5, // 8: feast.serving.GetOnlineFeaturesResponseMetadata.feature_names:type_name -> feast.serving.FeatureList - 18, // 9: feast.serving.GetOnlineFeaturesRequestV2.EntityRow.timestamp:type_name -> google.protobuf.Timestamp - 11, // 10: feast.serving.GetOnlineFeaturesRequestV2.EntityRow.fields:type_name -> feast.serving.GetOnlineFeaturesRequestV2.EntityRow.FieldsEntry - 19, // 11: feast.serving.GetOnlineFeaturesRequestV2.EntityRow.FieldsEntry.value:type_name -> feast.types.Value - 20, // 12: feast.serving.GetOnlineFeaturesRequest.EntitiesEntry.value:type_name -> feast.types.RepeatedValue - 20, // 13: feast.serving.GetOnlineFeaturesRequest.RequestContextEntry.value:type_name -> feast.types.RepeatedValue - 15, // 14: feast.serving.GetOnlineFeaturesResponse.FieldValues.fields:type_name -> feast.serving.GetOnlineFeaturesResponse.FieldValues.FieldsEntry - 16, // 15: feast.serving.GetOnlineFeaturesResponse.FieldValues.statuses:type_name -> feast.serving.GetOnlineFeaturesResponse.FieldValues.StatusesEntry - 19, // 16: feast.serving.GetOnlineFeaturesResponse.FieldValues.FieldsEntry.value:type_name -> feast.types.Value - 0, // 17: feast.serving.GetOnlineFeaturesResponse.FieldValues.StatusesEntry.value:type_name -> feast.serving.FieldStatus - 19, // 18: feast.serving.GetOnlineFeaturesResponseV2.FeatureVector.values:type_name -> feast.types.Value - 0, // 19: feast.serving.GetOnlineFeaturesResponseV2.FeatureVector.statuses:type_name -> feast.serving.FieldStatus - 18, // 20: feast.serving.GetOnlineFeaturesResponseV2.FeatureVector.event_timestamps:type_name -> google.protobuf.Timestamp - 1, // 21: feast.serving.ServingService.GetFeastServingInfo:input_type -> feast.serving.GetFeastServingInfoRequest - 6, // 22: feast.serving.ServingService.GetOnlineFeatures:input_type -> feast.serving.GetOnlineFeaturesRequest - 2, // 23: feast.serving.ServingService.GetFeastServingInfo:output_type -> feast.serving.GetFeastServingInfoResponse - 8, // 24: feast.serving.ServingService.GetOnlineFeatures:output_type -> feast.serving.GetOnlineFeaturesResponseV2 - 23, // [23:25] is the sub-list for method output_type - 21, // [21:23] is the sub-list for method input_type - 21, // [21:21] is the sub-list for extension type_name - 21, // [21:21] is the sub-list for extension extendee - 0, // [0:21] is the sub-list for field type_name + 11, // 3: 
feast.serving.GetOnlineFeaturesRequest.entities:type_name -> feast.serving.GetOnlineFeaturesRequest.EntitiesEntry + 12, // 4: feast.serving.GetOnlineFeaturesRequest.request_context:type_name -> feast.serving.GetOnlineFeaturesRequest.RequestContextEntry + 8, // 5: feast.serving.GetOnlineFeaturesResponse.metadata:type_name -> feast.serving.GetOnlineFeaturesResponseMetadata + 13, // 6: feast.serving.GetOnlineFeaturesResponse.results:type_name -> feast.serving.GetOnlineFeaturesResponse.FeatureVector + 5, // 7: feast.serving.GetOnlineFeaturesResponseMetadata.feature_names:type_name -> feast.serving.FeatureList + 14, // 8: feast.serving.GetOnlineFeaturesRequestV2.EntityRow.timestamp:type_name -> google.protobuf.Timestamp + 10, // 9: feast.serving.GetOnlineFeaturesRequestV2.EntityRow.fields:type_name -> feast.serving.GetOnlineFeaturesRequestV2.EntityRow.FieldsEntry + 15, // 10: feast.serving.GetOnlineFeaturesRequestV2.EntityRow.FieldsEntry.value:type_name -> feast.types.Value + 16, // 11: feast.serving.GetOnlineFeaturesRequest.EntitiesEntry.value:type_name -> feast.types.RepeatedValue + 16, // 12: feast.serving.GetOnlineFeaturesRequest.RequestContextEntry.value:type_name -> feast.types.RepeatedValue + 15, // 13: feast.serving.GetOnlineFeaturesResponse.FeatureVector.values:type_name -> feast.types.Value + 0, // 14: feast.serving.GetOnlineFeaturesResponse.FeatureVector.statuses:type_name -> feast.serving.FieldStatus + 14, // 15: feast.serving.GetOnlineFeaturesResponse.FeatureVector.event_timestamps:type_name -> google.protobuf.Timestamp + 1, // 16: feast.serving.ServingService.GetFeastServingInfo:input_type -> feast.serving.GetFeastServingInfoRequest + 6, // 17: feast.serving.ServingService.GetOnlineFeatures:input_type -> feast.serving.GetOnlineFeaturesRequest + 2, // 18: feast.serving.ServingService.GetFeastServingInfo:output_type -> feast.serving.GetFeastServingInfoResponse + 7, // 19: feast.serving.ServingService.GetOnlineFeatures:output_type -> feast.serving.GetOnlineFeaturesResponse + 18, // [18:20] is the sub-list for method output_type + 16, // [16:18] is the sub-list for method input_type + 16, // [16:16] is the sub-list for extension type_name + 16, // [16:16] is the sub-list for extension extendee + 0, // [0:16] is the sub-list for field type_name } func init() { file_feast_serving_ServingService_proto_init() } @@ -1139,18 +998,6 @@ func file_feast_serving_ServingService_proto_init() { } } file_feast_serving_ServingService_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetOnlineFeaturesResponseV2); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_feast_serving_ServingService_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*GetOnlineFeaturesResponseMetadata); i { case 0: return &v.state @@ -1162,7 +1009,7 @@ func file_feast_serving_ServingService_proto_init() { return nil } } - file_feast_serving_ServingService_proto_msgTypes[9].Exporter = func(v interface{}, i int) interface{} { + file_feast_serving_ServingService_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*GetOnlineFeaturesRequestV2_EntityRow); i { case 0: return &v.state @@ -1174,20 +1021,8 @@ func file_feast_serving_ServingService_proto_init() { return nil } } - file_feast_serving_ServingService_proto_msgTypes[13].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetOnlineFeaturesResponse_FieldValues); i { - 
case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_feast_serving_ServingService_proto_msgTypes[16].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetOnlineFeaturesResponseV2_FeatureVector); i { + file_feast_serving_ServingService_proto_msgTypes[12].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetOnlineFeaturesResponse_FeatureVector); i { case 0: return &v.state case 1: @@ -1209,7 +1044,7 @@ func file_feast_serving_ServingService_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_feast_serving_ServingService_proto_rawDesc, NumEnums: 1, - NumMessages: 17, + NumMessages: 13, NumExtensions: 0, NumServices: 1, }, @@ -1239,7 +1074,7 @@ type ServingServiceClient interface { // Get information about this Feast serving. GetFeastServingInfo(ctx context.Context, in *GetFeastServingInfoRequest, opts ...grpc.CallOption) (*GetFeastServingInfoResponse, error) // Get online features synchronously. - GetOnlineFeatures(ctx context.Context, in *GetOnlineFeaturesRequest, opts ...grpc.CallOption) (*GetOnlineFeaturesResponseV2, error) + GetOnlineFeatures(ctx context.Context, in *GetOnlineFeaturesRequest, opts ...grpc.CallOption) (*GetOnlineFeaturesResponse, error) } type servingServiceClient struct { @@ -1259,8 +1094,8 @@ func (c *servingServiceClient) GetFeastServingInfo(ctx context.Context, in *GetF return out, nil } -func (c *servingServiceClient) GetOnlineFeatures(ctx context.Context, in *GetOnlineFeaturesRequest, opts ...grpc.CallOption) (*GetOnlineFeaturesResponseV2, error) { - out := new(GetOnlineFeaturesResponseV2) +func (c *servingServiceClient) GetOnlineFeatures(ctx context.Context, in *GetOnlineFeaturesRequest, opts ...grpc.CallOption) (*GetOnlineFeaturesResponse, error) { + out := new(GetOnlineFeaturesResponse) err := c.cc.Invoke(ctx, "/feast.serving.ServingService/GetOnlineFeatures", in, out, opts...) if err != nil { return nil, err @@ -1273,7 +1108,7 @@ type ServingServiceServer interface { // Get information about this Feast serving. GetFeastServingInfo(context.Context, *GetFeastServingInfoRequest) (*GetFeastServingInfoResponse, error) // Get online features synchronously. - GetOnlineFeatures(context.Context, *GetOnlineFeaturesRequest) (*GetOnlineFeaturesResponseV2, error) + GetOnlineFeatures(context.Context, *GetOnlineFeaturesRequest) (*GetOnlineFeaturesResponse, error) } // UnimplementedServingServiceServer can be embedded to have forward compatible implementations. @@ -1283,7 +1118,7 @@ type UnimplementedServingServiceServer struct { func (*UnimplementedServingServiceServer) GetFeastServingInfo(context.Context, *GetFeastServingInfoRequest) (*GetFeastServingInfoResponse, error) { return nil, status.Errorf(codes.Unimplemented, "method GetFeastServingInfo not implemented") } -func (*UnimplementedServingServiceServer) GetOnlineFeatures(context.Context, *GetOnlineFeaturesRequest) (*GetOnlineFeaturesResponseV2, error) { +func (*UnimplementedServingServiceServer) GetOnlineFeatures(context.Context, *GetOnlineFeaturesRequest) (*GetOnlineFeaturesResponse, error) { return nil, status.Errorf(codes.Unimplemented, "method GetOnlineFeatures not implemented") } diff --git a/sdk/go/protos/feast/storage/Redis.pb.go b/sdk/go/protos/feast/storage/Redis.pb.go index 8fff34e517..35f38ba2a7 100644 --- a/sdk/go/protos/feast/storage/Redis.pb.go +++ b/sdk/go/protos/feast/storage/Redis.pb.go @@ -16,7 +16,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
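Aside (not part of the patch): the ServingService hunks above fold GetOnlineFeaturesResponseV2 into GetOnlineFeaturesResponse, so a response now carries a metadata block plus one FeatureVector per requested feature, in the same order as metadata.feature_names. A minimal Python sketch of reading that shape through the generated bindings follows; the module path feast.protos.feast.serving.ServingService_pb2 and the FeatureList field name `val` are assumptions inferred from the import style and getters visible elsewhere in this diff, not confirmed here.

from feast.protos.feast.serving.ServingService_pb2 import GetOnlineFeaturesResponse  # assumed module path


def response_to_dict(response: GetOnlineFeaturesResponse) -> dict:
    # metadata.feature_names is a FeatureList; `val` is assumed to be its repeated string field.
    feature_names = list(response.metadata.feature_names.val)
    # Per the proto comment above, len(results) == len(feature_names) and the order matches.
    assert len(feature_names) == len(response.results)
    # Each FeatureVector holds one feature's values across all requested entity rows.
    return {
        name: list(vector.values)
        for name, vector in zip(feature_names, response.results)
    }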
// versions: // protoc-gen-go v1.27.1 -// protoc v3.17.3 +// protoc v3.19.4 // source: feast/storage/Redis.proto package storage diff --git a/sdk/go/protos/feast/types/Field.pb.go b/sdk/go/protos/feast/types/Field.pb.go index c529d76153..af964f2c6e 100644 --- a/sdk/go/protos/feast/types/Field.pb.go +++ b/sdk/go/protos/feast/types/Field.pb.go @@ -16,7 +16,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v3.17.3 +// protoc v3.19.4 // source: feast/types/Field.proto package types diff --git a/sdk/go/protos/feast/types/Value.pb.go b/sdk/go/protos/feast/types/Value.pb.go index fe53c2ec29..79eaa16009 100644 --- a/sdk/go/protos/feast/types/Value.pb.go +++ b/sdk/go/protos/feast/types/Value.pb.go @@ -16,7 +16,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v3.17.3 +// protoc v3.19.4 // source: feast/types/Value.proto package types diff --git a/sdk/go/response.go b/sdk/go/response.go index 49c8904ab7..cdb2cbee38 100644 --- a/sdk/go/response.go +++ b/sdk/go/response.go @@ -19,7 +19,7 @@ var ( // OnlineFeaturesResponse is a wrapper around serving.GetOnlineFeaturesResponse. type OnlineFeaturesResponse struct { - RawResponse *serving.GetOnlineFeaturesResponseV2 + RawResponse *serving.GetOnlineFeaturesResponse } // Rows retrieves the result of the request as a list of Rows. diff --git a/sdk/go/response_test.go b/sdk/go/response_test.go index e9a9bc1605..693faae7e4 100644 --- a/sdk/go/response_test.go +++ b/sdk/go/response_test.go @@ -9,8 +9,8 @@ import ( ) var response = OnlineFeaturesResponse{ - RawResponse: &serving.GetOnlineFeaturesResponseV2{ - Results: []*serving.GetOnlineFeaturesResponseV2_FeatureVector{ + RawResponse: &serving.GetOnlineFeaturesResponse{ + Results: []*serving.GetOnlineFeaturesResponse_FeatureVector{ { Values: []*types.Value{Int64Val(1), Int64Val(2)}, Statuses: []serving.FieldStatus{ diff --git a/sdk/python/feast/__init__.py b/sdk/python/feast/__init__.py index eada13f995..9f78f9d98b 100644 --- a/sdk/python/feast/__init__.py +++ b/sdk/python/feast/__init__.py @@ -5,6 +5,7 @@ from feast.infra.offline_stores.bigquery_source import BigQuerySource from feast.infra.offline_stores.file_source import FileSource from feast.infra.offline_stores.redshift_source import RedshiftSource +from feast.infra.offline_stores.snowflake_source import SnowflakeSource from .data_source import KafkaSource, KinesisSource, SourceType from .entity import Entity @@ -43,4 +44,5 @@ "BigQuerySource", "FileSource", "RedshiftSource", + "SnowflakeSource", ] diff --git a/sdk/python/feast/base_feature_view.py b/sdk/python/feast/base_feature_view.py index 97180266d7..b2178ec631 100644 --- a/sdk/python/feast/base_feature_view.py +++ b/sdk/python/feast/base_feature_view.py @@ -28,12 +28,16 @@ class BaseFeatureView(ABC): """A FeatureView defines a logical grouping of features to be served.""" + created_timestamp: Optional[datetime] + last_updated_timestamp: Optional[datetime] + @abstractmethod def __init__(self, name: str, features: List[Feature]): self._name = name self._features = features self._projection = FeatureViewProjection.from_definition(self) self.created_timestamp: Optional[datetime] = None + self.last_updated_timestamp: Optional[datetime] = None @property def name(self) -> str: diff --git a/sdk/python/feast/cli.py b/sdk/python/feast/cli.py index 4950977e2a..c23c3d104a 100644 --- a/sdk/python/feast/cli.py +++ b/sdk/python/feast/cli.py @@ -477,7 +477,7 @@ def materialize_incremental_command(ctx: 
click.Context, end_ts: str, views: List @click.option( "--template", "-t", - type=click.Choice(["local", "gcp", "aws"], case_sensitive=False), + type=click.Choice(["local", "gcp", "aws", "snowflake"], case_sensitive=False), help="Specify a template for the created project", default="local", ) @@ -512,7 +512,7 @@ def init_command(project_directory, minimal: bool, template: str): ) @click.pass_context def serve_command(ctx: click.Context, host: str, port: int, no_access_log: bool): - """[Experimental] Start a the feature consumption server locally on a given port.""" + """Start a feature server locally on a given port.""" repo = ctx.obj["CHDIR"] cli_check_repo(repo) store = FeatureStore(repo_path=str(repo)) diff --git a/sdk/python/feast/constants.py b/sdk/python/feast/constants.py index ff93347130..a2fe6f15c5 100644 --- a/sdk/python/feast/constants.py +++ b/sdk/python/feast/constants.py @@ -29,6 +29,9 @@ # Environment variable for toggling usage FEAST_USAGE = "FEAST_USAGE" +# Default value for FEAST_USAGE when environment variable is not set +DEFAULT_FEAST_USAGE_VALUE = "True" + # Environment variable for the path for overwriting universal test configs FULL_REPO_CONFIGS_MODULE_ENV_NAME: str = "FULL_REPO_CONFIGS_MODULE" diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index b30340f0d2..94910c6c08 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -360,6 +360,12 @@ def from_proto(data_source: DataSourceProto) -> Any: from feast.infra.offline_stores.redshift_source import RedshiftSource data_source_obj = RedshiftSource.from_proto(data_source) + + elif data_source.snowflake_options.table or data_source.snowflake_options.query: + from feast.infra.offline_stores.snowflake_source import SnowflakeSource + + data_source_obj = SnowflakeSource.from_proto(data_source) + elif ( data_source.kafka_options.bootstrap_servers and data_source.kafka_options.topic diff --git a/sdk/python/feast/diff/FcoDiff.py b/sdk/python/feast/diff/FcoDiff.py deleted file mode 100644 index e4b044dcc4..0000000000 --- a/sdk/python/feast/diff/FcoDiff.py +++ /dev/null @@ -1,87 +0,0 @@ -from dataclasses import dataclass -from typing import Any, Iterable, List, Set, Tuple, TypeVar - -from feast.base_feature_view import BaseFeatureView -from feast.diff.property_diff import PropertyDiff, TransitionType -from feast.entity import Entity -from feast.feature_service import FeatureService -from feast.protos.feast.core.Entity_pb2 import Entity as EntityProto -from feast.protos.feast.core.FeatureView_pb2 import FeatureView as FeatureViewProto - - -@dataclass -class FcoDiff: - name: str - fco_type: str - current_fco: Any - new_fco: Any - fco_property_diffs: List[PropertyDiff] - transition_type: TransitionType - - -@dataclass -class RegistryDiff: - fco_diffs: List[FcoDiff] - - def __init__(self): - self.fco_diffs = [] - - def add_fco_diff(self, fco_diff: FcoDiff): - self.fco_diffs.append(fco_diff) - - -T = TypeVar("T", Entity, BaseFeatureView, FeatureService) - - -def tag_objects_for_keep_delete_add( - existing_objs: Iterable[T], desired_objs: Iterable[T] -) -> Tuple[Set[T], Set[T], Set[T]]: - existing_obj_names = {e.name for e in existing_objs} - desired_obj_names = {e.name for e in desired_objs} - - objs_to_add = {e for e in desired_objs if e.name not in existing_obj_names} - objs_to_keep = {e for e in desired_objs if e.name in existing_obj_names} - objs_to_delete = {e for e in existing_objs if e.name not in desired_obj_names} - - return objs_to_keep, objs_to_delete, objs_to_add 
- - -U = TypeVar("U", EntityProto, FeatureViewProto) - - -def tag_proto_objects_for_keep_delete_add( - existing_objs: Iterable[U], desired_objs: Iterable[U] -) -> Tuple[Iterable[U], Iterable[U], Iterable[U]]: - existing_obj_names = {e.spec.name for e in existing_objs} - desired_obj_names = {e.spec.name for e in desired_objs} - - objs_to_add = [e for e in desired_objs if e.spec.name not in existing_obj_names] - objs_to_keep = [e for e in desired_objs if e.spec.name in existing_obj_names] - objs_to_delete = [e for e in existing_objs if e.spec.name not in desired_obj_names] - - return objs_to_keep, objs_to_delete, objs_to_add - - -FIELDS_TO_IGNORE = {"project"} - - -def diff_between(current: U, new: U, object_type: str) -> FcoDiff: - assert current.DESCRIPTOR.full_name == new.DESCRIPTOR.full_name - property_diffs = [] - transition: TransitionType = TransitionType.UNCHANGED - if current.spec != new.spec: - for _field in current.spec.DESCRIPTOR.fields: - if _field.name in FIELDS_TO_IGNORE: - continue - if getattr(current.spec, _field.name) != getattr(new.spec, _field.name): - transition = TransitionType.UPDATE - property_diffs.append( - PropertyDiff( - _field.name, - getattr(current.spec, _field.name), - getattr(new.spec, _field.name), - ) - ) - return FcoDiff( - new.spec.name, object_type, current, new, property_diffs, transition, - ) diff --git a/sdk/python/feast/diff/infra_diff.py b/sdk/python/feast/diff/infra_diff.py index 458b7e1e01..a09eaf39eb 100644 --- a/sdk/python/feast/diff/infra_diff.py +++ b/sdk/python/feast/diff/infra_diff.py @@ -1,16 +1,34 @@ from dataclasses import dataclass -from typing import Any, List +from typing import Generic, Iterable, List, Tuple, TypeVar from feast.diff.property_diff import PropertyDiff, TransitionType +from feast.infra.infra_object import ( + DATASTORE_INFRA_OBJECT_CLASS_TYPE, + DYNAMODB_INFRA_OBJECT_CLASS_TYPE, + SQLITE_INFRA_OBJECT_CLASS_TYPE, + InfraObject, +) +from feast.protos.feast.core.DatastoreTable_pb2 import ( + DatastoreTable as DatastoreTableProto, +) +from feast.protos.feast.core.DynamoDBTable_pb2 import ( + DynamoDBTable as DynamoDBTableProto, +) +from feast.protos.feast.core.InfraObject_pb2 import Infra as InfraProto +from feast.protos.feast.core.SqliteTable_pb2 import SqliteTable as SqliteTableProto + +InfraObjectProto = TypeVar( + "InfraObjectProto", DatastoreTableProto, DynamoDBTableProto, SqliteTableProto +) @dataclass -class InfraObjectDiff: +class InfraObjectDiff(Generic[InfraObjectProto]): name: str infra_object_type: str - current_fco: Any - new_fco: Any - fco_property_diffs: List[PropertyDiff] + current_infra_object: InfraObjectProto + new_infra_object: InfraObjectProto + infra_object_property_diffs: List[PropertyDiff] transition_type: TransitionType @@ -22,7 +40,164 @@ def __init__(self): self.infra_object_diffs = [] def update(self): - pass + """Apply the infrastructure changes specified in this object.""" + for infra_object_diff in self.infra_object_diffs: + if infra_object_diff.transition_type in [ + TransitionType.DELETE, + TransitionType.UPDATE, + ]: + infra_object = InfraObject.from_proto( + infra_object_diff.current_infra_object + ) + infra_object.teardown() + elif infra_object_diff.transition_type in [ + TransitionType.CREATE, + TransitionType.UPDATE, + ]: + infra_object = InfraObject.from_proto( + infra_object_diff.new_infra_object + ) + infra_object.update() def to_string(self): - pass + from colorama import Fore, Style + + log_string = "" + + message_action_map = { + TransitionType.CREATE: ("Created", Fore.GREEN), + 
TransitionType.DELETE: ("Deleted", Fore.RED), + TransitionType.UNCHANGED: ("Unchanged", Fore.LIGHTBLUE_EX), + TransitionType.UPDATE: ("Updated", Fore.YELLOW), + } + for infra_object_diff in self.infra_object_diffs: + if infra_object_diff.transition_type == TransitionType.UNCHANGED: + continue + action, color = message_action_map[infra_object_diff.transition_type] + log_string += f"{action} {infra_object_diff.infra_object_type} {Style.BRIGHT + color}{infra_object_diff.name}{Style.RESET_ALL}\n" + if infra_object_diff.transition_type == TransitionType.UPDATE: + for _p in infra_object_diff.infra_object_property_diffs: + log_string += f"\t{_p.property_name}: {Style.BRIGHT + color}{_p.val_existing}{Style.RESET_ALL} -> {Style.BRIGHT + Fore.LIGHTGREEN_EX}{_p.val_declared}{Style.RESET_ALL}\n" + + log_string = ( + f"{Style.BRIGHT + Fore.LIGHTBLUE_EX}No changes to infrastructure" + if not log_string + else log_string + ) + + return log_string + + +def tag_infra_proto_objects_for_keep_delete_add( + existing_objs: Iterable[InfraObjectProto], desired_objs: Iterable[InfraObjectProto] +) -> Tuple[ + Iterable[InfraObjectProto], Iterable[InfraObjectProto], Iterable[InfraObjectProto] +]: + existing_obj_names = {e.name for e in existing_objs} + desired_obj_names = {e.name for e in desired_objs} + + objs_to_add = [e for e in desired_objs if e.name not in existing_obj_names] + objs_to_keep = [e for e in desired_objs if e.name in existing_obj_names] + objs_to_delete = [e for e in existing_objs if e.name not in desired_obj_names] + + return objs_to_keep, objs_to_delete, objs_to_add + + +def diff_infra_protos( + current_infra_proto: InfraProto, new_infra_proto: InfraProto +) -> InfraDiff: + infra_diff = InfraDiff() + + infra_object_class_types_to_str = { + DATASTORE_INFRA_OBJECT_CLASS_TYPE: "datastore table", + DYNAMODB_INFRA_OBJECT_CLASS_TYPE: "dynamodb table", + SQLITE_INFRA_OBJECT_CLASS_TYPE: "sqlite table", + } + + for infra_object_class_type in infra_object_class_types_to_str: + current_infra_objects = get_infra_object_protos_by_type( + current_infra_proto, infra_object_class_type + ) + new_infra_objects = get_infra_object_protos_by_type( + new_infra_proto, infra_object_class_type + ) + ( + infra_objects_to_keep, + infra_objects_to_delete, + infra_objects_to_add, + ) = tag_infra_proto_objects_for_keep_delete_add( + current_infra_objects, new_infra_objects, + ) + + for e in infra_objects_to_add: + infra_diff.infra_object_diffs.append( + InfraObjectDiff( + e.name, + infra_object_class_types_to_str[infra_object_class_type], + None, + e, + [], + TransitionType.CREATE, + ) + ) + for e in infra_objects_to_delete: + infra_diff.infra_object_diffs.append( + InfraObjectDiff( + e.name, + infra_object_class_types_to_str[infra_object_class_type], + e, + None, + [], + TransitionType.DELETE, + ) + ) + for e in infra_objects_to_keep: + current_infra_object = [ + _e for _e in current_infra_objects if _e.name == e.name + ][0] + infra_diff.infra_object_diffs.append( + diff_between( + current_infra_object, + e, + infra_object_class_types_to_str[infra_object_class_type], + ) + ) + + return infra_diff + + +def get_infra_object_protos_by_type( + infra_proto: InfraProto, infra_object_class_type: str +) -> List[InfraObjectProto]: + return [ + InfraObject.from_infra_object_proto(infra_object).to_proto() + for infra_object in infra_proto.infra_objects + if infra_object.infra_object_class_type == infra_object_class_type + ] + + +FIELDS_TO_IGNORE = {"project"} + + +def diff_between( + current: InfraObjectProto, new: InfraObjectProto, 
infra_object_type: str +) -> InfraObjectDiff: + assert current.DESCRIPTOR.full_name == new.DESCRIPTOR.full_name + property_diffs = [] + transition: TransitionType = TransitionType.UNCHANGED + if current != new: + for _field in current.DESCRIPTOR.fields: + if _field.name in FIELDS_TO_IGNORE: + continue + if getattr(current, _field.name) != getattr(new, _field.name): + transition = TransitionType.UPDATE + property_diffs.append( + PropertyDiff( + _field.name, + getattr(current, _field.name), + getattr(new, _field.name), + ) + ) + return InfraObjectDiff( + new.name, infra_object_type, current, new, property_diffs, transition, + ) diff --git a/sdk/python/feast/diff/registry_diff.py b/sdk/python/feast/diff/registry_diff.py new file mode 100644 index 0000000000..1f68d3ff65 --- /dev/null +++ b/sdk/python/feast/diff/registry_diff.py @@ -0,0 +1,298 @@ +from dataclasses import dataclass +from typing import Any, Dict, Generic, Iterable, List, Set, Tuple, TypeVar + +from feast.base_feature_view import BaseFeatureView +from feast.diff.property_diff import PropertyDiff, TransitionType +from feast.entity import Entity +from feast.feature_service import FeatureService +from feast.feature_view import DUMMY_ENTITY_NAME +from feast.protos.feast.core.Entity_pb2 import Entity as EntityProto +from feast.protos.feast.core.FeatureService_pb2 import ( + FeatureService as FeatureServiceProto, +) +from feast.protos.feast.core.FeatureView_pb2 import FeatureView as FeatureViewProto +from feast.protos.feast.core.OnDemandFeatureView_pb2 import ( + OnDemandFeatureView as OnDemandFeatureViewProto, +) +from feast.protos.feast.core.RequestFeatureView_pb2 import ( + RequestFeatureView as RequestFeatureViewProto, +) +from feast.registry import FEAST_OBJECT_TYPES, FeastObjectType, Registry +from feast.repo_contents import RepoContents + +FeastObject = TypeVar("FeastObject", Entity, BaseFeatureView, FeatureService) + + +@dataclass +class FeastObjectDiff(Generic[FeastObject]): + name: str + feast_object_type: FeastObjectType + current_feast_object: FeastObject + new_feast_object: FeastObject + feast_object_property_diffs: List[PropertyDiff] + transition_type: TransitionType + + +@dataclass +class RegistryDiff: + feast_object_diffs: List[FeastObjectDiff] + + def __init__(self): + self.feast_object_diffs = [] + + def add_feast_object_diff(self, feast_object_diff: FeastObjectDiff): + self.feast_object_diffs.append(feast_object_diff) + + def to_string(self): + from colorama import Fore, Style + + log_string = "" + + message_action_map = { + TransitionType.CREATE: ("Created", Fore.GREEN), + TransitionType.DELETE: ("Deleted", Fore.RED), + TransitionType.UNCHANGED: ("Unchanged", Fore.LIGHTBLUE_EX), + TransitionType.UPDATE: ("Updated", Fore.YELLOW), + } + for feast_object_diff in self.feast_object_diffs: + if feast_object_diff.name == DUMMY_ENTITY_NAME: + continue + if feast_object_diff.transition_type == TransitionType.UNCHANGED: + continue + action, color = message_action_map[feast_object_diff.transition_type] + log_string += f"{action} {feast_object_diff.feast_object_type.value} {Style.BRIGHT + color}{feast_object_diff.name}{Style.RESET_ALL}\n" + if feast_object_diff.transition_type == TransitionType.UPDATE: + for _p in feast_object_diff.feast_object_property_diffs: + log_string += f"\t{_p.property_name}: {Style.BRIGHT + color}{_p.val_existing}{Style.RESET_ALL} -> {Style.BRIGHT + Fore.LIGHTGREEN_EX}{_p.val_declared}{Style.RESET_ALL}\n" + + log_string = ( + f"{Style.BRIGHT + Fore.LIGHTBLUE_EX}No changes to registry" + if not log_string 
+ else log_string + ) + + return log_string + + +def tag_objects_for_keep_delete_update_add( + existing_objs: Iterable[FeastObject], desired_objs: Iterable[FeastObject] +) -> Tuple[Set[FeastObject], Set[FeastObject], Set[FeastObject], Set[FeastObject]]: + existing_obj_names = {e.name for e in existing_objs} + desired_obj_names = {e.name for e in desired_objs} + + objs_to_add = {e for e in desired_objs if e.name not in existing_obj_names} + objs_to_update = {e for e in desired_objs if e.name in existing_obj_names} + objs_to_keep = {e for e in existing_objs if e.name in desired_obj_names} + objs_to_delete = {e for e in existing_objs if e.name not in desired_obj_names} + + return objs_to_keep, objs_to_delete, objs_to_update, objs_to_add + + +FeastObjectProto = TypeVar( + "FeastObjectProto", + EntityProto, + FeatureViewProto, + FeatureServiceProto, + OnDemandFeatureViewProto, + RequestFeatureViewProto, +) + + +FIELDS_TO_IGNORE = {"project"} + + +def diff_registry_objects( + current: FeastObject, new: FeastObject, object_type: FeastObjectType +) -> FeastObjectDiff: + current_proto = current.to_proto() + new_proto = new.to_proto() + assert current_proto.DESCRIPTOR.full_name == new_proto.DESCRIPTOR.full_name + property_diffs = [] + transition: TransitionType = TransitionType.UNCHANGED + if current_proto.spec != new_proto.spec: + for _field in current_proto.spec.DESCRIPTOR.fields: + if _field.name in FIELDS_TO_IGNORE: + continue + if getattr(current_proto.spec, _field.name) != getattr( + new_proto.spec, _field.name + ): + transition = TransitionType.UPDATE + property_diffs.append( + PropertyDiff( + _field.name, + getattr(current_proto.spec, _field.name), + getattr(new_proto.spec, _field.name), + ) + ) + return FeastObjectDiff( + name=new_proto.spec.name, + feast_object_type=object_type, + current_feast_object=current, + new_feast_object=new, + feast_object_property_diffs=property_diffs, + transition_type=transition, + ) + + +def extract_objects_for_keep_delete_update_add( + registry: Registry, current_project: str, desired_repo_contents: RepoContents, +) -> Tuple[ + Dict[FeastObjectType, Set[FeastObject]], + Dict[FeastObjectType, Set[FeastObject]], + Dict[FeastObjectType, Set[FeastObject]], + Dict[FeastObjectType, Set[FeastObject]], +]: + """ + Returns the objects in the registry that must be modified to achieve the desired repo state. + + Args: + registry: The registry storing the current repo state. + current_project: The Feast project whose objects should be compared. + desired_repo_contents: The desired repo state. 
+ """ + objs_to_keep = {} + objs_to_delete = {} + objs_to_update = {} + objs_to_add = {} + + registry_object_type_to_objects: Dict[ + FeastObjectType, List[Any] + ] = FeastObjectType.get_objects_from_registry(registry, current_project) + registry_object_type_to_repo_contents: Dict[ + FeastObjectType, Set[Any] + ] = FeastObjectType.get_objects_from_repo_contents(desired_repo_contents) + + for object_type in FEAST_OBJECT_TYPES: + ( + to_keep, + to_delete, + to_update, + to_add, + ) = tag_objects_for_keep_delete_update_add( + registry_object_type_to_objects[object_type], + registry_object_type_to_repo_contents[object_type], + ) + + objs_to_keep[object_type] = to_keep + objs_to_delete[object_type] = to_delete + objs_to_update[object_type] = to_update + objs_to_add[object_type] = to_add + + return objs_to_keep, objs_to_delete, objs_to_update, objs_to_add + + +def diff_between( + registry: Registry, current_project: str, desired_repo_contents: RepoContents, +) -> RegistryDiff: + """ + Returns the difference between the current and desired repo states. + + Args: + registry: The registry storing the current repo state. + current_project: The Feast project for which the diff is being computed. + desired_repo_contents: The desired repo state. + """ + diff = RegistryDiff() + + ( + objs_to_keep, + objs_to_delete, + objs_to_update, + objs_to_add, + ) = extract_objects_for_keep_delete_update_add( + registry, current_project, desired_repo_contents + ) + + for object_type in FEAST_OBJECT_TYPES: + objects_to_keep = objs_to_keep[object_type] + objects_to_delete = objs_to_delete[object_type] + objects_to_update = objs_to_update[object_type] + objects_to_add = objs_to_add[object_type] + + for e in objects_to_add: + diff.add_feast_object_diff( + FeastObjectDiff( + name=e.name, + feast_object_type=object_type, + current_feast_object=None, + new_feast_object=e, + feast_object_property_diffs=[], + transition_type=TransitionType.CREATE, + ) + ) + for e in objects_to_delete: + diff.add_feast_object_diff( + FeastObjectDiff( + name=e.name, + feast_object_type=object_type, + current_feast_object=e, + new_feast_object=None, + feast_object_property_diffs=[], + transition_type=TransitionType.DELETE, + ) + ) + for e in objects_to_update: + current_obj = [_e for _e in objects_to_keep if _e.name == e.name][0] + diff.add_feast_object_diff( + diff_registry_objects(current_obj, e, object_type) + ) + + return diff + + +def apply_diff_to_registry( + registry: Registry, registry_diff: RegistryDiff, project: str, commit: bool = True +): + """ + Applies the given diff to the given Feast project in the registry. + + Args: + registry: The registry to be updated. + registry_diff: The diff to apply. + project: Feast project to be updated. + commit: Whether the change should be persisted immediately + """ + for feast_object_diff in registry_diff.feast_object_diffs: + # There is no need to delete the object on an update, since applying the new object + # will automatically delete the existing object. 
+ if feast_object_diff.transition_type == TransitionType.DELETE: + if feast_object_diff.feast_object_type == FeastObjectType.ENTITY: + registry.delete_entity( + feast_object_diff.current_feast_object.name, project, commit=False + ) + elif feast_object_diff.feast_object_type == FeastObjectType.FEATURE_SERVICE: + registry.delete_feature_service( + feast_object_diff.current_feast_object.name, project, commit=False + ) + elif feast_object_diff.feast_object_type in [ + FeastObjectType.FEATURE_VIEW, + FeastObjectType.ON_DEMAND_FEATURE_VIEW, + FeastObjectType.REQUEST_FEATURE_VIEW, + ]: + registry.delete_feature_view( + feast_object_diff.current_feast_object.name, project, commit=False, + ) + + if feast_object_diff.transition_type in [ + TransitionType.CREATE, + TransitionType.UPDATE, + ]: + if feast_object_diff.feast_object_type == FeastObjectType.ENTITY: + registry.apply_entity( + feast_object_diff.new_feast_object, project, commit=False + ) + elif feast_object_diff.feast_object_type == FeastObjectType.FEATURE_SERVICE: + registry.apply_feature_service( + feast_object_diff.new_feast_object, project, commit=False + ) + elif feast_object_diff.feast_object_type in [ + FeastObjectType.FEATURE_VIEW, + FeastObjectType.ON_DEMAND_FEATURE_VIEW, + FeastObjectType.REQUEST_FEATURE_VIEW, + ]: + registry.apply_feature_view( + feast_object_diff.new_feast_object, project, commit=False + ) + + if commit: + registry.commit() diff --git a/sdk/python/feast/dqm/__init__.py b/sdk/python/feast/dqm/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sdk/python/feast/dqm/errors.py b/sdk/python/feast/dqm/errors.py new file mode 100644 index 0000000000..c4179f72b3 --- /dev/null +++ b/sdk/python/feast/dqm/errors.py @@ -0,0 +1,13 @@ +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .profilers.profiler import ValidationReport + + +class ValidationFailed(Exception): + def __init__(self, validation_report: "ValidationReport"): + self.validation_report = validation_report + + @property + def report(self) -> "ValidationReport": + return self.validation_report diff --git a/sdk/python/feast/dqm/profilers/__init__.py b/sdk/python/feast/dqm/profilers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sdk/python/feast/dqm/profilers/ge_profiler.py b/sdk/python/feast/dqm/profilers/ge_profiler.py new file mode 100644 index 0000000000..f1780754de --- /dev/null +++ b/sdk/python/feast/dqm/profilers/ge_profiler.py @@ -0,0 +1,162 @@ +import json +from typing import Any, Callable, Dict, List + +import dill +import great_expectations as ge +import numpy as np +import pandas as pd +from great_expectations.core import ExpectationSuite +from great_expectations.dataset import PandasDataset +from great_expectations.profile.base import ProfilerTypeMapping + +from feast.dqm.profilers.profiler import ( + Profile, + Profiler, + ValidationError, + ValidationReport, +) +from feast.protos.feast.core.ValidationProfile_pb2 import ( + GEValidationProfile as GEValidationProfileProto, +) +from feast.protos.feast.core.ValidationProfile_pb2 import ( + GEValidationProfiler as GEValidationProfilerProto, +) + + +def _prepare_dataset(dataset: PandasDataset) -> PandasDataset: + dataset_copy = dataset.copy(deep=True) + + for column in dataset.columns: + if dataset.expect_column_values_to_be_in_type_list( + column, type_list=sorted(list(ProfilerTypeMapping.DATETIME_TYPE_NAMES)) + ).success: + # GE cannot parse Timestamp or other pandas datetime time + dataset_copy[column] = 
dataset[column].dt.strftime("%Y-%m-%dT%H:%M:%S") + + if dataset[column].dtype == np.float32: + # GE converts expectation arguments into native Python float + # This could cause error on comparison => so better to convert to double prematurely + dataset_copy[column] = dataset[column].astype(np.float64) + + return dataset_copy + + +class GEProfile(Profile): + """ + GEProfile is an implementation of abstract Profile for integration with Great Expectations. + It executes validation by applying expectations from ExpectationSuite instance to a given dataset. + """ + + expectation_suite: ExpectationSuite + + def __init__(self, expectation_suite: ExpectationSuite): + self.expectation_suite = expectation_suite + + def validate(self, df: pd.DataFrame) -> "GEValidationReport": + """ + Validate provided dataframe against GE expectation suite. + 1. Pandas dataframe is converted into PandasDataset (GE type) + 2. Some fixes applied to the data to avoid crashes inside GE (see _prepare_dataset) + 3. Each expectation from ExpectationSuite instance tested against resulting dataset + + Return GEValidationReport, which parses great expectation's schema into list of generic ValidationErrors. + """ + dataset = PandasDataset(df) + + dataset = _prepare_dataset(dataset) + + results = ge.validate( + dataset, expectation_suite=self.expectation_suite, result_format="COMPLETE" + ) + return GEValidationReport(results) + + def to_proto(self): + return GEValidationProfileProto( + expectation_suite=json.dumps(self.expectation_suite.to_json_dict()).encode() + ) + + @classmethod + def from_proto(cls, proto: GEValidationProfileProto) -> "GEProfile": + return GEProfile( + expectation_suite=ExpectationSuite(**json.loads(proto.expectation_suite)) + ) + + def __repr__(self): + expectations = json.dumps( + [e.to_json_dict() for e in self.expectation_suite.expectations], indent=2 + ) + return f"" + + +class GEProfiler(Profiler): + """ + GEProfiler is an implementation of abstract Profiler for integration with Great Expectations. + It wraps around user defined profiler that should accept dataset (in a form of pandas dataframe) + and return ExpectationSuite. + """ + + def __init__( + self, user_defined_profiler: Callable[[pd.DataFrame], ExpectationSuite] + ): + self.user_defined_profiler = user_defined_profiler + + def analyze_dataset(self, df: pd.DataFrame) -> Profile: + """ + Generate GEProfile with ExpectationSuite (set of expectations) + from a given pandas dataframe by applying user defined profiler. + + Some fixes are also applied to the dataset (see _prepare_dataset function) to make it compatible with GE. 
+ + Return GEProfile + """ + dataset = PandasDataset(df) + + dataset = _prepare_dataset(dataset) + + return GEProfile(expectation_suite=self.user_defined_profiler(dataset)) + + def to_proto(self): + return GEValidationProfilerProto( + profiler=GEValidationProfilerProto.UserDefinedProfiler( + body=dill.dumps(self.user_defined_profiler, recurse=True) + ) + ) + + @classmethod + def from_proto(cls, proto: GEValidationProfilerProto) -> "GEProfiler": + return GEProfiler(user_defined_profiler=dill.loads(proto.profiler.body)) + + +class GEValidationReport(ValidationReport): + def __init__(self, validation_result: Dict[Any, Any]): + self._validation_result = validation_result + + @property + def is_success(self) -> bool: + return self._validation_result["success"] + + @property + def errors(self) -> List["ValidationError"]: + return [ + ValidationError( + check_name=res.expectation_config.expectation_type, + column_name=res.expectation_config.kwargs["column"], + check_config=res.expectation_config.kwargs, + missing_count=res["result"].get("missing_count"), + missing_percent=res["result"].get("missing_percent"), + ) + for res in self._validation_result["results"] + if not res["success"] + ] + + def __repr__(self): + failed_expectations = [ + res.to_json_dict() + for res in self._validation_result["results"] + if not res["success"] + ] + return json.dumps(failed_expectations, indent=2) + + +def ge_profiler(func): + return GEProfiler(user_defined_profiler=func) diff --git a/sdk/python/feast/dqm/profilers/profiler.py b/sdk/python/feast/dqm/profilers/profiler.py new file mode 100644 index 0000000000..5d2e9d36bc --- /dev/null +++ b/sdk/python/feast/dqm/profilers/profiler.py @@ -0,0 +1,88 @@ +import abc +from typing import Any, List, Optional + +import pandas as pd + + +class Profile: + @abc.abstractmethod + def validate(self, dataset: pd.DataFrame) -> "ValidationReport": + """ + Run set of rules / expectations from current profile against given dataset. + + Return ValidationReport + """ + ... + + @abc.abstractmethod + def to_proto(self): + ... + + @classmethod + @abc.abstractmethod + def from_proto(cls, proto) -> "Profile": + ... + + +class Profiler: + @abc.abstractmethod + def analyze_dataset(self, dataset: pd.DataFrame) -> Profile: + """ + Generate Profile object with dataset's characteristics (with rules / expectations) + from given dataset (as pandas dataframe). + """ + ... + + @abc.abstractmethod + def to_proto(self): + ... + + @classmethod + @abc.abstractmethod + def from_proto(cls, proto) -> "Profiler": + ... + + +class ValidationReport: + @property + @abc.abstractmethod + def is_success(self) -> bool: + """ + Return whether validation was successful + """ + ... + + @property + @abc.abstractmethod + def errors(self) -> List["ValidationError"]: + """ + Return list of ValidationErrors if validation failed (is_success = false) + """ + ... 
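As a usage illustration of the profiler pieces added above (ge_profiler, GEProfiler.analyze_dataset, GEProfile.validate, GEValidationReport), the sketch below assumes great_expectations is installed; the column name, bounds, and dataframes are illustrative only:

import pandas as pd

from feast.dqm.profilers.ge_profiler import ge_profiler

@ge_profiler
def stats_profiler(dataset):
    # dataset is a great_expectations PandasDataset; record one expectation
    # and return the resulting ExpectationSuite.
    dataset.expect_column_values_to_be_between("conv_rate", min_value=0, max_value=1)
    return dataset.get_expectation_suite()

reference_df = pd.DataFrame({"conv_rate": [0.1, 0.5, 0.9]})
profile = stats_profiler.analyze_dataset(reference_df)  # GEProfile wrapping the suite

report = profile.validate(pd.DataFrame({"conv_rate": [0.2, 1.5]}))  # GEValidationReport
if not report.is_success:
    for error in report.errors:  # generic ValidationError objects
        print(error.check_name, error.column_name)

The ValidationFailed exception added in feast/dqm/errors.py above carries such a report for callers that prefer to raise on failure rather than inspect it directly.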
+ + +class ValidationError: + check_name: str + column_name: str + + check_config: Optional[Any] + + missing_count: Optional[int] + missing_percent: Optional[float] + + def __init__( + self, + check_name: str, + column_name: str, + check_config: Optional[Any] = None, + missing_count: Optional[int] = None, + missing_percent: Optional[float] = None, + ): + self.check_name = check_name + self.column_name = column_name + self.check_config = check_config + self.missing_count = missing_count + self.missing_percent = missing_percent + + def __repr__(self): + return f"" diff --git a/sdk/python/feast/driver_test_data.py b/sdk/python/feast/driver_test_data.py index 1c9a1dd20b..117bfcbd9c 100644 --- a/sdk/python/feast/driver_test_data.py +++ b/sdk/python/feast/driver_test_data.py @@ -264,3 +264,29 @@ def create_global_daily_stats_df(start_date, end_date) -> pd.DataFrame: # TODO: Remove created timestamp in order to test whether its really optional df_daily["created"] = pd.to_datetime(pd.Timestamp.now(tz=None).round("ms")) return df_daily + + +def create_field_mapping_df(start_date, end_date) -> pd.DataFrame: + """ + Example df generated by this function: + | event_timestamp | column_name | created | + |------------------+-------------+------------------| + | 2021-03-17 19:00 | 99 | 2021-03-24 19:38 | + | 2021-03-17 19:00 | 22 | 2021-03-24 19:38 | + | 2021-03-17 19:00 | 7 | 2021-03-24 19:38 | + | 2021-03-17 19:00 | 45 | 2021-03-24 19:38 | + """ + size = 10 + df = pd.DataFrame() + df["column_name"] = np.random.randint(1, 100, size=size).astype(np.int32) + df[DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL] = [ + _convert_event_timestamp( + pd.Timestamp(dt, unit="ms", tz="UTC").round("ms"), + EventTimestampType(idx % 4), + ) + for idx, dt in enumerate( + pd.date_range(start=start_date, end=end_date, periods=size) + ) + ] + df["created"] = pd.to_datetime(pd.Timestamp.now(tz=None).round("ms")) + return df diff --git a/sdk/python/feast/errors.py b/sdk/python/feast/errors.py index 615069e579..17147f8a60 100644 --- a/sdk/python/feast/errors.py +++ b/sdk/python/feast/errors.py @@ -74,6 +74,11 @@ def __init__(self, bucket): super().__init__(f"S3 bucket {bucket} for the Feast registry can't be accessed") +class SavedDatasetNotFound(FeastObjectNotFoundException): + def __init__(self, name: str, project: str): + super().__init__(f"Saved dataset {name} does not exist in project {project}") + + class FeastProviderLoginError(Exception): """Error class that indicates a user has not authenticated with their provider.""" @@ -238,6 +243,23 @@ def __init__(self, details): super().__init__(f"Redshift SQL Query failed to finish. Details: {details}") +class RedshiftTableNameTooLong(Exception): + def __init__(self, table_name: str): + super().__init__( + f"Redshift table names have a maximum length of 127 characters, but the table name {table_name} has length {len(table_name)} characters." + ) + + +class SnowflakeCredentialsError(Exception): + def __init__(self): + super().__init__("Snowflake Connector failed due to incorrect credentials") + + +class SnowflakeQueryError(Exception): + def __init__(self, details): + super().__init__(f"Snowflake SQL Query failed to finish. Details: {details}") + + class EntityTimestampInferenceException(Exception): def __init__(self, expected_column_name: str): super().__init__( @@ -293,3 +315,18 @@ def __init__(self, actual_class: str, expected_class: str): super().__init__( f"The registry store class was expected to be {expected_class}, but was instead {actual_class}." 
) + + +class FeastInvalidInfraObjectType(Exception): + def __init__(self): + super().__init__("Could not identify the type of the InfraObject.") + + +class SnowflakeIncompleteConfig(Exception): + def __init__(self, e: KeyError): + super().__init__(f"{e} not defined in a config file or feature_store.yaml file") + + +class SnowflakeQueryUnknownError(Exception): + def __init__(self, query: str): + super().__init__(f"Snowflake query failed: {query}") diff --git a/sdk/python/feast/feature_server.py b/sdk/python/feast/feature_server.py index b813af1c63..585075843a 100644 --- a/sdk/python/feast/feature_server.py +++ b/sdk/python/feast/feature_server.py @@ -1,4 +1,5 @@ -import click +import traceback + import uvicorn from fastapi import FastAPI, HTTPException, Request from fastapi.logger import logger @@ -8,7 +9,6 @@ import feast from feast import proto_json from feast.protos.feast.serving.ServingService_pb2 import GetOnlineFeaturesRequest -from feast.type_map import feast_value_type_to_python_type def get_app(store: "feast.FeatureStore"): @@ -41,16 +41,11 @@ def get_online_features(body=Depends(get_body)): if any(batch_size != num_entities for batch_size in batch_sizes): raise HTTPException(status_code=500, detail="Uneven number of columns") - entity_rows = [ - { - k: feast_value_type_to_python_type(v.val[idx]) - for k, v in request_proto.entities.items() - } - for idx in range(num_entities) - ] - - response_proto = store.get_online_features( - features, entity_rows, full_feature_names=full_feature_names + response_proto = store._get_online_features( + features, + request_proto.entities, + full_feature_names=full_feature_names, + native_entity_values=False, ).proto # Convert the Protobuf object to JSON and return it @@ -59,7 +54,7 @@ def get_online_features(body=Depends(get_body)): ) except Exception as e: # Print the original exception on the server side - logger.exception(e) + logger.exception(traceback.format_exc()) # Raise HTTPException to return the error message to the client raise HTTPException(status_code=500, detail=str(e)) @@ -70,9 +65,4 @@ def start_server( store: "feast.FeatureStore", host: str, port: int, no_access_log: bool ): app = get_app(store) - click.echo( - "This is an " - + click.style("experimental", fg="yellow", bold=True, underline=True) - + " feature. It's intended for early testing and feedback, and could change without warnings in future releases." - ) uvicorn.run(app, host=host, port=port, access_log=(not no_access_log)) diff --git a/sdk/python/feast/feature_service.py b/sdk/python/feast/feature_service.py index 9bb4fb5e5d..bb6ec909bf 100644 --- a/sdk/python/feast/feature_service.py +++ b/sdk/python/feast/feature_service.py @@ -11,39 +11,47 @@ FeatureService as FeatureServiceProto, ) from feast.protos.feast.core.FeatureService_pb2 import ( - FeatureServiceMeta, - FeatureServiceSpec, + FeatureServiceMeta as FeatureServiceMetaProto, +) +from feast.protos.feast.core.FeatureService_pb2 import ( + FeatureServiceSpec as FeatureServiceSpecProto, ) from feast.usage import log_exceptions class FeatureService: """ - A feature service is a logical grouping of features for retrieval (training or serving). - The features grouped by a feature service may come from any number of feature views. - - Args: - name: Unique name of the feature service. - features: A list of Features that are grouped as part of this FeatureService. - The list may contain Feature Views, Feature Tables, or a subset of either. 
- tags (optional): A dictionary of key-value pairs used for organizing Feature - Services. + A feature service defines a logical group of features from one or more feature views. + This group of features can be retrieved together during training or serving. + + Attributes: + name: The unique name of the feature service. + feature_view_projections: A list containing feature views and feature view + projections, representing the features in the feature service. + description: A human-readable description. + tags: A dictionary of key-value pairs to store arbitrary metadata. + owner: The owner of the feature service, typically the email of the primary + maintainer. + created_timestamp: The time when the feature service was created. + last_updated_timestamp: The time when the feature service was last updated. """ - name: str - feature_view_projections: List[FeatureViewProjection] - tags: Dict[str, str] - description: Optional[str] = None - created_timestamp: Optional[datetime] = None - last_updated_timestamp: Optional[datetime] = None + _name: str + _feature_view_projections: List[FeatureViewProjection] + _description: str + _tags: Dict[str, str] + _owner: str + _created_timestamp: Optional[datetime] = None + _last_updated_timestamp: Optional[datetime] = None @log_exceptions def __init__( self, name: str, features: List[Union[FeatureView, OnDemandFeatureView]], - tags: Optional[Dict[str, str]] = None, - description: Optional[str] = None, + tags: Dict[str, str] = None, + description: str = "", + owner: str = "", ): """ Creates a FeatureService object. @@ -51,22 +59,23 @@ def __init__( Raises: ValueError: If one of the specified features is not a valid type. """ - self.name = name - self.feature_view_projections = [] + self._name = name + self._feature_view_projections = [] for feature_grouping in features: if isinstance(feature_grouping, BaseFeatureView): - self.feature_view_projections.append(feature_grouping.projection) + self._feature_view_projections.append(feature_grouping.projection) else: raise ValueError( - "The FeatureService {fs_name} has been provided with an invalid type" + f"The feature service {name} has been provided with an invalid type " f'{type(feature_grouping)} as part of the "features" argument.)' ) - self.tags = tags or {} - self.description = description - self.created_timestamp = None - self.last_updated_timestamp = None + self._description = description + self._tags = tags or {} + self._owner = owner + self._created_timestamp = None + self._last_updated_timestamp = None def __repr__(self): items = (f"{k} = {v}" for k, v in self.__dict__.items()) @@ -83,7 +92,13 @@ def __eq__(self, other): raise TypeError( "Comparisons should only involve FeatureService class objects." 
) - if self.tags != other.tags or self.name != other.name: + + if ( + self.name != other.name + or self.description != other.description + or self.tags != other.tags + or self.owner != other.owner + ): return False if sorted(self.feature_view_projections) != sorted( @@ -93,23 +108,78 @@ def __eq__(self, other): return True - @staticmethod - def from_proto(feature_service_proto: FeatureServiceProto): + @property + def name(self) -> str: + return self._name + + @name.setter + def name(self, name: str): + self._name = name + + @property + def feature_view_projections(self) -> List[FeatureViewProjection]: + return self._feature_view_projections + + @feature_view_projections.setter + def feature_view_projections( + self, feature_view_projections: List[FeatureViewProjection] + ): + self._feature_view_projections = feature_view_projections + + @property + def description(self) -> str: + return self._description + + @description.setter + def description(self, description: str): + self._description = description + + @property + def tags(self) -> Dict[str, str]: + return self._tags + + @tags.setter + def tags(self, tags: Dict[str, str]): + self._tags = tags + + @property + def owner(self) -> str: + return self._owner + + @owner.setter + def owner(self, owner: str): + self._owner = owner + + @property + def created_timestamp(self) -> Optional[datetime]: + return self._created_timestamp + + @created_timestamp.setter + def created_timestamp(self, created_timestamp: datetime): + self._created_timestamp = created_timestamp + + @property + def last_updated_timestamp(self) -> Optional[datetime]: + return self._last_updated_timestamp + + @last_updated_timestamp.setter + def last_updated_timestamp(self, last_updated_timestamp: datetime): + self._last_updated_timestamp = last_updated_timestamp + + @classmethod + def from_proto(cls, feature_service_proto: FeatureServiceProto): """ Converts a FeatureServiceProto to a FeatureService object. Args: feature_service_proto: A protobuf representation of a FeatureService. """ - fs = FeatureService( + fs = cls( name=feature_service_proto.spec.name, features=[], tags=dict(feature_service_proto.spec.tags), - description=( - feature_service_proto.spec.description - if feature_service_proto.spec.description != "" - else None - ), + description=feature_service_proto.spec.description, + owner=feature_service_proto.spec.owner, ) fs.feature_view_projections.extend( [ @@ -131,29 +201,28 @@ def from_proto(feature_service_proto: FeatureServiceProto): def to_proto(self) -> FeatureServiceProto: """ - Converts a FeatureService to its protobuf representation. + Converts a feature service to its protobuf representation. Returns: A FeatureServiceProto protobuf. 
""" - meta = FeatureServiceMeta() + meta = FeatureServiceMetaProto() if self.created_timestamp: meta.created_timestamp.FromDatetime(self.created_timestamp) + if self.last_updated_timestamp: + meta.last_updated_timestamp.FromDatetime(self.last_updated_timestamp) - spec = FeatureServiceSpec( + spec = FeatureServiceSpecProto( name=self.name, features=[ projection.to_proto() for projection in self.feature_view_projections ], + tags=self.tags, + description=self.description, + owner=self.owner, ) - if self.tags: - spec.tags.update(self.tags) - if self.description: - spec.description = self.description - - feature_service_proto = FeatureServiceProto(spec=spec, meta=meta) - return feature_service_proto + return FeatureServiceProto(spec=spec, meta=meta) def validate(self): pass diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index f1fee70336..fcd94f9bea 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -23,8 +23,9 @@ Dict, Iterable, List, - NamedTuple, + Mapping, Optional, + Sequence, Set, Tuple, Union, @@ -33,11 +34,13 @@ import pandas as pd from colorama import Fore, Style +from google.protobuf.timestamp_pb2 import Timestamp from tqdm import tqdm from feast import feature_server, flags, flags_helper, utils from feast.base_feature_view import BaseFeatureView -from feast.diff.FcoDiff import RegistryDiff +from feast.diff.infra_diff import InfraDiff, diff_infra_protos +from feast.diff.registry_diff import RegistryDiff, apply_diff_to_registry, diff_between from feast.entity import Entity from feast.errors import ( EntityNotFoundException, @@ -60,19 +63,22 @@ update_entities_with_inferred_types_from_feature_views, update_feature_views_with_inferred_features, ) +from feast.infra.infra_object import Infra from feast.infra.provider import Provider, RetrievalJob, get_provider from feast.on_demand_feature_view import OnDemandFeatureView from feast.online_response import OnlineResponse -from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto +from feast.protos.feast.core.InfraObject_pb2 import Infra as InfraProto from feast.protos.feast.serving.ServingService_pb2 import ( FieldStatus, GetOnlineFeaturesResponse, ) from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto -from feast.protos.feast.types.Value_pb2 import Value +from feast.protos.feast.types.Value_pb2 import RepeatedValue, Value from feast.registry import Registry from feast.repo_config import RepoConfig, load_repo_config +from feast.repo_contents import RepoContents from feast.request_feature_view import RequestFeatureView +from feast.saved_dataset import SavedDataset, SavedDatasetStorage from feast.type_map import python_values_to_proto_values from feast.usage import log_exceptions, log_exceptions_and_usage, set_usage_attribute from feast.value_type import ValueType @@ -81,31 +87,6 @@ warnings.simplefilter("once", DeprecationWarning) -class RepoContents(NamedTuple): - feature_views: Set[FeatureView] - on_demand_feature_views: Set[OnDemandFeatureView] - request_feature_views: Set[RequestFeatureView] - entities: Set[Entity] - feature_services: Set[FeatureService] - - def to_registry_proto(self) -> RegistryProto: - registry_proto = RegistryProto() - registry_proto.entities.extend([e.to_proto() for e in self.entities]) - registry_proto.feature_views.extend( - [fv.to_proto() for fv in self.feature_views] - ) - registry_proto.on_demand_feature_views.extend( - [fv.to_proto() for fv in self.on_demand_feature_views] - ) - 
registry_proto.request_feature_views.extend( - [fv.to_proto() for fv in self.request_feature_views] - ) - registry_proto.feature_services.extend( - [fs.to_proto() for fs in self.feature_services] - ) - return registry_proto - - class FeatureStore: """ A FeatureStore object is used to define, create, and retrieve features. @@ -144,6 +125,7 @@ def __init__( registry_config = self.config.get_registry_config() self._registry = Registry(registry_config, repo_path=self.repo_path) + self._registry._initialize_registry() self._provider = get_provider(self.config, self.repo_path) @log_exceptions @@ -264,14 +246,18 @@ def _list_feature_views( return feature_views @log_exceptions_and_usage - def list_on_demand_feature_views(self) -> List[OnDemandFeatureView]: + def list_on_demand_feature_views( + self, allow_cache: bool = False + ) -> List[OnDemandFeatureView]: """ Retrieves the list of on demand feature views from the registry. Returns: A list of on demand feature views. """ - return self._registry.list_on_demand_feature_views(self.project) + return self._registry.list_on_demand_feature_views( + self.project, allow_cache=allow_cache + ) @log_exceptions_and_usage def get_entity(self, name: str) -> Entity: @@ -404,8 +390,58 @@ def _get_features( _feature_refs = _features return _feature_refs + def _should_use_plan(self): + """Returns True if _plan and _apply_diffs should be used, False otherwise.""" + # Currently only the local provider supports _plan and _apply_diffs. + return self.config.provider == "local" + + def _validate_all_feature_views( + self, + views_to_update: List[FeatureView], + odfvs_to_update: List[OnDemandFeatureView], + request_views_to_update: List[RequestFeatureView], + ): + """Validates all feature views.""" + if ( + not flags_helper.enable_on_demand_feature_views(self.config) + and len(odfvs_to_update) > 0 + ): + raise ExperimentalFeatureNotEnabled(flags.FLAG_ON_DEMAND_TRANSFORM_NAME) + + set_usage_attribute("odfv", bool(odfvs_to_update)) + + _validate_feature_views( + [*views_to_update, *odfvs_to_update, *request_views_to_update] + ) + + def _make_inferences( + self, + entities_to_update: List[Entity], + views_to_update: List[FeatureView], + odfvs_to_update: List[OnDemandFeatureView], + ): + """Makes inferences for entities, feature views, and odfvs.""" + update_entities_with_inferred_types_from_feature_views( + entities_to_update, views_to_update, self.config + ) + + update_data_sources_with_inferred_event_timestamp_col( + [view.batch_source for view in views_to_update], self.config + ) + + # New feature views may reference previously applied entities. + entities = self._list_entities() + update_feature_views_with_inferred_features( + views_to_update, entities + entities_to_update, self.config + ) + + for odfv in odfvs_to_update: + odfv.infer_features() + @log_exceptions_and_usage - def plan(self, desired_repo_objects: RepoContents) -> RegistryDiff: + def _plan( + self, desired_repo_contents: RepoContents + ) -> Tuple[RegistryDiff, InfraDiff, Infra]: """Dry-run registering objects to metadata store. The plan method dry-runs registering one or more definitions (e.g., Entity, FeatureView), and produces @@ -440,18 +476,57 @@ def plan(self, desired_repo_objects: RepoContents) -> RegistryDiff: ... ttl=timedelta(seconds=86400 * 1), ... batch_source=driver_hourly_stats, ... 
) - >>> diff = fs.plan(RepoContents({driver_hourly_stats_view}, set(), set(), {driver}, set())) # register entity and feature view + >>> registry_diff, infra_diff, new_infra = fs._plan(RepoContents({driver_hourly_stats_view}, set(), set(), {driver}, set())) # register entity and feature view """ + # Validate and run inference on all the objects to be registered. + self._validate_all_feature_views( + list(desired_repo_contents.feature_views), + list(desired_repo_contents.on_demand_feature_views), + list(desired_repo_contents.request_feature_views), + ) + self._make_inferences( + list(desired_repo_contents.entities), + list(desired_repo_contents.feature_views), + list(desired_repo_contents.on_demand_feature_views), + ) + + # Compute the desired difference between the current objects in the registry and + # the desired repo state. + registry_diff = diff_between( + self._registry, self.project, desired_repo_contents + ) - current_registry_proto = ( - self._registry.cached_registry_proto.__deepcopy__() + # Compute the desired difference between the current infra, as stored in the registry, + # and the desired infra. + self._registry.refresh() + current_infra_proto = ( + self._registry.cached_registry_proto.infra.__deepcopy__() if self._registry.cached_registry_proto - else RegistryProto() + else InfraProto() ) + desired_registry_proto = desired_repo_contents.to_registry_proto() + new_infra = self._provider.plan_infra(self.config, desired_registry_proto) + new_infra_proto = new_infra.to_proto() + infra_diff = diff_infra_protos(current_infra_proto, new_infra_proto) - desired_registry_proto = desired_repo_objects.to_registry_proto() - diffs = Registry.diff_between(current_registry_proto, desired_registry_proto) - return diffs + return (registry_diff, infra_diff, new_infra) + + @log_exceptions_and_usage + def _apply_diffs( + self, registry_diff: RegistryDiff, infra_diff: InfraDiff, new_infra: Infra + ): + """Applies the given diffs to the metadata store and infrastructure. + + Args: + registry_diff: The diff between the current registry and the desired registry. + infra_diff: The diff between the current infra and the desired infra. + new_infra: The desired infra. + """ + infra_diff.update() + apply_diff_to_registry( + self._registry, registry_diff, self.project, commit=False + ) + self._registry.update_infra(new_infra, self.project, commit=True) @log_exceptions_and_usage def apply( @@ -484,7 +559,7 @@ def apply( ] ] = None, partial: bool = True, - ) -> RegistryDiff: + ): """Register objects to metadata store and update related infrastructure. The apply method registers one or more definitions (e.g., Entity, FeatureView) and registers or updates these @@ -520,7 +595,7 @@ def apply( ... ttl=timedelta(seconds=86400 * 1), ... batch_source=driver_hourly_stats, ... ) - >>> diff = fs.apply([driver_hourly_stats_view, driver]) # register entity and feature view + >>> fs.apply([driver_hourly_stats_view, driver]) # register entity and feature view """ # TODO: Add locking if not isinstance(objects, Iterable): @@ -530,12 +605,6 @@ def apply( if not objects_to_delete: objects_to_delete = [] - current_registry_proto = ( - self._registry.cached_registry_proto.__deepcopy__() - if self._registry.cached_registry_proto - else RegistryProto() - ) - # Separate all objects into entities, feature services, and different feature view types. 
entities_to_update = [ob for ob in objects if isinstance(ob, Entity)] views_to_update = [ob for ob in objects if isinstance(ob, FeatureView)] @@ -550,34 +619,11 @@ def apply( ) + len(odfvs_to_update) + len(services_to_update) != len(objects): raise ValueError("Unknown object type provided as part of apply() call") - # Validate all types of feature views. - if ( - not flags_helper.enable_on_demand_feature_views(self.config) - and len(odfvs_to_update) > 0 - ): - raise ExperimentalFeatureNotEnabled(flags.FLAG_ON_DEMAND_TRANSFORM_NAME) - - set_usage_attribute("odfv", bool(odfvs_to_update)) - - _validate_feature_views( - [*views_to_update, *odfvs_to_update, *request_views_to_update] - ) - - # Make inferences - update_entities_with_inferred_types_from_feature_views( - entities_to_update, views_to_update, self.config - ) - - update_data_sources_with_inferred_event_timestamp_col( - [view.batch_source for view in views_to_update], self.config - ) - - update_feature_views_with_inferred_features( - views_to_update, entities_to_update, self.config + # Validate all feature views and make inferences. + self._validate_all_feature_views( + views_to_update, odfvs_to_update, request_views_to_update ) - - for odfv in odfvs_to_update: - odfv.infer_features() + self._make_inferences(entities_to_update, views_to_update, odfvs_to_update) # Handle all entityless feature views by using DUMMY_ENTITY as a placeholder entity. entities_to_update.append(DUMMY_ENTITY) @@ -633,22 +679,6 @@ def apply( service.name, project=self.project, commit=False ) - new_registry_proto = ( - self._registry.cached_registry_proto - if self._registry.cached_registry_proto - else RegistryProto() - ) - - diffs = Registry.diff_between(current_registry_proto, new_registry_proto) - - entities_to_update = [ob for ob in objects if isinstance(ob, Entity)] - views_to_update = [ob for ob in objects if isinstance(ob, FeatureView)] - - entities_to_delete = [ob for ob in objects_to_delete if isinstance(ob, Entity)] - views_to_delete = [ - ob for ob in objects_to_delete if isinstance(ob, FeatureView) - ] - self._get_provider().update_infra( project=self.project, tables_to_delete=views_to_delete if not partial else [], @@ -660,8 +690,6 @@ def apply( self._registry.commit() - return diffs - @log_exceptions_and_usage def teardown(self): """Tears down all local and cloud resources for the feature store.""" @@ -797,6 +825,93 @@ def get_historical_features( return job + @log_exceptions_and_usage + def create_saved_dataset( + self, + from_: RetrievalJob, + name: str, + storage: SavedDatasetStorage, + tags: Optional[Dict[str, str]] = None, + ) -> SavedDataset: + """ + Execute provided retrieval job and persist its outcome in given storage. + Storage type (eg, BigQuery or Redshift) must be the same as globally configured offline store. + After data successfully persisted saved dataset object with dataset metadata is committed to the registry. + Name for the saved dataset should be unique within project, since it's possible to overwrite previously stored dataset + with the same name. + + Returns: + SavedDataset object with attached RetrievalJob + + Raises: + ValueError if given retrieval job doesn't have metadata + """ + warnings.warn( + "Saving dataset is an experimental feature. " + "This API is unstable and it could and most probably will be changed in the future. " + "We do not guarantee that future changes will maintain backward compatibility.", + RuntimeWarning, + ) + + if not from_.metadata: + raise ValueError( + "RetrievalJob must contains metadata. 
" + "Use RetrievalJob produced by get_historical_features" + ) + + dataset = SavedDataset( + name=name, + features=from_.metadata.features, + join_keys=from_.metadata.keys, + full_feature_names=from_.full_feature_names, + storage=storage, + tags=tags, + ) + + dataset.min_event_timestamp = from_.metadata.min_event_timestamp + dataset.max_event_timestamp = from_.metadata.max_event_timestamp + + from_.persist(storage) + + self._registry.apply_saved_dataset(dataset, self.project, commit=True) + + return dataset.with_retrieval_job( + self._get_provider().retrieve_saved_dataset( + config=self.config, dataset=dataset + ) + ) + + @log_exceptions_and_usage + def get_saved_dataset(self, name: str) -> SavedDataset: + """ + Find a saved dataset in the registry by provided name and + create a retrieval job to pull whole dataset from storage (offline store). + + If dataset couldn't be found by provided name SavedDatasetNotFound exception will be raised. + + Data will be retrieved from globally configured offline store. + + Returns: + SavedDataset with RetrievalJob attached + + Raises: + SavedDatasetNotFound + """ + warnings.warn( + "Retrieving datasets is an experimental feature. " + "This API is unstable and it could and most probably will be changed in the future. " + "We do not guarantee that future changes will maintain backward compatibility.", + RuntimeWarning, + ) + + dataset = self._registry.get_saved_dataset(name, self.project) + provider = self._get_provider() + + retrieval_job = provider.retrieve_saved_dataset( + config=self.config, dataset=dataset + ) + return dataset.with_retrieval_job(retrieval_job) + @log_exceptions_and_usage def materialize_incremental( self, end_date: datetime, feature_views: Optional[List[str]] = None, @@ -1049,6 +1164,30 @@ def get_online_features( ... ) >>> online_response_dict = online_response.to_dict() """ + columnar: Dict[str, List[Any]] = {k: [] for k in entity_rows[0].keys()} + for entity_row in entity_rows: + for key, value in entity_row.items(): + try: + columnar[key].append(value) + except KeyError as e: + raise ValueError("All entity_rows must have the same keys.") from e + + return self._get_online_features( + features=features, + entity_values=columnar, + full_feature_names=full_feature_names, + native_entity_values=True, + ) + + def _get_online_features( + self, + features: Union[List[str], FeatureService], + entity_values: Mapping[ + str, Union[Sequence[Any], Sequence[Value], RepeatedValue] + ], + full_feature_names: bool = False, + native_entity_values: bool = True, + ): _feature_refs = self._get_features(features, allow_cache=True) ( requested_feature_views, @@ -1058,6 +1197,29 @@ def get_online_features( features=features, allow_cache=True, hide_dummy_entity=False ) + entity_name_to_join_key_map, entity_type_map = self._get_entity_maps( + requested_feature_views + ) + + # Extract Sequence from RepeatedValue Protobuf. + entity_value_lists: Dict[str, Union[List[Any], List[Value]]] = { + k: list(v) if isinstance(v, Sequence) else list(v.val) + for k, v in entity_values.items() + } + + entity_proto_values: Dict[str, List[Value]] + if native_entity_values: + # Convert values to Protobuf once. 
+ entity_proto_values = { + k: python_values_to_proto_values( + v, entity_type_map.get(k, ValueType.UNKNOWN) + ) + for k, v in entity_value_lists.items() + } + else: + entity_proto_values = entity_value_lists + + num_rows = _validate_entity_values(entity_proto_values) _validate_feature_refs(_feature_refs, full_feature_names) ( grouped_refs, @@ -1083,134 +1245,137 @@ def get_online_features( } feature_views = list(view for view, _ in grouped_refs) - entityless_case = DUMMY_ENTITY_NAME in [ - entity_name - for feature_view in feature_views - for entity_name in feature_view.entities - ] - - provider = self._get_provider() - entities = self._list_entities(allow_cache=True, hide_dummy_entity=False) - entity_name_to_join_key_map: Dict[str, str] = {} - join_key_to_entity_type_map: Dict[str, ValueType] = {} - for entity in entities: - entity_name_to_join_key_map[entity.name] = entity.join_key - join_key_to_entity_type_map[entity.join_key] = entity.value_type - for feature_view in requested_feature_views: - for entity_name in feature_view.entities: - entity = self._registry.get_entity( - entity_name, self.project, allow_cache=True - ) - # User directly uses join_key as the entity reference in the entity_rows for the - # entity mapping case. - entity_name = feature_view.projection.join_key_map.get( - entity.join_key, entity.name - ) - join_key = feature_view.projection.join_key_map.get( - entity.join_key, entity.join_key - ) - entity_name_to_join_key_map[entity_name] = join_key - join_key_to_entity_type_map[join_key] = entity.value_type needed_request_data, needed_request_fv_features = self.get_needed_request_data( grouped_odfv_refs, grouped_request_fv_refs ) - join_key_rows = [] - request_data_features: Dict[str, List[Any]] = defaultdict(list) + join_key_values: Dict[str, List[Value]] = {} + request_data_features: Dict[str, List[Value]] = {} # Entity rows may be either entities or request data. - for row in entity_rows: - join_key_row = {} - for entity_name, entity_value in row.items(): - # Found request data - if ( - entity_name in needed_request_data - or entity_name in needed_request_fv_features - ): - if entity_name in needed_request_fv_features: - # If the data was requested as a feature then - # make sure it appears in the result. - requested_result_row_names.add(entity_name) - request_data_features[entity_name].append(entity_value) - else: - try: - join_key = entity_name_to_join_key_map[entity_name] - except KeyError: - raise EntityNotFoundException(entity_name, self.project) - # All join keys should be returned in the result. - requested_result_row_names.add(join_key) - join_key_row[join_key] = entity_value - if entityless_case: - join_key_row[DUMMY_ENTITY_ID] = DUMMY_ENTITY_VAL - if len(join_key_row) > 0: - # May be empty if this entity row was request data - join_key_rows.append(join_key_row) + for entity_name, values in entity_proto_values.items(): + # Found request data + if ( + entity_name in needed_request_data + or entity_name in needed_request_fv_features + ): + if entity_name in needed_request_fv_features: + # If the data was requested as a feature then + # make sure it appears in the result. + requested_result_row_names.add(entity_name) + request_data_features[entity_name] = values + else: + try: + join_key = entity_name_to_join_key_map[entity_name] + except KeyError: + raise EntityNotFoundException(entity_name, self.project) + # All join keys should be returned in the result. 
+ requested_result_row_names.add(join_key) + join_key_values[join_key] = values self.ensure_request_data_values_exist( needed_request_data, needed_request_fv_features, request_data_features ) - # Convert join_key_rows from rowise to columnar. - join_key_python_values: Dict[str, List[Value]] = defaultdict(list) - for join_key_row in join_key_rows: - for join_key, value in join_key_row.items(): - join_key_python_values[join_key].append(value) - - # Convert all join key values to Protobuf Values - join_key_proto_values = { - k: python_values_to_proto_values(v, join_key_to_entity_type_map[k]) - for k, v in join_key_python_values.items() - } + # Populate online features response proto with join keys and request data features + online_features_response = GetOnlineFeaturesResponse( + results=[GetOnlineFeaturesResponse.FeatureVector() for _ in range(num_rows)] + ) + self._populate_result_rows_from_columnar( + online_features_response=online_features_response, + data=dict(**join_key_values, **request_data_features), + ) - # Populate result rows with join keys - result_rows = [ - GetOnlineFeaturesResponse.FieldValues() for _ in range(len(entity_rows)) + # Add the Entityless case after populating result rows to avoid having to remove + # it later. + entityless_case = DUMMY_ENTITY_NAME in [ + entity_name + for feature_view in feature_views + for entity_name in feature_view.entities ] - for key, values in join_key_proto_values.items(): - for row_idx, result_row in enumerate(result_rows): - result_row.fields[key].CopyFrom(values[row_idx]) - result_row.statuses[key] = FieldStatus.PRESENT - - # Initialize the set of EntityKeyProtos once and reuse them for each FeatureView - # to avoid initialization overhead. - entity_keys = [EntityKeyProto() for _ in range(len(join_key_rows))] + if entityless_case: + join_key_values[DUMMY_ENTITY_ID] = python_values_to_proto_values( + [DUMMY_ENTITY_VAL] * num_rows, DUMMY_ENTITY.value_type + ) + + provider = self._get_provider() for table, requested_features in grouped_refs: # Get the correct set of entity values with the correct join keys. - entity_values = self._get_table_entity_values( - table, entity_name_to_join_key_map, join_key_proto_values, + table_entity_values, idxs = self._get_unique_entities( + table, join_key_values, entity_name_to_join_key_map, ) - # Set the EntityKeyProtos inplace. - self._set_table_entity_keys( - entity_values, entity_keys, + # Fetch feature data for the minimum set of Entities. + feature_data = self._read_from_online_store( + table_entity_values, provider, requested_features, table, ) # Populate the result_rows with the Features from the OnlineStore inplace. 
- self._populate_result_rows_from_feature_view( - entity_keys, + self._populate_response_from_feature_data( + feature_data, + idxs, + online_features_response, full_feature_names, - provider, requested_features, - result_rows, table, ) - self._populate_request_data_features( - request_data_features, result_rows, - ) - if grouped_odfv_refs: self._augment_response_with_on_demand_transforms( + online_features_response, _feature_refs, requested_on_demand_feature_views, full_feature_names, - result_rows, ) self._drop_unneeded_columns( - requested_result_row_names, result_rows, + online_features_response, requested_result_row_names ) - return OnlineResponse(GetOnlineFeaturesResponse(field_values=result_rows)) + return OnlineResponse(online_features_response) + + @staticmethod + def _get_columnar_entity_values( + rowise: Optional[List[Dict[str, Any]]], columnar: Optional[Dict[str, List[Any]]] + ) -> Dict[str, List[Any]]: + if (rowise is None and columnar is None) or ( + rowise is not None and columnar is not None + ): + raise ValueError( + "Exactly one of `columnar_entity_values` and `rowise_entity_values` must be set." + ) + + if rowise is not None: + # Convert entity_rows from rowise to columnar. + res = defaultdict(list) + for entity_row in rowise: + for key, value in entity_row.items(): + res[key].append(value) + return res + return cast(Dict[str, List[Any]], columnar) + + def _get_entity_maps(self, feature_views): + entities = self._list_entities(allow_cache=True, hide_dummy_entity=False) + entity_name_to_join_key_map: Dict[str, str] = {} + entity_type_map: Dict[str, ValueType] = {} + for entity in entities: + entity_name_to_join_key_map[entity.name] = entity.join_key + entity_type_map[entity.name] = entity.value_type + for feature_view in feature_views: + for entity_name in feature_view.entities: + entity = self._registry.get_entity( + entity_name, self.project, allow_cache=True + ) + # User directly uses join_key as the entity reference in the entity_rows for the + # entity mapping case. + entity_name = feature_view.projection.join_key_map.get( + entity.join_key, entity.name + ) + join_key = feature_view.projection.join_key_map.get( + entity.join_key, entity.join_key + ) + entity_name_to_join_key_map[entity_name] = join_key + entity_type_map[join_key] = entity.value_type + return entity_name_to_join_key_map, entity_type_map @staticmethod def _get_table_entity_values( @@ -1236,36 +1401,21 @@ def _get_table_entity_values( return entity_values @staticmethod - def _set_table_entity_keys( - entity_values: Dict[str, List[Value]], entity_keys: List[EntityKeyProto], + def _populate_result_rows_from_columnar( + online_features_response: GetOnlineFeaturesResponse, + data: Dict[str, List[Value]], ): - """ - This method sets the a list of EntityKeyProtos inplace. - """ - keys = entity_values.keys() - # Columar to rowise (dict keys and values are guaranteed to have the same order). - rowise_values = zip(*entity_values.values()) - for entity_key in entity_keys: - # Make sure entity_keys are empty before setting. - entity_key.Clear() - entity_key.join_keys.extend(keys) - entity_key.entity_values.extend(next(rowise_values)) + timestamp = Timestamp() # Only initialize this timestamp once. 
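For reference, the columnar response layout introduced here can be constructed directly; a minimal sketch using only the protobuf types already imported in this module (the feature name and values are illustrative):

from google.protobuf.timestamp_pb2 import Timestamp

from feast.protos.feast.serving.ServingService_pb2 import (
    FieldStatus,
    GetOnlineFeaturesResponse,
)
from feast.protos.feast.types.Value_pb2 import Value

num_rows = 2
response = GetOnlineFeaturesResponse(
    results=[GetOnlineFeaturesResponse.FeatureVector() for _ in range(num_rows)]
)

# metadata.feature_names holds one entry per column; each FeatureVector holds the
# values, statuses, and event timestamps for one row, aligned by position.
response.metadata.feature_names.val.append("driver_id")
for row_idx, driver_id in enumerate([1001, 1002]):
    vector = response.results[row_idx]
    vector.values.append(Value(int64_val=driver_id))
    vector.statuses.append(FieldStatus.PRESENT)
    vector.event_timestamps.append(Timestamp())

This is the same shape that _populate_result_rows_from_columnar and _populate_response_from_feature_data append to in the code around this hunk.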
+ # Add more values to the existing result rows + for feature_name, feature_values in data.items(): - @staticmethod - def _populate_request_data_features( - request_data_features: Dict[str, List[Any]], - result_rows: List[GetOnlineFeaturesResponse.FieldValues], - ): - # Add more feature values to the existing result rows for the request data features - for feature_name, feature_values in request_data_features.items(): - proto_values = python_values_to_proto_values( - feature_values, ValueType.UNKNOWN - ) + online_features_response.metadata.feature_names.val.append(feature_name) - for row_idx, proto_value in enumerate(proto_values): - result_row = result_rows[row_idx] - result_row.fields[feature_name].CopyFrom(proto_value) - result_row.statuses[feature_name] = FieldStatus.PRESENT + for row_idx, proto_value in enumerate(feature_values): + result_row = online_features_response.results[row_idx] + result_row.values.append(proto_value) + result_row.statuses.append(FieldStatus.PRESENT) + result_row.event_timestamps.append(timestamp) @staticmethod def get_needed_request_data( @@ -1302,62 +1452,170 @@ def ensure_request_data_values_exist( feature_names=missing_features ) - def _populate_result_rows_from_feature_view( + def _get_unique_entities( self, - entity_keys: List[EntityKeyProto], - full_feature_names: bool, + table: FeatureView, + join_key_values: Dict[str, List[Value]], + entity_name_to_join_key_map: Dict[str, str], + ) -> Tuple[Tuple[Dict[str, Value], ...], Tuple[List[int], ...]]: + """ Return the set of unique composite Entities for a Feature View and the indexes at which they appear. + + This method allows us to query the OnlineStore for data we need only once + rather than requesting and processing data for the same combination of + Entities multiple times. + """ + # Get the correct set of entity values with the correct join keys. + table_entity_values = self._get_table_entity_values( + table, entity_name_to_join_key_map, join_key_values, + ) + + # Convert back to rowise. + keys = table_entity_values.keys() + # Sort the rowise data to allow for grouping but keep original index. This lambda is + # sufficient as Entity types cannot be complex (ie. lists). + rowise = list(enumerate(zip(*table_entity_values.values()))) + rowise.sort( + key=lambda row: tuple(getattr(x, x.WhichOneof("val")) for x in row[1]) + ) + + # Identify unique entities and the indexes at which they occur. + unique_entities: Tuple[Dict[str, Value], ...] + indexes: Tuple[List[int], ...] + unique_entities, indexes = tuple( + zip( + *[ + (dict(zip(keys, k)), [_[0] for _ in g]) + for k, g in itertools.groupby(rowise, key=lambda x: x[1]) + ] + ) + ) + return unique_entities, indexes + + def _read_from_online_store( + self, + entity_rows: Iterable[Mapping[str, Value]], provider: Provider, requested_features: List[str], - result_rows: List[GetOnlineFeaturesResponse.FieldValues], table: FeatureView, - ): + ) -> List[Tuple[List[Timestamp], List["FieldStatus.ValueType"], List[Value]]]: + """ Read and process data from the OnlineStore for a given FeatureView. + + This method guarentees that the order of the data in each element of the + List returned is the same as the order of `requested_features`. + + This method assumes that `provider.online_read` returns data for each + combination of Entities in `entity_rows` in the same order as they + are provided. + """ + # Instantiate one EntityKeyProto per Entity. 
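The sort-then-groupby step in _get_unique_entities above deduplicates entity rows while remembering where each unique row must be written back; a standalone sketch of the same idea with plain tuples (the real code groups protobuf Value objects and sorts on their unpacked values):

import itertools

# Row-wise entity key values for one feature view, in request order.
rows = [(1001,), (1002,), (1001,)]

indexed = sorted(enumerate(rows), key=lambda pair: pair[1])
unique_entities, indexes = zip(
    *[
        (key, [idx for idx, _ in group])
        for key, group in itertools.groupby(indexed, key=lambda pair: pair[1])
    ]
)
# unique_entities == ((1001,), (1002,))
# indexes == ([0, 2], [1])  -> rows 0 and 2 share a single online-store lookup

Grouping this way lets _read_from_online_store query each distinct entity key once, after which _populate_response_from_feature_data fans the fetched feature data back out to every original row index.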
+ entity_key_protos = [ + EntityKeyProto(join_keys=row.keys(), entity_values=row.values()) + for row in entity_rows + ] + + # Fetch data for Entities. read_rows = provider.online_read( config=self.config, table=table, - entity_keys=entity_keys, + entity_keys=entity_key_protos, requested_features=requested_features, ) - # Each row is a set of features for a given entity key - for row_idx, read_row in enumerate(read_rows): - row_ts, feature_data = read_row - result_row = result_rows[row_idx] + # Each row is a set of features for a given entity key. We only need to convert + # the data to Protobuf once. + row_ts_proto = Timestamp() + null_value = Value() + read_row_protos = [] + for read_row in read_rows: + row_ts, feature_data = read_row + if row_ts is not None: + row_ts_proto.FromDatetime(row_ts) + event_timestamps = [row_ts_proto] * len(requested_features) if feature_data is None: - for feature_name in requested_features: - feature_ref = ( - f"{table.projection.name_to_use()}__{feature_name}" - if full_feature_names - else feature_name - ) - result_row.statuses[feature_ref] = FieldStatus.NOT_FOUND + statuses = [FieldStatus.NOT_FOUND] * len(requested_features) + values = [null_value] * len(requested_features) else: - for feature_name in feature_data: - feature_ref = ( - f"{table.projection.name_to_use()}__{feature_name}" - if full_feature_names - else feature_name - ) - if feature_name in requested_features: - result_row.fields[feature_ref].CopyFrom( - feature_data[feature_name] - ) - result_row.statuses[feature_ref] = FieldStatus.PRESENT + statuses = [] + values = [] + for feature_name in requested_features: + # Make sure order of data is the same as requested_features. + if feature_name not in feature_data: + statuses.append(FieldStatus.NOT_FOUND) + values.append(null_value) + else: + statuses.append(FieldStatus.PRESENT) + values.append(feature_data[feature_name]) + read_row_protos.append((event_timestamps, statuses, values)) + return read_row_protos + + @staticmethod + def _populate_response_from_feature_data( + feature_data: Iterable[ + Tuple[ + Iterable[Timestamp], Iterable["FieldStatus.ValueType"], Iterable[Value] + ] + ], + indexes: Iterable[Iterable[int]], + online_features_response: GetOnlineFeaturesResponse, + full_feature_names: bool, + requested_features: Iterable[str], + table: FeatureView, + ): + """ Populate the GetOnlineFeaturesReponse with feature data. + + This method assumes that `_read_from_online_store` returns data for each + combination of Entities in `entity_rows` in the same order as they + are provided. + + Args: + feature_data: A list of data in Protobuf form which was retrieved from the OnlineStore. + indexes: A list of indexes which should be the same length as `feature_data`. Each list + of indexes corresponds to a set of result rows in `online_features_response`. + online_features_response: The object to populate. + full_feature_names: A boolean that provides the option to add the feature view prefixes to the feature names, + changing them from the format "feature" to "feature_view__feature" (e.g., "daily_transactions" changes to + "customer_fv__daily_transactions"). + requested_features: The names of the features in `feature_data`. This should be ordered in the same way as the + data in `feature_data`. + table: The FeatureView that `feature_data` was retrieved from. + """ + # Add the feature names to the response. 
+ requested_feature_refs = [ + f"{table.projection.name_to_use()}__{feature_name}" + if full_feature_names + else feature_name + for feature_name in requested_features + ] + online_features_response.metadata.feature_names.val.extend( + requested_feature_refs + ) + + # Populate the result with data fetched from the OnlineStore + # which is guarenteed to be aligned with `requested_features`. + for feature_row, dest_idxs in zip(feature_data, indexes): + event_timestamps, statuses, values = feature_row + for dest_idx in dest_idxs: + result_row = online_features_response.results[dest_idx] + result_row.event_timestamps.extend(event_timestamps) + result_row.statuses.extend(statuses) + result_row.values.extend(values) @staticmethod def _augment_response_with_on_demand_transforms( + online_features_response: GetOnlineFeaturesResponse, feature_refs: List[str], requested_on_demand_feature_views: List[OnDemandFeatureView], full_feature_names: bool, - result_rows: List[GetOnlineFeaturesResponse.FieldValues], ): """Computes on demand feature values and adds them to the result rows. - Assumes that 'result_rows' already contains the necessary request data and input feature - views for the on demand feature views. + Assumes that 'online_features_response' already contains the necessary request data and input feature + views for the on demand feature views. Unneeded feature values such as request data and + unrequested input feature views will be removed from 'online_features_response'. Args: + online_features_response: Protobuf object to populate feature_refs: List of all feature references to be returned. - requested_on_demand_feature_views: List of all odfvs that have been requested. full_feature_names: A boolean that provides the option to add the feature view prefixes to the feature names, changing them from the format "feature" to "feature_view__feature" (e.g., "daily_transactions" changes to @@ -1379,9 +1637,7 @@ def _augment_response_with_on_demand_transforms( else feature_name ) - initial_response = OnlineResponse( - GetOnlineFeaturesResponse(field_values=result_rows) - ) + initial_response = OnlineResponse(online_features_response) initial_response_df = initial_response.to_df() # Apply on demand transformations and augment the result rows @@ -1395,48 +1651,56 @@ def _augment_response_with_on_demand_transforms( f for f in transformed_features_df.columns if f in _feature_refs ] - proto_values_by_column = { - feature: python_values_to_proto_values( + proto_values = [ + python_values_to_proto_values( transformed_features_df[feature].values, ValueType.UNKNOWN ) for feature in selected_subset - } + ] - for row_idx in range(len(result_rows)): - result_row = result_rows[row_idx] + odfv_result_names |= set(selected_subset) - for transformed_feature in selected_subset: - odfv_result_names.add(transformed_feature) - result_row.fields[transformed_feature].CopyFrom( - proto_values_by_column[transformed_feature][row_idx] - ) - result_row.statuses[transformed_feature] = FieldStatus.PRESENT + online_features_response.metadata.feature_names.val.extend(selected_subset) + + for row_idx in range(len(online_features_response.results)): + result_row = online_features_response.results[row_idx] + for feature_idx, transformed_feature in enumerate(selected_subset): + result_row.values.append(proto_values[feature_idx][row_idx]) + result_row.statuses.append(FieldStatus.PRESENT) + result_row.event_timestamps.append(Timestamp()) @staticmethod def _drop_unneeded_columns( + online_features_response: GetOnlineFeaturesResponse, 
requested_result_row_names: Set[str], - result_rows: List[GetOnlineFeaturesResponse.FieldValues], ): """ Unneeded feature values such as request data and unrequested input feature views will - be removed from 'result_rows'. + be removed from 'online_features_response'. Args: + online_features_response: Protobuf object to populate requested_result_row_names: Fields from 'result_rows' that have been requested, and therefore should not be dropped. - result_rows: List of result rows to be editted inplace. """ # Drop values that aren't needed - unneeded_features = [ - val - for val in result_rows[0].fields + unneeded_feature_indices = [ + idx + for idx, val in enumerate( + online_features_response.metadata.feature_names.val + ) if val not in requested_result_row_names ] - for row_idx in range(len(result_rows)): - result_row = result_rows[row_idx] - for unneeded_feature in unneeded_features: - result_row.fields.pop(unneeded_feature) - result_row.statuses.pop(unneeded_feature) + + for idx in reversed(unneeded_feature_indices): + del online_features_response.metadata.feature_names.val[idx] + + for row_idx in range(len(online_features_response.results)): + result_row = online_features_response.results[row_idx] + for idx in reversed(unneeded_feature_indices): + del result_row.values[idx] + del result_row.statuses[idx] + del result_row.event_timestamps[idx] def _get_feature_views_to_use( self, @@ -1505,9 +1769,6 @@ def _get_feature_views_to_use( @log_exceptions_and_usage def serve(self, host: str, port: int, no_access_log: bool) -> None: """Start the feature consumption server locally on a given port.""" - if not flags_helper.enable_python_feature_server(self.config): - raise ExperimentalFeatureNotEnabled(flags.FLAG_PYTHON_FEATURE_SERVER_NAME) - feature_server.start_server(self, host, port, no_access_log) @log_exceptions_and_usage @@ -1518,8 +1779,6 @@ def get_feature_server_endpoint(self) -> Optional[str]: @log_exceptions_and_usage def serve_transformations(self, port: int) -> None: """Start the feature transformation server locally on a given port.""" - if not flags_helper.enable_python_feature_server(self.config): - raise ExperimentalFeatureNotEnabled(flags.FLAG_PYTHON_FEATURE_SERVER_NAME) if not flags_helper.enable_on_demand_feature_views(self.config): raise ExperimentalFeatureNotEnabled(flags.FLAG_ON_DEMAND_TRANSFORM_NAME) @@ -1528,6 +1787,13 @@ def serve_transformations(self, port: int) -> None: transformation_server.start_server(self, port) +def _validate_entity_values(join_key_values: Dict[str, List[Value]]): + set_of_row_lengths = {len(v) for v in join_key_values.values()} + if len(set_of_row_lengths) > 1: + raise ValueError("All entity rows must have the same columns.") + return set_of_row_lengths.pop() + + def _validate_feature_refs(feature_refs: List[str], full_feature_names: bool = False): collided_feature_refs = [] diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index 57b60c0503..2c1d0675d4 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -74,8 +74,7 @@ class FeatureView(BaseFeatureView): online: bool input: DataSource batch_source: DataSource - stream_source: Optional[DataSource] = None - last_updated_timestamp: Optional[datetime] = None + stream_source: Optional[DataSource] materialization_intervals: List[Tuple[datetime, datetime]] @log_exceptions @@ -136,9 +135,6 @@ def __init__( self.materialization_intervals = [] - self.created_timestamp: Optional[datetime] = None - self.last_updated_timestamp: 
Optional[datetime] = None - # Note: Python requires redefining hash in child classes that override __eq__ def __hash__(self): return super().__hash__() diff --git a/sdk/python/feast/feature_view_projection.py b/sdk/python/feast/feature_view_projection.py index 97b3b0ab57..04d923122c 100644 --- a/sdk/python/feast/feature_view_projection.py +++ b/sdk/python/feast/feature_view_projection.py @@ -10,6 +10,18 @@ @dataclass class FeatureViewProjection: + """ + A feature view projection represents a selection of one or more features from a + single feature view. + + Attributes: + name: The unique name of the feature view from which this projection is created. + name_alias: An optional alias for the name. + features: The list of features represented by the feature view projection. + join_key_map: A map to modify join key columns during retrieval of this feature + view projection. + """ + name: str name_alias: Optional[str] features: List[Feature] @@ -18,10 +30,10 @@ class FeatureViewProjection: def name_to_use(self): return self.name_alias or self.name - def to_proto(self): + def to_proto(self) -> FeatureViewProjectionProto: feature_reference_proto = FeatureViewProjectionProto( feature_view_name=self.name, - feature_view_name_alias=self.name_alias, + feature_view_name_alias=self.name_alias or "", join_key_map=self.join_key_map, ) for feature in self.features: @@ -31,16 +43,16 @@ def to_proto(self): @staticmethod def from_proto(proto: FeatureViewProjectionProto): - ref = FeatureViewProjection( + feature_view_projection = FeatureViewProjection( name=proto.feature_view_name, name_alias=proto.feature_view_name_alias, features=[], join_key_map=dict(proto.join_key_map), ) for feature_column in proto.feature_columns: - ref.features.append(Feature.from_proto(feature_column)) + feature_view_projection.features.append(Feature.from_proto(feature_column)) - return ref + return feature_view_projection @staticmethod def from_definition(feature_grouping): diff --git a/sdk/python/feast/flags.py b/sdk/python/feast/flags.py index 5c6357ec26..a1ca0c3b73 100644 --- a/sdk/python/feast/flags.py +++ b/sdk/python/feast/flags.py @@ -1,6 +1,5 @@ FLAG_ALPHA_FEATURES_NAME = "alpha_features" FLAG_ON_DEMAND_TRANSFORM_NAME = "on_demand_transforms" -FLAG_PYTHON_FEATURE_SERVER_NAME = "python_feature_server" FLAG_AWS_LAMBDA_FEATURE_SERVER_NAME = "aws_lambda_feature_server" FLAG_DIRECT_INGEST_TO_ONLINE_STORE = "direct_ingest_to_online_store" ENV_FLAG_IS_TEST = "IS_TEST" @@ -8,7 +7,6 @@ FLAG_NAMES = { FLAG_ALPHA_FEATURES_NAME, FLAG_ON_DEMAND_TRANSFORM_NAME, - FLAG_PYTHON_FEATURE_SERVER_NAME, FLAG_AWS_LAMBDA_FEATURE_SERVER_NAME, FLAG_DIRECT_INGEST_TO_ONLINE_STORE, } diff --git a/sdk/python/feast/flags_helper.py b/sdk/python/feast/flags_helper.py index 89784d6ecc..89905e7d36 100644 --- a/sdk/python/feast/flags_helper.py +++ b/sdk/python/feast/flags_helper.py @@ -35,10 +35,6 @@ def enable_on_demand_feature_views(repo_config: RepoConfig) -> bool: return feature_flag_enabled(repo_config, flags.FLAG_ON_DEMAND_TRANSFORM_NAME) -def enable_python_feature_server(repo_config: RepoConfig) -> bool: - return feature_flag_enabled(repo_config, flags.FLAG_PYTHON_FEATURE_SERVER_NAME) - - def enable_aws_lambda_feature_server(repo_config: RepoConfig) -> bool: return feature_flag_enabled(repo_config, flags.FLAG_AWS_LAMBDA_FEATURE_SERVER_NAME) diff --git a/sdk/python/feast/inference.py b/sdk/python/feast/inference.py index 39a77264bc..ce8fa919f1 100644 --- a/sdk/python/feast/inference.py +++ b/sdk/python/feast/inference.py @@ -1,7 +1,14 @@ import re from 
typing import List -from feast import BigQuerySource, Entity, Feature, FileSource, RedshiftSource +from feast import ( + BigQuerySource, + Entity, + Feature, + FileSource, + RedshiftSource, + SnowflakeSource, +) from feast.data_source import DataSource from feast.errors import RegistryInferenceFailure from feast.feature_view import FeatureView @@ -13,7 +20,12 @@ def update_entities_with_inferred_types_from_feature_views( entities: List[Entity], feature_views: List[FeatureView], config: RepoConfig ) -> None: """ - Infer entity value type by examining schema of feature view batch sources + Infers the types of the entities by examining the schemas of feature view batch sources. + + Args: + entities: The entities to be updated. + feature_views: A list containing feature views associated with the entities. + config: The config for the current feature store. """ incomplete_entities = { entity.name: entity @@ -78,6 +90,8 @@ def update_data_sources_with_inferred_event_timestamp_col( ts_column_type_regex_pattern = "TIMESTAMP|DATETIME" elif isinstance(data_source, RedshiftSource): ts_column_type_regex_pattern = "TIMESTAMP[A-Z]*" + elif isinstance(data_source, SnowflakeSource): + ts_column_type_regex_pattern = "TIMESTAMP_[A-Z]*" else: raise RegistryInferenceFailure( "DataSource", @@ -87,8 +101,10 @@ def update_data_sources_with_inferred_event_timestamp_col( """, ) # for informing the type checker - assert isinstance(data_source, FileSource) or isinstance( - data_source, BigQuerySource + assert ( + isinstance(data_source, FileSource) + or isinstance(data_source, BigQuerySource) + or isinstance(data_source, SnowflakeSource) ) # loop through table columns to find singular match @@ -127,6 +143,11 @@ def update_feature_views_with_inferred_features( Infers the set of features associated to each FeatureView and updates the FeatureView with those features. Inference occurs through considering each column of the underlying data source as a feature except columns that are associated with the data source's timestamp columns and the FeatureView's entity columns. + + Args: + fvs: The feature views to be updated. + entities: A list containing entities associated with the feature views. + config: The config for the current feature store. 
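# Illustrative sketch (not part of the patch): how a per-source type pattern such as the
# Snowflake "TIMESTAMP_[A-Z]*" regex added above can be matched against a table schema to
# infer the event timestamp column. The column names and types below are made up.
import re

def infer_event_timestamp_col(columns, pattern):
    matches = [name for name, col_type in columns if re.match(pattern, col_type)]
    if len(matches) != 1:
        raise ValueError(f"expected exactly one timestamp column, found {matches}")
    return matches[0]

snowflake_schema = [("DRIVER_ID", "NUMBER"), ("EVENT_TS", "TIMESTAMP_NTZ")]
assert infer_event_timestamp_col(snowflake_schema, "TIMESTAMP_[A-Z]*") == "EVENT_TS"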
""" entity_name_to_join_key_map = {entity.name: entity.join_key for entity in entities} diff --git a/sdk/python/feast/infra/aws.py b/sdk/python/feast/infra/aws.py index 735b2f62e7..104e20388a 100644 --- a/sdk/python/feast/infra/aws.py +++ b/sdk/python/feast/infra/aws.py @@ -62,14 +62,16 @@ def update_infra( entities_to_keep: Sequence[Entity], partial: bool, ): - self.online_store.update( - config=self.repo_config, - tables_to_delete=tables_to_delete, - tables_to_keep=tables_to_keep, - entities_to_keep=entities_to_keep, - entities_to_delete=entities_to_delete, - partial=partial, - ) + # Call update only if there is an online store + if self.online_store: + self.online_store.update( + config=self.repo_config, + tables_to_delete=tables_to_delete, + tables_to_keep=tables_to_keep, + entities_to_keep=entities_to_keep, + entities_to_delete=entities_to_delete, + partial=partial, + ) if self.repo_config.feature_server and self.repo_config.feature_server.enabled: if not enable_aws_lambda_feature_server(self.repo_config): @@ -194,7 +196,8 @@ def _deploy_feature_server(self, project: str, image_uri: str): def teardown_infra( self, project: str, tables: Sequence[FeatureView], entities: Sequence[Entity], ) -> None: - self.online_store.teardown(self.repo_config, tables, entities) + if self.online_store: + self.online_store.teardown(self.repo_config, tables, entities) if ( self.repo_config.feature_server is not None diff --git a/sdk/python/feast/infra/feature_servers/aws_lambda/__init__.py b/sdk/python/feast/infra/feature_servers/aws_lambda/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sdk/python/feast/infra/feature_servers/gcp_cloudrun/__init__.py b/sdk/python/feast/infra/feature_servers/gcp_cloudrun/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sdk/python/feast/infra/infra_object.py b/sdk/python/feast/infra/infra_object.py index 3cd00899fe..91770e64e5 100644 --- a/sdk/python/feast/infra/infra_object.py +++ b/sdk/python/feast/infra/infra_object.py @@ -15,9 +15,21 @@ from dataclasses import dataclass, field from typing import Any, List +from feast.errors import FeastInvalidInfraObjectType from feast.importer import import_class +from feast.protos.feast.core.DatastoreTable_pb2 import ( + DatastoreTable as DatastoreTableProto, +) +from feast.protos.feast.core.DynamoDBTable_pb2 import ( + DynamoDBTable as DynamoDBTableProto, +) from feast.protos.feast.core.InfraObject_pb2 import Infra as InfraProto from feast.protos.feast.core.InfraObject_pb2 import InfraObject as InfraObjectProto +from feast.protos.feast.core.SqliteTable_pb2 import SqliteTable as SqliteTableProto + +DATASTORE_INFRA_OBJECT_CLASS_TYPE = "feast.infra.online_stores.datastore.DatastoreTable" +DYNAMODB_INFRA_OBJECT_CLASS_TYPE = "feast.infra.online_stores.dynamodb.DynamoDBTable" +SQLITE_INFRA_OBJECT_CLASS_TYPE = "feast.infra.online_stores.sqlite.SqliteTable" class InfraObject(ABC): @@ -26,13 +38,29 @@ class InfraObject(ABC): """ @abstractmethod - def to_proto(self) -> InfraObjectProto: + def __init__(self, name: str): + self._name = name + + @property + def name(self) -> str: + return self._name + + @abstractmethod + def to_infra_object_proto(self) -> InfraObjectProto: + """Converts an InfraObject to its protobuf representation, wrapped in an InfraObjectProto.""" + pass + + @abstractmethod + def to_proto(self) -> Any: """Converts an InfraObject to its protobuf representation.""" pass + def __lt__(self, other) -> bool: + return self.name < other.name + @staticmethod @abstractmethod - def 
from_proto(infra_object_proto: InfraObjectProto) -> Any: + def from_infra_object_proto(infra_object_proto: InfraObjectProto) -> Any: """ Returns an InfraObject created from a protobuf representation. @@ -40,15 +68,38 @@ def from_proto(infra_object_proto: InfraObjectProto) -> Any: infra_object_proto: A protobuf representation of an InfraObject. Raises: - ValueError: The type of InfraObject could not be identified. + FeastInvalidInfraObjectType: The type of InfraObject could not be identified. """ if infra_object_proto.infra_object_class_type: cls = _get_infra_object_class_from_type( infra_object_proto.infra_object_class_type ) - return cls.from_proto(infra_object_proto) + return cls.from_infra_object_proto(infra_object_proto) + + raise FeastInvalidInfraObjectType() - raise ValueError("Could not identify the type of the InfraObject.") + @staticmethod + def from_proto(infra_object_proto: Any) -> Any: + """ + Converts a protobuf representation of a subclass to an object of that subclass. + + Args: + infra_object_proto: A protobuf representation of an InfraObject. + + Raises: + FeastInvalidInfraObjectType: The type of InfraObject could not be identified. + """ + if isinstance(infra_object_proto, DatastoreTableProto): + infra_object_class_type = DATASTORE_INFRA_OBJECT_CLASS_TYPE + elif isinstance(infra_object_proto, DynamoDBTableProto): + infra_object_class_type = DYNAMODB_INFRA_OBJECT_CLASS_TYPE + elif isinstance(infra_object_proto, SqliteTableProto): + infra_object_class_type = SQLITE_INFRA_OBJECT_CLASS_TYPE + else: + raise FeastInvalidInfraObjectType() + + cls = _get_infra_object_class_from_type(infra_object_class_type) + return cls.from_proto(infra_object_proto) @abstractmethod def update(self): @@ -85,7 +136,7 @@ def to_proto(self) -> InfraProto: """ infra_proto = InfraProto() for infra_object in self.infra_objects: - infra_object_proto = infra_object.to_proto() + infra_object_proto = infra_object.to_infra_object_proto() infra_proto.infra_objects.append(infra_object_proto) return infra_proto @@ -96,8 +147,8 @@ def from_proto(cls, infra_proto: InfraProto): Returns an Infra object created from a protobuf representation. """ infra = cls() - cls.infra_objects += [ - InfraObject.from_proto(infra_object_proto) + infra.infra_objects += [ + InfraObject.from_infra_object_proto(infra_object_proto) for infra_object_proto in infra_proto.infra_objects ] diff --git a/sdk/python/feast/infra/local.py b/sdk/python/feast/infra/local.py index 31c46cf282..c5a15c8a91 100644 --- a/sdk/python/feast/infra/local.py +++ b/sdk/python/feast/infra/local.py @@ -1,12 +1,13 @@ import uuid from datetime import datetime from pathlib import Path +from typing import List -from feast.feature_view import FeatureView +from feast.infra.infra_object import Infra, InfraObject from feast.infra.passthrough_provider import PassthroughProvider from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto from feast.registry_store import RegistryStore -from feast.repo_config import RegistryConfig +from feast.repo_config import RegistryConfig, RepoConfig from feast.usage import log_exceptions_and_usage @@ -15,11 +16,16 @@ class LocalProvider(PassthroughProvider): This class only exists for backwards compatibility. 
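# Illustrative sketch (not part of the patch): the dispatch pattern used by the new
# InfraObject.from_proto above -- pick a concrete class based on the type of the proto
# message and delegate to that class's own from_proto. The two classes below are
# stand-ins, not the real generated protos or online store tables.
class SqliteTableProto:
    pass

class SqliteTable:
    @staticmethod
    def from_proto(proto):
        return SqliteTable()

_PROTO_TO_CLASS = {SqliteTableProto: SqliteTable}

def infra_object_from_proto(proto):
    for proto_type, cls in _PROTO_TO_CLASS.items():
        if isinstance(proto, proto_type):
            return cls.from_proto(proto)
    raise TypeError(f"unknown infra object proto: {type(proto)}")

table = infra_object_from_proto(SqliteTableProto())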
""" - pass - - -def _table_id(project: str, table: FeatureView) -> str: - return f"{project}_{table.name}" + def plan_infra( + self, config: RepoConfig, desired_registry_proto: RegistryProto + ) -> Infra: + infra = Infra() + if self.online_store: + infra_objects: List[InfraObject] = self.online_store.plan( + config, desired_registry_proto + ) + infra.infra_objects += infra_objects + return infra class LocalRegistryStore(RegistryStore): diff --git a/sdk/python/feast/infra/offline_stores/bigquery.py b/sdk/python/feast/infra/offline_stores/bigquery.py index 34dde7aa7b..44e62d6ad1 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery.py +++ b/sdk/python/feast/infra/offline_stores/bigquery.py @@ -30,19 +30,24 @@ ) from feast.feature_view import DUMMY_ENTITY_ID, DUMMY_ENTITY_VAL, FeatureView from feast.infra.offline_stores import offline_utils -from feast.infra.offline_stores.offline_store import OfflineStore, RetrievalJob +from feast.infra.offline_stores.offline_store import ( + OfflineStore, + RetrievalJob, + RetrievalMetadata, +) from feast.on_demand_feature_view import OnDemandFeatureView from feast.registry import Registry from feast.repo_config import FeastConfigBaseModel, RepoConfig +from ...saved_dataset import SavedDatasetStorage from ...usage import log_exceptions_and_usage -from .bigquery_source import BigQuerySource +from .bigquery_source import BigQuerySource, SavedDatasetBigQueryStorage try: from google.api_core.exceptions import NotFound from google.auth.exceptions import DefaultCredentialsError from google.cloud import bigquery - from google.cloud.bigquery import Client + from google.cloud.bigquery import Client, Table except ImportError as e: from feast.errors import FeastExtrasDependencyImportError @@ -119,6 +124,36 @@ def pull_latest_from_table_or_query( query=query, client=client, config=config, full_feature_names=False, ) + @staticmethod + @log_exceptions_and_usage(offline_store="bigquery") + def pull_all_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + event_timestamp_column: str, + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + assert isinstance(data_source, BigQuerySource) + from_expression = data_source.get_table_query_string() + + client = _get_bigquery_client( + project=config.offline_store.project_id, + location=config.offline_store.location, + ) + field_string = ", ".join( + join_key_columns + feature_name_columns + [event_timestamp_column] + ) + query = f""" + SELECT {field_string} + FROM {from_expression} + WHERE {event_timestamp_column} BETWEEN TIMESTAMP('{start_date}') AND TIMESTAMP('{end_date}') + """ + return BigQueryRetrievalJob( + query=query, client=client, config=config, full_feature_names=False, + ) + @staticmethod @log_exceptions_and_usage(offline_store="bigquery") def get_historical_features( @@ -147,16 +182,22 @@ def get_historical_features( config.offline_store.location, ) + entity_schema = _get_entity_schema(client=client, entity_df=entity_df,) + + entity_df_event_timestamp_col = offline_utils.infer_event_timestamp_from_entity_df( + entity_schema + ) + + entity_df_event_timestamp_range = _get_entity_df_event_timestamp_range( + entity_df, entity_df_event_timestamp_col, client, + ) + @contextlib.contextmanager def query_generator() -> Iterator[str]: - entity_schema = _upload_entity_df_and_get_entity_schema( + _upload_entity_df( client=client, table_name=table_reference, entity_df=entity_df, ) - entity_df_event_timestamp_col = 
offline_utils.infer_event_timestamp_from_entity_df( - entity_schema - ) - expected_join_keys = offline_utils.get_expected_join_keys( project, feature_views, registry ) @@ -165,10 +206,6 @@ def query_generator() -> Iterator[str]: entity_schema, expected_join_keys, entity_df_event_timestamp_col ) - entity_df_event_timestamp_range = _get_entity_df_event_timestamp_range( - entity_df, entity_df_event_timestamp_col, client, table_reference, - ) - # Build a query context containing all information required to template the BigQuery SQL query query_context = offline_utils.get_feature_view_query_context( feature_refs, @@ -203,6 +240,12 @@ def query_generator() -> Iterator[str]: on_demand_feature_views=OnDemandFeatureView.get_requested_odfvs( feature_refs, project, registry ), + metadata=RetrievalMetadata( + features=feature_refs, + keys=list(entity_schema.keys() - {entity_df_event_timestamp_col}), + min_event_timestamp=entity_df_event_timestamp_range[0], + max_event_timestamp=entity_df_event_timestamp_range[1], + ), ) @@ -214,6 +257,7 @@ def __init__( config: RepoConfig, full_feature_names: bool, on_demand_feature_views: Optional[List[OnDemandFeatureView]] = None, + metadata: Optional[RetrievalMetadata] = None, ): if not isinstance(query, str): self._query_generator = query @@ -231,6 +275,7 @@ def query_generator() -> Iterator[str]: self._on_demand_feature_views = ( on_demand_feature_views if on_demand_feature_views else [] ) + self._metadata = metadata @property def full_feature_names(self) -> bool: @@ -310,6 +355,17 @@ def _execute_query( block_until_done(client=self.client, bq_job=bq_job, timeout=timeout) return bq_job + def persist(self, storage: SavedDatasetStorage): + assert isinstance(storage, SavedDatasetBigQueryStorage) + + self.to_bigquery( + bigquery.QueryJobConfig(destination=storage.bigquery_options.table_ref) + ) + + @property + def metadata(self) -> Optional[RetrievalMetadata]: + return self._metadata + def block_until_done( client: Client, @@ -380,34 +436,45 @@ def _get_table_reference_for_new_entity( return f"{dataset_project}.{dataset_name}.{table_name}" -def _upload_entity_df_and_get_entity_schema( +def _upload_entity_df( client: Client, table_name: str, entity_df: Union[pd.DataFrame, str], -) -> Dict[str, np.dtype]: +) -> Table: """Uploads a Pandas entity dataframe into a BigQuery table and returns the resulting table""" if isinstance(entity_df, str): job = client.query(f"CREATE TABLE {table_name} AS ({entity_df})") - block_until_done(client, job) - - limited_entity_df = ( - client.query(f"SELECT * FROM {table_name} LIMIT 1").result().to_dataframe() - ) - entity_schema = dict(zip(limited_entity_df.columns, limited_entity_df.dtypes)) elif isinstance(entity_df, pd.DataFrame): - # Drop the index so that we dont have unnecessary columns + # Drop the index so that we don't have unnecessary columns entity_df.reset_index(drop=True, inplace=True) job = client.load_table_from_dataframe(entity_df, table_name) - block_until_done(client, job) - entity_schema = dict(zip(entity_df.columns, entity_df.dtypes)) else: raise InvalidEntityType(type(entity_df)) + block_until_done(client, job) + # Ensure that the table expires after some time table = client.get_table(table=table_name) table.expires = datetime.utcnow() + timedelta(minutes=30) client.update_table(table, ["expires"]) + return table + + +def _get_entity_schema( + client: Client, entity_df: Union[pd.DataFrame, str] +) -> Dict[str, np.dtype]: + if isinstance(entity_df, str): + entity_df_sample = ( + client.query(f"SELECT * FROM 
({entity_df}) LIMIT 1").result().to_dataframe() + ) + + entity_schema = dict(zip(entity_df_sample.columns, entity_df_sample.dtypes)) + elif isinstance(entity_df, pd.DataFrame): + entity_schema = dict(zip(entity_df.columns, entity_df.dtypes)) + else: + raise InvalidEntityType(type(entity_df)) + return entity_schema @@ -415,11 +482,11 @@ def _get_entity_df_event_timestamp_range( entity_df: Union[pd.DataFrame, str], entity_df_event_timestamp_col: str, client: Client, - table_name: str, ) -> Tuple[datetime, datetime]: if type(entity_df) is str: job = client.query( - f"SELECT MIN({entity_df_event_timestamp_col}) AS min, MAX({entity_df_event_timestamp_col}) AS max FROM {table_name}" + f"SELECT MIN({entity_df_event_timestamp_col}) AS min, MAX({entity_df_event_timestamp_col}) AS max " + f"FROM ({entity_df})" ) res = next(job.result()) entity_df_event_timestamp_range = ( @@ -435,8 +502,8 @@ def _get_entity_df_event_timestamp_range( entity_df_event_timestamp, utc=True ) entity_df_event_timestamp_range = ( - entity_df_event_timestamp.min(), - entity_df_event_timestamp.max(), + entity_df_event_timestamp.min().to_pydatetime(), + entity_df_event_timestamp.max().to_pydatetime(), ) else: raise InvalidEntityType(type(entity_df)) @@ -491,7 +558,7 @@ def _get_bigquery_client(project: Optional[str] = None, location: Optional[str] ,CAST({{entity_df_event_timestamp_col}} AS STRING) AS {{featureview.name}}__entity_row_unique_id {% endif %} {% endfor %} - FROM {{ left_table_query_string }} + FROM `{{ left_table_query_string }}` ), {% for featureview in featureviews %} @@ -531,7 +598,7 @@ def _get_bigquery_client(project: Optional[str] = None, location: Optional[str] {{ featureview.created_timestamp_column ~ ' as created_timestamp,' if featureview.created_timestamp_column else '' }} {{ featureview.entity_selections | join(', ')}}{% if featureview.entity_selections %},{% else %}{% endif %} {% for feature in featureview.features %} - {{ feature }} as {% if full_feature_names %}{{ featureview.name }}__{{feature}}{% else %}{{ feature }}{% endif %}{% if loop.last %}{% else %}, {% endif %} + {{ feature }} as {% if full_feature_names %}{{ featureview.name }}__{{featureview.field_mapping.get(feature, feature)}}{% else %}{{ featureview.field_mapping.get(feature, feature) }}{% endif %}{% if loop.last %}{% else %}, {% endif %} {% endfor %} FROM {{ featureview.table_subquery }} WHERE {{ featureview.event_timestamp_column }} <= '{{ featureview.max_event_timestamp }}' @@ -632,7 +699,7 @@ def _get_bigquery_client(project: Optional[str] = None, location: Optional[str] SELECT {{featureview.name}}__entity_row_unique_id {% for feature in featureview.features %} - ,{% if full_feature_names %}{{ featureview.name }}__{{feature}}{% else %}{{ feature }}{% endif %} + ,{% if full_feature_names %}{{ featureview.name }}__{{featureview.field_mapping.get(feature, feature)}}{% else %}{{ featureview.field_mapping.get(feature, feature) }}{% endif %} {% endfor %} FROM {{ featureview.name }}__cleaned ) USING ({{featureview.name}}__entity_row_unique_id) diff --git a/sdk/python/feast/infra/offline_stores/bigquery_source.py b/sdk/python/feast/infra/offline_stores/bigquery_source.py index a5c1afa3e0..f97f687b0f 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery_source.py +++ b/sdk/python/feast/infra/offline_stores/bigquery_source.py @@ -1,10 +1,14 @@ -from typing import Callable, Dict, Iterable, Optional, Tuple +from typing import Callable, Dict, Iterable, List, Optional, Tuple from feast import type_map from feast.data_source import 
DataSource from feast.errors import DataSourceNotFoundException from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto +from feast.protos.feast.core.SavedDataset_pb2 import ( + SavedDatasetStorage as SavedDatasetStorageProto, +) from feast.repo_config import RepoConfig +from feast.saved_dataset import SavedDatasetStorage from feast.value_type import ValueType @@ -119,18 +123,20 @@ def get_table_column_names_and_types( client = bigquery.Client() if self.table_ref is not None: - table_schema = client.get_table(self.table_ref).schema - if not isinstance(table_schema[0], bigquery.schema.SchemaField): + schema = client.get_table(self.table_ref).schema + if not isinstance(schema[0], bigquery.schema.SchemaField): raise TypeError("Could not parse BigQuery table schema.") - - name_type_pairs = [(field.name, field.field_type) for field in table_schema] else: bq_columns_query = f"SELECT * FROM ({self.query}) LIMIT 1" queryRes = client.query(bq_columns_query).result() - name_type_pairs = [ - (schema_field.name, schema_field.field_type) - for schema_field in queryRes.schema - ] + schema = queryRes.schema + + name_type_pairs: List[Tuple[str, str]] = [] + for field in schema: + bq_type_as_str = field.field_type + if field.mode == "REPEATED": + bq_type_as_str = "ARRAY<" + bq_type_as_str + ">" + name_type_pairs.append((field.name, bq_type_as_str)) return name_type_pairs @@ -204,3 +210,28 @@ def to_proto(self) -> DataSourceProto.BigQueryOptions: ) return bigquery_options_proto + + +class SavedDatasetBigQueryStorage(SavedDatasetStorage): + _proto_attr_name = "bigquery_storage" + + bigquery_options: BigQueryOptions + + def __init__(self, table_ref: str): + self.bigquery_options = BigQueryOptions(table_ref=table_ref, query=None) + + @staticmethod + def from_proto(storage_proto: SavedDatasetStorageProto) -> SavedDatasetStorage: + return SavedDatasetBigQueryStorage( + table_ref=BigQueryOptions.from_proto( + storage_proto.bigquery_storage + ).table_ref + ) + + def to_proto(self) -> SavedDatasetStorageProto: + return SavedDatasetStorageProto( + bigquery_storage=self.bigquery_options.to_proto() + ) + + def to_data_source(self) -> DataSource: + return BigQuerySource(table_ref=self.bigquery_options.table_ref) diff --git a/sdk/python/feast/infra/offline_stores/file.py b/sdk/python/feast/infra/offline_stores/file.py index 723e9eb533..c71f0c3ff7 100644 --- a/sdk/python/feast/infra/offline_stores/file.py +++ b/sdk/python/feast/infra/offline_stores/file.py @@ -1,6 +1,7 @@ from datetime import datetime -from typing import Callable, List, Optional, Union +from typing import Callable, List, Optional, Tuple, Union +import dask.dataframe as dd import pandas as pd import pyarrow import pytz @@ -10,16 +11,22 @@ from feast.data_source import DataSource from feast.errors import FeastJoinKeysDuringMaterialization from feast.feature_view import DUMMY_ENTITY_ID, DUMMY_ENTITY_VAL, FeatureView -from feast.infra.offline_stores.offline_store import OfflineStore, RetrievalJob +from feast.infra.offline_stores.file_source import SavedDatasetFileStorage +from feast.infra.offline_stores.offline_store import ( + OfflineStore, + RetrievalJob, + RetrievalMetadata, +) from feast.infra.offline_stores.offline_utils import ( DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL, ) from feast.infra.provider import ( _get_requested_feature_views_to_features_dict, - _run_field_mapping, + _run_dask_field_mapping, ) from feast.registry import Registry from feast.repo_config import FeastConfigBaseModel, RepoConfig +from feast.saved_dataset 
import SavedDatasetStorage from feast.usage import log_exceptions_and_usage @@ -36,6 +43,7 @@ def __init__( evaluation_function: Callable, full_feature_names: bool, on_demand_feature_views: Optional[List[OnDemandFeatureView]] = None, + metadata: Optional[RetrievalMetadata] = None, ): """Initialize a lazy historical retrieval job""" @@ -45,6 +53,7 @@ def __init__( self._on_demand_feature_views = ( on_demand_feature_views if on_demand_feature_views else [] ) + self._metadata = metadata @property def full_feature_names(self) -> bool: @@ -57,15 +66,36 @@ def on_demand_feature_views(self) -> Optional[List[OnDemandFeatureView]]: @log_exceptions_and_usage def _to_df_internal(self) -> pd.DataFrame: # Only execute the evaluation function to build the final historical retrieval dataframe at the last moment. - df = self.evaluation_function() + df = self.evaluation_function().compute() return df @log_exceptions_and_usage def _to_arrow_internal(self): # Only execute the evaluation function to build the final historical retrieval dataframe at the last moment. - df = self.evaluation_function() + df = self.evaluation_function().compute() return pyarrow.Table.from_pandas(df) + def persist(self, storage: SavedDatasetStorage): + assert isinstance(storage, SavedDatasetFileStorage) + + filesystem, path = FileSource.create_filesystem_and_path( + storage.file_options.file_url, storage.file_options.s3_endpoint_override, + ) + + if path.endswith(".parquet"): + pyarrow.parquet.write_table( + self.to_arrow(), where=path, filesystem=filesystem + ) + else: + # otherwise assume destination is directory + pyarrow.parquet.write_to_dataset( + self.to_arrow(), root_path=path, filesystem=filesystem + ) + + @property + def metadata(self) -> Optional[RetrievalMetadata]: + return self._metadata + class FileOfflineStore(OfflineStore): @staticmethod @@ -79,7 +109,9 @@ def get_historical_features( project: str, full_feature_names: bool = False, ) -> RetrievalJob: - if not isinstance(entity_df, pd.DataFrame): + if not isinstance(entity_df, pd.DataFrame) and not isinstance( + entity_df, dd.DataFrame + ): raise ValueError( f"Please provide an entity_df of type {type(pd.DataFrame)} instead of type {type(entity_df)}" ) @@ -106,28 +138,53 @@ def get_historical_features( registry.list_on_demand_feature_views(config.project), ) + entity_df_event_timestamp_range = _get_entity_df_event_timestamp_range( + entity_df, entity_df_event_timestamp_col + ) + # Create lazy function that is only called from the RetrievalJob object def evaluate_historical_retrieval(): - # Make sure all event timestamp fields are tz-aware. We default tz-naive fields to UTC - entity_df[entity_df_event_timestamp_col] = entity_df[ - entity_df_event_timestamp_col - ].apply(lambda x: x if x.tzinfo is not None else x.replace(tzinfo=pytz.utc)) - # Create a copy of entity_df to prevent modifying the original entity_df_with_features = entity_df.copy() - # Convert event timestamp column to datetime and normalize time zone to UTC - # This is necessary to avoid issues with pd.merge_asof - entity_df_with_features[entity_df_event_timestamp_col] = pd.to_datetime( - entity_df_with_features[entity_df_event_timestamp_col], utc=True - ) + entity_df_event_timestamp_col_type = entity_df_with_features.dtypes[ + entity_df_event_timestamp_col + ] + if ( + not hasattr(entity_df_event_timestamp_col_type, "tz") + or entity_df_event_timestamp_col_type.tz != pytz.UTC + ): + # Make sure all event timestamp fields are tz-aware. 
We default tz-naive fields to UTC + entity_df_with_features[ + entity_df_event_timestamp_col + ] = entity_df_with_features[entity_df_event_timestamp_col].apply( + lambda x: x if x.tzinfo is not None else x.replace(tzinfo=pytz.utc) + ) + + # Convert event timestamp column to datetime and normalize time zone to UTC + # This is necessary to avoid issues with pd.merge_asof + if isinstance(entity_df_with_features, dd.DataFrame): + entity_df_with_features[ + entity_df_event_timestamp_col + ] = dd.to_datetime( + entity_df_with_features[entity_df_event_timestamp_col], utc=True + ) + else: + entity_df_with_features[ + entity_df_event_timestamp_col + ] = pd.to_datetime( + entity_df_with_features[entity_df_event_timestamp_col], utc=True + ) # Sort event timestamp values entity_df_with_features = entity_df_with_features.sort_values( entity_df_event_timestamp_col ) + join_keys = [] + all_join_keys = [] + # Load feature view data from sources and join them incrementally for feature_view, features in feature_views_to_features.items(): event_timestamp_column = ( @@ -137,128 +194,65 @@ def evaluate_historical_retrieval(): feature_view.batch_source.created_timestamp_column ) - # Read offline parquet data in pyarrow format. - filesystem, path = FileSource.create_filesystem_and_path( - feature_view.batch_source.path, - feature_view.batch_source.file_options.s3_endpoint_override, - ) - table = pyarrow.parquet.read_table(path, filesystem=filesystem) - - # Rename columns by the field mapping dictionary if it exists - if feature_view.batch_source.field_mapping is not None: - table = _run_field_mapping( - table, feature_view.batch_source.field_mapping - ) - # Rename entity columns by the join_key_map dictionary if it exists - if feature_view.projection.join_key_map: - table = _run_field_mapping( - table, feature_view.projection.join_key_map - ) - - # Convert pyarrow table to pandas dataframe. Note, if the underlying data has missing values, - # pandas will convert those values to np.nan if the dtypes are numerical (floats, ints, etc.) or boolean - # If the dtype is 'object', then missing values are inferred as python `None`s. - # More details at: - # https://pandas.pydata.org/pandas-docs/stable/user_guide/missing_data.html#values-considered-missing - df_to_join = table.to_pandas() - - # Make sure all timestamp fields are tz-aware. We default tz-naive fields to UTC - df_to_join[event_timestamp_column] = df_to_join[ - event_timestamp_column - ].apply( - lambda x: x if x.tzinfo is not None else x.replace(tzinfo=pytz.utc) - ) - if created_timestamp_column: - df_to_join[created_timestamp_column] = df_to_join[ - created_timestamp_column - ].apply( - lambda x: x - if x.tzinfo is not None - else x.replace(tzinfo=pytz.utc) - ) - - # Sort dataframe by the event timestamp column - df_to_join = df_to_join.sort_values(event_timestamp_column) - - # Build a list of all the features we should select from this source - feature_names = [] - for feature in features: - # Modify the separator for feature refs in column names to double underscore. 
We are using - # double underscore as separator for consistency with other databases like BigQuery, - # where there are very few characters available for use as separators - if full_feature_names: - formatted_feature_name = ( - f"{feature_view.projection.name_to_use()}__{feature}" - ) - else: - formatted_feature_name = feature - # Add the feature name to the list of columns - feature_names.append(formatted_feature_name) - - # Ensure that the source dataframe feature column includes the feature view name as a prefix - df_to_join.rename( - columns={feature: formatted_feature_name}, inplace=True, - ) - # Build a list of entity columns to join on (from the right table) join_keys = [] + for entity_name in feature_view.entities: entity = registry.get_entity(entity_name, project) join_key = feature_view.projection.join_key_map.get( entity.join_key, entity.join_key ) join_keys.append(join_key) - right_entity_columns = join_keys + right_entity_key_columns = [ - event_timestamp_column - ] + right_entity_columns - - # Remove all duplicate entity keys (using created timestamp) - right_entity_key_sort_columns = right_entity_key_columns - if created_timestamp_column: - # If created_timestamp is available, use it to dedupe deterministically - right_entity_key_sort_columns = right_entity_key_sort_columns + [ - created_timestamp_column - ] - - df_to_join.sort_values(by=right_entity_key_sort_columns, inplace=True) - df_to_join.drop_duplicates( - right_entity_key_sort_columns, - keep="last", - ignore_index=True, - inplace=True, + event_timestamp_column, + created_timestamp_column, + ] + join_keys + right_entity_key_columns = [c for c in right_entity_key_columns if c] + + all_join_keys = list(set(all_join_keys + join_keys)) + + df_to_join = _read_datasource(feature_view.batch_source) + + df_to_join, event_timestamp_column = _field_mapping( + df_to_join, + feature_view, + features, + right_entity_key_columns, + entity_df_event_timestamp_col, + event_timestamp_column, + full_feature_names, ) - # Select only the columns we need to join from the feature dataframe - df_to_join = df_to_join[right_entity_key_columns + feature_names] + df_to_join = _merge(entity_df_with_features, df_to_join, join_keys) + + df_to_join = _normalize_timestamp( + df_to_join, event_timestamp_column, created_timestamp_column + ) - # Do point in-time-join between entity_df and feature dataframe - entity_df_with_features = pd.merge_asof( - entity_df_with_features, + df_to_join = _filter_ttl( df_to_join, - left_on=entity_df_event_timestamp_col, - right_on=event_timestamp_column, - by=right_entity_columns or None, - tolerance=feature_view.ttl, + feature_view, + entity_df_event_timestamp_col, + event_timestamp_column, ) - # Remove right (feature table/view) event_timestamp column. 
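# Illustrative sketch (not part of the patch): a miniature, pandas-only model of the
# point-in-time join that the dask pipeline introduced in this hunk performs -- left-join
# feature rows on the entity key, keep rows at or before the entity timestamp and within
# the TTL window, then take the latest row per entity/timestamp pair. Data is made up.
from datetime import timedelta
import pandas as pd

entity_df = pd.DataFrame({
    "driver_id": [1, 1],
    "event_timestamp": pd.to_datetime(["2021-01-02", "2021-01-05"], utc=True),
})
feature_df = pd.DataFrame({
    "driver_id": [1, 1],
    "ts": pd.to_datetime(["2021-01-01", "2021-01-04"], utc=True),
    "trips": [10, 12],
})

ttl = timedelta(days=2)
joined = entity_df.merge(feature_df, on="driver_id", how="left")
joined = joined[
    (joined["ts"] <= joined["event_timestamp"])
    & (joined["ts"] >= joined["event_timestamp"] - ttl)
]
joined = joined.sort_values("ts").drop_duplicates(
    ["driver_id", "event_timestamp"], keep="last"
)
print(joined.drop(columns=["ts"]))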
- if event_timestamp_column != entity_df_event_timestamp_col: - entity_df_with_features.drop( - columns=[event_timestamp_column], inplace=True - ) + df_to_join = _drop_duplicates( + df_to_join, + all_join_keys, + event_timestamp_column, + created_timestamp_column, + entity_df_event_timestamp_col, + ) + + entity_df_with_features = _drop_columns( + df_to_join, event_timestamp_column, created_timestamp_column + ) # Ensure that we delete dataframes to free up memory del df_to_join - # Move "event_timestamp" column to front - current_cols = entity_df_with_features.columns.tolist() - current_cols.remove(entity_df_event_timestamp_col) - entity_df_with_features = entity_df_with_features[ - [entity_df_event_timestamp_col] + current_cols - ] - - return entity_df_with_features + return entity_df_with_features.persist() job = FileRetrievalJob( evaluation_function=evaluate_historical_retrieval, @@ -266,6 +260,12 @@ def evaluate_historical_retrieval(): on_demand_feature_views=OnDemandFeatureView.get_requested_odfvs( feature_refs, project, registry ), + metadata=RetrievalMetadata( + features=feature_refs, + keys=list(set(entity_df.columns) - {entity_df_event_timestamp_col}), + min_event_timestamp=entity_df_event_timestamp_range[0], + max_event_timestamp=entity_df_event_timestamp_range[1], + ), ) return job @@ -285,20 +285,11 @@ def pull_latest_from_table_or_query( # Create lazy function that is only called from the RetrievalJob object def evaluate_offline_job(): - filesystem, path = FileSource.create_filesystem_and_path( - data_source.path, data_source.file_options.s3_endpoint_override - ) - source_df = pd.read_parquet(path, filesystem=filesystem) - # Make sure all timestamp fields are tz-aware. We default tz-naive fields to UTC - source_df[event_timestamp_column] = source_df[event_timestamp_column].apply( - lambda x: x if x.tzinfo is not None else x.replace(tzinfo=pytz.utc) + source_df = _read_datasource(data_source) + + source_df = _normalize_timestamp( + source_df, event_timestamp_column, created_timestamp_column ) - if created_timestamp_column: - source_df[created_timestamp_column] = source_df[ - created_timestamp_column - ].apply( - lambda x: x if x.tzinfo is not None else x.replace(tzinfo=pytz.utc) - ) source_columns = set(source_df.columns) if not set(join_key_columns).issubset(source_columns): @@ -312,28 +303,274 @@ def evaluate_offline_job(): else [event_timestamp_column] ) - source_df.sort_values(by=ts_columns, inplace=True) + if created_timestamp_column: + source_df = source_df.sort_values(by=created_timestamp_column) + + source_df = source_df.sort_values(by=event_timestamp_column) - filtered_df = source_df[ + source_df = source_df[ (source_df[event_timestamp_column] >= start_date) & (source_df[event_timestamp_column] < end_date) ] + source_df = source_df.persist() + columns_to_extract = set( join_key_columns + feature_name_columns + ts_columns ) if join_key_columns: - last_values_df = filtered_df.drop_duplicates( + source_df = source_df.drop_duplicates( join_key_columns, keep="last", ignore_index=True ) else: - last_values_df = filtered_df - last_values_df[DUMMY_ENTITY_ID] = DUMMY_ENTITY_VAL + source_df[DUMMY_ENTITY_ID] = DUMMY_ENTITY_VAL columns_to_extract.add(DUMMY_ENTITY_ID) - return last_values_df[columns_to_extract] + source_df = source_df.persist() + + return source_df[list(columns_to_extract)].persist() # When materializing a single feature view, we don't need full feature names. 
On demand transforms aren't materialized return FileRetrievalJob( evaluation_function=evaluate_offline_job, full_feature_names=False, ) + + @staticmethod + @log_exceptions_and_usage(offline_store="file") + def pull_all_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + event_timestamp_column: str, + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + return FileOfflineStore.pull_latest_from_table_or_query( + config=config, + data_source=data_source, + join_key_columns=join_key_columns + + [event_timestamp_column], # avoid deduplication + feature_name_columns=feature_name_columns, + event_timestamp_column=event_timestamp_column, + created_timestamp_column=None, + start_date=start_date, + end_date=end_date, + ) + + +def _get_entity_df_event_timestamp_range( + entity_df: Union[pd.DataFrame, str], entity_df_event_timestamp_col: str, +) -> Tuple[datetime, datetime]: + if not isinstance(entity_df, pd.DataFrame): + raise ValueError( + f"Please provide an entity_df of type {type(pd.DataFrame)} instead of type {type(entity_df)}" + ) + + entity_df_event_timestamp = entity_df.loc[ + :, entity_df_event_timestamp_col + ].infer_objects() + if pd.api.types.is_string_dtype(entity_df_event_timestamp): + entity_df_event_timestamp = pd.to_datetime(entity_df_event_timestamp, utc=True) + + return ( + entity_df_event_timestamp.min().to_pydatetime(), + entity_df_event_timestamp.max().to_pydatetime(), + ) + + +def _read_datasource(data_source) -> dd.DataFrame: + storage_options = ( + { + "client_kwargs": { + "endpoint_url": data_source.file_options.s3_endpoint_override + } + } + if data_source.file_options.s3_endpoint_override + else None + ) + + return dd.read_parquet(data_source.path, storage_options=storage_options,) + + +def _field_mapping( + df_to_join: dd.DataFrame, + feature_view: FeatureView, + features: List[str], + right_entity_key_columns: List[str], + entity_df_event_timestamp_col: str, + event_timestamp_column: str, + full_feature_names: bool, +) -> dd.DataFrame: + # Rename columns by the field mapping dictionary if it exists + if feature_view.batch_source.field_mapping: + df_to_join = _run_dask_field_mapping( + df_to_join, feature_view.batch_source.field_mapping + ) + # Rename entity columns by the join_key_map dictionary if it exists + if feature_view.projection.join_key_map: + df_to_join = _run_dask_field_mapping( + df_to_join, feature_view.projection.join_key_map + ) + + # Build a list of all the features we should select from this source + feature_names = [] + columns_map = {} + for feature in features: + # Modify the separator for feature refs in column names to double underscore. 
We are using + # double underscore as separator for consistency with other databases like BigQuery, + # where there are very few characters available for use as separators + if full_feature_names: + formatted_feature_name = ( + f"{feature_view.projection.name_to_use()}__{feature}" + ) + else: + formatted_feature_name = feature + # Add the feature name to the list of columns + feature_names.append(formatted_feature_name) + columns_map[feature] = formatted_feature_name + + # Ensure that the source dataframe feature column includes the feature view name as a prefix + df_to_join = _run_dask_field_mapping(df_to_join, columns_map) + + # Select only the columns we need to join from the feature dataframe + df_to_join = df_to_join[right_entity_key_columns + feature_names] + df_to_join = df_to_join.persist() + + # Make sure to not have duplicated columns + if entity_df_event_timestamp_col == event_timestamp_column: + df_to_join = _run_dask_field_mapping( + df_to_join, {event_timestamp_column: f"__{event_timestamp_column}"}, + ) + event_timestamp_column = f"__{event_timestamp_column}" + + return df_to_join.persist(), event_timestamp_column + + +def _merge( + entity_df_with_features: dd.DataFrame, + df_to_join: dd.DataFrame, + join_keys: List[str], +) -> dd.DataFrame: + # tmp join keys needed for cross join with null join table view + tmp_join_keys = [] + if not join_keys: + entity_df_with_features["__tmp"] = 1 + df_to_join["__tmp"] = 1 + tmp_join_keys = ["__tmp"] + + # Get only data with requested entities + df_to_join = dd.merge( + entity_df_with_features, + df_to_join, + left_on=join_keys or tmp_join_keys, + right_on=join_keys or tmp_join_keys, + suffixes=("", "__"), + how="left", + ) + + if tmp_join_keys: + df_to_join = df_to_join.drop(tmp_join_keys, axis=1).persist() + else: + df_to_join = df_to_join.persist() + + return df_to_join + + +def _normalize_timestamp( + df_to_join: dd.DataFrame, + event_timestamp_column: str, + created_timestamp_column: str, +) -> dd.DataFrame: + df_to_join_types = df_to_join.dtypes + event_timestamp_column_type = df_to_join_types[event_timestamp_column] + + if created_timestamp_column: + created_timestamp_column_type = df_to_join_types[created_timestamp_column] + + if ( + not hasattr(event_timestamp_column_type, "tz") + or event_timestamp_column_type.tz != pytz.UTC + ): + # Make sure all timestamp fields are tz-aware. 
We default tz-naive fields to UTC + df_to_join[event_timestamp_column] = df_to_join[event_timestamp_column].apply( + lambda x: x if x.tzinfo is not None else x.replace(tzinfo=pytz.utc), + meta=(event_timestamp_column, "datetime64[ns, UTC]"), + ) + + if created_timestamp_column and ( + not hasattr(created_timestamp_column_type, "tz") + or created_timestamp_column_type.tz != pytz.UTC + ): + df_to_join[created_timestamp_column] = df_to_join[ + created_timestamp_column + ].apply( + lambda x: x if x.tzinfo is not None else x.replace(tzinfo=pytz.utc), + meta=(event_timestamp_column, "datetime64[ns, UTC]"), + ) + + return df_to_join.persist() + + +def _filter_ttl( + df_to_join: dd.DataFrame, + feature_view: FeatureView, + entity_df_event_timestamp_col: str, + event_timestamp_column: str, +) -> dd.DataFrame: + # Filter rows by defined timestamp tolerance + if feature_view.ttl and feature_view.ttl.total_seconds() != 0: + df_to_join = df_to_join[ + ( + df_to_join[event_timestamp_column] + >= df_to_join[entity_df_event_timestamp_col] - feature_view.ttl + ) + & ( + df_to_join[event_timestamp_column] + <= df_to_join[entity_df_event_timestamp_col] + ) + ] + + df_to_join = df_to_join.persist() + + return df_to_join + + +def _drop_duplicates( + df_to_join: dd.DataFrame, + all_join_keys: List[str], + event_timestamp_column: str, + created_timestamp_column: str, + entity_df_event_timestamp_col: str, +) -> dd.DataFrame: + if created_timestamp_column: + df_to_join = df_to_join.sort_values( + by=created_timestamp_column, na_position="first" + ) + df_to_join = df_to_join.persist() + + df_to_join = df_to_join.sort_values(by=event_timestamp_column, na_position="first") + df_to_join = df_to_join.persist() + + df_to_join = df_to_join.drop_duplicates( + all_join_keys + [entity_df_event_timestamp_col], keep="last", ignore_index=True, + ) + + return df_to_join.persist() + + +def _drop_columns( + df_to_join: dd.DataFrame, + event_timestamp_column: str, + created_timestamp_column: str, +) -> dd.DataFrame: + entity_df_with_features = df_to_join.drop( + [event_timestamp_column], axis=1 + ).persist() + + if created_timestamp_column: + entity_df_with_features = entity_df_with_features.drop( + [created_timestamp_column], axis=1 + ).persist() + + return entity_df_with_features diff --git a/sdk/python/feast/infra/offline_stores/file_source.py b/sdk/python/feast/infra/offline_stores/file_source.py index 31eb5f037f..7d52110985 100644 --- a/sdk/python/feast/infra/offline_stores/file_source.py +++ b/sdk/python/feast/infra/offline_stores/file_source.py @@ -5,10 +5,14 @@ from pyarrow.parquet import ParquetFile from feast import type_map -from feast.data_format import FileFormat +from feast.data_format import FileFormat, ParquetFormat from feast.data_source import DataSource from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto +from feast.protos.feast.core.SavedDataset_pb2 import ( + SavedDatasetStorage as SavedDatasetStorageProto, +) from feast.repo_config import RepoConfig +from feast.saved_dataset import SavedDatasetStorage from feast.value_type import ValueType @@ -260,3 +264,40 @@ def to_proto(self) -> DataSourceProto.FileOptions: ) return file_options_proto + + +class SavedDatasetFileStorage(SavedDatasetStorage): + _proto_attr_name = "file_storage" + + file_options: FileOptions + + def __init__( + self, + path: str, + file_format: FileFormat = ParquetFormat(), + s3_endpoint_override: Optional[str] = None, + ): + self.file_options = FileOptions( + file_url=path, + file_format=file_format, + 
s3_endpoint_override=s3_endpoint_override, + ) + + @staticmethod + def from_proto(storage_proto: SavedDatasetStorageProto) -> SavedDatasetStorage: + file_options = FileOptions.from_proto(storage_proto.file_storage) + return SavedDatasetFileStorage( + path=file_options.file_url, + file_format=file_options.file_format, + s3_endpoint_override=file_options.s3_endpoint_override, + ) + + def to_proto(self) -> SavedDatasetStorageProto: + return SavedDatasetStorageProto(file_storage=self.file_options.to_proto()) + + def to_data_source(self) -> DataSource: + return FileSource( + path=self.file_options.file_url, + file_format=self.file_options.file_format, + s3_endpoint_override=self.file_options.s3_endpoint_override, + ) diff --git a/sdk/python/feast/infra/offline_stores/offline_store.py b/sdk/python/feast/infra/offline_stores/offline_store.py index 0ba8197154..a3065a31c0 100644 --- a/sdk/python/feast/infra/offline_stores/offline_store.py +++ b/sdk/python/feast/infra/offline_stores/offline_store.py @@ -11,18 +11,46 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import warnings from abc import ABC, abstractmethod from datetime import datetime -from typing import List, Optional, Union +from typing import TYPE_CHECKING, List, Optional, Union import pandas as pd import pyarrow from feast.data_source import DataSource +from feast.dqm.errors import ValidationFailed from feast.feature_view import FeatureView from feast.on_demand_feature_view import OnDemandFeatureView from feast.registry import Registry from feast.repo_config import RepoConfig +from feast.saved_dataset import SavedDatasetStorage + +if TYPE_CHECKING: + from feast.saved_dataset import ValidationReference + + +class RetrievalMetadata: + min_event_timestamp: Optional[datetime] + max_event_timestamp: Optional[datetime] + + # List of feature references + features: List[str] + # List of entity keys + ODFV inputs + keys: List[str] + + def __init__( + self, + features: List[str], + keys: List[str], + min_event_timestamp: Optional[datetime] = None, + max_event_timestamp: Optional[datetime] = None, + ): + self.features = features + self.keys = keys + self.min_event_timestamp = min_event_timestamp + self.max_event_timestamp = max_event_timestamp class RetrievalJob(ABC): @@ -38,17 +66,37 @@ def full_feature_names(self) -> bool: def on_demand_feature_views(self) -> Optional[List[OnDemandFeatureView]]: pass - def to_df(self) -> pd.DataFrame: - """Return dataset as Pandas DataFrame synchronously including on demand transforms""" + def to_df( + self, validation_reference: Optional["ValidationReference"] = None + ) -> pd.DataFrame: + """ + Return dataset as Pandas DataFrame synchronously including on demand transforms + Args: + validation_reference: If provided resulting dataset will be validated against this reference profile. 
+ """ features_df = self._to_df_internal() - if not self.on_demand_feature_views: - return features_df - # TODO(adchia): Fix requirement to specify dependent feature views in feature_refs - for odfv in self.on_demand_feature_views: - features_df = features_df.join( - odfv.get_transformed_features_df(features_df, self.full_feature_names,) + if self.on_demand_feature_views: + # TODO(adchia): Fix requirement to specify dependent feature views in feature_refs + for odfv in self.on_demand_feature_views: + features_df = features_df.join( + odfv.get_transformed_features_df( + features_df, self.full_feature_names, + ) + ) + + if validation_reference: + warnings.warn( + "Dataset validation is an experimental feature. " + "This API is unstable and it could and most probably will be changed in the future. " + "We do not guarantee that future changes will maintain backward compatibility.", + RuntimeWarning, ) + + validation_result = validation_reference.profile.validate(features_df) + if not validation_result.is_success: + raise ValidationFailed(validation_result) + return features_df @abstractmethod @@ -61,18 +109,56 @@ def _to_arrow_internal(self) -> pyarrow.Table: """Return dataset as pyarrow Table synchronously""" pass - def to_arrow(self) -> pyarrow.Table: - """Return dataset as pyarrow Table synchronously""" - if not self.on_demand_feature_views: + def to_arrow( + self, validation_reference: Optional["ValidationReference"] = None + ) -> pyarrow.Table: + """ + Return dataset as pyarrow Table synchronously + Args: + validation_reference: If provided resulting dataset will be validated against this reference profile. + """ + if not self.on_demand_feature_views and not validation_reference: return self._to_arrow_internal() features_df = self._to_df_internal() - for odfv in self.on_demand_feature_views: - features_df = features_df.join( - odfv.get_transformed_features_df(features_df, self.full_feature_names,) + if self.on_demand_feature_views: + for odfv in self.on_demand_feature_views: + features_df = features_df.join( + odfv.get_transformed_features_df( + features_df, self.full_feature_names, + ) + ) + + if validation_reference: + warnings.warn( + "Dataset validation is an experimental feature. " + "This API is unstable and it could and most probably will be changed in the future. " + "We do not guarantee that future changes will maintain backward compatibility.", + RuntimeWarning, ) + + validation_result = validation_reference.profile.validate(features_df) + if not validation_result.is_success: + raise ValidationFailed(validation_result) + return pyarrow.Table.from_pandas(features_df) + @abstractmethod + def persist(self, storage: SavedDatasetStorage): + """ + Run the retrieval and persist the results in the same offline store used for read. + """ + pass + + @property + @abstractmethod + def metadata(self) -> Optional[RetrievalMetadata]: + """ + Return metadata information about retrieval. + Should be available even before materializing the dataset itself. 
+ """ + pass + class OfflineStore(ABC): """ @@ -111,3 +197,21 @@ def get_historical_features( full_feature_names: bool = False, ) -> RetrievalJob: pass + + @staticmethod + @abstractmethod + def pull_all_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + event_timestamp_column: str, + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + """ + Note that join_key_columns, feature_name_columns, event_timestamp_column, and created_timestamp_column + have all already been mapped to column names of the source table and those column names are the values passed + into this function. + """ + pass diff --git a/sdk/python/feast/infra/offline_stores/offline_utils.py b/sdk/python/feast/infra/offline_stores/offline_utils.py index 0b60c3493d..eaf4925266 100644 --- a/sdk/python/feast/infra/offline_stores/offline_utils.py +++ b/sdk/python/feast/infra/offline_stores/offline_utils.py @@ -85,6 +85,7 @@ class FeatureViewQueryContext: ttl: int entities: List[str] features: List[str] # feature reference format + field_mapping: Dict[str, str] event_timestamp_column: str created_timestamp_column: Optional[str] table_subquery: str @@ -144,7 +145,10 @@ def get_feature_view_query_context( name=feature_view.projection.name_to_use(), ttl=ttl_seconds, entities=join_keys, - features=features, + features=[ + reverse_field_mapping.get(feature, feature) for feature in features + ], + field_mapping=feature_view.input.field_mapping, event_timestamp_column=reverse_field_mapping.get( event_timestamp_column, event_timestamp_column ), @@ -175,7 +179,11 @@ def build_point_in_time_query( final_output_feature_names = list(entity_df_columns) final_output_feature_names.extend( [ - (f"{fv.name}__{feature}" if full_feature_names else feature) + ( + f"{fv.name}__{fv.field_mapping.get(feature, feature)}" + if full_feature_names + else fv.field_mapping.get(feature, feature) + ) for fv in feature_view_query_contexts for feature in fv.features ] diff --git a/sdk/python/feast/infra/offline_stores/redshift.py b/sdk/python/feast/infra/offline_stores/redshift.py index df363967d6..3efd45bc74 100644 --- a/sdk/python/feast/infra/offline_stores/redshift.py +++ b/sdk/python/feast/infra/offline_stores/redshift.py @@ -25,10 +25,16 @@ from feast.errors import InvalidEntityType from feast.feature_view import DUMMY_ENTITY_ID, DUMMY_ENTITY_VAL, FeatureView from feast.infra.offline_stores import offline_utils -from feast.infra.offline_stores.offline_store import OfflineStore, RetrievalJob +from feast.infra.offline_stores.offline_store import ( + OfflineStore, + RetrievalJob, + RetrievalMetadata, +) +from feast.infra.offline_stores.redshift_source import SavedDatasetRedshiftStorage from feast.infra.utils import aws_utils from feast.registry import Registry from feast.repo_config import FeastConfigBaseModel, RepoConfig +from feast.saved_dataset import SavedDatasetStorage from feast.usage import log_exceptions_and_usage @@ -117,6 +123,46 @@ def pull_latest_from_table_or_query( full_feature_names=False, ) + @staticmethod + @log_exceptions_and_usage(offline_store="redshift") + def pull_all_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + event_timestamp_column: str, + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + assert isinstance(data_source, RedshiftSource) + from_expression = data_source.get_table_query_string() + + field_string = ", ".join( + 
join_key_columns + feature_name_columns + [event_timestamp_column] + ) + + redshift_client = aws_utils.get_redshift_data_client( + config.offline_store.region + ) + s3_resource = aws_utils.get_s3_resource(config.offline_store.region) + + start_date = start_date.astimezone(tz=utc) + end_date = end_date.astimezone(tz=utc) + + query = f""" + SELECT {field_string} + FROM {from_expression} + WHERE {event_timestamp_column} BETWEEN TIMESTAMP '{start_date}' AND TIMESTAMP '{end_date}' + """ + + return RedshiftRetrievalJob( + query=query, + redshift_client=redshift_client, + s3_resource=s3_resource, + config=config, + full_feature_names=False, + ) + @staticmethod @log_exceptions_and_usage(offline_store="redshift") def get_historical_features( @@ -135,18 +181,26 @@ def get_historical_features( ) s3_resource = aws_utils.get_s3_resource(config.offline_store.region) + entity_schema = _get_entity_schema( + entity_df, redshift_client, config, s3_resource + ) + + entity_df_event_timestamp_col = offline_utils.infer_event_timestamp_from_entity_df( + entity_schema + ) + + entity_df_event_timestamp_range = _get_entity_df_event_timestamp_range( + entity_df, entity_df_event_timestamp_col, redshift_client, config, + ) + @contextlib.contextmanager def query_generator() -> Iterator[str]: table_name = offline_utils.get_temp_entity_table_name() - entity_schema = _upload_entity_df_and_get_entity_schema( + _upload_entity_df( entity_df, redshift_client, config, s3_resource, table_name ) - entity_df_event_timestamp_col = offline_utils.infer_event_timestamp_from_entity_df( - entity_schema - ) - expected_join_keys = offline_utils.get_expected_join_keys( project, feature_views, registry ) @@ -155,14 +209,6 @@ def query_generator() -> Iterator[str]: entity_schema, expected_join_keys, entity_df_event_timestamp_col ) - entity_df_event_timestamp_range = _get_entity_df_event_timestamp_range( - entity_df, - entity_df_event_timestamp_col, - redshift_client, - config, - table_name, - ) - # Build a query context containing all information required to template the Redshift SQL query query_context = offline_utils.get_feature_view_query_context( feature_refs, @@ -203,6 +249,12 @@ def query_generator() -> Iterator[str]: on_demand_feature_views=OnDemandFeatureView.get_requested_odfvs( feature_refs, project, registry ), + metadata=RetrievalMetadata( + features=feature_refs, + keys=list(entity_schema.keys() - {entity_df_event_timestamp_col}), + min_event_timestamp=entity_df_event_timestamp_range[0], + max_event_timestamp=entity_df_event_timestamp_range[1], + ), ) @@ -215,6 +267,7 @@ def __init__( config: RepoConfig, full_feature_names: bool, on_demand_feature_views: Optional[List[OnDemandFeatureView]] = None, + metadata: Optional[RetrievalMetadata] = None, ): """Initialize RedshiftRetrievalJob object. 
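Because the Redshift implementation above now computes the entity schema and event-timestamp range before building the query, the returned job carries that information in its metadata property. A sketch of reading it; the entity dataframe and feature reference are illustrative:

import pandas as pd

from feast import FeatureStore

store = FeatureStore(repo_path=".")
entity_df = pd.DataFrame(
    {
        "driver_id": [1001, 1002],
        "event_timestamp": pd.to_datetime(["2022-01-10", "2022-01-11"], utc=True),
    }
)

job = store.get_historical_features(
    entity_df=entity_df, features=["driver_hourly_stats:conv_rate"],
)

meta = job.metadata
if meta is not None:
    # Join keys plus the min/max event timestamps of the entity dataframe.
    print(meta.keys, meta.min_event_timestamp, meta.max_event_timestamp)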
@@ -248,6 +301,7 @@ def query_generator() -> Iterator[str]: self._on_demand_feature_views = ( on_demand_feature_views if on_demand_feature_views else [] ) + self._metadata = metadata @property def full_feature_names(self) -> bool: @@ -334,17 +388,24 @@ def to_redshift(self, table_name: str) -> None: query, ) + def persist(self, storage: SavedDatasetStorage): + assert isinstance(storage, SavedDatasetRedshiftStorage) + self.to_redshift(table_name=storage.redshift_options.table) -def _upload_entity_df_and_get_entity_schema( + @property + def metadata(self) -> Optional[RetrievalMetadata]: + return self._metadata + + +def _upload_entity_df( entity_df: Union[pd.DataFrame, str], redshift_client, config: RepoConfig, s3_resource, table_name: str, -) -> Dict[str, np.dtype]: +): if isinstance(entity_df, pd.DataFrame): # If the entity_df is a pandas dataframe, upload it to Redshift - # and construct the schema from the original entity_df dataframe aws_utils.upload_df_to_redshift( redshift_client, config.offline_store.cluster_id, @@ -356,10 +417,8 @@ def _upload_entity_df_and_get_entity_schema( table_name, entity_df, ) - return dict(zip(entity_df.columns, entity_df.dtypes)) elif isinstance(entity_df, str): - # If the entity_df is a string (SQL query), create a Redshift table out of it, - # get pandas dataframe consisting of 1 row (LIMIT 1) and generate the schema out of it + # If the entity_df is a string (SQL query), create a Redshift table out of it aws_utils.execute_redshift_statement( redshift_client, config.offline_store.cluster_id, @@ -367,14 +426,29 @@ def _upload_entity_df_and_get_entity_schema( config.offline_store.user, f"CREATE TABLE {table_name} AS ({entity_df})", ) - limited_entity_df = RedshiftRetrievalJob( - f"SELECT * FROM {table_name} LIMIT 1", + else: + raise InvalidEntityType(type(entity_df)) + + +def _get_entity_schema( + entity_df: Union[pd.DataFrame, str], + redshift_client, + config: RepoConfig, + s3_resource, +) -> Dict[str, np.dtype]: + if isinstance(entity_df, pd.DataFrame): + return dict(zip(entity_df.columns, entity_df.dtypes)) + + elif isinstance(entity_df, str): + # get pandas dataframe consisting of 1 row (LIMIT 1) and generate the schema out of it + entity_df_sample = RedshiftRetrievalJob( + f"SELECT * FROM ({entity_df}) LIMIT 1", redshift_client, s3_resource, config, full_feature_names=False, ).to_df() - return dict(zip(limited_entity_df.columns, limited_entity_df.dtypes)) + return dict(zip(entity_df_sample.columns, entity_df_sample.dtypes)) else: raise InvalidEntityType(type(entity_df)) @@ -384,7 +458,6 @@ def _get_entity_df_event_timestamp_range( entity_df_event_timestamp_col: str, redshift_client, config: RepoConfig, - table_name: str, ) -> Tuple[datetime, datetime]: if isinstance(entity_df, pd.DataFrame): entity_df_event_timestamp = entity_df.loc[ @@ -395,8 +468,8 @@ def _get_entity_df_event_timestamp_range( entity_df_event_timestamp, utc=True ) entity_df_event_timestamp_range = ( - entity_df_event_timestamp.min(), - entity_df_event_timestamp.max(), + entity_df_event_timestamp.min().to_pydatetime(), + entity_df_event_timestamp.max().to_pydatetime(), ) elif isinstance(entity_df, str): # If the entity_df is a string (SQL query), determine range @@ -406,7 +479,8 @@ def _get_entity_df_event_timestamp_range( config.offline_store.cluster_id, config.offline_store.database, config.offline_store.user, - f"SELECT MIN({entity_df_event_timestamp_col}) AS min, MAX({entity_df_event_timestamp_col}) AS max FROM {table_name}", + f"SELECT MIN({entity_df_event_timestamp_col}) AS min, 
MAX({entity_df_event_timestamp_col}) AS max " + f"FROM ({entity_df})", ) res = aws_utils.get_redshift_statement_result(redshift_client, statement_id)[ "Records" @@ -489,7 +563,7 @@ def _get_entity_df_event_timestamp_range( {{ featureview.created_timestamp_column ~ ' as created_timestamp,' if featureview.created_timestamp_column else '' }} {{ featureview.entity_selections | join(', ')}}{% if featureview.entity_selections %},{% else %}{% endif %} {% for feature in featureview.features %} - {{ feature }} as {% if full_feature_names %}{{ featureview.name }}__{{feature}}{% else %}{{ feature }}{% endif %}{% if loop.last %}{% else %}, {% endif %} + {{ feature }} as {% if full_feature_names %}{{ featureview.name }}__{{featureview.field_mapping.get(feature, feature)}}{% else %}{{ featureview.field_mapping.get(feature, feature) }}{% endif %}{% if loop.last %}{% else %}, {% endif %} {% endfor %} FROM {{ featureview.table_subquery }} WHERE {{ featureview.event_timestamp_column }} <= '{{ featureview.max_event_timestamp }}' @@ -590,7 +664,7 @@ def _get_entity_df_event_timestamp_range( SELECT {{featureview.name}}__entity_row_unique_id {% for feature in featureview.features %} - ,{% if full_feature_names %}{{ featureview.name }}__{{feature}}{% else %}{{ feature }}{% endif %} + ,{% if full_feature_names %}{{ featureview.name }}__{{featureview.field_mapping.get(feature, feature)}}{% else %}{{ featureview.field_mapping.get(feature, feature) }}{% endif %} {% endfor %} FROM {{ featureview.name }}__cleaned ) USING ({{featureview.name}}__entity_row_unique_id) diff --git a/sdk/python/feast/infra/offline_stores/redshift_source.py b/sdk/python/feast/infra/offline_stores/redshift_source.py index e7e88a54ef..949f1c9221 100644 --- a/sdk/python/feast/infra/offline_stores/redshift_source.py +++ b/sdk/python/feast/infra/offline_stores/redshift_source.py @@ -4,7 +4,11 @@ from feast.data_source import DataSource from feast.errors import DataSourceNotFoundException, RedshiftCredentialsError from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto +from feast.protos.feast.core.SavedDataset_pb2 import ( + SavedDatasetStorage as SavedDatasetStorageProto, +) from feast.repo_config import RepoConfig +from feast.saved_dataset import SavedDatasetStorage from feast.value_type import ValueType @@ -269,3 +273,29 @@ def to_proto(self) -> DataSourceProto.RedshiftOptions: ) return redshift_options_proto + + +class SavedDatasetRedshiftStorage(SavedDatasetStorage): + _proto_attr_name = "redshift_storage" + + redshift_options: RedshiftOptions + + def __init__(self, table_ref: str): + self.redshift_options = RedshiftOptions( + table=table_ref, schema=None, query=None + ) + + @staticmethod + def from_proto(storage_proto: SavedDatasetStorageProto) -> SavedDatasetStorage: + + return SavedDatasetRedshiftStorage( + table_ref=RedshiftOptions.from_proto(storage_proto.redshift_storage).table + ) + + def to_proto(self) -> SavedDatasetStorageProto: + return SavedDatasetStorageProto( + redshift_storage=self.redshift_options.to_proto() + ) + + def to_data_source(self) -> DataSource: + return RedshiftSource(table=self.redshift_options.table) diff --git a/sdk/python/feast/infra/offline_stores/snowflake.py b/sdk/python/feast/infra/offline_stores/snowflake.py new file mode 100644 index 0000000000..ee8cd71ce0 --- /dev/null +++ b/sdk/python/feast/infra/offline_stores/snowflake.py @@ -0,0 +1,632 @@ +import contextlib +import os +from datetime import datetime +from pathlib import Path +from typing import ( + Callable, + ContextManager, 
+ Dict, + Iterator, + List, + Optional, + Tuple, + Union, + cast, +) + +import numpy as np +import pandas as pd +import pyarrow as pa +from pydantic import Field +from pydantic.typing import Literal +from pytz import utc + +from feast import OnDemandFeatureView +from feast.data_source import DataSource +from feast.errors import InvalidEntityType +from feast.feature_view import DUMMY_ENTITY_ID, DUMMY_ENTITY_VAL, FeatureView +from feast.infra.offline_stores import offline_utils +from feast.infra.offline_stores.offline_store import ( + OfflineStore, + RetrievalJob, + RetrievalMetadata, +) +from feast.infra.offline_stores.snowflake_source import ( + SavedDatasetSnowflakeStorage, + SnowflakeSource, +) +from feast.infra.utils.snowflake_utils import ( + execute_snowflake_statement, + get_snowflake_conn, + write_pandas, +) +from feast.registry import Registry +from feast.repo_config import FeastConfigBaseModel, RepoConfig +from feast.saved_dataset import SavedDatasetStorage +from feast.usage import log_exceptions_and_usage + +try: + from snowflake.connector import SnowflakeConnection +except ImportError as e: + from feast.errors import FeastExtrasDependencyImportError + + raise FeastExtrasDependencyImportError("snowflake", str(e)) + + +class SnowflakeOfflineStoreConfig(FeastConfigBaseModel): + """ Offline store config for Snowflake """ + + type: Literal["snowflake.offline"] = "snowflake.offline" + """ Offline store type selector""" + + config_path: Optional[str] = ( + Path(os.environ["HOME"]) / ".snowsql/config" + ).__str__() + """ Snowflake config path -- absolute path required (Cant use ~)""" + + account: Optional[str] = None + """ Snowflake deployment identifier -- drop .snowflakecomputing.com""" + + user: Optional[str] = None + """ Snowflake user name """ + + password: Optional[str] = None + """ Snowflake password """ + + role: Optional[str] = None + """ Snowflake role name""" + + warehouse: Optional[str] = None + """ Snowflake warehouse name """ + + database: Optional[str] = None + """ Snowflake database name """ + + schema_: Optional[str] = Field("PUBLIC", alias="schema") + """ Snowflake schema name """ + + class Config: + allow_population_by_field_name = True + + +class SnowflakeOfflineStore(OfflineStore): + @staticmethod + @log_exceptions_and_usage(offline_store="snowflake") + def pull_latest_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + event_timestamp_column: str, + created_timestamp_column: Optional[str], + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + assert isinstance(data_source, SnowflakeSource) + assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) + + from_expression = ( + data_source.get_table_query_string() + ) # returns schema.table as a string + + if join_key_columns: + partition_by_join_key_string = '"' + '", "'.join(join_key_columns) + '"' + partition_by_join_key_string = ( + "PARTITION BY " + partition_by_join_key_string + ) + else: + partition_by_join_key_string = "" + + timestamp_columns = [event_timestamp_column] + if created_timestamp_column: + timestamp_columns.append(created_timestamp_column) + + timestamp_desc_string = '"' + '" DESC, "'.join(timestamp_columns) + '" DESC' + field_string = ( + '"' + + '", "'.join(join_key_columns + feature_name_columns + timestamp_columns) + + '"' + ) + + snowflake_conn = get_snowflake_conn(config.offline_store) + + query = f""" + SELECT + {field_string} + {f''', TRIM({repr(DUMMY_ENTITY_VAL)}::VARIANT,'"') AS 
"{DUMMY_ENTITY_ID}"''' if not join_key_columns else ""} + FROM ( + SELECT {field_string}, + ROW_NUMBER() OVER({partition_by_join_key_string} ORDER BY {timestamp_desc_string}) AS "_feast_row" + FROM {from_expression} + WHERE "{event_timestamp_column}" BETWEEN TO_TIMESTAMP_NTZ({start_date.timestamp()}) AND TO_TIMESTAMP_NTZ({end_date.timestamp()}) + ) + WHERE "_feast_row" = 1 + """ + + return SnowflakeRetrievalJob( + query=query, + snowflake_conn=snowflake_conn, + config=config, + full_feature_names=False, + on_demand_feature_views=None, + ) + + @staticmethod + @log_exceptions_and_usage(offline_store="snowflake") + def pull_all_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + event_timestamp_column: str, + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + assert isinstance(data_source, SnowflakeSource) + from_expression = data_source.get_table_query_string() + + field_string = ( + '"' + + '", "'.join( + join_key_columns + feature_name_columns + [event_timestamp_column] + ) + + '"' + ) + + snowflake_conn = get_snowflake_conn(config.offline_store) + + start_date = start_date.astimezone(tz=utc) + end_date = end_date.astimezone(tz=utc) + + query = f""" + SELECT {field_string} + FROM {from_expression} + WHERE "{event_timestamp_column}" BETWEEN TIMESTAMP '{start_date}' AND TIMESTAMP '{end_date}' + """ + + return SnowflakeRetrievalJob( + query=query, + snowflake_conn=snowflake_conn, + config=config, + full_feature_names=False, + ) + + @staticmethod + @log_exceptions_and_usage(offline_store="snowflake") + def get_historical_features( + config: RepoConfig, + feature_views: List[FeatureView], + feature_refs: List[str], + entity_df: Union[pd.DataFrame, str], + registry: Registry, + project: str, + full_feature_names: bool = False, + ) -> RetrievalJob: + assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) + + snowflake_conn = get_snowflake_conn(config.offline_store) + + entity_schema = _get_entity_schema(entity_df, snowflake_conn, config) + + entity_df_event_timestamp_col = offline_utils.infer_event_timestamp_from_entity_df( + entity_schema + ) + + entity_df_event_timestamp_range = _get_entity_df_event_timestamp_range( + entity_df, entity_df_event_timestamp_col, snowflake_conn, + ) + + @contextlib.contextmanager + def query_generator() -> Iterator[str]: + + table_name = offline_utils.get_temp_entity_table_name() + + _upload_entity_df(entity_df, snowflake_conn, config, table_name) + + expected_join_keys = offline_utils.get_expected_join_keys( + project, feature_views, registry + ) + + offline_utils.assert_expected_columns_in_entity_df( + entity_schema, expected_join_keys, entity_df_event_timestamp_col + ) + + # Build a query context containing all information required to template the Snowflake SQL query + query_context = offline_utils.get_feature_view_query_context( + feature_refs, + feature_views, + registry, + project, + entity_df_event_timestamp_range, + ) + + query_context = _fix_entity_selections_identifiers(query_context) + + # Generate the Snowflake SQL query from the query context + query = offline_utils.build_point_in_time_query( + query_context, + left_table_query_string=table_name, + entity_df_event_timestamp_col=entity_df_event_timestamp_col, + entity_df_columns=entity_schema.keys(), + query_template=MULTIPLE_FEATURE_VIEW_POINT_IN_TIME_JOIN, + full_feature_names=full_feature_names, + ) + + yield query + + return SnowflakeRetrievalJob( + query=query_generator, + 
snowflake_conn=snowflake_conn, + config=config, + full_feature_names=full_feature_names, + on_demand_feature_views=OnDemandFeatureView.get_requested_odfvs( + feature_refs, project, registry + ), + metadata=RetrievalMetadata( + features=feature_refs, + keys=list(entity_schema.keys() - {entity_df_event_timestamp_col}), + min_event_timestamp=entity_df_event_timestamp_range[0], + max_event_timestamp=entity_df_event_timestamp_range[1], + ), + ) + + +class SnowflakeRetrievalJob(RetrievalJob): + def __init__( + self, + query: Union[str, Callable[[], ContextManager[str]]], + snowflake_conn: SnowflakeConnection, + config: RepoConfig, + full_feature_names: bool, + on_demand_feature_views: Optional[List[OnDemandFeatureView]] = None, + metadata: Optional[RetrievalMetadata] = None, + ): + + if not isinstance(query, str): + self._query_generator = query + else: + + @contextlib.contextmanager + def query_generator() -> Iterator[str]: + assert isinstance(query, str) + yield query + + self._query_generator = query_generator + + self.snowflake_conn = snowflake_conn + self.config = config + self._full_feature_names = full_feature_names + self._on_demand_feature_views = ( + on_demand_feature_views if on_demand_feature_views else [] + ) + self._metadata = metadata + + @property + def full_feature_names(self) -> bool: + return self._full_feature_names + + @property + def on_demand_feature_views(self) -> Optional[List[OnDemandFeatureView]]: + return self._on_demand_feature_views + + def _to_df_internal(self) -> pd.DataFrame: + with self._query_generator() as query: + + df = execute_snowflake_statement( + self.snowflake_conn, query + ).fetch_pandas_all() + + return df + + def _to_arrow_internal(self) -> pa.Table: + with self._query_generator() as query: + + pa_table = execute_snowflake_statement( + self.snowflake_conn, query + ).fetch_arrow_all() + + if pa_table: + + return pa_table + else: + empty_result = execute_snowflake_statement(self.snowflake_conn, query) + + return pa.Table.from_pandas( + pd.DataFrame(columns=[md.name for md in empty_result.description]) + ) + + def to_snowflake(self, table_name: str) -> None: + """ Save dataset as a new Snowflake table """ + if self.on_demand_feature_views is not None: + transformed_df = self.to_df() + + write_pandas( + self.snowflake_conn, transformed_df, table_name, auto_create_table=True + ) + + return None + + with self._query_generator() as query: + query = f'CREATE TABLE IF NOT EXISTS "{table_name}" AS ({query});\n' + + execute_snowflake_statement(self.snowflake_conn, query) + + def to_sql(self) -> str: + """ + Returns the SQL query that will be executed in Snowflake to build the historical feature table. 
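When the configured offline store is Snowflake, the retrieval job built above exposes a few Snowflake-specific helpers in addition to to_df/to_arrow. A usage sketch, with an illustrative entity query and destination table name:

from feast import FeatureStore

store = FeatureStore(repo_path=".")
job = store.get_historical_features(
    entity_df='SELECT "driver_id", "event_timestamp" FROM "ENTITY_QUERIES"',
    features=["driver_hourly_stats:conv_rate"],
)

print(job.to_sql())                          # render the templated point-in-time join
job.to_snowflake("DRIVER_TRAINING_2022_01")  # save the result as a new Snowflake table
training_df = job.to_df()                    # or pull it back into pandas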
+ """ + with self._query_generator() as query: + return query + + def to_arrow_chunks(self, arrow_options: Optional[Dict] = None) -> Optional[List]: + with self._query_generator() as query: + + arrow_batches = execute_snowflake_statement( + self.snowflake_conn, query + ).get_result_batches() + + return arrow_batches + + def persist(self, storage: SavedDatasetStorage): + assert isinstance(storage, SavedDatasetSnowflakeStorage) + self.to_snowflake(table_name=storage.snowflake_options.table) + + @property + def metadata(self) -> Optional[RetrievalMetadata]: + return self._metadata + + +def _get_entity_schema( + entity_df: Union[pd.DataFrame, str], + snowflake_conn: SnowflakeConnection, + config: RepoConfig, +) -> Dict[str, np.dtype]: + + if isinstance(entity_df, pd.DataFrame): + + return dict(zip(entity_df.columns, entity_df.dtypes)) + + else: + + query = f"SELECT * FROM ({entity_df}) LIMIT 1" + limited_entity_df = execute_snowflake_statement( + snowflake_conn, query + ).fetch_pandas_all() + + return dict(zip(limited_entity_df.columns, limited_entity_df.dtypes)) + + +def _upload_entity_df( + entity_df: Union[pd.DataFrame, str], + snowflake_conn: SnowflakeConnection, + config: RepoConfig, + table_name: str, +) -> None: + + if isinstance(entity_df, pd.DataFrame): + # Write the data from the DataFrame to the table + write_pandas( + snowflake_conn, + entity_df, + table_name, + auto_create_table=True, + create_temp_table=True, + ) + + return None + elif isinstance(entity_df, str): + # If the entity_df is a string (SQL query), create a Snowflake table out of it, + query = f'CREATE TEMPORARY TABLE "{table_name}" AS ({entity_df})' + execute_snowflake_statement(snowflake_conn, query) + + return None + else: + raise InvalidEntityType(type(entity_df)) + + +def _fix_entity_selections_identifiers(query_context) -> list: + + for i, qc in enumerate(query_context): + for j, es in enumerate(qc.entity_selections): + query_context[i].entity_selections[j] = f'"{es}"'.replace(" AS ", '" AS "') + + return query_context + + +def _get_entity_df_event_timestamp_range( + entity_df: Union[pd.DataFrame, str], + entity_df_event_timestamp_col: str, + snowflake_conn: SnowflakeConnection, +) -> Tuple[datetime, datetime]: + if isinstance(entity_df, pd.DataFrame): + entity_df_event_timestamp = entity_df.loc[ + :, entity_df_event_timestamp_col + ].infer_objects() + if pd.api.types.is_string_dtype(entity_df_event_timestamp): + entity_df_event_timestamp = pd.to_datetime( + entity_df_event_timestamp, utc=True + ) + entity_df_event_timestamp_range = ( + entity_df_event_timestamp.min().to_pydatetime(), + entity_df_event_timestamp.max().to_pydatetime(), + ) + elif isinstance(entity_df, str): + # If the entity_df is a string (SQL query), determine range + # from table + query = f'SELECT MIN("{entity_df_event_timestamp_col}") AS "min_value", MAX("{entity_df_event_timestamp_col}") AS "max_value" FROM ({entity_df})' + results = execute_snowflake_statement(snowflake_conn, query).fetchall() + + entity_df_event_timestamp_range = cast(Tuple[datetime, datetime], results[0]) + else: + raise InvalidEntityType(type(entity_df)) + + return entity_df_event_timestamp_range + + +MULTIPLE_FEATURE_VIEW_POINT_IN_TIME_JOIN = """ +/* + Compute a deterministic hash for the `left_table_query_string` that will be used throughout + all the logic as the field to GROUP BY the data +*/ +WITH "entity_dataframe" AS ( + SELECT *, + "{{entity_df_event_timestamp_col}}" AS "entity_timestamp" + {% for featureview in featureviews %} + {% if featureview.entities %} + ,( 
+ {% for entity in featureview.entities %} + CAST("{{entity}}" AS VARCHAR) || + {% endfor %} + CAST("{{entity_df_event_timestamp_col}}" AS VARCHAR) + ) AS "{{featureview.name}}__entity_row_unique_id" + {% else %} + ,CAST("{{entity_df_event_timestamp_col}}" AS VARCHAR) AS "{{featureview.name}}__entity_row_unique_id" + {% endif %} + {% endfor %} + FROM "{{ left_table_query_string }}" +), + +{% for featureview in featureviews %} + +"{{ featureview.name }}__entity_dataframe" AS ( + SELECT + {{ featureview.entities | map('tojson') | join(', ')}}{% if featureview.entities %},{% else %}{% endif %} + "entity_timestamp", + "{{featureview.name}}__entity_row_unique_id" + FROM "entity_dataframe" + GROUP BY + {{ featureview.entities | map('tojson') | join(', ')}}{% if featureview.entities %},{% else %}{% endif %} + "entity_timestamp", + "{{featureview.name}}__entity_row_unique_id" +), + +/* + This query template performs the point-in-time correctness join for a single feature set table + to the provided entity table. + + 1. We first join the current feature_view to the entity dataframe that has been passed. + This JOIN has the following logic: + - For each row of the entity dataframe, only keep the rows where the `event_timestamp_column` + is less than the one provided in the entity dataframe + - If there a TTL for the current feature_view, also keep the rows where the `event_timestamp_column` + is higher the the one provided minus the TTL + - For each row, Join on the entity key and retrieve the `entity_row_unique_id` that has been + computed previously + + The output of this CTE will contain all the necessary information and already filtered out most + of the data that is not relevant. +*/ + +"{{ featureview.name }}__subquery" AS ( + SELECT + "{{ featureview.event_timestamp_column }}" as "event_timestamp", + {{'"' ~ featureview.created_timestamp_column ~ '" as "created_timestamp",' if featureview.created_timestamp_column else '' }} + {{featureview.entity_selections | join(', ')}}{% if featureview.entity_selections %},{% else %}{% endif %} + {% for feature in featureview.features %} + "{{ feature }}" as {% if full_feature_names %}"{{ featureview.name }}__{{featureview.field_mapping.get(feature, feature)}}"{% else %}"{{ featureview.field_mapping.get(feature, feature) }}"{% endif %}{% if loop.last %}{% else %}, {% endif %} + {% endfor %} + FROM {{ featureview.table_subquery }} + WHERE "{{ featureview.event_timestamp_column }}" <= '{{ featureview.max_event_timestamp }}' + {% if featureview.ttl == 0 %}{% else %} + AND "{{ featureview.event_timestamp_column }}" >= '{{ featureview.min_event_timestamp }}' + {% endif %} +), + +"{{ featureview.name }}__base" AS ( + SELECT + "subquery".*, + "entity_dataframe"."entity_timestamp", + "entity_dataframe"."{{featureview.name}}__entity_row_unique_id" + FROM "{{ featureview.name }}__subquery" AS "subquery" + INNER JOIN "{{ featureview.name }}__entity_dataframe" AS "entity_dataframe" + ON TRUE + AND "subquery"."event_timestamp" <= "entity_dataframe"."entity_timestamp" + + {% if featureview.ttl == 0 %}{% else %} + AND "subquery"."event_timestamp" >= TIMESTAMPADD(second,-{{ featureview.ttl }},"entity_dataframe"."entity_timestamp") + {% endif %} + + {% for entity in featureview.entities %} + AND "subquery"."{{ entity }}" = "entity_dataframe"."{{ entity }}" + {% endfor %} +), + +/* + 2. If the `created_timestamp_column` has been set, we need to + deduplicate the data first. This is done by calculating the + `MAX(created_at_timestamp)` for each event_timestamp. 
+ We then join the data on the next CTE +*/ +{% if featureview.created_timestamp_column %} +"{{ featureview.name }}__dedup" AS ( + SELECT + "{{featureview.name}}__entity_row_unique_id", + "event_timestamp", + MAX("created_timestamp") AS "created_timestamp" + FROM "{{ featureview.name }}__base" + GROUP BY "{{featureview.name}}__entity_row_unique_id", "event_timestamp" +), +{% endif %} + +/* + 3. The data has been filtered during the first CTE "*__base" + Thus we only need to compute the latest timestamp of each feature. +*/ +"{{ featureview.name }}__latest" AS ( + SELECT + "event_timestamp", + {% if featureview.created_timestamp_column %}"created_timestamp",{% endif %} + "{{featureview.name}}__entity_row_unique_id" + FROM + ( + SELECT *, + ROW_NUMBER() OVER( + PARTITION BY "{{featureview.name}}__entity_row_unique_id" + ORDER BY "event_timestamp" DESC{% if featureview.created_timestamp_column %},"created_timestamp" DESC{% endif %} + ) AS "row_number" + FROM "{{ featureview.name }}__base" + {% if featureview.created_timestamp_column %} + INNER JOIN "{{ featureview.name }}__dedup" + USING ("{{featureview.name}}__entity_row_unique_id", "event_timestamp", "created_timestamp") + {% endif %} + ) + WHERE "row_number" = 1 +), + +/* + 4. Once we know the latest value of each feature for a given timestamp, + we can join again the data back to the original "base" dataset +*/ +"{{ featureview.name }}__cleaned" AS ( + SELECT "base".* + FROM "{{ featureview.name }}__base" AS "base" + INNER JOIN "{{ featureview.name }}__latest" + USING( + "{{featureview.name}}__entity_row_unique_id", + "event_timestamp" + {% if featureview.created_timestamp_column %} + ,"created_timestamp" + {% endif %} + ) +){% if loop.last %}{% else %}, {% endif %} + + +{% endfor %} +/* + Joins the outputs of multiple time travel joins to a single table. + The entity_dataframe dataset being our source of truth here. 
+ */ + +SELECT "{{ final_output_feature_names | join('", "')}}" +FROM "entity_dataframe" +{% for featureview in featureviews %} +LEFT JOIN ( + SELECT + "{{featureview.name}}__entity_row_unique_id" + {% for feature in featureview.features %} + ,{% if full_feature_names %}"{{ featureview.name }}__{{featureview.field_mapping.get(feature, feature)}}"{% else %}"{{ featureview.field_mapping.get(feature, feature) }}"{% endif %} + {% endfor %} + FROM "{{ featureview.name }}__cleaned" +) "{{ featureview.name }}__cleaned" USING ("{{featureview.name}}__entity_row_unique_id") +{% endfor %} +""" diff --git a/sdk/python/feast/infra/offline_stores/snowflake_source.py b/sdk/python/feast/infra/offline_stores/snowflake_source.py new file mode 100644 index 0000000000..b5d50be0f4 --- /dev/null +++ b/sdk/python/feast/infra/offline_stores/snowflake_source.py @@ -0,0 +1,315 @@ +from typing import Callable, Dict, Iterable, Optional, Tuple + +from feast import type_map +from feast.data_source import DataSource +from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto +from feast.protos.feast.core.SavedDataset_pb2 import ( + SavedDatasetStorage as SavedDatasetStorageProto, +) +from feast.repo_config import RepoConfig +from feast.saved_dataset import SavedDatasetStorage +from feast.value_type import ValueType + + +class SnowflakeSource(DataSource): + def __init__( + self, + database: Optional[str] = None, + schema: Optional[str] = None, + table: Optional[str] = None, + query: Optional[str] = None, + event_timestamp_column: Optional[str] = "", + created_timestamp_column: Optional[str] = "", + field_mapping: Optional[Dict[str, str]] = None, + date_partition_column: Optional[str] = "", + ): + """ + Creates a SnowflakeSource object. + + Args: + database (optional): Snowflake database where the features are stored. + schema (optional): Snowflake schema in which the table is located. + table (optional): Snowflake table where the features are stored. + event_timestamp_column (optional): Event timestamp column used for point in + time joins of feature values. + query (optional): The query to be executed to obtain the features. + created_timestamp_column (optional): Timestamp column indicating when the + row was created, used for deduplicating rows. + field_mapping (optional): A dictionary mapping of column names in this data + source to column names in a feature table or view. + date_partition_column (optional): Timestamp column used for partitioning. + + """ + super().__init__( + event_timestamp_column, + created_timestamp_column, + field_mapping, + date_partition_column, + ) + + # The default Snowflake schema is named "PUBLIC". + _schema = "PUBLIC" if (database and table and not schema) else schema + + self._snowflake_options = SnowflakeOptions( + database=database, schema=_schema, table=table, query=query + ) + + @staticmethod + def from_proto(data_source: DataSourceProto): + """ + Creates a SnowflakeSource from a protobuf representation of a SnowflakeSource. + + Args: + data_source: A protobuf representation of a SnowflakeSource + + Returns: + A SnowflakeSource object based on the data_source protobuf. 
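A sketch of declaring the new source type defined above. Names are illustrative; note that the schema falls back to "PUBLIC" when only database and table are supplied:

from feast.infra.offline_stores.snowflake_source import SnowflakeSource

driver_stats = SnowflakeSource(
    database="FEAST",                 # schema defaults to "PUBLIC" here
    table="DRIVER_HOURLY_STATS",
    event_timestamp_column="event_timestamp",
    created_timestamp_column="created",
)

# Alternatively, a query-backed source:
driver_stats_query = SnowflakeSource(
    query="SELECT * FROM FEAST.PUBLIC.DRIVER_HOURLY_STATS",
    event_timestamp_column="event_timestamp",
)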
+ """ + return SnowflakeSource( + field_mapping=dict(data_source.field_mapping), + database=data_source.snowflake_options.database, + schema=data_source.snowflake_options.schema, + table=data_source.snowflake_options.table, + event_timestamp_column=data_source.event_timestamp_column, + created_timestamp_column=data_source.created_timestamp_column, + date_partition_column=data_source.date_partition_column, + query=data_source.snowflake_options.query, + ) + + def __eq__(self, other): + if not isinstance(other, SnowflakeSource): + raise TypeError( + "Comparisons should only involve SnowflakeSource class objects." + ) + + return ( + self.snowflake_options.database == other.snowflake_options.database + and self.snowflake_options.schema == other.snowflake_options.schema + and self.snowflake_options.table == other.snowflake_options.table + and self.snowflake_options.query == other.snowflake_options.query + and self.event_timestamp_column == other.event_timestamp_column + and self.created_timestamp_column == other.created_timestamp_column + and self.field_mapping == other.field_mapping + ) + + @property + def database(self): + """Returns the database of this snowflake source.""" + return self._snowflake_options.database + + @property + def schema(self): + """Returns the schema of this snowflake source.""" + return self._snowflake_options.schema + + @property + def table(self): + """Returns the table of this snowflake source.""" + return self._snowflake_options.table + + @property + def query(self): + """Returns the snowflake options of this snowflake source.""" + return self._snowflake_options.query + + @property + def snowflake_options(self): + """Returns the snowflake options of this snowflake source.""" + return self._snowflake_options + + @snowflake_options.setter + def snowflake_options(self, _snowflake_options): + """Sets the snowflake options of this snowflake source.""" + self._snowflake_options = _snowflake_options + + def to_proto(self) -> DataSourceProto: + """ + Converts a SnowflakeSource object to its protobuf representation. + + Returns: + A DataSourceProto object. + """ + data_source_proto = DataSourceProto( + type=DataSourceProto.BATCH_SNOWFLAKE, + field_mapping=self.field_mapping, + snowflake_options=self.snowflake_options.to_proto(), + ) + + data_source_proto.event_timestamp_column = self.event_timestamp_column + data_source_proto.created_timestamp_column = self.created_timestamp_column + data_source_proto.date_partition_column = self.date_partition_column + + return data_source_proto + + def validate(self, config: RepoConfig): + # As long as the query gets successfully executed, or the table exists, + # the data source is validated. We don't need the results though. + self.get_table_column_names_and_types(config) + + def get_table_query_string(self) -> str: + """Returns a string that can directly be used to reference this table in SQL.""" + if self.database and self.table: + return f'"{self.database}"."{self.schema}"."{self.table}"' + elif self.table: + return f'"{self.table}"' + else: + return f"({self.query})" + + @staticmethod + def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]: + return type_map.snowflake_python_type_to_feast_value_type + + def get_table_column_names_and_types( + self, config: RepoConfig + ) -> Iterable[Tuple[str, str]]: + """ + Returns a mapping of column names to types for this snowflake source. 
+ + Args: + config: A RepoConfig describing the feature repo + """ + + from feast.infra.offline_stores.snowflake import SnowflakeOfflineStoreConfig + from feast.infra.utils.snowflake_utils import ( + execute_snowflake_statement, + get_snowflake_conn, + ) + + assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) + + snowflake_conn = get_snowflake_conn(config.offline_store) + + if self.database and self.table: + query = f'SELECT * FROM "{self.database}"."{self.schema}"."{self.table}" LIMIT 1' + elif self.table: + query = f'SELECT * FROM "{self.table}" LIMIT 1' + else: + query = f"SELECT * FROM ({self.query}) LIMIT 1" + + result = execute_snowflake_statement(snowflake_conn, query).fetch_pandas_all() + + if not result.empty: + metadata = result.dtypes.apply(str) + return list(zip(metadata.index, metadata)) + else: + raise ValueError("The following source:\n" + query + "\n ... is empty") + + +class SnowflakeOptions: + """ + DataSource snowflake options used to source features from snowflake query. + """ + + def __init__( + self, + database: Optional[str], + schema: Optional[str], + table: Optional[str], + query: Optional[str], + ): + self._database = database + self._schema = schema + self._table = table + self._query = query + + @property + def query(self): + """Returns the snowflake SQL query referenced by this source.""" + return self._query + + @query.setter + def query(self, query): + """Sets the snowflake SQL query referenced by this source.""" + self._query = query + + @property + def database(self): + """Returns the database name of this snowflake table.""" + return self._database + + @database.setter + def database(self, database): + """Sets the database ref of this snowflake table.""" + self._database = database + + @property + def schema(self): + """Returns the schema name of this snowflake table.""" + return self._schema + + @schema.setter + def schema(self, schema): + """Sets the schema of this snowflake table.""" + self._schema = schema + + @property + def table(self): + """Returns the table name of this snowflake table.""" + return self._table + + @table.setter + def table(self, table): + """Sets the table ref of this snowflake table.""" + self._table = table + + @classmethod + def from_proto(cls, snowflake_options_proto: DataSourceProto.SnowflakeOptions): + """ + Creates a SnowflakeOptions from a protobuf representation of a snowflake option. + + Args: + snowflake_options_proto: A protobuf representation of a DataSource + + Returns: + A SnowflakeOptions object based on the snowflake_options protobuf. + """ + snowflake_options = cls( + database=snowflake_options_proto.database, + schema=snowflake_options_proto.schema, + table=snowflake_options_proto.table, + query=snowflake_options_proto.query, + ) + + return snowflake_options + + def to_proto(self) -> DataSourceProto.SnowflakeOptions: + """ + Converts an SnowflakeOptionsProto object to its protobuf representation. + + Returns: + A SnowflakeOptionsProto protobuf. 
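The options object above is what actually round-trips through the registry protos. A small sketch of that round trip, with illustrative values:

from feast.infra.offline_stores.snowflake_source import SnowflakeOptions

options = SnowflakeOptions(
    database="FEAST", schema="PUBLIC", table="DRIVER_STATS", query=None
)
restored = SnowflakeOptions.from_proto(options.to_proto())
assert (restored.database, restored.schema, restored.table) == ("FEAST", "PUBLIC", "DRIVER_STATS")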
+ """ + snowflake_options_proto = DataSourceProto.SnowflakeOptions( + database=self.database, + schema=self.schema, + table=self.table, + query=self.query, + ) + + return snowflake_options_proto + + +class SavedDatasetSnowflakeStorage(SavedDatasetStorage): + _proto_attr_name = "snowflake_storage" + + snowflake_options: SnowflakeOptions + + def __init__(self, table_ref: str): + self.snowflake_options = SnowflakeOptions( + database=None, schema=None, table=table_ref, query=None + ) + + @staticmethod + def from_proto(storage_proto: SavedDatasetStorageProto) -> SavedDatasetStorage: + + return SavedDatasetSnowflakeStorage( + table_ref=SnowflakeOptions.from_proto(storage_proto.snowflake_storage).table + ) + + def to_proto(self) -> SavedDatasetStorageProto: + return SavedDatasetStorageProto( + snowflake_storage=self.snowflake_options.to_proto() + ) + + def to_data_source(self) -> DataSource: + return SnowflakeSource(table=self.snowflake_options.table) diff --git a/sdk/python/feast/infra/online_stores/datastore.py b/sdk/python/feast/infra/online_stores/datastore.py index f788f1bc74..a29a8393e2 100644 --- a/sdk/python/feast/infra/online_stores/datastore.py +++ b/sdk/python/feast/infra/online_stores/datastore.py @@ -23,8 +23,9 @@ from pydantic.typing import Literal from feast import Entity, utils +from feast.errors import FeastProviderLoginError from feast.feature_view import FeatureView -from feast.infra.infra_object import InfraObject +from feast.infra.infra_object import DATASTORE_INFRA_OBJECT_CLASS_TYPE, InfraObject from feast.infra.online_stores.helpers import compute_entity_id from feast.infra.online_stores.online_store import OnlineStore from feast.protos.feast.core.DatastoreTable_pb2 import ( @@ -43,7 +44,7 @@ from google.cloud import datastore from google.cloud.datastore.client import Key except ImportError as e: - from feast.errors import FeastExtrasDependencyImportError, FeastProviderLoginError + from feast.errors import FeastExtrasDependencyImportError raise FeastExtrasDependencyImportError("gcp", str(e)) @@ -332,14 +333,11 @@ class DatastoreTable(InfraObject): name: The name of the table. project_id (optional): The GCP project id. namespace (optional): Datastore namespace. - client: Datastore client. 
""" project: str - name: str project_id: Optional[str] namespace: Optional[str] - client: datastore.Client def __init__( self, @@ -348,55 +346,74 @@ def __init__( project_id: Optional[str] = None, namespace: Optional[str] = None, ): + super().__init__(name) self.project = project - self.name = name self.project_id = project_id self.namespace = namespace - self.client = _initialize_client(self.project_id, self.namespace) - def to_proto(self) -> InfraObjectProto: + def to_infra_object_proto(self) -> InfraObjectProto: + datastore_table_proto = self.to_proto() + return InfraObjectProto( + infra_object_class_type=DATASTORE_INFRA_OBJECT_CLASS_TYPE, + datastore_table=datastore_table_proto, + ) + + def to_proto(self) -> Any: datastore_table_proto = DatastoreTableProto() datastore_table_proto.project = self.project datastore_table_proto.name = self.name if self.project_id: - datastore_table_proto.project_id.FromString(bytes(self.project_id, "utf-8")) + datastore_table_proto.project_id.value = self.project_id if self.namespace: - datastore_table_proto.namespace.FromString(bytes(self.namespace, "utf-8")) - - return InfraObjectProto( - infra_object_class_type="feast.infra.online_stores.datastore.DatastoreTable", - datastore_table=datastore_table_proto, - ) + datastore_table_proto.namespace.value = self.namespace + return datastore_table_proto @staticmethod - def from_proto(infra_object_proto: InfraObjectProto) -> Any: + def from_infra_object_proto(infra_object_proto: InfraObjectProto) -> Any: datastore_table = DatastoreTable( project=infra_object_proto.datastore_table.project, name=infra_object_proto.datastore_table.name, ) + # Distinguish between null and empty string, since project_id and namespace are StringValues. if infra_object_proto.datastore_table.HasField("project_id"): datastore_table.project_id = ( - infra_object_proto.datastore_table.project_id.SerializeToString() - ).decode("utf-8") + infra_object_proto.datastore_table.project_id.value + ) if infra_object_proto.datastore_table.HasField("namespace"): datastore_table.namespace = ( - infra_object_proto.datastore_table.namespace.SerializeToString() - ).decode("utf-8") + infra_object_proto.datastore_table.namespace.value + ) + + return datastore_table + + @staticmethod + def from_proto(datastore_table_proto: DatastoreTableProto) -> Any: + datastore_table = DatastoreTable( + project=datastore_table_proto.project, name=datastore_table_proto.name, + ) + + # Distinguish between null and empty string, since project_id and namespace are StringValues. 
+ if datastore_table_proto.HasField("project_id"): + datastore_table.project_id = datastore_table_proto.project_id.value + if datastore_table_proto.HasField("namespace"): + datastore_table.namespace = datastore_table_proto.namespace.value return datastore_table def update(self): - key = self.client.key("Project", self.project, "Table", self.name) + client = _initialize_client(self.project_id, self.namespace) + key = client.key("Project", self.project, "Table", self.name) entity = datastore.Entity( key=key, exclude_from_indexes=("created_ts", "event_ts", "values") ) entity.update({"created_ts": datetime.utcnow()}) - self.client.put(entity) + client.put(entity) def teardown(self): - key = self.client.key("Project", self.project, "Table", self.name) - _delete_all_values(self.client, key) + client = _initialize_client(self.project_id, self.namespace) + key = client.key("Project", self.project, "Table", self.name) + _delete_all_values(client, key) # Delete the table metadata datastore entity - self.client.delete(key) + client.delete(key) diff --git a/sdk/python/feast/infra/online_stores/dynamodb.py b/sdk/python/feast/infra/online_stores/dynamodb.py index 4b42b98c48..61161ea3c6 100644 --- a/sdk/python/feast/infra/online_stores/dynamodb.py +++ b/sdk/python/feast/infra/online_stores/dynamodb.py @@ -20,7 +20,7 @@ from pydantic.typing import Literal from feast import Entity, FeatureView, utils -from feast.infra.infra_object import InfraObject +from feast.infra.infra_object import DYNAMODB_INFRA_OBJECT_CLASS_TYPE, InfraObject from feast.infra.online_stores.helpers import compute_entity_id from feast.infra.online_stores.online_store import OnlineStore from feast.protos.feast.core.DynamoDBTable_pb2 import ( @@ -183,7 +183,7 @@ def online_read( val = ValueProto() val.ParseFromString(value_bin['B']) res[feature_name] = val - result.append((item["event_ts"]['S'], res)) + result.append((datetime.fromisoformat(item["event_ts"]['S']), res)) else: result.append((None, None)) return result @@ -324,30 +324,38 @@ class DynamoDBTable(InfraObject): region: The region of the table. 
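The DynamoDB read path above now converts the stored event timestamp back into a datetime instead of returning the raw string. A quick illustration of the conversion; the stored value shown here is illustrative:

from datetime import datetime

# The event timestamp comes back from DynamoDB as a string attribute ({"S": ...});
# online_read now parses it before returning the result tuple.
stored = {"event_ts": {"S": "2022-01-15T10:30:00+00:00"}}
event_ts = datetime.fromisoformat(stored["event_ts"]["S"])
print(event_ts, event_ts.tzinfo)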
""" - name: str region: str def __init__(self, name: str, region: str): - self.name = name + super().__init__(name) self.region = region - def to_proto(self) -> InfraObjectProto: - dynamodb_table_proto = DynamoDBTableProto() - dynamodb_table_proto.name = self.name - dynamodb_table_proto.region = self.region - + def to_infra_object_proto(self) -> InfraObjectProto: + dynamodb_table_proto = self.to_proto() return InfraObjectProto( - infra_object_class_type="feast.infra.online_stores.dynamodb.DynamoDBTable", + infra_object_class_type=DYNAMODB_INFRA_OBJECT_CLASS_TYPE, dynamodb_table=dynamodb_table_proto, ) + def to_proto(self) -> Any: + dynamodb_table_proto = DynamoDBTableProto() + dynamodb_table_proto.name = self.name + dynamodb_table_proto.region = self.region + return dynamodb_table_proto + @staticmethod - def from_proto(infra_object_proto: InfraObjectProto) -> Any: + def from_infra_object_proto(infra_object_proto: InfraObjectProto) -> Any: return DynamoDBTable( name=infra_object_proto.dynamodb_table.name, region=infra_object_proto.dynamodb_table.region, ) + @staticmethod + def from_proto(dynamodb_table_proto: DynamoDBTableProto) -> Any: + return DynamoDBTable( + name=dynamodb_table_proto.name, region=dynamodb_table_proto.region, + ) + def update(self): dynamodb_client = _initialize_dynamodb_client(region=self.region) dynamodb_resource = _initialize_dynamodb_resource(region=self.region) diff --git a/sdk/python/feast/infra/online_stores/online_store.py b/sdk/python/feast/infra/online_stores/online_store.py index b2aa1e46d0..1f177996de 100644 --- a/sdk/python/feast/infra/online_stores/online_store.py +++ b/sdk/python/feast/infra/online_stores/online_store.py @@ -18,6 +18,8 @@ from feast import Entity from feast.feature_view import FeatureView +from feast.infra.infra_object import InfraObject +from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.repo_config import RepoConfig @@ -92,6 +94,18 @@ def update( ): ... + def plan( + self, config: RepoConfig, desired_registry_proto: RegistryProto + ) -> List[InfraObject]: + """ + Returns the set of InfraObjects required to support the desired registry. + + Args: + config: The RepoConfig for the current FeatureStore. + desired_registry_proto: The desired registry, in proto form. 
+ """ + return [] + @abstractmethod def teardown( self, diff --git a/sdk/python/feast/infra/online_stores/redis.py b/sdk/python/feast/infra/online_stores/redis.py index 9f20339343..b557fddc68 100644 --- a/sdk/python/feast/infra/online_stores/redis.py +++ b/sdk/python/feast/infra/online_stores/redis.py @@ -41,7 +41,7 @@ try: from redis import Redis - from rediscluster import RedisCluster + from redis.cluster import ClusterNode, RedisCluster except ImportError as e: from feast.errors import FeastExtrasDependencyImportError @@ -72,11 +72,11 @@ class RedisOnlineStoreConfig(FeastConfigBaseModel): class RedisOnlineStore(OnlineStore): _client: Optional[Union[Redis, RedisCluster]] = None - def delete_table_values(self, config: RepoConfig, table: FeatureView): + def delete_entity_values(self, config: RepoConfig, join_keys: List[str]): client = self._get_client(config.online_store) deleted_count = 0 - pipeline = client.pipeline() - prefix = _redis_key_prefix(table.entities) + pipeline = client.pipeline(transaction=False) + prefix = _redis_key_prefix(join_keys) for _k in client.scan_iter( b"".join([prefix, b"*", config.project.encode("utf8")]) @@ -85,7 +85,7 @@ def delete_table_values(self, config: RepoConfig, table: FeatureView): deleted_count += 1 pipeline.execute() - logger.debug(f"Deleted {deleted_count} keys for {table.name}") + logger.debug(f"Deleted {deleted_count} rows for entity {', '.join(join_keys)}") @log_exceptions_and_usage(online_store="redis") def update( @@ -98,10 +98,16 @@ def update( partial: bool, ): """ - We delete the keys in redis for tables/views being removed. + Look for join_keys (list of entities) that are not in use anymore + (usually this happens when the last feature view that was using specific compound key is deleted) + and remove all features attached to this "join_keys". """ - for table in tables_to_delete: - self.delete_table_values(config, table) + join_keys_to_keep = set(tuple(table.entities) for table in tables_to_keep) + + join_keys_to_delete = set(tuple(table.entities) for table in tables_to_delete) + + for join_keys in join_keys_to_delete - join_keys_to_keep: + self.delete_entity_values(config, list(join_keys)) def teardown( self, @@ -112,8 +118,10 @@ def teardown( """ We delete the keys in redis for tables/views being removed. 
""" - for table in tables: - self.delete_table_values(config, table) + join_keys_to_delete = set(tuple(table.entities) for table in tables) + + for join_keys in join_keys_to_delete: + self.delete_entity_values(config, list(join_keys)) @staticmethod def _parse_connection_string(connection_string: str): @@ -152,7 +160,9 @@ def _get_client(self, online_store_config: RedisOnlineStoreConfig): online_store_config.connection_string ) if online_store_config.redis_type == RedisType.redis_cluster: - kwargs["startup_nodes"] = startup_nodes + kwargs["startup_nodes"] = [ + ClusterNode(**node) for node in startup_nodes + ] self._client = RedisCluster(**kwargs) else: kwargs["host"] = startup_nodes[0]["host"] @@ -180,7 +190,7 @@ def online_write_batch( ts_key = f"_ts:{feature_view}" keys = [] # redis pipelining optimization: send multiple commands to redis server without waiting for every reply - with client.pipeline() as pipe: + with client.pipeline(transaction=False) as pipe: # check if a previous record under the key bin exists # TODO: investigate if check and set is a better approach rather than pulling all entity ts and then setting # it may be significantly slower but avoids potential (rare) race conditions @@ -254,7 +264,7 @@ def online_read( for entity_key in entity_keys: redis_key_bin = _redis_key(project, entity_key) keys.append(redis_key_bin) - with client.pipeline() as pipe: + with client.pipeline(transaction=False) as pipe: for redis_key_bin in keys: pipe.hmget(redis_key_bin, hset_keys) with tracing_span(name="remote_call"): @@ -277,13 +287,13 @@ def _get_features_for_entity( res_ts = Timestamp() ts_val = res_val.pop(f"_ts:{feature_view}") if ts_val: - res_ts.ParseFromString(ts_val) + res_ts.ParseFromString(bytes(ts_val)) res = {} for feature_name, val_bin in res_val.items(): val = ValueProto() if val_bin: - val.ParseFromString(val_bin) + val.ParseFromString(bytes(val_bin)) res[feature_name] = val if not res: diff --git a/sdk/python/feast/infra/online_stores/sqlite.py b/sdk/python/feast/infra/online_stores/sqlite.py index 206e2eb0d5..e65aab4e7b 100644 --- a/sdk/python/feast/infra/online_stores/sqlite.py +++ b/sdk/python/feast/infra/online_stores/sqlite.py @@ -23,10 +23,11 @@ from feast import Entity from feast.feature_view import FeatureView -from feast.infra.infra_object import InfraObject +from feast.infra.infra_object import SQLITE_INFRA_OBJECT_CLASS_TYPE, InfraObject from feast.infra.key_encoding_utils import serialize_entity_key from feast.infra.online_stores.online_store import OnlineStore from feast.protos.feast.core.InfraObject_pb2 import InfraObject as InfraObjectProto +from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto from feast.protos.feast.core.SqliteTable_pb2 import SqliteTable as SqliteTableProto from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto @@ -199,6 +200,21 @@ def update( for table in tables_to_delete: conn.execute(f"DROP TABLE IF EXISTS {_table_id(project, table)}") + @log_exceptions_and_usage(online_store="sqlite") + def plan( + self, config: RepoConfig, desired_registry_proto: RegistryProto + ) -> List[InfraObject]: + project = config.project + + infra_objects: List[InfraObject] = [ + SqliteTable( + path=self._get_db_path(config), + name=_table_id(project, FeatureView.from_proto(view)), + ) + for view in desired_registry_proto.feature_views + ] + return infra_objects + def teardown( self, config: RepoConfig, @@ -233,31 +249,37 @@ class 
SqliteTable(InfraObject): """ path: str - name: str conn: sqlite3.Connection def __init__(self, path: str, name: str): + super().__init__(name) self.path = path - self.name = name self.conn = _initialize_conn(path) - def to_proto(self) -> InfraObjectProto: - sqlite_table_proto = SqliteTableProto() - sqlite_table_proto.path = self.path - sqlite_table_proto.name = self.name - + def to_infra_object_proto(self) -> InfraObjectProto: + sqlite_table_proto = self.to_proto() return InfraObjectProto( - infra_object_class_type="feast.infra.online_store.sqlite.SqliteTable", + infra_object_class_type=SQLITE_INFRA_OBJECT_CLASS_TYPE, sqlite_table=sqlite_table_proto, ) + def to_proto(self) -> Any: + sqlite_table_proto = SqliteTableProto() + sqlite_table_proto.path = self.path + sqlite_table_proto.name = self.name + return sqlite_table_proto + @staticmethod - def from_proto(infra_object_proto: InfraObjectProto) -> Any: + def from_infra_object_proto(infra_object_proto: InfraObjectProto) -> Any: return SqliteTable( path=infra_object_proto.sqlite_table.path, name=infra_object_proto.sqlite_table.name, ) + @staticmethod + def from_proto(sqlite_table_proto: SqliteTableProto) -> Any: + return SqliteTable(path=sqlite_table_proto.path, name=sqlite_table_proto.name,) + def update(self): self.conn.execute( f"CREATE TABLE IF NOT EXISTS {self.name} (entity_key BLOB, feature_name TEXT, value BLOB, event_ts timestamp, created_ts timestamp, PRIMARY KEY(entity_key, feature_name))" diff --git a/sdk/python/feast/infra/passthrough_provider.py b/sdk/python/feast/infra/passthrough_provider.py index 98937ce1fa..3468b9dc92 100644 --- a/sdk/python/feast/infra/passthrough_provider.py +++ b/sdk/python/feast/infra/passthrough_provider.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timedelta from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union import pandas @@ -20,7 +20,9 @@ from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.registry import Registry from feast.repo_config import RepoConfig +from feast.saved_dataset import SavedDataset from feast.usage import RatioSampler, log_exceptions_and_usage, set_usage_attribute +from feast.utils import make_tzaware DEFAULT_BATCH_SIZE = 10_000 @@ -35,7 +37,11 @@ def __init__(self, config: RepoConfig): self.repo_config = config self.offline_store = get_offline_store_from_config(config.offline_store) - self.online_store = get_online_store_from_config(config.online_store) + self.online_store = ( + get_online_store_from_config(config.online_store) + if config.online_store + else None + ) def update_infra( self, @@ -47,20 +53,24 @@ def update_infra( partial: bool, ): set_usage_attribute("provider", self.__class__.__name__) - self.online_store.update( - config=self.repo_config, - tables_to_delete=tables_to_delete, - tables_to_keep=tables_to_keep, - entities_to_keep=entities_to_keep, - entities_to_delete=entities_to_delete, - partial=partial, - ) + + # Call update only if there is an online store + if self.online_store: + self.online_store.update( + config=self.repo_config, + tables_to_delete=tables_to_delete, + tables_to_keep=tables_to_keep, + entities_to_keep=entities_to_keep, + entities_to_delete=entities_to_delete, + partial=partial, + ) def teardown_infra( self, project: str, tables: Sequence[FeatureView], entities: Sequence[Entity], ) -> None: set_usage_attribute("provider", self.__class__.__name__) - self.online_store.teardown(self.repo_config, tables, entities) + if self.online_store: + 
self.online_store.teardown(self.repo_config, tables, entities) def online_write_batch( self, @@ -72,7 +82,8 @@ def online_write_batch( progress: Optional[Callable[[int], Any]], ) -> None: set_usage_attribute("provider", self.__class__.__name__) - self.online_store.online_write_batch(config, table, data, progress) + if self.online_store: + self.online_store.online_write_batch(config, table, data, progress) @log_exceptions_and_usage(sampler=RatioSampler(ratio=0.001)) def online_read( @@ -81,12 +92,13 @@ def online_read( table: FeatureView, entity_keys: List[EntityKeyProto], requested_features: List[str] = None, - ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]: + ) -> List: set_usage_attribute("provider", self.__class__.__name__) - result = self.online_store.online_read( - config, table, entity_keys, requested_features - ) - + result = [] + if self.online_store: + result = self.online_store.online_read( + config, table, entity_keys, requested_features + ) return result def ingest_df( @@ -177,4 +189,28 @@ def get_historical_features( project=project, full_feature_names=full_feature_names, ) + return job + + def retrieve_saved_dataset( + self, config: RepoConfig, dataset: SavedDataset + ) -> RetrievalJob: + set_usage_attribute("provider", self.__class__.__name__) + + feature_name_columns = [ + ref.replace(":", "__") if dataset.full_feature_names else ref.split(":")[1] + for ref in dataset.features + ] + + # ToDo: replace hardcoded value + event_ts_column = "event_timestamp" + + return self.offline_store.pull_all_from_table_or_query( + config=config, + data_source=dataset.storage.to_data_source(), + join_key_columns=dataset.join_keys, + feature_name_columns=feature_name_columns, + event_timestamp_column=event_ts_column, + start_date=make_tzaware(dataset.min_event_timestamp), # type: ignore + end_date=make_tzaware(dataset.max_event_timestamp + timedelta(seconds=1)), # type: ignore + ) diff --git a/sdk/python/feast/infra/provider.py b/sdk/python/feast/infra/provider.py index 3c761f1195..b3f1029242 100644 --- a/sdk/python/feast/infra/provider.py +++ b/sdk/python/feast/infra/provider.py @@ -4,6 +4,7 @@ from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union +import dask.dataframe as dd import pandas import pyarrow from tqdm import tqdm @@ -12,12 +13,15 @@ from feast.entity import Entity from feast.feature_view import DUMMY_ENTITY_ID, FeatureView from feast.importer import import_class +from feast.infra.infra_object import Infra from feast.infra.offline_stores.offline_store import RetrievalJob from feast.on_demand_feature_view import OnDemandFeatureView +from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.registry import Registry from feast.repo_config import RepoConfig +from feast.saved_dataset import SavedDataset from feast.type_map import python_values_to_proto_values from feast.value_type import ValueType @@ -61,6 +65,18 @@ def update_infra( """ ... + def plan_infra( + self, config: RepoConfig, desired_registry_proto: RegistryProto + ) -> Infra: + """ + Returns the Infra required to support the desired registry. + + Args: + config: The RepoConfig for the current FeatureStore. + desired_registry_proto: The desired registry, in proto form. 
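# Illustrative sketch (not part of the patch): how the feature_name_columns
# mapping in retrieve_saved_dataset() above resolves hypothetical feature refs,
# depending on whether the saved dataset was built with full feature names.
refs = ["driver_hourly_stats:conv_rate", "driver_hourly_stats:acc_rate"]
for full_feature_names in (True, False):
    cols = [
        ref.replace(":", "__") if full_feature_names else ref.split(":")[1]
        for ref in refs
    ]
    print(cols)
# True  -> ['driver_hourly_stats__conv_rate', 'driver_hourly_stats__acc_rate']
# False -> ['conv_rate', 'acc_rate']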
+ """ + return Infra() + @abc.abstractmethod def teardown_infra( self, project: str, tables: Sequence[FeatureView], entities: Sequence[Entity], @@ -155,6 +171,21 @@ def online_read( """ ... + @abc.abstractmethod + def retrieve_saved_dataset( + self, config: RepoConfig, dataset: SavedDataset + ) -> RetrievalJob: + """ + Read saved dataset from offline store. + All parameters for retrieval (like path, datetime boundaries, column names for both keys and features, etc) + are determined from SavedDataset object. + + Returns: + RetrievalJob object, which is lazy wrapper for actual query performed under the hood. + + """ + ... + def get_feature_server_endpoint(self) -> Optional[str]: """Returns endpoint for the feature server, if it exists.""" return None @@ -282,6 +313,17 @@ def _run_field_mapping( return table +def _run_dask_field_mapping( + table: dd.DataFrame, field_mapping: Dict[str, str], +): + if field_mapping: + # run field mapping in the forward direction + table = table.rename(columns=field_mapping) + table = table.persist() + + return table + + def _coerce_datetime(ts): """ Depending on underlying time resolution, arrow to_pydict() sometimes returns pandas @@ -291,7 +333,6 @@ def _coerce_datetime(ts): same way. We convert it to normal datetime so that consumers downstream don't have to deal with these quirks. """ - if isinstance(ts, pandas.Timestamp): return ts.to_pydatetime() else: diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py index 6211c75e37..b25454ca6a 100644 --- a/sdk/python/feast/infra/utils/aws_utils.py +++ b/sdk/python/feast/infra/utils/aws_utils.py @@ -15,7 +15,11 @@ wait_exponential, ) -from feast.errors import RedshiftCredentialsError, RedshiftQueryError +from feast.errors import ( + RedshiftCredentialsError, + RedshiftQueryError, + RedshiftTableNameTooLong, +) from feast.type_map import pa_to_redshift_value_type try: @@ -28,6 +32,9 @@ raise FeastExtrasDependencyImportError("aws", str(e)) +REDSHIFT_TABLE_NAME_MAX_LENGTH = 127 + + def get_redshift_data_client(aws_region: str): """ Get the Redshift Data API Service client for the given AWS region. @@ -184,7 +191,7 @@ def upload_df_to_redshift( iam_role: str, table_name: str, df: pd.DataFrame, -) -> None: +): """Uploads a Pandas DataFrame to Redshift as a new table. The caller is responsible for deleting the table when no longer necessary. @@ -208,9 +215,12 @@ def upload_df_to_redshift( table_name: The name of the new Redshift table where we copy the dataframe df: The Pandas DataFrame to upload - Returns: None - + Raises: + RedshiftTableNameTooLong: The specified table name is too long. 
""" + if len(table_name) > REDSHIFT_TABLE_NAME_MAX_LENGTH: + raise RedshiftTableNameTooLong(table_name) + bucket, key = get_bucket_and_key(s3_path) # Drop the index so that we dont have unnecessary columns diff --git a/sdk/python/feast/infra/utils/snowflake_utils.py b/sdk/python/feast/infra/utils/snowflake_utils.py new file mode 100644 index 0000000000..0e704a385c --- /dev/null +++ b/sdk/python/feast/infra/utils/snowflake_utils.py @@ -0,0 +1,286 @@ +import configparser +import os +import random +import string +from logging import getLogger +from tempfile import TemporaryDirectory +from typing import Dict, Iterator, List, Optional, Tuple, cast + +import pandas as pd +from tenacity import ( + retry, + retry_if_exception_type, + stop_after_attempt, + wait_exponential, +) + +from feast.errors import SnowflakeIncompleteConfig, SnowflakeQueryUnknownError + +try: + import snowflake.connector + from snowflake.connector import ProgrammingError, SnowflakeConnection + from snowflake.connector.cursor import SnowflakeCursor +except ImportError as e: + from feast.errors import FeastExtrasDependencyImportError + + raise FeastExtrasDependencyImportError("snowflake", str(e)) + + +getLogger("snowflake.connector.cursor").disabled = True +getLogger("snowflake.connector.connection").disabled = True +getLogger("snowflake.connector.network").disabled = True +logger = getLogger(__name__) + + +def execute_snowflake_statement(conn: SnowflakeConnection, query) -> SnowflakeCursor: + cursor = conn.cursor().execute(query) + if cursor is None: + raise SnowflakeQueryUnknownError(query) + return cursor + + +def get_snowflake_conn(config, autocommit=True) -> SnowflakeConnection: + if config.type == "snowflake.offline": + config_header = "connections.feast_offline_store" + + config = dict(config) + + # read config file + config_reader = configparser.ConfigParser() + config_reader.read([config["config_path"]]) + if config_reader.has_section(config_header): + kwargs = dict(config_reader[config_header]) + else: + kwargs = {} + + kwargs.update((k, v) for k, v in config.items() if v is not None) + + try: + conn = snowflake.connector.connect( + account=kwargs["account"], + user=kwargs["user"], + password=kwargs["password"], + role=f'''"{kwargs['role']}"''', + warehouse=f'''"{kwargs['warehouse']}"''', + database=f'''"{kwargs['database']}"''', + schema=f'''"{kwargs['schema_']}"''', + application="feast", + autocommit=autocommit, + ) + + return conn + except KeyError as e: + raise SnowflakeIncompleteConfig(e) + + +# TO DO -- sfc-gh-madkins +# Remove dependency on write_pandas function by falling back to native snowflake python connector +# Current issue is datetime[ns] types are read incorrectly in Snowflake, need to coerce to datetime[ns, UTC] +def write_pandas( + conn: SnowflakeConnection, + df: pd.DataFrame, + table_name: str, + database: Optional[str] = None, + schema: Optional[str] = None, + chunk_size: Optional[int] = None, + compression: str = "gzip", + on_error: str = "abort_statement", + parallel: int = 4, + quote_identifiers: bool = True, + auto_create_table: bool = False, + create_temp_table: bool = False, +): + """Allows users to most efficiently write back a pandas DataFrame to Snowflake. + + It works by dumping the DataFrame into Parquet files, uploading them and finally copying their data into the table. + + Returns whether all files were ingested correctly, number of chunks uploaded, and number of rows ingested + with all of the COPY INTO command's output for debugging purposes. 
+ + Example usage: + import pandas + from snowflake.connector.pandas_tools import write_pandas + + df = pandas.DataFrame([('Mark', 10), ('Luke', 20)], columns=['name', 'balance']) + success, nchunks, nrows, _ = write_pandas(cnx, df, 'customers') + + Args: + conn: Connection to be used to communicate with Snowflake. + df: Dataframe we'd like to write back. + table_name: Table name where we want to insert into. + database: Database schema and table is in, if not provided the default one will be used (Default value = None). + schema: Schema table is in, if not provided the default one will be used (Default value = None). + chunk_size: Number of elements to be inserted once, if not provided all elements will be dumped once + (Default value = None). + compression: The compression used on the Parquet files, can only be gzip, or snappy. Gzip gives supposedly a + better compression, while snappy is faster. Use whichever is more appropriate (Default value = 'gzip'). + on_error: Action to take when COPY INTO statements fail, default follows documentation at: + https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#copy-options-copyoptions + (Default value = 'abort_statement'). + parallel: Number of threads to be used when uploading chunks, default follows documentation at: + https://docs.snowflake.com/en/sql-reference/sql/put.html#optional-parameters (Default value = 4). + quote_identifiers: By default, identifiers, specifically database, schema, table and column names + (from df.columns) will be quoted. If set to False, identifiers are passed on to Snowflake without quoting. + I.e. identifiers will be coerced to uppercase by Snowflake. (Default value = True) + auto_create_table: When true, will automatically create a table with corresponding columns for each column in + the passed in DataFrame. The table will not be created if it already exists + create_temp_table: Will make the auto-created table as a temporary table + """ + if database is not None and schema is None: + raise ProgrammingError( + "Schema has to be provided to write_pandas when a database is provided" + ) + # This dictionary maps the compression algorithm to Snowflake put copy into command type + # https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#type-parquet + compression_map = {"gzip": "auto", "snappy": "snappy"} + if compression not in compression_map.keys(): + raise ProgrammingError( + "Invalid compression '{}', only acceptable values are: {}".format( + compression, compression_map.keys() + ) + ) + if quote_identifiers: + location = ( + (('"' + database + '".') if database else "") + + (('"' + schema + '".') if schema else "") + + ('"' + table_name + '"') + ) + else: + location = ( + (database + "." if database else "") + + (schema + "." 
if schema else "") + + (table_name) + ) + if chunk_size is None: + chunk_size = len(df) + cursor: SnowflakeCursor = conn.cursor() + stage_name = create_temporary_sfc_stage(cursor) + + with TemporaryDirectory() as tmp_folder: + for i, chunk in chunk_helper(df, chunk_size): + chunk_path = os.path.join(tmp_folder, "file{}.txt".format(i)) + # Dump chunk into parquet file + chunk.to_parquet( + chunk_path, + compression=compression, + use_deprecated_int96_timestamps=True, + ) + # Upload parquet file + upload_sql = ( + "PUT /* Python:snowflake.connector.pandas_tools.write_pandas() */ " + "'file://{path}' @\"{stage_name}\" PARALLEL={parallel}" + ).format( + path=chunk_path.replace("\\", "\\\\").replace("'", "\\'"), + stage_name=stage_name, + parallel=parallel, + ) + logger.debug(f"uploading files with '{upload_sql}'") + cursor.execute(upload_sql, _is_internal=True) + # Remove chunk file + os.remove(chunk_path) + if quote_identifiers: + columns = '"' + '","'.join(list(df.columns)) + '"' + else: + columns = ",".join(list(df.columns)) + + if auto_create_table: + file_format_name = create_file_format(compression, compression_map, cursor) + infer_schema_sql = f"SELECT COLUMN_NAME, TYPE FROM table(infer_schema(location=>'@\"{stage_name}\"', file_format=>'{file_format_name}'))" + logger.debug(f"inferring schema with '{infer_schema_sql}'") + result_cursor = cursor.execute(infer_schema_sql, _is_internal=True) + if result_cursor is None: + raise SnowflakeQueryUnknownError(infer_schema_sql) + result = cast(List[Tuple[str, str]], result_cursor.fetchall()) + column_type_mapping: Dict[str, str] = dict(result) + # Infer schema can return the columns out of order depending on the chunking we do when uploading + # so we have to iterate through the dataframe columns to make sure we create the table with its + # columns in order + quote = '"' if quote_identifiers else "" + create_table_columns = ", ".join( + [f"{quote}{c}{quote} {column_type_mapping[c]}" for c in df.columns] + ) + create_table_sql = ( + f"CREATE {'TEMP ' if create_temp_table else ''}TABLE IF NOT EXISTS {location} " + f"({create_table_columns})" + f" /* Python:snowflake.connector.pandas_tools.write_pandas() */ " + ) + logger.debug(f"auto creating table with '{create_table_sql}'") + cursor.execute(create_table_sql, _is_internal=True) + drop_file_format_sql = f"DROP FILE FORMAT IF EXISTS {file_format_name}" + logger.debug(f"dropping file format with '{drop_file_format_sql}'") + cursor.execute(drop_file_format_sql, _is_internal=True) + + # in Snowflake, all parquet data is stored in a single column, $1, so we must select columns explicitly + # see (https://docs.snowflake.com/en/user-guide/script-data-load-transform-parquet.html) + if quote_identifiers: + parquet_columns = "$1:" + ",$1:".join(f'"{c}"' for c in df.columns) + else: + parquet_columns = "$1:" + ",$1:".join(df.columns) + copy_into_sql = ( + "COPY INTO {location} /* Python:snowflake.connector.pandas_tools.write_pandas() */ " + "({columns}) " + 'FROM (SELECT {parquet_columns} FROM @"{stage_name}") ' + "FILE_FORMAT=(TYPE=PARQUET COMPRESSION={compression}) " + "PURGE=TRUE ON_ERROR={on_error}" + ).format( + location=location, + columns=columns, + parquet_columns=parquet_columns, + stage_name=stage_name, + compression=compression_map[compression], + on_error=on_error, + ) + logger.debug("copying into with '{}'".format(copy_into_sql)) + # Snowflake returns the original cursor if the query execution succeeded. 
+ result_cursor = cursor.execute(copy_into_sql, _is_internal=True) + if result_cursor is None: + raise SnowflakeQueryUnknownError(copy_into_sql) + result_cursor.close() + + +@retry( + wait=wait_exponential(multiplier=1, max=4), + retry=retry_if_exception_type(ProgrammingError), + stop=stop_after_attempt(5), + reraise=True, +) +def create_file_format( + compression: str, compression_map: Dict[str, str], cursor: SnowflakeCursor +) -> str: + file_format_name = ( + '"' + "".join(random.choice(string.ascii_lowercase) for _ in range(5)) + '"' + ) + file_format_sql = ( + f"CREATE FILE FORMAT {file_format_name} " + f"/* Python:snowflake.connector.pandas_tools.write_pandas() */ " + f"TYPE=PARQUET COMPRESSION={compression_map[compression]}" + ) + logger.debug(f"creating file format with '{file_format_sql}'") + cursor.execute(file_format_sql, _is_internal=True) + return file_format_name + + +@retry( + wait=wait_exponential(multiplier=1, max=4), + retry=retry_if_exception_type(ProgrammingError), + stop=stop_after_attempt(5), + reraise=True, +) +def create_temporary_sfc_stage(cursor: SnowflakeCursor) -> str: + stage_name = "".join(random.choice(string.ascii_lowercase) for _ in range(5)) + create_stage_sql = ( + "create temporary stage /* Python:snowflake.connector.pandas_tools.write_pandas() */ " + '"{stage_name}"' + ).format(stage_name=stage_name) + logger.debug(f"creating stage with '{create_stage_sql}'") + result_cursor = cursor.execute(create_stage_sql, _is_internal=True) + if result_cursor is None: + raise SnowflakeQueryUnknownError(create_stage_sql) + result_cursor.fetchall() + return stage_name + + +def chunk_helper(lst: pd.DataFrame, n: int) -> Iterator[Tuple[int, pd.DataFrame]]: + """Helper generator to chunk a sequence efficiently with current index like if enumerate was called on sequence.""" + for i in range(0, len(lst), n): + yield int(i / n), lst[i : i + n] diff --git a/sdk/python/feast/on_demand_feature_view.py b/sdk/python/feast/on_demand_feature_view.py index 789422add4..04b7f33cc6 100644 --- a/sdk/python/feast/on_demand_feature_view.py +++ b/sdk/python/feast/on_demand_feature_view.py @@ -119,6 +119,8 @@ def to_proto(self) -> OnDemandFeatureViewProto: meta = OnDemandFeatureViewMeta() if self.created_timestamp: meta.created_timestamp.FromDatetime(self.created_timestamp) + if self.last_updated_timestamp: + meta.last_updated_timestamp.FromDatetime(self.last_updated_timestamp) inputs = {} for input_ref, fv_projection in self.input_feature_view_projections.items(): inputs[input_ref] = OnDemandInput( @@ -194,6 +196,10 @@ def from_proto(cls, on_demand_feature_view_proto: OnDemandFeatureViewProto): on_demand_feature_view_obj.created_timestamp = ( on_demand_feature_view_proto.meta.created_timestamp.ToDatetime() ) + if on_demand_feature_view_proto.meta.HasField("last_updated_timestamp"): + on_demand_feature_view_obj.last_updated_timestamp = ( + on_demand_feature_view_proto.meta.last_updated_timestamp.ToDatetime() + ) return on_demand_feature_view_obj diff --git a/sdk/python/feast/online_response.py b/sdk/python/feast/online_response.py index e6bf6be42c..bb69c6b9d9 100644 --- a/sdk/python/feast/online_response.py +++ b/sdk/python/feast/online_response.py @@ -18,6 +18,7 @@ from feast.feature_view import DUMMY_ENTITY_ID from feast.protos.feast.serving.ServingService_pb2 import GetOnlineFeaturesResponse +from feast.type_map import feast_value_type_to_python_type class OnlineResponse: @@ -34,53 +35,30 @@ def __init__(self, online_response_proto: GetOnlineFeaturesResponse): """ self.proto = 
online_response_proto # Delete DUMMY_ENTITY_ID from proto if it exists - for item in self.proto.field_values: - if DUMMY_ENTITY_ID in item.statuses: - del item.statuses[DUMMY_ENTITY_ID] - if DUMMY_ENTITY_ID in item.fields: - del item.fields[DUMMY_ENTITY_ID] - - @property - def field_values(self): - """ - Getter for GetOnlineResponse's field_values. - """ - return self.proto.field_values + for idx, val in enumerate(self.proto.metadata.feature_names.val): + if val == DUMMY_ENTITY_ID: + del self.proto.metadata.feature_names.val[idx] + for result in self.proto.results: + del result.values[idx] + del result.statuses[idx] + del result.event_timestamps[idx] + break def to_dict(self) -> Dict[str, Any]: """ Converts GetOnlineFeaturesResponse features into a dictionary form. """ - # Status for every Feature should be present in every record. - features_dict: Dict[str, List[Any]] = { - k: list() for k in self.field_values[0].statuses.keys() - } - rows = [record.fields for record in self.field_values] - - # Find the first non-null instance of each Feature to determine - # which ValueType. - val_types = {k: None for k in features_dict.keys()} - for feature in features_dict.keys(): - for row in rows: - try: - val_types[feature] = row[feature].WhichOneof("val") - except KeyError: - continue - if val_types[feature] is not None: - break + response: Dict[str, List[Any]] = {} - # Now we know what attribute to fetch. - for feature, val_type in val_types.items(): - if val_type is None: - features_dict[feature] = [None] * len(rows) - else: - for row in rows: - val = getattr(row[feature], val_type) - if "_list_" in val_type: - val = list(val.val) - features_dict[feature].append(val) + for result in self.proto.results: + for idx, feature_ref in enumerate(self.proto.metadata.feature_names.val): + native_type_value = feast_value_type_to_python_type(result.values[idx]) + if feature_ref not in response: + response[feature_ref] = [native_type_value] + else: + response[feature_ref].append(native_type_value) - return features_dict + return response def to_df(self) -> pd.DataFrame: """ diff --git a/sdk/python/feast/registry.py b/sdk/python/feast/registry.py index 0c058a0d46..4273493255 100644 --- a/sdk/python/feast/registry.py +++ b/sdk/python/feast/registry.py @@ -11,26 +11,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
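# Illustrative sketch (not part of the patch): with the rewritten to_dict()
# above, every feature name in the response metadata maps to a list of native
# Python values, one entry per returned row. Hypothetical output shape:
response = {
    "driver_id": [1001, 1002],
    "driver_hourly_stats__conv_rate": [0.52, 0.47],
}
assert all(len(values) == 2 for values in response.values())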
+import json import logging from collections import defaultdict from datetime import datetime, timedelta +from enum import Enum from pathlib import Path from threading import Lock -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Set from urllib.parse import urlparse from google.protobuf.internal.containers import RepeatedCompositeFieldContainer -from google.protobuf.json_format import MessageToDict +from google.protobuf.json_format import MessageToJson from proto import Message from feast.base_feature_view import BaseFeatureView -from feast.diff.FcoDiff import ( - FcoDiff, - RegistryDiff, - TransitionType, - diff_between, - tag_proto_objects_for_keep_delete_add, -) from feast.entity import Entity from feast.errors import ( ConflictingFeatureViewNames, @@ -38,6 +33,7 @@ FeatureServiceNotFoundException, FeatureViewNotFoundException, OnDemandFeatureViewNotFoundException, + SavedDatasetNotFound, ) from feast.feature_service import FeatureService from feast.feature_view import FeatureView @@ -47,7 +43,9 @@ from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto from feast.registry_store import NoopRegistryStore from feast.repo_config import RegistryConfig +from feast.repo_contents import RepoContents from feast.request_feature_view import RequestFeatureView +from feast.saved_dataset import SavedDataset REGISTRY_SCHEMA_VERSION = "1" @@ -65,6 +63,48 @@ "": "LocalRegistryStore", } + +class FeastObjectType(Enum): + ENTITY = "entity" + FEATURE_VIEW = "feature view" + ON_DEMAND_FEATURE_VIEW = "on demand feature view" + REQUEST_FEATURE_VIEW = "request feature view" + FEATURE_SERVICE = "feature service" + + @staticmethod + def get_objects_from_registry( + registry: "Registry", project: str + ) -> Dict["FeastObjectType", List[Any]]: + return { + FeastObjectType.ENTITY: registry.list_entities(project=project), + FeastObjectType.FEATURE_VIEW: registry.list_feature_views(project=project), + FeastObjectType.ON_DEMAND_FEATURE_VIEW: registry.list_on_demand_feature_views( + project=project + ), + FeastObjectType.REQUEST_FEATURE_VIEW: registry.list_request_feature_views( + project=project + ), + FeastObjectType.FEATURE_SERVICE: registry.list_feature_services( + project=project + ), + } + + @staticmethod + def get_objects_from_repo_contents( + repo_contents: RepoContents, + ) -> Dict["FeastObjectType", Set[Any]]: + return { + FeastObjectType.ENTITY: repo_contents.entities, + FeastObjectType.FEATURE_VIEW: repo_contents.feature_views, + FeastObjectType.ON_DEMAND_FEATURE_VIEW: repo_contents.on_demand_feature_views, + FeastObjectType.REQUEST_FEATURE_VIEW: repo_contents.request_feature_views, + FeastObjectType.FEATURE_SERVICE: repo_contents.feature_services, + } + + +FEAST_OBJECT_TYPES = [feast_object_type for feast_object_type in FeastObjectType] + + logger = logging.getLogger(__name__) @@ -143,75 +183,6 @@ def clone(self) -> "Registry": new_registry._registry_store = NoopRegistryStore() return new_registry - # TODO(achals): This method needs to be filled out and used in the feast plan/apply methods. 
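# Illustrative sketch (not part of the patch, assumes the patched feast.registry
# module is importable): FeastObjectType above gives a stable iteration order
# over Feast object kinds, which both get_objects_from_registry() and
# get_objects_from_repo_contents() key their result dictionaries by.
from feast.registry import FeastObjectType

for object_type in FeastObjectType:
    print(object_type.value)  # "entity", "feature view", "on demand feature view", ...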
- @staticmethod - def diff_between( - current_registry: RegistryProto, new_registry: RegistryProto - ) -> RegistryDiff: - diff = RegistryDiff() - - attribute_to_object_type_str = { - "entities": "entity", - "feature_views": "feature view", - "feature_tables": "feature table", - "on_demand_feature_views": "on demand feature view", - "request_feature_views": "request feature view", - "feature_services": "feature service", - } - - for object_type in [ - "entities", - "feature_views", - "feature_tables", - "on_demand_feature_views", - "request_feature_views", - "feature_services", - ]: - ( - objects_to_keep, - objects_to_delete, - objects_to_add, - ) = tag_proto_objects_for_keep_delete_add( - getattr(current_registry, object_type), - getattr(new_registry, object_type), - ) - - for e in objects_to_add: - diff.add_fco_diff( - FcoDiff( - e.spec.name, - attribute_to_object_type_str[object_type], - None, - e, - [], - TransitionType.CREATE, - ) - ) - for e in objects_to_delete: - diff.add_fco_diff( - FcoDiff( - e.spec.name, - attribute_to_object_type_str[object_type], - e, - None, - [], - TransitionType.DELETE, - ) - ) - for e in objects_to_keep: - current_obj_proto = [ - _e - for _e in getattr(current_registry, object_type) - if _e.spec.name == e.spec.name - ][0] - diff.add_fco_diff( - diff_between( - current_obj_proto, e, attribute_to_object_type_str[object_type] - ) - ) - - return diff - def _initialize_registry(self): """Explicitly initializes the registry with an empty proto if it doesn't exist.""" try: @@ -261,6 +232,12 @@ def apply_entity(self, entity: Entity, project: str, commit: bool = True): commit: Whether the change should be persisted immediately """ entity.is_valid() + + now = datetime.utcnow() + if not entity.created_timestamp: + entity._created_timestamp = now + entity._last_updated_timestamp = now + entity_proto = entity.to_proto() entity_proto.spec.project = project self._prepare_registry_for_changes() @@ -308,6 +285,11 @@ def apply_feature_service( feature_service: A feature service that will be registered project: Feast project that this entity belongs to """ + now = datetime.utcnow() + if not feature_service.created_timestamp: + feature_service.created_timestamp = now + feature_service.last_updated_timestamp = now + feature_service_proto = feature_service.to_proto() feature_service_proto.spec.project = project @@ -403,8 +385,12 @@ def apply_feature_view( commit: Whether the change should be persisted immediately """ feature_view.ensure_valid() + + now = datetime.utcnow() if not feature_view.created_timestamp: - feature_view.created_timestamp = datetime.now() + feature_view.created_timestamp = now + feature_view.last_updated_timestamp = now + feature_view_proto = feature_view.to_proto() feature_view_proto.spec.project = project self._prepare_registry_for_changes() @@ -528,6 +514,7 @@ def apply_materialization( existing_feature_view.materialization_intervals.append( (start_date, end_date) ) + existing_feature_view.last_updated_timestamp = datetime.utcnow() feature_view_proto = existing_feature_view.to_proto() feature_view_proto.spec.project = project del self.cached_registry_proto.feature_views[idx] @@ -705,6 +692,85 @@ def delete_entity(self, name: str, project: str, commit: bool = True): raise EntityNotFoundException(name, project) + def apply_saved_dataset( + self, saved_dataset: SavedDataset, project: str, commit: bool = True + ): + """ + Registers a single entity with Feast + + Args: + saved_dataset: SavedDataset that will be added / updated to registry + project: Feast 
project that this dataset belongs to + commit: Whether the change should be persisted immediately + """ + now = datetime.utcnow() + if not saved_dataset.created_timestamp: + saved_dataset.created_timestamp = now + saved_dataset.last_updated_timestamp = now + + saved_dataset_proto = saved_dataset.to_proto() + saved_dataset_proto.spec.project = project + self._prepare_registry_for_changes() + assert self.cached_registry_proto + + for idx, existing_saved_dataset_proto in enumerate( + self.cached_registry_proto.saved_datasets + ): + if ( + existing_saved_dataset_proto.spec.name == saved_dataset_proto.spec.name + and existing_saved_dataset_proto.spec.project == project + ): + del self.cached_registry_proto.saved_datasets[idx] + break + + self.cached_registry_proto.saved_datasets.append(saved_dataset_proto) + if commit: + self.commit() + + def get_saved_dataset( + self, name: str, project: str, allow_cache: bool = False + ) -> SavedDataset: + """ + Retrieves a saved dataset. + + Args: + name: Name of dataset + project: Feast project that this dataset belongs to + allow_cache: Whether to allow returning this dataset from a cached registry + + Returns: + Returns either the specified SavedDataset, or raises an exception if + none is found + """ + registry_proto = self._get_registry_proto(allow_cache=allow_cache) + for saved_dataset in registry_proto.saved_datasets: + if ( + saved_dataset.spec.name == name + and saved_dataset.spec.project == project + ): + return SavedDataset.from_proto(saved_dataset) + raise SavedDatasetNotFound(name, project=project) + + def list_saved_datasets( + self, project: str, allow_cache: bool = False + ) -> List[SavedDataset]: + """ + Retrieves a list of all saved datasets in specified project + + Args: + project: Feast project + allow_cache: Whether to allow returning this dataset from a cached registry + + Returns: + Returns the list of SavedDatasets + """ + registry_proto = self._get_registry_proto(allow_cache=allow_cache) + return [ + SavedDataset.from_proto(saved_dataset) + for saved_dataset in registry_proto.saved_datasets + if saved_dataset.spec.project == project + ] + def commit(self): """Commits the state of the registry cache to the remote registry store.""" if self.cached_registry_proto: @@ -732,35 +798,53 @@ def to_dict(self, project: str) -> Dict[str, List[Any]]: for entity in sorted( self.list_entities(project=project), key=lambda entity: entity.name ): - registry_dict["entities"].append(MessageToDict(entity.to_proto())) + registry_dict["entities"].append( + self._message_to_sorted_dict(entity.to_proto()) + ) for feature_view in sorted( self.list_feature_views(project=project), key=lambda feature_view: feature_view.name, ): - registry_dict["featureViews"].append(MessageToDict(feature_view.to_proto())) + registry_dict["featureViews"].append( + self._message_to_sorted_dict(feature_view.to_proto()) + ) for feature_service in sorted( self.list_feature_services(project=project), key=lambda feature_service: feature_service.name, ): registry_dict["featureServices"].append( - MessageToDict(feature_service.to_proto()) + self._message_to_sorted_dict(feature_service.to_proto()) ) for on_demand_feature_view in sorted( self.list_on_demand_feature_views(project=project), key=lambda on_demand_feature_view: on_demand_feature_view.name, ): registry_dict["onDemandFeatureViews"].append( - MessageToDict(on_demand_feature_view.to_proto()) + self._message_to_sorted_dict(on_demand_feature_view.to_proto()) ) for request_feature_view in sorted( 
self.list_request_feature_views(project=project), key=lambda request_feature_view: request_feature_view.name, ): registry_dict["requestFeatureViews"].append( - MessageToDict(request_feature_view.to_proto()) + self._message_to_sorted_dict(request_feature_view.to_proto()) + ) + for saved_dataset in sorted( + self.list_saved_datasets(project=project), key=lambda item: item.name + ): + registry_dict["savedDatasets"].append( + self._message_to_sorted_dict(saved_dataset.to_proto()) + ) + for infra_object in sorted(self.get_infra(project=project).infra_objects): + registry_dict["infra"].append( + self._message_to_sorted_dict(infra_object.to_proto()) ) return registry_dict + @staticmethod + def _message_to_sorted_dict(message: Message) -> Dict[str, Any]: + return json.loads(MessageToJson(message, sort_keys=True)) + def _prepare_registry_for_changes(self): """Prepares the Registry for changes by refreshing the cache if necessary.""" try: @@ -769,7 +853,7 @@ def _prepare_registry_for_changes(self): registry_proto = RegistryProto() registry_proto.registry_schema_version = REGISTRY_SCHEMA_VERSION self.cached_registry_proto = registry_proto - self.cached_registry_proto_created = datetime.now() + self.cached_registry_proto_created = datetime.utcnow() return self.cached_registry_proto def _get_registry_proto(self, allow_cache: bool = False) -> RegistryProto: @@ -788,7 +872,7 @@ def _get_registry_proto(self, allow_cache: bool = False) -> RegistryProto: self.cached_registry_proto_ttl.total_seconds() > 0 # 0 ttl means infinity and ( - datetime.now() + datetime.utcnow() > ( self.cached_registry_proto_created + self.cached_registry_proto_ttl @@ -802,7 +886,7 @@ def _get_registry_proto(self, allow_cache: bool = False) -> RegistryProto: registry_proto = self._registry_store.get_registry_proto() self.cached_registry_proto = registry_proto - self.cached_registry_proto_created = datetime.now() + self.cached_registry_proto_created = datetime.utcnow() return registry_proto diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index 26309fe9d7..3f32d18b80 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -31,12 +31,14 @@ "datastore": "feast.infra.online_stores.datastore.DatastoreOnlineStore", "redis": "feast.infra.online_stores.redis.RedisOnlineStore", "dynamodb": "feast.infra.online_stores.dynamodb.DynamoDBOnlineStore", + "snowflake.online": "feast.infra.online_stores.snowflake.SnowflakeOnlineStore", } OFFLINE_STORE_CLASS_FOR_TYPE = { "file": "feast.infra.offline_stores.file.FileOfflineStore", "bigquery": "feast.infra.offline_stores.bigquery.BigQueryOfflineStore", "redshift": "feast.infra.offline_stores.redshift.RedshiftOfflineStore", + "snowflake.offline": "feast.infra.offline_stores.snowflake.SnowflakeOfflineStore", } FEATURE_SERVER_CONFIG_CLASS_FOR_TYPE = { @@ -152,8 +154,12 @@ def _validate_online_store_config(cls, values): if "online_store" not in values: values["online_store"] = dict() - # Skip if we aren't creating the configuration from a dict + # Skip if we aren't creating the configuration from a dict or online store is null or it is a string like "None" or "null" if not isinstance(values["online_store"], Dict): + if isinstance(values["online_store"], str) and values[ + "online_store" + ].lower() in {"none", "null"}: + values["online_store"] = None return values # Make sure that the provider configuration is set. 
We need it to set the defaults diff --git a/sdk/python/feast/repo_contents.py b/sdk/python/feast/repo_contents.py new file mode 100644 index 0000000000..9190af11ee --- /dev/null +++ b/sdk/python/feast/repo_contents.py @@ -0,0 +1,50 @@ +# Copyright 2022 The Feast Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import NamedTuple, Set + +from feast.entity import Entity +from feast.feature_service import FeatureService +from feast.feature_view import FeatureView +from feast.on_demand_feature_view import OnDemandFeatureView +from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto +from feast.request_feature_view import RequestFeatureView + + +class RepoContents(NamedTuple): + """ + Represents the objects in a Feast feature repo. + """ + + feature_views: Set[FeatureView] + on_demand_feature_views: Set[OnDemandFeatureView] + request_feature_views: Set[RequestFeatureView] + entities: Set[Entity] + feature_services: Set[FeatureService] + + def to_registry_proto(self) -> RegistryProto: + registry_proto = RegistryProto() + registry_proto.entities.extend([e.to_proto() for e in self.entities]) + registry_proto.feature_views.extend( + [fv.to_proto() for fv in self.feature_views] + ) + registry_proto.on_demand_feature_views.extend( + [fv.to_proto() for fv in self.on_demand_feature_views] + ) + registry_proto.request_feature_views.extend( + [fv.to_proto() for fv in self.request_feature_views] + ) + registry_proto.feature_services.extend( + [fs.to_proto() for fs in self.feature_services] + ) + return registry_proto diff --git a/sdk/python/feast/repo_operations.py b/sdk/python/feast/repo_operations.py index 9299a36123..cba04a9942 100644 --- a/sdk/python/feast/repo_operations.py +++ b/sdk/python/feast/repo_operations.py @@ -5,22 +5,23 @@ import re import sys from importlib.abc import Loader +from importlib.machinery import ModuleSpec from pathlib import Path -from typing import List, Set, Union, cast +from typing import List, Set, Union import click from click.exceptions import BadParameter -from feast.base_feature_view import BaseFeatureView -from feast.diff.FcoDiff import TransitionType, tag_objects_for_keep_delete_add +from feast.diff.registry_diff import extract_objects_for_keep_delete_update_add from feast.entity import Entity from feast.feature_service import FeatureService -from feast.feature_store import FeatureStore, RepoContents -from feast.feature_view import DUMMY_ENTITY, DUMMY_ENTITY_NAME, FeatureView +from feast.feature_store import FeatureStore +from feast.feature_view import DUMMY_ENTITY, FeatureView from feast.names import adjectives, animals from feast.on_demand_feature_view import OnDemandFeatureView -from feast.registry import Registry +from feast.registry import FEAST_OBJECT_TYPES, FeastObjectType, Registry from feast.repo_config import RepoConfig +from feast.repo_contents import RepoContents from feast.request_feature_view import RequestFeatureView from feast.usage import log_exceptions_and_usage @@ -78,7 +79,11 @@ def get_repo_files(repo_root: Path) 
-> List[Path]: ignore_files = get_ignore_files(repo_root, ignore_paths) # List all Python files in the root directory (recursively) - repo_files = {p.resolve() for p in repo_root.glob("**/*.py") if p.is_file()} + repo_files = { + p.resolve() + for p in repo_root.glob("**/*.py") + if p.is_file() and "__init__.py" != p.name + } # Ignore all files that match any of the ignore paths in .feastignore repo_files -= ignore_files @@ -127,20 +132,9 @@ def plan(repo_config: RepoConfig, repo_path: Path, skip_source_validation: bool) for data_source in data_sources: data_source.validate(store.config) - diff = store.plan(repo) - views_to_delete = [ - v - for v in diff.fco_diffs - if v.fco_type == "feature view" and v.transition_type == TransitionType.DELETE - ] - views_to_keep = [ - v - for v in diff.fco_diffs - if v.fco_type == "feature view" - and v.transition_type in {TransitionType.CREATE, TransitionType.UNCHANGED} - ] - - log_cli_output(diff, views_to_delete, views_to_keep) + registry_diff, infra_diff, _ = store._plan(repo) + click.echo(registry_diff.to_string()) + click.echo(infra_diff.to_string()) def _prepare_registry_and_repo(repo_config, repo_path): @@ -153,153 +147,107 @@ def _prepare_registry_and_repo(repo_config, repo_path): ) sys.exit(1) registry = store.registry - registry._initialize_registry() sys.dont_write_bytecode = True repo = parse_repo(repo_path) return project, registry, repo, store def extract_objects_for_apply_delete(project, registry, repo): - ( - entities_to_keep, - entities_to_delete, - entities_to_add, - ) = tag_objects_for_keep_delete_add( - set(registry.list_entities(project=project)), repo.entities - ) # TODO(achals): This code path should be refactored to handle added & kept entities separately. - entities_to_keep = set(entities_to_keep).union(entities_to_add) - views = tag_objects_for_keep_delete_add( - set(registry.list_feature_views(project=project)), repo.feature_views - ) - views_to_keep, views_to_delete, views_to_add = ( - cast(Set[FeatureView], views[0]), - cast(Set[FeatureView], views[1]), - cast(Set[FeatureView], views[2]), - ) - request_views = tag_objects_for_keep_delete_add( - set(registry.list_request_feature_views(project=project)), - repo.request_feature_views, - ) - request_views_to_keep: Set[RequestFeatureView] - request_views_to_delete: Set[RequestFeatureView] - request_views_to_add: Set[RequestFeatureView] - request_views_to_keep, request_views_to_delete, request_views_to_add = ( - cast(Set[RequestFeatureView], request_views[0]), - cast(Set[RequestFeatureView], request_views[1]), - cast(Set[RequestFeatureView], request_views[2]), - ) - base_views_to_keep: Set[Union[RequestFeatureView, FeatureView]] = { - *views_to_keep, - *views_to_add, - *request_views_to_keep, - *request_views_to_add, - } - base_views_to_delete: Set[Union[RequestFeatureView, FeatureView]] = { - *views_to_delete, - *request_views_to_delete, - } - odfvs = tag_objects_for_keep_delete_add( - set(registry.list_on_demand_feature_views(project=project)), - repo.on_demand_feature_views, - ) - odfvs_to_keep, odfvs_to_delete, odfvs_to_add = ( - cast(Set[OnDemandFeatureView], odfvs[0]), - cast(Set[OnDemandFeatureView], odfvs[1]), - cast(Set[OnDemandFeatureView], odfvs[2]), - ) - odfvs_to_keep = odfvs_to_keep.union(odfvs_to_add) ( - services_to_keep, - services_to_delete, - services_to_add, - ) = tag_objects_for_keep_delete_add( - set(registry.list_feature_services(project=project)), repo.feature_services - ) - services_to_keep = services_to_keep.union(services_to_add) - sys.dont_write_bytecode 
= False - # Apply all changes to the registry and infrastructure. + _, + objs_to_delete, + objs_to_update, + objs_to_add, + ) = extract_objects_for_keep_delete_update_add(registry, project, repo) + all_to_apply: List[ - Union[Entity, BaseFeatureView, FeatureService, OnDemandFeatureView] + Union[ + Entity, FeatureView, RequestFeatureView, OnDemandFeatureView, FeatureService + ] ] = [] - all_to_apply.extend(entities_to_keep) - all_to_apply.extend(base_views_to_keep) - all_to_apply.extend(services_to_keep) - all_to_apply.extend(odfvs_to_keep) + for object_type in FEAST_OBJECT_TYPES: + to_apply = set(objs_to_add[object_type]).union(objs_to_update[object_type]) + all_to_apply.extend(to_apply) + all_to_delete: List[ - Union[Entity, BaseFeatureView, FeatureService, OnDemandFeatureView] + Union[ + Entity, FeatureView, RequestFeatureView, OnDemandFeatureView, FeatureService + ] ] = [] - all_to_delete.extend(entities_to_delete) - all_to_delete.extend(base_views_to_delete) - all_to_delete.extend(services_to_delete) - all_to_delete.extend(odfvs_to_delete) + for object_type in FEAST_OBJECT_TYPES: + all_to_delete.extend(objs_to_delete[object_type]) - return all_to_apply, all_to_delete, views_to_delete, views_to_keep - - -@log_exceptions_and_usage -def apply_total(repo_config: RepoConfig, repo_path: Path, skip_source_validation: bool): + return ( + all_to_apply, + all_to_delete, + set( + objs_to_add[FeastObjectType.FEATURE_VIEW].union( + objs_to_update[FeastObjectType.FEATURE_VIEW] + ) + ), + objs_to_delete[FeastObjectType.FEATURE_VIEW], + ) - os.chdir(repo_path) - project, registry, repo, store = _prepare_registry_and_repo(repo_config, repo_path) +def apply_total_with_repo_instance( + store: FeatureStore, + project: str, + registry: Registry, + repo: RepoContents, + skip_source_validation: bool, +): if not skip_source_validation: data_sources = [t.batch_source for t in repo.feature_views] # Make sure the data source used by this feature view is supported by Feast for data_source in data_sources: data_source.validate(store.config) + registry_diff, infra_diff, new_infra = store._plan(repo) + # For each object in the registry, determine whether it should be kept or deleted. 
( all_to_apply, all_to_delete, - views_to_delete, views_to_keep, + views_to_delete, ) = extract_objects_for_apply_delete(project, registry, repo) - diff = store.apply(all_to_apply, objects_to_delete=all_to_delete, partial=False) + click.echo(registry_diff.to_string()) - log_cli_output(diff, views_to_delete, views_to_keep) + if store._should_use_plan(): + store._apply_diffs(registry_diff, infra_diff, new_infra) + click.echo(infra_diff.to_string()) + else: + store.apply(all_to_apply, objects_to_delete=all_to_delete, partial=False) + log_infra_changes(views_to_keep, views_to_delete) -def log_cli_output(diff, views_to_delete, views_to_keep): +def log_infra_changes( + views_to_keep: Set[FeatureView], views_to_delete: Set[FeatureView] +): from colorama import Fore, Style - message_action_map = { - TransitionType.CREATE: ("Created", Fore.GREEN), - TransitionType.DELETE: ("Deleted", Fore.RED), - TransitionType.UNCHANGED: ("Unchanged", Fore.LIGHTBLUE_EX), - TransitionType.UPDATE: ("Updated", Fore.YELLOW), - } - for fco_diff in diff.fco_diffs: - if fco_diff.name == DUMMY_ENTITY_NAME: - continue - action, color = message_action_map[fco_diff.transition_type] + for view in views_to_keep: click.echo( - f"{action} {fco_diff.fco_type} {Style.BRIGHT + color}{fco_diff.name}{Style.RESET_ALL}" + f"Deploying infrastructure for {Style.BRIGHT + Fore.GREEN}{view.name}{Style.RESET_ALL}" ) - if fco_diff.transition_type == TransitionType.UPDATE: - for _p in fco_diff.fco_property_diffs: - click.echo( - f"\t{_p.property_name}: {Style.BRIGHT + color}{_p.val_existing}{Style.RESET_ALL} -> {Style.BRIGHT + Fore.LIGHTGREEN_EX}{_p.val_declared}{Style.RESET_ALL}" - ) - - views_to_keep_in_infra = [ - view for view in views_to_keep if isinstance(view, FeatureView) - ] - for name in [view.name for view in views_to_keep_in_infra]: + for view in views_to_delete: click.echo( - f"Deploying infrastructure for {Style.BRIGHT + Fore.GREEN}{name}{Style.RESET_ALL}" - ) - views_to_delete_from_infra = [ - view for view in views_to_delete if isinstance(view, FeatureView) - ] - for name in [view.name for view in views_to_delete_from_infra]: - click.echo( - f"Removing infrastructure for {Style.BRIGHT + Fore.RED}{name}{Style.RESET_ALL}" + f"Removing infrastructure for {Style.BRIGHT + Fore.RED}{view.name}{Style.RESET_ALL}" ) +@log_exceptions_and_usage +def apply_total(repo_config: RepoConfig, repo_path: Path, skip_source_validation: bool): + + os.chdir(repo_path) + project, registry, repo, store = _prepare_registry_and_repo(repo_config, repo_path) + apply_total_with_repo_instance( + store, project, registry, repo, skip_source_validation + ) + + @log_exceptions_and_usage def teardown(repo_config: RepoConfig, repo_path: Path): # Cannot pass in both repo_path and repo_config to FeatureStore. @@ -322,7 +270,7 @@ def registry_dump(repo_config: RepoConfig, repo_path: Path): "breaking changes in the future. No guarantees are made on this interface." 
) click.echo(f"{Style.BRIGHT}{Fore.YELLOW}{warning}{Style.RESET_ALL}") - click.echo(json.dumps(registry_dict, indent=2)) + click.echo(json.dumps(registry_dict, indent=2, sort_keys=True)) def cli_check_repo(repo_path: Path): @@ -375,6 +323,7 @@ def init_repo(repo_name: str, template: str): import importlib.util spec = importlib.util.spec_from_file_location("bootstrap", str(bootstrap_path)) + assert isinstance(spec, ModuleSpec) bootstrap = importlib.util.module_from_spec(spec) assert isinstance(spec.loader, Loader) spec.loader.exec_module(bootstrap) diff --git a/sdk/python/feast/saved_dataset.py b/sdk/python/feast/saved_dataset.py new file mode 100644 index 0000000000..75b6d2c199 --- /dev/null +++ b/sdk/python/feast/saved_dataset.py @@ -0,0 +1,209 @@ +from abc import abstractmethod +from datetime import datetime +from typing import TYPE_CHECKING, Dict, List, Optional, Type, cast + +import pandas as pd +import pyarrow +from google.protobuf.json_format import MessageToJson + +from feast.data_source import DataSource +from feast.dqm.profilers.profiler import Profile, Profiler +from feast.protos.feast.core.SavedDataset_pb2 import SavedDataset as SavedDatasetProto +from feast.protos.feast.core.SavedDataset_pb2 import SavedDatasetMeta, SavedDatasetSpec +from feast.protos.feast.core.SavedDataset_pb2 import ( + SavedDatasetStorage as SavedDatasetStorageProto, +) + +if TYPE_CHECKING: + from feast.infra.offline_stores.offline_store import RetrievalJob + + +class _StorageRegistry(type): + classes_by_proto_attr_name: Dict[str, Type["SavedDatasetStorage"]] = {} + + def __new__(cls, name, bases, dct): + kls = type.__new__(cls, name, bases, dct) + if dct.get("_proto_attr_name"): + cls.classes_by_proto_attr_name[dct["_proto_attr_name"]] = kls + return kls + + +class SavedDatasetStorage(metaclass=_StorageRegistry): + _proto_attr_name: str + + @staticmethod + def from_proto(storage_proto: SavedDatasetStorageProto) -> "SavedDatasetStorage": + proto_attr_name = cast(str, storage_proto.WhichOneof("kind")) + return _StorageRegistry.classes_by_proto_attr_name[proto_attr_name].from_proto( + storage_proto + ) + + @abstractmethod + def to_proto(self) -> SavedDatasetStorageProto: + ... + + @abstractmethod + def to_data_source(self) -> DataSource: + ... + + +class SavedDataset: + name: str + features: List[str] + join_keys: List[str] + full_feature_names: bool + storage: SavedDatasetStorage + tags: Dict[str, str] + + created_timestamp: Optional[datetime] = None + last_updated_timestamp: Optional[datetime] = None + + min_event_timestamp: Optional[datetime] = None + max_event_timestamp: Optional[datetime] = None + + _retrieval_job: Optional["RetrievalJob"] = None + + def __init__( + self, + name: str, + features: List[str], + join_keys: List[str], + storage: SavedDatasetStorage, + full_feature_names: bool = False, + tags: Optional[Dict[str, str]] = None, + ): + self.name = name + self.features = features + self.join_keys = join_keys + self.storage = storage + self.full_feature_names = full_feature_names + self.tags = tags or {} + + self._retrieval_job = None + + def __repr__(self): + items = (f"{k} = {v}" for k, v in self.__dict__.items()) + return f"<{self.__class__.__name__}({', '.join(items)})>" + + def __str__(self): + return str(MessageToJson(self.to_proto())) + + def __hash__(self): + return hash((id(self), self.name)) + + def __eq__(self, other): + if not isinstance(other, SavedDataset): + raise TypeError( + "Comparisons should only involve FeatureService class objects." 
+ ) + if self.name != other.name: + return False + + if sorted(self.features) != sorted(other.features): + return False + + return True + + @staticmethod + def from_proto(saved_dataset_proto: SavedDatasetProto): + """ + Converts a SavedDatasetProto to a SavedDataset object. + + Args: + saved_dataset_proto: A protobuf representation of a SavedDataset. + """ + ds = SavedDataset( + name=saved_dataset_proto.spec.name, + features=list(saved_dataset_proto.spec.features), + join_keys=list(saved_dataset_proto.spec.join_keys), + full_feature_names=saved_dataset_proto.spec.full_feature_names, + storage=SavedDatasetStorage.from_proto(saved_dataset_proto.spec.storage), + tags=dict(saved_dataset_proto.spec.tags.items()), + ) + + if saved_dataset_proto.meta.HasField("created_timestamp"): + ds.created_timestamp = ( + saved_dataset_proto.meta.created_timestamp.ToDatetime() + ) + if saved_dataset_proto.meta.HasField("last_updated_timestamp"): + ds.last_updated_timestamp = ( + saved_dataset_proto.meta.last_updated_timestamp.ToDatetime() + ) + if saved_dataset_proto.meta.HasField("min_event_timestamp"): + ds.min_event_timestamp = ( + saved_dataset_proto.meta.min_event_timestamp.ToDatetime() + ) + if saved_dataset_proto.meta.HasField("max_event_timestamp"): + ds.max_event_timestamp = ( + saved_dataset_proto.meta.max_event_timestamp.ToDatetime() + ) + + return ds + + def to_proto(self) -> SavedDatasetProto: + """ + Converts a SavedDataset to its protobuf representation. + + Returns: + A SavedDatasetProto protobuf. + """ + meta = SavedDatasetMeta() + if self.created_timestamp: + meta.created_timestamp.FromDatetime(self.created_timestamp) + if self.min_event_timestamp: + meta.min_event_timestamp.FromDatetime(self.min_event_timestamp) + if self.max_event_timestamp: + meta.max_event_timestamp.FromDatetime(self.max_event_timestamp) + + spec = SavedDatasetSpec( + name=self.name, + features=self.features, + join_keys=self.join_keys, + full_feature_names=self.full_feature_names, + storage=self.storage.to_proto(), + tags=self.tags, + ) + + feature_service_proto = SavedDatasetProto(spec=spec, meta=meta) + return feature_service_proto + + def with_retrieval_job(self, retrieval_job: "RetrievalJob") -> "SavedDataset": + self._retrieval_job = retrieval_job + return self + + def to_df(self) -> pd.DataFrame: + if not self._retrieval_job: + raise RuntimeError( + "To load this dataset use FeatureStore.get_saved_dataset() " + "instead of instantiating it directly." + ) + + return self._retrieval_job.to_df() + + def to_arrow(self) -> pyarrow.Table: + if not self._retrieval_job: + raise RuntimeError( + "To load this dataset use FeatureStore.get_saved_dataset() " + "instead of instantiating it directly." 
+ ) + + return self._retrieval_job.to_arrow() + + def as_reference(self, profiler: "Profiler") -> "ValidationReference": + return ValidationReference(profiler=profiler, dataset=self) + + def get_profile(self, profiler: Profiler) -> Profile: + return profiler.analyze_dataset(self.to_df()) + + +class ValidationReference: + dataset: SavedDataset + profiler: Profiler + + def __init__(self, dataset: SavedDataset, profiler: Profiler): + self.dataset = dataset + self.profiler = profiler + + @property + def profile(self) -> Profile: + return self.profiler.analyze_dataset(self.dataset.to_df()) diff --git a/sdk/python/feast/templates/aws/__init__.py b/sdk/python/feast/templates/aws/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sdk/python/feast/templates/gcp/__init__.py b/sdk/python/feast/templates/gcp/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sdk/python/feast/templates/local/__init__.py b/sdk/python/feast/templates/local/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sdk/python/feast/templates/snowflake/bootstrap.py b/sdk/python/feast/templates/snowflake/bootstrap.py new file mode 100644 index 0000000000..3712651a5d --- /dev/null +++ b/sdk/python/feast/templates/snowflake/bootstrap.py @@ -0,0 +1,91 @@ +import click +import snowflake.connector + +from feast.infra.utils.snowflake_utils import write_pandas + + +def bootstrap(): + # Bootstrap() will automatically be called from the init_repo() during `feast init` + + import pathlib + from datetime import datetime, timedelta + + from feast.driver_test_data import create_driver_hourly_stats_df + + repo_path = pathlib.Path(__file__).parent.absolute() + config_file = repo_path / "feature_store.yaml" + + project_name = str(repo_path)[str(repo_path).rfind("/") + 1 :] + + end_date = datetime.now().replace(microsecond=0, second=0, minute=0) + start_date = end_date - timedelta(days=15) + + driver_entities = [1001, 1002, 1003, 1004, 1005] + driver_df = create_driver_hourly_stats_df(driver_entities, start_date, end_date) + + repo_path = pathlib.Path(__file__).parent.absolute() + data_path = repo_path / "data" + data_path.mkdir(exist_ok=True) + driver_stats_path = data_path / "driver_stats.parquet" + driver_df.to_parquet(path=str(driver_stats_path), allow_truncated_timestamps=True) + + snowflake_deployment_url = click.prompt( + "Snowflake Deployment URL (exclude .snowflakecomputing.com):" + ) + snowflake_user = click.prompt("Snowflake User Name:") + snowflake_password = click.prompt("Snowflake Password:", hide_input=True) + snowflake_role = click.prompt("Snowflake Role Name (Case Sensitive):") + snowflake_warehouse = click.prompt("Snowflake Warehouse Name (Case Sensitive):") + snowflake_database = click.prompt("Snowflake Database Name (Case Sensitive):") + + if click.confirm( + f'Should I upload example data to Snowflake (overwriting "{project_name}_feast_driver_hourly_stats" table)?', + default=True, + ): + + conn = snowflake.connector.connect( + account=snowflake_deployment_url, + user=snowflake_user, + password=snowflake_password, + role=snowflake_role, + warehouse=snowflake_warehouse, + application="feast", + ) + + cur = conn.cursor() + cur.execute(f'CREATE DATABASE IF NOT EXISTS "{snowflake_database}"') + cur.execute(f'USE DATABASE "{snowflake_database}"') + cur.execute('CREATE SCHEMA IF NOT EXISTS "PUBLIC"') + cur.execute('USE SCHEMA "PUBLIC"') + cur.execute(f'DROP TABLE IF EXISTS "{project_name}_feast_driver_hourly_stats"') + write_pandas( + conn, + driver_df, + 
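as_reference() and ValidationReference.profile shown earlier in saved_dataset.py both funnel into Profiler.analyze_dataset over the dataset's dataframe. A short sketch, assuming `dataset` was loaded as in the previous sketch and `my_profiler` is some concrete implementation of the abstract Profiler interface (no concrete profiler is defined in this diff):

    # Wrap the saved dataset so the profiler can later be re-applied to it.
    reference = dataset.as_reference(profiler=my_profiler)

    # Both calls reduce to my_profiler.analyze_dataset(dataset.to_df()).
    profile_from_reference = reference.profile
    profile_direct = dataset.get_profile(my_profiler)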
f"{project_name}_feast_driver_hourly_stats", + auto_create_table=True, + ) + conn.close() + + repo_path = pathlib.Path(__file__).parent.absolute() + config_file = repo_path / "feature_store.yaml" + + replace_str_in_file( + config_file, "SNOWFLAKE_DEPLOYMENT_URL", snowflake_deployment_url + ) + replace_str_in_file(config_file, "SNOWFLAKE_USER", snowflake_user) + replace_str_in_file(config_file, "SNOWFLAKE_PASSWORD", snowflake_password) + replace_str_in_file(config_file, "SNOWFLAKE_ROLE", snowflake_role) + replace_str_in_file(config_file, "SNOWFLAKE_WAREHOUSE", snowflake_warehouse) + replace_str_in_file(config_file, "SNOWFLAKE_DATABASE", snowflake_database) + + +def replace_str_in_file(file_path, match_str, sub_str): + with open(file_path, "r") as f: + contents = f.read() + contents = contents.replace(match_str, sub_str) + with open(file_path, "wt") as f: + f.write(contents) + + +if __name__ == "__main__": + bootstrap() diff --git a/sdk/python/feast/templates/snowflake/driver_repo.py b/sdk/python/feast/templates/snowflake/driver_repo.py new file mode 100644 index 0000000000..a63c6cb503 --- /dev/null +++ b/sdk/python/feast/templates/snowflake/driver_repo.py @@ -0,0 +1,64 @@ +from datetime import timedelta + +import yaml + +from feast import Entity, Feature, FeatureView, SnowflakeSource, ValueType + +# Define an entity for the driver. Entities can be thought of as primary keys used to +# retrieve features. Entities are also used to join multiple tables/views during the +# construction of feature vectors +driver = Entity( + # Name of the entity. Must be unique within a project + name="driver_id", + # The join key of an entity describes the storage level field/column on which + # features can be looked up. The join key is also used to join feature + # tables/views when building feature vectors + join_key="driver_id", +) + +# Indicates a data source from which feature values can be retrieved. Sources are queried when building training +# datasets or materializing features into an online store. +project_name = yaml.safe_load(open("feature_store.yaml"))["project"] + +driver_stats_source = SnowflakeSource( + # The Snowflake table where features can be found + database=yaml.safe_load(open("feature_store.yaml"))["offline_store"]["database"], + table=f"{project_name}_feast_driver_hourly_stats", + # The event timestamp is used for point-in-time joins and for ensuring only + # features within the TTL are returned + event_timestamp_column="event_timestamp", + # The (optional) created timestamp is used to ensure there are no duplicate + # feature rows in the offline store or when building training datasets + created_timestamp_column="created", +) + +# Feature views are a grouping based on how features are stored in either the +# online or offline store. +driver_stats_fv = FeatureView( + # The unique name of this feature view. Two feature views in a single + # project cannot have the same name + name="driver_hourly_stats", + # The list of entities specifies the keys required for joining or looking + # up features from this feature view. The reference provided in this field + # correspond to the name of a defined entity (or entities) + entities=["driver_id"], + # The timedelta is the maximum age that each feature value may have + # relative to its lookup time. For historical features (used in training), + # TTL is relative to each timestamp provided in the entity dataframe. 
+ # TTL also allows for eviction of keys from online stores and limits the + # amount of historical scanning required for historical feature values + # during retrieval + ttl=timedelta(weeks=52), + # The list of features defined below act as a schema to both define features + # for both materialization of features into a store, and are used as references + # during retrieval for building a training dataset or serving features + features=[ + Feature(name="conv_rate", dtype=ValueType.FLOAT), + Feature(name="acc_rate", dtype=ValueType.FLOAT), + Feature(name="avg_daily_trips", dtype=ValueType.INT64), + ], + # Batch sources are used to find feature values. In the case of this feature + # view we will query a source table on Redshift for driver statistics + # features + batch_source=driver_stats_source, +) diff --git a/sdk/python/feast/templates/snowflake/feature_store.yaml b/sdk/python/feast/templates/snowflake/feature_store.yaml new file mode 100644 index 0000000000..9757ea2ead --- /dev/null +++ b/sdk/python/feast/templates/snowflake/feature_store.yaml @@ -0,0 +1,11 @@ +project: my_project +registry: registry.db +provider: local +offline_store: + type: snowflake.offline + account: SNOWFLAKE_DEPLOYMENT_URL + user: SNOWFLAKE_USER + password: SNOWFLAKE_PASSWORD + role: SNOWFLAKE_ROLE + warehouse: SNOWFLAKE_WAREHOUSE + database: SNOWFLAKE_DATABASE diff --git a/sdk/python/feast/templates/snowflake/test.py b/sdk/python/feast/templates/snowflake/test.py new file mode 100644 index 0000000000..32aa6380d5 --- /dev/null +++ b/sdk/python/feast/templates/snowflake/test.py @@ -0,0 +1,65 @@ +from datetime import datetime, timedelta + +import pandas as pd +from driver_repo import driver, driver_stats_fv + +from feast import FeatureStore + + +def main(): + pd.set_option("display.max_columns", None) + pd.set_option("display.width", 1000) + + # Load the feature store from the current path + fs = FeatureStore(repo_path=".") + + # Deploy the feature store to Snowflake + print("Deploying feature store to Snowflake...") + fs.apply([driver, driver_stats_fv]) + + # Select features + features = ["driver_hourly_stats:conv_rate", "driver_hourly_stats:acc_rate"] + + # Create an entity dataframe. 
This is the dataframe that will be enriched with historical features + entity_df = pd.DataFrame( + { + "event_timestamp": [ + pd.Timestamp(dt, unit="ms", tz="UTC").round("ms") + for dt in pd.date_range( + start=datetime.now() - timedelta(days=3), + end=datetime.now(), + periods=3, + ) + ], + "driver_id": [1001, 1002, 1003], + } + ) + + print("Retrieving training data...") + + # Retrieve historical features by joining the entity dataframe to the Snowflake table source + training_df = fs.get_historical_features( + features=features, entity_df=entity_df + ).to_df() + + print() + print(training_df) + + print() + print("Loading features into the online store...") + fs.materialize_incremental(end_date=datetime.now()) + + print() + print("Retrieving online features...") + + # Retrieve features from the online store + online_features = fs.get_online_features( + features=features, entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}], + ).to_dict() + + print() + print(pd.DataFrame.from_dict(online_features)) + + +if __name__ == "__main__": + main() diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 969ca65862..82827bce2a 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -12,14 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. -import re -from datetime import datetime -from typing import Any, Dict, List, Optional, Set, Sized, Tuple, Type +from datetime import datetime, timezone +from typing import ( + Any, + Dict, + List, + Optional, + Sequence, + Set, + Sized, + Tuple, + Type, + Union, + cast, +) import numpy as np import pandas as pd import pyarrow -from google.protobuf.pyext.cpp_message import GeneratedProtocolMessageType from google.protobuf.timestamp_pb2 import Timestamp from feast.protos.feast.types.Value_pb2 import ( @@ -32,7 +42,7 @@ StringList, ) from feast.protos.feast.types.Value_pb2 import Value as ProtoValue -from feast.value_type import ValueType +from feast.value_type import ListType, ValueType def feast_value_type_to_python_type(field_value_proto: ProtoValue) -> Any: @@ -50,8 +60,17 @@ def feast_value_type_to_python_type(field_value_proto: ProtoValue) -> Any: if val_attr is None: return None val = getattr(field_value_proto, val_attr) + + # If it's a _LIST type extract the list. 
if hasattr(val, "val"): val = list(val.val) + + # Convert UNIX_TIMESTAMP values to `datetime` + if val_attr == "unix_timestamp_list_val": + val = [datetime.fromtimestamp(v, tz=timezone.utc) for v in val] + elif val_attr == "unix_timestamp_val": + val = datetime.fromtimestamp(val, tz=timezone.utc) + return val @@ -97,6 +116,7 @@ def python_type_to_feast_value_type( type_map = { "int": ValueType.INT64, "str": ValueType.STRING, + "string": ValueType.STRING, # pandas.StringDtype "float": ValueType.DOUBLE, "bytes": ValueType.BYTES, "float64": ValueType.DOUBLE, @@ -105,6 +125,8 @@ def python_type_to_feast_value_type( "uint64": ValueType.INT64, "int32": ValueType.INT32, "uint32": ValueType.INT32, + "int16": ValueType.INT32, + "uint16": ValueType.INT32, "uint8": ValueType.INT32, "int8": ValueType.INT32, "bool": ValueType.BOOL, @@ -119,48 +141,50 @@ def python_type_to_feast_value_type( if type_name in type_map: return type_map[type_name] - if type_name == "ndarray" or isinstance(value, list): - if recurse: - - # Convert to list type - list_items = pd.core.series.Series(value) - - # This is the final type which we infer from the list - common_item_value_type = None - for item in list_items: - if isinstance(item, ProtoValue): - current_item_value_type: ValueType = _proto_value_to_value_type( - item - ) - else: - # Get the type from the current item, only one level deep - current_item_value_type = python_type_to_feast_value_type( - name=name, value=item, recurse=False - ) - # Validate whether the type stays consistent - if ( - common_item_value_type - and not common_item_value_type == current_item_value_type - ): - raise ValueError( - f"List value type for field {name} is inconsistent. " - f"{common_item_value_type} different from " - f"{current_item_value_type}." - ) - common_item_value_type = current_item_value_type - if common_item_value_type is None: - return ValueType.UNKNOWN - return ValueType[common_item_value_type.name + "_LIST"] - else: - assert value + if isinstance(value, np.ndarray) and str(value.dtype) in type_map: + item_type = type_map[str(value.dtype)] + return ValueType[item_type.name + "_LIST"] + + if isinstance(value, (list, np.ndarray)): + # if the value's type is "ndarray" and we couldn't infer from "value.dtype" + # this is most probably array of "object", + # so we need to iterate over objects and try to infer type of each item + if not recurse: raise ValueError( - f"Value type for field {name} is {value.dtype.__str__()} but " + f"Value type for field {name} is {type(value)} but " f"recursion is not allowed. Array types can only be one level " f"deep." ) - assert value - return type_map[value.dtype.__str__()] + # This is the final type which we infer from the list + common_item_value_type = None + for item in value: + if isinstance(item, ProtoValue): + current_item_value_type: ValueType = _proto_value_to_value_type(item) + else: + # Get the type from the current item, only one level deep + current_item_value_type = python_type_to_feast_value_type( + name=name, value=item, recurse=False + ) + # Validate whether the type stays consistent + if ( + common_item_value_type + and not common_item_value_type == current_item_value_type + ): + raise ValueError( + f"List value type for field {name} is inconsistent. " + f"{common_item_value_type} different from " + f"{current_item_value_type}." 
+ ) + common_item_value_type = current_item_value_type + if common_item_value_type is None: + return ValueType.UNKNOWN + return ValueType[common_item_value_type.name + "_LIST"] + + raise ValueError( + f"Value with native type {type_name} " + f"cannot be converted into Feast value type" + ) def python_values_to_feast_value_type( @@ -195,7 +219,7 @@ def _type_err(item, dtype): PYTHON_LIST_VALUE_TYPE_TO_PROTO_VALUE: Dict[ - ValueType, Tuple[GeneratedProtocolMessageType, str, List[Type]] + ValueType, Tuple[ListType, str, List[Type]] ] = { ValueType.FLOAT_LIST: ( FloatList, @@ -212,7 +236,7 @@ def _type_err(item, dtype): ValueType.UNIX_TIMESTAMP_LIST: ( Int64List, "int64_list_val", - [np.int64, np.int32, int], + [np.datetime64, np.int64, np.int32, int, datetime, Timestamp], ), ValueType.STRING_LIST: (StringList, "string_list_val", [np.str_, str]), ValueType.BOOL_LIST: (BoolList, "bool_list_val", [np.bool_, bool]), @@ -238,6 +262,28 @@ def _type_err(item, dtype): } +def _python_datetime_to_int_timestamp( + values: Sequence[Any], +) -> Sequence[Union[int, np.int_]]: + # Fast path for Numpy array. + if isinstance(values, np.ndarray) and isinstance(values.dtype, np.datetime64): + if values.ndim != 1: + raise ValueError("Only 1 dimensional arrays are supported.") + return cast(Sequence[np.int_], values.astype("datetime64[s]").astype(np.int_)) + + int_timestamps = [] + for value in values: + if isinstance(value, datetime): + int_timestamps.append(int(value.timestamp())) + elif isinstance(value, Timestamp): + int_timestamps.append(int(value.ToSeconds())) + elif isinstance(value, np.datetime64): + int_timestamps.append(value.astype("datetime64[s]").astype(np.int_)) + else: + int_timestamps.append(int(value)) + return int_timestamps + + def _python_value_to_proto_value( feast_value_type: ValueType, values: List[Any] ) -> List[ProtoValue]: @@ -254,9 +300,6 @@ def _python_value_to_proto_value( """ # ToDo: make a better sample for type checks (more than one element) sample = next(filter(_non_empty_value, values), None) # first not empty value - if sample is None: - # all input values are None or empty lists - return [ProtoValue()] * len(values) # Detect list type and handle separately if "list" in feast_value_type.name.lower(): @@ -266,14 +309,26 @@ def _python_value_to_proto_value( feast_value_type ] - if not all(type(item) in valid_types for item in sample): + if sample is not None and not all( + type(item) in valid_types for item in sample + ): first_invalid = next( item for item in sample if type(item) not in valid_types ) raise _type_err(first_invalid, valid_types[0]) + if feast_value_type == ValueType.UNIX_TIMESTAMP_LIST: + int_timestamps_lists = ( + _python_datetime_to_int_timestamp(value) for value in values + ) + return [ + # ProtoValue does actually accept `np.int_` but the typing complains. 
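The rewritten list handling above maps dtype-backed NumPy arrays straight from their dtype, while plain lists (and object arrays) fall back to per-item inference that may recurse only one level. A sketch of both paths, assuming the function's default is to recurse on the outer call; the field names "trips" and "rates" are illustrative:

    import numpy as np

    from feast.type_map import python_type_to_feast_value_type
    from feast.value_type import ValueType

    # Fast path: the array dtype ("int64") is found in type_map and promoted
    # to the matching *_LIST type.
    arr = np.array([1, 2, 3], dtype=np.int64)
    assert python_type_to_feast_value_type(name="trips", value=arr) == ValueType.INT64_LIST

    # Fallback: each item is inferred one level deep; mixing element types
    # would raise the "List value type ... is inconsistent" ValueError above.
    assert python_type_to_feast_value_type(name="rates", value=[0.1, 0.2]) == ValueType.DOUBLE_LIST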
+ ProtoValue(unix_timestamp_list_val=Int64List(val=ts)) # type: ignore + for ts in int_timestamps_lists + ] + return [ - ProtoValue(**{field_name: proto_type(val=value)}) + ProtoValue(**{field_name: proto_type(val=value)}) # type: ignore if value is not None else ProtoValue() for value in values @@ -281,16 +336,14 @@ def _python_value_to_proto_value( # Handle scalar types below else: + if sample is None: + # all input values are None + return [ProtoValue()] * len(values) + if feast_value_type == ValueType.UNIX_TIMESTAMP: - if isinstance(sample, datetime): - return [ - ProtoValue(int64_val=int(value.timestamp())) for value in values - ] - elif isinstance(sample, Timestamp): - return [ - ProtoValue(int64_val=int(value.ToSeconds())) for value in values - ] - return [ProtoValue(int64_val=int(value)) for value in values] + int_timestamps = _python_datetime_to_int_timestamp(values) + # ProtoValue does actually accept `np.int_` but the typing complains. + return [ProtoValue(unix_timestamp_val=ts) for ts in int_timestamps] # type: ignore if feast_value_type in PYTHON_SCALAR_VALUE_TYPE_TO_PROTO_VALUE: ( @@ -365,30 +418,38 @@ def _proto_value_to_value_type(proto_value: ProtoValue) -> ValueType: def pa_to_feast_value_type(pa_type_as_str: str) -> ValueType: - if re.match(r"^timestamp", pa_type_as_str): - return ValueType.INT64 + is_list = False + if pa_type_as_str.startswith("list<item: "): + is_list = True + pa_type_as_str = pa_type_as_str.replace("list<item: ", "").replace(">", "") - type_map = { - "int32": ValueType.INT32, - "int64": ValueType.INT64, - "double": ValueType.DOUBLE, - "float": ValueType.FLOAT, - "string": ValueType.STRING, - "binary": ValueType.BYTES, - "bool": ValueType.BOOL, - "list<item: int32>": ValueType.INT32_LIST, - "list<item: int64>": ValueType.INT64_LIST, - "list<item: double>": ValueType.DOUBLE_LIST, - "list<item: float>": ValueType.FLOAT_LIST, - "list<item: string>": ValueType.STRING_LIST, - "list<item: binary>": ValueType.BYTES_LIST, - "list<item: bool>": ValueType.BOOL_LIST, - "null": ValueType.NULL, - } - return type_map[pa_type_as_str] + if pa_type_as_str.startswith("timestamp"): + value_type = ValueType.UNIX_TIMESTAMP + else: + type_map = { + "int32": ValueType.INT32, + "int64": ValueType.INT64, + "double": ValueType.DOUBLE, + "float": ValueType.FLOAT, + "string": ValueType.STRING, + "binary": ValueType.BYTES, + "bool": ValueType.BOOL, + "null": ValueType.NULL, + } + value_type = type_map[pa_type_as_str] + + if is_list: + value_type = ValueType[value_type.name + "_LIST"] + + return value_type def bq_to_feast_value_type(bq_type_as_str: str) -> ValueType: + is_list = False + if bq_type_as_str.startswith("ARRAY<"): + is_list = True + bq_type_as_str = bq_type_as_str[6:-1] + type_map: Dict[str, ValueType] = { "DATETIME": ValueType.UNIX_TIMESTAMP, "TIMESTAMP": ValueType.UNIX_TIMESTAMP, @@ -400,15 +461,14 @@ def bq_to_feast_value_type(bq_type_as_str: str) -> ValueType: "BYTES": ValueType.BYTES, "BOOL": ValueType.BOOL, "BOOLEAN": ValueType.BOOL, # legacy sql data type - "ARRAY<INT64>": ValueType.INT64_LIST, - "ARRAY<FLOAT64>": ValueType.DOUBLE_LIST, - "ARRAY<STRING>": ValueType.STRING_LIST, - "ARRAY<BYTES>": ValueType.BYTES_LIST, - "ARRAY<BOOL>": ValueType.BOOL_LIST, "NULL": ValueType.NULL, } - return type_map[bq_type_as_str] + value_type = type_map[bq_type_as_str] + if is_list: + value_type = ValueType[value_type.name + "_LIST"] + + return value_type def redshift_to_feast_value_type(redshift_type_as_str: str) -> ValueType: @@ -431,6 +491,28 @@ def redshift_to_feast_value_type(redshift_type_as_str: str) -> ValueType: return type_map[redshift_type_as_str.lower()] +def snowflake_python_type_to_feast_value_type( + snowflake_python_type_as_str: str, +) -> ValueType: + + type_map = { + "str":
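Both converters above now unwrap container types first (ARRAY<...> for BigQuery, list<item: ...> for PyArrow) and then promote the element type to its *_LIST counterpart, with timestamps mapping to UNIX_TIMESTAMP rather than a bare integer type. A few spot checks of that behaviour, given the unwrap-then-promote logic shown above:

    from feast.type_map import bq_to_feast_value_type, pa_to_feast_value_type
    from feast.value_type import ValueType

    # BigQuery: the ARRAY<> wrapper is stripped, then the element type is promoted.
    assert bq_to_feast_value_type("ARRAY<INT64>") == ValueType.INT64_LIST
    assert bq_to_feast_value_type("TIMESTAMP") == ValueType.UNIX_TIMESTAMP

    # PyArrow: the same logic applied to the string form of the Arrow type.
    assert pa_to_feast_value_type("list<item: double>") == ValueType.DOUBLE_LIST
    assert pa_to_feast_value_type("timestamp[us]") == ValueType.UNIX_TIMESTAMP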
ValueType.STRING, + "float64": ValueType.DOUBLE, + "int64": ValueType.INT64, + "uint64": ValueType.INT64, + "int32": ValueType.INT32, + "uint32": ValueType.INT32, + "int16": ValueType.INT32, + "uint16": ValueType.INT32, + "uint8": ValueType.INT32, + "int8": ValueType.INT32, + "datetime64[ns]": ValueType.UNIX_TIMESTAMP, + "object": ValueType.UNKNOWN, + } + + return type_map[snowflake_python_type_as_str.lower()] + + def pa_to_redshift_value_type(pa_type: pyarrow.DataType) -> str: # PyArrow types: https://arrow.apache.org/docs/python/api/datatypes.html # Redshift type: https://docs.aws.amazon.com/redshift/latest/dg/c_Supported_data_types.html diff --git a/sdk/python/feast/usage.py b/sdk/python/feast/usage.py index 1a2bf2e290..6a6a7146ce 100644 --- a/sdk/python/feast/usage.py +++ b/sdk/python/feast/usage.py @@ -29,7 +29,7 @@ import requests -from feast.constants import FEAST_USAGE +from feast.constants import DEFAULT_FEAST_USAGE_VALUE, FEAST_USAGE from feast.version import get_version USAGE_ENDPOINT = "https://usage.feast.dev" @@ -37,7 +37,7 @@ _logger = logging.getLogger(__name__) _executor = concurrent.futures.ThreadPoolExecutor(max_workers=1) -_is_enabled = os.getenv(FEAST_USAGE, default="True") == "True" +_is_enabled = os.getenv(FEAST_USAGE, default=DEFAULT_FEAST_USAGE_VALUE) == "True" _constant_attributes = { "session_id": str(uuid.uuid4()), diff --git a/sdk/python/feast/value_type.py b/sdk/python/feast/value_type.py index 3d1817421a..1904baf7bb 100644 --- a/sdk/python/feast/value_type.py +++ b/sdk/python/feast/value_type.py @@ -12,6 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. import enum +from typing import Type, Union + +from feast.protos.feast.types.Value_pb2 import ( + BoolList, + BytesList, + DoubleList, + FloatList, + Int32List, + Int64List, + StringList, +) class ValueType(enum.Enum): @@ -37,3 +48,14 @@ class ValueType(enum.Enum): BOOL_LIST = 17 UNIX_TIMESTAMP_LIST = 18 NULL = 19 + + +ListType = Union[ + Type[BoolList], + Type[BytesList], + Type[DoubleList], + Type[FloatList], + Type[Int32List], + Type[Int64List], + Type[StringList], +] diff --git a/sdk/python/requirements/py3.7-ci-requirements.txt b/sdk/python/requirements/py3.7-ci-requirements.txt index 017652873a..d5f654e515 100644 --- a/sdk/python/requirements/py3.7-ci-requirements.txt +++ b/sdk/python/requirements/py3.7-ci-requirements.txt @@ -4,7 +4,7 @@ # # pip-compile --extra=ci --output-file=requirements/py3.7-ci-requirements.txt # -absl-py==0.12.0 +absl-py==1.0.0 # via tensorflow-metadata adal==1.2.7 # via @@ -20,19 +20,33 @@ aiosignal==1.2.0 # via aiohttp alabaster==0.7.12 # via sphinx -anyio==3.4.0 +altair==4.2.0 + # via great-expectations +anyio==3.5.0 # via starlette appdirs==1.4.4 # via black -asgiref==3.4.1 +appnope==0.1.2 + # via + # ipykernel + # ipython +argon2-cffi==21.3.0 + # via notebook +argon2-cffi-bindings==21.2.0 + # via argon2-cffi +asgiref==3.5.0 # via uvicorn +asn1crypto==1.4.0 + # via + # oscrypto + # snowflake-connector-python assertpy==1.1 # via feast (setup.py) -async-timeout==4.0.1 +async-timeout==4.0.2 # via aiohttp asynctest==0.13.0 # via aiohttp -attrs==21.2.0 +attrs==21.4.0 # via # aiohttp # black @@ -40,7 +54,7 @@ attrs==21.2.0 # pytest avro==1.10.0 # via feast (setup.py) -azure-core==1.21.0 +azure-core==1.21.1 # via # adlfs # azure-identity @@ -53,15 +67,21 @@ azure-storage-blob==12.9.0 # via adlfs babel==2.9.1 # via sphinx -backports.entry-points-selectable==1.1.1 - # via virtualenv +backcall==0.2.0 + # via ipython 
+backports.zoneinfo==0.2.1 + # via + # pytz-deprecation-shim + # tzlocal black==19.10b0 # via feast (setup.py) -boto3==1.17.112 +bleach==4.1.0 + # via nbconvert +boto3==1.20.46 # via # feast (setup.py) # moto -botocore==1.20.112 +botocore==1.23.46 # via # boto3 # moto @@ -75,25 +95,30 @@ certifi==2021.10.8 # minio # msrest # requests + # snowflake-connector-python cffi==1.15.0 # via + # argon2-cffi-bindings # azure-datalake-store # cryptography + # snowflake-connector-python cfgv==3.3.1 # via pre-commit -charset-normalizer==2.0.8 +charset-normalizer==2.0.11 # via # aiohttp # requests -click==7.1.2 + # snowflake-connector-python +click==8.0.3 # via # black # feast (setup.py) + # great-expectations # pip-tools # uvicorn colorama==0.4.4 # via feast (setup.py) -coverage[toml]==6.2 +coverage[toml]==6.3 # via pytest-cov cryptography==3.3.2 # via @@ -103,13 +128,24 @@ cryptography==3.3.2 # feast (setup.py) # moto # msal -decorator==5.1.0 - # via gcsfs + # pyjwt + # pyopenssl + # snowflake-connector-python +debugpy==1.5.1 + # via ipykernel +decorator==5.1.1 + # via + # gcsfs + # ipython +defusedxml==0.7.1 + # via nbconvert +deprecated==1.2.13 + # via redis deprecation==2.1.0 # via testcontainers dill==0.3.4 # via feast (setup.py) -distlib==0.3.3 +distlib==0.3.4 # via virtualenv docker==5.0.3 # via @@ -119,31 +155,36 @@ docutils==0.17.1 # via # sphinx # sphinx-rtd-theme +entrypoints==0.3 + # via + # altair + # jupyter-client + # nbconvert execnet==1.9.0 # via pytest-xdist -fastapi==0.70.0 +fastapi==0.73.0 # via feast (setup.py) -fastavro==1.4.7 +fastavro==1.4.9 # via # feast (setup.py) # pandavro -filelock==3.4.0 +filelock==3.4.2 # via virtualenv firebase-admin==4.5.2 # via feast (setup.py) flake8==4.0.1 # via feast (setup.py) -frozenlist==1.2.0 +frozenlist==1.3.0 # via # aiohttp # aiosignal -fsspec==2021.11.1 +fsspec==2022.1.0 # via # adlfs # gcsfs -gcsfs==2021.11.1 +gcsfs==2022.1.0 # via feast (setup.py) -google-api-core[grpc]==1.31.4 +google-api-core[grpc]==1.31.5 # via # feast (setup.py) # firebase-admin @@ -153,7 +194,7 @@ google-api-core[grpc]==1.31.4 # google-cloud-core # google-cloud-datastore # google-cloud-firestore -google-api-python-client==2.32.0 +google-api-python-client==2.36.0 # via firebase-admin google-auth==1.35.0 # via @@ -162,16 +203,17 @@ google-auth==1.35.0 # google-api-python-client # google-auth-httplib2 # google-auth-oauthlib + # google-cloud-core # google-cloud-storage google-auth-httplib2==0.1.0 # via google-api-python-client google-auth-oauthlib==0.4.6 # via gcsfs -google-cloud-bigquery==2.31.0 +google-cloud-bigquery==2.32.0 # via feast (setup.py) -google-cloud-bigquery-storage==2.10.1 +google-cloud-bigquery-storage==2.11.0 # via feast (setup.py) -google-cloud-core==1.4.4 +google-cloud-core==1.7.2 # via # feast (setup.py) # google-cloud-bigquery @@ -198,7 +240,9 @@ googleapis-common-protos==1.52.0 # feast (setup.py) # google-api-core # tensorflow-metadata -grpcio==1.42.0 +great-expectations==0.14.4 + # via feast (setup.py) +grpcio==1.43.0 # via # feast (setup.py) # google-api-core @@ -206,13 +250,13 @@ grpcio==1.42.0 # grpcio-reflection # grpcio-testing # grpcio-tools -grpcio-reflection==1.42.0 +grpcio-reflection==1.43.0 # via feast (setup.py) grpcio-testing==1.34.0 # via feast (setup.py) grpcio-tools==1.34.0 # via feast (setup.py) -h11==0.12.0 +h11==0.13.0 # via uvicorn hiredis==2.0.0 # via feast (setup.py) @@ -220,48 +264,95 @@ httplib2==0.20.2 # via # google-api-python-client # google-auth-httplib2 -httptools==0.2.0 +httptools==0.3.0 # via uvicorn -identify==2.4.0 
+identify==2.4.7 # via pre-commit idna==3.3 # via # anyio # requests + # snowflake-connector-python # yarl imagesize==1.3.0 # via sphinx importlib-metadata==4.2.0 # via - # backports.entry-points-selectable + # click # flake8 + # great-expectations # jsonschema # moto # pep517 # pluggy # pre-commit # pytest + # redis # virtualenv importlib-resources==5.4.0 # via jsonschema iniconfig==1.1.1 # via pytest -isodate==0.6.0 +ipykernel==6.7.0 + # via + # ipywidgets + # notebook +ipython==7.31.1 + # via + # ipykernel + # ipywidgets +ipython-genutils==0.2.0 + # via + # ipywidgets + # nbformat + # notebook +ipywidgets==7.6.5 + # via great-expectations +isodate==0.6.1 # via msrest isort==5.10.1 # via feast (setup.py) +jedi==0.18.1 + # via ipython jinja2==3.0.3 # via + # altair # feast (setup.py) + # great-expectations # moto + # nbconvert + # notebook # sphinx jmespath==0.10.0 # via # boto3 # botocore -jsonschema==4.2.1 - # via feast (setup.py) -libcst==0.3.23 +jsonpatch==1.32 + # via great-expectations +jsonpointer==2.2 + # via jsonpatch +jsonschema==4.4.0 + # via + # altair + # feast (setup.py) + # great-expectations + # nbformat +jupyter-client==7.1.2 + # via + # ipykernel + # nbclient + # notebook +jupyter-core==4.9.1 + # via + # jupyter-client + # nbconvert + # nbformat + # notebook +jupyterlab-pygments==0.1.2 + # via nbconvert +jupyterlab-widgets==1.0.2 + # via ipywidgets +libcst==0.4.1 # via # google-cloud-bigquery-storage # google-cloud-datastore @@ -269,23 +360,29 @@ markupsafe==2.0.1 # via # jinja2 # moto +matplotlib-inline==0.1.3 + # via + # ipykernel + # ipython mccabe==0.6.1 # via flake8 minio==7.1.0 # via feast (setup.py) +mistune==0.8.4 + # via + # great-expectations + # nbconvert mmh3==3.0.0 # via feast (setup.py) mock==2.0.0 # via feast (setup.py) -more-itertools==8.12.0 - # via pytest -moto==2.2.17 +moto==3.0.2 # via feast (setup.py) msal==1.16.0 # via # azure-identity # msal-extensions -msal-extensions==0.3.0 +msal-extensions==0.3.1 # via azure-identity msgpack==1.0.3 # via cachecontrol @@ -295,57 +392,97 @@ msrest==0.6.21 # msrestazure msrestazure==0.6.4 # via adlfs -multidict==5.2.0 +multidict==6.0.2 # via # aiohttp # yarl -mypy==0.790 +mypy==0.931 # via feast (setup.py) mypy-extensions==0.4.3 # via # mypy # typing-inspect -mypy-protobuf==1.24 - # via feast (setup.py) +mypy-protobuf==3.1.0 + # via feast (setup.py) +nbclient==0.5.10 + # via nbconvert +nbconvert==6.4.1 + # via notebook +nbformat==5.1.3 + # via + # ipywidgets + # nbclient + # nbconvert + # notebook +nest-asyncio==1.5.4 + # via + # ipykernel + # jupyter-client + # nbclient + # notebook nodeenv==1.6.0 # via pre-commit -numpy==1.21.4 +notebook==6.4.8 + # via widgetsnbextension +numpy==1.21.5 # via + # altair + # great-expectations # pandas # pandavro # pyarrow -oauthlib==3.1.1 + # scipy +oauthlib==3.2.0 # via requests-oauthlib +oscrypto==1.2.1 + # via snowflake-connector-python packaging==21.3 # via + # bleach # deprecation # google-api-core # google-cloud-bigquery # google-cloud-firestore # pytest + # redis # sphinx -pandas==1.3.4 +pandas==1.3.5 # via + # altair # feast (setup.py) + # great-expectations # pandavro + # snowflake-connector-python pandavro==1.5.2 # via feast (setup.py) +pandocfilters==1.5.0 + # via nbconvert +parso==0.8.3 + # via jedi pathspec==0.9.0 # via black pbr==5.8.0 # via mock pep517==0.12.0 # via pip-tools +pexpect==4.8.0 + # via ipython +pickleshare==0.7.5 + # via ipython pip-tools==6.4.0 # via feast (setup.py) -platformdirs==2.4.0 +platformdirs==2.4.1 # via virtualenv -pluggy==0.13.1 
+pluggy==1.0.0 # via pytest -portalocker==1.7.1 +portalocker==2.3.2 # via msal-extensions -pre-commit==2.16.0 +pre-commit==2.17.0 # via feast (setup.py) +prometheus-client==0.13.1 + # via notebook +prompt-toolkit==3.0.26 + # via ipython proto-plus==1.19.6 # via # feast (setup.py) @@ -353,7 +490,7 @@ proto-plus==1.19.6 # google-cloud-bigquery-storage # google-cloud-datastore # google-cloud-firestore -protobuf==3.19.1 +protobuf==3.19.4 # via # feast (setup.py) # google-api-core @@ -365,6 +502,10 @@ protobuf==3.19.1 # mypy-protobuf # proto-plus # tensorflow-metadata +ptyprocess==0.7.0 + # via + # pexpect + # terminado py==1.11.0 # via # pytest @@ -372,7 +513,9 @@ py==1.11.0 py-cpuinfo==8.0.0 # via pytest-benchmark pyarrow==6.0.1 - # via feast (setup.py) + # via + # feast (setup.py) + # snowflake-connector-python pyasn1==0.4.8 # via # pyasn1-modules @@ -383,25 +526,35 @@ pycodestyle==2.8.0 # via flake8 pycparser==2.21 # via cffi -pydantic==1.8.2 +pycryptodomex==3.14.0 + # via snowflake-connector-python +pydantic==1.9.0 # via # fastapi # feast (setup.py) pyflakes==2.4.0 # via flake8 -pygments==2.10.0 - # via sphinx +pygments==2.11.2 + # via + # ipython + # jupyterlab-pygments + # nbconvert + # sphinx pyjwt[crypto]==2.3.0 # via # adal # msal -pyparsing==3.0.6 + # snowflake-connector-python +pyopenssl==21.0.0 + # via snowflake-connector-python +pyparsing==2.4.7 # via + # great-expectations # httplib2 # packaging -pyrsistent==0.18.0 +pyrsistent==0.18.1 # via jsonschema -pytest==6.0.0 +pytest==6.2.5 # via # feast (setup.py) # pytest-benchmark @@ -416,7 +569,7 @@ pytest-benchmark==3.4.1 # via feast (setup.py) pytest-cov==3.0.0 # via feast (setup.py) -pytest-forked==1.3.0 +pytest-forked==1.4.0 # via pytest-xdist pytest-lazy-fixture==0.6.3 # via feast (setup.py) @@ -426,13 +579,15 @@ pytest-ordering==0.6 # via feast (setup.py) pytest-timeout==1.4.2 # via feast (setup.py) -pytest-xdist==2.4.0 +pytest-xdist==2.5.0 # via feast (setup.py) python-dateutil==2.8.2 # via # adal # botocore # google-cloud-bigquery + # great-expectations + # jupyter-client # moto # pandas python-dotenv==0.19.2 @@ -441,21 +596,27 @@ pytz==2021.3 # via # babel # google-api-core + # great-expectations # moto # pandas + # snowflake-connector-python +pytz-deprecation-shim==0.1.0.post0 + # via tzlocal pyyaml==6.0 # via # feast (setup.py) # libcst # pre-commit # uvicorn -redis==3.5.3 - # via redis-py-cluster -redis-py-cluster==2.1.2 +pyzmq==22.3.0 + # via + # jupyter-client + # notebook +redis==4.1.2 # via feast (setup.py) -regex==2021.11.10 +regex==2022.1.18 # via black -requests==2.26.0 +requests==2.27.1 # via # adal # adlfs @@ -467,27 +628,38 @@ requests==2.26.0 # google-api-core # google-cloud-bigquery # google-cloud-storage + # great-expectations # moto # msal # msrest # requests-oauthlib # responses + # snowflake-connector-python # sphinx -requests-oauthlib==1.3.0 +requests-oauthlib==1.3.1 # via # google-auth-oauthlib # msrest -responses==0.16.0 +responses==0.17.0 # via moto rsa==4.8 # via google-auth -s3transfer==0.4.2 +ruamel.yaml==0.17.17 + # via great-expectations +ruamel.yaml.clib==0.2.6 + # via ruamel.yaml +s3transfer==0.5.0 # via boto3 +scipy==1.7.3 + # via great-expectations +send2trash==1.8.0 + # via notebook six==1.16.0 # via # absl-py # azure-core # azure-identity + # bleach # cryptography # google-api-core # google-auth @@ -495,9 +667,11 @@ six==1.16.0 # google-cloud-core # google-resumable-media # grpcio + # isodate # mock # msrestazure # pandavro + # pyopenssl # python-dateutil # responses # virtualenv @@ -505,7 
+679,9 @@ sniffio==1.2.0 # via anyio snowballstemmer==2.2.0 # via sphinx -sphinx==4.3.1 +snowflake-connector-python[pandas]==2.7.3 + # via feast (setup.py) +sphinx==4.3.2 # via # feast (setup.py) # sphinx-rtd-theme @@ -523,39 +699,94 @@ sphinxcontrib-qthelp==1.0.3 # via sphinx sphinxcontrib-serializinghtml==1.1.5 # via sphinx -starlette==0.16.0 +starlette==0.17.1 # via fastapi tabulate==0.8.9 # via feast (setup.py) tenacity==8.0.1 # via feast (setup.py) -tensorflow-metadata==1.5.0 +tensorflow-metadata==1.6.0 # via feast (setup.py) +termcolor==1.1.0 + # via great-expectations +terminado==0.13.1 + # via notebook testcontainers==3.4.2 # via feast (setup.py) +testpath==0.5.0 + # via nbconvert toml==0.10.2 # via # black # feast (setup.py) # pre-commit # pytest -tomli==1.2.2 +tomli==2.0.0 # via # coverage + # mypy # pep517 +toolz==0.11.2 + # via altair +tornado==6.1 + # via + # ipykernel + # jupyter-client + # notebook + # terminado tqdm==4.62.3 - # via feast (setup.py) -typed-ast==1.4.3 + # via + # feast (setup.py) + # great-expectations +traitlets==5.1.1 + # via + # ipykernel + # ipython + # ipywidgets + # jupyter-client + # jupyter-core + # matplotlib-inline + # nbclient + # nbconvert + # nbformat + # notebook +typed-ast==1.5.2 # via # black # mypy +types-futures==3.3.8 + # via types-protobuf +types-protobuf==3.19.7 + # via + # feast (setup.py) + # mypy-protobuf +types-python-dateutil==2.8.9 + # via feast (setup.py) +types-pytz==2021.3.4 + # via feast (setup.py) +types-pyyaml==6.0.4 + # via feast (setup.py) +types-redis==4.1.13 + # via feast (setup.py) +types-requests==2.27.8 + # via feast (setup.py) +types-setuptools==57.4.8 + # via feast (setup.py) +types-tabulate==0.8.5 + # via feast (setup.py) +types-urllib3==1.26.8 + # via types-requests typing-extensions==4.0.1 # via # aiohttp # anyio + # argon2-cffi # asgiref # async-timeout + # great-expectations + # h11 # importlib-metadata + # jsonschema # libcst # mypy # pydantic @@ -565,38 +796,50 @@ typing-extensions==4.0.1 # yarl typing-inspect==0.7.1 # via libcst +tzdata==2021.5 + # via pytz-deprecation-shim +tzlocal==4.1 + # via great-expectations uritemplate==4.1.1 # via google-api-python-client -urllib3==1.26.7 +urllib3==1.26.8 # via # botocore # feast (setup.py) # minio # requests # responses -uvicorn[standard]==0.15.0 +uvicorn[standard]==0.17.1 # via feast (setup.py) uvloop==0.16.0 # via uvicorn -virtualenv==20.10.0 +virtualenv==20.13.0 # via pre-commit watchgod==0.7 # via uvicorn -websocket-client==1.2.1 +wcwidth==0.2.5 + # via prompt-toolkit +webencodings==0.5.1 + # via bleach +websocket-client==1.2.3 # via docker websockets==10.1 # via uvicorn werkzeug==2.0.2 # via moto -wheel==0.37.0 +wheel==0.37.1 # via pip-tools +widgetsnbextension==3.5.2 + # via ipywidgets wrapt==1.13.3 - # via testcontainers + # via + # deprecated + # testcontainers xmltodict==0.12.0 # via moto yarl==1.7.2 # via aiohttp -zipp==3.6.0 +zipp==3.7.0 # via # importlib-metadata # importlib-resources diff --git a/sdk/python/requirements/py3.7-requirements.txt b/sdk/python/requirements/py3.7-requirements.txt index b2473f1c70..c2ad63fdea 100644 --- a/sdk/python/requirements/py3.7-requirements.txt +++ b/sdk/python/requirements/py3.7-requirements.txt @@ -4,21 +4,21 @@ # # pip-compile --output-file=requirements/py3.7-requirements.txt # -absl-py==0.12.0 +absl-py==1.0.0 # via tensorflow-metadata -anyio==3.4.0 +anyio==3.5.0 # via starlette asgiref==3.4.1 # via uvicorn -attrs==21.2.0 +attrs==21.4.0 # via jsonschema cachetools==4.2.4 # via google-auth certifi==2021.10.8 # via 
requests -charset-normalizer==2.0.8 +charset-normalizer==2.0.10 # via requests -click==7.1.2 +click==8.0.3 # via # feast (setup.py) # uvicorn @@ -26,13 +26,13 @@ colorama==0.4.4 # via feast (setup.py) dill==0.3.4 # via feast (setup.py) -fastapi==0.70.0 +fastapi==0.72.0 # via feast (setup.py) -fastavro==1.4.7 +fastavro==1.4.9 # via # feast (setup.py) # pandavro -google-api-core==2.2.2 +google-api-core==2.4.0 # via feast (setup.py) google-auth==2.3.3 # via google-api-core @@ -41,38 +41,40 @@ googleapis-common-protos==1.52.0 # feast (setup.py) # google-api-core # tensorflow-metadata -grpcio==1.42.0 +grpcio==1.43.0 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.42.0 +grpcio-reflection==1.43.0 # via feast (setup.py) -h11==0.12.0 +h11==0.13.0 # via uvicorn -httptools==0.2.0 +httptools==0.3.0 # via uvicorn idna==3.3 # via # anyio # requests -importlib-metadata==4.8.2 - # via jsonschema +importlib-metadata==4.10.1 + # via + # click + # jsonschema importlib-resources==5.4.0 # via jsonschema jinja2==3.0.3 # via feast (setup.py) -jsonschema==4.2.1 +jsonschema==4.4.0 # via feast (setup.py) markupsafe==2.0.1 # via jinja2 mmh3==3.0.0 # via feast (setup.py) -numpy==1.21.4 +numpy==1.21.5 # via # pandas # pandavro # pyarrow -pandas==1.3.4 +pandas==1.3.5 # via # feast (setup.py) # pandavro @@ -80,7 +82,7 @@ pandavro==1.5.2 # via feast (setup.py) proto-plus==1.19.6 # via feast (setup.py) -protobuf==3.19.1 +protobuf==3.19.3 # via # feast (setup.py) # google-api-core @@ -96,11 +98,11 @@ pyasn1==0.4.8 # rsa pyasn1-modules==0.2.8 # via google-auth -pydantic==1.8.2 +pydantic==1.9.0 # via # fastapi # feast (setup.py) -pyrsistent==0.18.0 +pyrsistent==0.18.1 # via jsonschema python-dateutil==2.8.2 # via pandas @@ -112,7 +114,7 @@ pyyaml==6.0 # via # feast (setup.py) # uvicorn -requests==2.26.0 +requests==2.27.1 # via google-api-core rsa==4.8 # via google-auth @@ -125,13 +127,13 @@ six==1.16.0 # python-dateutil sniffio==1.2.0 # via anyio -starlette==0.16.0 +starlette==0.17.1 # via fastapi tabulate==0.8.9 # via feast (setup.py) tenacity==8.0.1 # via feast (setup.py) -tensorflow-metadata==1.5.0 +tensorflow-metadata==1.6.0 # via feast (setup.py) toml==0.10.2 # via feast (setup.py) @@ -141,13 +143,15 @@ typing-extensions==4.0.1 # via # anyio # asgiref + # h11 # importlib-metadata + # jsonschema # pydantic # starlette # uvicorn -urllib3==1.26.7 +urllib3==1.26.8 # via requests -uvicorn[standard]==0.15.0 +uvicorn[standard]==0.17.0 # via feast (setup.py) uvloop==0.16.0 # via uvicorn @@ -155,7 +159,7 @@ watchgod==0.7 # via uvicorn websockets==10.1 # via uvicorn -zipp==3.6.0 +zipp==3.7.0 # via # importlib-metadata # importlib-resources diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index a2df153c01..7a94294c95 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -4,7 +4,7 @@ # # pip-compile --extra=ci --output-file=requirements/py3.8-ci-requirements.txt # -absl-py==0.12.0 +absl-py==1.0.0 # via tensorflow-metadata adal==1.2.7 # via @@ -20,17 +20,33 @@ aiosignal==1.2.0 # via aiohttp alabaster==0.7.12 # via sphinx -anyio==3.4.0 +altair==4.2.0 + # via great-expectations +anyio==3.5.0 # via starlette appdirs==1.4.4 # via black -asgiref==3.4.1 +appnope==0.1.2 + # via + # ipykernel + # ipython +argon2-cffi==21.3.0 + # via notebook +argon2-cffi-bindings==21.2.0 + # via argon2-cffi +asgiref==3.5.0 # via uvicorn +asn1crypto==1.4.0 + # via + # oscrypto + # snowflake-connector-python 
assertpy==1.1 # via feast (setup.py) -async-timeout==4.0.1 +async-timeout==4.0.2 + # via aiohttp +asynctest==0.13.0 # via aiohttp -attrs==21.2.0 +attrs==21.4.0 # via # aiohttp # black @@ -38,7 +54,7 @@ attrs==21.2.0 # pytest avro==1.10.0 # via feast (setup.py) -azure-core==1.21.0 +azure-core==1.21.1 # via # adlfs # azure-identity @@ -51,15 +67,21 @@ azure-storage-blob==12.9.0 # via adlfs babel==2.9.1 # via sphinx -backports.entry-points-selectable==1.1.1 - # via virtualenv +backcall==0.2.0 + # via ipython +backports.zoneinfo==0.2.1 + # via + # pytz-deprecation-shim + # tzlocal black==19.10b0 # via feast (setup.py) -boto3==1.17.112 +bleach==4.1.0 + # via nbconvert +boto3==1.20.46 # via # feast (setup.py) # moto -botocore==1.20.112 +botocore==1.23.46 # via # boto3 # moto @@ -73,25 +95,30 @@ certifi==2021.10.8 # minio # msrest # requests + # snowflake-connector-python cffi==1.15.0 # via + # argon2-cffi-bindings # azure-datalake-store # cryptography + # snowflake-connector-python cfgv==3.3.1 # via pre-commit -charset-normalizer==2.0.8 +charset-normalizer==2.0.11 # via # aiohttp # requests -click==7.1.2 + # snowflake-connector-python +click==8.0.3 # via # black # feast (setup.py) + # great-expectations # pip-tools # uvicorn colorama==0.4.4 # via feast (setup.py) -coverage[toml]==6.2 +coverage[toml]==6.3 # via pytest-cov cryptography==3.3.2 # via @@ -101,13 +128,24 @@ cryptography==3.3.2 # feast (setup.py) # moto # msal -decorator==5.1.0 - # via gcsfs + # pyjwt + # pyopenssl + # snowflake-connector-python +debugpy==1.5.1 + # via ipykernel +decorator==5.1.1 + # via + # gcsfs + # ipython +defusedxml==0.7.1 + # via nbconvert +deprecated==1.2.13 + # via redis deprecation==2.1.0 # via testcontainers dill==0.3.4 # via feast (setup.py) -distlib==0.3.3 +distlib==0.3.4 # via virtualenv docker==5.0.3 # via @@ -117,31 +155,36 @@ docutils==0.17.1 # via # sphinx # sphinx-rtd-theme +entrypoints==0.3 + # via + # altair + # jupyter-client + # nbconvert execnet==1.9.0 # via pytest-xdist -fastapi==0.70.0 +fastapi==0.73.0 # via feast (setup.py) -fastavro==1.4.7 +fastavro==1.4.9 # via # feast (setup.py) # pandavro -filelock==3.4.0 +filelock==3.4.2 # via virtualenv firebase-admin==4.5.2 # via feast (setup.py) flake8==4.0.1 # via feast (setup.py) -frozenlist==1.2.0 +frozenlist==1.3.0 # via # aiohttp # aiosignal -fsspec==2021.11.1 +fsspec==2022.1.0 # via # adlfs # gcsfs -gcsfs==2021.11.1 +gcsfs==2022.1.0 # via feast (setup.py) -google-api-core[grpc]==1.31.4 +google-api-core[grpc]==1.31.5 # via # feast (setup.py) # firebase-admin @@ -151,7 +194,7 @@ google-api-core[grpc]==1.31.4 # google-cloud-core # google-cloud-datastore # google-cloud-firestore -google-api-python-client==2.32.0 +google-api-python-client==2.36.0 # via firebase-admin google-auth==1.35.0 # via @@ -160,16 +203,17 @@ google-auth==1.35.0 # google-api-python-client # google-auth-httplib2 # google-auth-oauthlib + # google-cloud-core # google-cloud-storage google-auth-httplib2==0.1.0 # via google-api-python-client google-auth-oauthlib==0.4.6 # via gcsfs -google-cloud-bigquery==2.31.0 +google-cloud-bigquery==2.32.0 # via feast (setup.py) -google-cloud-bigquery-storage==2.10.1 +google-cloud-bigquery-storage==2.11.0 # via feast (setup.py) -google-cloud-core==1.4.4 +google-cloud-core==1.7.2 # via # feast (setup.py) # google-cloud-bigquery @@ -196,7 +240,9 @@ googleapis-common-protos==1.52.0 # feast (setup.py) # google-api-core # tensorflow-metadata -grpcio==1.42.0 +great-expectations==0.14.4 + # via feast (setup.py) +grpcio==1.43.0 # via # feast (setup.py) # 
google-api-core @@ -204,13 +250,13 @@ grpcio==1.42.0 # grpcio-reflection # grpcio-testing # grpcio-tools -grpcio-reflection==1.42.0 +grpcio-reflection==1.43.0 # via feast (setup.py) grpcio-testing==1.34.0 # via feast (setup.py) grpcio-tools==1.34.0 # via feast (setup.py) -h11==0.12.0 +h11==0.13.0 # via uvicorn hiredis==2.0.0 # via feast (setup.py) @@ -218,37 +264,95 @@ httplib2==0.20.2 # via # google-api-python-client # google-auth-httplib2 -httptools==0.2.0 +httptools==0.3.0 # via uvicorn -identify==2.4.0 +identify==2.4.7 # via pre-commit idna==3.3 # via # anyio # requests + # snowflake-connector-python # yarl imagesize==1.3.0 # via sphinx +importlib-metadata==4.2.0 + # via + # click + # flake8 + # great-expectations + # jsonschema + # moto + # pep517 + # pluggy + # pre-commit + # pytest + # redis + # virtualenv importlib-resources==5.4.0 # via jsonschema iniconfig==1.1.1 # via pytest -isodate==0.6.0 +ipykernel==6.7.0 + # via + # ipywidgets + # notebook +ipython==7.31.1 + # via + # ipykernel + # ipywidgets +ipython-genutils==0.2.0 + # via + # ipywidgets + # nbformat + # notebook +ipywidgets==7.6.5 + # via great-expectations +isodate==0.6.1 # via msrest isort==5.10.1 # via feast (setup.py) +jedi==0.18.1 + # via ipython jinja2==3.0.3 # via + # altair # feast (setup.py) + # great-expectations # moto + # nbconvert + # notebook # sphinx jmespath==0.10.0 # via # boto3 # botocore -jsonschema==4.2.1 - # via feast (setup.py) -libcst==0.3.23 +jsonpatch==1.32 + # via great-expectations +jsonpointer==2.2 + # via jsonpatch +jsonschema==4.4.0 + # via + # altair + # feast (setup.py) + # great-expectations + # nbformat +jupyter-client==7.1.2 + # via + # ipykernel + # nbclient + # notebook +jupyter-core==4.9.1 + # via + # jupyter-client + # nbconvert + # nbformat + # notebook +jupyterlab-pygments==0.1.2 + # via nbconvert +jupyterlab-widgets==1.0.2 + # via ipywidgets +libcst==0.4.1 # via # google-cloud-bigquery-storage # google-cloud-datastore @@ -256,23 +360,29 @@ markupsafe==2.0.1 # via # jinja2 # moto +matplotlib-inline==0.1.3 + # via + # ipykernel + # ipython mccabe==0.6.1 # via flake8 minio==7.1.0 # via feast (setup.py) +mistune==0.8.4 + # via + # great-expectations + # nbconvert mmh3==3.0.0 # via feast (setup.py) mock==2.0.0 # via feast (setup.py) -more-itertools==8.12.0 - # via pytest -moto==2.2.17 +moto==3.0.2 # via feast (setup.py) msal==1.16.0 # via # azure-identity # msal-extensions -msal-extensions==0.3.0 +msal-extensions==0.3.1 # via azure-identity msgpack==1.0.3 # via cachecontrol @@ -282,57 +392,97 @@ msrest==0.6.21 # msrestazure msrestazure==0.6.4 # via adlfs -multidict==5.2.0 +multidict==6.0.2 # via # aiohttp # yarl -mypy==0.790 +mypy==0.931 # via feast (setup.py) mypy-extensions==0.4.3 # via # mypy # typing-inspect -mypy-protobuf==1.24 - # via feast (setup.py) +mypy-protobuf==3.1.0 + # via feast (setup.py) +nbclient==0.5.10 + # via nbconvert +nbconvert==6.4.1 + # via notebook +nbformat==5.1.3 + # via + # ipywidgets + # nbclient + # nbconvert + # notebook +nest-asyncio==1.5.4 + # via + # ipykernel + # jupyter-client + # nbclient + # notebook nodeenv==1.6.0 # via pre-commit -numpy==1.21.4 +notebook==6.4.8 + # via widgetsnbextension +numpy==1.21.5 # via + # altair + # great-expectations # pandas # pandavro # pyarrow -oauthlib==3.1.1 + # scipy +oauthlib==3.2.0 # via requests-oauthlib +oscrypto==1.2.1 + # via snowflake-connector-python packaging==21.3 # via + # bleach # deprecation # google-api-core # google-cloud-bigquery # google-cloud-firestore # pytest + # redis # sphinx -pandas==1.3.4 
+pandas==1.3.5 # via + # altair # feast (setup.py) + # great-expectations # pandavro + # snowflake-connector-python pandavro==1.5.2 # via feast (setup.py) +pandocfilters==1.5.0 + # via nbconvert +parso==0.8.3 + # via jedi pathspec==0.9.0 # via black pbr==5.8.0 # via mock pep517==0.12.0 # via pip-tools +pexpect==4.8.0 + # via ipython +pickleshare==0.7.5 + # via ipython pip-tools==6.4.0 # via feast (setup.py) -platformdirs==2.4.0 +platformdirs==2.4.1 # via virtualenv -pluggy==0.13.1 +pluggy==1.0.0 # via pytest -portalocker==1.7.1 +portalocker==2.3.2 # via msal-extensions -pre-commit==2.16.0 +pre-commit==2.17.0 # via feast (setup.py) +prometheus-client==0.13.1 + # via notebook +prompt-toolkit==3.0.26 + # via ipython proto-plus==1.19.6 # via # feast (setup.py) @@ -340,7 +490,7 @@ proto-plus==1.19.6 # google-cloud-bigquery-storage # google-cloud-datastore # google-cloud-firestore -protobuf==3.19.1 +protobuf==3.19.4 # via # feast (setup.py) # google-api-core @@ -352,6 +502,10 @@ protobuf==3.19.1 # mypy-protobuf # proto-plus # tensorflow-metadata +ptyprocess==0.7.0 + # via + # pexpect + # terminado py==1.11.0 # via # pytest @@ -359,7 +513,9 @@ py==1.11.0 py-cpuinfo==8.0.0 # via pytest-benchmark pyarrow==6.0.1 - # via feast (setup.py) + # via + # feast (setup.py) + # snowflake-connector-python pyasn1==0.4.8 # via # pyasn1-modules @@ -370,25 +526,35 @@ pycodestyle==2.8.0 # via flake8 pycparser==2.21 # via cffi -pydantic==1.8.2 +pycryptodomex==3.14.0 + # via snowflake-connector-python +pydantic==1.9.0 # via # fastapi # feast (setup.py) pyflakes==2.4.0 # via flake8 -pygments==2.10.0 - # via sphinx +pygments==2.11.2 + # via + # ipython + # jupyterlab-pygments + # nbconvert + # sphinx pyjwt[crypto]==2.3.0 # via # adal # msal -pyparsing==3.0.6 + # snowflake-connector-python +pyopenssl==21.0.0 + # via snowflake-connector-python +pyparsing==2.4.7 # via + # great-expectations # httplib2 # packaging -pyrsistent==0.18.0 +pyrsistent==0.18.1 # via jsonschema -pytest==6.0.0 +pytest==6.2.5 # via # feast (setup.py) # pytest-benchmark @@ -403,7 +569,7 @@ pytest-benchmark==3.4.1 # via feast (setup.py) pytest-cov==3.0.0 # via feast (setup.py) -pytest-forked==1.3.0 +pytest-forked==1.4.0 # via pytest-xdist pytest-lazy-fixture==0.6.3 # via feast (setup.py) @@ -413,13 +579,15 @@ pytest-ordering==0.6 # via feast (setup.py) pytest-timeout==1.4.2 # via feast (setup.py) -pytest-xdist==2.4.0 +pytest-xdist==2.5.0 # via feast (setup.py) python-dateutil==2.8.2 # via # adal # botocore # google-cloud-bigquery + # great-expectations + # jupyter-client # moto # pandas python-dotenv==0.19.2 @@ -428,21 +596,27 @@ pytz==2021.3 # via # babel # google-api-core + # great-expectations # moto # pandas + # snowflake-connector-python +pytz-deprecation-shim==0.1.0.post0 + # via tzlocal pyyaml==6.0 # via # feast (setup.py) # libcst # pre-commit # uvicorn -redis==3.5.3 - # via redis-py-cluster -redis-py-cluster==2.1.2 +pyzmq==22.3.0 + # via + # jupyter-client + # notebook +redis==4.1.2 # via feast (setup.py) -regex==2021.11.10 +regex==2022.1.18 # via black -requests==2.26.0 +requests==2.27.1 # via # adal # adlfs @@ -454,27 +628,38 @@ requests==2.26.0 # google-api-core # google-cloud-bigquery # google-cloud-storage + # great-expectations # moto # msal # msrest # requests-oauthlib # responses + # snowflake-connector-python # sphinx -requests-oauthlib==1.3.0 +requests-oauthlib==1.3.1 # via # google-auth-oauthlib # msrest -responses==0.16.0 +responses==0.17.0 # via moto rsa==4.8 # via google-auth -s3transfer==0.4.2 +ruamel.yaml==0.17.17 + # via 
great-expectations +ruamel.yaml.clib==0.2.6 + # via ruamel.yaml +s3transfer==0.5.0 # via boto3 +scipy==1.7.3 + # via great-expectations +send2trash==1.8.0 + # via notebook six==1.16.0 # via # absl-py # azure-core # azure-identity + # bleach # cryptography # google-api-core # google-auth @@ -482,9 +667,11 @@ six==1.16.0 # google-cloud-core # google-resumable-media # grpcio + # isodate # mock # msrestazure # pandavro + # pyopenssl # python-dateutil # responses # virtualenv @@ -492,7 +679,9 @@ sniffio==1.2.0 # via anyio snowballstemmer==2.2.0 # via sphinx -sphinx==4.3.1 +snowflake-connector-python[pandas]==2.7.3 + # via feast (setup.py) +sphinx==4.3.2 # via # feast (setup.py) # sphinx-rtd-theme @@ -510,74 +699,151 @@ sphinxcontrib-qthelp==1.0.3 # via sphinx sphinxcontrib-serializinghtml==1.1.5 # via sphinx -starlette==0.16.0 +starlette==0.17.1 # via fastapi tabulate==0.8.9 # via feast (setup.py) tenacity==8.0.1 # via feast (setup.py) -tensorflow-metadata==1.5.0 +tensorflow-metadata==1.6.0 # via feast (setup.py) +termcolor==1.1.0 + # via great-expectations +terminado==0.13.1 + # via notebook testcontainers==3.4.2 # via feast (setup.py) +testpath==0.5.0 + # via nbconvert toml==0.10.2 # via # black # feast (setup.py) # pre-commit # pytest -tomli==1.2.2 +tomli==2.0.0 # via # coverage + # mypy # pep517 +toolz==0.11.2 + # via altair +tornado==6.1 + # via + # ipykernel + # jupyter-client + # notebook + # terminado tqdm==4.62.3 - # via feast (setup.py) -typed-ast==1.4.3 + # via + # feast (setup.py) + # great-expectations +traitlets==5.1.1 + # via + # ipykernel + # ipython + # ipywidgets + # jupyter-client + # jupyter-core + # matplotlib-inline + # nbclient + # nbconvert + # nbformat + # notebook +typed-ast==1.5.2 # via # black # mypy +types-futures==3.3.8 + # via types-protobuf +types-protobuf==3.19.7 + # via + # feast (setup.py) + # mypy-protobuf +types-python-dateutil==2.8.9 + # via feast (setup.py) +types-pytz==2021.3.4 + # via feast (setup.py) +types-pyyaml==6.0.4 + # via feast (setup.py) +types-redis==4.1.13 + # via feast (setup.py) +types-requests==2.27.8 + # via feast (setup.py) +types-setuptools==57.4.8 + # via feast (setup.py) +types-tabulate==0.8.5 + # via feast (setup.py) +types-urllib3==1.26.8 + # via types-requests typing-extensions==4.0.1 # via + # aiohttp + # anyio + # argon2-cffi + # asgiref # async-timeout + # great-expectations + # h11 + # importlib-metadata + # jsonschema # libcst # mypy # pydantic + # starlette # typing-inspect + # uvicorn + # yarl typing-inspect==0.7.1 # via libcst +tzdata==2021.5 + # via pytz-deprecation-shim +tzlocal==4.1 + # via great-expectations uritemplate==4.1.1 # via google-api-python-client -urllib3==1.26.7 +urllib3==1.26.8 # via # botocore # feast (setup.py) # minio # requests # responses -uvicorn[standard]==0.15.0 +uvicorn[standard]==0.17.1 # via feast (setup.py) uvloop==0.16.0 # via uvicorn -virtualenv==20.10.0 +virtualenv==20.13.0 # via pre-commit watchgod==0.7 # via uvicorn -websocket-client==1.2.1 +wcwidth==0.2.5 + # via prompt-toolkit +webencodings==0.5.1 + # via bleach +websocket-client==1.2.3 # via docker websockets==10.1 # via uvicorn werkzeug==2.0.2 # via moto -wheel==0.37.0 +wheel==0.37.1 # via pip-tools +widgetsnbextension==3.5.2 + # via ipywidgets wrapt==1.13.3 - # via testcontainers + # via + # deprecated + # testcontainers xmltodict==0.12.0 # via moto yarl==1.7.2 # via aiohttp -zipp==3.6.0 - # via importlib-resources +zipp==3.7.0 + # via + # importlib-metadata + # importlib-resources + # pep517 # The following packages are considered to 
be unsafe in a requirements file: # pip diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index e6887dea55..90b4276013 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -4,21 +4,21 @@ # # pip-compile --output-file=requirements/py3.8-requirements.txt # -absl-py==0.12.0 +absl-py==1.0.0 # via tensorflow-metadata -anyio==3.4.0 +anyio==3.5.0 # via starlette asgiref==3.4.1 # via uvicorn -attrs==21.2.0 +attrs==21.4.0 # via jsonschema cachetools==4.2.4 # via google-auth certifi==2021.10.8 # via requests -charset-normalizer==2.0.8 +charset-normalizer==2.0.10 # via requests -click==7.1.2 +click==8.0.3 # via # feast (setup.py) # uvicorn @@ -26,13 +26,13 @@ colorama==0.4.4 # via feast (setup.py) dill==0.3.4 # via feast (setup.py) -fastapi==0.70.0 +fastapi==0.72.0 # via feast (setup.py) -fastavro==1.4.7 +fastavro==1.4.9 # via # feast (setup.py) # pandavro -google-api-core==2.2.2 +google-api-core==2.4.0 # via feast (setup.py) google-auth==2.3.3 # via google-api-core @@ -41,15 +41,15 @@ googleapis-common-protos==1.52.0 # feast (setup.py) # google-api-core # tensorflow-metadata -grpcio==1.42.0 +grpcio==1.43.0 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.42.0 +grpcio-reflection==1.43.0 # via feast (setup.py) -h11==0.12.0 +h11==0.13.0 # via uvicorn -httptools==0.2.0 +httptools==0.3.0 # via uvicorn idna==3.3 # via @@ -59,18 +59,18 @@ importlib-resources==5.4.0 # via jsonschema jinja2==3.0.3 # via feast (setup.py) -jsonschema==4.2.1 +jsonschema==4.4.0 # via feast (setup.py) markupsafe==2.0.1 # via jinja2 mmh3==3.0.0 # via feast (setup.py) -numpy==1.21.4 +numpy==1.21.5 # via # pandas # pandavro # pyarrow -pandas==1.3.4 +pandas==1.3.5 # via # feast (setup.py) # pandavro @@ -78,7 +78,7 @@ pandavro==1.5.2 # via feast (setup.py) proto-plus==1.19.6 # via feast (setup.py) -protobuf==3.19.1 +protobuf==3.19.3 # via # feast (setup.py) # google-api-core @@ -94,11 +94,11 @@ pyasn1==0.4.8 # rsa pyasn1-modules==0.2.8 # via google-auth -pydantic==1.8.2 +pydantic==1.9.0 # via # fastapi # feast (setup.py) -pyrsistent==0.18.0 +pyrsistent==0.18.1 # via jsonschema python-dateutil==2.8.2 # via pandas @@ -110,7 +110,7 @@ pyyaml==6.0 # via # feast (setup.py) # uvicorn -requests==2.26.0 +requests==2.27.1 # via google-api-core rsa==4.8 # via google-auth @@ -123,13 +123,13 @@ six==1.16.0 # python-dateutil sniffio==1.2.0 # via anyio -starlette==0.16.0 +starlette==0.17.1 # via fastapi tabulate==0.8.9 # via feast (setup.py) tenacity==8.0.1 # via feast (setup.py) -tensorflow-metadata==1.5.0 +tensorflow-metadata==1.6.0 # via feast (setup.py) toml==0.10.2 # via feast (setup.py) @@ -137,9 +137,9 @@ tqdm==4.62.3 # via feast (setup.py) typing-extensions==4.0.1 # via pydantic -urllib3==1.26.7 +urllib3==1.26.8 # via requests -uvicorn[standard]==0.15.0 +uvicorn[standard]==0.17.0 # via feast (setup.py) uvloop==0.16.0 # via uvicorn @@ -147,7 +147,7 @@ watchgod==0.7 # via uvicorn websockets==10.1 # via uvicorn -zipp==3.6.0 +zipp==3.7.0 # via importlib-resources # The following packages are considered to be unsafe in a requirements file: diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index aa4cee54e4..1421d7e3c3 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -4,7 +4,7 @@ # # pip-compile --extra=ci --output-file=requirements/py3.9-ci-requirements.txt # 
-absl-py==0.12.0 +absl-py==1.0.0 # via tensorflow-metadata adal==1.2.7 # via @@ -20,17 +20,33 @@ aiosignal==1.2.0 # via aiohttp alabaster==0.7.12 # via sphinx -anyio==3.4.0 +altair==4.2.0 + # via great-expectations +anyio==3.5.0 # via starlette appdirs==1.4.4 # via black -asgiref==3.4.1 +appnope==0.1.2 + # via + # ipykernel + # ipython +argon2-cffi==21.3.0 + # via notebook +argon2-cffi-bindings==21.2.0 + # via argon2-cffi +asgiref==3.5.0 # via uvicorn +asn1crypto==1.4.0 + # via + # oscrypto + # snowflake-connector-python assertpy==1.1 # via feast (setup.py) -async-timeout==4.0.1 +async-timeout==4.0.2 + # via aiohttp +asynctest==0.13.0 # via aiohttp -attrs==21.2.0 +attrs==21.4.0 # via # aiohttp # black @@ -38,7 +54,7 @@ attrs==21.2.0 # pytest avro==1.10.0 # via feast (setup.py) -azure-core==1.21.0 +azure-core==1.21.1 # via # adlfs # azure-identity @@ -51,15 +67,21 @@ azure-storage-blob==12.9.0 # via adlfs babel==2.9.1 # via sphinx -backports.entry-points-selectable==1.1.1 - # via virtualenv +backcall==0.2.0 + # via ipython +backports.zoneinfo==0.2.1 + # via + # pytz-deprecation-shim + # tzlocal black==19.10b0 # via feast (setup.py) -boto3==1.17.112 +bleach==4.1.0 + # via nbconvert +boto3==1.20.46 # via # feast (setup.py) # moto -botocore==1.20.112 +botocore==1.23.46 # via # boto3 # moto @@ -73,25 +95,30 @@ certifi==2021.10.8 # minio # msrest # requests + # snowflake-connector-python cffi==1.15.0 # via + # argon2-cffi-bindings # azure-datalake-store # cryptography + # snowflake-connector-python cfgv==3.3.1 # via pre-commit -charset-normalizer==2.0.8 +charset-normalizer==2.0.11 # via # aiohttp # requests -click==7.1.2 + # snowflake-connector-python +click==8.0.3 # via # black # feast (setup.py) + # great-expectations # pip-tools # uvicorn colorama==0.4.4 # via feast (setup.py) -coverage[toml]==6.2 +coverage[toml]==6.3 # via pytest-cov cryptography==3.3.2 # via @@ -101,13 +128,24 @@ cryptography==3.3.2 # feast (setup.py) # moto # msal -decorator==5.1.0 - # via gcsfs + # pyjwt + # pyopenssl + # snowflake-connector-python +debugpy==1.5.1 + # via ipykernel +decorator==5.1.1 + # via + # gcsfs + # ipython +defusedxml==0.7.1 + # via nbconvert +deprecated==1.2.13 + # via redis deprecation==2.1.0 # via testcontainers dill==0.3.4 # via feast (setup.py) -distlib==0.3.3 +distlib==0.3.4 # via virtualenv docker==5.0.3 # via @@ -117,31 +155,36 @@ docutils==0.17.1 # via # sphinx # sphinx-rtd-theme +entrypoints==0.3 + # via + # altair + # jupyter-client + # nbconvert execnet==1.9.0 # via pytest-xdist -fastapi==0.70.0 +fastapi==0.73.0 # via feast (setup.py) -fastavro==1.4.7 +fastavro==1.4.9 # via # feast (setup.py) # pandavro -filelock==3.4.0 +filelock==3.4.2 # via virtualenv firebase-admin==4.5.2 # via feast (setup.py) flake8==4.0.1 # via feast (setup.py) -frozenlist==1.2.0 +frozenlist==1.3.0 # via # aiohttp # aiosignal -fsspec==2021.11.1 +fsspec==2022.1.0 # via # adlfs # gcsfs -gcsfs==2021.11.1 +gcsfs==2022.1.0 # via feast (setup.py) -google-api-core[grpc]==1.31.4 +google-api-core[grpc]==1.31.5 # via # feast (setup.py) # firebase-admin @@ -151,7 +194,7 @@ google-api-core[grpc]==1.31.4 # google-cloud-core # google-cloud-datastore # google-cloud-firestore -google-api-python-client==2.32.0 +google-api-python-client==2.36.0 # via firebase-admin google-auth==1.35.0 # via @@ -160,16 +203,17 @@ google-auth==1.35.0 # google-api-python-client # google-auth-httplib2 # google-auth-oauthlib + # google-cloud-core # google-cloud-storage google-auth-httplib2==0.1.0 # via google-api-python-client google-auth-oauthlib==0.4.6 
# via gcsfs -google-cloud-bigquery==2.31.0 +google-cloud-bigquery==2.32.0 # via feast (setup.py) -google-cloud-bigquery-storage==2.10.1 +google-cloud-bigquery-storage==2.11.0 # via feast (setup.py) -google-cloud-core==1.4.4 +google-cloud-core==1.7.2 # via # feast (setup.py) # google-cloud-bigquery @@ -196,7 +240,9 @@ googleapis-common-protos==1.52.0 # feast (setup.py) # google-api-core # tensorflow-metadata -grpcio==1.42.0 +great-expectations==0.14.4 + # via feast (setup.py) +grpcio==1.43.0 # via # feast (setup.py) # google-api-core @@ -204,13 +250,13 @@ grpcio==1.42.0 # grpcio-reflection # grpcio-testing # grpcio-tools -grpcio-reflection==1.42.0 +grpcio-reflection==1.43.0 # via feast (setup.py) grpcio-testing==1.34.0 # via feast (setup.py) grpcio-tools==1.34.0 # via feast (setup.py) -h11==0.12.0 +h11==0.13.0 # via uvicorn hiredis==2.0.0 # via feast (setup.py) @@ -218,35 +264,95 @@ httplib2==0.20.2 # via # google-api-python-client # google-auth-httplib2 -httptools==0.2.0 +httptools==0.3.0 # via uvicorn -identify==2.4.0 +identify==2.4.7 # via pre-commit idna==3.3 # via # anyio # requests + # snowflake-connector-python # yarl imagesize==1.3.0 # via sphinx +importlib-metadata==4.2.0 + # via + # click + # flake8 + # great-expectations + # jsonschema + # moto + # pep517 + # pluggy + # pre-commit + # pytest + # redis + # virtualenv +importlib-resources==5.4.0 + # via jsonschema iniconfig==1.1.1 # via pytest -isodate==0.6.0 +ipykernel==6.7.0 + # via + # ipywidgets + # notebook +ipython==7.31.1 + # via + # ipykernel + # ipywidgets +ipython-genutils==0.2.0 + # via + # ipywidgets + # nbformat + # notebook +ipywidgets==7.6.5 + # via great-expectations +isodate==0.6.1 # via msrest isort==5.10.1 # via feast (setup.py) +jedi==0.18.1 + # via ipython jinja2==3.0.3 # via + # altair # feast (setup.py) + # great-expectations # moto + # nbconvert + # notebook # sphinx jmespath==0.10.0 # via # boto3 # botocore -jsonschema==4.2.1 - # via feast (setup.py) -libcst==0.3.23 +jsonpatch==1.32 + # via great-expectations +jsonpointer==2.2 + # via jsonpatch +jsonschema==4.4.0 + # via + # altair + # feast (setup.py) + # great-expectations + # nbformat +jupyter-client==7.1.2 + # via + # ipykernel + # nbclient + # notebook +jupyter-core==4.9.1 + # via + # jupyter-client + # nbconvert + # nbformat + # notebook +jupyterlab-pygments==0.1.2 + # via nbconvert +jupyterlab-widgets==1.0.2 + # via ipywidgets +libcst==0.4.1 # via # google-cloud-bigquery-storage # google-cloud-datastore @@ -254,23 +360,29 @@ markupsafe==2.0.1 # via # jinja2 # moto +matplotlib-inline==0.1.3 + # via + # ipykernel + # ipython mccabe==0.6.1 # via flake8 minio==7.1.0 # via feast (setup.py) +mistune==0.8.4 + # via + # great-expectations + # nbconvert mmh3==3.0.0 # via feast (setup.py) mock==2.0.0 # via feast (setup.py) -more-itertools==8.12.0 - # via pytest -moto==2.2.17 +moto==3.0.2 # via feast (setup.py) msal==1.16.0 # via # azure-identity # msal-extensions -msal-extensions==0.3.0 +msal-extensions==0.3.1 # via azure-identity msgpack==1.0.3 # via cachecontrol @@ -280,57 +392,97 @@ msrest==0.6.21 # msrestazure msrestazure==0.6.4 # via adlfs -multidict==5.2.0 +multidict==6.0.2 # via # aiohttp # yarl -mypy==0.790 +mypy==0.931 # via feast (setup.py) mypy-extensions==0.4.3 # via # mypy # typing-inspect -mypy-protobuf==1.24 - # via feast (setup.py) +mypy-protobuf==3.1.0 + # via feast (setup.py) +nbclient==0.5.10 + # via nbconvert +nbconvert==6.4.1 + # via notebook +nbformat==5.1.3 + # via + # ipywidgets + # nbclient + # nbconvert + # notebook 
+nest-asyncio==1.5.4 + # via + # ipykernel + # jupyter-client + # nbclient + # notebook nodeenv==1.6.0 # via pre-commit -numpy==1.21.4 +notebook==6.4.8 + # via widgetsnbextension +numpy==1.21.5 # via + # altair + # great-expectations # pandas # pandavro # pyarrow -oauthlib==3.1.1 + # scipy +oauthlib==3.2.0 # via requests-oauthlib +oscrypto==1.2.1 + # via snowflake-connector-python packaging==21.3 # via + # bleach # deprecation # google-api-core # google-cloud-bigquery # google-cloud-firestore # pytest + # redis # sphinx -pandas==1.3.4 +pandas==1.3.5 # via + # altair # feast (setup.py) + # great-expectations # pandavro + # snowflake-connector-python pandavro==1.5.2 # via feast (setup.py) +pandocfilters==1.5.0 + # via nbconvert +parso==0.8.3 + # via jedi pathspec==0.9.0 # via black pbr==5.8.0 # via mock pep517==0.12.0 # via pip-tools +pexpect==4.8.0 + # via ipython +pickleshare==0.7.5 + # via ipython pip-tools==6.4.0 # via feast (setup.py) -platformdirs==2.4.0 +platformdirs==2.4.1 # via virtualenv -pluggy==0.13.1 +pluggy==1.0.0 # via pytest -portalocker==1.7.1 +portalocker==2.3.2 # via msal-extensions -pre-commit==2.16.0 +pre-commit==2.17.0 # via feast (setup.py) +prometheus-client==0.13.1 + # via notebook +prompt-toolkit==3.0.26 + # via ipython proto-plus==1.19.6 # via # feast (setup.py) @@ -338,7 +490,7 @@ proto-plus==1.19.6 # google-cloud-bigquery-storage # google-cloud-datastore # google-cloud-firestore -protobuf==3.19.1 +protobuf==3.19.4 # via # feast (setup.py) # google-api-core @@ -350,6 +502,10 @@ protobuf==3.19.1 # mypy-protobuf # proto-plus # tensorflow-metadata +ptyprocess==0.7.0 + # via + # pexpect + # terminado py==1.11.0 # via # pytest @@ -357,7 +513,9 @@ py==1.11.0 py-cpuinfo==8.0.0 # via pytest-benchmark pyarrow==6.0.1 - # via feast (setup.py) + # via + # feast (setup.py) + # snowflake-connector-python pyasn1==0.4.8 # via # pyasn1-modules @@ -368,25 +526,35 @@ pycodestyle==2.8.0 # via flake8 pycparser==2.21 # via cffi -pydantic==1.8.2 +pycryptodomex==3.14.0 + # via snowflake-connector-python +pydantic==1.9.0 # via # fastapi # feast (setup.py) pyflakes==2.4.0 # via flake8 -pygments==2.10.0 - # via sphinx +pygments==2.11.2 + # via + # ipython + # jupyterlab-pygments + # nbconvert + # sphinx pyjwt[crypto]==2.3.0 # via # adal # msal -pyparsing==3.0.6 + # snowflake-connector-python +pyopenssl==21.0.0 + # via snowflake-connector-python +pyparsing==2.4.7 # via + # great-expectations # httplib2 # packaging -pyrsistent==0.18.0 +pyrsistent==0.18.1 # via jsonschema -pytest==6.0.0 +pytest==6.2.5 # via # feast (setup.py) # pytest-benchmark @@ -401,7 +569,7 @@ pytest-benchmark==3.4.1 # via feast (setup.py) pytest-cov==3.0.0 # via feast (setup.py) -pytest-forked==1.3.0 +pytest-forked==1.4.0 # via pytest-xdist pytest-lazy-fixture==0.6.3 # via feast (setup.py) @@ -411,13 +579,15 @@ pytest-ordering==0.6 # via feast (setup.py) pytest-timeout==1.4.2 # via feast (setup.py) -pytest-xdist==2.4.0 +pytest-xdist==2.5.0 # via feast (setup.py) python-dateutil==2.8.2 # via # adal # botocore # google-cloud-bigquery + # great-expectations + # jupyter-client # moto # pandas python-dotenv==0.19.2 @@ -426,21 +596,27 @@ pytz==2021.3 # via # babel # google-api-core + # great-expectations # moto # pandas + # snowflake-connector-python +pytz-deprecation-shim==0.1.0.post0 + # via tzlocal pyyaml==6.0 # via # feast (setup.py) # libcst # pre-commit # uvicorn -redis==3.5.3 - # via redis-py-cluster -redis-py-cluster==2.1.2 +pyzmq==22.3.0 + # via + # jupyter-client + # notebook +redis==4.1.2 # via feast (setup.py) 
-regex==2021.11.10 +regex==2022.1.18 # via black -requests==2.26.0 +requests==2.27.1 # via # adal # adlfs @@ -452,27 +628,38 @@ requests==2.26.0 # google-api-core # google-cloud-bigquery # google-cloud-storage + # great-expectations # moto # msal # msrest # requests-oauthlib # responses + # snowflake-connector-python # sphinx -requests-oauthlib==1.3.0 +requests-oauthlib==1.3.1 # via # google-auth-oauthlib # msrest -responses==0.16.0 +responses==0.17.0 # via moto rsa==4.8 # via google-auth -s3transfer==0.4.2 +ruamel.yaml==0.17.17 + # via great-expectations +ruamel.yaml.clib==0.2.6 + # via ruamel.yaml +s3transfer==0.5.0 # via boto3 +scipy==1.7.3 + # via great-expectations +send2trash==1.8.0 + # via notebook six==1.16.0 # via # absl-py # azure-core # azure-identity + # bleach # cryptography # google-api-core # google-auth @@ -480,9 +667,11 @@ six==1.16.0 # google-cloud-core # google-resumable-media # grpcio + # isodate # mock # msrestazure # pandavro + # pyopenssl # python-dateutil # responses # virtualenv @@ -490,7 +679,9 @@ sniffio==1.2.0 # via anyio snowballstemmer==2.2.0 # via sphinx -sphinx==4.3.1 +snowflake-connector-python[pandas]==2.7.3 + # via feast (setup.py) +sphinx==4.3.2 # via # feast (setup.py) # sphinx-rtd-theme @@ -508,72 +699,151 @@ sphinxcontrib-qthelp==1.0.3 # via sphinx sphinxcontrib-serializinghtml==1.1.5 # via sphinx -starlette==0.16.0 +starlette==0.17.1 # via fastapi tabulate==0.8.9 # via feast (setup.py) tenacity==8.0.1 # via feast (setup.py) -tensorflow-metadata==1.5.0 +tensorflow-metadata==1.6.0 # via feast (setup.py) +termcolor==1.1.0 + # via great-expectations +terminado==0.13.1 + # via notebook testcontainers==3.4.2 # via feast (setup.py) +testpath==0.5.0 + # via nbconvert toml==0.10.2 # via # black # feast (setup.py) # pre-commit # pytest -tomli==1.2.2 +tomli==2.0.0 # via # coverage + # mypy # pep517 +toolz==0.11.2 + # via altair +tornado==6.1 + # via + # ipykernel + # jupyter-client + # notebook + # terminado tqdm==4.62.3 - # via feast (setup.py) -typed-ast==1.4.3 + # via + # feast (setup.py) + # great-expectations +traitlets==5.1.1 + # via + # ipykernel + # ipython + # ipywidgets + # jupyter-client + # jupyter-core + # matplotlib-inline + # nbclient + # nbconvert + # nbformat + # notebook +typed-ast==1.5.2 # via # black # mypy +types-futures==3.3.8 + # via types-protobuf +types-protobuf==3.19.7 + # via + # feast (setup.py) + # mypy-protobuf +types-python-dateutil==2.8.9 + # via feast (setup.py) +types-pytz==2021.3.4 + # via feast (setup.py) +types-pyyaml==6.0.4 + # via feast (setup.py) +types-redis==4.1.13 + # via feast (setup.py) +types-requests==2.27.8 + # via feast (setup.py) +types-setuptools==57.4.8 + # via feast (setup.py) +types-tabulate==0.8.5 + # via feast (setup.py) +types-urllib3==1.26.8 + # via types-requests typing-extensions==4.0.1 # via + # aiohttp + # anyio + # argon2-cffi + # asgiref # async-timeout + # great-expectations + # h11 + # importlib-metadata + # jsonschema # libcst # mypy # pydantic + # starlette # typing-inspect + # uvicorn + # yarl typing-inspect==0.7.1 # via libcst +tzdata==2021.5 + # via pytz-deprecation-shim +tzlocal==4.1 + # via great-expectations uritemplate==4.1.1 # via google-api-python-client -urllib3==1.26.7 +urllib3==1.26.8 # via # botocore # feast (setup.py) # minio # requests # responses -uvicorn[standard]==0.15.0 +uvicorn[standard]==0.17.1 # via feast (setup.py) uvloop==0.16.0 # via uvicorn -virtualenv==20.10.0 +virtualenv==20.13.0 # via pre-commit watchgod==0.7 # via uvicorn -websocket-client==1.2.1 +wcwidth==0.2.5 + 
# via prompt-toolkit +webencodings==0.5.1 + # via bleach +websocket-client==1.2.3 # via docker websockets==10.1 # via uvicorn werkzeug==2.0.2 # via moto -wheel==0.37.0 +wheel==0.37.1 # via pip-tools +widgetsnbextension==3.5.2 + # via ipywidgets wrapt==1.13.3 - # via testcontainers + # via + # deprecated + # testcontainers xmltodict==0.12.0 # via moto yarl==1.7.2 # via aiohttp +zipp==3.7.0 + # via + # importlib-metadata + # importlib-resources + # pep517 # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 4cb45fd809..8db9fd4b14 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -4,21 +4,21 @@ # # pip-compile --output-file=requirements/py3.9-requirements.txt # -absl-py==0.12.0 +absl-py==1.0.0 # via tensorflow-metadata -anyio==3.4.0 +anyio==3.5.0 # via starlette asgiref==3.4.1 # via uvicorn -attrs==21.2.0 +attrs==21.4.0 # via jsonschema cachetools==4.2.4 # via google-auth certifi==2021.10.8 # via requests -charset-normalizer==2.0.8 +charset-normalizer==2.0.10 # via requests -click==7.1.2 +click==8.0.3 # via # feast (setup.py) # uvicorn @@ -26,13 +26,13 @@ colorama==0.4.4 # via feast (setup.py) dill==0.3.4 # via feast (setup.py) -fastapi==0.70.0 +fastapi==0.72.0 # via feast (setup.py) -fastavro==1.4.7 +fastavro==1.4.9 # via # feast (setup.py) # pandavro -google-api-core==2.2.2 +google-api-core==2.4.0 # via feast (setup.py) google-auth==2.3.3 # via google-api-core @@ -41,15 +41,15 @@ googleapis-common-protos==1.52.0 # feast (setup.py) # google-api-core # tensorflow-metadata -grpcio==1.42.0 +grpcio==1.43.0 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.42.0 +grpcio-reflection==1.43.0 # via feast (setup.py) -h11==0.12.0 +h11==0.13.0 # via uvicorn -httptools==0.2.0 +httptools==0.3.0 # via uvicorn idna==3.3 # via @@ -57,18 +57,18 @@ idna==3.3 # requests jinja2==3.0.3 # via feast (setup.py) -jsonschema==4.2.1 +jsonschema==4.4.0 # via feast (setup.py) markupsafe==2.0.1 # via jinja2 mmh3==3.0.0 # via feast (setup.py) -numpy==1.21.4 +numpy==1.21.5 # via # pandas # pandavro # pyarrow -pandas==1.3.4 +pandas==1.3.5 # via # feast (setup.py) # pandavro @@ -76,7 +76,7 @@ pandavro==1.5.2 # via feast (setup.py) proto-plus==1.19.6 # via feast (setup.py) -protobuf==3.19.1 +protobuf==3.19.3 # via # feast (setup.py) # google-api-core @@ -92,11 +92,11 @@ pyasn1==0.4.8 # rsa pyasn1-modules==0.2.8 # via google-auth -pydantic==1.8.2 +pydantic==1.9.0 # via # fastapi # feast (setup.py) -pyrsistent==0.18.0 +pyrsistent==0.18.1 # via jsonschema python-dateutil==2.8.2 # via pandas @@ -108,7 +108,7 @@ pyyaml==6.0 # via # feast (setup.py) # uvicorn -requests==2.26.0 +requests==2.27.1 # via google-api-core rsa==4.8 # via google-auth @@ -121,13 +121,13 @@ six==1.16.0 # python-dateutil sniffio==1.2.0 # via anyio -starlette==0.16.0 +starlette==0.17.1 # via fastapi tabulate==0.8.9 # via feast (setup.py) tenacity==8.0.1 # via feast (setup.py) -tensorflow-metadata==1.5.0 +tensorflow-metadata==1.6.0 # via feast (setup.py) toml==0.10.2 # via feast (setup.py) @@ -135,9 +135,9 @@ tqdm==4.62.3 # via feast (setup.py) typing-extensions==4.0.1 # via pydantic -urllib3==1.26.7 +urllib3==1.26.8 # via requests -uvicorn[standard]==0.15.0 +uvicorn[standard]==0.17.0 # via feast (setup.py) uvloop==0.16.0 # via uvicorn diff --git a/sdk/python/setup.py b/sdk/python/setup.py index e797a1216c..8951f23c50 100644 --- 
a/sdk/python/setup.py +++ b/sdk/python/setup.py @@ -40,7 +40,7 @@ REQUIRES_PYTHON = ">=3.7.0" REQUIRED = [ - "Click==7.*", + "Click==8.*", "colorama>=0.3.9", "dill==0.3.*", "fastavro>=1.1.0", @@ -66,6 +66,7 @@ "uvicorn[standard]>=0.14.0", "proto-plus<1.19.7", "tensorflow-metadata>=1.0.0,<2.0.0", + "dask>=2021.*", ] GCP_REQUIRED = [ @@ -77,7 +78,7 @@ ] REDIS_REQUIRED = [ - "redis-py-cluster==2.1.2", + "redis>=4.1.0", "hiredis>=2.0.0", ] @@ -86,40 +87,63 @@ "docker>=5.0.2", ] -CI_REQUIRED = [ - "cryptography==3.3.2", - "flake8", - "black==19.10b0", - "isort>=5", - "grpcio-tools==1.34.0", - "grpcio-testing==1.34.0", - "minio==7.1.0", - "mock==2.0.0", - "moto", - "mypy==0.790", - "mypy-protobuf==1.24", - "avro==1.10.0", - "gcsfs", - "urllib3>=1.25.4", - "pytest==6.0.0", - "pytest-cov", - "pytest-xdist", - "pytest-benchmark>=3.4.1", - "pytest-lazy-fixture==0.6.3", - "pytest-timeout==1.4.2", - "pytest-ordering==0.6.*", - "pytest-mock==1.10.4", - "Sphinx!=4.0.0", - "sphinx-rtd-theme", - "testcontainers==3.4.2", - "adlfs==0.5.9", - "firebase-admin==4.5.2", - "pre-commit", - "assertpy==1.1", - "pip-tools", -] + GCP_REQUIRED + REDIS_REQUIRED + AWS_REQUIRED - -DEV_REQUIRED = ["mypy-protobuf==1.*", "grpcio-testing==1.*"] + CI_REQUIRED +SNOWFLAKE_REQUIRED = [ + "snowflake-connector-python[pandas]>=2.7.3", +] + +GE_REQUIRED = [ + "great_expectations>=0.14.0,<0.15.0" +] + +CI_REQUIRED = ( + [ + "cryptography==3.3.2", + "flake8", + "black==19.10b0", + "isort>=5", + "grpcio-tools==1.34.0", + "grpcio-testing==1.34.0", + "minio==7.1.0", + "mock==2.0.0", + "moto", + "mypy==0.931", + "mypy-protobuf==3.1.0", + "avro==1.10.0", + "gcsfs", + "urllib3>=1.25.4", + "pytest>=6.0.0", + "pytest-cov", + "pytest-xdist", + "pytest-benchmark>=3.4.1", + "pytest-lazy-fixture==0.6.3", + "pytest-timeout==1.4.2", + "pytest-ordering==0.6.*", + "pytest-mock==1.10.4", + "Sphinx!=4.0.0,<4.4.0", + "sphinx-rtd-theme", + "testcontainers==3.4.2", + "adlfs==0.5.9", + "firebase-admin==4.5.2", + "pre-commit", + "assertpy==1.1", + "pip-tools", + "types-protobuf", + "types-python-dateutil", + "types-pytz", + "types-PyYAML", + "types-redis", + "types-requests", + "types-setuptools", + "types-tabulate", + ] + + GCP_REQUIRED + + REDIS_REQUIRED + + AWS_REQUIRED + + SNOWFLAKE_REQUIRED + + GE_REQUIRED +) + +DEV_REQUIRED = ["mypy-protobuf>=3.1.0", "grpcio-testing==1.*"] + CI_REQUIRED # Get git repo root directory repo_root = str(pathlib.Path(__file__).resolve().parent.parent.parent) @@ -218,6 +242,8 @@ def run(self): "gcp": GCP_REQUIRED, "aws": AWS_REQUIRED, "redis": REDIS_REQUIRED, + "snowflake": SNOWFLAKE_REQUIRED, + "ge": GE_REQUIRED, }, include_package_data=True, license="Apache", @@ -231,7 +257,7 @@ def run(self): ], entry_points={"console_scripts": ["feast=feast.cli:cli"]}, use_scm_version=use_scm_version, - setup_requires=["setuptools_scm", "grpcio", "grpcio-tools==1.34.0", "mypy-protobuf==1.*", "sphinx!=4.0.0"], + setup_requires=["setuptools_scm", "grpcio", "grpcio-tools==1.34.0", "mypy-protobuf==3.1.0", "sphinx!=4.0.0"], package_data={ "": [ "protos/feast/**/*.proto", diff --git a/sdk/python/tests/__init__.py b/sdk/python/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py index 61e591f237..49f32379a3 100644 --- a/sdk/python/tests/conftest.py +++ b/sdk/python/tests/conftest.py @@ -13,7 +13,9 @@ # limitations under the License. 
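The setup.py hunk above registers "snowflake" and "ge" extras next to the existing "gcp", "aws", and "redis" ones. As a usage sketch (not part of this diff), those optional dependency groups would typically be installed with something like pip install "feast[snowflake,ge]", after which the packages they pin become importable; the import paths below come from snowflake-connector-python[pandas] and great_expectations themselves, not from this patch.

# Hypothetical smoke check for the new optional dependency groups.
from great_expectations.dataset import PandasDataset  # pulled in by the "ge" extra
from snowflake.connector.pandas_tools import write_pandas  # pulled in by the "snowflake" extra

assert callable(write_pandas) and PandasDataset is not None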
import logging import multiprocessing +import time from datetime import datetime, timedelta +from multiprocessing import Process from sys import platform from typing import List @@ -21,6 +23,7 @@ import pytest from _pytest.nodes import Item +from feast import FeatureStore from tests.data.data_creator import create_dataset from tests.integration.feature_repos.integration_test_repo_config import ( IntegrationTestRepoConfig, @@ -137,23 +140,41 @@ def simple_dataset_2() -> pd.DataFrame: return pd.DataFrame.from_dict(data) +def start_test_local_server(repo_path: str, port: int): + fs = FeatureStore(repo_path) + fs.serve("localhost", port, no_access_log=True) + + @pytest.fixture( params=FULL_REPO_CONFIGS, scope="session", ids=[str(c) for c in FULL_REPO_CONFIGS] ) -def environment(request): - e = construct_test_environment(request.param) +def environment(request, worker_id: str): + e = construct_test_environment(request.param, worker_id=worker_id) + proc = Process( + target=start_test_local_server, + args=(e.feature_store.repo_path, e.get_local_server_port()), + daemon=True, + ) + if e.python_feature_server and e.test_repo_config.provider == "local": + proc.start() + # Wait for server to start + time.sleep(3) def cleanup(): e.feature_store.teardown() + if proc.is_alive(): + proc.kill() request.addfinalizer(cleanup) + return e @pytest.fixture() def local_redis_environment(request, worker_id): - - e = construct_test_environment(IntegrationTestRepoConfig(online_store=REDIS_CONFIG)) + e = construct_test_environment( + IntegrationTestRepoConfig(online_store=REDIS_CONFIG), worker_id=worker_id + ) def cleanup(): e.feature_store.teardown() diff --git a/sdk/python/tests/data/__init__.py b/sdk/python/tests/data/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sdk/python/tests/data/data_creator.py b/sdk/python/tests/data/data_creator.py index e5355b40bb..e08597b67b 100644 --- a/sdk/python/tests/data/data_creator.py +++ b/sdk/python/tests/data/data_creator.py @@ -1,5 +1,5 @@ from datetime import datetime, timedelta -from typing import List +from typing import Dict, List, Optional import pandas as pd from pytz import timezone, utc @@ -38,7 +38,7 @@ def create_dataset( def get_entities_for_value_type(value_type: ValueType) -> List: - value_type_map = { + value_type_map: Dict[ValueType, List] = { ValueType.INT32: [1, 2, 1, 3, 3], ValueType.INT64: [1, 2, 1, 3, 3], ValueType.FLOAT: [1.0, 2.0, 1.0, 3.0, 3.0], @@ -48,18 +48,25 @@ def get_entities_for_value_type(value_type: ValueType) -> List: def get_feature_values_for_dtype( - dtype: str, is_list: bool, has_empty_list: bool + dtype: Optional[str], is_list: bool, has_empty_list: bool ) -> List: if dtype is None: return [0.1, None, 0.3, 4, 5] # TODO(adchia): for int columns, consider having a better error when dealing with None values (pandas int dfs can't # have na) - dtype_map = { + dtype_map: Dict[str, List] = { "int32": [1, 2, 3, 4, 5], "int64": [1, 2, 3, 4, 5], "float": [1.0, None, 3.0, 4.0, 5.0], "string": ["1", None, "3", "4", "5"], "bool": [True, None, False, True, False], + "datetime": [ + datetime(1980, 1, 1), + None, + datetime(1981, 1, 1), + datetime(1982, 1, 1), + datetime(1982, 1, 1), + ], } non_list_val = dtype_map[dtype] if is_list: diff --git a/sdk/python/tests/example_repos/example_feature_repo_with_duplicated_featureview_names.py b/sdk/python/tests/example_repos/example_feature_repo_with_duplicated_featureview_names.py index e4c7abed0f..84d57bf038 100644 --- 
a/sdk/python/tests/example_repos/example_feature_repo_with_duplicated_featureview_names.py +++ b/sdk/python/tests/example_repos/example_feature_repo_with_duplicated_featureview_names.py @@ -10,7 +10,7 @@ name="driver_hourly_stats", # Intentionally use the same FeatureView name entities=["driver_id"], online=False, - input=driver_hourly_stats, + batch_source=driver_hourly_stats, ttl=Duration(seconds=10), tags={}, ) @@ -19,7 +19,7 @@ name="driver_hourly_stats", # Intentionally use the same FeatureView name entities=["driver_id"], online=False, - input=driver_hourly_stats, + batch_source=driver_hourly_stats, ttl=Duration(seconds=10), tags={}, ) diff --git a/sdk/python/tests/foo_provider.py b/sdk/python/tests/foo_provider.py index 8e9254cd3d..1d4ce7d6cb 100644 --- a/sdk/python/tests/foo_provider.py +++ b/sdk/python/tests/foo_provider.py @@ -10,6 +10,7 @@ from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.registry import Registry +from feast.saved_dataset import SavedDataset class FooProvider(Provider): @@ -75,3 +76,6 @@ def online_read( requested_features: List[str] = None, ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]: pass + + def retrieve_saved_dataset(self, config: RepoConfig, dataset: SavedDataset): + pass diff --git a/sdk/python/tests/integration/e2e/test_universal_e2e.py b/sdk/python/tests/integration/e2e/test_universal_e2e.py index fbbdd14f23..477c79614c 100644 --- a/sdk/python/tests/integration/e2e/test_universal_e2e.py +++ b/sdk/python/tests/integration/e2e/test_universal_e2e.py @@ -76,14 +76,26 @@ def check_offline_and_online_features( if full_feature_names: if expected_value: - assert abs(df.to_dict()[f"{fv.name}__value"][0] - expected_value) < 1e-6 + assert ( + abs( + df.to_dict(orient="list")[f"{fv.name}__value"][0] + - expected_value + ) + < 1e-6 + ) else: - assert math.isnan(df.to_dict()[f"{fv.name}__value"][0]) + assert not df.to_dict(orient="list")[f"{fv.name}__value"] or math.isnan( + df.to_dict(orient="list")[f"{fv.name}__value"][0] + ) else: if expected_value: - assert abs(df.to_dict()["value"][0] - expected_value) < 1e-6 + assert ( + abs(df.to_dict(orient="list")["value"][0] - expected_value) < 1e-6 + ) else: - assert math.isnan(df.to_dict()["value"][0]) + assert not df.to_dict(orient="list")["value"] or math.isnan( + df.to_dict(orient="list")["value"][0] + ) def run_offline_online_store_consistency_test( diff --git a/sdk/python/tests/integration/e2e/test_usage_e2e.py b/sdk/python/tests/integration/e2e/test_usage_e2e.py index f55fbce55c..0bae973063 100644 --- a/sdk/python/tests/integration/e2e/test_usage_e2e.py +++ b/sdk/python/tests/integration/e2e/test_usage_e2e.py @@ -66,10 +66,16 @@ def test_usage_on(dummy_exporter, enabling_toggle): test_feature_store.apply([entity]) - assert len(dummy_exporter) == 1 + assert len(dummy_exporter) == 3 assert { - "entrypoint": "feast.feature_store.FeatureStore.apply" + "entrypoint": "feast.infra.local.LocalRegistryStore.get_registry_proto" }.items() <= dummy_exporter[0].items() + assert { + "entrypoint": "feast.infra.local.LocalRegistryStore.update_registry_proto" + }.items() <= dummy_exporter[1].items() + assert { + "entrypoint": "feast.feature_store.FeatureStore.apply" + }.items() <= dummy_exporter[2].items() @pytest.mark.integration diff --git a/sdk/python/tests/integration/e2e/test_validation.py b/sdk/python/tests/integration/e2e/test_validation.py new file mode 100644 index 0000000000..2bd1e3cbbc --- 
/dev/null +++ b/sdk/python/tests/integration/e2e/test_validation.py @@ -0,0 +1,134 @@ +import pandas as pd +import pytest +from great_expectations.core import ExpectationSuite +from great_expectations.dataset import PandasDataset + +from feast.dqm.errors import ValidationFailed +from feast.dqm.profilers.ge_profiler import ge_profiler +from tests.integration.feature_repos.repo_configuration import ( + construct_universal_feature_views, +) +from tests.integration.feature_repos.universal.entities import ( + customer, + driver, + location, +) + +_features = [ + "customer_profile:current_balance", + "customer_profile:avg_passenger_count", + "customer_profile:lifetime_trip_count", + "order:order_is_success", + "global_stats:num_rides", + "global_stats:avg_ride_length", +] + + +@ge_profiler +def configurable_profiler(dataset: PandasDataset) -> ExpectationSuite: + from great_expectations.profile.user_configurable_profiler import ( + UserConfigurableProfiler, + ) + + return UserConfigurableProfiler( + profile_dataset=dataset, + excluded_expectations=[ + "expect_table_columns_to_match_ordered_list", + "expect_table_row_count_to_be_between", + ], + value_set_threshold="few", + ).build_suite() + + +@ge_profiler +def profiler_with_unrealistic_expectations(dataset: PandasDataset) -> ExpectationSuite: + # need to create dataframe with corrupted data first + df = pd.DataFrame() + df["current_balance"] = [-100] + df["avg_passenger_count"] = [0] + + other_ds = PandasDataset(df) + other_ds.expect_column_max_to_be_between("current_balance", -1000, -100) + other_ds.expect_column_values_to_be_in_set("avg_passenger_count", value_set={0}) + + # this should pass + other_ds.expect_column_min_to_be_between("avg_passenger_count", 0, 1000) + + return other_ds.get_expectation_suite() + + +@pytest.mark.integration +@pytest.mark.universal +def test_historical_retrieval_with_validation(environment, universal_data_sources): + store = environment.feature_store + + (entities, datasets, data_sources) = universal_data_sources + feature_views = construct_universal_feature_views(data_sources) + + store.apply([driver(), customer(), location(), *feature_views.values()]) + + entity_df = datasets["entity"].drop( + columns=["order_id", "origin_id", "destination_id"] + ) + + reference_job = store.get_historical_features( + entity_df=entity_df, features=_features, + ) + + store.create_saved_dataset( + from_=reference_job, + name="my_training_dataset", + storage=environment.data_source_creator.create_saved_dataset_destination(), + ) + + job = store.get_historical_features(entity_df=entity_df, features=_features,) + + # if validation pass there will be no exceptions on this point + job.to_df( + validation_reference=store.get_saved_dataset( + "my_training_dataset" + ).as_reference(profiler=configurable_profiler) + ) + + +@pytest.mark.integration +@pytest.mark.universal +def test_historical_retrieval_fails_on_validation(environment, universal_data_sources): + store = environment.feature_store + + (entities, datasets, data_sources) = universal_data_sources + feature_views = construct_universal_feature_views(data_sources) + + store.apply([driver(), customer(), location(), *feature_views.values()]) + + entity_df = datasets["entity"].drop( + columns=["order_id", "origin_id", "destination_id"] + ) + + reference_job = store.get_historical_features( + entity_df=entity_df, features=_features, + ) + + store.create_saved_dataset( + from_=reference_job, + name="my_other_dataset", + 
storage=environment.data_source_creator.create_saved_dataset_destination(), + ) + + job = store.get_historical_features(entity_df=entity_df, features=_features,) + + with pytest.raises(ValidationFailed) as exc_info: + job.to_df( + validation_reference=store.get_saved_dataset( + "my_other_dataset" + ).as_reference(profiler=profiler_with_unrealistic_expectations) + ) + + failed_expectations = exc_info.value.report.errors + assert len(failed_expectations) == 2 + + assert failed_expectations[0].check_name == "expect_column_max_to_be_between" + assert failed_expectations[0].column_name == "current_balance" + + assert failed_expectations[1].check_name == "expect_column_values_to_be_in_set" + assert failed_expectations[1].column_name == "avg_passenger_count" diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index 6dedfb63b2..d7f843c645 100644 --- a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -1,20 +1,22 @@ import importlib import json import os +import re import tempfile import uuid -from dataclasses import dataclass, field +from dataclasses import dataclass from datetime import datetime, timedelta from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union import pandas as pd import yaml -from feast import FeatureStore, FeatureView, RepoConfig, driver_test_data +from feast import FeatureStore, FeatureView, driver_test_data from feast.constants import FULL_REPO_CONFIGS_MODULE_ENV_NAME from feast.data_source import DataSource from feast.errors import FeastModuleImportError +from feast.repo_config import RegistryConfig, RepoConfig from tests.integration.feature_repos.integration_test_repo_config import ( IntegrationTestRepoConfig, ) @@ -27,12 +29,16 @@ from tests.integration.feature_repos.universal.data_sources.redshift import ( RedshiftDataSourceCreator, ) +from tests.integration.feature_repos.universal.data_sources.snowflake import ( + SnowflakeDataSourceCreator, +) from tests.integration.feature_repos.universal.feature_views import ( conv_rate_plus_100_feature_view, create_conv_rate_request_data_source, create_customer_daily_profile_feature_view, create_driver_age_request_feature_view, create_driver_hourly_stats_feature_view, + create_field_mapping_feature_view, create_global_stats_feature_view, create_location_stats_feature_view, create_order_feature_view, @@ -40,6 +46,12 @@ DYNAMO_CONFIG = {"type": "dynamodb", "region": "us-west-2"} REDIS_CONFIG = {"type": "redis", "connection_string": "localhost:6379,db=0"} +REDIS_CLUSTER_CONFIG = { + "type": "redis", + "redis_type": "redis_cluster", + # Redis Cluster Port Forwarding is setup in "pr_integration_tests.yaml" under "Setup Redis Cluster". + "connection_string": "127.0.0.1:6001,127.0.0.1:6002,127.0.0.1:6003", +} # FULL_REPO_CONFIGS contains the repo configurations (e.g. 
provider, offline store, # online store, test data, and more parameters) that most integration tests will test @@ -51,10 +63,12 @@ DEFAULT_FULL_REPO_CONFIGS: List[IntegrationTestRepoConfig] = [ # Local configurations IntegrationTestRepoConfig(), + IntegrationTestRepoConfig(python_feature_server=True), ] if os.getenv("FEAST_IS_LOCAL_TEST", "False") != "True": DEFAULT_FULL_REPO_CONFIGS.extend( [ + # Redis configurations IntegrationTestRepoConfig(online_store=REDIS_CONFIG), # GCP configurations IntegrationTestRepoConfig( @@ -79,6 +93,12 @@ offline_store_creator=RedshiftDataSourceCreator, online_store=REDIS_CONFIG, ), + # Snowflake configurations + IntegrationTestRepoConfig( + provider="aws", # no list features, no feature server + offline_store_creator=SnowflakeDataSourceCreator, + online_store=REDIS_CONFIG, + ), ] ) full_repo_configs_module = os.environ.get(FULL_REPO_CONFIGS_MODULE_ENV_NAME) @@ -123,6 +143,7 @@ def construct_universal_datasets( order_count=20, ) global_df = driver_test_data.create_global_daily_stats_df(start_time, end_time) + field_mapping_df = driver_test_data.create_field_mapping_df(start_time, end_time) entity_df = orders_df[ [ "customer_id", @@ -140,6 +161,7 @@ def construct_universal_datasets( "location": location_df, "orders": orders_df, "global": global_df, + "field_mapping": field_mapping_df, "entity": entity_df, } @@ -177,12 +199,20 @@ def construct_universal_data_sources( event_timestamp_column="event_timestamp", created_timestamp_column="created", ) + field_mapping_ds = data_source_creator.create_data_source( + datasets["field_mapping"], + destination_name="field_mapping", + event_timestamp_column="event_timestamp", + created_timestamp_column="created", + field_mapping={"column_name": "feature_name"}, + ) return { "customer": customer_ds, "driver": driver_ds, "location": location_ds, "orders": orders_ds, "global": global_ds, + "field_mapping": field_mapping_ds, } @@ -207,6 +237,9 @@ def construct_universal_feature_views( "driver_age_request_fv": create_driver_age_request_feature_view(), "order": create_order_feature_view(data_sources["orders"]), "location": create_location_stats_feature_view(data_sources["location"]), + "field_mapping": create_field_mapping_feature_view( + data_sources["field_mapping"] + ), } @@ -217,29 +250,42 @@ class Environment: feature_store: FeatureStore data_source_creator: DataSourceCreator python_feature_server: bool - - end_date: datetime = field( - default=datetime.utcnow().replace(microsecond=0, second=0, minute=0) - ) + worker_id: str def __post_init__(self): + self.end_date = datetime.utcnow().replace(microsecond=0, second=0, minute=0) self.start_date: datetime = self.end_date - timedelta(days=3) + def get_feature_server_endpoint(self) -> str: + if self.python_feature_server and self.test_repo_config.provider == "local": + return f"http://localhost:{self.get_local_server_port()}" + return self.feature_store.get_feature_server_endpoint() + + def get_local_server_port(self) -> int: + # Heuristic when running with xdist to extract unique ports for each worker + parsed_worker_id = re.findall("gw(\\d+)", self.worker_id) + if len(parsed_worker_id) != 0: + worker_id_num = int(parsed_worker_id[0]) + else: + worker_id_num = 0 + return 6566 + worker_id_num + def table_name_from_data_source(ds: DataSource) -> Optional[str]: if hasattr(ds, "table_ref"): - return ds.table_ref + return ds.table_ref # type: ignore elif hasattr(ds, "table"): - return ds.table + return ds.table # type: ignore return None def construct_test_environment( 
test_repo_config: IntegrationTestRepoConfig, test_suite_name: str = "integration_test", + worker_id: str = "worker_id", ) -> Environment: - _uuid = str(uuid.uuid4()).replace("-", "")[:8] + _uuid = str(uuid.uuid4()).replace("-", "")[:6] run_id = os.getenv("GITHUB_RUN_ID", default=None) run_id = f"gh_run_{run_id}_{_uuid}" if run_id else _uuid @@ -254,7 +300,7 @@ def construct_test_environment( repo_dir_name = tempfile.mkdtemp() - if test_repo_config.python_feature_server: + if test_repo_config.python_feature_server and test_repo_config.provider == "aws": from feast.infra.feature_servers.aws_lambda.config import ( AwsLambdaFeatureServerConfig, ) @@ -264,10 +310,15 @@ def construct_test_environment( execution_role_name="arn:aws:iam::402087665549:role/lambda_execution_role", ) - registry = f"s3://feast-integration-tests/registries/{project}/registry.db" + registry = ( + f"s3://feast-integration-tests/registries/{project}/registry.db" + ) # type: Union[str, RegistryConfig] else: + # Note: even if it's a local feature server, the repo config does not have this configured feature_server = None - registry = str(Path(repo_dir_name) / "registry.db") + registry = RegistryConfig( + path=str(Path(repo_dir_name) / "registry.db"), cache_ttl_seconds=1, + ) config = RepoConfig( registry=registry, @@ -293,6 +344,7 @@ def construct_test_environment( feature_store=fs, data_source_creator=offline_creator, python_feature_server=test_repo_config.python_feature_server, + worker_id=worker_id, ) return environment diff --git a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py index e0d6983bf1..dcefa29df1 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py @@ -5,6 +5,7 @@ from feast.data_source import DataSource from feast.repo_config import FeastConfigBaseModel +from feast.saved_dataset import SavedDatasetStorage class DataSourceCreator(ABC): @@ -40,6 +41,10 @@ def create_data_source( def create_offline_store_config(self) -> FeastConfigBaseModel: ... + @abstractmethod + def create_saved_dataset_destination(self) -> SavedDatasetStorage: + ... + @abstractmethod def teardown(self): ... 
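The new DataSourceCreator.create_saved_dataset_destination() abstract method above is what lets each offline-store-specific creator hand the tests a SavedDatasetStorage. A condensed sketch of the flow it supports, mirroring sdk/python/tests/integration/e2e/test_validation.py earlier in this diff (store, environment, entity_df, _features, and configurable_profiler are the names used in that test, not new API):

# Persist one historical retrieval as a saved dataset in the offline store,
# then use it as a validation reference for a later retrieval of the same
# features; to_df() raises feast.dqm.errors.ValidationFailed when the
# profiler's expectation suite is violated.
reference_job = store.get_historical_features(entity_df=entity_df, features=_features)
store.create_saved_dataset(
    from_=reference_job,
    name="my_training_dataset",
    storage=environment.data_source_creator.create_saved_dataset_destination(),
)
job = store.get_historical_features(entity_df=entity_df, features=_features)
validated_df = job.to_df(
    validation_reference=store.get_saved_dataset("my_training_dataset").as_reference(
        profiler=configurable_profiler
    )
)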
diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py index 766c31150e..e0ac2050ea 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py @@ -1,4 +1,5 @@ -from typing import Dict, Optional +import uuid +from typing import Dict, List, Optional import pandas as pd from google.cloud import bigquery @@ -7,6 +8,7 @@ from feast import BigQuerySource from feast.data_source import DataSource from feast.infra.offline_stores.bigquery import BigQueryOfflineStoreConfig +from feast.infra.offline_stores.bigquery_source import SavedDatasetBigQueryStorage from tests.integration.feature_repos.universal.data_source_creator import ( DataSourceCreator, ) @@ -21,7 +23,7 @@ def __init__(self, project_name: str): self.gcp_project = self.client.project self.dataset_id = f"{self.gcp_project}.{project_name}" - self.tables = [] + self.tables: List[str] = [] def create_dataset(self): if not self.dataset: @@ -50,7 +52,7 @@ def create_offline_store_config(self): def create_data_source( self, df: pd.DataFrame, - destination_name: Optional[str] = None, + destination_name: str, event_timestamp_column="ts", created_timestamp_column="created_ts", field_mapping: Dict[str, str] = None, @@ -79,5 +81,11 @@ def create_data_source( field_mapping=field_mapping or {"ts_1": "ts"}, ) + def create_saved_dataset_destination(self) -> SavedDatasetBigQueryStorage: + table = self.get_prefixed_table_name( + f"persisted_{str(uuid.uuid4()).replace('-', '_')}" + ) + return SavedDatasetBigQueryStorage(table_ref=table) + def get_prefixed_table_name(self, suffix: str) -> str: return f"{self.client.project}.{self.project_name}.{suffix}" diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py index 0d402b2314..baa3db6afc 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py @@ -1,4 +1,5 @@ import tempfile +import uuid from typing import Any, Dict, List, Optional import pandas as pd @@ -10,6 +11,7 @@ from feast.data_format import ParquetFormat from feast.data_source import DataSource from feast.infra.offline_stores.file import FileOfflineStoreConfig +from feast.infra.offline_stores.file_source import SavedDatasetFileStorage from feast.repo_config import FeastConfigBaseModel from tests.integration.feature_repos.universal.data_source_creator import ( DataSourceCreator, @@ -50,6 +52,12 @@ def create_data_source( field_mapping=field_mapping or {"ts_1": "ts"}, ) + def create_saved_dataset_destination(self) -> SavedDatasetFileStorage: + d = tempfile.mkdtemp(prefix=self.project_name) + return SavedDatasetFileStorage( + path=d, file_format=ParquetFormat(), s3_endpoint_override=None + ) + def get_prefixed_table_name(self, suffix: str) -> str: return f"{self.project_name}.{suffix}" @@ -127,6 +135,16 @@ def create_data_source( s3_endpoint_override=f"http://{host}:{port}", ) + def create_saved_dataset_destination(self) -> SavedDatasetFileStorage: + port = self.minio.get_exposed_port("9000") + host = self.minio.get_container_host_ip() + + return SavedDatasetFileStorage( + path=f"s3://{self.bucket}/persisted/{str(uuid.uuid4())}", + file_format=ParquetFormat(), + s3_endpoint_override=f"http://{host}:{port}", + ) + 
def get_prefixed_table_name(self, suffix: str) -> str: return f"{suffix}" diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py index 88780f07a0..49b31263cf 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py @@ -1,10 +1,12 @@ -from typing import Dict, Optional +import uuid +from typing import Dict, List, Optional import pandas as pd from feast import RedshiftSource from feast.data_source import DataSource from feast.infra.offline_stores.redshift import RedshiftOfflineStoreConfig +from feast.infra.offline_stores.redshift_source import SavedDatasetRedshiftStorage from feast.infra.utils import aws_utils from feast.repo_config import FeastConfigBaseModel from tests.integration.feature_repos.universal.data_source_creator import ( @@ -14,7 +16,7 @@ class RedshiftDataSourceCreator(DataSourceCreator): - tables = [] + tables: List[str] = [] def __init__(self, project_name: str): super().__init__() @@ -65,6 +67,14 @@ def create_data_source( field_mapping=field_mapping or {"ts_1": "ts"}, ) + def create_saved_dataset_destination(self) -> SavedDatasetRedshiftStorage: + table = self.get_prefixed_table_name( + f"persisted_ds_{str(uuid.uuid4()).replace('-', '_')}" + ) + self.tables.append(table) + + return SavedDatasetRedshiftStorage(table_ref=table) + def create_offline_store_config(self) -> FeastConfigBaseModel: return self.offline_store_config diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py new file mode 100644 index 0000000000..f76656f5b7 --- /dev/null +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py @@ -0,0 +1,82 @@ +import os +import uuid +from typing import Dict, List, Optional + +import pandas as pd + +from feast import SnowflakeSource +from feast.data_source import DataSource +from feast.infra.offline_stores.snowflake import SnowflakeOfflineStoreConfig +from feast.infra.offline_stores.snowflake_source import SavedDatasetSnowflakeStorage +from feast.infra.utils.snowflake_utils import get_snowflake_conn, write_pandas +from feast.repo_config import FeastConfigBaseModel +from tests.integration.feature_repos.universal.data_source_creator import ( + DataSourceCreator, +) + + +class SnowflakeDataSourceCreator(DataSourceCreator): + + tables: List[str] = [] + + def __init__(self, project_name: str): + super().__init__() + self.project_name = project_name + self.offline_store_config = SnowflakeOfflineStoreConfig( + type="snowflake.offline", + account=os.environ["SNOWFLAKE_CI_DEPLOYMENT"], + user=os.environ["SNOWFLAKE_CI_USER"], + password=os.environ["SNOWFLAKE_CI_PASSWORD"], + role=os.environ["SNOWFLAKE_CI_ROLE"], + warehouse=os.environ["SNOWFLAKE_CI_WAREHOUSE"], + database="FEAST", + schema="OFFLINE", + ) + + def create_data_source( + self, + df: pd.DataFrame, + destination_name: str, + suffix: Optional[str] = None, + event_timestamp_column="ts", + created_timestamp_column="created_ts", + field_mapping: Dict[str, str] = None, + ) -> DataSource: + + snowflake_conn = get_snowflake_conn(self.offline_store_config) + + destination_name = self.get_prefixed_table_name(destination_name) + + write_pandas(snowflake_conn, df, destination_name, auto_create_table=True) + + self.tables.append(destination_name) + + return 
SnowflakeSource( + table=destination_name, + event_timestamp_column=event_timestamp_column, + created_timestamp_column=created_timestamp_column, + date_partition_column="", + field_mapping=field_mapping or {"ts_1": "ts"}, + ) + + def create_saved_dataset_destination(self) -> SavedDatasetSnowflakeStorage: + table = self.get_prefixed_table_name( + f"persisted_ds_{str(uuid.uuid4()).replace('-', '_')}" + ) + self.tables.append(table) + + return SavedDatasetSnowflakeStorage(table_ref=table) + + def create_offline_store_config(self) -> FeastConfigBaseModel: + return self.offline_store_config + + def get_prefixed_table_name(self, suffix: str) -> str: + return f"{self.project_name}_{suffix}" + + def teardown(self): + snowflake_conn = get_snowflake_conn(self.offline_store_config) + + with snowflake_conn as conn: + cur = conn.cursor() + for table in self.tables: + cur.execute(f'DROP TABLE IF EXISTS "{table}"') diff --git a/sdk/python/tests/integration/feature_repos/universal/feature_views.py b/sdk/python/tests/integration/feature_repos/universal/feature_views.py index 3d19212f48..b0dc34197f 100644 --- a/sdk/python/tests/integration/feature_repos/universal/feature_views.py +++ b/sdk/python/tests/integration/feature_repos/universal/feature_views.py @@ -20,7 +20,7 @@ def driver_feature_view( entities=["driver"], features=None if infer_features else [Feature("value", value_type)], ttl=timedelta(days=5), - input=data_source, + batch_source=data_source, ) @@ -35,7 +35,7 @@ def global_feature_view( entities=[], features=None if infer_features else [Feature("entityless_value", value_type)], ttl=timedelta(days=5), - input=data_source, + batch_source=data_source, ) @@ -217,3 +217,13 @@ def create_location_stats_feature_view(source, infer_features: bool = False): ttl=timedelta(days=2), ) return location_stats_feature_view + + +def create_field_mapping_feature_view(source): + return FeatureView( + name="field_mapping", + entities=[], + features=[Feature(name="feature_name", dtype=ValueType.INT32)], + batch_source=source, + ttl=timedelta(days=2), + ) diff --git a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py index dad14ac5aa..4a396c7e4d 100644 --- a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py +++ b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd import pytest -from pandas.testing import assert_frame_equal +from pandas.testing import assert_frame_equal as pd_assert_frame_equal from pytz import utc from feast import utils @@ -26,6 +26,9 @@ construct_universal_feature_views, table_name_from_data_source, ) +from tests.integration.feature_repos.universal.data_sources.snowflake import ( + SnowflakeDataSourceCreator, +) from tests.integration.feature_repos.universal.entities import ( customer, driver, @@ -55,7 +58,7 @@ def find_asof_record( filter_keys = filter_keys or [] filter_values = filter_values or [] assert len(filter_keys) == len(filter_values) - found_record = {} + found_record: Dict[str, Any] = {} for record in records: if ( all( @@ -82,6 +85,8 @@ def get_expected_training_df( location_fv: FeatureView, global_df: pd.DataFrame, global_fv: FeatureView, + field_mapping_df: pd.DataFrame, + field_mapping_fv: FeatureView, entity_df: pd.DataFrame, event_timestamp: str, full_feature_names: bool = False, @@ -102,6 +107,10 @@ def get_expected_training_df( global_records = 
convert_timestamp_records_to_utc( global_df.to_dict("records"), global_fv.batch_source.event_timestamp_column ) + field_mapping_records = convert_timestamp_records_to_utc( + field_mapping_df.to_dict("records"), + field_mapping_fv.batch_source.event_timestamp_column, + ) entity_rows = convert_timestamp_records_to_utc( entity_df.to_dict("records"), event_timestamp ) @@ -156,6 +165,13 @@ def get_expected_training_df( ts_end=order_record[event_timestamp], ) + field_mapping_record = find_asof_record( + field_mapping_records, + ts_key=field_mapping_fv.batch_source.event_timestamp_column, + ts_start=order_record[event_timestamp] - field_mapping_fv.ttl, + ts_end=order_record[event_timestamp], + ) + entity_row.update( { ( @@ -197,6 +213,16 @@ def get_expected_training_df( } ) + # get field_mapping_record by column name, but label by feature name + entity_row.update( + { + ( + f"field_mapping__{feature}" if full_feature_names else feature + ): field_mapping_record.get(column, None) + for (column, feature) in field_mapping_fv.input.field_mapping.items() + } + ) + # Convert records back to pandas dataframe expected_df = pd.DataFrame(entity_rows) @@ -213,6 +239,7 @@ def get_expected_training_df( "customer_profile__current_balance": "float32", "customer_profile__avg_passenger_count": "float32", "global_stats__avg_ride_length": "float32", + "field_mapping__feature_name": "int32", } else: expected_column_types = { @@ -221,6 +248,7 @@ def get_expected_training_df( "current_balance": "float32", "avg_passenger_count": "float32", "avg_ride_length": "float32", + "feature_name": "int32", } for col, typ in expected_column_types.items(): @@ -239,9 +267,10 @@ def get_expected_training_df( .round() .astype(pd.Int32Dtype()) ) - expected_df[ - response_feature_name("conv_rate_plus_val_to_add", full_feature_names) - ] = (expected_df[conv_feature_name] + expected_df["val_to_add"]) + if "val_to_add" in expected_df.columns: + expected_df[ + response_feature_name("conv_rate_plus_val_to_add", full_feature_names) + ] = (expected_df[conv_feature_name] + expected_df["val_to_add"]) return expected_df @@ -255,15 +284,7 @@ def test_historical_features(environment, universal_data_sources, full_feature_n (entities, datasets, data_sources) = universal_data_sources feature_views = construct_universal_feature_views(data_sources) - customer_df, driver_df, location_df, orders_df, global_df, entity_df = ( - datasets["customer"], - datasets["driver"], - datasets["location"], - datasets["orders"], - datasets["global"], - datasets["entity"], - ) - entity_df_with_request_data = entity_df.copy(deep=True) + entity_df_with_request_data = datasets["entity"].copy(deep=True) entity_df_with_request_data["val_to_add"] = [ i for i in range(len(entity_df_with_request_data)) ] @@ -271,84 +292,55 @@ def test_historical_features(environment, universal_data_sources, full_feature_n i + 100 for i in range(len(entity_df_with_request_data)) ] - ( - customer_fv, - driver_fv, - driver_odfv, - location_fv, - order_fv, - global_fv, - driver_age_request_fv, - ) = ( - feature_views["customer"], - feature_views["driver"], - feature_views["driver_odfv"], - feature_views["location"], - feature_views["order"], - feature_views["global"], - feature_views["driver_age_request_fv"], - ) - feature_service = FeatureService( name="convrate_plus100", features=[ feature_views["driver"][["conv_rate"]], - driver_odfv, - driver_age_request_fv, + feature_views["driver_odfv"], + feature_views["driver_age_request_fv"], ], ) feature_service_entity_mapping = FeatureService( 
name="entity_mapping", features=[ - location_fv.with_name("origin").with_join_key_map( - {"location_id": "origin_id"} - ), - location_fv.with_name("destination").with_join_key_map( - {"location_id": "destination_id"} - ), + feature_views["location"] + .with_name("origin") + .with_join_key_map({"location_id": "origin_id"}), + feature_views["location"] + .with_name("destination") + .with_join_key_map({"location_id": "destination_id"}), ], ) - feast_objects = [] - feast_objects.extend( + store.apply( [ - customer_fv, - driver_fv, - driver_odfv, - location_fv, - order_fv, - global_fv, - driver_age_request_fv, driver(), customer(), location(), feature_service, feature_service_entity_mapping, + *feature_views.values(), ] ) - store.apply(feast_objects) - - entity_df_query = None - orders_table = table_name_from_data_source(data_sources["orders"]) - if orders_table: - entity_df_query = f"SELECT customer_id, driver_id, order_id, origin_id, destination_id, event_timestamp FROM {orders_table}" event_timestamp = ( DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL - if DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL in orders_df.columns + if DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL in datasets["orders"].columns else "e_ts" ) full_expected_df = get_expected_training_df( - customer_df, - customer_fv, - driver_df, - driver_fv, - orders_df, - order_fv, - location_df, - location_fv, - global_df, - global_fv, + datasets["customer"], + feature_views["customer"], + datasets["driver"], + feature_views["driver"], + datasets["orders"], + feature_views["order"], + datasets["location"], + feature_views["location"], + datasets["global"], + feature_views["global"], + datasets["field_mapping"], + feature_views["field_mapping"], entity_df_with_request_data, event_timestamp, full_feature_names, @@ -359,76 +351,6 @@ def test_historical_features(environment, universal_data_sources, full_feature_n columns=["origin__temperature", "destination__temperature"], ) - if entity_df_query: - job_from_sql = store.get_historical_features( - entity_df=entity_df_query, - features=[ - "driver_stats:conv_rate", - "driver_stats:avg_daily_trips", - "customer_profile:current_balance", - "customer_profile:avg_passenger_count", - "customer_profile:lifetime_trip_count", - "order:order_is_success", - "global_stats:num_rides", - "global_stats:avg_ride_length", - ], - full_feature_names=full_feature_names, - ) - - start_time = datetime.utcnow() - actual_df_from_sql_entities = job_from_sql.to_df() - end_time = datetime.utcnow() - print( - str(f"\nTime to execute job_from_sql.to_df() = '{(end_time - start_time)}'") - ) - - # Not requesting the on demand transform with an entity_df query (can't add request data in them) - expected_df_query = expected_df.drop( - columns=[ - response_feature_name("conv_rate_plus_100", full_feature_names), - response_feature_name("conv_rate_plus_100_rounded", full_feature_names), - response_feature_name("conv_rate_plus_val_to_add", full_feature_names), - "val_to_add", - "driver_age", - ] - ) - assert sorted(expected_df_query.columns) == sorted( - actual_df_from_sql_entities.columns - ) - - actual_df_from_sql_entities = ( - actual_df_from_sql_entities[expected_df_query.columns] - .sort_values(by=[event_timestamp, "order_id", "driver_id", "customer_id"]) - .drop_duplicates() - .reset_index(drop=True) - ) - expected_df_query = ( - expected_df_query.sort_values( - by=[event_timestamp, "order_id", "driver_id", "customer_id"] - ) - .drop_duplicates() - .reset_index(drop=True) - ) - - assert_frame_equal( - actual_df_from_sql_entities, 
expected_df_query, check_dtype=False, - ) - - table_from_sql_entities = job_from_sql.to_arrow() - df_from_sql_entities = ( - table_from_sql_entities.to_pandas()[expected_df_query.columns] - .sort_values(by=[event_timestamp, "order_id", "driver_id", "customer_id"]) - .drop_duplicates() - .reset_index(drop=True) - ) - - for col in df_from_sql_entities.columns: - expected_df_query[col] = expected_df_query[col].astype( - df_from_sql_entities[col].dtype - ) - - assert_frame_equal(expected_df_query, df_from_sql_entities) - job_from_df = store.get_historical_features( entity_df=entity_df_with_request_data, features=[ @@ -444,6 +366,7 @@ def test_historical_features(environment, universal_data_sources, full_feature_n "global_stats:num_rides", "global_stats:avg_ride_length", "driver_age:driver_age", + "field_mapping:feature_name", ], full_feature_names=full_feature_names, ) @@ -456,23 +379,12 @@ def test_historical_features(environment, universal_data_sources, full_feature_n print(str(f"Time to execute job_from_df.to_df() = '{(end_time - start_time)}'\n")) assert sorted(expected_df.columns) == sorted(actual_df_from_df_entities.columns) - expected_df: pd.DataFrame = ( - expected_df.sort_values( - by=[event_timestamp, "order_id", "driver_id", "customer_id"] - ) - .drop_duplicates() - .reset_index(drop=True) - ) - actual_df_from_df_entities = ( - actual_df_from_df_entities[expected_df.columns] - .sort_values(by=[event_timestamp, "order_id", "driver_id", "customer_id"]) - .drop_duplicates() - .reset_index(drop=True) - ) - assert_frame_equal( - expected_df, actual_df_from_df_entities, check_dtype=False, + expected_df, + actual_df_from_df_entities, + keys=[event_timestamp, "order_id", "driver_id", "customer_id"], ) + assert_feature_service_correctness( store, feature_service, @@ -489,26 +401,33 @@ def test_historical_features(environment, universal_data_sources, full_feature_n full_expected_df, event_timestamp, ) - table_from_df_entities: pd.DataFrame = job_from_df.to_arrow().to_pandas() - columns_expected_in_table = expected_df.columns.tolist() - - table_from_df_entities = ( - table_from_df_entities[columns_expected_in_table] - .sort_values(by=[event_timestamp, "order_id", "driver_id", "customer_id"]) - .drop_duplicates() - .reset_index(drop=True) + assert_frame_equal( + expected_df, + table_from_df_entities, + keys=[event_timestamp, "order_id", "driver_id", "customer_id"], ) - assert_frame_equal(actual_df_from_df_entities, table_from_df_entities) + + +@pytest.mark.integration +@pytest.mark.universal +@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v)) +def test_historical_features_with_missing_request_data( + environment, universal_data_sources, full_feature_names +): + store = environment.feature_store + + (_, datasets, data_sources) = universal_data_sources + feature_views = construct_universal_feature_views(data_sources) + + store.apply([driver(), customer(), location(), *feature_views.values()]) # If request data is missing that's needed for on demand transform, throw an error with pytest.raises(RequestDataNotFoundInEntityDfException): store.get_historical_features( - entity_df=entity_df, + entity_df=datasets["entity"], features=[ - "driver_stats:conv_rate", - "driver_stats:avg_daily_trips", "customer_profile:current_balance", "customer_profile:avg_passenger_count", "customer_profile:lifetime_trip_count", @@ -516,27 +435,204 @@ def test_historical_features(environment, universal_data_sources, full_feature_n "conv_rate_plus_100:conv_rate_plus_val_to_add", 
"global_stats:num_rides", "global_stats:avg_ride_length", + "field_mapping:feature_name", ], full_feature_names=full_feature_names, ) + # If request data is missing that's needed for a request feature view, throw an error with pytest.raises(RequestDataNotFoundInEntityDfException): store.get_historical_features( - entity_df=entity_df, + entity_df=datasets["entity"], features=[ - "driver_stats:conv_rate", - "driver_stats:avg_daily_trips", "customer_profile:current_balance", "customer_profile:avg_passenger_count", "customer_profile:lifetime_trip_count", "driver_age:driver_age", "global_stats:num_rides", "global_stats:avg_ride_length", + "field_mapping:feature_name", ], full_feature_names=full_feature_names, ) +@pytest.mark.integration +@pytest.mark.universal +@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v)) +def test_historical_features_with_entities_from_query( + environment, universal_data_sources, full_feature_names +): + store = environment.feature_store + + (entities, datasets, data_sources) = universal_data_sources + feature_views = construct_universal_feature_views(data_sources) + + orders_table = table_name_from_data_source(data_sources["orders"]) + if not orders_table: + raise pytest.skip("Offline source is not sql-based") + + if ( + environment.test_repo_config.offline_store_creator.__name__ + == SnowflakeDataSourceCreator.__name__ + ): + entity_df_query = f'''SELECT "customer_id", "driver_id", "order_id", "origin_id", "destination_id", "event_timestamp" FROM "{orders_table}"''' + else: + entity_df_query = f"SELECT customer_id, driver_id, order_id, origin_id, destination_id, event_timestamp FROM {orders_table}" + + store.apply([driver(), customer(), location(), *feature_views.values()]) + + job_from_sql = store.get_historical_features( + entity_df=entity_df_query, + features=[ + "customer_profile:current_balance", + "customer_profile:avg_passenger_count", + "customer_profile:lifetime_trip_count", + "order:order_is_success", + "global_stats:num_rides", + "global_stats:avg_ride_length", + "field_mapping:feature_name", + ], + full_feature_names=full_feature_names, + ) + + start_time = datetime.utcnow() + actual_df_from_sql_entities = job_from_sql.to_df() + end_time = datetime.utcnow() + print(str(f"\nTime to execute job_from_sql.to_df() = '{(end_time - start_time)}'")) + + event_timestamp = ( + DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL + if DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL in datasets["orders"].columns + else "e_ts" + ) + full_expected_df = get_expected_training_df( + datasets["customer"], + feature_views["customer"], + datasets["driver"], + feature_views["driver"], + datasets["orders"], + feature_views["order"], + datasets["location"], + feature_views["location"], + datasets["global"], + feature_views["global"], + datasets["field_mapping"], + feature_views["field_mapping"], + datasets["entity"], + event_timestamp, + full_feature_names, + ) + + # Not requesting the on demand transform with an entity_df query (can't add request data in them) + expected_df_query = full_expected_df.drop( + columns=[ + response_feature_name("conv_rate_plus_100", full_feature_names), + response_feature_name("conv_rate_plus_100_rounded", full_feature_names), + response_feature_name("avg_daily_trips", full_feature_names), + response_feature_name("conv_rate", full_feature_names), + "origin__temperature", + "destination__temperature", + ] + ) + assert_frame_equal( + expected_df_query, + actual_df_from_sql_entities, + keys=[event_timestamp, "order_id", "driver_id", 
"customer_id"], + ) + + table_from_sql_entities = job_from_sql.to_arrow().to_pandas() + for col in table_from_sql_entities.columns: + expected_df_query[col] = expected_df_query[col].astype( + table_from_sql_entities[col].dtype + ) + + assert_frame_equal( + expected_df_query, + table_from_sql_entities, + keys=[event_timestamp, "order_id", "driver_id", "customer_id"], + ) + + +@pytest.mark.integration +@pytest.mark.universal +@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v)) +def test_historical_features_persisting( + environment, universal_data_sources, full_feature_names +): + store = environment.feature_store + + (entities, datasets, data_sources) = universal_data_sources + feature_views = construct_universal_feature_views(data_sources) + + store.apply([driver(), customer(), location(), *feature_views.values()]) + + entity_df = datasets["entity"].drop( + columns=["order_id", "origin_id", "destination_id"] + ) + + job = store.get_historical_features( + entity_df=entity_df, + features=[ + "customer_profile:current_balance", + "customer_profile:avg_passenger_count", + "customer_profile:lifetime_trip_count", + "order:order_is_success", + "global_stats:num_rides", + "global_stats:avg_ride_length", + "field_mapping:feature_name", + ], + full_feature_names=full_feature_names, + ) + + saved_dataset = store.create_saved_dataset( + from_=job, + name="saved_dataset", + storage=environment.data_source_creator.create_saved_dataset_destination(), + tags={"env": "test"}, + ) + + event_timestamp = DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL + expected_df = get_expected_training_df( + datasets["customer"], + feature_views["customer"], + datasets["driver"], + feature_views["driver"], + datasets["orders"], + feature_views["order"], + datasets["location"], + feature_views["location"], + datasets["global"], + feature_views["global"], + datasets["field_mapping"], + feature_views["field_mapping"], + entity_df, + event_timestamp, + full_feature_names, + ).drop( + columns=[ + response_feature_name("conv_rate_plus_100", full_feature_names), + response_feature_name("conv_rate_plus_100_rounded", full_feature_names), + response_feature_name("avg_daily_trips", full_feature_names), + response_feature_name("conv_rate", full_feature_names), + "origin__temperature", + "destination__temperature", + ] + ) + + assert_frame_equal( + expected_df, + saved_dataset.to_df(), + keys=[event_timestamp, "driver_id", "customer_id"], + ) + + assert_frame_equal( + job.to_df(), + saved_dataset.to_df(), + keys=[event_timestamp, "driver_id", "customer_id"], + ) + + @pytest.mark.integration @pytest.mark.universal def test_historical_features_from_bigquery_sources_containing_backfills(environment): @@ -630,13 +726,7 @@ def test_historical_features_from_bigquery_sources_containing_backfills(environm print(str(f"Time to execute job_from_df.to_df() = '{(end_time - start_time)}'\n")) assert sorted(expected_df.columns) == sorted(actual_df.columns) - assert_frame_equal( - expected_df.sort_values(by=["driver_id"]).reset_index(drop=True), - actual_df[expected_df.columns] - .sort_values(by=["driver_id"]) - .reset_index(drop=True), - check_dtype=False, - ) + assert_frame_equal(expected_df, actual_df, keys=["driver_id"]) def response_feature_name(feature: str, full_feature_names: bool) -> str: @@ -669,13 +759,6 @@ def assert_feature_service_correctness( actual_df_from_df_entities = job_from_df.to_df() - expected_df: pd.DataFrame = ( - expected_df.sort_values( - by=[event_timestamp, "order_id", "driver_id", "customer_id"] - ) 
- .drop_duplicates() - .reset_index(drop=True) - ) expected_df = expected_df[ [ event_timestamp, @@ -687,15 +770,11 @@ def assert_feature_service_correctness( "driver_age", ] ] - actual_df_from_df_entities = ( - actual_df_from_df_entities[expected_df.columns] - .sort_values(by=[event_timestamp, "order_id", "driver_id", "customer_id"]) - .drop_duplicates() - .reset_index(drop=True) - ) assert_frame_equal( - expected_df, actual_df_from_df_entities, check_dtype=False, + expected_df, + actual_df_from_df_entities, + keys=[event_timestamp, "order_id", "driver_id", "customer_id"], ) @@ -736,24 +815,18 @@ def assert_feature_service_entity_mapping_correctness( "destination__temperature", ] ] - actual_df_from_df_entities = ( - actual_df_from_df_entities[expected_df.columns] - .sort_values( - by=[ - event_timestamp, - "order_id", - "driver_id", - "customer_id", - "origin_id", - "destination_id", - ] - ) - .drop_duplicates() - .reset_index(drop=True) - ) assert_frame_equal( - expected_df, actual_df_from_df_entities, check_dtype=False, + expected_df, + actual_df_from_df_entities, + keys=[ + event_timestamp, + "order_id", + "driver_id", + "customer_id", + "origin_id", + "destination_id", + ], ) else: # using 2 of the same FeatureView without full_feature_names=True will result in collision @@ -763,3 +836,20 @@ def assert_feature_service_entity_mapping_correctness( features=feature_service, full_feature_names=full_feature_names, ) + + +def assert_frame_equal(expected_df, actual_df, keys): + expected_df: pd.DataFrame = ( + expected_df.sort_values(by=keys).drop_duplicates().reset_index(drop=True) + ) + + actual_df = ( + actual_df[expected_df.columns] + .sort_values(by=keys) + .drop_duplicates() + .reset_index(drop=True) + ) + + pd_assert_frame_equal( + expected_df, actual_df, check_dtype=False, + ) diff --git a/sdk/python/tests/integration/online_store/test_e2e_local.py b/sdk/python/tests/integration/online_store/test_e2e_local.py index dd900e90dc..7990227344 100644 --- a/sdk/python/tests/integration/online_store/test_e2e_local.py +++ b/sdk/python/tests/integration/online_store/test_e2e_local.py @@ -40,7 +40,14 @@ def _assert_online_features( # Float features should still be floats from the online store... 
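    # In the reworked GetOnlineFeaturesResponse there is no per-row "field_values" map
    # any more: feature names appear once in response.proto.metadata.feature_names, and
    # each entry of response.proto.results carries a vector of values in that same order.
    # The assertion below therefore looks up the feature's index by name first and then
    # reads the value (here a float_val) at that index.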
assert ( - response.field_values[0].fields["driver_hourly_stats__conv_rate"].float_val > 0 + response.proto.results[0] + .values[ + list(response.proto.metadata.feature_names.val).index( + "driver_hourly_stats__conv_rate" + ) + ] + .float_val + > 0 ) result = response.to_dict() diff --git a/sdk/python/tests/integration/online_store/test_universal_online.py b/sdk/python/tests/integration/online_store/test_universal_online.py index c47f2bbfd0..7d6296baa5 100644 --- a/sdk/python/tests/integration/online_store/test_universal_online.py +++ b/sdk/python/tests/integration/online_store/test_universal_online.py @@ -4,19 +4,21 @@ import time import unittest from datetime import timedelta -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Tuple, Union import assertpy import numpy as np import pandas as pd import pytest import requests +from botocore.exceptions import BotoCoreError from feast import Entity, Feature, FeatureService, FeatureView, ValueType from feast.errors import ( FeatureNameCollisionError, RequestDataNotFoundInEntityRowsException, ) +from feast.wait import wait_retry_backoff from tests.integration.feature_repos.repo_configuration import ( Environment, construct_universal_feature_views, @@ -28,6 +30,7 @@ ) from tests.integration.feature_repos.universal.feature_views import ( create_driver_hourly_stats_feature_view, + driver_feature_view, ) from tests.utils.data_source_utils import prep_file_source @@ -136,6 +139,7 @@ def test_write_to_online_store_event_check(local_redis_environment): @pytest.mark.integration +@pytest.mark.universal def test_write_to_online_store(environment, universal_data_sources): fs = environment.feature_store entities, datasets, data_sources = universal_data_sources @@ -185,7 +189,7 @@ def _get_online_features_dict_remotely( The output should be identical to: - >>> fs.get_online_features(features=features, entity_rows=entity_rows, full_feature_names=full_feature_names).to_dict() + fs.get_online_features(features=features, entity_rows=entity_rows, full_feature_names=full_feature_names).to_dict() This makes it easy to test the remote feature server by comparing the output to the local method. 
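# A minimal, self-contained sketch (hypothetical payload, for illustration only) of the
# reshaping that _get_online_features_dict_remotely performs in the following hunk: the
# feature server's JSON now exposes "metadata.feature_names" plus one "values" vector per
# entity row, and the helper pivots that into a dict of per-feature lists.
def _example_reshape_feature_server_response() -> dict:
    response = {
        "metadata": {"feature_names": ["driver_id", "conv_rate"]},
        "results": [
            {"values": [1001, 0.25]},
            {"values": [1002, 0.75]},
        ],
    }
    keys = response["metadata"]["feature_names"]
    rows = [row["values"] for row in response["results"]]
    # Dict of lists keyed by feature name, matching get_online_features(...).to_dict()
    return {key: [row[idx] for row in rows] for idx, key in enumerate(keys)}
    # -> {"driver_id": [1001, 1002], "conv_rate": [0.25, 0.75]}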
@@ -212,11 +216,15 @@ def _get_online_features_dict_remotely( time.sleep(1) else: raise Exception("Failed to get online features from remote feature server") - keys = response["field_values"][0]["statuses"].keys() + if "metadata" not in response: + raise Exception( + f"Failed to get online features from remote feature server {response}" + ) + keys = response["metadata"]["feature_names"] # Get rid of unnecessary structure in the response, leaving list of dicts - response = [row["fields"] for row in response["field_values"]] + response = [row["values"] for row in response["results"]] # Convert list of dicts (response) into dict of lists which is the format of the return value - return {key: [row.get(key) for row in response] for key in keys} + return {key: [row[idx] for row in response] for idx, key in enumerate(keys)} def get_online_features_dict( @@ -238,8 +246,8 @@ def get_online_features_dict( assertpy.assert_that(online_features).is_not_none() dict1 = online_features.to_dict() - endpoint = environment.feature_store.get_feature_server_endpoint() - # If endpoint is None, it means that the remote feature server isn't configured + endpoint = environment.get_feature_server_endpoint() + # If endpoint is None, it means that a local / remote feature server aren't configured if endpoint is not None: dict2 = _get_online_features_dict_remotely( endpoint=endpoint, @@ -499,6 +507,90 @@ def test_online_retrieval(environment, universal_data_sources, full_feature_name ) +@pytest.mark.integration +@pytest.mark.universal +def test_online_store_cleanup(environment, universal_data_sources): + """ + Some online store implementations (like Redis) keep features from different features views + but with common entities together. + This might end up with deletion of all features attached to the entity, + when only one feature view was deletion target (see https://github.com/feast-dev/feast/issues/2150). + + Plan: + 1. Register two feature views with common entity "driver" + 2. Materialize data + 3. Check if features are available (via online retrieval) + 4. Delete one feature view + 5. Check that features for other are still available + 6. Delete another feature view (and create again) + 7. 
Verify that features for both feature view were deleted + """ + fs = environment.feature_store + entities, datasets, data_sources = universal_data_sources + driver_stats_fv = construct_universal_feature_views(data_sources)["driver"] + + df = pd.DataFrame( + { + "ts_1": [environment.end_date] * len(entities["driver"]), + "created_ts": [environment.end_date] * len(entities["driver"]), + "driver_id": entities["driver"], + "value": np.random.random(size=len(entities["driver"])), + } + ) + + ds = environment.data_source_creator.create_data_source( + df, destination_name="simple_driver_dataset" + ) + + simple_driver_fv = driver_feature_view( + data_source=ds, name="test_universal_online_simple_driver" + ) + + fs.apply([driver(), simple_driver_fv, driver_stats_fv]) + + fs.materialize( + environment.start_date - timedelta(days=1), + environment.end_date + timedelta(days=1), + ) + expected_values = df.sort_values(by="driver_id") + + features = [f"{simple_driver_fv.name}:value"] + entity_rows = [{"driver": driver_id} for driver_id in sorted(entities["driver"])] + + online_features = fs.get_online_features( + features=features, entity_rows=entity_rows + ).to_dict() + assert np.allclose(expected_values["value"], online_features["value"]) + + fs.apply( + objects=[simple_driver_fv], objects_to_delete=[driver_stats_fv], partial=False + ) + + online_features = fs.get_online_features( + features=features, entity_rows=entity_rows + ).to_dict() + assert np.allclose(expected_values["value"], online_features["value"]) + + fs.apply(objects=[], objects_to_delete=[simple_driver_fv], partial=False) + + def eventually_apply() -> Tuple[None, bool]: + try: + fs.apply([simple_driver_fv]) + except BotoCoreError: + return None, False + + return None, True + + # Online store backend might have eventual consistency in schema update + # So recreating table that was just deleted might need some retries + wait_retry_backoff(eventually_apply, timeout_secs=60) + + online_features = fs.get_online_features( + features=features, entity_rows=entity_rows + ).to_dict() + assert all(v is None for v in online_features["value"]) + + def response_feature_name(feature: str, full_feature_names: bool) -> str: if ( feature in {"current_balance", "avg_passenger_count", "lifetime_trip_count"} diff --git a/sdk/python/tests/integration/registration/test_cli.py b/sdk/python/tests/integration/registration/test_cli.py index 0fe73316ad..a2c4a9a8e5 100644 --- a/sdk/python/tests/integration/registration/test_cli.py +++ b/sdk/python/tests/integration/registration/test_cli.py @@ -1,35 +1,48 @@ +import os import tempfile import uuid from contextlib import contextmanager -from pathlib import Path, PosixPath +from pathlib import Path from textwrap import dedent +from typing import List import pytest import yaml from assertpy import assertpy from feast import FeatureStore, RepoConfig -from tests.integration.feature_repos.repo_configuration import FULL_REPO_CONFIGS +from tests.integration.feature_repos.integration_test_repo_config import ( + IntegrationTestRepoConfig, +) +from tests.integration.feature_repos.repo_configuration import Environment from tests.integration.feature_repos.universal.data_source_creator import ( DataSourceCreator, ) +from tests.integration.feature_repos.universal.data_sources.bigquery import ( + BigQueryDataSourceCreator, +) +from tests.integration.feature_repos.universal.data_sources.file import ( + FileDataSourceCreator, +) +from tests.integration.feature_repos.universal.data_sources.redshift import ( + RedshiftDataSourceCreator, 
+) from tests.utils.cli_utils import CliRunner, get_example_repo from tests.utils.online_read_write_test import basic_rw_test @pytest.mark.integration -@pytest.mark.parametrize("test_repo_config", FULL_REPO_CONFIGS) -def test_universal_cli(test_repo_config) -> None: +@pytest.mark.universal +def test_universal_cli(environment: Environment): project = f"test_universal_cli_{str(uuid.uuid4()).replace('-', '')[:8]}" - runner = CliRunner() with tempfile.TemporaryDirectory() as repo_dir_name: try: + repo_path = Path(repo_dir_name) feature_store_yaml = make_feature_store_yaml( - project, test_repo_config, repo_dir_name + project, environment.test_repo_config, repo_path ) - repo_path = Path(repo_dir_name) repo_config = repo_path / "feature_store.yaml" @@ -43,10 +56,9 @@ def test_universal_cli(test_repo_config) -> None: # Store registry contents, to be compared later. fs = FeatureStore(repo_path=str(repo_path)) registry_dict = fs.registry.to_dict(project=project) - # Save only the specs, not the metadata. registry_specs = { - key: [fco["spec"] for fco in value] + key: [fco["spec"] if "spec" in fco else fco for fco in value] for key, value in registry_dict.items() } @@ -92,7 +104,7 @@ def test_universal_cli(test_repo_config) -> None: registry_dict = fs.registry.to_dict(project=project) assertpy.assert_that(registry_specs).is_equal_to( { - key: [fco["spec"] for fco in value] + key: [fco["spec"] if "spec" in fco else fco for fco in value] for key, value in registry_dict.items() } ) @@ -103,7 +115,7 @@ def test_universal_cli(test_repo_config) -> None: runner.run(["teardown"], cwd=repo_path) -def make_feature_store_yaml(project, test_repo_config, repo_dir_name: PosixPath): +def make_feature_store_yaml(project, test_repo_config, repo_dir_name: Path): offline_creator: DataSourceCreator = test_repo_config.offline_store_creator(project) offline_store_config = offline_creator.create_offline_store_config() @@ -128,6 +140,56 @@ def make_feature_store_yaml(project, test_repo_config, repo_dir_name: PosixPath) return yaml.safe_dump(config_dict) +NULLABLE_ONLINE_STORE_CONFIGS: List[IntegrationTestRepoConfig] = [ + IntegrationTestRepoConfig( + provider="local", + offline_store_creator=FileDataSourceCreator, + online_store=None, + ), +] + +if os.getenv("FEAST_IS_LOCAL_TEST", "False") == "True": + NULLABLE_ONLINE_STORE_CONFIGS.extend( + [ + IntegrationTestRepoConfig( + provider="gcp", + offline_store_creator=BigQueryDataSourceCreator, + online_store=None, + ), + IntegrationTestRepoConfig( + provider="aws", + offline_store_creator=RedshiftDataSourceCreator, + online_store=None, + ), + ] + ) + + +@pytest.mark.integration +@pytest.mark.parametrize("test_nullable_online_store", NULLABLE_ONLINE_STORE_CONFIGS) +def test_nullable_online_store(test_nullable_online_store) -> None: + project = f"test_nullable_online_store{str(uuid.uuid4()).replace('-', '')[:8]}" + runner = CliRunner() + + with tempfile.TemporaryDirectory() as repo_dir_name: + try: + repo_path = Path(repo_dir_name) + feature_store_yaml = make_feature_store_yaml( + project, test_nullable_online_store, repo_path + ) + + repo_config = repo_path / "feature_store.yaml" + + repo_config.write_text(dedent(feature_store_yaml)) + + repo_example = repo_path / "example.py" + repo_example.write_text(get_example_repo("example_feature_repo_1.py")) + result = runner.run(["apply"], cwd=repo_path) + assertpy.assert_that(result.returncode).is_equal_to(0) + finally: + runner.run(["teardown"], cwd=repo_path) + + @contextmanager def setup_third_party_provider_repo(provider_name: 
str): with tempfile.TemporaryDirectory() as repo_dir_name: diff --git a/sdk/python/tests/integration/registration/test_inference.py b/sdk/python/tests/integration/registration/test_inference.py index 14aa1e13ad..ca5f56c435 100644 --- a/sdk/python/tests/integration/registration/test_inference.py +++ b/sdk/python/tests/integration/registration/test_inference.py @@ -3,7 +3,7 @@ from feast import Entity, Feature, RepoConfig, ValueType from feast.data_source import RequestDataSource -from feast.errors import RegistryInferenceFailure +from feast.errors import RegistryInferenceFailure, SpecifiedFeaturesNotPresentError from feast.feature_view import FeatureView from feast.inference import ( update_data_sources_with_inferred_event_timestamp_col, @@ -86,7 +86,7 @@ def test_update_data_sources_with_inferred_event_timestamp_col(simple_dataset_1) ) -def test_modify_feature_views_success(): +def test_on_demand_features_type_inference(): # Create Feature Views date_request = RequestDataSource( name="date_request", schema={"some_date": ValueType.UNIX_TIMESTAMP} @@ -94,11 +94,46 @@ def test_modify_feature_views_success(): @on_demand_feature_view( inputs={"date_request": date_request}, - features=[Feature("output", ValueType.UNIX_TIMESTAMP)], + features=[ + Feature("output", ValueType.UNIX_TIMESTAMP), + Feature("string_output", ValueType.STRING), + ], ) def test_view(features_df: pd.DataFrame) -> pd.DataFrame: data = pd.DataFrame() data["output"] = features_df["some_date"] + data["string_output"] = features_df["some_date"].astype(pd.StringDtype()) return data test_view.infer_features() + + @on_demand_feature_view( + inputs={"date_request": date_request}, + features=[ + Feature("output", ValueType.UNIX_TIMESTAMP), + Feature("object_output", ValueType.STRING), + ], + ) + def invalid_test_view(features_df: pd.DataFrame) -> pd.DataFrame: + data = pd.DataFrame() + data["output"] = features_df["some_date"] + data["object_output"] = features_df["some_date"].astype(str) + return data + + with pytest.raises(ValueError, match="Value with native type object"): + invalid_test_view.infer_features() + + @on_demand_feature_view( + inputs={"date_request": date_request}, + features=[ + Feature("output", ValueType.UNIX_TIMESTAMP), + Feature("missing", ValueType.STRING), + ], + ) + def test_view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame: + data = pd.DataFrame() + data["output"] = features_df["some_date"] + return data + + with pytest.raises(SpecifiedFeaturesNotPresentError): + test_view_with_missing_feature.infer_features() diff --git a/sdk/python/tests/integration/registration/test_universal_types.py b/sdk/python/tests/integration/registration/test_universal_types.py index c007d56c35..59ca119f98 100644 --- a/sdk/python/tests/integration/registration/test_universal_types.py +++ b/sdk/python/tests/integration/registration/test_universal_types.py @@ -1,10 +1,11 @@ import logging from dataclasses import dataclass from datetime import datetime, timedelta -from typing import List +from typing import Any, Dict, List, Tuple, Union import numpy as np import pandas as pd +import pyarrow as pa import pytest from feast.infra.offline_stores.offline_store import RetrievalJob @@ -28,6 +29,7 @@ def populate_test_configs(offline: bool): (ValueType.INT64, "int64"), (ValueType.STRING, "float"), (ValueType.STRING, "bool"), + (ValueType.INT32, "datetime"), ] configs: List[TypeTestConfig] = [] for test_repo_config in FULL_REPO_CONFIGS: @@ -217,9 +219,14 @@ def 
test_feature_get_online_features_types_match(online_types_test_fixtures): ) fs = environment.feature_store features = [fv.name + ":value"] - entity = driver(value_type=ValueType.UNKNOWN) + entity = driver(value_type=config.entity_type) fs.apply([fv, entity]) - fs.materialize(environment.start_date, environment.end_date) + fs.materialize( + environment.start_date, + environment.end_date + - timedelta(hours=1) # throwing out last record to make sure + # we can successfully infer type even from all empty values + ) driver_id_value = "1" if config.entity_type == ValueType.STRING else 1 online_features = fs.get_online_features( @@ -232,13 +239,20 @@ def test_feature_get_online_features_types_match(online_types_test_fixtures): "float": float, "string": str, "bool": bool, + "datetime": datetime, } expected_dtype = feature_list_dtype_to_expected_online_response_value_type[ config.feature_dtype ] + + assert len(online_features["value"]) == 1 + if config.feature_is_list: for feature in online_features["value"]: - assert isinstance(feature, list) + assert isinstance(feature, list), "Feature value should be a list" + assert ( + config.has_empty_list or len(feature) > 0 + ), "List of values should not be empty" for element in feature: assert isinstance(element, expected_dtype) else: @@ -258,6 +272,8 @@ def create_feature_view( value_type = ValueType.FLOAT_LIST elif feature_dtype == "bool": value_type = ValueType.BOOL_LIST + elif feature_dtype == "datetime": + value_type = ValueType.UNIX_TIMESTAMP_LIST else: if feature_dtype == "int32": value_type = ValueType.INT32 @@ -267,6 +283,8 @@ def create_feature_view( value_type = ValueType.FLOAT elif feature_dtype == "bool": value_type = ValueType.BOOL + elif feature_dtype == "datetime": + value_type = ValueType.UNIX_TIMESTAMP return driver_feature_view(data_source, name=name, value_type=value_type,) @@ -281,6 +299,7 @@ def assert_expected_historical_feature_types( "float": (pd.api.types.is_float_dtype,), "string": (pd.api.types.is_string_dtype,), "bool": (pd.api.types.is_bool_dtype, pd.api.types.is_object_dtype), + "datetime": (pd.api.types.is_datetime64_any_dtype,), } dtype_checkers = feature_dtype_to_expected_historical_feature_dtype[feature_dtype] assert any( @@ -292,7 +311,9 @@ def assert_feature_list_types( provider: str, feature_dtype: str, historical_features_df: pd.DataFrame ): print("Asserting historical feature list types") - feature_list_dtype_to_expected_historical_feature_list_dtype = { + feature_list_dtype_to_expected_historical_feature_list_dtype: Dict[ + str, Union[type, Tuple[Union[type, Tuple[Any, ...]], ...]] + ] = { "int32": ( int, np.int64, @@ -307,6 +328,7 @@ def assert_feature_list_types( bool, np.bool_, ), # Can be `np.bool_` if from `np.array` rather that `list` + "datetime": np.datetime64, } expected_dtype = feature_list_dtype_to_expected_historical_feature_list_dtype[ feature_dtype @@ -328,22 +350,21 @@ def assert_expected_arrow_types( historical_features_arrow = historical_features.to_arrow() print(historical_features_arrow) feature_list_dtype_to_expected_historical_feature_arrow_type = { - "int32": "int64", - "int64": "int64", - "float": "double", - "string": "string", - "bool": "bool", + "int32": pa.types.is_int64, + "int64": pa.types.is_int64, + "float": pa.types.is_float64, + "string": pa.types.is_string, + "bool": pa.types.is_boolean, + "date": pa.types.is_date, + "datetime": pa.types.is_timestamp, } - arrow_type = feature_list_dtype_to_expected_historical_feature_arrow_type[ + arrow_type_checker = 
feature_list_dtype_to_expected_historical_feature_arrow_type[ feature_dtype ] + pa_type = historical_features_arrow.schema.field("value").type + if feature_is_list: - assert ( - str(historical_features_arrow.schema.field_by_name("value").type) - == f"list" - ) + assert pa.types.is_list(pa_type) + assert arrow_type_checker(pa_type.value_type) else: - assert ( - str(historical_features_arrow.schema.field_by_name("value").type) - == arrow_type - ) + assert arrow_type_checker(pa_type) diff --git a/sdk/python/tests/integration/scaffolding/test_repo_config.py b/sdk/python/tests/integration/scaffolding/test_repo_config.py index dfa80cb618..3ec91c0044 100644 --- a/sdk/python/tests/integration/scaffolding/test_repo_config.py +++ b/sdk/python/tests/integration/scaffolding/test_repo_config.py @@ -34,6 +34,49 @@ def _test_config(config_text, expect_error: Optional[str]): return rc +def test_nullable_online_store_aws(): + _test_config( + dedent( + """ + project: foo + registry: "registry.db" + provider: aws + online_store: null + """ + ), + expect_error="__root__ -> offline_store -> cluster_id\n" + " field required (type=value_error.missing)", + ) + + +def test_nullable_online_store_gcp(): + _test_config( + dedent( + """ + project: foo + registry: "registry.db" + provider: gcp + online_store: null + """ + ), + expect_error=None, + ) + + +def test_nullable_online_store_local(): + _test_config( + dedent( + """ + project: foo + registry: "registry.db" + provider: local + online_store: null + """ + ), + expect_error=None, + ) + + def test_local_config(): _test_config( dedent( diff --git a/sdk/python/tests/unit/diff/test_infra_diff.py b/sdk/python/tests/unit/diff/test_infra_diff.py new file mode 100644 index 0000000000..8e3d5b765f --- /dev/null +++ b/sdk/python/tests/unit/diff/test_infra_diff.py @@ -0,0 +1,154 @@ +from google.protobuf import wrappers_pb2 as wrappers + +from feast.diff.infra_diff import ( + diff_between, + diff_infra_protos, + tag_infra_proto_objects_for_keep_delete_add, +) +from feast.diff.property_diff import TransitionType +from feast.infra.online_stores.datastore import DatastoreTable +from feast.infra.online_stores.dynamodb import DynamoDBTable +from feast.protos.feast.core.InfraObject_pb2 import Infra as InfraProto + + +def test_tag_infra_proto_objects_for_keep_delete_add(): + to_delete = DynamoDBTable(name="to_delete", region="us-west-2").to_proto() + to_add = DynamoDBTable(name="to_add", region="us-west-2").to_proto() + unchanged_table = DynamoDBTable(name="unchanged", region="us-west-2").to_proto() + pre_changed = DynamoDBTable(name="table", region="us-west-2").to_proto() + post_changed = DynamoDBTable(name="table", region="us-east-2").to_proto() + + keep, delete, add = tag_infra_proto_objects_for_keep_delete_add( + [to_delete, unchanged_table, pre_changed], + [to_add, unchanged_table, post_changed], + ) + + assert len(list(keep)) == 2 + assert unchanged_table in keep + assert post_changed in keep + assert to_add not in keep + assert len(list(delete)) == 1 + assert to_delete in delete + assert unchanged_table not in delete + assert pre_changed not in delete + assert len(list(add)) == 1 + assert to_add in add + assert unchanged_table not in add + assert post_changed not in add + + +def test_diff_between_datastore_tables(): + pre_changed = DatastoreTable( + project="test", name="table", project_id="pre", namespace="pre" + ).to_proto() + post_changed = DatastoreTable( + project="test", name="table", project_id="post", namespace="post" + ).to_proto() + + infra_object_diff = 
diff_between(pre_changed, pre_changed, "datastore table") + infra_object_property_diffs = infra_object_diff.infra_object_property_diffs + assert len(infra_object_property_diffs) == 0 + + infra_object_diff = diff_between(pre_changed, post_changed, "datastore table") + infra_object_property_diffs = infra_object_diff.infra_object_property_diffs + assert len(infra_object_property_diffs) == 2 + + assert infra_object_property_diffs[0].property_name == "project_id" + assert infra_object_property_diffs[0].val_existing == wrappers.StringValue( + value="pre" + ) + assert infra_object_property_diffs[0].val_declared == wrappers.StringValue( + value="post" + ) + assert infra_object_property_diffs[1].property_name == "namespace" + assert infra_object_property_diffs[1].val_existing == wrappers.StringValue( + value="pre" + ) + assert infra_object_property_diffs[1].val_declared == wrappers.StringValue( + value="post" + ) + + +def test_diff_infra_protos(): + to_delete = DynamoDBTable(name="to_delete", region="us-west-2") + to_add = DynamoDBTable(name="to_add", region="us-west-2") + unchanged_table = DynamoDBTable(name="unchanged", region="us-west-2") + pre_changed = DatastoreTable( + project="test", name="table", project_id="pre", namespace="pre" + ) + post_changed = DatastoreTable( + project="test", name="table", project_id="post", namespace="post" + ) + + infra_objects_before = [to_delete, unchanged_table, pre_changed] + infra_objects_after = [to_add, unchanged_table, post_changed] + + infra_proto_before = InfraProto() + infra_proto_before.infra_objects.extend( + [obj.to_infra_object_proto() for obj in infra_objects_before] + ) + + infra_proto_after = InfraProto() + infra_proto_after.infra_objects.extend( + [obj.to_infra_object_proto() for obj in infra_objects_after] + ) + + infra_diff = diff_infra_protos(infra_proto_before, infra_proto_after) + infra_object_diffs = infra_diff.infra_object_diffs + + # There should be one addition, one deletion, one unchanged, and one changed. 
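    # Concretely: to_add surfaces as TransitionType.CREATE, to_delete as
    # TransitionType.DELETE, unchanged_table as TransitionType.UNCHANGED, and the
    # pre_changed -> post_changed DatastoreTable as TransitionType.UPDATE with two
    # property diffs (project_id and namespace), which is what the bucketed
    # assertions below verify one transition type at a time.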
+ assert len(infra_object_diffs) == 4 + + additions = [ + infra_object_diff + for infra_object_diff in infra_object_diffs + if infra_object_diff.transition_type == TransitionType.CREATE + ] + assert len(additions) == 1 + assert not additions[0].current_infra_object + assert additions[0].new_infra_object == to_add.to_proto() + assert len(additions[0].infra_object_property_diffs) == 0 + + deletions = [ + infra_object_diff + for infra_object_diff in infra_object_diffs + if infra_object_diff.transition_type == TransitionType.DELETE + ] + assert len(deletions) == 1 + assert deletions[0].current_infra_object == to_delete.to_proto() + assert not deletions[0].new_infra_object + assert len(deletions[0].infra_object_property_diffs) == 0 + + unchanged = [ + infra_object_diff + for infra_object_diff in infra_object_diffs + if infra_object_diff.transition_type == TransitionType.UNCHANGED + ] + assert len(unchanged) == 1 + assert unchanged[0].current_infra_object == unchanged_table.to_proto() + assert unchanged[0].new_infra_object == unchanged_table.to_proto() + assert len(unchanged[0].infra_object_property_diffs) == 0 + + updates = [ + infra_object_diff + for infra_object_diff in infra_object_diffs + if infra_object_diff.transition_type == TransitionType.UPDATE + ] + assert len(updates) == 1 + assert updates[0].current_infra_object == pre_changed.to_proto() + assert updates[0].new_infra_object == post_changed.to_proto() + assert len(updates[0].infra_object_property_diffs) == 2 + assert updates[0].infra_object_property_diffs[0].property_name == "project_id" + assert updates[0].infra_object_property_diffs[ + 0 + ].val_existing == wrappers.StringValue(value="pre") + assert updates[0].infra_object_property_diffs[ + 0 + ].val_declared == wrappers.StringValue(value="post") + assert updates[0].infra_object_property_diffs[1].property_name == "namespace" + assert updates[0].infra_object_property_diffs[ + 1 + ].val_existing == wrappers.StringValue(value="pre") + assert updates[0].infra_object_property_diffs[ + 1 + ].val_declared == wrappers.StringValue(value="post") diff --git a/sdk/python/tests/unit/diff/test_fco_diff.py b/sdk/python/tests/unit/diff/test_registry_diff.py similarity index 56% rename from sdk/python/tests/unit/diff/test_fco_diff.py rename to sdk/python/tests/unit/diff/test_registry_diff.py index 802a6438c3..0322ab47ab 100644 --- a/sdk/python/tests/unit/diff/test_fco_diff.py +++ b/sdk/python/tests/unit/diff/test_registry_diff.py @@ -1,51 +1,58 @@ -from feast.diff.FcoDiff import diff_between, tag_proto_objects_for_keep_delete_add +from feast.diff.registry_diff import ( + diff_registry_objects, + tag_objects_for_keep_delete_update_add, +) from feast.feature_view import FeatureView from tests.utils.data_source_utils import prep_file_source -def test_tag_proto_objects_for_keep_delete_add(simple_dataset_1): +def test_tag_objects_for_keep_delete_update_add(simple_dataset_1): with prep_file_source( df=simple_dataset_1, event_timestamp_column="ts_1" ) as file_source: to_delete = FeatureView( name="to_delete", entities=["id"], batch_source=file_source, ttl=None, - ).to_proto() + ) unchanged_fv = FeatureView( name="fv1", entities=["id"], batch_source=file_source, ttl=None, - ).to_proto() + ) pre_changed = FeatureView( name="fv2", entities=["id"], batch_source=file_source, ttl=None, tags={"when": "before"}, - ).to_proto() + ) post_changed = FeatureView( name="fv2", entities=["id"], batch_source=file_source, ttl=None, tags={"when": "after"}, - ).to_proto() + ) to_add = FeatureView( name="to_add", 
entities=["id"], batch_source=file_source, ttl=None, - ).to_proto() + ) - keep, delete, add = tag_proto_objects_for_keep_delete_add( + keep, delete, update, add = tag_objects_for_keep_delete_update_add( [unchanged_fv, pre_changed, to_delete], [unchanged_fv, post_changed, to_add] ) assert len(list(keep)) == 2 assert unchanged_fv in keep - assert post_changed in keep - assert pre_changed not in keep + assert pre_changed in keep + assert post_changed not in keep assert len(list(delete)) == 1 assert to_delete in delete + assert len(list(update)) == 2 + assert unchanged_fv in update + assert post_changed in update + assert pre_changed not in update assert len(list(add)) == 1 assert to_add in add -def test_diff_between_feature_views(simple_dataset_1): +def test_diff_registry_objects_feature_views(simple_dataset_1): with prep_file_source( df=simple_dataset_1, event_timestamp_column="ts_1" ) as file_source: @@ -55,21 +62,29 @@ def test_diff_between_feature_views(simple_dataset_1): batch_source=file_source, ttl=None, tags={"when": "before"}, - ).to_proto() + ) post_changed = FeatureView( name="fv2", entities=["id"], batch_source=file_source, ttl=None, tags={"when": "after"}, - ).to_proto() + ) - fco_diffs = diff_between(pre_changed, pre_changed, "feature view") - assert len(fco_diffs.fco_property_diffs) == 0 + feast_object_diffs = diff_registry_objects( + pre_changed, pre_changed, "feature view" + ) + assert len(feast_object_diffs.feast_object_property_diffs) == 0 - fco_diffs = diff_between(pre_changed, post_changed, "feature view") - assert len(fco_diffs.fco_property_diffs) == 1 + feast_object_diffs = diff_registry_objects( + pre_changed, post_changed, "feature view" + ) + assert len(feast_object_diffs.feast_object_property_diffs) == 1 - assert fco_diffs.fco_property_diffs[0].property_name == "tags" - assert fco_diffs.fco_property_diffs[0].val_existing == {"when": "before"} - assert fco_diffs.fco_property_diffs[0].val_declared == {"when": "after"} + assert feast_object_diffs.feast_object_property_diffs[0].property_name == "tags" + assert feast_object_diffs.feast_object_property_diffs[0].val_existing == { + "when": "before" + } + assert feast_object_diffs.feast_object_property_diffs[0].val_declared == { + "when": "after" + } diff --git a/sdk/python/tests/unit/test_proto_json.py b/sdk/python/tests/unit/test_proto_json.py index 1b352ccb19..6bfdbbbf91 100644 --- a/sdk/python/tests/unit/test_proto_json.py +++ b/sdk/python/tests/unit/test_proto_json.py @@ -9,7 +9,7 @@ ) from feast.protos.feast.types.Value_pb2 import RepeatedValue -FieldValues = GetOnlineFeaturesResponse.FieldValues +FeatureVector = GetOnlineFeaturesResponse.FeatureVector @pytest.fixture(scope="module") @@ -17,70 +17,63 @@ def proto_json_patch(): proto_json.patch() -def test_feast_value(proto_json_patch): - # FieldValues contains "map fields" proto field. +def test_feature_vector_values(proto_json_patch): + # FeatureVector contains "repeated values" proto field. # We want to test that feast.types.Value can take different types in JSON # without using additional structure (e.g. 1 instead of {int64_val: 1}). 
- field_values_str = """{ - "fields": { - "a": 1, - "b": 2.0, - "c": true, - "d": "foo", - "e": [1, 2, 3], - "f": [2.0, 3.0, 4.0, null], - "g": [true, false, true], - "h": ["foo", "bar", "foobar"], - "i": null - } + feature_vector_str = """{ + "values": [ + 1, + 2.0, + true, + "foo", + [1, 2, 3], + [2.0, 3.0, 4.0, null], + [true, false, true], + ["foo", "bar", "foobar"] + ] }""" - field_values_proto = FieldValues() - Parse(field_values_str, field_values_proto) - assertpy.assert_that(field_values_proto.fields.keys()).is_equal_to( - {"a", "b", "c", "d", "e", "f", "g", "h", "i"} - ) - assertpy.assert_that(field_values_proto.fields["a"].int64_val).is_equal_to(1) - assertpy.assert_that(field_values_proto.fields["b"].double_val).is_equal_to(2.0) - assertpy.assert_that(field_values_proto.fields["c"].bool_val).is_equal_to(True) - assertpy.assert_that(field_values_proto.fields["d"].string_val).is_equal_to("foo") - assertpy.assert_that(field_values_proto.fields["e"].int64_list_val.val).is_equal_to( + feature_vector_proto = FeatureVector() + Parse(feature_vector_str, feature_vector_proto) + assertpy.assert_that(len(feature_vector_proto.values)).is_equal_to(8) + assertpy.assert_that(feature_vector_proto.values[0].int64_val).is_equal_to(1) + assertpy.assert_that(feature_vector_proto.values[1].double_val).is_equal_to(2.0) + assertpy.assert_that(feature_vector_proto.values[2].bool_val).is_equal_to(True) + assertpy.assert_that(feature_vector_proto.values[3].string_val).is_equal_to("foo") + assertpy.assert_that(feature_vector_proto.values[4].int64_list_val.val).is_equal_to( [1, 2, 3] ) # Can't directly check equality to [2.0, 3.0, 4.0, float("nan")], because float("nan") != float("nan") assertpy.assert_that( - field_values_proto.fields["f"].double_list_val.val[:3] + feature_vector_proto.values[5].double_list_val.val[:3] ).is_equal_to([2.0, 3.0, 4.0]) - assertpy.assert_that(field_values_proto.fields["f"].double_list_val.val[3]).is_nan() - assertpy.assert_that(field_values_proto.fields["g"].bool_list_val.val).is_equal_to( + assertpy.assert_that(feature_vector_proto.values[5].double_list_val.val[3]).is_nan() + assertpy.assert_that(feature_vector_proto.values[6].bool_list_val.val).is_equal_to( [True, False, True] ) assertpy.assert_that( - field_values_proto.fields["h"].string_list_val.val + feature_vector_proto.values[7].string_list_val.val ).is_equal_to(["foo", "bar", "foobar"]) - assertpy.assert_that(field_values_proto.fields["i"].null_val).is_equal_to(0) # Now convert protobuf back to json and check that - field_values_json = MessageToDict(field_values_proto) - assertpy.assert_that(field_values_json["fields"].keys()).is_equal_to( - {"a", "b", "c", "d", "e", "f", "g", "h", "i"} - ) - assertpy.assert_that(field_values_json["fields"]["a"]).is_equal_to(1) - assertpy.assert_that(field_values_json["fields"]["b"]).is_equal_to(2.0) - assertpy.assert_that(field_values_json["fields"]["c"]).is_equal_to(True) - assertpy.assert_that(field_values_json["fields"]["d"]).is_equal_to("foo") - assertpy.assert_that(field_values_json["fields"]["e"]).is_equal_to([1, 2, 3]) + feature_vector_json = MessageToDict(feature_vector_proto) + assertpy.assert_that(len(feature_vector_json["values"])).is_equal_to(8) + assertpy.assert_that(feature_vector_json["values"][0]).is_equal_to(1) + assertpy.assert_that(feature_vector_json["values"][1]).is_equal_to(2.0) + assertpy.assert_that(feature_vector_json["values"][2]).is_equal_to(True) + assertpy.assert_that(feature_vector_json["values"][3]).is_equal_to("foo") + 
assertpy.assert_that(feature_vector_json["values"][4]).is_equal_to([1, 2, 3]) # Can't directly check equality to [2.0, 3.0, 4.0, float("nan")], because float("nan") != float("nan") - assertpy.assert_that(field_values_json["fields"]["f"][:3]).is_equal_to( + assertpy.assert_that(feature_vector_json["values"][5][:3]).is_equal_to( [2.0, 3.0, 4.0] ) - assertpy.assert_that(field_values_json["fields"]["f"][3]).is_nan() - assertpy.assert_that(field_values_json["fields"]["g"]).is_equal_to( + assertpy.assert_that(feature_vector_json["values"][5][3]).is_nan() + assertpy.assert_that(feature_vector_json["values"][6]).is_equal_to( [True, False, True] ) - assertpy.assert_that(field_values_json["fields"]["h"]).is_equal_to( + assertpy.assert_that(feature_vector_json["values"][7]).is_equal_to( ["foo", "bar", "foobar"] ) - assertpy.assert_that(field_values_json["fields"]["i"]).is_equal_to(None) def test_feast_repeated_value(proto_json_patch): diff --git a/sdk/python/tests/unit/test_unit_feature_store.py b/sdk/python/tests/unit/test_unit_feature_store.py new file mode 100644 index 0000000000..6f9dd6acb0 --- /dev/null +++ b/sdk/python/tests/unit/test_unit_feature_store.py @@ -0,0 +1,50 @@ +from dataclasses import dataclass +from typing import Dict, List + +from feast import FeatureStore +from feast.protos.feast.types.Value_pb2 import Value + + +@dataclass +class MockFeatureViewProjection: + join_key_map: Dict[str, str] + + +@dataclass +class MockFeatureView: + name: str + entities: List[str] + projection: MockFeatureViewProjection + + +def test__get_unique_entities(): + entity_values = { + "entity_1": [Value(int64_val=1), Value(int64_val=2), Value(int64_val=1)], + "entity_2": [ + Value(string_val="1"), + Value(string_val="2"), + Value(string_val="1"), + ], + "entity_3": [Value(int64_val=8), Value(int64_val=9), Value(int64_val=10)], + } + + entity_name_to_join_key_map = {"entity_1": "entity_1", "entity_2": "entity_2"} + + fv = MockFeatureView( + name="fv_1", + entities=["entity_1", "entity_2"], + projection=MockFeatureViewProjection(join_key_map={}), + ) + + unique_entities, indexes = FeatureStore._get_unique_entities( + FeatureStore, + table=fv, + join_key_values=entity_values, + entity_name_to_join_key_map=entity_name_to_join_key_map, + ) + + assert unique_entities == ( + {"entity_1": Value(int64_val=1), "entity_2": Value(string_val="1")}, + {"entity_1": Value(int64_val=2), "entity_2": Value(string_val="2")}, + ) + assert indexes == ([0, 2], [1]) diff --git a/sdk/python/tests/utils/data_source_utils.py b/sdk/python/tests/utils/data_source_utils.py index 6e3d77ead0..12870186bf 100644 --- a/sdk/python/tests/utils/data_source_utils.py +++ b/sdk/python/tests/utils/data_source_utils.py @@ -2,6 +2,7 @@ import random import tempfile import time +from typing import Iterator from google.cloud import bigquery @@ -10,7 +11,7 @@ @contextlib.contextmanager -def prep_file_source(df, event_timestamp_column=None) -> FileSource: +def prep_file_source(df, event_timestamp_column=None) -> Iterator[FileSource]: with tempfile.NamedTemporaryFile(suffix=".parquet") as f: f.close() df.to_parquet(f.name)
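        # Note: @contextlib.contextmanager wraps a generator, so Iterator[FileSource]
        # (as annotated above) is the accurate return type hint for this helper; the
        # decorated function yields the FileSource rather than returning it.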