From fe9a0bdbb705eaee4fb847e1bd67abe5004770fc Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Fri, 5 Aug 2022 17:56:39 -0700 Subject: [PATCH] chore: Add documentation for maintainer development (#3025) * Maintainer test docs Signed-off-by: Kevin Zhang * Add links from contributing and development guide Signed-off-by: Kevin Zhang --- .../fork_pr_integration_tests_aws.yml | 159 ++++++++++++++++++ .../fork_pr_integration_tests_gcp.yml | 97 +++++++++++ .../fork_pr_integration_tests_snowflake.yml | 96 +++++++++++ .github/workflows/java_master_only.yml | 4 + .github/workflows/java_pr.yml | 7 +- .github/workflows/lint_pr.yml | 1 + .github/workflows/master_only.yml | 3 + .github/workflows/nightly-ci.yml | 3 + .github/workflows/pr_integration_tests.yml | 10 +- .../workflows/pr_local_integration_tests.yml | 22 ++- .github/workflows/publish.yml | 5 + .github/workflows/unit_tests.yml | 9 +- CONTRIBUTING.md | 100 +++++++---- docs/SUMMARY.md | 1 + docs/project/development-guide.md | 1 + docs/project/maintainers.md | 57 +++++++ .../feature_repos/repo_configuration.py | 10 +- .../universal/data_sources/snowflake.py | 6 +- 18 files changed, 526 insertions(+), 65 deletions(-) create mode 100644 .github/fork_workflows/fork_pr_integration_tests_aws.yml create mode 100644 .github/fork_workflows/fork_pr_integration_tests_gcp.yml create mode 100644 .github/fork_workflows/fork_pr_integration_tests_snowflake.yml create mode 100644 docs/project/maintainers.md diff --git a/.github/fork_workflows/fork_pr_integration_tests_aws.yml b/.github/fork_workflows/fork_pr_integration_tests_aws.yml new file mode 100644 index 0000000000..ef53fc1c7d --- /dev/null +++ b/.github/fork_workflows/fork_pr_integration_tests_aws.yml @@ -0,0 +1,159 @@ +name: fork-pr-integration-tests-aws + +on: [pull_request] + +jobs: + build-docker-image: + if: github.repository == 'your github repo' # swap here with your project id + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + # pull_request_target runs the workflow in the context of the base repo + # as such actions/checkout needs to be explicit configured to retrieve + # code from the PR. 
+ ref: refs/pull/${{ github.event.pull_request.number }}/merge + submodules: recursive + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + with: + install: true + - name: Set up AWS SDK + uses: aws-actions/configure-aws-credentials@v1 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: us-west-2 + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v1 + - name: Set ECR image tag + id: image-tag + run: echo "::set-output name=DOCKER_IMAGE_TAG::`git rev-parse HEAD`" + - name: Cache Public ECR Image + id: lambda_python_3_9 + uses: actions/cache@v2 + with: + path: ~/cache + key: lambda_python_3_9 + - name: Handle Cache Miss (pull public ECR image & save it to tar file) + if: steps.cache-primes.outputs.cache-hit != 'true' + run: | + mkdir -p ~/cache + docker pull public.ecr.aws/lambda/python:3.9 + docker save public.ecr.aws/lambda/python:3.9 -o ~/cache/lambda_python_3_9.tar + - name: Handle Cache Hit (load docker image from tar file) + if: steps.cache-primes.outputs.cache-hit == 'true' + run: | + docker load -i ~/cache/lambda_python_3_9.tar + - name: Build and push + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + ECR_REPOSITORY: feast-python-server + run: | + docker build \ + --file sdk/python/feast/infra/feature_servers/aws_lambda/Dockerfile \ + --tag $ECR_REGISTRY/$ECR_REPOSITORY:${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }} \ + --load \ + . + docker push $ECR_REGISTRY/$ECR_REPOSITORY:${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }} + outputs: + DOCKER_IMAGE_TAG: ${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }} + integration-test-python: + if: github.repository == 'your github repo' # swap here with your project id + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: [ "3.8" ] + os: [ ubuntu-latest ] + env: + OS: ${{ matrix.os }} + PYTHON: ${{ matrix.python-version }} + services: + redis: + image: redis + ports: + - 6379:6379 + options: >- + --health-cmd "redis-cli ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + steps: + - uses: actions/checkout@v2 + with: + # pull_request_target runs the workflow in the context of the base repo + # as such actions/checkout needs to be explicit configured to retrieve + # code from the PR. 
+ ref: refs/pull/${{ github.event.pull_request.number }}/merge + submodules: recursive + - name: Setup Python + uses: actions/setup-python@v2 + id: setup-python + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + - name: Setup Go + id: setup-go + uses: actions/setup-go@v2 + with: + go-version: 1.18.0 + - name: Set up AWS SDK + uses: aws-actions/configure-aws-credentials@v1 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: us-west-2 + - name: Use AWS CLI + run: aws sts get-caller-identity + - name: Upgrade pip version + run: | + pip install --upgrade "pip>=21.3.1,<22.1" + - name: Get pip cache dir + id: pip-cache + run: | + echo "::set-output name=dir::$(pip cache dir)" + - name: pip cache + uses: actions/cache@v2 + with: + path: | + ${{ steps.pip-cache.outputs.dir }} + /opt/hostedtoolcache/Python + /Users/runner/hostedtoolcache/Python + key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }} + restore-keys: | + ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip- + - name: Install pip-tools + run: pip install pip-tools + - name: Install apache-arrow on ubuntu + if: matrix.os == 'ubuntu-latest' + run: | + sudo apt update + sudo apt install -y -V ca-certificates lsb-release wget + wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt update + sudo apt install -y -V libarrow-dev + - name: Install apache-arrow on macos + if: matrix.os == 'macOS-latest' + run: brew install apache-arrow + - name: Install dependencies + run: make install-python-ci-dependencies + - name: Setup Redis Cluster + run: | + docker pull vishnunair/docker-redis-cluster:latest + docker run -d -p 6001:6379 -p 6002:6380 -p 6003:6381 -p 6004:6382 -p 6005:6383 -p 6006:6384 --name redis-cluster vishnunair/docker-redis-cluster + - name: Test python + if: ${{ always() }} # this will guarantee that step won't be canceled and resources won't leak + env: + FEAST_SERVER_DOCKER_IMAGE_TAG: ${{ needs.build-docker-image.outputs.DOCKER_IMAGE_TAG }} + run: | + pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread -k "aws and not Snowflake and not BigQuery" + pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread -k "File and not Snowflake and not BigQuery" + pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread -k "dynamo and not Snowflake and not BigQuery" + pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread -k "Redshift and not Snowflake and not BigQuery" + + diff --git a/.github/fork_workflows/fork_pr_integration_tests_gcp.yml b/.github/fork_workflows/fork_pr_integration_tests_gcp.yml new file mode 100644 index 0000000000..d53aef0155 --- /dev/null +++ b/.github/fork_workflows/fork_pr_integration_tests_gcp.yml @@ -0,0 +1,97 @@ +name: fork-pr-integration-tests-gcp + +on: [pull_request] + +jobs: + integration-test-python: + if: github.repository == 'your github repo' # swap here with your project id 
+ runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: [ "3.8" ] + os: [ ubuntu-latest ] + env: + OS: ${{ matrix.os }} + PYTHON: ${{ matrix.python-version }} + services: + redis: + image: redis + ports: + - 6379:6379 + options: >- + --health-cmd "redis-cli ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + steps: + - uses: actions/checkout@v2 + with: + # pull_request_target runs the workflow in the context of the base repo + # as such actions/checkout needs to be explicit configured to retrieve + # code from the PR. + ref: refs/pull/${{ github.event.pull_request.number }}/merge + submodules: recursive + - name: Setup Python + uses: actions/setup-python@v2 + id: setup-python + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + - name: Setup Go + id: setup-go + uses: actions/setup-go@v2 + with: + go-version: 1.18.0 + - name: Set up gcloud SDK + uses: google-github-actions/setup-gcloud@v0 + with: + project_id: ${{ secrets.GCP_PROJECT_ID }} + service_account_key: ${{ secrets.GCP_SA_KEY }} + export_default_credentials: true + - name: Use gcloud CLI + run: gcloud info + - name: Upgrade pip version + run: | + pip install --upgrade "pip>=21.3.1,<22.1" + - name: Get pip cache dir + id: pip-cache + run: | + echo "::set-output name=dir::$(pip cache dir)" + - name: pip cache + uses: actions/cache@v2 + with: + path: | + ${{ steps.pip-cache.outputs.dir }} + /opt/hostedtoolcache/Python + /Users/runner/hostedtoolcache/Python + key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }} + restore-keys: | + ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip- + - name: Install pip-tools + run: pip install pip-tools + - name: Install apache-arrow on ubuntu + if: matrix.os == 'ubuntu-latest' + run: | + sudo apt update + sudo apt install -y -V ca-certificates lsb-release wget + wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt update + sudo apt install -y -V libarrow-dev + - name: Install apache-arrow on macos + if: matrix.os == 'macOS-latest' + run: brew install apache-arrow + - name: Install dependencies + run: make install-python-ci-dependencies + - name: Setup Redis Cluster + run: | + docker pull vishnunair/docker-redis-cluster:latest + docker run -d -p 6001:6379 -p 6002:6380 -p 6003:6381 -p 6004:6382 -p 6005:6383 -p 6006:6384 --name redis-cluster vishnunair/docker-redis-cluster + - name: Test python + if: ${{ always() }} # this will guarantee that step won't be canceled and resources won't leak + # Run only BigQuery and File tests without dynamo and redshift tests. 
+ run: | + pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread -k "BigQuery and not dynamo and not Redshift and not Snowflake" + pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread -k "File and not dynamo and not Redshift and not Snowflake" + diff --git a/.github/fork_workflows/fork_pr_integration_tests_snowflake.yml b/.github/fork_workflows/fork_pr_integration_tests_snowflake.yml new file mode 100644 index 0000000000..8832c75fca --- /dev/null +++ b/.github/fork_workflows/fork_pr_integration_tests_snowflake.yml @@ -0,0 +1,96 @@ +name: fork-pr-integration-tests-snowflake + +on: [pull_request] + +jobs: + integration-test-python: + if: github.repository == 'your github repo' # swap here with your project id + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: [ "3.8" ] + os: [ ubuntu-latest ] + env: + OS: ${{ matrix.os }} + PYTHON: ${{ matrix.python-version }} + services: + redis: + image: redis + ports: + - 6379:6379 + options: >- + --health-cmd "redis-cli ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + steps: + - uses: actions/checkout@v2 + with: + # pull_request_target runs the workflow in the context of the base repo + # as such actions/checkout needs to be explicit configured to retrieve + # code from the PR. + ref: refs/pull/${{ github.event.pull_request.number }}/merge + submodules: recursive + - name: Setup Python + uses: actions/setup-python@v2 + id: setup-python + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + - name: Setup Go + id: setup-go + uses: actions/setup-go@v2 + with: + go-version: 1.18.0 + + - name: Upgrade pip version + run: | + pip install --upgrade "pip>=21.3.1,<22.1" + - name: Get pip cache dir + id: pip-cache + run: | + echo "::set-output name=dir::$(pip cache dir)" + - name: pip cache + uses: actions/cache@v2 + with: + path: | + ${{ steps.pip-cache.outputs.dir }} + /opt/hostedtoolcache/Python + /Users/runner/hostedtoolcache/Python + key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }} + restore-keys: | + ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip- + - name: Install pip-tools + run: pip install pip-tools + - name: Install apache-arrow on ubuntu + if: matrix.os == 'ubuntu-latest' + run: | + sudo apt update + sudo apt install -y -V ca-certificates lsb-release wget + wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt update + sudo apt install -y -V libarrow-dev + - name: Install apache-arrow on macos + if: matrix.os == 'macOS-latest' + run: brew install apache-arrow + - name: Install dependencies + run: make install-python-ci-dependencies + - name: Setup Redis Cluster + run: | + docker pull vishnunair/docker-redis-cluster:latest + docker run -d -p 6001:6379 -p 6002:6380 -p 6003:6381 -p 6004:6382 -p 6005:6383 -p 6006:6384 --name redis-cluster vishnunair/docker-redis-cluster + - name: Test python + if: ${{ always() }} # this will guarantee that step won't be canceled and resources won't leak + env: + SNOWFLAKE_CI_DEPLOYMENT: ${{ secrets.SNOWFLAKE_CI_DEPLOYMENT }} + SNOWFLAKE_CI_USER: ${{ 
secrets.SNOWFLAKE_CI_USER }} + SNOWFLAKE_CI_PASSWORD: ${{ secrets.SNOWFLAKE_CI_PASSWORD }} + SNOWFLAKE_CI_ROLE: ${{ secrets.SNOWFLAKE_CI_ROLE }} + SNOWFLAKE_CI_WAREHOUSE: ${{ secrets.SNOWFLAKE_CI_WAREHOUSE }} + # Run only Snowflake BigQuery and File tests without dynamo and redshift tests. + run: | + pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread -k "Snowflake and not dynamo and not Redshift and not Bigquery and not gcp" + pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread -k "File and not dynamo and not Redshift and not Bigquery and not gcp" + diff --git a/.github/workflows/java_master_only.yml b/.github/workflows/java_master_only.yml index fc2bb52387..f5297615f6 100644 --- a/.github/workflows/java_master_only.yml +++ b/.github/workflows/java_master_only.yml @@ -9,6 +9,7 @@ on: jobs: build-docker-images: + if: github.repository == 'feast-dev/feast' runs-on: ubuntu-latest strategy: matrix: @@ -46,6 +47,7 @@ jobs: fi lint-java: + if: github.repository == 'feast-dev/feast' runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 @@ -55,6 +57,7 @@ jobs: run: make lint-java unit-test-java: + if: github.repository == 'feast-dev/feast' runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 @@ -80,6 +83,7 @@ jobs: path: ${{ github.workspace }}/docs/coverage/java/target/site/jacoco-aggregate/ integration-test: + if: github.repository == 'feast-dev/feast' runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/java_pr.yml b/.github/workflows/java_pr.yml index 39593f02ce..73e70c33d1 100644 --- a/.github/workflows/java_pr.yml +++ b/.github/workflows/java_pr.yml @@ -9,6 +9,7 @@ on: jobs: lint-java: + if: github.repository == 'feast-dev/feast' runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 @@ -22,6 +23,7 @@ jobs: run: make lint-java unit-test-java: + if: github.repository == 'feast-dev/feast' runs-on: ubuntu-latest needs: lint-java steps: @@ -54,8 +56,9 @@ jobs: integration-test: # all jobs MUST have this if check for 'ok-to-test' or 'approved' for security purposes. 
if: - (github.event.action == 'labeled' && (github.event.label.name == 'approved' || github.event.label.name == 'ok-to-test')) || - (github.event.action != 'labeled' && (contains(github.event.pull_request.labels.*.name, 'ok-to-test') || contains(github.event.pull_request.labels.*.name, 'approved'))) + ((github.event.action == 'labeled' && (github.event.label.name == 'approved' || github.event.label.name == 'ok-to-test')) || + (github.event.action != 'labeled' && (contains(github.event.pull_request.labels.*.name, 'ok-to-test') || contains(github.event.pull_request.labels.*.name, 'approved')))) && + github.repository == 'feast-dev/feast' runs-on: ubuntu-latest needs: unit-test-java steps: diff --git a/.github/workflows/lint_pr.yml b/.github/workflows/lint_pr.yml index 40c3dead00..f9af8b27c7 100644 --- a/.github/workflows/lint_pr.yml +++ b/.github/workflows/lint_pr.yml @@ -9,6 +9,7 @@ on: jobs: validate-title: + if: github.repository == 'feast-dev/feast' name: Validate PR title runs-on: ubuntu-latest steps: diff --git a/.github/workflows/master_only.yml b/.github/workflows/master_only.yml index c9ebcdaf04..51e3830fe6 100644 --- a/.github/workflows/master_only.yml +++ b/.github/workflows/master_only.yml @@ -7,6 +7,7 @@ on: jobs: build-lambda-docker-image: + if: github.repository == 'feast-dev/feast' runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 @@ -58,6 +59,7 @@ jobs: outputs: DOCKER_IMAGE_TAG: ${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }} integration-test-python-and-go: + if: github.repository == 'feast-dev/feast' needs: build-lambda-docker-image runs-on: ${{ matrix.os }} strategy: @@ -180,6 +182,7 @@ jobs: run: aws s3 cp --recursive .benchmarks s3://feast-ci-pytest-benchmarks build-all-docker-images: + if: github.repository == 'feast-dev/feast' runs-on: ubuntu-latest strategy: matrix: diff --git a/.github/workflows/nightly-ci.yml b/.github/workflows/nightly-ci.yml index fead512408..0c2ba6a66a 100644 --- a/.github/workflows/nightly-ci.yml +++ b/.github/workflows/nightly-ci.yml @@ -11,6 +11,7 @@ on: jobs: check_date: + if: github.repository == 'feast-dev/feast' runs-on: ubuntu-latest name: Check latest commit outputs: @@ -24,6 +25,7 @@ jobs: if: ${{ github.event_name == 'schedule' }} run: echo '::set-output name=WAS_EDITED::'$(test -n "$(git log --format=%H --since='24 hours ago')" && echo 'true' || echo 'false') build-docker-image: + if: github.repository == 'feast-dev/feast' needs: [check_date] runs-on: ubuntu-latest steps: @@ -79,6 +81,7 @@ jobs: outputs: DOCKER_IMAGE_TAG: ${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }} integration-test-python: + if: github.repository == 'feast-dev/feast' needs: [check_date, build-docker-image] runs-on: ${{ matrix.os }} strategy: diff --git a/.github/workflows/pr_integration_tests.yml b/.github/workflows/pr_integration_tests.yml index 58bf45c687..ab8a79760f 100644 --- a/.github/workflows/pr_integration_tests.yml +++ b/.github/workflows/pr_integration_tests.yml @@ -16,8 +16,9 @@ jobs: build-docker-image: # all jobs MUST have this if check for 'ok-to-test' or 'approved' for security purposes. 
if: - (github.event.action == 'labeled' && (github.event.label.name == 'approved' || github.event.label.name == 'lgtm' || github.event.label.name == 'ok-to-test')) || - (github.event.action != 'labeled' && (contains(github.event.pull_request.labels.*.name, 'ok-to-test') || contains(github.event.pull_request.labels.*.name, 'approved') || contains(github.event.pull_request.labels.*.name, 'lgtm'))) + ((github.event.action == 'labeled' && (github.event.label.name == 'approved' || github.event.label.name == 'lgtm' || github.event.label.name == 'ok-to-test')) || + (github.event.action != 'labeled' && (contains(github.event.pull_request.labels.*.name, 'ok-to-test') || contains(github.event.pull_request.labels.*.name, 'approved') || contains(github.event.pull_request.labels.*.name, 'lgtm')))) && + github.repository == 'feast-dev/feast' runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 @@ -77,8 +78,9 @@ jobs: integration-test-python: # all jobs MUST have this if check for 'ok-to-test' or 'approved' for security purposes. if: - (github.event.action == 'labeled' && (github.event.label.name == 'approved' || github.event.label.name == 'lgtm' || github.event.label.name == 'ok-to-test')) || - (github.event.action != 'labeled' && (contains(github.event.pull_request.labels.*.name, 'ok-to-test') || contains(github.event.pull_request.labels.*.name, 'approved') || contains(github.event.pull_request.labels.*.name, 'lgtm'))) + ((github.event.action == 'labeled' && (github.event.label.name == 'approved' || github.event.label.name == 'lgtm' || github.event.label.name == 'ok-to-test')) || + (github.event.action != 'labeled' && (contains(github.event.pull_request.labels.*.name, 'ok-to-test') || contains(github.event.pull_request.labels.*.name, 'approved') || contains(github.event.pull_request.labels.*.name, 'lgtm')))) && + github.repository == 'feast-dev/feast' needs: build-docker-image runs-on: ${{ matrix.os }} strategy: diff --git a/.github/workflows/pr_local_integration_tests.yml b/.github/workflows/pr_local_integration_tests.yml index d4db8a3a7c..2a012c323b 100644 --- a/.github/workflows/pr_local_integration_tests.yml +++ b/.github/workflows/pr_local_integration_tests.yml @@ -12,9 +12,10 @@ jobs: integration-test-python-local: # all jobs MUST have this if check for 'ok-to-test' or 'approved' for security purposes. 
    if:
-      (github.event.action == 'labeled' && (github.event.label.name == 'approved' || github.event.label.name == 'lgtm' || github.event.label.name == 'ok-to-test')) ||
-      (github.event.action != 'labeled' && (contains(github.event.pull_request.labels.*.name, 'ok-to-test') || contains(github.event.pull_request.labels.*.name, 'approved') || contains(github.event.pull_request.labels.*.name, 'lgtm')))
-    runs-on: ${{ matrix.os }}
+      ((github.event.action == 'labeled' && (github.event.label.name == 'approved' || github.event.label.name == 'lgtm' || github.event.label.name == 'ok-to-test')) ||
+      (github.event.action != 'labeled' && (contains(github.event.pull_request.labels.*.name, 'ok-to-test') || contains(github.event.pull_request.labels.*.name, 'approved') || contains(github.event.pull_request.labels.*.name, 'lgtm')))) ||
+      github.repository != 'feast-dev/feast'
+    runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
       matrix:
@@ -67,14 +68,11 @@
         sudo apt install -y -V libarrow-dev
     - name: Install dependencies
       run: make install-python-ci-dependencies
-    - name: Set up gcloud SDK # TODO(adchia): remove this dependency
-      uses: google-github-actions/setup-gcloud@v0
-      with:
-        project_id: ${{ secrets.GCP_PROJECT_ID }}
-        service_account_key: ${{ secrets.GCP_SA_KEY }}
-        export_default_credentials: true
-    - name: Use gcloud CLI
-      run: gcloud info
     - name: Test local integration tests
       if: ${{ always() }} # this will guarantee that step won't be canceled and resources won't leak
-      run: make test-python-integration-local
+      env:
+        FEAST_USAGE: "False"
+        IS_TEST: "True"
+        FEAST_LOCAL_ONLINE_CONTAINER: "True"
+        FEAST_IS_LOCAL_TEST: "True"
+      run: pytest -n 8 --cov=./ --cov-report=xml --color=yes --integration -k "not gcs_registry and not s3_registry and not test_lambda_materialization" sdk/python/tests
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 54051be385..c114dedd36 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -7,6 +7,7 @@ on:
 jobs:
   get-version:
+    if: github.repository == 'feast-dev/feast'
     runs-on: ubuntu-latest
     outputs:
       release_version: ${{ steps.get_release_version.outputs.release_version }}
@@ -100,6 +101,7 @@
           fi
   publish-helm-charts:
+    if: github.repository == 'feast-dev/feast'
     runs-on: ubuntu-latest
     needs: get-version
     env:
@@ -129,6 +131,7 @@
     uses: ./.github/workflows/build_wheels.yml
   publish-python-sdk:
+    if: github.repository == 'feast-dev/feast'
     runs-on: ubuntu-latest
     needs: [build_wheels, publish-web-ui-npm]
     steps:
@@ -142,6 +145,7 @@
         password: ${{ secrets.PYPI_PASSWORD }}
   publish-java-sdk:
+    if: github.repository == 'feast-dev/feast'
     container: maven:3.6-jdk-11
     runs-on: ubuntu-latest
     needs: get-version
@@ -179,6 +183,7 @@
           infra/scripts/publish-java-sdk.sh --revision ${VERSION_WITHOUT_PREFIX} --gpg-key-import-dir /root
   publish-web-ui-npm:
+    if: github.repository == 'feast-dev/feast'
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index 7bbe9ad6ac..de6d98d140 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -1,6 +1,6 @@
 name: unit-tests
 
-on: [push, pull_request]
+on: [pull_request]
 
 jobs:
   unit-test-python:
     runs-on: ${{ matrix.os }}
@@ -69,15 +69,10 @@
       - name: Install dependencies
         run: make install-python-ci-dependencies
       - name: Test Python
-        env:
-          SNOWFLAKE_CI_DEPLOYMENT: ${{ secrets.SNOWFLAKE_CI_DEPLOYMENT }}
-          SNOWFLAKE_CI_USER: ${{ secrets.SNOWFLAKE_CI_USER }}
-
SNOWFLAKE_CI_PASSWORD: ${{ secrets.SNOWFLAKE_CI_PASSWORD }} - SNOWFLAKE_CI_ROLE: ${{ secrets.SNOWFLAKE_CI_ROLE }} - SNOWFLAKE_CI_WAREHOUSE: ${{ secrets.SNOWFLAKE_CI_WAREHOUSE }} run: pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests - name: Upload coverage to Codecov uses: codecov/codecov-action@v1 + if: github.repository == 'feast-dev/feast' with: token: ${{ secrets.CODECOV_TOKEN }} files: ./coverage.xml diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ef2d25c60d..079ae03768 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,6 +2,10 @@ > Please see [Development Guide](https://docs.feast.dev/project/development-guide) for project level development instructions. +
+## Maintainer's Guide
+
+> Please see [Maintainer's Guide](https://docs.feast.dev/project/maintainers) for instructions for maintainers.
+
 ## Table of Contents
 
 - [Overview](#overview)
@@ -36,7 +40,7 @@
   - [Unit Tests](#unit-tests-1)
   - [Testing with Github Actions workflows](#testing-with-github-actions-workflows)
 - [Issues](#issues)
-
+
 ## Overview
 
 This guide is targeted at developers looking to contribute to Feast components in the main Feast repository:
@@ -191,36 +195,45 @@
 To test across clouds, on top of setting up Redis, you also need GCP / AWS / Snowflake setup.
 
 > Note: you can manually control what tests are run today by inspecting `RepoConfiguration`
 > and commenting out tests that are added to `DEFAULT_FULL_REPO_CONFIGS`
 
 **GCP**
-1. Install the [Cloud SDK](https://cloud.google.com/sdk/docs/install).
-2. Then run login to gcloud:
+### Setup your GCP BigQuery Instance
+1. You can get free credits [here](https://cloud.google.com/free/docs/free-cloud-features#free-trial).
+2. You will need to set up a service account, enable the BigQuery API, and create a bucket to use as a staging location.
+   * Set up your service account and project using steps 1-5 [here](https://codelabs.developers.google.com/codelabs/cloud-bigquery-python#0).
+   * Follow these [instructions](https://cloud.google.com/storage/docs/creating-buckets) in your project to create a bucket for running GCP tests, and remember to save the bucket name.
+3. Install the [Cloud SDK](https://cloud.google.com/sdk/docs/install).
+4. Log in to gcloud if you haven't already:
 ```
 gcloud auth login
 gcloud auth application-default login
 ```
-- When you run `gcloud auth application-default login`, you should see some output of the form:
-  ```
-  Credentials saved to file: [$HOME/.config/gcloud/application_default_credentials.json]
-  ```
-- You should run `export GOOGLE_APPLICATION_CREDENTIALS="$HOME/.config/gcloud/application_default_credentials.json”` to add the application credentials to your .zshrc or .bashrc.
-3. Run `export GCLOUD_PROJECT=[your project]` to your .zshrc or .bashrc.
-4. Running `gcloud config list` should give you something like this:
-```sh
-$ gcloud config list
-[core]
-account = [your email]
-disable_usage_reporting = True
-project = [your project]
+   - When you run `gcloud auth application-default login`, you should see some output of the form:
+     ```
+     Credentials saved to file: [$HOME/.config/gcloud/application_default_credentials.json]
+     ```
+   - Add `export GOOGLE_APPLICATION_CREDENTIALS="$HOME/.config/gcloud/application_default_credentials.json"` to your .zshrc or .bashrc so the application credentials are picked up.
+5. Add `export GCLOUD_PROJECT=[your project id from step 2]` to your .zshrc or .bashrc.
+6. Running `gcloud config list` should give you something like this:
+   ```sh
+   $ gcloud config list
+   [core]
+   account = [your email]
+   disable_usage_reporting = True
+   project = [your project id]
+
+   Your active configuration is: [default]
+   ```
+7. Export GCP-specific environment variables. Namely,
+   ```sh
+   export GCS_REGION='[your gcs region e.g. US]'
+   export GCS_STAGING_LOCATION='[your gcs staging location]'
+   ```
+   **NOTE**: Your `GCS_STAGING_LOCATION` should be of the form `gs://[your bucket name]`, using the bucket name from step 2.
-Your active configuration is: [default]
-```
-5. Export gcp specific environment variables. Namely,
-```sh
-export GCS_REGION='[your gcs region e.g US]'
-export GCS_STAGING_LOCATION='[your gcs staging location]'
-```
+8. Once authenticated, you should be able to run the integration tests for BigQuery without any failures.
 
 **AWS**
-1. TODO(adchia): flesh out setting up AWS login (or create helper script)
+1. Set up AWS by creating an account, database, and cluster. You will need to enable Redshift and Dynamo.
+   * You can get free credits [here](https://aws.amazon.com/free/?all-free-tier.sort-by=item.additionalFields.SortRank&al[…]f.Free%20Tier%20Types=*all&awsf.Free%20Tier%20Categories=*all).
 2. To run the AWS Redshift and Dynamo integration tests you will have to export your own AWS credentials. Namely,
 ```sh
 export AWS_REGISTRY_PATH='[your aws registry path]'
 ```
 
 **Snowflake**
 1. See https://signup.snowflake.com/ to setup a trial.
-2. Then to run successfully, you'll need some environment variables setup:
-```sh
-export SNOWFLAKE_CI_DEPLOYMENT='[snowflake_deployment]'
-export SNOWFLAKE_CI_USER='[your user]'
-export SNOWFLAKE_CI_PASSWORD='[your pw]'
-export SNOWFLAKE_CI_ROLE='[your CI role e.g. SYSADMIN]'
-export SNOWFLAKE_CI_WAREHOUSE='[your warehouse]'
-```
+2. Set up your account, and if you are not an `ACCOUNTADMIN` (if you created your own account, you should be), give yourself the `SYSADMIN` role.
+   ```sql
+   grant role accountadmin, sysadmin to user user2;
+   ```
+   * Also remember to save your [account name](https://docs.snowflake.com/en/user-guide/admin-account-identifier.html#:~:text=organization_name%20is%20the%20name%20of,your%20account%20within%20your%20organization), username, and role.
+3. Create a warehouse and database named `FEAST` with the schema `OFFLINE`.
+   ```sql
+   create or replace warehouse feast_tests_wh with
+   warehouse_size='MEDIUM' -- set your warehouse size to whatever your budget allows
+   auto_suspend = 180
+   auto_resume = true
+   initially_suspended=true;
+
+   create or replace database FEAST;
+   use database FEAST;
+   create schema OFFLINE;
+   ```
+4. You will need to create a data unloading location (either on S3, GCP, or Azure). Detailed instructions are [here](https://docs.snowflake.com/en/user-guide/data-unload-overview.html). You will need to save the storage export location and the storage export name.
+5. Then, to run successfully, you'll need some environment variables set up:
+   ```sh
+   export SNOWFLAKE_CI_DEPLOYMENT='[your snowflake account name]'
+   export SNOWFLAKE_CI_USER='[your snowflake username]'
+   export SNOWFLAKE_CI_PASSWORD='[your snowflake pw]'
+   export SNOWFLAKE_CI_ROLE='[your CI role e.g. SYSADMIN]'
+   export SNOWFLAKE_CI_WAREHOUSE='[your warehouse]'
+   export BLOB_EXPORT_STORAGE_NAME='[your data unloading storage name]'
+   export BLOB_EXPORT_URI='[your data unloading blob uri]'
+   ```
+6. Once everything is set up, the Snowflake integration tests should pass without failures.
 
-Then run `make test-python-integration`. Note that for Snowflake / GCP / AWS, this will create new temporary tables / datasets.
+Note that for Snowflake / GCP / AWS, running `make test-python-integration` will create new temporary tables / datasets in your cloud account.
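+
+As a quick sanity check of a single provider, you can run only that provider's parametrized tests instead of the full suite. The sketch below does this for Snowflake; it assumes the `SNOWFLAKE_CI_*` variables above are exported, and reuses the test filter from the fork Snowflake workflow in this repository:
+
+```sh
+# Run only the Snowflake-parametrized integration tests (skips Dynamo, Redshift, BigQuery, and GCP).
+pytest -n 8 sdk/python/tests --integration --durations=5 \
+  --timeout=1200 --timeout_method=thread \
+  -k "Snowflake and not dynamo and not Redshift and not Bigquery and not gcp"
+```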
 #### (Advanced) Running specific provider tests or running your test against specific online or offline stores
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index 410ca6a5c6..aa95d40368 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -109,6 +109,7 @@
 * [Contribution process](project/contributing.md)
 * [Development guide](project/development-guide.md)
+* [Maintainer Docs](project/maintainers.md)
 * [Versioning policy](project/versioning-policy.md)
 * [Release process](project/release-process.md)
 * [Feast 0.9 vs Feast 0.10+](project/feast-0.9-vs-feast-0.10+.md)
diff --git a/docs/project/development-guide.md b/docs/project/development-guide.md
index 58e29a5ca7..5aae0628f6 100644
--- a/docs/project/development-guide.md
+++ b/docs/project/development-guide.md
@@ -8,6 +8,7 @@ This guide is targeted at developers looking to contribute to Feast:
 * [Making a Pull Request](development-guide.md#making-a-pull-request)
 * [Feast Data Storage Format](development-guide.md#feast-data-storage-format)
 * [Feast Protobuf API](development-guide.md#feast-protobuf-api)
+* [Maintainer Guide](./maintainers.md)
 
 > Learn How the Feast [Contributing Process](contributing.md) works.
diff --git a/docs/project/maintainers.md b/docs/project/maintainers.md
new file mode 100644
index 0000000000..a773c758b5
--- /dev/null
+++ b/docs/project/maintainers.md
@@ -0,0 +1,57 @@
+# Setting up your environment
+> Please see the [Development Guide](https://docs.feast.dev/project/development-guide) for project-level development instructions and the [Contributing Guide](https://github.com/feast-dev/feast/blob/master/CONTRIBUTING.md) for specific details on how to set up your development environment and contribute to Feast.
+
+# Maintainers Development
+> In most scenarios, your code changes or the areas of Feast that you are actively maintaining will only touch parts of the code (e.g. one offline store / online store).
+
+## Forked Repo Best Practices
+1. You should set up your fork so that you can make pull requests against your own master branch.
+    - This prevents unnecessary integration tests and other GitHub Actions that are irrelevant to your code changes from being run every time you would like to make a code change.
+    - **NOTE**: Most workflows are enabled by default, so manually [disable workflows](https://docs.github.com/en/actions/managing-workflow-runs/disabling-and-enabling-a-workflow) that are not needed.
+2. When you are ready to merge changes into the official Feast branch, make a pull request against the main Feast repository and request a review from other maintainers.
+    - Since your code changes should only touch tests that are relevant to your functionality, the other tests should pass as well.
+
+**NOTE**: Remember to frequently sync your fork's master branch with `feast-dev/feast:master`.
+
+## GitHub Actions Workflows on Fork
+- **Recommended**: The GitHub Actions workflows that should be enabled on the fork are as follows:
+    - `unit-tests`
+        - Runs all of the unit tests, which should always pass.
+    - `linter`
+        - Lints your PR for style and complexity issues using mypy, isort, and flake8.
+    - `fork-pr-integration-tests-[provider]`
+        - Runs all of the integration tests that exercise Feast functionality on your fork for a specific provider.
+        - The `.github/fork_workflows` folder has examples for common providers (`aws`, `gcp`, and `snowflake`).
+        1. Move the `fork_pr_integration_tests_[provider].yml` from `.github/fork_workflows` to `.github/workflows` (see the sketch at the end of this page).
+        2. Edit `fork_pr_integration_tests_[provider].yml` (more details below) to only run the integration tests that are relevant to your area of interest.
+        3. Push the workflow to your branch, and it should automatically be added to the actions on your fork.
+    - `build_wheels`
+        - Release verification workflow to use for a [release](release-process.md).
+
+## Integration Test Workflow Changes
+Fork-specific integration tests are run by the `fork_pr_integration_tests_[provider].yml` workflow files.
+
+1. Under the `integration-test-python` job, replace `your github repo` with your Feast fork's repository name.
+2. If your offline store/online store needs special setup, add it to the job, similar to how GCP is set up below.
+
+   ```yaml
+   - name: Set up gcloud SDK
+     uses: google-github-actions/setup-gcloud@v0
+     with:
+       project_id: ${{ secrets.GCP_PROJECT_ID }}
+       service_account_key: ${{ secrets.GCP_SA_KEY }}
+       export_default_credentials: true
+   ```
+
+3. Add any environment variables that you need to your GitHub [secrets](https://github.com/Azure/actions-workflow-samples/blob/master/assets/create-secrets-for-GitHub-workflows.md).
+   - For the specific GitHub secrets you will need in order to test the already supported datastores (e.g. AWS, BigQuery, Snowflake, etc.), refer to this [guide](https://github.com/feast-dev/feast/blob/master/CONTRIBUTING.md) under the `Integration Tests` section.
+   - Reference them in the workflow as `secrets.SECRET_NAME` and expose them to the test steps as environment variables.
+4. To limit pytest in your GitHub workflow to only your specific tests, leverage pytest's `-k` option.
+
+   ```bash
+   pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread -k "BigQuery and not dynamo and not Redshift"
+   ```
+
+   - Each test in Feast is parametrized by its offline and online store, so we can filter tests by name. The above command chooses only tests with BigQuery that do not use Dynamo or Redshift.
+
+5. Every time a pull request or a change to a pull request is made, the integration tests, the local integration tests, the unit tests, and the linter should run.
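+
+As referenced in the fork workflow steps above, enabling one of the example workflows is just a file move plus a push. Below is a minimal sketch for the `aws` provider; substitute your own provider's file, and note the commit message is only illustrative:
+
+```sh
+# Move the example workflow into the active workflows folder (step 1).
+git mv .github/fork_workflows/fork_pr_integration_tests_aws.yml .github/workflows/
+# Commit and push to your fork so the workflow appears under its Actions tab (step 3).
+git commit -m "ci: enable AWS fork integration tests"
+git push origin master
+```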
diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index 776fff3bb9..52675242b7 100644 --- a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -75,11 +75,11 @@ SNOWFLAKE_CONFIG = { "type": "snowflake.online", - "account": os.environ.get("SNOWFLAKE_CI_DEPLOYMENT", ""), - "user": os.environ.get("SNOWFLAKE_CI_USER", ""), - "password": os.environ.get("SNOWFLAKE_CI_PASSWORD", ""), - "role": os.environ.get("SNOWFLAKE_CI_ROLE", ""), - "warehouse": os.environ.get("SNOWFLAKE_CI_WAREHOUSE", ""), + "account": os.getenv("SNOWFLAKE_CI_DEPLOYMENT", ""), + "user": os.getenv("SNOWFLAKE_CI_USER", ""), + "password": os.getenv("SNOWFLAKE_CI_PASSWORD", ""), + "role": os.getenv("SNOWFLAKE_CI_ROLE", ""), + "warehouse": os.getenv("SNOWFLAKE_CI_WAREHOUSE", ""), "database": "FEAST", "schema": "ONLINE", } diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py index ae83ea8eb0..b5fc2448d4 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py @@ -34,8 +34,10 @@ def __init__(self, project_name: str, *args, **kwargs): warehouse=os.environ["SNOWFLAKE_CI_WAREHOUSE"], database="FEAST", schema="OFFLINE", - storage_integration_name="FEAST_S3", - blob_export_location="s3://feast-snowflake-offload/export", + storage_integration_name=os.getenv("BLOB_EXPORT_STORAGE_NAME", "FEAST_S3"), + blob_export_location=os.getenv( + "BLOB_EXPORT_URI", "s3://feast-snowflake-offload/export" + ), ) def create_data_source(