Skip to content

Add SentencePieceTokenizer and LlamaTokenizer #1375

Add SentencePieceTokenizer and LlamaTokenizer

Add SentencePieceTokenizer and LlamaTokenizer #1375

Workflow file for this run

# CPU test workflow: decides whether the "datasets" backend needs testing,
# then runs the `nvtabular-cpu` tox environment when it does.
name: nvtabular

on:
  workflow_dispatch:
  push:
    branches: [main]
    tags:
      - "v[0-9]+.[0-9]+.[0-9]+"
  pull_request:
    branches: [main]

# One active run per workflow + ref; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  # Runs ci/get_changed_backends.py and publishes whatever it prints as the
  # `needs_testing` job output, which gates the `tests` job below.
  check-changes:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is a string, not a YAML float.
          python-version: "3.8"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install GitPython
          pip install . --no-deps
      - name: Get changed backends
        id: backend_check
        # NOTE(review): github.base_ref is only populated for pull_request
        # events, so on push / workflow_dispatch `--branch` receives no
        # value -- confirm the script tolerates that.
        run: |
          echo "changed=$(python ci/get_changed_backends.py --backend datasets --branch ${{github.base_ref}})" >> "$GITHUB_OUTPUT"
    outputs:
      needs_testing: ${{ steps.backend_check.outputs.changed }}

  # Runs only when check-changes reported 'true' or the ref is main.
  tests:
    needs: check-changes
    if: ${{needs.check-changes.outputs.needs_testing == 'true' || github.ref == 'refs/heads/main'}}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # Quoted so versions stay strings (e.g. "3.10" would otherwise
        # be parsed as the float 3.1).
        python-version: ["3.8"]
        os: [ubuntu-latest]
    steps:
      - uses: actions/checkout@v3
        with:
          # 0 = fetch full history instead of a shallow clone.
          fetch-depth: 0
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install Ubuntu packages
        run: |
          sudo apt-get update -y
          sudo apt-get install -y protobuf-compiler
      - name: Install and upgrade python packages
        # NOTE(review): setuptools is pinned to 59.4.0; the reason is not
        # visible here -- confirm before bumping.
        run: |
          python -m pip install --upgrade pip setuptools==59.4.0 wheel tox
      - name: Get Merlin Branch name
        id: get-branch-name
        uses: NVIDIA-Merlin/.github/actions/branch-name@main
      - name: Run tests
        # Passes the resolved branch name and the current commit SHA to tox
        # through the MERLIN_BRANCH and GIT_COMMIT environment variables.
        run: |
          merlin_branch="${{ steps.get-branch-name.outputs.branch }}"
          MERLIN_BRANCH="$merlin_branch" GIT_COMMIT=$(git rev-parse HEAD) tox -e nvtabular-cpu