Skip to content

Commit

Permalink
Add github action to test templates
Browse files Browse the repository at this point in the history
  • Loading branch information
blythed committed Oct 28, 2024
1 parent 19c44d5 commit 8580a6c
Show file tree
Hide file tree
Showing 95 changed files with 1,240 additions and 1,106 deletions.
134 changes: 134 additions & 0 deletions .github/workflows/ci_templates.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
# Runs the template integration test for each template on pull requests.
# PRs targeting `main` test only the templates touched by the PR; PRs targeting
# any other matching branch (release branches) test every template.
name: Template Testing

on:
  pull_request:
    branches:
      - main
      - '[0-9]+.[0-9]+'
    # NOTE(review): no path patterns are listed, so this key is null.
    # Presumably no path filter is applied; confirm, then list paths or drop it.
    paths: # Paths that may affect code quality

concurrency:
  group: ${{ github.ref }}-template
  cancel-in-progress: true

jobs:
  # Works out which templates need testing and publishes them as a job matrix.
  template_update_check:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.final_output.outputs.matrix }}
      has_templates: ${{ steps.final_output.outputs.has_templates }}

    steps:
      - uses: actions/checkout@v4

      - name: Setup conditions based on branch
        id: setup
        env:
          BASE_REF: ${{ github.base_ref }}
        run: |
          if [[ "$BASE_REF" == "main" ]]; then
            echo "mode=changed" >> "$GITHUB_ENV"
          else
            echo "mode=all" >> "$GITHUB_ENV"
          fi

      # NOTE(review): third-party action referenced by a mutable tag; consider
      # pinning to a full commit SHA.
      - name: Get changed templates
        if: env.mode == 'changed'
        id: changed-files-specific
        uses: tj-actions/changed-files@v44

      - name: Filter changed templates and set output
        if: env.mode == 'changed'
        id: set-matrix-1
        env:
          # Passed through the environment rather than interpolated into the
          # script, so file names are never parsed as shell code.
          ALL_CHANGED_FILES: ${{ steps.changed-files-specific.outputs.all_changed_files }}
        run: |
          # The action emits a whitespace-separated list of paths.
          read -r -a changed_files <<< "$ALL_CHANGED_FILES"
          declare -A template_set
          for file in "${changed_files[@]}"; do
            if [[ "$file" =~ ^templates/ ]]; then
              template_name=$(echo "$file" | cut -d '/' -f 2)
              # Files directly under templates/ are not templates; skip them.
              if [[ -d "templates/$template_name" ]]; then
                template_set[$template_name]=1
              fi
            fi
          done
          echo "Changed templates: ${!template_set[*]}"
          # Drop empty lines so that "no templates" yields an empty list
          # instead of a single empty template name.
          matrix_json=$(printf '%s\n' "${!template_set[@]}" \
            | jq -R -s -c '{template: (split("\n") | map(select(length > 0)))}')
          echo "matrix_json: $matrix_json"
          echo "matrix_json=$matrix_json" >> "$GITHUB_ENV"

      - name: Get all templates to test
        if: env.mode == 'all'
        id: set-matrix-2
        run: |
          # Every directory directly under templates/ is a template.
          mapfile -t templates < <(
            find templates/ -maxdepth 1 -mindepth 1 -type d -exec basename {} \; | sort -u
          )
          echo "All templates: ${templates[*]}"
          matrix_json=$(printf '%s\n' "${templates[@]}" \
            | jq -R -s -c '{template: (split("\n") | map(select(length > 0)))}')
          echo "matrix_json: $matrix_json"
          echo "matrix_json=$matrix_json" >> "$GITHUB_ENV"

      - name: Set job output
        id: final_output
        run: |
          # `::set-output` is deprecated; step outputs go through $GITHUB_OUTPUT.
          echo "matrix=$matrix_json" >> "$GITHUB_OUTPUT"
          count=$(jq -r '.template | length' <<< "$matrix_json")
          if [[ "$count" -gt 0 ]]; then
            echo "has_templates=true" >> "$GITHUB_OUTPUT"
          else
            echo "has_templates=false" >> "$GITHUB_OUTPUT"
          fi

  # Installs the project plus one template's requirements and runs its test.
  test_template:
    needs: template_update_check
    # An empty matrix vector is an error, so skip the job when nothing changed.
    if: needs.template_update_check.outputs.has_templates == 'true'
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix: ${{ fromJson(needs.template_update_check.outputs.matrix) }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Cache Pip Packages
        id: setup-python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'
          cache: 'pip'

      - name: Cache Python Installation
        uses: actions/cache@v4
        with:
          path: ${{ env.pythonLocation }}
          # `${{ }}` cannot be nested inside an expression string; build the
          # per-template path with format() instead.
          key: ${{ matrix.template }}_${{ hashFiles('pyproject.toml', format('templates/{0}/requirements.txt', matrix.template)) }}

      - name: Install Superduper Project
        run: |
          # Install core and testsuite dependencies on the cached python environment.
          python -m pip install '.[test]'
          python -m pip install -e plugins/mongodb

      - name: Install template requirements
        env:
          TEMPLATE: ${{ matrix.template }}
        run: |
          echo "Installing local template dependencies..."
          python -m pip install -r "templates/$TEMPLATE/requirements.txt"
          if [ -e "templates/$TEMPLATE/install.sh" ]; then
            bash "templates/$TEMPLATE/install.sh"
          fi

      - name: Template testing
        run: |
          pytest test/integration/template/test_template.py -s
        env:
          SUPERDUPER_TEMPLATE: ${{ matrix.template }}
          SUPERDUPER_DATA_BACKEND: 'mongomock://test_db'
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add pdf rag template
- Updated llm_finetuning template
- Add sql table length exceed limit and uuid truncation.
- Add CI workflow to test templates

#### Bug Fixes

Expand Down
17 changes: 17 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,23 @@ new_release: ## Release a new version of superduper.io
git push origin release-$(RELEASE_VERSION)
git push origin $(RELEASE_VERSION)

# Comma-separated template names to build; the default `*` selects every
# directory under templates/. The value is deliberately unquoted: make keeps
# quotes as part of the value, which would make the `= "*"` test below fail.
TEMPLATES ?= *

build_templates: ## Build the templates with APPLY=False (TEMPLATES='a,b'; default: all)
	if [ "$(TEMPLATES)" = "*" ]; then \
		templates_to_build=$$(for dir in templates/*/; do basename "$$dir"; done); \
	else \
		templates_to_build=$$(echo "$(TEMPLATES)" | tr ',' ' '); \
	fi; \
	for template in $$templates_to_build; do \
		echo $$template; \
		rm -rf templates/$$template/blobs; \
		rm -rf templates/$$template/files; \
		rm -rf templates/$$template/.ipynb_checkpoints; \
		(cd templates/$$template && papermill build.ipynb /tmp/papermill_output.ipynb -p APPLY False); \
		jupyter nbconvert templates/$$template/build.ipynb --clear-output; \
	done;

##@ Code Quality

gen_docs: ## Generate Docs and API
Expand Down
21 changes: 21 additions & 0 deletions superduper/components/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,24 @@ def __str__(self):
return f'Dataset(identifier={self.identifier}, select={self.select})'

__repr__ = __str__


class RemoteData(Component):
    """Component that obtains its data by calling a user-supplied function.

    The function is called on the first access of ``data`` and its result is
    reused on later accesses (unless that result is ``None``).

    :param getter: Function to fetch data.
    """

    type_id: t.ClassVar[str] = 'dataset'
    getter: t.Callable

    def __post_init__(self, db, artifacts):
        # Cache slot for the fetched data; set before the parent initialiser runs.
        self._data = None
        return super().__post_init__(db, artifacts)

    @property
    def data(self):
        """Get the data."""
        fetched = self._data
        if fetched is None:
            # Nothing cached yet: call the getter and keep its result.
            fetched = self._data = self.getter()
        return fetched
9 changes: 7 additions & 2 deletions superduper/components/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

if t.TYPE_CHECKING:
from superduper.base.datalayer import Datalayer
from superduper.components.dataset import Dataset, RemoteData

DEFAULT_PRIMARY_ID = 'id'

Expand All @@ -27,7 +28,7 @@ class Table(Component):

schema: Schema
primary_id: str = DEFAULT_PRIMARY_ID
data: t.List[t.Dict] | None = None
data: t.List[t.Dict] | 'Dataset' | 'RemoteData' | None = None

def __post_init__(self, db, artifacts):
super().__post_init__(db, artifacts)
Expand Down Expand Up @@ -64,4 +65,8 @@ def on_create(self, db: 'Datalayer'):

@trigger('apply', requires='data')
def add_data(self):
    """Insert ``self.data`` into the table named ``self.identifier``.

    When ``self.data`` is a ``Component``, the rows are read from its ``data``
    attribute; otherwise ``self.data`` itself is inserted.
    """
    # Resolve the rows first so that exactly one insert is issued.
    data = self.data.data if isinstance(self.data, Component) else self.data
    self.db[self.identifier].insert(data).execute()
4 changes: 3 additions & 1 deletion superduper/components/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,14 @@ def __post_init__(self, db, artifacts, substitutions):
def __call__(self, **kwargs):
    """Method to create component from the given template and `kwargs`.

    Values missing from ``kwargs`` are filled in from ``self.default_values``.
    ``output_prefix`` is optional: it is taken from ``CFG.output_prefix`` when
    absent and must equal it when supplied.

    :param kwargs: Values for the template variables.
    :raises AssertionError: If the supplied variables do not match
        ``self.template_variables`` or ``output_prefix`` differs from
        ``CFG.output_prefix``.
    """
    kwargs.update({k: v for k, v in self.default_values.items() if k not in kwargs})

    # ``output_prefix`` is left out on both sides, so it may be supplied
    # explicitly or through the defaults without failing this check.
    expected = set(self.template_variables) - {'output_prefix'}
    provided = set(kwargs) - {'output_prefix'}
    assert provided == expected, (
        f'Template variables mismatch: expected {sorted(expected)}, '
        f'got {sorted(provided)}'
    )

    if 'output_prefix' in kwargs:
        assert kwargs['output_prefix'] == CFG.output_prefix
    else:
        kwargs["output_prefix"] = CFG.output_prefix

    component = _replace_variables(self.template, **kwargs)
    return Document.decode(component, db=self.db).unpack()

Expand Down
Binary file not shown.
Loading

0 comments on commit 8580a6c

Please sign in to comment.