# Commit title as captured (truncated): "feat: set user being able to set chunk size and overlap for indices (…" (#545)
# Viewer notice: this file contains bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below. To review, open
# the file in an editor that reveals hidden Unicode characters.
# Runs the kotaemon unit tests on every pull request to, and push on, `main`.
# Python dependencies are cached per OS / Python version / package version;
# putting "[ignore cache]" in the head commit subject bypasses the cache.
name: unit-test

on:
  pull_request:
    branches: [main]
  push:
    branches: [main]

env:
  THEFLOW_TEMP_PATH: ./tmp

jobs:
  unit-test:
    # if: false  # temporarily disable this job due to legacy interface
    # TODO: enable this job after the new interface is ready
    if: ${{ !cancelled() }}
    runs-on: ${{ matrix.os }}
    timeout-minutes: 20
    defaults:
      run:
        shell: ${{ matrix.shell }}
    strategy:
      matrix:
        # Quoted so "3.10" is not parsed as the float 3.1.
        python-version: ["3.10", "3.11"]
        include:
          # Per-OS settings; GITHUB_OUTPUT holds the shell-specific spelling
          # of the step-output file variable that gets pasted into scripts.
          - os: ubuntu-latest
            shell: bash
            ACTIVATE_ENV: ". env/bin/activate"
            GITHUB_OUTPUT: "$GITHUB_OUTPUT"
          # - os: windows-latest
          #   shell: pwsh
          #   ACTIVATE_ENV: env/Scripts/activate.ps1
          #   GITHUB_OUTPUT: "$env:GITHUB_OUTPUT"
    name: unit testing with python ${{ matrix.python-version }}
    steps:
      - name: Clone the repo
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Get Head Commit Message
        id: get-head-commit-message
        run: |
          echo "message=$(git show -s --format=%s)" | tee -a ${{ matrix.GITHUB_OUTPUT }}

      - name: Check ignore caching
        id: check-ignore-cache
        # `check` is 'true' when the commit subject contains "[ignore cache]".
        run: |
          ignore_cache=${{ contains(steps.get-head-commit-message.outputs.message, '[ignore cache]') }}
          echo "check=$ignore_cache" | tee -a ${{ matrix.GITHUB_OUTPUT }}

      # NOTE(review): setup-python@v4 and cache@v3 may target a deprecated
      # action runtime; verify and consider moving to the current majors.
      - name: Set up Python ${{ matrix.python-version }} on ${{ runner.os }}
        uses: actions/setup-python@v4
        id: setup_python
        with:
          python-version: ${{ matrix.python-version }}
          architecture: x64

      - name: Get cache key
        id: get-cache-key
        # Key layout: <os>-py<python version>-v<package version>.
        run: |
          pip install "setuptools-git-versioning>=2.0,<3"
          package_version=$(setuptools-git-versioning)
          cache_key="${{ runner.os }}-py${{ matrix.python-version }}-v${package_version}"
          echo "key=$cache_key" | tee -a ${{ matrix.GITHUB_OUTPUT }}

      - name: Try to restore dependencies from ${{ steps.get-cache-key.outputs.key }}
        id: restore-dependencies
        if: steps.check-ignore-cache.outputs.check != 'true'
        uses: actions/cache/restore@v3
        with:
          path: ${{ env.pythonLocation }}
          key: ${{ steps.get-cache-key.outputs.key }}
          # Fall back to a cache from a previous package version so that
          # unchanged packages can be reused.
          restore-keys: ${{ runner.os }}-py${{ matrix.python-version }}

      - name: Check cache hit
        id: check-cache-hit
        # Only an exact match (matched key == primary key) counts as a hit;
        # a restore-keys fallback leaves `check` false so the save step below
        # stores a fresh cache under the primary key.
        run: |
          echo "cache-hit=${{ steps.restore-dependencies.outputs.cache-hit }}"
          echo "cache-matched-key=${{ steps.restore-dependencies.outputs.cache-matched-key }}"
          cache_hit=${{ steps.restore-dependencies.outputs.cache-primary-key == steps.restore-dependencies.outputs.cache-matched-key }}
          echo "check=$cache_hit" | tee -a ${{ matrix.GITHUB_OUTPUT }}

      - name: Install additional dependencies (if any)
        # The extras spec is quoted so the shell never treats `.[all]` as a
        # glob pattern.
        run: |
          python -m pip install --upgrade pip
          cd libs/kotaemon
          pip install -U --upgrade-strategy eager -e ".[all]"

      - name: New dependencies cache for key ${{ steps.restore-dependencies.outputs.cache-primary-key }}
        if: |
          steps.check-ignore-cache.outputs.check != 'true' &&
          steps.check-cache-hit.outputs.check != 'true'
        uses: actions/cache/save@v3
        with:
          path: ${{ env.pythonLocation }}
          key: ${{ steps.restore-dependencies.outputs.cache-primary-key }}

      - name: Install OS-based packages
        # apt-get rather than apt: the `apt` front end is not meant for scripts.
        run: |
          sudo apt-get update -qqy
          sudo apt-get install -y poppler-utils libpoppler-dev tesseract-ocr

      - name: Test kotaemon with pytest
        run: |
          pip show pytest
          cd libs/kotaemon
          pytest