
Commit

KennethEnevoldsen committed Mar 21, 2024
2 parents 364be7f + dd5d617 commit b42abe4
Showing 233 changed files with 7,196 additions and 3,658 deletions.
28 changes: 28 additions & 0 deletions .github/disabled_workflows/lint.yml
@@ -0,0 +1,28 @@
# GitHub action to run linting

name: run-linting

on:
pull_request:
branches: [main]
push:
branches: [main]

jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3

- uses: actions/setup-python@v4
with:
python-version: "3.8"
cache: "pip"

- name: Install dependencies
run: make install

- name: Lint
id: lint
run: |
make lint
50 changes: 50 additions & 0 deletions .github/disabled_workflows/release.yml
@@ -0,0 +1,50 @@
# This workflow will
# - Find the latest version tag based on the commit history
# - Create a git tag for the new version
# - Update the version number in pyproject.toml based on the commit history
# - Upload the package to PyPI
# - Create a release on GitHub

# This workflow requires the following secrets to be set:
# - a GitHub personal access token with the `repo` scope, stored as `RELEASE`
# - trusted publishing set up on PyPI as described here: https://blog.pypi.org/posts/2023-04-20-introducing-trusted-publishers/

name: Release
on:
workflow_run:
workflows: ["tests"]
types:
- completed
jobs:
release:
runs-on: ubuntu-latest
concurrency: release
permissions:
id-token: write # IMPORTANT: this permission is mandatory for trusted publishing using PyPI


if: ${{ github.ref == 'refs/heads/main' && github.event.workflow_run.conclusion == 'success'}}
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
token: ${{ secrets.RELEASE }}

- name: Python Semantic Release
id: release
uses: python-semantic-release/python-semantic-release@v8.0.4
with:
github_token: ${{ secrets.RELEASE }}

- name: Publish package distributions to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
if: steps.release.outputs.released == 'true'
# This action supports PyPI's trusted publishing implementation, which allows authentication to PyPI without a manually
# configured API token or username/password combination. To perform trusted publishing with this action, your project's
# publisher must already be configured on PyPI.

- name: Publish package distributions to GitHub Releases
uses: python-semantic-release/upload-to-gh-release@main
if: steps.release.outputs.released == 'true'
with:
github_token: ${{ secrets.RELEASE }}
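
As background for the release steps above: by default, python-semantic-release infers the next version from Conventional Commits-style messages (fix → patch, feat → minor, breaking change → major). The sketch below is a simplified illustration of that rule, not the tool's actual implementation:

# Illustrative only; the real logic lives in python-semantic-release.
def bump_kind(commit_messages: list[str]) -> str:
    """Return 'major', 'minor', 'patch', or 'none' from conventional-commit headers."""
    kind = "none"
    for msg in commit_messages:
        header = msg.splitlines()[0]
        if "BREAKING CHANGE" in msg or header.split(":")[0].endswith("!"):
            return "major"
        if header.startswith("feat"):
            kind = "minor"
        elif header.startswith("fix") and kind != "minor":
            kind = "patch"
    return kind

print(bump_kind(["fix: handle empty dataset", "feat: add a new task"]))  # -> "minor"
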
36 changes: 0 additions & 36 deletions .github/workflows/python-package.yml

This file was deleted.

43 changes: 43 additions & 0 deletions .github/workflows/tests.yml
@@ -0,0 +1,43 @@
# This workflow will:
# 1) install Python dependencies
# 2) run make test


name: Tests
on:
push:
branches: [main]
pull_request:
branches: [main]

jobs:
pytest:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest] #, macos-latest, windows-latest]
python-version: ["3.8", "3.9", "3.10"]

# This allows a subsequently queued workflow run to interrupt previous runs
concurrency:
group: "${{ github.workflow }}-${{ matrix.python-version}}-${{ matrix.os }} @ ${{ github.ref }}"
cancel-in-progress: true

steps:
- uses: actions/checkout@v3

- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
cache: "pip"

- name: Install dependencies
shell: bash
run: |
make install
- name: Run tests
shell: bash
run: |
make test
3 changes: 3 additions & 0 deletions .gitignore
@@ -133,3 +133,6 @@ dmypy.json

# error logs
error_logs.txt

# tests
tests/results
2 changes: 1 addition & 1 deletion .vscode/settings.json
@@ -4,5 +4,5 @@
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"editor.defaultFormatter": "ms-python.black-formatter"
"editor.defaultFormatter": "charliermarsh.ruff",
}
48 changes: 8 additions & 40 deletions Makefile
@@ -1,49 +1,17 @@
.PHONY: modified_only_fixup quality style fixup tests

check_dirs := tests mteb scripts

modified_only_fixup:
$(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
@if test -n "$(modified_py_files)"; then \
echo "Checking/fixing $(modified_py_files)"; \
black --preview $(modified_py_files); \
isort $(modified_py_files); \
flake8 $(modified_py_files); \
else \
echo "No library .py files were modified"; \
fi

# Super fast fix and check target that only works on relevant modified files since the branch was made
fixup: modified_only_fixup


# This installs all the required dependencies
install:
@echo "--- 🚀 Installing project dependencies ---"
pip install -e ".[dev]"

# this target runs checks on all files
quality:
black --check --preview $(check_dirs)
isort --check-only $(check_dirs)
flake8 $(check_dirs)


# this target runs checks on all files and potentially modifies some of them
style:
black --preview $(check_dirs)
isort $(check_dirs)

# runs the same lints as the github actions
lint:
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
@echo "--- 🧹 Running linters ---"
ruff format . # running ruff formatting
ruff check . --fix # running ruff linting

# Run tests for the library
test:
@echo "--- 🧪 Running tests ---"
pytest

# add parallel test for faster execution (can sometimes cause issues with some tests)
test-parallel:
pytest -n auto --dist=loadfile -s -v
@echo "--- 🧪 Running tests ---"
@echo "Note that parallel tests can sometimes cause issues with some tests."
pytest -n auto --dist=loadfile -s -v
2 changes: 0 additions & 2 deletions README.md
@@ -222,8 +222,6 @@ evaluation.run(model)
| 📈 [Leaderboard] | The interactive leaderboard of the benchmark |
| 🤖 [Adding a model] | Information related to how to submit a model to the leaderboard |
| 🤝 [Contributing] | How to contribute to MTEB and set it up for development |
|


[Tasks]: docs/tasks.md
[Contributing]: docs/contributing.md
2 changes: 1 addition & 1 deletion docs/tasks.md
@@ -1,7 +1,7 @@
## Available tasks
The following tables give you an overview of the tasks in MTEB.

<!-- The following table is auto-generated: -->
<!-- This allows the table to be autogenerated in the future: -->
<!-- TABLE START -->

## Available tasks
8 changes: 6 additions & 2 deletions mteb/__init__.py
@@ -1,7 +1,11 @@
__version__ = "1.2.1.dev0"
from __future__ import annotations

from importlib.metadata import version

from mteb.evaluation import *

__version__ = version("mteb") # fetch version from install metadata


MTEB_MAIN_EN = [
"AmazonCounterfactualClassification",
@@ -70,5 +74,5 @@
"TweetSentimentExtractionClassification",
"TwentyNewsgroupsClustering",
"TwitterSemEval2015",
"TwitterURLCorpus"
"TwitterURLCorpus",
]
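
For context on the `__version__` change above: `importlib.metadata.version` reads the version recorded when the package was installed, so it stays in sync with `pyproject.toml` without a hand-maintained string. A minimal sketch of the pattern (the fallback branch is added here for illustration and is not part of this diff):

from importlib.metadata import PackageNotFoundError, version

try:
    __version__ = version("mteb")  # resolved from the installed distribution's metadata
except PackageNotFoundError:
    # Fallback for a source checkout that was never pip-installed (illustrative only).
    __version__ = "0.0.0"
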
10 changes: 7 additions & 3 deletions mteb/abstasks/AbsTask.py
@@ -1,3 +1,5 @@
from __future__ import annotations

import random
from abc import ABC, abstractmethod

@@ -24,17 +26,19 @@ def load_data(self, **kwargs):
"""
Load dataset from HuggingFace hub
"""
if self.data_loaded: return
if self.data_loaded:
return

# TODO: add split argument
self.dataset = datasets.load_dataset(
self.description["hf_hub_name"], revision=self.description.get("revision", None)
self.metadata_dict["hf_hub_name"],
revision=self.metadata_dict.get("revision", None),
)
self.data_loaded = True

@property
@abstractmethod
def description(self):
def metadata_dict(self) -> dict[str, str]:
"""
Returns a description of the task. Should contain the following fields:
name: Name of the task (usually equal to the class name; should be a valid name for a path on disk)
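
To make the renamed contract concrete, a minimal, hypothetical subclass might look like the sketch below (class name, dataset id, and metadata values are placeholders; only `metadata_dict` is needed for the inherited `load_data` to resolve `hf_hub_name` and `revision`):

from mteb.abstasks.AbsTask import AbsTask


class ToyTask(AbsTask):  # hypothetical task, for illustration only
    @property
    def metadata_dict(self) -> dict:
        return {
            "name": "ToyTask",
            "hf_hub_name": "username/toy-dataset",  # made-up dataset id
            "revision": "main",
            "eval_splits": ["test"],
            "main_score": "accuracy",
        }

    def evaluate(self, model, split="test", **kwargs):
        self.load_data()  # inherited: calls datasets.load_dataset(hf_hub_name, revision=...)
        ...  # task-specific scoring goes here
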
14 changes: 8 additions & 6 deletions mteb/abstasks/AbsTaskBitextMining.py
@@ -1,3 +1,5 @@
from __future__ import annotations

import logging

from ..evaluation.evaluators import BitextMiningEvaluator
@@ -11,7 +13,7 @@ class AbsTaskBitextMining(AbsTask):
Abstract class for BitextMining tasks
The similarity is computed between pairs and the results are ranked.
self.load_data() must generate a huggingface dataset with a split matching self.description["eval_splits"], and assign it to self.dataset. It must contain the following columns:
self.load_data() must generate a huggingface dataset with a split matching self.metadata_dict["eval_splits"], and assign it to self.dataset. It must contain the following columns:
id: str
sentence1: str
sentence2: str
@@ -28,13 +30,13 @@ def evaluate(self, model, split, **kwargs):
scores = {}
for lang in self.dataset:
logger.info(
f"\nTask: {self.description['name']}, split: {split}, language: {lang}. Running..."
f"\nTask: {self.metadata_dict['name']}, split: {split}, language: {lang}. Running..."
)
data_split = self.dataset[lang][split]
scores[lang] = self._evaluate_split(model, data_split, **kwargs)
else:
logger.info(
f"\nTask: {self.description['name']}, split: {split}. Running..."
f"\nTask: {self.metadata_dict['name']}, split: {split}. Running..."
)
data_split = self.dataset[split]
scores = self._evaluate_split(model, data_split, **kwargs)
@@ -72,9 +74,9 @@ def _evaluate_split(self, model, data_split, **kwargs):
return metrics

def _add_main_score(self, scores):
if self.description["main_score"] in scores:
scores["main_score"] = scores[self.description["main_score"]]
if self.metadata_dict["main_score"] in scores:
scores["main_score"] = scores[self.metadata_dict["main_score"]]
else:
logger.warn(
f"main score {self.description['main_score']} not found in scores {scores.keys()}"
f"main score {self.metadata_dict['main_score']} not found in scores {scores.keys()}"
)
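
For reference, a dataset matching the columns this abstract class expects (`id`, `sentence1`, `sentence2`) can be built with the `datasets` library; the values below are placeholders, and the split name should match `metadata_dict["eval_splits"]`:

from datasets import Dataset, DatasetDict

# Placeholder sentence pairs; a real task would load these from the Hub instead.
test_split = Dataset.from_dict(
    {
        "id": ["0", "1"],
        "sentence1": ["Hello world", "Good morning"],
        "sentence2": ["Hallo Welt", "Guten Morgen"],
    }
)
dataset = DatasetDict({"test": test_split})  # assign to self.dataset in load_data()
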