Skip to content

Commit

Permalink
Fix splits
Browse files Browse the repository at this point in the history
  • Loading branch information
Muennighoff committed Sep 4, 2023
1 parent 52c1fd8 commit 93f6f85
Showing 1 changed file with 14 additions and 3 deletions.
17 changes: 14 additions & 3 deletions scripts/mteb_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@
results = json.load(f)
all_results = {**all_results, **{file_name.replace(".json", ""): results}}

# The default evaluation split is "test". Datasets listed below are reported
# on a different split, but only when that split is present in their results.

# Datasets reported on the "train" split.
TRAIN_SPLIT = [
    "DanishPoliticalCommentsClassification",
]

# Datasets reported on the "validation" split.
VALIDATION_SPLIT = [
    "AFQMC",
    "Cmnli",
    "IFlyTek",
    "TNews",
    "MSMARCO",
    "MultilingualSentiment",
    "Ocnli",
]

# Datasets reported on the "dev" split.
# NOTE(review): "MSMARCO" is also listed in VALIDATION_SPLIT; the split
# selection appears to test "validation" before "dev", so "dev" would only be
# picked for it when no "validation" results exist -- confirm this is intended.
DEV_SPLIT = [
    "CmedqaRetrieval",
    "CovidRetrieval",
    "DuRetrieval",
    "EcomRetrieval",
    "MedicalRetrieval",
    "MMarcoReranking",
    "MMarcoRetrieval",
    "MSMARCO",
    "T2Reranking",
    "T2Retrieval",
    "VideoRetrieval",
]

# Literal text markers; presumably pieces of the model-card metadata header
# (their use is not visible here) -- confirm at the use site.
MARKER = "---"
TAGS = "tags:"
MTEB_TAG = "- mteb"
Expand All @@ -71,9 +78,13 @@
mteb_type = mteb_desc["type"]
revision = res_dict.get("dataset_revision") # Okay if it's None
split = "test"
if ds_name == "MSMARCO":
split = "dev" if "dev" in res_dict else "validation"
if split not in res_dict:
if (ds_name in TRAIN_SPLIT) and ("train" in res_dict):
split = "train"
elif (ds_name in VALIDATION_SPLIT) and ("validation" in res_dict):
split = "validation"
elif (ds_name in DEV_SPLIT) and ("dev" in res_dict):
split = "dev"
elif "test" not in res_dict:
logger.info(f"Skipping {ds_name} as split {split} not present.")
continue
res_dict = res_dict.get(split)
Expand Down

0 comments on commit 93f6f85

Please sign in to comment.