Skip to content

Commit

Permalink
fix translation tasks + add copas
Browse files Browse the repository at this point in the history
  • Loading branch information
hynky1999 committed Sep 25, 2024
1 parent 7faaa8a commit bfd34e0
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 2 deletions.
112 changes: 111 additions & 1 deletion src/lighteval/tasks/multilingual/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from lighteval.metrics.dynamic_metrics import loglikelihood_acc_metric
from lighteval.metrics.normalizations import LogProbTokenNorm
from lighteval.tasks.lighteval_task import LightevalTaskConfig
from lighteval.tasks.templates.copa import get_copa_prompt_function
from lighteval.tasks.templates.nli import get_nli_prompt_function
from lighteval.tasks.templates.utils.formulation import (
CFFormulation,
Expand Down Expand Up @@ -176,4 +177,113 @@
]


# Flat list of the task configs defined so far (the three XNLI-style task groups).
# This name is assigned again further down, after the COPA task groups are defined.
TASKS_TABLE = [*xnli_tasks, *xnli2_tasks, *xnli_indic_tasks]
# ------------------------------- Copa Tasks ------------------------------- #

def _xcopa_adapter(line):
    """Translate one XCOPA row into the keyword fields handed to the COPA prompt function."""
    return {
        "context": line["premise"],
        "cause_effect": line["question"],
        "continuations": [line["choice1"], line["choice2"]],
        "gold_idx": int(line["label"]),
    }


def _build_xcopa_task(language, formulation):
    """Create the XCOPA task config for a single (language, formulation) pair.

    The task name is ``xcopa_<language value>_<lower-cased formulation name>``.
    Arabic is routed to a different Hub repository/subset than every other language.
    """
    task_name = f"xcopa_{language.value}_{formulation.name.lower()}"
    prompt_fn = get_copa_prompt_function(
        language,
        adapter=_xcopa_adapter,
        formulation=formulation,
    )

    # NOTE(review): Language.ARABIC is not in the language list used below, so this
    # branch is currently never taken -- confirm whether Arabic should be added.
    if language == Language.ARABIC:
        repo = "OALL/AlGhafa-Arabic-LLM-Benchmark-Translated"
        subset = "copa_ext_ar"
    else:
        repo = "xcopa"
        subset = standardize_tag(language.value)

    return LightevalTaskConfig(
        name=task_name,
        suite=["custom"],
        prompt_function=prompt_fn,
        hf_repo=repo,
        hf_subset=subset,
        evaluation_splits=["test"],
        few_shots_split="validation",
        generation_size=-1,
        metric=[
            loglikelihood_acc_metric(normalization=LogProbTokenNorm()),
        ],
    )


# One task per language and per prompt formulation (MCF, CF, Hybrid).
copa_tasks = [
    _build_xcopa_task(language, formulation)
    for language in (
        Language.ESTONIAN,
        Language.INDONESIAN,
        Language.ITALIAN,
        Language.SWAHILI,
        Language.TAMIL,
        Language.THAI,
        Language.TURKISH,
        Language.VIETNAMESE,
        Language.CHINESE,
        # Optionally: Haitian, Quechua
    )
    for formulation in (MCFFormulation(), CFFormulation(), HybridFormulation())
]

def _indic_copa_adapter(line):
    """Translate one IndicCOPA row into the keyword fields handed to the COPA prompt function."""
    return {
        "context": line["premise"],
        "cause_effect": line["question"],
        "continuations": [line["choice1"], line["choice2"]],
        "gold_idx": int(line["label"]),
    }


def _build_indic_copa_task(language, formulation):
    """Create the IndicCOPA task config for a single (language, formulation) pair.

    The task name is ``indicxcopa_<language value>_<lower-cased formulation name>`` and the
    Hub subset is ``translation-<standardized language tag>``.
    """
    task_name = f"indicxcopa_{language.value}_{formulation.name.lower()}"
    prompt_fn = get_copa_prompt_function(
        language,
        adapter=_indic_copa_adapter,
        formulation=formulation,
    )
    subset = f"translation-{standardize_tag(language.value)}"

    return LightevalTaskConfig(
        name=task_name,
        suite=["custom"],
        prompt_function=prompt_fn,
        hf_repo="ai4bharat/IndicCOPA",
        hf_subset=subset,
        evaluation_splits=["test"],
        metric=[
            loglikelihood_acc_metric(normalization=LogProbTokenNorm()),
        ],
        trust_dataset=True,
    )


# One task per language and per prompt formulation (MCF, CF, Hybrid).
copa_indic_tasks = [
    _build_indic_copa_task(language, formulation)
    for language in (
        Language.ASSAMESE,
        Language.BENGALI,
        Language.GUJARATI,
        Language.HINDI,
        Language.KANNADA,
        Language.MALAYALAM,
        Language.MARATHI,
        Language.NEPALI,
        Language.ORIYA,
        Language.PUNJABI,
        Language.SANSKRIT,
        Language.SINDHI,
        Language.TAMIL,
        Language.TELUGU,
        Language.URDU,
        # Optionally: Maithili, Santali, Konkani (Sindhi is already included above)
    )
    for formulation in (MCFFormulation(), CFFormulation(), HybridFormulation())
]

def _parus_adapter(line):
    """Translate one PARus row (nested ``inputs``/``meta``/``outputs``) into COPA prompt fields."""
    inputs = line["inputs"]
    return {
        "context": inputs["premise"],
        "cause_effect": line["meta"]["task"],
        "continuations": [inputs["choice1"], inputs["choice2"]],
        # The stored answer is shifted down by one to get the index into `continuations`
        # (presumably the dataset labels are 1-based -- confirm against the dataset card).
        "gold_idx": int(line["outputs"]) - 1,
    }


def _build_parus_task(formulation):
    """Create the Russian PARus task config for one prompt formulation.

    The task name is ``parus_<Russian language value>_<lower-cased formulation name>``.
    """
    task_name = f"parus_{Language.RUSSIAN.value}_{formulation.name.lower()}"
    prompt_fn = get_copa_prompt_function(
        language=Language.RUSSIAN,
        adapter=_parus_adapter,
        formulation=formulation,
    )

    return LightevalTaskConfig(
        name=task_name,
        suite=["custom"],
        prompt_function=prompt_fn,
        hf_repo="ai-forever/MERA",
        hf_subset="parus",
        # Evaluation runs on the "train" split; few-shot examples come from "validation".
        evaluation_splits=["train"],
        few_shots_split="validation",
        metric=[
            loglikelihood_acc_metric(normalization=LogProbTokenNorm()),
        ],
    )


# One task per prompt formulation (MCF, CF, Hybrid).
parus_tasks = [
    _build_parus_task(formulation)
    for formulation in (MCFFormulation(), CFFormulation(), HybridFormulation())
]


# Flat list of every task config defined in this module: NLI groups first, then COPA groups.
TASKS_TABLE = [
    *xnli_tasks,
    *xnli2_tasks,
    *xnli_indic_tasks,
    *copa_tasks,
    *copa_indic_tasks,
    *parus_tasks,
]
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,6 @@ def __getattribute__(self, name: str) -> str:
Language.GALICIAN: TranslationLiterals(language=Language.GALICIAN),
Language.ARMENIAN: TranslationLiterals(language=Language.ARMENIAN),
Language.BASQUE: TranslationLiterals(language=Language.BASQUE),
Language.SWAHILI: TranslationLiterals(language=Language.SWAHILI),
Language.MALAY: TranslationLiterals(language=Language.MALAY),
Language.TAGALOG: TranslationLiterals(language=Language.TAGALOG),
Language.JAVANESE: TranslationLiterals(language=Language.JAVANESE),
Expand Down

0 comments on commit bfd34e0

Please sign in to comment.