diff --git a/src/malco/post_process/post_process.py b/src/malco/post_process/post_process.py index e243ffa0..0353330f 100644 --- a/src/malco/post_process/post_process.py +++ b/src/malco/post_process/post_process.py @@ -4,7 +4,7 @@ import os -def post_process(raw_results_dir: Path, output_dir: Path, langs: tuple, models: tuple) -> None: +def post_process(self) -> None: """ Post-process the raw results output to standardised PhEval TSV format. @@ -12,24 +12,26 @@ def post_process(raw_results_dir: Path, output_dir: Path, langs: tuple, models: raw_results_dir (Path): Path to the raw results directory. output_dir (Path): Path to the output directory. """ - - ''' - for lang in langs: - raw_results_lang = raw_results_dir / "multilingual" / lang - output_lang = output_dir / "multilingual" / lang - raw_results_lang.mkdir(exist_ok=True, parents=True) - output_lang.mkdir(exist_ok=True, parents=True) + raw_results_dir = self.raw_results_dir + output_dir = self.output_dir + langs = self.languages + models = self.models + + if self.modality == "several_languages": + for lang in langs: + raw_results_lang = raw_results_dir / "multilingual" / lang + output_lang = output_dir / "multilingual" / lang + raw_results_lang.mkdir(exist_ok=True, parents=True) + output_lang.mkdir(exist_ok=True, parents=True) - create_standardised_results(raw_results_dir=raw_results_lang, - output_dir=output_lang, output_file_name="results.tsv") + create_standardised_results(raw_results_dir=raw_results_lang, + output_dir=output_lang, output_file_name="results.tsv") + elif self.modality == "several_models": + for model in models: + raw_results_model = raw_results_dir / "multimodel" / model + output_model = output_dir / "multimodel" / model + raw_results_model.mkdir(exist_ok=True, parents=True) + output_model.mkdir(exist_ok=True, parents=True) - ''' - #TODO should this duplicated code a single code with a parameter? - for model in models: - raw_results_model = raw_results_dir / "multimodel" / model - output_model = output_dir / "multimodel" / model - raw_results_model.mkdir(exist_ok=True, parents=True) - output_model.mkdir(exist_ok=True, parents=True) - - create_standardised_results(raw_results_dir=raw_results_model, - output_dir=output_model, output_file_name="results.tsv") + create_standardised_results(raw_results_dir=raw_results_model, + output_dir=output_model, output_file_name="results.tsv") diff --git a/src/malco/prepare/setup_run_pars.py b/src/malco/prepare/setup_run_pars.py index 6d1be475..d27dd0b6 100644 --- a/src/malco/prepare/setup_run_pars.py +++ b/src/malco/prepare/setup_run_pars.py @@ -1,5 +1,6 @@ # setup_run_pars import csv +import sys def import_inputdata(self): """Example inputfile is located in input_dir and named run_parameters.csv @@ -26,10 +27,22 @@ def import_inputdata(self): in_models = next(lines) in_what_to_run = next(lines) + l = len(in_langs) + m = len(in_models) + if (l > 1 and m > 1): + sys.exit("Error, either run multiple languages or models, not both, exiting...") + elif l == 1 and m >= 1: + if in_langs[0]=="en": + self.modality = "several_models" # English and more than 1 model defaults to multiple models + else: + if m > 1: + sys.exit("Error, only English and multiple models supported, exiting...") + else: # m==1 + self.modality = "several_languages" # non English defaults to multiple languages + elif l > 1: + self.modality = "several_languages" self.languages = tuple(in_langs) - #self.languages = ("en", "es", "nl", "it", "de") - #self.models = ("gpt-3.5-turbo", "gpt-4", "gpt-4-turbo", "gpt-4o") # Decide on list of models: Claude-Sonnet (Anthropic key), self.models = tuple(in_models) self.do_run_step = in_what_to_run[0] # only run the run part of the code self.do_postprocess_step = in_what_to_run[1] # only run the postprocess part of the code diff --git a/src/malco/run/run.py b/src/malco/run/run.py index 0a88e58b..61870d5f 100644 --- a/src/malco/run/run.py +++ b/src/malco/run/run.py @@ -19,7 +19,7 @@ def call_ontogpt( lang_or_model_dir = model prompt_dir += "en/" else: - raise ValueError('not permitted run modality!\n') + raise ValueError('Not permitted run modality!\n') selected_indir = search_ppkts(input_dir, prompt_dir, raw_results_dir, lang_or_model_dir) yaml_file = f"{raw_results_dir}/{lang_or_model_dir}/results.yaml" @@ -56,12 +56,7 @@ def call_ontogpt( pass -#TODO decide whether to get rid of parallelization -def run(testdata_dir: Path, - raw_results_dir: Path, - input_dir: Path, - langs: tuple, - models: tuple, +def run(self, max_workers: int = None) -> None: """ Run the tool to obtain the raw results. @@ -73,20 +68,28 @@ def run(testdata_dir: Path, langs: Tuple of languages. max_workers: Maximum number of worker processes to use. """ + testdata_dir = self.testdata_dir + raw_results_dir = self.raw_results_dir + input_dir = self.input_dir + langs = self.languages + models = self.models + modality = self.modality if max_workers is None: max_workers = multiprocessing.cpu_count() - ''' - modality = "several_languages" - with multiprocessing.Pool(processes=max_workers) as pool: - pool.starmap(call_ontogpt, [(lang, raw_results_dir / "multilingual", input_dir, "gpt-4-turbo", modality) for lang in langs]) - ''' - - # English only many models - modality = "several_models" - with multiprocessing.Pool(processes=max_workers) as pool: - try: - pool.starmap(call_ontogpt, [("en", raw_results_dir / "multimodel", input_dir, model, modality) for model in models]) - except FileExistsError as e: - raise ValueError('Did not clean up after last run, check tmp dir: \n' + e) + if modality == "several_languages": + with multiprocessing.Pool(processes=max_workers) as pool: + try: + pool.starmap(call_ontogpt, [(lang, raw_results_dir / "multilingual", input_dir, "gpt-4o", modality) for lang in langs]) + except FileExistsError as e: + raise ValueError('Did not clean up after last run, check tmp dir: \n' + e) + + + if modality == "several_models": + # English only many models + with multiprocessing.Pool(processes=max_workers) as pool: + try: + pool.starmap(call_ontogpt, [("en", raw_results_dir / "multimodel", input_dir, model, modality) for model in models]) + except FileExistsError as e: + raise ValueError('Did not clean up after last run, check tmp dir: \n' + e) diff --git a/src/malco/runner.py b/src/malco/runner.py index 278bf8f5..047238e6 100644 --- a/src/malco/runner.py +++ b/src/malco/runner.py @@ -1,6 +1,6 @@ from dataclasses import dataclass from pathlib import Path - +from shutil import rmtree from pheval.runners.runner import PhEvalRunner from malco.post_process.ranking_utils import compute_mrr_and_ranks @@ -11,7 +11,7 @@ from malco.post_process.generate_plots import make_plots import os -@dataclass # (N) if PhevalRunner is already one? +@dataclass # necessary if PhevalRunner is already one? class MalcoRunner(PhEvalRunner): input_dir: Path testdata_dir: Path @@ -19,11 +19,6 @@ class MalcoRunner(PhEvalRunner): output_dir: Path config_file: Path version: str - #TODO Declaring the following does not work - #languages: tuple - #models: tuple - #just_run: bool - #just_postprocess: bool def prepare(self): @@ -40,13 +35,12 @@ def run(self): print("running with predictor") pass if self.do_run_step: - run(testdata_dir=self.testdata_dir, - raw_results_dir=self.raw_results_dir, - input_dir=self.input_dir, - langs=self.languages, - models=self.models) + run(self, + ) # Cleanup - os.system(f"rm -r {self.input_dir}/prompts/tmp/") + tmp_dir = f"{self.input_dir}/prompts/tmp/" + if os.path.isdir(tmp_dir): + rmtree(tmp_dir) def post_process(self, @@ -60,31 +54,25 @@ def post_process(self, if self.do_postprocess_step: print("post processing results to PhEval standardised TSV output.") - post_process(raw_results_dir=self.raw_results_dir, - output_dir=self.output_dir, - langs=self.languages, - models=self.models) + post_process(self) - ''' - comparing = "language" - mrr_file, plot_dir, num_ppkt, topn_aggr_file = compute_mrr_and_ranks(comparing, - output_dir=self.output_dir, - out_subdir="multilingual", - prompt_dir=os.path.join(self.input_dir, prompts_subdir_name), - correct_answer_file=correct_answer_file) - if print_plot: - make_plots(mrr_file, plot_dir, self.languages, num_ppkt, self.models, topn_aggr_file, comparing) - - ''' - comparing = "model" + if self.modality=="several_languages": + comparing = "language" + out_subdir="multilingual" + elif self.modality=="several_models": + comparing = "model" + out_subdir="multimodel" + else: + raise ValueError('Not permitted run modality!\n') + mrr_file, data_dir, num_ppkt, topn_aggr_file = compute_mrr_and_ranks(comparing, output_dir=self.output_dir, - out_subdir="multimodel", + out_subdir=out_subdir, prompt_dir=os.path.join(self.input_dir, prompts_subdir_name), correct_answer_file=correct_answer_file) if print_plot: make_plots(mrr_file, data_dir, self.languages, num_ppkt, self.models, topn_aggr_file, comparing) - + \ No newline at end of file