From 026dcfe9d947c0a1b575c0bf4a802b987dfec4a3 Mon Sep 17 00:00:00 2001
From: Mehmed Mustafa
Date: Fri, 1 Nov 2024 00:29:07 +0100
Subject: [PATCH] simplify oton tests

ocrd_validator: log only the `executable` of the processor whose
parameters are being validated instead of the whole processor object.
Inside the loop over the remaining processors the message refers to the
processor of the current iteration, not to the first processor.

tests: replace test_3_converter.py with
test_3_converter_ocrd_workflow1.py. The input/output paths, the expected
parameters and the expected main workflow become module-level constants,
and the assertions shared by the local and the docker conversion tests
move into assert_common_features(). The docker test additionally checks
that the env_wrapper parameter is declared and that every process block
references it.
---
 .../operandi_utils/oton/ocrd_validator.py     |  4 +-
 .../test_2_oton/test_3_converter.py           | 63 --------------
 .../test_3_converter_ocrd_workflow1.py        | 85 +++++++++++++++++++
 3 files changed, 87 insertions(+), 65 deletions(-)
 delete mode 100644 tests/tests_utils/test_2_oton/test_3_converter.py
 create mode 100644 tests/tests_utils/test_2_oton/test_3_converter_ocrd_workflow1.py

diff --git a/src/utils/operandi_utils/oton/ocrd_validator.py b/src/utils/operandi_utils/oton/ocrd_validator.py
index 19020ec6..6a5813da 100644
--- a/src/utils/operandi_utils/oton/ocrd_validator.py
+++ b/src/utils/operandi_utils/oton/ocrd_validator.py
@@ -38,12 +38,12 @@ def validate_file_path(self, filepath: str):
     def validate_all_processors(self, processors: List[ProcessorCallArguments]):
         prev_output_file_grps = []
         first_processor = processors[0]
-        self.logger.info(f"Validating parameters against json schema of processor: {first_processor}")
+        self.logger.info(f"Validating parameters against json schema of processor: {first_processor.executable}")
         self.validate_processor_params(first_processor, overwrite_with_defaults=False)
         prev_output_file_grps += first_processor.output_file_grps.split(',')
 
         for processor in processors[1:]:
-            self.logger.info(f"Validating parameters against json schema of processor: {first_processor}")
+            self.logger.info(f"Validating parameters against json schema of processor: {processor.executable}")
             self.validate_processor_params(processor, overwrite_with_defaults=False)
             for input_file_grp in processor.input_file_grps.split(','):
                 if input_file_grp not in prev_output_file_grps:
diff --git a/tests/tests_utils/test_2_oton/test_3_converter.py b/tests/tests_utils/test_2_oton/test_3_converter.py
deleted file mode 100644
index 42b2236c..00000000
--- a/tests/tests_utils/test_2_oton/test_3_converter.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from operandi_utils.oton.oton_converter import OTONConverter
-from re import sub
-import os
-
-
-def clean_up(path):
-    """Cleans up test artifacts from file system
-    """
-    if os.path.isfile(path):
-        os.remove(path)
-
-
-def test_conversion_with_env_local():
-    """E2E test for an OCR-D workflow conversion using native ocrd_all
-    """
-    oton_converter = OTONConverter()
-    input_path = 'tests/assets/workflows_oton/workflow1.txt'
-    output_path = 'tests/assets/workflows_oton/test_output_nextflow1.txt'
-    nextflow_file_class = oton_converter.convert_oton_env_local(input_path=input_path, output_path=output_path)
-    assert 'params.input_file_group = "OCR-D-IMG"' in nextflow_file_class.nf_lines_parameters
-    assert 'params.mets_path = "null"' in nextflow_file_class.nf_lines_parameters
-    assert 'params.workspace_dir = "null"' in nextflow_file_class.nf_lines_parameters
-
-    expected_workflow = """workflow {
-        main:
-            ocrd_cis_ocropy_binarize_0(params.mets_path, params.input_file_group, "OCR-D-BIN")
-            ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out, "OCR-D-BIN", "OCR-D-CROP")
-            ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out, "OCR-D-CROP", "OCR-D-BIN2")
-            ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out, "OCR-D-BIN2", "OCR-D-BIN-DENOISE")
-            ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out, "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW")
-            ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out, "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG")
-            ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out, "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP")
-            ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out, "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR")
-    }"""
-    expected_normalized = sub(r'\s+', '', expected_workflow)
-
-    with open(output_path, mode='r', encoding='utf-8') as fp:
-        wf = fp.read()
-        no_tab_string = sub(r'\t+', '', wf)
-        no_spaces_result = sub(r'\s+', '', no_tab_string)
-
-    # clean_up(output_path)
-
-    assert expected_normalized in no_spaces_result
-
-
-def test_conversion_with_env_docker():
-    """E2E test for an OCR-D workflow conversion using the Docker flag.
-    We test for success by looking for an exemplary line that is executed by Docker.
-    """
-
-    input_path = 'tests/assets/workflows_oton/workflow1.txt'
-    output_path = 'tests/assets/workflows_oton/test_output_nextflow1_docker1.txt'
-
-    oton_converter = OTONConverter()
-    nextflow_file_class = oton_converter.convert_oton_env_docker(input_path=input_path, output_path=output_path)
-
-    expected = '${params.env_wrapper} ocrd-cis-ocropy-binarize -m ${mets_file} -I ${input_file_group} -O ${output_file_group}'
-
-    with open(output_path, mode='r', encoding='utf-8') as fp:
-        wf = fp.read()
-    # clean_up(output_path)
-    assert expected in wf
diff --git a/tests/tests_utils/test_2_oton/test_3_converter_ocrd_workflow1.py b/tests/tests_utils/test_2_oton/test_3_converter_ocrd_workflow1.py
new file mode 100644
index 00000000..8fc30e32
--- /dev/null
+++ b/tests/tests_utils/test_2_oton/test_3_converter_ocrd_workflow1.py
@@ -0,0 +1,85 @@
+from operandi_utils.oton.oton_converter import OTONConverter
+from re import sub
+import os
+
+
+def clean_up(path):
+    """Cleans up test artifacts from file system
+    """
+    if os.path.isfile(path):
+        os.remove(path)
+
+INPUT_OCRD_PROCESS_WORKFLOW = 'tests/assets/workflows_oton/workflow1.txt'
+OUTPUT_NEXTFLOW_WORKFLOW_LOCAL = 'tests/assets/workflows_oton/test_output_nextflow1.txt'
+OUTPUT_NEXTFLOW_WORKFLOW_DOCKER = 'tests/assets/workflows_oton/test_output_nextflow1_docker1.txt'
+
+# Test parameters based on INPUT_OCRD_PROCESS_WORKFLOW
+TEST_PARAMETERS = [
+    'params.input_file_group = "OCR-D-IMG"',
+    'params.mets_path = "null"',
+    'params.workspace_dir = "null"'
+]
+
+EXPECTED_MAIN_WORKFLOW = """
+workflow {
+    main:
+        ocrd_cis_ocropy_binarize_0(params.mets_path, params.input_file_group, "OCR-D-BIN")
+        ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out, "OCR-D-BIN", "OCR-D-CROP")
+        ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out, "OCR-D-CROP", "OCR-D-BIN2")
+        ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out, "OCR-D-BIN2", "OCR-D-BIN-DENOISE")
+        ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out, "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW")
+        ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out, "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG")
+        ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out, "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP")
+        ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out, "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR")
+}
+"""
+
+
+def assert_common_features(nextflow_file_class, output_file_path):
+    parameters = nextflow_file_class.nf_lines_parameters
+    blocks_process = nextflow_file_class.nf_blocks_process
+    blocks_workflows = nextflow_file_class.nf_blocks_workflow
+
+    for parameter in TEST_PARAMETERS:
+        assert parameter in parameters
+    assert len(blocks_process) == 8
+    assert len(blocks_workflows) == 1
+
+    for block in blocks_process:
+        assert block.directives == {"maxForks": "1"}
+        assert len(block.input_params) == 3
+        assert len(block.output_params) == 1
+        dump_script = block.dump_script()
+        assert "ocrd-" in dump_script
+
+    for block in blocks_workflows:
+        assert block.workflow_name == "main"
+
+    expected_normalized = sub(r'\s+', '', EXPECTED_MAIN_WORKFLOW)
+    with open(output_file_path, mode='r', encoding='utf-8') as fp:
+        wf = fp.read()
+    no_tab_string = sub(r'\t+', '', wf)
+    no_spaces_result = sub(r'\s+', '', no_tab_string)
+    # clean_up(output_file_path)
+    assert expected_normalized in no_spaces_result
+
+
+def test_conversion_with_env_local():
+    oton_converter = OTONConverter()
+    nextflow_file_class = oton_converter.convert_oton_env_local(
+        input_path=INPUT_OCRD_PROCESS_WORKFLOW,
+        output_path=OUTPUT_NEXTFLOW_WORKFLOW_LOCAL
+    )
+    assert_common_features(nextflow_file_class, OUTPUT_NEXTFLOW_WORKFLOW_LOCAL)
+
+
+def test_conversion_with_env_docker():
+    oton_converter = OTONConverter()
+    nextflow_file_class = oton_converter.convert_oton_env_docker(
+        input_path=INPUT_OCRD_PROCESS_WORKFLOW,
+        output_path=OUTPUT_NEXTFLOW_WORKFLOW_DOCKER
+    )
+    assert 'params.env_wrapper = "null"' in nextflow_file_class.nf_lines_parameters
+    assert_common_features(nextflow_file_class, OUTPUT_NEXTFLOW_WORKFLOW_DOCKER)
+    for block in nextflow_file_class.nf_blocks_process:
+        assert '${params.env_wrapper}' in block.file_representation()