-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
import: oton converter code and tests
- Loading branch information
Showing
29 changed files
with
5,974 additions
and
1 deletion.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import click | ||
from .converter import Converter | ||
from .validators.ocrd_validator import OCRDValidator | ||
|
||
|
||
@click.group() | ||
def cli(): | ||
pass | ||
|
||
|
||
@cli.command("convert", help="Convert an OCR-D workflow to a Nextflow workflow script.") | ||
@click.option('-I', '--input_path', type=click.Path(dir_okay=False, exists=True, readable=True), | ||
show_default=True, help='Path to the OCR-D workflow file to be converted.') | ||
@click.option('-O', '--output_path', type=click.Path(dir_okay=False, writable=True), | ||
show_default=True, help='Path of the Nextflow workflow script to be generated.') | ||
@click.option('-D', '--dockerized', is_flag=True, | ||
help='If set, then the dockerized variant of the Nextflow script is generated.') | ||
def convert(input_path: str, output_path: str, dockerized: bool): | ||
print(f"Converting from: {input_path}") | ||
print(f"Converting to: {output_path}") | ||
Converter().convert_OtoN(input_path, output_path, dockerized) | ||
print("Conversion was successful!") | ||
|
||
|
||
@cli.command("validate", help="Validate an OCR-D workflow txt file.") | ||
@click.option('-I', '--input_path', show_default=True, | ||
help='Path to the OCR-D workflow file to be validated.') | ||
def validate(input_path: str): | ||
OCRDValidator().validate(input_path) | ||
print(f"Validating: {input_path}") | ||
print("Validation was successful!") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
from json import load | ||
from os import environ | ||
from pkg_resources import resource_filename | ||
|
||
|
||
__all__ = [ | ||
"DIR_IN", | ||
"DIR_OUT", | ||
"METS_FILE", | ||
|
||
"OCRD_ALL_JSON", | ||
"OTON_LOG_LEVEL", | ||
"OTON_LOG_FORMAT", | ||
|
||
"PARAMS_KEY_DOCKER_PWD", | ||
"PARAMS_KEY_DOCKER_VOLUME", | ||
"PARAMS_KEY_DOCKER_MODELS", | ||
"PARAMS_KEY_DOCKER_IMAGE", | ||
"PARAMS_KEY_DOCKER_COMMAND", | ||
"PARAMS_KEY_INPUT_FILE_GRP", | ||
"PARAMS_KEY_METS_PATH", | ||
"PARAMS_KEY_WORKSPACE_PATH", | ||
|
||
"PH_DOCKER_COMMAND", | ||
"PH_DIR_IN", | ||
"PH_DIR_OUT", | ||
"PH_METS_FILE", | ||
|
||
"REPR_DOCKER_COMMAND", | ||
"REPR_DOCKER_IMAGE", | ||
"REPR_DOCKER_MODELS", | ||
"REPR_DOCKER_MODELS_DIR", | ||
"REPR_DOCKER_PWD", | ||
"REPR_DOCKER_VOLUME", | ||
"REPR_INPUT_FILE_GRP", | ||
"REPR_METS_PATH", | ||
"REPR_MODELS_PATH", | ||
"REPR_WORKSPACE_PATH", | ||
|
||
"SPACES" | ||
] | ||
|
||
OCRD_ALL_JSON_FILE = resource_filename(__name__, 'ocrd_all_tool.json') | ||
with open(OCRD_ALL_JSON_FILE) as f: | ||
OCRD_ALL_JSON = load(f) | ||
|
||
OTON_LOG_LEVEL = environ.get("OTON_LOG_LEVEL", "INFO") | ||
OTON_LOG_FORMAT = '%(asctime)s %(levelname)s %(name)s:%(funcName)s: %(lineno)s: %(message)s' | ||
|
||
# Parameter keys | ||
PARAMS_KEY_DOCKER_COMMAND: str = 'params.docker_command' | ||
PARAMS_KEY_DOCKER_IMAGE: str = 'params.docker_image' | ||
PARAMS_KEY_DOCKER_PWD: str = 'params.docker_pwd' | ||
PARAMS_KEY_DOCKER_VOLUME: str = 'params.docker_volume' | ||
PARAMS_KEY_DOCKER_MODELS: str = 'params.docker_models' | ||
PARAMS_KEY_DOCKER_MODELS_DIR: str = 'params.docker_models_dir' | ||
PARAMS_KEY_METS_PATH: str = 'params.mets_path' | ||
PARAMS_KEY_INPUT_FILE_GRP: str = 'params.input_file_grp' | ||
PARAMS_KEY_MODELS_PATH: str = 'params.models_path' | ||
PARAMS_KEY_WORKSPACE_PATH: str = 'params.workspace_path' | ||
|
||
|
||
def __build_docker_command(): | ||
docker_command = 'docker run --rm' | ||
docker_command += f' -u \\$(id -u)' | ||
docker_command += f' -v ${PARAMS_KEY_DOCKER_VOLUME}' | ||
docker_command += f' -v ${PARAMS_KEY_DOCKER_MODELS}' | ||
docker_command += f' -w ${PARAMS_KEY_DOCKER_PWD}' | ||
docker_command += f' -- ${PARAMS_KEY_DOCKER_IMAGE}' | ||
return docker_command | ||
|
||
|
||
def __build_repr(parameter, value): | ||
return f'{parameter} = "{value}"' | ||
|
||
|
||
# Parameters - file representation | ||
REPR_DOCKER_COMMAND: str = __build_repr(PARAMS_KEY_DOCKER_COMMAND, __build_docker_command()) | ||
REPR_DOCKER_IMAGE: str = __build_repr(PARAMS_KEY_DOCKER_IMAGE, "null") | ||
REPR_DOCKER_MODELS: str = __build_repr(PARAMS_KEY_DOCKER_MODELS, | ||
f'${PARAMS_KEY_MODELS_PATH}:${PARAMS_KEY_DOCKER_MODELS_DIR}') | ||
REPR_DOCKER_MODELS_DIR: str = __build_repr(PARAMS_KEY_DOCKER_MODELS_DIR, "null") | ||
REPR_DOCKER_PWD: str = __build_repr(PARAMS_KEY_DOCKER_PWD, "null") | ||
REPR_DOCKER_VOLUME: str = __build_repr(PARAMS_KEY_DOCKER_VOLUME, | ||
f'${PARAMS_KEY_WORKSPACE_PATH}:${PARAMS_KEY_DOCKER_PWD}') | ||
REPR_METS_PATH: str = __build_repr(PARAMS_KEY_METS_PATH, "null") | ||
REPR_INPUT_FILE_GRP: str = __build_repr(PARAMS_KEY_INPUT_FILE_GRP, "null") | ||
REPR_MODELS_PATH: str = __build_repr(PARAMS_KEY_MODELS_PATH, "null") | ||
REPR_WORKSPACE_PATH: str = __build_repr(PARAMS_KEY_WORKSPACE_PATH, "null") | ||
|
||
DIR_IN: str = 'input_file_grp' | ||
DIR_OUT: str = 'output_file_grp' | ||
METS_FILE: str = 'mets_file' | ||
|
||
# Placeholders | ||
BS: str = '{}' | ||
PH_DOCKER_COMMAND: str = f'${BS[0]}{PARAMS_KEY_DOCKER_COMMAND}{BS[1]}' | ||
PH_DIR_IN: str = f'${BS[0]}{DIR_IN}{BS[1]}' | ||
PH_DIR_OUT: str = f'${BS[0]}{DIR_OUT}{BS[1]}' | ||
PH_METS_FILE: str = f'${BS[0]}{METS_FILE}{BS[1]}' | ||
SPACES = ' ' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
from .models import NextflowFileExecutable | ||
from .validator import OCRDValidator | ||
|
||
|
||
class Converter: | ||
def __init__(self): | ||
pass | ||
|
||
@staticmethod | ||
def convert_OtoN(input_path: str, output_path: str, dockerized: bool = False): | ||
validator = OCRDValidator() | ||
validator.validate(input_path) | ||
|
||
nf_file_executable = NextflowFileExecutable() | ||
nf_file_executable.build_parameters(dockerized) | ||
# TODO: first_file_grps replacement is a wacky hack | ||
# to replace the default value of the pre-built REPR_INPUT_FILE_GRP | ||
nf_processes, first_file_grps = nf_file_executable.build_nextflow_processes(validator.processors, dockerized) | ||
# TODO: This index is currently 3, but may change! | ||
nf_file_executable.nf_lines_parameters[3] = nf_file_executable.nf_lines_parameters[3].replace("null", f"{first_file_grps}") | ||
nf_file_executable.build_main_workflow(nf_processes) | ||
nf_file_executable.produce_nextflow_file(output_path) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
__all__ = ["NextflowBlockProcess", "NextflowBlockWorkflow", "NextflowFileExecutable"] | ||
|
||
from .nf_block_process import NextflowBlockProcess | ||
from .nf_block_workflow import NextflowBlockWorkflow | ||
from .nf_file_executable import NextflowFileExecutable |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import logging | ||
from ..validator import ProcessorCallArguments | ||
from ..constants import OTON_LOG_LEVEL, OTON_LOG_FORMAT, PH_DIR_IN, PH_DIR_OUT, PH_METS_FILE, PH_DOCKER_COMMAND, SPACES | ||
|
||
|
||
class NextflowBlockProcess: | ||
def __init__(self, processor_call_arguments: ProcessorCallArguments, index_pos: int, dockerized: bool = False): | ||
self.logger = logging.getLogger(__name__) | ||
self.logger.setLevel(logging.getLevelName(OTON_LOG_LEVEL)) | ||
logging.basicConfig(format=OTON_LOG_FORMAT) | ||
|
||
self.dockerized = dockerized | ||
self.nf_process_name = processor_call_arguments.executable.replace('-', '_') + "_" + str(index_pos) | ||
self.repr_in_workflow = [ | ||
self.nf_process_name, | ||
f'"{processor_call_arguments.input_file_grps}"', | ||
f'"{processor_call_arguments.output_file_grps}"' | ||
] | ||
|
||
processor_call_arguments.input_file_grps = PH_DIR_IN | ||
processor_call_arguments.output_file_grps = PH_DIR_OUT | ||
processor_call_arguments.mets_file_path = PH_METS_FILE | ||
|
||
self.ocrd_command_bash = f'{processor_call_arguments}' | ||
self.directives = [] | ||
self.input_params = [] | ||
self.output_params = [] | ||
|
||
def file_representation(self): | ||
representation = f'process {self.nf_process_name}' + ' {\n' | ||
|
||
for directive in self.directives: | ||
representation += f'{SPACES}{directive}\n' | ||
representation += '\n' | ||
|
||
representation += f'{SPACES}input:\n' | ||
for input_param in self.input_params: | ||
representation += f'{SPACES}{SPACES}{input_param}\n' | ||
representation += '\n' | ||
|
||
representation += f'{SPACES}output:\n' | ||
for output_param in self.output_params: | ||
representation += f'{SPACES}{SPACES}{output_param}\n' | ||
representation += '\n' | ||
|
||
representation += f'{SPACES}script:\n' | ||
representation += f'{SPACES}{SPACES}"""\n' | ||
if self.dockerized: | ||
representation += f'{SPACES}{SPACES}{PH_DOCKER_COMMAND} {self.ocrd_command_bash}\n' | ||
else: | ||
representation += f'{SPACES}{SPACES}{self.ocrd_command_bash}\n' | ||
representation += f'{SPACES}{SPACES}"""\n' | ||
|
||
representation += '}\n' | ||
|
||
self.logger.debug(f"\n{representation}") | ||
return representation | ||
|
||
def add_directive(self, directive: str): | ||
self.directives.append(directive) | ||
|
||
def add_input_param(self, parameter: str): | ||
self.input_params.append(parameter) | ||
|
||
def add_output_param(self, parameter: str): | ||
self.output_params.append(parameter) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
import logging | ||
from typing import List | ||
from ..constants import OTON_LOG_LEVEL, OTON_LOG_FORMAT, PARAMS_KEY_METS_PATH, PARAMS_KEY_INPUT_FILE_GRP, SPACES | ||
|
||
|
||
class NextflowBlockWorkflow: | ||
def __init__(self, workflow_name: str, nf_processes: List[str]): | ||
self.logger = logging.getLogger(__name__) | ||
self.logger.setLevel(logging.getLevelName(OTON_LOG_LEVEL)) | ||
logging.basicConfig(format=OTON_LOG_FORMAT) | ||
|
||
self.workflow_name = workflow_name | ||
self.nf_processes: List[str] = nf_processes | ||
|
||
def file_representation(self): | ||
representation = 'workflow {\n' | ||
representation += f'{SPACES}{self.workflow_name}:\n' | ||
|
||
previous_nfp = None | ||
for nfp in self.nf_processes: | ||
nfp_0 = nfp[0] | ||
nfp_1 = nfp[1] | ||
nfp_2 = nfp[2] | ||
if previous_nfp is None: | ||
representation += f'{SPACES}{SPACES}{nfp_0}({PARAMS_KEY_METS_PATH}, {PARAMS_KEY_INPUT_FILE_GRP}, {nfp_2})\n' | ||
else: | ||
representation += f'{SPACES}{SPACES}{nfp_0}({previous_nfp}.out, {nfp_1}, {nfp_2})\n' | ||
previous_nfp = nfp_0 | ||
|
||
representation += '}' | ||
|
||
self.logger.debug(f"\n{representation}") | ||
self.logger.info(f"Successfully created Nextflow Workflow: {self.workflow_name}") | ||
return representation |
98 changes: 98 additions & 0 deletions
98
src/utils/operandi_utils/oton/models/nf_file_executable.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
import logging | ||
from typing import List, Tuple | ||
|
||
from ..validator import ProcessorCallArguments | ||
from ..constants import ( | ||
DIR_IN, | ||
DIR_OUT, | ||
METS_FILE, | ||
|
||
OTON_LOG_FORMAT, | ||
OTON_LOG_LEVEL, | ||
|
||
REPR_DOCKER_COMMAND, | ||
REPR_DOCKER_IMAGE, | ||
REPR_DOCKER_MODELS, | ||
REPR_DOCKER_MODELS_DIR, | ||
REPR_DOCKER_PWD, | ||
REPR_DOCKER_VOLUME, | ||
REPR_METS_PATH, | ||
REPR_INPUT_FILE_GRP, | ||
REPR_MODELS_PATH, | ||
REPR_WORKSPACE_PATH | ||
) | ||
from .nf_block_process import NextflowBlockProcess | ||
from .nf_block_workflow import NextflowBlockWorkflow | ||
|
||
|
||
class NextflowFileExecutable: | ||
def __init__(self): | ||
self.nf_lines_parameters = [] | ||
self.nf_lines_processes = [] | ||
self.nf_lines_workflow = [] | ||
self.logger = logging.getLogger(__name__) | ||
self.logger.setLevel(logging.getLevelName(OTON_LOG_LEVEL)) | ||
logging.basicConfig(format=OTON_LOG_FORMAT) | ||
|
||
def build_parameters(self, dockerized: bool = False): | ||
self.nf_lines_parameters.append('nextflow.enable.dsl = 2') | ||
self.nf_lines_parameters.append('') | ||
|
||
self.nf_lines_parameters.append(REPR_METS_PATH) | ||
self.nf_lines_parameters.append(REPR_INPUT_FILE_GRP) | ||
|
||
if dockerized: | ||
self.nf_lines_parameters.append(REPR_WORKSPACE_PATH) | ||
self.nf_lines_parameters.append(REPR_DOCKER_PWD) | ||
self.nf_lines_parameters.append(REPR_DOCKER_VOLUME) | ||
self.nf_lines_parameters.append(REPR_DOCKER_MODELS_DIR) | ||
self.nf_lines_parameters.append(REPR_MODELS_PATH) | ||
self.nf_lines_parameters.append(REPR_DOCKER_MODELS) | ||
self.nf_lines_parameters.append(REPR_DOCKER_IMAGE) | ||
self.nf_lines_parameters.append(REPR_DOCKER_COMMAND) | ||
|
||
self.nf_lines_parameters.append('') | ||
|
||
def build_nextflow_processes( | ||
self, | ||
ocrd_processor: List[ProcessorCallArguments], | ||
dockerized: bool = False | ||
) -> Tuple[List[str], str]: | ||
|
||
nf_processes = [] | ||
first_file_grps = "DEFAULT" | ||
index = 0 | ||
for processor in ocrd_processor: | ||
nf_process_block = NextflowBlockProcess(processor, index, dockerized) | ||
nf_process_block.add_directive('maxForks 1') | ||
nf_process_block.add_input_param(f'path {METS_FILE}') | ||
nf_process_block.add_input_param(f'val {DIR_IN}') | ||
nf_process_block.add_input_param(f'val {DIR_OUT}') | ||
nf_process_block.add_output_param(f'path {METS_FILE}') | ||
self.nf_lines_processes.append(nf_process_block.file_representation()) | ||
|
||
# Take the input_file_grp of the first processor and change the value of the | ||
# REPR_INPUT_FILE_GRP to set the desired file group as default | ||
if index == 0: | ||
first_file_grps = nf_process_block.repr_in_workflow[1].strip('"') | ||
|
||
# This list is used when building the workflow | ||
nf_processes.append(nf_process_block.repr_in_workflow) | ||
self.logger.info(f"Successfully created Nextflow Process: {nf_process_block.nf_process_name}") | ||
index += 1 | ||
|
||
return nf_processes, first_file_grps | ||
|
||
def build_main_workflow(self, nf_processes: List[str]): | ||
nf_workflow_block = NextflowBlockWorkflow("main", nf_processes) | ||
self.nf_lines_workflow.append(nf_workflow_block.file_representation()) | ||
|
||
def produce_nextflow_file(self, output_path: str): | ||
# Write Nextflow line tokens to an output file | ||
with open(output_path, mode='w', encoding='utf-8') as nextflow_file: | ||
for nextflow_line in self.nf_lines_parameters: | ||
nextflow_file.write(f'{nextflow_line}\n') | ||
for nextflow_line in self.nf_lines_processes: | ||
nextflow_file.write(f'{nextflow_line}\n') | ||
for nextflow_line in self.nf_lines_workflow: | ||
nextflow_file.write(f'{nextflow_line}\n') |
Oops, something went wrong.