Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More helpers for reasoning about gxformat2 steps. #98

Merged
merged 1 commit into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 6 additions & 49 deletions gxformat2/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,36 +10,23 @@

from ._labels import Labels
from .model import (
append_step_id_to_step_list_elements,
clean_connection,
convert_dict_to_id_list_if_needed,
ensure_step_position,
get_native_step_type,
inputs_as_native_steps,
pop_connect_from_step_dict,
setup_connected_values,
steps_as_list,
SUPPORT_LEGACY_CONNECTIONS,
with_step_ids,
)
from .yaml import ordered_load

# One-sentence summary of what this module does. The name suggests it is shown
# as the command-line description - confirm against the argument parser.
# The leading and trailing newlines are part of the value.
SCRIPT_DESCRIPTION = "\nConvert a Format 2 Galaxy workflow description into a native format.\n"

# Step type names that are accepted without translation.
STEP_TYPES = [
    "subworkflow",
    "data_input",
    "data_collection_input",
    "tool",
    "pause",
    "parameter_input",
]

# Shorthand step type names mapped to the corresponding entry of STEP_TYPES.
STEP_TYPE_ALIASES = {
    "input": "data_input",
    "input_collection": "data_collection_input",
    "parameter": "parameter_input",
}

RUN_ACTIONS_TO_STEPS = {
'GalaxyWorkflow': 'run_workflow_to_step',
'GalaxyTool': 'run_tool_to_step',
Expand Down Expand Up @@ -130,34 +117,6 @@ def python_to_workflow(as_python, galaxy_interface, workflow_directory=None, imp
return converted


# move to a utils file?
def steps_as_list(format2_workflow: dict, add_ids: bool = False, inputs_offset: int = 0, mutate: bool = False):
    """Return the steps of a Format 2 workflow dict as a list.

    The value stored under ``"steps"`` is handed to
    ``convert_dict_to_id_list_if_needed`` with ``add_label=True``. Per the
    original author's note, this turns an ID-keyed map into a list and records
    the keys as labels rather than IDs (helper not shown here - confirm).

    When ``add_ids`` is true the steps additionally receive ids:

    * ``mutate=True`` - ids are filled in on the step list in place, starting
      at ``inputs_offset``.
    * ``mutate=False`` - the result of ``with_step_ids`` is returned instead.

    :raises Exception: if ``format2_workflow`` has no ``"steps"`` key.
    """
    if "steps" not in format2_workflow:
        raise Exception(f"No 'steps' key in dict, keys are {format2_workflow.keys()}")

    step_list = convert_dict_to_id_list_if_needed(format2_workflow["steps"], add_label=True, mutate=mutate)
    if not add_ids:
        return step_list
    if not mutate:
        return with_step_ids(step_list, inputs_offset=inputs_offset)

    # Mutating variant: ids are written onto the existing step dicts.
    _append_step_id_to_step_list_elements(step_list, inputs_offset=inputs_offset)
    return step_list


def _append_step_id_to_step_list_elements(steps: list, inputs_offset: int = 0):
    """Give every step in ``steps`` an ``"id"`` entry, modifying the steps in place.

    A step without an ``"id"`` key receives its list position plus
    ``inputs_offset``; a step that already has one keeps it.
    """
    assert isinstance(steps, list)
    for position, entry in enumerate(steps):
        # setdefault only writes when the key is absent, so existing ids survive.
        entry.setdefault("id", position + inputs_offset)
        assert entry["id"] is not None


def _python_to_workflow(as_python, conversion_context):

if "class" not in as_python:
Expand All @@ -182,7 +141,7 @@ def _python_to_workflow(as_python, conversion_context):
convert_inputs_to_steps(as_python, steps)

if isinstance(steps, list):
_append_step_id_to_step_list_elements(steps)
append_step_id_to_step_list_elements(steps)
steps_as_dict: Dict[str, Any] = {}
for i, step in enumerate(steps):
steps_as_dict[str(i)] = step
Expand Down Expand Up @@ -214,10 +173,8 @@ def _python_to_workflow(as_python, conversion_context):
del step["run"]

for step in steps.values():
step_type = step.get("type", "tool")
step_type = STEP_TYPE_ALIASES.get(step_type, step_type)
if step_type not in STEP_TYPES:
raise Exception(f"Unknown step type encountered {step_type}")
step_type = get_native_step_type(step)
# in case it was an alias or default - set it back up in the resulting dict
step["type"] = step_type
eval(f"transform_{step_type}")(conversion_context, step)

Expand Down
79 changes: 78 additions & 1 deletion gxformat2/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@
Union,
)

from typing_extensions import TypedDict
from typing_extensions import (
Literal,
TypedDict,
)

log = logging.getLogger(__name__)

Expand All @@ -20,6 +23,52 @@

# Dict shape with a single "$link" key holding a string. The functional
# TypedDict syntax is used because "$link" is not a valid Python identifier and
# therefore cannot be declared with the class-based syntax.
EmbeddedLink = TypedDict("EmbeddedLink", {"$link": str})

# Step type names in their native (un-aliased) form.
NativeGalaxyStepType = Literal[
    "subworkflow", "data_input", "data_collection_input", "tool", "pause", "parameter_input"
]
# Shorthand names that stand in for one of the native step types.
GxFormat2StepTypeAlias = Literal["input", "input_collection", "parameter"]
# Anything that may be given as a step type: a native name or an alias.
StepTypes = Union[NativeGalaxyStepType, GxFormat2StepTypeAlias]


# Runtime counterpart of NativeGalaxyStepType; keep the two in sync by hand.
STEP_TYPES = [
    "subworkflow",
    "data_input",
    "data_collection_input",
    "tool",
    "pause",
    "parameter_input",
]
# Runtime alias table: shorthand name -> native step type name.
STEP_TYPE_ALIASES: Dict[GxFormat2StepTypeAlias, NativeGalaxyStepType] = {
    "input": "data_input",
    "input_collection": "data_collection_input",
    "parameter": "parameter_input",
}


def get_native_step_type(gxformat2_step_dict: dict) -> NativeGalaxyStepType:
    """Infer the native Galaxy step type from a gxformat2 step given as a dict.

    The step's ``type`` entry is used when present. Otherwise the type
    defaults to ``"subworkflow"`` if the step has a truthy ``run`` entry and to
    ``"tool"`` if it does not. Names found in ``STEP_TYPE_ALIASES`` are
    translated to the native name they stand for.

    :param gxformat2_step_dict: the gxformat2 step description.
    :returns: one of the names listed in ``STEP_TYPES``.
    :raises Exception: if the step type is neither a native step type nor a
        known alias, including values that are not strings at all.
    """
    # NOTE(review): any truthy ``run`` is taken to mean "subworkflow" - confirm
    # no caller passes a step whose ``run`` describes something else.
    step_type_default = "subworkflow" if gxformat2_step_dict.get("run") else "tool"
    raw_step_type = gxformat2_step_dict.get("type", step_type_default)

    # Only strings can be valid step types. Checking this first keeps an
    # unhashable value (e.g. a list from malformed YAML) from failing with a
    # TypeError in the dict membership test instead of the error below.
    if isinstance(raw_step_type, str):
        if raw_step_type in STEP_TYPE_ALIASES:
            return STEP_TYPE_ALIASES[cast(GxFormat2StepTypeAlias, raw_step_type)]
        if raw_step_type in STEP_TYPES:
            return cast(NativeGalaxyStepType, raw_step_type)
    raise Exception(f"Unknown step type encountered {raw_step_type}")


# Legacy connection syntax is `source: step#output` and `$link: step#output`,
# as opposed to `outputSource: step/output` and `$link: step/output`.
# The flag is True only when the GXFORMAT2_SUPPORT_LEGACY_CONNECTIONS
# environment variable is exactly "1"; it is read once, at import time.
SUPPORT_LEGACY_CONNECTIONS = os.environ.get("GXFORMAT2_SUPPORT_LEGACY_CONNECTIONS") == "1"

Expand Down Expand Up @@ -318,3 +367,31 @@ def outputs_as_list(as_python: dict) -> list:
outputs = as_python.get("outputs", [])
outputs = convert_dict_to_id_list_if_needed(outputs)
return outputs


def steps_as_list(
    format2_workflow: dict,
    add_ids: bool = False,
    inputs_offset: int = 0,
    mutate: bool = False,
) -> List[Dict[str, Any]]:
    """Return the steps of a Format 2 workflow dict as a list.

    The value stored under ``"steps"`` is handed to
    ``convert_dict_to_id_list_if_needed`` with ``add_label=True``. Per the
    original author's note, this turns an ID-keyed map into a list and records
    the keys as labels rather than IDs (helper not shown here - confirm).

    When ``add_ids`` is true the steps additionally receive ids:

    * ``mutate=True`` - ids are filled in on the step list in place, starting
      at ``inputs_offset``.
    * ``mutate=False`` - the result of ``with_step_ids`` is returned instead.

    :raises Exception: if ``format2_workflow`` has no ``"steps"`` key.
    """
    if "steps" not in format2_workflow:
        raise Exception(f"No 'steps' key in dict, keys are {format2_workflow.keys()}")

    step_list = convert_dict_to_id_list_if_needed(format2_workflow["steps"], add_label=True, mutate=mutate)
    if not add_ids:
        return step_list
    if not mutate:
        return with_step_ids(step_list, inputs_offset=inputs_offset)

    # Mutating variant: ids are written onto the existing step dicts.
    append_step_id_to_step_list_elements(step_list, inputs_offset=inputs_offset)
    return step_list


def append_step_id_to_step_list_elements(steps: List[Dict[str, Any]], inputs_offset: int = 0) -> None:
    """Ensure every step in ``steps`` has an ``"id"`` entry, modifying the steps in place.

    A step without an ``"id"`` key receives its list position plus
    ``inputs_offset``; a step that already has one keeps it.
    """
    assert isinstance(steps, list)
    for position, entry in enumerate(steps):
        # setdefault only writes when the key is absent, so existing ids survive.
        entry.setdefault("id", position + inputs_offset)
        assert entry["id"] is not None
3 changes: 2 additions & 1 deletion tests/_helpers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import copy
import os

from gxformat2.converter import python_to_workflow, STEP_TYPES, yaml_to_workflow
from gxformat2.converter import python_to_workflow, yaml_to_workflow
from gxformat2.export import from_galaxy_native
from gxformat2.interface import ImporterGalaxyInterface
from gxformat2.model import STEP_TYPES

# Absolute path of the directory that contains this helpers module.
TEST_PATH = os.path.abspath(os.path.dirname(__file__))
# Location of the interop examples: the GXFORMAT2_INTEROP_EXAMPLES environment
# variable when it is set, otherwise the "examples" directory under TEST_PATH.
TEST_INTEROP_EXAMPLES = os.environ.get("GXFORMAT2_INTEROP_EXAMPLES", os.path.join(TEST_PATH, "examples"))
Expand Down
Loading