From 1bfbbd79e4fe32226153a8e911aae549d46b45e6 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 19 Oct 2023 08:56:21 -0700 Subject: [PATCH 1/4] Report errors in WDL using MiniWDL's error location printer --- src/toil/wdl/wdltoil.py | 55 +++++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index c29afb4bdc..5149efd2ab 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -37,12 +37,13 @@ Iterable, Generator from urllib.parse import urlsplit, urljoin, quote, unquote -from WDL import Error from configargparse import ArgParser from WDL._util import byte_size_units +from WDL.CLI import print_error from WDL.runtime.task_container import TaskContainer from WDL.runtime.backend.singularity import SingularityContainer from WDL.runtime.backend.docker_swarm import SwarmContainer +import WDL.Error import WDL.runtime.config from toil.common import Config, Toil, addOptions @@ -832,7 +833,7 @@ def add_paths(task_container: TaskContainer, host_paths: Iterable[str]) -> None: host_path_strip = host_path.rstrip("/") if host_path not in task_container.input_path_map and host_path_strip not in task_container.input_path_map: if not os.path.exists(host_path_strip): - raise Error.InputError("input path not found: " + host_path) + raise WDL.Error.InputError("input path not found: " + host_path) host_paths_by_dir.setdefault(os.path.dirname(host_path_strip), set()).add(host_path) # for each such partition of files # - if there are no basename collisions under input subdirectory 0, then mount them there. @@ -2496,24 +2497,42 @@ def main() -> None: if options.restart: output_bindings = toil.restart() else: - # Load the WDL document - document: WDL.Tree.Document = WDL.load(options.wdl_uri, read_source=toil_read_source) - - if document.workflow is None: - logger.critical("No workflow in document!") + try: + # Load the WDL document + document: WDL.Tree.Document = WDL.load(options.wdl_uri, read_source=toil_read_source) + + if document.workflow is None: + # Complain that we need a workflow. + # We need the absolute path or URL to raise the error + wdl_abspath = options.wdl_uri if not os.path.exists(options.wdl_uri) else os.path.abspath(options.wdl_uri) + raise WDL.Error.ValidationError(WDL.Error.SourcePosition(options.wdl_uri, wdl_abspath, 0, 0), "No workflow found in document") + + if options.inputs_uri: + # Load the inputs. Use the same loading mechanism, which means we + # have to break into async temporarily. + downloaded = asyncio.run(toil_read_source(options.inputs_uri, [], None)) + try: + inputs = json.loads(downloaded.source_text) + except json.JSONDecodeError as e: + # Complain about the JSON document. + # We need the absolute path or URL to raise the error + inputs_abspath = options.inputs_uri if not os.path.exists(options.inputs_uri) else os.path.abspath(options.inputs_uri) + raise WDL.Error.ValidationError(WDL.Error.SourcePosition(options.inputs_uri, inputs_abspath, e.lineno, e.colno), "Cannot parse input JSON: " + e.msg) from e + else: + inputs = {} + except ( + WDL.Error.SyntaxError, + WDL.Error.ImportError, + WDL.Error.ValidationError, + WDL.Error.MultipleValidationErrors, + FileNotFoundError + ) as e: + logger.critical("Could not load workflow/inputs") + # These are the errors that MiniWDL's parser can raise. See https://github.com/chanzuckerberg/miniwdl/blob/a780b1bf2db61f18de37616068968b2bb4c2d21c/WDL/CLI.py#L91-L97. + # We are going to use MiniWDL's pretty printer to print them. + print_error(e) sys.exit(1) - if options.inputs_uri: - # Load the inputs. Use the same loading mechanism, which means we - # have to break into async temporarily. - downloaded = asyncio.run(toil_read_source(options.inputs_uri, [], None)) - try: - inputs = json.loads(downloaded.source_text) - except json.JSONDecodeError as e: - logger.critical('Cannot parse JSON at %s: %s', downloaded.abspath, e) - sys.exit(1) - else: - inputs = {} # Parse out the available and required inputs. Each key in the # JSON ought to start with the workflow's name and then a . # TODO: WDL's Bindings[] isn't variant in the right way, so we From 3ea52d7c667392caae679dad5ad1714202ca79f3 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 19 Oct 2023 09:34:36 -0700 Subject: [PATCH 2/4] Decorate actual tasks with fancy WDL error reporting --- src/toil/wdl/wdltoil.py | 84 +++++++++++++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 16 deletions(-) diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index 5149efd2ab..038f51fdde 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -58,6 +58,58 @@ logger = logging.getLogger(__name__) +@contextmanager +def wdl_error_reporter(task: str, exit: bool = False, log: Callable[[str], None] = logger.critical) -> Generator[None, None, None]: + """ + Run code in a context where WDL errors will be reported with pretty formatting. + """ + + try: + yield + except ( + WDL.Error.SyntaxError, + WDL.Error.ImportError, + WDL.Error.ValidationError, + WDL.Error.MultipleValidationErrors, + FileNotFoundError + ) as e: + log("Could not " + task) + # These are the errors that MiniWDL's parser can raise and its reporter + # can report. See + # https://github.com/chanzuckerberg/miniwdl/blob/a780b1bf2db61f18de37616068968b2bb4c2d21c/WDL/CLI.py#L91-L97. + # + # We are going to use MiniWDL's pretty printer to print them. + print_error(e) + if exit: + # Stop right now + sys.exit(1) + else: + # Reraise the exception to stop + raise + +F = TypeVar('F', bound=Callable[..., Any]) +def report_wdl_errors(task: str, exit: bool = False, log: Callable[[str], None] = logger.critical) -> Callable[[F], F]: + """ + Create a decorator to report WDL errors with the given task message. + + Decorator can then be applied to a function, and if a WDL error happens it + will say that it cannot {task} and quit. + """ + def decorator(decoratee: F) -> F: + """ + Decorate a function with WDL error reporting. + """ + def decorated(*args: Any, **kwargs: Any) -> Any: + """ + Run the decoratee and handle WDL errors. + """ + with wdl_error_reporter(task, exit=exit, log=log): + return decoratee(*args, **kwargs) # type: ignore + return cast(F, decorated) + return decorator + + + def potential_absolute_uris(uri: str, path: List[str], importer: Optional[WDL.Tree.Document] = None) -> Iterator[str]: """ Get potential absolute URIs to check for an imported file. @@ -1072,6 +1124,8 @@ def __init__(self, execution_dir: Optional[str] = None, **kwargs: Any) -> None: def run(self, file_store: AbstractFileStore) -> Any: """ Run a WDL-related job. + + Remember to decorate non-trivial overrides with :func:`report_wdl_errors`. """ # Make sure that pickle is prepared to save our return values, which # might take a lot of recursive calls. TODO: This might be because @@ -1209,6 +1263,7 @@ def can_fake_root(self) -> bool: logger.warning('No subuids are assigned to %s; cannot fake root.', username) return False + @report_wdl_errors("run task") def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: """ Actually run the task. @@ -1570,6 +1625,7 @@ def __init__(self, node: WDL.Tree.WorkflowNode, prev_node_results: Sequence[Prom if isinstance(self._node, WDL.Tree.Call): logger.debug("Preparing job for call node %s", self._node.workflow_node_id) + @report_wdl_errors("run workflow node") def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: """ Actually execute the workflow node. @@ -1660,6 +1716,7 @@ def __init__(self, nodes: List[WDL.Tree.WorkflowNode], prev_node_results: Sequen if isinstance(n, (WDL.Tree.Call, WDL.Tree.Scatter, WDL.Tree.Conditional)): raise RuntimeError("Node cannot be evaluated with other nodes: " + str(n)) + @report_wdl_errors("run workflow node list") def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: """ Actually execute the workflow nodes. @@ -1702,6 +1759,7 @@ def __init__(self, prev_node_results: Sequence[Promised[WDLBindings]], **kwargs: self._prev_node_results = prev_node_results + @report_wdl_errors("combine bindings") def run(self, file_store: AbstractFileStore) -> WDLBindings: """ Aggregate incoming results. @@ -2117,6 +2175,7 @@ def __init__(self, scatter: WDL.Tree.Scatter, prev_node_results: Sequence[Promis self._scatter = scatter self._prev_node_results = prev_node_results + @report_wdl_errors("run scatter") def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: """ Run the scatter. @@ -2199,6 +2258,7 @@ def __init__(self, input_bindings: Sequence[Promised[WDLBindings]], base_binding self._input_bindings = input_bindings self._base_bindings = base_bindings + @report_wdl_errors("create array bindings") def run(self, file_store: AbstractFileStore) -> WDLBindings: """ Actually produce the array-ified bindings now that promised values are available. @@ -2250,6 +2310,7 @@ def __init__(self, conditional: WDL.Tree.Conditional, prev_node_results: Sequenc self._conditional = conditional self._prev_node_results = prev_node_results + @report_wdl_errors("run conditional") def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: """ Run the conditional. @@ -2312,6 +2373,7 @@ def __init__(self, workflow: WDL.Tree.Workflow, prev_node_results: Sequence[Prom self._workflow_id = workflow_id self._namespace = namespace + @report_wdl_errors("run workflow") def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: """ Run the workflow. Return the result of the workflow. @@ -2362,6 +2424,7 @@ def __init__(self, workflow: WDL.Tree.Workflow, bindings: Promised[WDLBindings], self._bindings = bindings self._workflow = workflow + @report_wdl_errors("evaluate outputs") def run(self, file_store: AbstractFileStore) -> WDLBindings: """ Make bindings for the outputs. @@ -2409,6 +2472,7 @@ def __init__(self, workflow: WDL.Tree.Workflow, inputs: WDLBindings, execution_d self._workflow = workflow self._inputs = inputs + @report_wdl_errors("run root job") def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: """ Actually build the subgraph. @@ -2497,7 +2561,7 @@ def main() -> None: if options.restart: output_bindings = toil.restart() else: - try: + with wdl_error_reporter("parse workflow/inputs", exit=True): # Load the WDL document document: WDL.Tree.Document = WDL.load(options.wdl_uri, read_source=toil_read_source) @@ -2505,7 +2569,7 @@ def main() -> None: # Complain that we need a workflow. # We need the absolute path or URL to raise the error wdl_abspath = options.wdl_uri if not os.path.exists(options.wdl_uri) else os.path.abspath(options.wdl_uri) - raise WDL.Error.ValidationError(WDL.Error.SourcePosition(options.wdl_uri, wdl_abspath, 0, 0), "No workflow found in document") + raise WDL.Error.ValidationError(WDL.Error.SourcePosition(options.wdl_uri, wdl_abspath, 0, 0, 0, 1), "No workflow found in document") if options.inputs_uri: # Load the inputs. Use the same loading mechanism, which means we @@ -2517,22 +2581,10 @@ def main() -> None: # Complain about the JSON document. # We need the absolute path or URL to raise the error inputs_abspath = options.inputs_uri if not os.path.exists(options.inputs_uri) else os.path.abspath(options.inputs_uri) - raise WDL.Error.ValidationError(WDL.Error.SourcePosition(options.inputs_uri, inputs_abspath, e.lineno, e.colno), "Cannot parse input JSON: " + e.msg) from e + raise WDL.Error.ValidationError(WDL.Error.SourcePosition(options.inputs_uri, inputs_abspath, e.lineno, e.colno, e.lineno, e.colno + 1), "Cannot parse input JSON: " + e.msg) from e else: inputs = {} - except ( - WDL.Error.SyntaxError, - WDL.Error.ImportError, - WDL.Error.ValidationError, - WDL.Error.MultipleValidationErrors, - FileNotFoundError - ) as e: - logger.critical("Could not load workflow/inputs") - # These are the errors that MiniWDL's parser can raise. See https://github.com/chanzuckerberg/miniwdl/blob/a780b1bf2db61f18de37616068968b2bb4c2d21c/WDL/CLI.py#L91-L97. - # We are going to use MiniWDL's pretty printer to print them. - print_error(e) - sys.exit(1) - + # Parse out the available and required inputs. Each key in the # JSON ought to start with the workflow's name and then a . # TODO: WDL's Bindings[] isn't variant in the right way, so we From c69fae2da0e4aa652ac4b260d8378e40d36853aa Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 19 Oct 2023 09:37:20 -0700 Subject: [PATCH 3/4] Slap WDL error reporting on main --- src/toil/wdl/wdltoil.py | 45 ++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index 038f51fdde..176bac9f46 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -93,7 +93,7 @@ def report_wdl_errors(task: str, exit: bool = False, log: Callable[[str], None] Create a decorator to report WDL errors with the given task message. Decorator can then be applied to a function, and if a WDL error happens it - will say that it cannot {task} and quit. + will say that it could not {task}. """ def decorator(decoratee: F) -> F: """ @@ -2521,7 +2521,7 @@ def string_coerce(self: WDL.Value.String, desired_type: Optional[WDL.Type.Base] WDL.Value.Base.coerce = old_base_coerce # type: ignore[method-assign] WDL.Value.String.coerce = old_str_coerce # type: ignore[method-assign] - +@report_wdl_errors("run workflow", exit=True) def main() -> None: """ A Toil workflow to interpret WDL input files. @@ -2561,29 +2561,28 @@ def main() -> None: if options.restart: output_bindings = toil.restart() else: - with wdl_error_reporter("parse workflow/inputs", exit=True): - # Load the WDL document - document: WDL.Tree.Document = WDL.load(options.wdl_uri, read_source=toil_read_source) + # Load the WDL document + document: WDL.Tree.Document = WDL.load(options.wdl_uri, read_source=toil_read_source) + + if document.workflow is None: + # Complain that we need a workflow. + # We need the absolute path or URL to raise the error + wdl_abspath = options.wdl_uri if not os.path.exists(options.wdl_uri) else os.path.abspath(options.wdl_uri) + raise WDL.Error.ValidationError(WDL.Error.SourcePosition(options.wdl_uri, wdl_abspath, 0, 0, 0, 1), "No workflow found in document") - if document.workflow is None: - # Complain that we need a workflow. + if options.inputs_uri: + # Load the inputs. Use the same loading mechanism, which means we + # have to break into async temporarily. + downloaded = asyncio.run(toil_read_source(options.inputs_uri, [], None)) + try: + inputs = json.loads(downloaded.source_text) + except json.JSONDecodeError as e: + # Complain about the JSON document. # We need the absolute path or URL to raise the error - wdl_abspath = options.wdl_uri if not os.path.exists(options.wdl_uri) else os.path.abspath(options.wdl_uri) - raise WDL.Error.ValidationError(WDL.Error.SourcePosition(options.wdl_uri, wdl_abspath, 0, 0, 0, 1), "No workflow found in document") - - if options.inputs_uri: - # Load the inputs. Use the same loading mechanism, which means we - # have to break into async temporarily. - downloaded = asyncio.run(toil_read_source(options.inputs_uri, [], None)) - try: - inputs = json.loads(downloaded.source_text) - except json.JSONDecodeError as e: - # Complain about the JSON document. - # We need the absolute path or URL to raise the error - inputs_abspath = options.inputs_uri if not os.path.exists(options.inputs_uri) else os.path.abspath(options.inputs_uri) - raise WDL.Error.ValidationError(WDL.Error.SourcePosition(options.inputs_uri, inputs_abspath, e.lineno, e.colno, e.lineno, e.colno + 1), "Cannot parse input JSON: " + e.msg) from e - else: - inputs = {} + inputs_abspath = options.inputs_uri if not os.path.exists(options.inputs_uri) else os.path.abspath(options.inputs_uri) + raise WDL.Error.ValidationError(WDL.Error.SourcePosition(options.inputs_uri, inputs_abspath, e.lineno, e.colno, e.lineno, e.colno + 1), "Cannot parse input JSON: " + e.msg) from e + else: + inputs = {} # Parse out the available and required inputs. Each key in the # JSON ought to start with the workflow's name and then a . From 0b1892a3ab61f106f6e8e56df567137ed0c6b64e Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 19 Oct 2023 09:45:34 -0700 Subject: [PATCH 4/4] Remove banned ignore comment --- src/toil/wdl/wdltoil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index 176bac9f46..a29d78677a 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -104,7 +104,7 @@ def decorated(*args: Any, **kwargs: Any) -> Any: Run the decoratee and handle WDL errors. """ with wdl_error_reporter(task, exit=exit, log=log): - return decoratee(*args, **kwargs) # type: ignore + return decoratee(*args, **kwargs) return cast(F, decorated) return decorator