CLI: Add the verdi calcjob remotecat command (#4861)

The command can be used to inspect files in the working directory of a `CalcJob` while it is still running. The existing `verdi calcjob outputcat` command only reads files from the `retrieved` folder, so it only works once the outputs of the job have been retrieved. The `verdi data core.remote cat` command can read remote files, but it requires the user to specify both the identifier of the `remote_folder` node of the calcjob and the output file. The `verdi calcjob remotecat` command simplifies these steps. Adding a `--monitor` option to simulate a `tail -f` call on an output file was discussed, but this would require adapting the `Transport` interface, which may get refactored soon, so it was decided to omit that feature for the time being.

Co-authored-by: Sebastiaan Huber <mail@sphuber.net>
aiidateam · Dec 14, 2022 · 7df4f60 · 7df4f60
1 parent a0cf2ba
commit 7df4f60
Show file tree

Hide file tree

Showing 3 changed files with 118 additions and 7 deletions.
diff --git a/aiida/cmdline/commands/cmd_calcjob.py b/aiida/cmdline/commands/cmd_calcjob.py
@@ -121,6 +121,31 @@ def calcjob_inputcat(calcjob, path):
             echo.echo_critical(f'Could not open output path "{path}". Exception: {exception}')
 
 
+@verdi_calcjob.command('remotecat')
+@arguments.CALCULATION('calcjob', type=CalculationParamType(sub_classes=('aiida.node:process.calculation.calcjob',)))
+@click.argument('path', type=str, required=False)
+@decorators.with_dbenv()
+def calcjob_remotecat(calcjob, path):
+    """Show the contents of a file in the remote working directory.
+
+    The file to show can be specified using the PATH argument. If PATH is not specified, the default output file path
+    as defined by the `CalcJob` plugin class will be used instead.
+    """
+    import shutil
+    import sys
+    import tempfile
+
+    remote_folder, path = get_remote_and_path(calcjob, path)
+
+    with tempfile.NamedTemporaryFile() as tmp_path:
+        try:
+            remote_folder.getfile(path, tmp_path.name)
+            with open(tmp_path.name, 'rb') as handle:
+                shutil.copyfileobj(handle, sys.stdout.buffer)
+        except IOError as exception:
+            echo.echo_critical(str(exception))
+
+
 @verdi_calcjob.command('outputcat')
 @arguments.CALCULATION('calcjob', type=CalculationParamType(sub_classes=('aiida.node:process.calculation.calcjob',)))
 @click.argument('path', type=click.STRING, required=False)
@@ -279,3 +304,56 @@ def calcjob_cleanworkdir(calcjobs, past_days, older_than, computers, force, exit
                 counter += 1
 
         echo.echo_success(f'{counter} remote folders cleaned on {computer.label}')
+
+
+def get_remote_and_path(calcjob, path=None):
+    """Return the remote folder output node and process the path argument.
+
+    :param calcjob: The ``CalcJobNode`` whose remote_folder to be returned.
+    :param path: The relative path of file. If not defined, it is attempted to determine the default output file from
+        the node options or otherwise from the associated process class. If neither are defined, a ``ValueError`` is
+        raised.
+    :returns: A tuple of the ``RemoteData`` and the path of the output file to be used.
+    :raises ValueError: If path is not defined and no default output file is defined on the node nor its associated
+        process class.
+    """
+    remote_folder_linkname = 'remote_folder'  # The `remote_folder` is the standard output of a calculation.
+
+    try:
+        remote_folder = getattr(calcjob.outputs, remote_folder_linkname)
+    except AttributeError:
+        echo.echo_critical(
+            f'`CalcJobNode<{calcjob.pk}>` has no `{remote_folder_linkname}` output. '
+            'It probably has not started running yet.'
+        )
+
+    if path is not None:
+        return remote_folder, path
+
+    # Try to get the default output filename from the node
+    path = calcjob.get_option('output_filename')
+
+    if path is not None:
+        return remote_folder, path
+
+    try:
+        process_class = calcjob.process_class
+    except ValueError as exception:
+        raise ValueError(
+            f'The process class of `CalcJobNode<{calcjob.pk}>` cannot be loaded and so the default output filename '
+            'cannot be determined.\nPlease specify a path explicitly.'
+        ) from exception
+
+    # Try to get the default output filename from the node's associated process class spec
+    port = process_class.spec_options.get('output_filename')
+    if port and port.has_default():
+        path = port.default
+
+    if path is not None:
+        return remote_folder, path
+
+    raise ValueError(
+        f'`CalcJobNode<{calcjob.pk}>` does not define a default output file (option "output_filename" not found) '
+        f'nor does its associated process class `{calcjob.process_class.__class__.__name__}`\n'
+        'Please specify a path explicitly.'
+    )
diff --git a/docs/source/reference/command_line.rst b/docs/source/reference/command_line.rst
@@ -53,6 +53,7 @@ Below is a list with all available subcommands.
       inputls       Show the list of the generated calcjob input files.
       outputcat     Show the contents of one of the calcjob retrieved outputs.
       outputls      Show the list of the retrieved calcjob output files.
+      remotecat     Show the contents of a file in the remote working directory.
       res           Print data from the result output Dict node of a calcjob.
 
 

diff --git a/tests/cmdline/commands/test_calcjob.py b/tests/cmdline/commands/test_calcjob.py
@@ -33,9 +33,9 @@ class TestVerdiCalculation:
     """Tests for `verdi calcjob`."""
 
     @pytest.fixture(autouse=True)
-    def init_profile(self, aiida_profile_clean, aiida_localhost):  # pylint: disable=unused-argument
+    def init_profile(self, aiida_profile_clean, aiida_localhost, tmp_path):  # pylint: disable=unused-argument
         """Initialize the profile."""
-        # pylint: disable=attribute-defined-outside-init
+        # pylint: disable=attribute-defined-outside-init,too-many-statements
 
         self.computer = aiida_localhost
         self.code = orm.InstalledCode(computer=self.computer, filepath_executable='/bin/true').store()
@@ -47,14 +47,20 @@ def init_profile(self, aiida_profile_clean, aiida_localhost):  # pylint: disable
         process_type = get_entry_point_string_from_class(process_class.__module__, process_class.__name__)
 
         # Create 5 CalcJobNodes (one for each CalculationState)
-        for calculation_state in CalcJobState:
+        for index, calculation_state in enumerate(CalcJobState):
+
+            dirpath = (tmp_path / str(index))
+            dirpath.mkdir()
 
             calc = orm.CalcJobNode(computer=self.computer, process_type=process_type)
             calc.set_option('resources', {'num_machines': 1, 'num_mpiprocs_per_machine': 1})
-            calc.set_remote_workdir('/tmp/aiida/work')
-            remote = RemoteData(remote_path='/tmp/aiida/work')
+            calc.set_option('output_filename', 'fileA.txt')
+            calc.set_remote_workdir(str(dirpath))
+            remote = RemoteData(remote_path=str(dirpath))
             remote.computer = calc.computer
             remote.base.links.add_incoming(calc, LinkType.CREATE, link_label='remote_folder')
+            (dirpath / 'fileA.txt').write_text('test stringA')
+            (dirpath / 'fileB.txt').write_text('test stringB')
             calc.store()
             remote.store()
 
@@ -81,14 +87,16 @@ def init_profile(self, aiida_profile_clean, aiida_localhost):  # pylint: disable
                 self.group.add_nodes([calc])
 
         # Create a single failed CalcJobNode
+        dirpath = (tmp_path / 'failed')
+        dirpath.mkdir()
         self.EXIT_STATUS = 100
         calc = orm.CalcJobNode(computer=self.computer)
         calc.set_option('resources', {'num_machines': 1, 'num_mpiprocs_per_machine': 1})
         calc.store()
         calc.set_exit_status(self.EXIT_STATUS)
         calc.set_process_state(ProcessState.FINISHED)
-        calc.set_remote_workdir('/tmp/aiida/work')
-        remote = RemoteData(remote_path='/tmp/aiida/work')
+        calc.set_remote_workdir(str(tmp_path))
+        remote = RemoteData(remote_path=str(tmp_path))
         remote.computer = calc.computer
         remote.base.links.add_incoming(calc, LinkType.CREATE, link_label='remote_folder')
         remote.store()
@@ -325,3 +333,27 @@ def test_calcjob_inoutputcat_old(self):
         assert result.exception is None, result.output
         assert len(get_result_lines(result)) == 1
         assert get_result_lines(result)[0] == '5'
+
+    def test_calcjob_remotecat(self):
+        """Test the remotecat command that prints the remote file for a given calcjob"""
+        # Specifying no filtering options and no explicit calcjobs should exit with non-zero status
+        options = []
+        result = self.cli_runner.invoke(command.calcjob_remotecat, options)
+        assert result.exception is not None, result.output
+
+        # This should be the failed calc without remote data - exception raised
+        options = [str(self.calcs[-1].uuid), 'fileB.txt']
+        result = self.cli_runner.invoke(command.calcjob_remotecat, options)
+        assert result.exception is not None, result.output
+
+        options = [str(self.result_job.uuid), 'fileB.txt']
+        result = self.cli_runner.invoke(command.calcjob_remotecat, options)
+        assert result.stdout == 'test stringB'
+
+        options = [str(self.result_job.uuid)]
+        result = self.cli_runner.invoke(command.calcjob_remotecat, options)
+        assert result.stdout == 'test stringA'
+
+        options = [str(self.result_job.uuid), 'fileA.txt']
+        result = self.cli_runner.invoke(command.calcjob_remotecat, options)
+        assert result.stdout == 'test stringA'