Skip to content

Commit

Permalink
CLI: Add the verdi calcjob remotecat command (#4861)
Browse files Browse the repository at this point in the history
The command can be used to inspect the files in the working directory
of a `CalcJob` while it is still running. The `outputcat` command only
works on the files in the `retrieved` folder, so it only works for jobs
that have already been retrieved. There is the `verdi data core.remote cat`
command, but there the user needs to specify the identifier of the
`remote_folder` of the calcjob and the output file. The
`verdi calcjob remotecat` command simplifies these steps.

Adding a `--monitor` option to simulate the functionality of a
`tail -f` call on an output file was discussed, but this would require
adapting the `Transport` interface, which may get refactored soon, so
it was decided to omit that feature for the time being.

Co-authored-by: Sebastiaan Huber <mail@sphuber.net>
  • Loading branch information
zhubonan and sphuber authored Dec 14, 2022
1 parent a0cf2ba commit 7df4f60
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 7 deletions.
78 changes: 78 additions & 0 deletions aiida/cmdline/commands/cmd_calcjob.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,31 @@ def calcjob_inputcat(calcjob, path):
echo.echo_critical(f'Could not open output path "{path}". Exception: {exception}')


@verdi_calcjob.command('remotecat')
@arguments.CALCULATION('calcjob', type=CalculationParamType(sub_classes=('aiida.node:process.calculation.calcjob',)))
@click.argument('path', type=str, required=False)
@decorators.with_dbenv()
def calcjob_remotecat(calcjob, path):
    """Show the contents of a file in the remote working directory.

    The file to show can be specified using the PATH argument. If PATH is not specified, the default output file path
    as defined by the `CalcJob` plugin class will be used instead.
    """
    import shutil
    import sys
    import tempfile

    try:
        remote_folder, path = get_remote_and_path(calcjob, path)
    except ValueError as exception:
        # `get_remote_and_path` raises `ValueError` when no PATH was given and no default output filename can be
        # determined. Report it as a regular command line error instead of letting the exception propagate.
        echo.echo_critical(str(exception))

    # The remote file is first copied into a local temporary file, whose raw bytes are then streamed to stdout.
    with tempfile.NamedTemporaryFile() as tmp_path:
        try:
            remote_folder.getfile(path, tmp_path.name)
            with open(tmp_path.name, 'rb') as handle:
                # Write to the underlying binary buffer so the content is passed through without decoding.
                shutil.copyfileobj(handle, sys.stdout.buffer)
        except IOError as exception:
            echo.echo_critical(str(exception))


@verdi_calcjob.command('outputcat')
@arguments.CALCULATION('calcjob', type=CalculationParamType(sub_classes=('aiida.node:process.calculation.calcjob',)))
@click.argument('path', type=click.STRING, required=False)
Expand Down Expand Up @@ -279,3 +304,56 @@ def calcjob_cleanworkdir(calcjobs, past_days, older_than, computers, force, exit
counter += 1

echo.echo_success(f'{counter} remote folders cleaned on {computer.label}')


def get_remote_and_path(calcjob, path=None):
    """Return the remote folder output node and process the path argument.

    The path is resolved in the following order: the explicit ``path`` argument, the ``output_filename`` option
    stored on the node, and finally the default of the ``output_filename`` option port of the process class.

    If the node has no ``remote_folder`` output, a critical error is reported through ``echo.echo_critical``.

    :param calcjob: The ``CalcJobNode`` whose ``remote_folder`` output is to be returned.
    :param path: The relative path of file. If not defined, it is attempted to determine the default output file from
        the node options or otherwise from the associated process class. If neither are defined, a ``ValueError`` is
        raised.
    :returns: A tuple of the ``RemoteData`` and the path of the output file to be used.
    :raises ValueError: If path is not defined and no default output file is defined on the node nor its associated
        process class, or if the process class cannot be loaded.
    """
    remote_folder_linkname = 'remote_folder'  # The `remote_folder` is the standard output of a calculation.

    try:
        remote_folder = getattr(calcjob.outputs, remote_folder_linkname)
    except AttributeError:
        echo.echo_critical(
            f'`CalcJobNode<{calcjob.pk}>` has no `{remote_folder_linkname}` output. '
            'It probably has not started running yet.'
        )

    # An explicitly specified path always takes precedence
    if path is not None:
        return remote_folder, path

    # Try to get the default output filename from the node
    path = calcjob.get_option('output_filename')

    if path is not None:
        return remote_folder, path

    try:
        process_class = calcjob.process_class
    except ValueError as exception:
        raise ValueError(
            f'The process class of `CalcJobNode<{calcjob.pk}>` cannot be loaded and so the default output filename '
            'cannot be determined.\nPlease specify a path explicitly.'
        ) from exception

    # Try to get the default output filename from the node's associated process class spec
    port = process_class.spec_options.get('output_filename')
    if port and port.has_default():
        path = port.default

    if path is not None:
        return remote_folder, path

    # `process_class` is itself a class, so its own `__name__` is reported: going through `__class__` would print
    # the name of its metaclass instead.
    raise ValueError(
        f'`CalcJobNode<{calcjob.pk}>` does not define a default output file (option "output_filename" not found) '
        f'nor does its associated process class `{process_class.__name__}`\n'
        'Please specify a path explicitly.'
    )
1 change: 1 addition & 0 deletions docs/source/reference/command_line.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Below is a list with all available subcommands.
inputls Show the list of the generated calcjob input files.
outputcat Show the contents of one of the calcjob retrieved outputs.
outputls Show the list of the retrieved calcjob output files.
remotecat Show the contents of a file in the remote working directory.
res Print data from the result output Dict node of a calcjob.
Expand Down
46 changes: 39 additions & 7 deletions tests/cmdline/commands/test_calcjob.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ class TestVerdiCalculation:
"""Tests for `verdi calcjob`."""

@pytest.fixture(autouse=True)
def init_profile(self, aiida_profile_clean, aiida_localhost): # pylint: disable=unused-argument
def init_profile(self, aiida_profile_clean, aiida_localhost, tmp_path): # pylint: disable=unused-argument
"""Initialize the profile."""
# pylint: disable=attribute-defined-outside-init
# pylint: disable=attribute-defined-outside-init,too-many-statements

self.computer = aiida_localhost
self.code = orm.InstalledCode(computer=self.computer, filepath_executable='/bin/true').store()
Expand All @@ -47,14 +47,20 @@ def init_profile(self, aiida_profile_clean, aiida_localhost): # pylint: disable
process_type = get_entry_point_string_from_class(process_class.__module__, process_class.__name__)

# Create 5 CalcJobNodes (one for each CalculationState)
for calculation_state in CalcJobState:
for index, calculation_state in enumerate(CalcJobState):

dirpath = (tmp_path / str(index))
dirpath.mkdir()

calc = orm.CalcJobNode(computer=self.computer, process_type=process_type)
calc.set_option('resources', {'num_machines': 1, 'num_mpiprocs_per_machine': 1})
calc.set_remote_workdir('/tmp/aiida/work')
remote = RemoteData(remote_path='/tmp/aiida/work')
calc.set_option('output_filename', 'fileA.txt')
calc.set_remote_workdir(str(dirpath))
remote = RemoteData(remote_path=str(dirpath))
remote.computer = calc.computer
remote.base.links.add_incoming(calc, LinkType.CREATE, link_label='remote_folder')
(dirpath / 'fileA.txt').write_text('test stringA')
(dirpath / 'fileB.txt').write_text('test stringB')
calc.store()
remote.store()

Expand All @@ -81,14 +87,16 @@ def init_profile(self, aiida_profile_clean, aiida_localhost): # pylint: disable
self.group.add_nodes([calc])

# Create a single failed CalcJobNode
dirpath = (tmp_path / 'failed')
dirpath.mkdir()
self.EXIT_STATUS = 100
calc = orm.CalcJobNode(computer=self.computer)
calc.set_option('resources', {'num_machines': 1, 'num_mpiprocs_per_machine': 1})
calc.store()
calc.set_exit_status(self.EXIT_STATUS)
calc.set_process_state(ProcessState.FINISHED)
calc.set_remote_workdir('/tmp/aiida/work')
remote = RemoteData(remote_path='/tmp/aiida/work')
calc.set_remote_workdir(str(tmp_path))
remote = RemoteData(remote_path=str(tmp_path))
remote.computer = calc.computer
remote.base.links.add_incoming(calc, LinkType.CREATE, link_label='remote_folder')
remote.store()
Expand Down Expand Up @@ -325,3 +333,27 @@ def test_calcjob_inoutputcat_old(self):
assert result.exception is None, result.output
assert len(get_result_lines(result)) == 1
assert get_result_lines(result)[0] == '5'

def test_calcjob_remotecat(self):
    """Check that ``verdi calcjob remotecat`` prints files from the remote working directory of a calcjob."""
    remotecat = command.calcjob_remotecat

    # Invocations that must end with an exception:
    #   * no calcjob identifier at all
    #   * ``self.calcs[-1]``, for which `fileB.txt` is expected not to be retrievable (presumably the failed calc
    #     set up by the fixture - verify against `init_profile`)
    failing_invocations = (
        [],
        [str(self.calcs[-1].uuid), 'fileB.txt'],
    )
    for options in failing_invocations:
        result = self.cli_runner.invoke(remotecat, options)
        assert result.exception is not None, result.output

    # Invocations that must print the file content. When PATH is omitted the command falls back to the default
    # output file, which is expected to be `fileA.txt` here.
    identifier = str(self.result_job.uuid)
    expected_outputs = (
        ([identifier, 'fileB.txt'], 'test stringB'),
        ([identifier], 'test stringA'),
        ([identifier, 'fileA.txt'], 'test stringA'),
    )
    for options, content in expected_outputs:
        result = self.cli_runner.invoke(remotecat, options)
        assert result.stdout == content

0 comments on commit 7df4f60

Please sign in to comment.