Skip to content

Commit

Permalink
Improve verdi node delete performance (#4575)
Browse files Browse the repository at this point in the history
The `verdi node delete` process fully loaded all ORM objects at multiple stages
during the process, which is highly inefficient.
This commit ensures the process now only loads the PKs when possible.
As an example, the time to delete 100 "empty" nodes (no attributes/objects)
is now reduced from ~32 seconds to ~5 seconds.
  • Loading branch information
chrisjsewell authored Nov 18, 2020
1 parent 20996d1 commit d29fb3b
Show file tree
Hide file tree
Showing 6 changed files with 143 additions and 113 deletions.
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ repos:
(?x)^(
aiida/common/progress_reporter.py|
aiida/engine/processes/calcjobs/calcjob.py|
aiida/manage/database/delete/nodes.py|
aiida/tools/graph/graph_traversers.py|
aiida/tools/groups/paths.py|
aiida/tools/importexport/archive/.*py|
aiida/tools/importexport/dbexport/__init__.py|
Expand Down
16 changes: 12 additions & 4 deletions aiida/cmdline/commands/cmd_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,19 +296,20 @@ def tree(nodes, depth):


@verdi_node.command('delete')
@arguments.NODES('nodes', required=True)
@click.argument('identifier', nargs=-1, metavar='NODES')
@options.VERBOSE()
@options.DRY_RUN()
@options.FORCE()
@options.graph_traversal_rules(GraphTraversalRules.DELETE.value)
@with_dbenv()
def node_delete(nodes, dry_run, verbose, force, **kwargs):
def node_delete(identifier, dry_run, verbose, force, **kwargs):
"""Delete nodes from the provenance graph.
This will not only delete the nodes explicitly provided via the command line, but will also include
the nodes necessary to keep a consistent graph, according to the rules outlined in the documentation.
You can modify some of those rules using options of this command.
"""
from aiida.orm.utils.loaders import NodeEntityLoader
from aiida.manage.database.delete.nodes import delete_nodes

verbosity = 1
Expand All @@ -317,9 +318,16 @@ def node_delete(nodes, dry_run, verbose, force, **kwargs):
elif verbose:
verbosity = 2

node_pks_to_delete = [node.pk for node in nodes]
pks = []

delete_nodes(node_pks_to_delete, dry_run=dry_run, verbosity=verbosity, force=force, **kwargs)
for obj in identifier:
# we only load the node if we need to convert from a uuid/label
if isinstance(obj, int):
pks.append(obj)
else:
pks.append(NodeEntityLoader.load_entity(obj).pk)

delete_nodes(pks, dry_run=dry_run, verbosity=verbosity, force=force, **kwargs)


@verdi_node.command('rehash')
Expand Down
66 changes: 32 additions & 34 deletions aiida/manage/database/delete/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@
# For further information please visit http://www.aiida.net #
###########################################################################
"""Function to delete nodes from the database."""
from typing import Iterable

import click
from aiida.cmdline.utils import echo


def delete_nodes(pks, verbosity=0, dry_run=False, force=False, **kwargs):
def delete_nodes(
pks: Iterable[int], verbosity: int = 0, dry_run: bool = False, force: bool = False, **traversal_rules: bool
):
"""Delete nodes by a list of pks.
This command will delete not only the specified nodes, but also the ones that are
Expand All @@ -36,61 +39,56 @@ def delete_nodes(pks, verbosity=0, dry_run=False, force=False, **kwargs):
inputs, and so on.
:param pks: a list of the PKs of the nodes to delete
:param bool force: do not ask for confirmation to delete nodes.
:param int verbosity: 0 prints nothing,
:param force: do not ask for confirmation to delete nodes.
:param verbosity: 0 prints nothing,
1 prints just sums and total,
2 prints individual nodes.
:param kwargs: graph traversal rules. See :const:`aiida.common.links.GraphTraversalRules` what rule names
:param dry_run:
Just perform a dry run and do not delete anything.
Print statistics according to the verbosity level set.
:param force: Do not ask for confirmation to delete nodes
:param traversal_rules: graph traversal rules. See :const:`aiida.common.links.GraphTraversalRules` what rule names
are toggleable and what the defaults are.
:param bool dry_run:
Just perform a dry run and do not delete anything. Print statistics according
to the verbosity level set.
:param bool force:
Do not ask for confirmation to delete nodes.
"""
# pylint: disable=too-many-arguments,too-many-branches,too-many-locals,too-many-statements
from aiida.backends.utils import delete_nodes_and_connections
from aiida.common import exceptions
from aiida.orm import Node, QueryBuilder, load_node
from aiida.tools.graph.graph_traversers import get_nodes_delete

starting_pks = []
for pk in pks:
try:
load_node(pk)
except exceptions.NotExistent:
echo.echo_warning(f'warning: node with pk<{pk}> does not exist, skipping')
else:
starting_pks.append(pk)
def _missing_callback(_pks: Iterable[int]):
for _pk in _pks:
echo.echo_warning(f'warning: node with pk<{_pk}> does not exist, skipping')

pks_set_to_delete = get_nodes_delete(pks, get_links=False, missing_callback=_missing_callback,
**traversal_rules)['nodes']

# An empty set might be problematic for the queries done below.
if not starting_pks:
if not pks_set_to_delete:
if verbosity:
echo.echo('Nothing to delete')
return

pks_set_to_delete = get_nodes_delete(starting_pks, **kwargs)['nodes']

if verbosity > 0:
echo.echo(
'I {} delete {} node{}'.format(
'would' if dry_run else 'will', len(pks_set_to_delete), 's' if len(pks_set_to_delete) > 1 else ''
)
)
if verbosity > 1:
builder = QueryBuilder().append(
Node, filters={'id': {
'in': pks_set_to_delete
}}, project=('uuid', 'id', 'node_type', 'label')
)
echo.echo(f"The nodes I {'would' if dry_run else 'will'} delete:")
for uuid, pk, type_string, label in builder.iterall():
try:
short_type_string = type_string.split('.')[-2]
except IndexError:
short_type_string = type_string
echo.echo(f' {uuid} {pk} {short_type_string} {label}')
if verbosity > 1:
builder = QueryBuilder().append(
Node, filters={'id': {
'in': pks_set_to_delete
}}, project=('uuid', 'id', 'node_type', 'label')
)
echo.echo(f"The nodes I {'would' if dry_run else 'will'} delete:")
for uuid, pk, type_string, label in builder.iterall():
try:
short_type_string = type_string.split('.')[-2]
except IndexError:
short_type_string = type_string
echo.echo(f' {uuid} {pk} {short_type_string} {label}')

if dry_run:
if verbosity > 0:
Expand Down
Loading

0 comments on commit d29fb3b

Please sign in to comment.