Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose number of jobs to organize #1341

Merged
11 changes: 11 additions & 0 deletions dandi/cli/cmd_organize.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

from typing import Optional

import click

from .base import dandiset_path_option, devel_debug_option, map_to_click_exceptions
Expand Down Expand Up @@ -59,6 +61,13 @@
),
)
@click.argument("paths", nargs=-1, type=click.Path(exists=True))
@click.option(
"--number-of-jobs",
"number_of_jobs",
type=int,
default=None,
help="The number of jobs to use during organization.",
)
CodyCBakerPhD marked this conversation as resolved.
Show resolved Hide resolved
@devel_debug_option()
@map_to_click_exceptions
def organize(
Expand All @@ -70,6 +79,7 @@ def organize(
devel_debug=False,
update_external_file_paths=False,
media_files_mode=None,
number_of_jobs: Optional[int] = None,
CodyCBakerPhD marked this conversation as resolved.
Show resolved Hide resolved
):
"""(Re)organize files according to the metadata.

Expand Down Expand Up @@ -115,4 +125,5 @@ def organize(
update_external_file_paths=update_external_file_paths,
media_files_mode=media_files_mode,
required_fields=required_fields,
number_of_jobs=number_of_jobs,
CodyCBakerPhD marked this conversation as resolved.
Show resolved Hide resolved
)
6 changes: 4 additions & 2 deletions dandi/organize.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,8 +721,10 @@ def organize(
update_external_file_paths=False,
media_files_mode=None,
required_fields=None,
number_of_jobs: Optional[int] = None,
CodyCBakerPhD marked this conversation as resolved.
Show resolved Hide resolved
):
in_place = False # If we deduce that we are organizing in-place
number_of_jobs = number_of_jobs or -1
CodyCBakerPhD marked this conversation as resolved.
Show resolved Hide resolved

# will come in handy when dry becomes a proper separate option
def dry_print(msg):
Expand Down Expand Up @@ -812,12 +814,12 @@ def _get_metadata(path):
meta["path"] = path
return meta

if not devel_debug:
if not devel_debug and number_of_jobs != 1: # Do not use joblib at all if number_of_jobs=1
CodyCBakerPhD marked this conversation as resolved.
Show resolved Hide resolved
# Note: the work is Python (pynwb) intensive rather than IO bound, so at the
# moment there is little to no benefit from Parallel without using
# multiprocessing! But that would complicate progress bar indication... TODO
metadata = list(
Parallel(n_jobs=-1, verbose=10)(
Parallel(n_jobs=number_of_jobs, verbose=10)(
CodyCBakerPhD marked this conversation as resolved.
Show resolved Hide resolved
delayed(_get_metadata)(path) for path in paths
)
)
Expand Down
Loading