diff --git a/src/ephemeris/_config_models.py b/src/ephemeris/_config_models.py index ce74804..a45113a 100644 --- a/src/ephemeris/_config_models.py +++ b/src/ephemeris/_config_models.py @@ -30,6 +30,7 @@ class RepositoryInstallTarget(BaseModel): class RepositoryInstallTargets(BaseModel): """ """ + api_key: Optional[str] galaxy_instance: Optional[str] tools: List[RepositoryInstallTarget] @@ -58,7 +59,9 @@ class Genome(BaseModel): version: Optional[str] # Any version information associated with the data # Description of actions (data managers) to run on target genome. - indexers: Optional[List[str]] # indexers to run - keyed on repository name - see data_managers.yml for how to resolve these to tools + indexers: Optional[ + List[str] + ] # indexers to run - keyed on repository name - see data_managers.yml for how to resolve these to tools skiplist: Optional[List[str]] # unimplemented: but if we implement classes of indexers, these will be ones to skip diff --git a/src/ephemeris/_idc_data_managers_to_tools.py b/src/ephemeris/_idc_data_managers_to_tools.py index 20f7b68..dd14ead 100644 --- a/src/ephemeris/_idc_data_managers_to_tools.py +++ b/src/ephemeris/_idc_data_managers_to_tools.py @@ -84,8 +84,8 @@ def _parser(): general_group = parser.add_argument_group("General options") add_verbosity_argument(general_group) add_log_file_argument(general_group) - parser.add_argument('--data-managers-conf', default="data_managers.yml") - parser.add_argument('--shed-install-output-conf', default="tools.yml") + parser.add_argument("--data-managers-conf", default="data_managers.yml") + parser.add_argument("--shed-install-output-conf", default="tools.yml") return parser diff --git a/src/ephemeris/_idc_lint.py b/src/ephemeris/_idc_lint.py index e8949ea..6b1f32b 100644 --- a/src/ephemeris/_idc_lint.py +++ b/src/ephemeris/_idc_lint.py @@ -25,11 +25,13 @@ def lint_idc_directory(directory: Path): for data_manager in data_managers.values(): data_manager_tool_id = data_manager.tool_id if not data_manager_tool_id.startswith("toolshed.g2.bx.psu.edu/"): - raise Exception(f"Expected a data manager repository from main Galaxy tool shed but discovered tool ID {data_manager_tool_id}") + raise Exception( + f"Expected a data manager repository from main Galaxy tool shed but discovered tool ID {data_manager_tool_id}" + ) for genome in genomes.genomes: print(genome) - for indexer in (genome.indexers or []): + for indexer in genome.indexers or []: if indexer not in data_managers: raise Exception(f"Failed to find data manager {indexer} referenced for genome {genome}") diff --git a/src/ephemeris/_idc_split_data_manager_genomes.py b/src/ephemeris/_idc_split_data_manager_genomes.py index 2392c11..c078f90 100644 --- a/src/ephemeris/_idc_split_data_manager_genomes.py +++ b/src/ephemeris/_idc_split_data_manager_genomes.py @@ -184,9 +184,7 @@ def walk_over_incomplete_runs(split_options: SplitOptions): fetch_params.append({"sequence_name": description}) elif re.match("^[A-Z_]+[0-9.]+", source): fetch_params.append({"reference_source|reference_source_selector": "ncbi"}) - fetch_params.append( - {"reference_source|requested_identifier": source} - ) + fetch_params.append({"reference_source|requested_identifier": source}) fetch_params.append({"sequence_name": genome["description"]}) fetch_params.append({"sequence.id": genome["id"]}) elif re.match("^http", source): @@ -247,7 +245,6 @@ def walk_over_incomplete_runs(split_options: SplitOptions): def split_genomes(split_options: SplitOptions) -> None: - def write_task_file(build_id: str, indexer: str, run_data_manager: RunDataManager): split_genomes_path = split_options.split_genomes_path if not os.path.exists(split_options.split_genomes_path): @@ -262,7 +259,6 @@ def write_task_file(build_id: str, indexer: str, run_data_manager: RunDataManage class GalaxyHistoryIsBuildComplete: - def __init__(self, history_names: List[str]): self._history_names = history_names @@ -272,7 +268,6 @@ def __call__(self, build_id: str, indexer_name: str) -> bool: class CVMFSPublishIsComplete: - def __init__(self, records: Dict[str, List[str]]): self.records = records @@ -284,18 +279,18 @@ def _parser(): """returns the parser object.""" # login required to check history... parser = get_common_args(login_required=True, log_file=True) - parser.add_argument('--merged-genomes-path', '-m', default="genomes.yml") - parser.add_argument('--split-genomes-path', '-s', default="data_manager_tasks") - parser.add_argument('--data-managers-path', default="data_managers.yml") - parser.add_argument('--complete-check-cvmfs', default=False, action="store_true") - parser.add_argument('--cvmfs-root', default="/cvmfs/idc.galaxyproject.org") + parser.add_argument("--merged-genomes-path", "-m", default="genomes.yml") + parser.add_argument("--split-genomes-path", "-s", default="data_manager_tasks") + parser.add_argument("--data-managers-path", default="data_managers.yml") + parser.add_argument("--complete-check-cvmfs", default=False, action="store_true") + parser.add_argument("--cvmfs-root", default="/cvmfs/idc.galaxyproject.org") parser.add_argument("--tool-id-mode", choices=["tool_shed_guid", "short"], default=DEFAULT_TOOL_ID_MODE) # filters - parser.add_argument('--filter-stage', default=None) - parser.add_argument('--filter-data-manager', default=None) - parser.add_argument('--filter-build-id', default=None) + parser.add_argument("--filter-stage", default=None) + parser.add_argument("--filter-data-manager", default=None) + parser.add_argument("--filter-build-id", default=None) return parser diff --git a/src/ephemeris/run_data_managers.py b/src/ephemeris/run_data_managers.py index 377249b..c109429 100644 --- a/src/ephemeris/run_data_managers.py +++ b/src/ephemeris/run_data_managers.py @@ -296,7 +296,10 @@ def run_jobs(jobs, skipped_jobs): all_skipped_jobs.append(skipped_job) for job in jobs: started_job = self.tool_client.run_tool( - history_id=history_id, tool_id=job["tool_id"], tool_inputs=job["inputs"], data_manager_mode=data_manager_mode + history_id=history_id, + tool_id=job["tool_id"], + tool_inputs=job["inputs"], + data_manager_mode=data_manager_mode, ) log.info( 'Dispatched job %i. Running DM: "%s" with parameters: %s' @@ -357,7 +360,9 @@ def _parser(): action="store_true", help="Do not stop running when jobs have failed.", ) - parser.add_argument("--data-manager-mode", "--data_manager_mode", choices=["bundle", "populate", "dry_run"], default="populate") + parser.add_argument( + "--data-manager-mode", "--data_manager_mode", choices=["bundle", "populate", "dry_run"], default="populate" + ) parser.add_argument("--history-name", default=None) return parser @@ -374,7 +379,13 @@ def main(argv=None): gi = get_galaxy_connection(args, file=args.config, log=log, login_required=True) config = load_yaml_file(args.config) data_managers = DataManagers(gi, config) - data_managers.run(log, args.ignore_errors, args.overwrite, data_manager_mode=args.data_manager_mode, history_name=args.history_name) + data_managers.run( + log, + args.ignore_errors, + args.overwrite, + data_manager_mode=args.data_manager_mode, + history_name=args.history_name, + ) if __name__ == "__main__": diff --git a/src/ephemeris/shed_tools.py b/src/ephemeris/shed_tools.py index e68f1b3..76f0019 100644 --- a/src/ephemeris/shed_tools.py +++ b/src/ephemeris/shed_tools.py @@ -33,6 +33,7 @@ Galaxy's configuration directory and set Galaxy configuration option `tool_config_file` to include it. """ + import datetime as dt import json import logging diff --git a/tests/test_split_genomes.py b/tests/test_split_genomes.py index 2ebb39f..005daa9 100644 --- a/tests/test_split_genomes.py +++ b/tests/test_split_genomes.py @@ -90,7 +90,10 @@ def test_split_genomes(tmp_path: Path): run = read_and_validate_run_data_manager_yaml(new_task_run_yaml) assert len(run.data_managers) == 1 data_manager = run.data_managers[0] - assert data_manager.id == "toolshed.g2.bx.psu.edu/repos/devteam/data_manager_twobit_builder/twobit_builder_data_manager/0.0.2" + assert ( + data_manager.id + == "toolshed.g2.bx.psu.edu/repos/devteam/data_manager_twobit_builder/twobit_builder_data_manager/0.0.2" + ) assert data_manager.items[0]["id"] == "hg19_rCRS_pUC18_phiX174" assert data_manager.items[0]["dbkey"] == "hg19_rCRS_pUC18_phiX174"