From 8f368de8fe65b61a550ed4284298c69ab56a97cb Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Tue, 13 Feb 2024 23:01:50 -0500 Subject: [PATCH] :art: Replace `print` with `logger.*` or `raise Exception` --- CPAC/utils/bids_utils.py | 141 +++++++++++++++++++++++++++++---------- 1 file changed, 104 insertions(+), 37 deletions(-) diff --git a/CPAC/utils/bids_utils.py b/CPAC/utils/bids_utils.py index 1337d63441..a911c187e5 100755 --- a/CPAC/utils/bids_utils.py +++ b/CPAC/utils/bids_utils.py @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2023 C-PAC Developers +# Copyright (C) 2016-2024 C-PAC Developers # This file is part of C-PAC. @@ -20,8 +20,13 @@ import sys from warnings import warn +from botocore.exceptions import BotoCoreError import yaml +from CPAC.utils.monitoring.custom_logging import getLogger + +logger = getLogger("nipype.workflow") + def bids_decode_fname(file_path, dbg=False, raise_error=True): f_dict = {} @@ -30,33 +35,39 @@ def bids_decode_fname(file_path, dbg=False, raise_error=True): # first lets make sure that we know how to handle the file if "nii" not in fname.lower() and "json" not in fname.lower(): - raise IOError( - "File (%s) does not appear to be" % fname + "a nifti or json file" - ) + msg = f"File ({fname}) does not appear to be a nifti or json file" + raise IOError(msg) if dbg: - pass + logger.debug("parsing %s", file_path) # first figure out if there is a site directory level, this isn't # specified in BIDS currently, but hopefully will be in the future file_path_vals = os.path.dirname(file_path).split("/") sub = [s for s in file_path_vals if "sub-" in s] if dbg: - pass + logger.debug("found subject %s in %s", sub, file_path_vals) if len(sub) > 1: - pass + logger.debug( + "Odd that there is more than one subject directory in (%s), does the" + " filename conform to BIDS format?", + file_path, + ) if sub: sub_ndx = file_path_vals.index(sub[0]) if sub_ndx > 0 and file_path_vals[sub_ndx - 1]: if dbg: - pass + logger.debug("setting site to %s", file_path_vals[sub_ndx - 1]) f_dict["site"] = file_path_vals[sub_ndx - 1] else: f_dict["site"] = "none" elif file_path_vals[-1]: if dbg: - pass + logger.debug( + "looking for subject id didn't pan out settling for last subdir %s", + file_path_vals[-1], + ) f_dict["site"] = file_path_vals[-1] else: f_dict["site"] = "none" @@ -83,7 +94,7 @@ def bids_decode_fname(file_path, dbg=False, raise_error=True): if raise_error: raise ValueError(msg) else: - pass + logger.error(msg) elif not f_dict["scantype"]: msg = ( f"Filename ({fname}) does not appear to contain" @@ -92,18 +103,17 @@ def bids_decode_fname(file_path, dbg=False, raise_error=True): if raise_error: raise ValueError(msg) else: - pass + logger.error(msg) else: if "bold" in f_dict["scantype"] and not f_dict["task"]: msg = ( - f"Filename ({fname}) is a BOLD file, but " - "doesn't contain a task, does it conform to the" - " BIDS format?" + f"Filename ({fname}) is a BOLD file, but doesn't contain a task, does" + " it conform to the BIDS format?" ) if raise_error: raise ValueError(msg) else: - pass + logger.error(msg) return f_dict @@ -275,14 +285,16 @@ def bids_retrieve_params(bids_config_dict, f_dict, dbg=False): key = "-".join([level, "none"]) if dbg: - pass + logger.debug(key) # if the key doesn't exist in the config dictionary, check to see if # the generic key exists and return that if key in t_dict: t_dict = t_dict[key] else: if dbg: - pass + logger.debug( + "Couldn't find %s, so going with %s", key, "-".join([level, "none"]) + ) key = "-".join([level, "none"]) if key in t_dict: t_dict = t_dict[key] @@ -293,7 +305,7 @@ def bids_retrieve_params(bids_config_dict, f_dict, dbg=False): # sidecar files if dbg: - pass + logger.debug(t_dict) for key in t_dict.keys(): if "RepetitionTime" in key: @@ -334,7 +346,7 @@ def bids_parse_sidecar(config_dict, dbg=False, raise_error=True): t_dict = t_dict[key] if dbg: - pass + logger.debug(bids_config_dict) # get the paths to the json yaml files in config_dict, the paths contain # the information needed to map the parameters from the jsons (the vals @@ -345,11 +357,11 @@ def bids_parse_sidecar(config_dict, dbg=False, raise_error=True): config_paths = sorted(config_dict.keys(), key=lambda p: len(p.split("/"))) if dbg: - pass + logger.debug(config_paths) for cp in config_paths: if dbg: - pass + logger.debug("processing %s", cp) # decode the filepath into its various components as defined by BIDS f_dict = bids_decode_fname(cp, raise_error=raise_error) @@ -503,7 +515,7 @@ def gen_bids_outputs_sublist(base_path, paths_list, key_list, creds_path): if run_info not in subjdict[subj_info]["funcs"]: subjdict[subj_info]["funcs"][run_info] = {"run_info": run_info} if resource in subjdict[subj_info]["funcs"][run_info]: - pass + logger.warning("resource %s already exists in subjdict ??", resource) subjdict[subj_info]["funcs"][run_info][resource] = p else: subjdict[subj_info][resource] = p @@ -513,6 +525,7 @@ def gen_bids_outputs_sublist(base_path, paths_list, key_list, creds_path): missing = 0 for tkey in top_keys: if tkey not in subj_res: + logger.warning("%s not found for %s", tkey, subj_info) missing += 1 break @@ -520,9 +533,11 @@ def gen_bids_outputs_sublist(base_path, paths_list, key_list, creds_path): for func_key, func_res in subj_res["funcs"].items(): for bkey in bot_keys: if bkey not in func_res: + logger.warning("%s not found for %s", bkey, func_key) missing += 1 break if missing == 0: + logger.info("adding: %s, %s, %d", subj_info, func_key, len(sublist)) tdict = copy.deepcopy(subj_res) del tdict["funcs"] tdict.update(func_res) @@ -584,7 +599,14 @@ def bids_gen_cpac_sublist( to be processed """ if dbg: - pass + logger.debug( + "gen_bids_sublist called with:\n bids_dir: %s\n # paths: %s" + "\n config_dict: %s\n creds_path: %s", + bids_dir, + len(paths_list), + "missing" if not config_dict else "found", + creds_path, + ) # if configuration information is not desired, config_dict will be empty, # otherwise parse the information in the sidecar json files into a dict @@ -615,7 +637,9 @@ def bids_gen_cpac_sublist( if config_dict: t_params = bids_retrieve_params(bids_config_dict, f_dict) if not t_params: - pass + logger.warning( + "Did not receive any parameters for %s, is this a problem?", p + ) task_info = { "scan": os.path.join(bids_dir, p), @@ -652,7 +676,15 @@ def bids_gen_cpac_sublist( "lesion_mask" ] = task_info["scan"] else: - pass + logger.warning( + "Lesion mask file (%s) already found for (%s:%s)" + " discarding %s", + subdict[f_dict["sub"]][f_dict["ses"]]["lesion_mask"], + f_dict["sub"], + f_dict["ses"], + p, + ) + # TODO deal with scan parameters anatomical if "anat" not in subdict[f_dict["sub"]][f_dict["ses"]]: subdict[f_dict["sub"]][f_dict["ses"]]["anat"] = {} @@ -689,7 +721,14 @@ def bids_gen_cpac_sublist( subdict[f_dict["sub"]][f_dict["ses"]]["func"][task_key] = task_info else: - pass + logger.warning( + "Func file (%s) already found for (%s: %s: %s) discarding %s", + subdict[f_dict["sub"]][f_dict["ses"]]["func"][task_key], + f_dict["sub"], + f_dict["ses"], + task_key, + p, + ) if "phase" in f_dict["scantype"]: if "fmap" not in subdict[f_dict["sub"]][f_dict["ses"]]: @@ -734,9 +773,19 @@ def bids_gen_cpac_sublist( sublist.append(ses) else: if "anat" not in ses: - pass + logger.warning( + "%s %s %s is missing an anat", + ses["site_id"] if "none" not in ses["site_id"] else "", + ses["subject_id"], + ses["unique_id"], + ) if "func" not in ses: - pass + logger.warning( + "%s %s %s is missing a func", + ses["site_id"] if "none" not in ses["site_id"] else "", + ses["subject_id"], + ses["unique_id"], + ) return sublist @@ -777,6 +826,8 @@ def collect_bids_files_configs(bids_dir, aws_input_creds=""): bucket = fetch_creds.return_bucket(aws_input_creds, bucket_name) + logger.info("gathering files from S3 bucket (%s) for %s", bucket, prefix) + for s3_obj in bucket.objects.filter(Prefix=prefix): for suf in suffixes: if suf in str(s3_obj.key): @@ -787,8 +838,12 @@ def collect_bids_files_configs(bids_dir, aws_input_creds=""): config_dict[ s3_obj.key.replace(prefix, "").lstrip("/") ] = json.loads(s3_obj.get()["Body"].read()) - except Exception: - raise + except Exception as e: + msg = ( + f"Error retrieving {s3_obj.key.replace(prefix, '')}" + f" ({e.message})" + ) + raise BotoCoreError(msg) from e elif "nii" in str(s3_obj.key): file_paths.append( str(s3_obj.key).replace(prefix, "").lstrip("/") @@ -816,14 +871,13 @@ def collect_bids_files_configs(bids_dir, aws_input_creds=""): ) except UnicodeDecodeError: msg = f"Could not decode {os.path.join(root, f)}" - raise Exception(msg) + raise UnicodeDecodeError(msg) if not file_paths and not config_dict: msg = ( - f"Didn't find any files in {bids_dir}. Please verify that the " - "path is typed correctly, that you have read access to " - "the directory, and that it is not " - "empty." + f"Didn't find any files in {bids_dir}. Please verify that the path is" + " typed correctly, that you have read access to the directory, and that it" + " is not empty." ) raise IOError(msg) @@ -937,7 +991,8 @@ def load_yaml_config(config_filename, aws_input_creds): config_content = b64decode(encoded) return yaml.safe_load(config_content) except: - raise + msg = f"Error! Could not find load config from data URI {config_filename}" + raise BotoCoreError(msg) if config_filename.lower().startswith("s3://"): # s3 paths begin with s3://bucket/ @@ -961,7 +1016,8 @@ def load_yaml_config(config_filename, aws_input_creds): try: return yaml.safe_load(open(config_filename, "r")) except IOError: - raise + msg = f"Error! Could not find config file {config_filename}" + raise FileNotFoundError(msg) def cl_strip_brackets(arg_list): @@ -1019,6 +1075,8 @@ def create_cpac_data_config( ------- list """ + logger.info("Parsing %s..", bids_dir) + (file_paths, config) = collect_bids_files_configs(bids_dir, aws_input_creds) if participant_labels and file_paths: @@ -1032,6 +1090,7 @@ def create_cpac_data_config( ] if not file_paths: + logger.error("Did not find data for %s", ", ".join(participant_labels)) sys.exit(1) raise_error = not skip_bids_validator @@ -1046,6 +1105,7 @@ def create_cpac_data_config( ) if not sub_list: + logger.error("Did not find data in %s", bids_dir) sys.exit(1) return sub_list @@ -1083,6 +1143,13 @@ def load_cpac_data_config(data_config_file, participant_labels, aws_input_creds) ] if not sub_list: + logger.error( + "Did not find data for %s in %s", + ", ".join(participant_labels), + data_config_file + if not data_config_file.startswith("data:") + else "data URI", + ) sys.exit(1) return sub_list