diff --git a/upimapi.py b/upimapi.py index b6409bc..289c4cf 100644 --- a/upimapi.py +++ b/upimapi.py @@ -28,7 +28,7 @@ from functools import partial import re -__version__ = '1.13.0' +__version__ = '1.13.1' def load_api_info(): @@ -124,7 +124,7 @@ def get_arguments(): help="If true, UPIMAPI will not check if IDs are valid before mapping [false]") parser.add_argument( "--skip-db-check", action="store_true", default=False, - help="So UPIMAPI doesn't check for (FASTA) database existence [false]") + help="DEPRECATED: This parameter does nothing now, and will be removed in future versions.") parser.add_argument( "--mirror", choices=['expasy', 'uniprot', 'ebi'], default='expasy', help="From where to download UniProt database [expasy]") @@ -800,7 +800,7 @@ def download_uniprot(output_folder, mirror='expasy'): os.remove(file) -def build_reference_database(database, output_folder, taxids=None, max_tries=3, mirror='expasy'): +def download_fasta_database(database, output_folder, taxids=None, max_tries=3, mirror='expasy'): if database == 'uniprot': download_uniprot(output_folder, mirror=mirror) elif database == 'swissprot': @@ -814,12 +814,10 @@ def build_reference_database(database, output_folder, taxids=None, max_tries=3, f.write(get_proteome_for_taxid(taxid, max_tries=max_tries)) -def must_build_database(database, resources_folder): - db2suffix = {'uniprot': 'uniprot.fasta', 'swissprot': 'uniprot_sprot.fasta', 'taxids': 'taxids_database.fasta'} +def must_download_database(database, db2suffix): if database in db2suffix.keys(): - if os.path.isfile(f'{resources_folder}/{db2suffix[database]}'): - return str2bool(input(f'{resources_folder}/{db2suffix[database]} exists. Overwrite? [Y/N] ')) - return True + return not os.path.isfile(db2suffix[database]) + return False # custom database def get_tabular_taxonomy(output): @@ -1342,24 +1340,27 @@ def upimapi(): # Annotation with DIAMOND if not args.no_annotation: - db2file = {'uniprot': f'{args.resources_directory}/uniprot.fasta', - 'swissprot': f'{args.resources_directory}/uniprot_sprot.fasta', - 'taxids': f'{args.resources_directory}/taxids_database.fasta'} + db2file = {db: f'{args.resources_directory}/{filename}' for db, filename in [ + ('uniprot', 'uniprot.fasta'), ('swissprot', 'uniprot_sprot.fasta'), ('taxids', 'taxids_database.fasta')]} if args.database in db2file.keys(): database = db2file[args.database] else: - database = args.database - - if not args.skip_db_check: - if must_build_database(args.database, args.resources_directory): - build_reference_database( - args.database, args.resources_directory, taxids=args.taxids, max_tries=args.max_tries, - mirror=args.mirror) - if not database.endswith(".dmnd"): + database = args.database # custom database, must have UniProt IDs as sequence names + + if not database.endswith(".dmnd"): # database is FASTA or one of "uniprot", "swissprot" or "taxids", or not valid diamond_formatted = f"{'.'.join(database.split('.')[:-1])}.dmnd" - if not os.path.isfile(diamond_formatted): + if not os.path.isfile(diamond_formatted): # DMND version of database not available + if must_download_database(args.database, db2file): # check if expected FASTA file exists + print(f'{args.database} database not found at {args.resources_directory}. ' + f'Downloading in FASTA version.') + download_fasta_database( + args.database, args.resources_directory, taxids=args.taxids, max_tries=args.max_tries, + mirror=args.mirror) + print(f'DMND formatted version of database not found at: {diamond_formatted}') make_diamond_database(database, diamond_formatted) database = diamond_formatted + if not os.path.isfile(database): + sys.exit(f'Database [{database}] not found! Exiting...') # only happens for DMND or custom databases (b, c) = block_size_and_index_chunks( argsb=args.block_size, argsc=args.index_chunks, memory=args.max_memory) run_diamond(