Skip to content

Commit

Permalink
Simplified database download
Browse files Browse the repository at this point in the history
Now UPIMAPI checks if file exists, if so, it will move forward.
No need for --skip-db-check parameter
  • Loading branch information
iquasere committed Dec 29, 2023
1 parent c2a00b4 commit 2d30ba6
Showing 1 changed file with 21 additions and 20 deletions.
41 changes: 21 additions & 20 deletions upimapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from functools import partial
import re

__version__ = '1.13.0'
__version__ = '1.13.1'


def load_api_info():
Expand Down Expand Up @@ -124,7 +124,7 @@ def get_arguments():
help="If true, UPIMAPI will not check if IDs are valid before mapping [false]")
parser.add_argument(
"--skip-db-check", action="store_true", default=False,
help="So UPIMAPI doesn't check for (FASTA) database existence [false]")
help="DEPRECATED: This parameter does nothing now, and will be removed in future versions.")
parser.add_argument(
"--mirror", choices=['expasy', 'uniprot', 'ebi'], default='expasy',
help="From where to download UniProt database [expasy]")
Expand Down Expand Up @@ -800,7 +800,7 @@ def download_uniprot(output_folder, mirror='expasy'):
os.remove(file)


def build_reference_database(database, output_folder, taxids=None, max_tries=3, mirror='expasy'):
def download_fasta_database(database, output_folder, taxids=None, max_tries=3, mirror='expasy'):
if database == 'uniprot':
download_uniprot(output_folder, mirror=mirror)
elif database == 'swissprot':
Expand All @@ -814,12 +814,10 @@ def build_reference_database(database, output_folder, taxids=None, max_tries=3,
f.write(get_proteome_for_taxid(taxid, max_tries=max_tries))


def must_build_database(database, resources_folder):
db2suffix = {'uniprot': 'uniprot.fasta', 'swissprot': 'uniprot_sprot.fasta', 'taxids': 'taxids_database.fasta'}
def must_download_database(database, db2suffix):
if database in db2suffix.keys():
if os.path.isfile(f'{resources_folder}/{db2suffix[database]}'):
return str2bool(input(f'{resources_folder}/{db2suffix[database]} exists. Overwrite? [Y/N] '))
return True
return not os.path.isfile(db2suffix[database])
return False # custom database


def get_tabular_taxonomy(output):
Expand Down Expand Up @@ -1342,24 +1340,27 @@ def upimapi():

# Annotation with DIAMOND
if not args.no_annotation:
db2file = {'uniprot': f'{args.resources_directory}/uniprot.fasta',
'swissprot': f'{args.resources_directory}/uniprot_sprot.fasta',
'taxids': f'{args.resources_directory}/taxids_database.fasta'}
db2file = {db: f'{args.resources_directory}/{filename}' for db, filename in [
('uniprot', 'uniprot.fasta'), ('swissprot', 'uniprot_sprot.fasta'), ('taxids', 'taxids_database.fasta')]}
if args.database in db2file.keys():
database = db2file[args.database]
else:
database = args.database

if not args.skip_db_check:
if must_build_database(args.database, args.resources_directory):
build_reference_database(
args.database, args.resources_directory, taxids=args.taxids, max_tries=args.max_tries,
mirror=args.mirror)
if not database.endswith(".dmnd"):
database = args.database # custom database, must have UniProt IDs as sequence names

if not database.endswith(".dmnd"): # database is FASTA or one of "uniprot", "swissprot" or "taxids", or not valid
diamond_formatted = f"{'.'.join(database.split('.')[:-1])}.dmnd"
if not os.path.isfile(diamond_formatted):
if not os.path.isfile(diamond_formatted): # DMND version of database not available
if must_download_database(args.database, db2file): # check if expected FASTA file exists
print(f'{args.database} database not found at {args.resources_directory}. '
f'Downloading in FASTA version.')
download_fasta_database(
args.database, args.resources_directory, taxids=args.taxids, max_tries=args.max_tries,
mirror=args.mirror)
print(f'DMND formatted version of database not found at: {diamond_formatted}')
make_diamond_database(database, diamond_formatted)
database = diamond_formatted
if not os.path.isfile(database):
sys.exit(f'Database [{database}] not found! Exiting...') # only happens for DMND or custom databases
(b, c) = block_size_and_index_chunks(
argsb=args.block_size, argsc=args.index_chunks, memory=args.max_memory)
run_diamond(
Expand Down

0 comments on commit 2d30ba6

Please sign in to comment.