Skip to content

Commit

Permalink
Update makefile to read different isoform overrides file
Browse files Browse the repository at this point in the history
  • Loading branch information
leexgh committed Feb 23, 2023
1 parent 6c7a73b commit 21cbbbe
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions data/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,10 @@ GENOME_BUILD=$(firstword $(subst _, ,$(VERSION)))

# Select the genome-build-specific isoform override file names.
# GENOME_BUILD is the first "_"-separated word of VERSION; any value other
# than grch38 falls through to the grch37 names.
#
# ONCOKB_ISOFORM_OVERRIDES_FILE_NAME is consumed by the
# ensembl_biomart_canonical_transcripts_per_hgnc.txt rule. Without a value the
# prerequisite would collapse to the bare directory common_input/. It is set
# with ?= so a definition made elsewhere (earlier in this file, the
# environment, or the command line) still takes precedence.
# NOTE(review): names follow the isoform_overrides_oncokb_grch3*.txt pattern
# mentioned next to that rule — confirm the files exist in common_input/.
ifeq ($(GENOME_BUILD), grch38)
MSKCC_ISOFORM_OVERRIDES_FILE_NAME=isoform_overrides_at_mskcc_grch38.txt
GENOME_NEXUS_ISOFORM_OVERRIDES_FILE_NAME=isoform_overrides_genome_nexus_grch38.txt
ONCOKB_ISOFORM_OVERRIDES_FILE_NAME?=isoform_overrides_oncokb_grch38.txt
else
MSKCC_ISOFORM_OVERRIDES_FILE_NAME=isoform_overrides_at_mskcc_grch37.txt
GENOME_NEXUS_ISOFORM_OVERRIDES_FILE_NAME=isoform_overrides_genome_nexus_grch37.txt
ONCOKB_ISOFORM_OVERRIDES_FILE_NAME?=isoform_overrides_oncokb_grch37.txt
endif

# Generic rule to unzip prerequisite files
Expand Down Expand Up @@ -162,9 +164,9 @@ $(TMP_DIR)/ensembl_biomart_transcripts_mouse.json.gz: $(TMP_DIR)/ensembl_biomart
# about 50m to run (TODO: this can be easily optimized)
# isoform_overrides_genome_nexus.txt is made for genome nexus; the other files are generated for vcf2maf
# Please note: we should keep hgnc_complete_set_20221001 in sync with https://github.com/cBioPortal/datahub-study-curation-tools/blob/master/gene-table-update/build-input-for-importer/hgnc_complete_set.txt
# isoform_overrides_oncokb.txt is a list of OncoKB transcripts and genes that differ from msk_override; the original file can be downloaded here: https://docs.google.com/spreadsheets/d/1ZZt8x0vvhrwL6VLQRzx3YE7XUOvEM-noQl7v6Tg7lCQ/edit#gid=0
# The recipe passes every prerequisite ($^, in the order listed) followed by the target path ($@) to the script.
$(TMP_DIR)/ensembl_biomart_canonical_transcripts_per_hgnc.txt: $(TMP_DIR)/ensembl_canonical_data.txt common_input/hgnc_complete_set_20221001.txt common_input/isoform_overrides_uniprot.txt common_input/$(MSKCC_ISOFORM_OVERRIDES_FILE_NAME) common_input/isoform_overrides_genome_nexus.txt common_input/isoform_overrides_oncokb.txt common_input/ignored_genes.txt
python3 ../scripts/make_one_canonical_transcript_per_gene.py $^ $@
# isoform_overrides_oncokb_grch3*.txt is a list of OncoKB transcripts and genes; it is generated by download_oncokb_isoform_overrides.py
# The MSKCC, Genome Nexus and OncoKB override inputs are named through the *_ISOFORM_OVERRIDES_FILE_NAME variables.
# The recipe passes every prerequisite ($^, in the order listed) followed by the target path ($@) to the script.
# NOTE(review): confirm ONCOKB_ISOFORM_OVERRIDES_FILE_NAME is defined before this rule is used; if it is empty, that prerequisite expands to just common_input/.
$(TMP_DIR)/ensembl_biomart_canonical_transcripts_per_hgnc.txt: $(TMP_DIR)/ensembl_canonical_data.txt common_input/hgnc_complete_set_20221001.txt common_input/isoform_overrides_uniprot.txt common_input/$(MSKCC_ISOFORM_OVERRIDES_FILE_NAME) common_input/$(GENOME_NEXUS_ISOFORM_OVERRIDES_FILE_NAME) common_input/$(ONCOKB_ISOFORM_OVERRIDES_FILE_NAME) common_input/ignored_genes.txt
python ../scripts/make_one_canonical_transcript_per_gene.py $^ $@

# mouse version. A different script is called that sets the canonicals based on Ensembl lookup.
$(TMP_DIR)/ensembl_biomart_canonical_transcripts_per_mgi.txt: $(TMP_DIR)/ensembl_canonical_data.txt common_input/mouse/MRK_ENSEMBL.rpt common_input/mouse/MGI_Gene_Model_Coord.rpt
Expand Down

0 comments on commit 21cbbbe

Please sign in to comment.