diff --git a/src/main/java/org/broadinstitute/hellbender/engine/spark/GATKSparkTool.java b/src/main/java/org/broadinstitute/hellbender/engine/spark/GATKSparkTool.java index a625d324f9f..1d237da4a6b 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/spark/GATKSparkTool.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/spark/GATKSparkTool.java @@ -3,6 +3,7 @@ import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.reference.ReferenceSequenceFileFactory; +import htsjdk.samtools.util.GZIIndex; import htsjdk.samtools.util.IOUtil; import htsjdk.variant.vcf.VCFHeaderLine; import org.apache.spark.api.java.JavaRDD; @@ -588,6 +589,7 @@ protected static String addReferenceFilesForSpark(JavaSparkContext ctx, String r Path referencePath = IOUtils.getPath(referenceFile); Path indexPath = ReferenceSequenceFileFactory.getFastaIndexFileName(referencePath); Path dictPath = ReferenceSequenceFileFactory.getDefaultDictionaryForReferenceSequence(referencePath); + Path gziPath = GZIIndex.resolveIndexNameForBgzipFile(referencePath); ctx.addFile(referenceFile); if (Files.exists(indexPath)) { @@ -596,6 +598,9 @@ protected static String addReferenceFilesForSpark(JavaSparkContext ctx, String r if (Files.exists(dictPath)) { ctx.addFile(dictPath.toUri().toString()); } + if (Files.exists(gziPath)) { + ctx.addFile(gziPath.toUri().toString()); + } return referencePath.getFileName().toString(); } diff --git a/src/test/java/org/broadinstitute/hellbender/GATKBaseTest.java b/src/test/java/org/broadinstitute/hellbender/GATKBaseTest.java index 341bf4a346d..1f8a74d794d 100644 --- a/src/test/java/org/broadinstitute/hellbender/GATKBaseTest.java +++ b/src/test/java/org/broadinstitute/hellbender/GATKBaseTest.java @@ -49,6 +49,8 @@ public abstract class GATKBaseTest extends BaseTest { // All of chromosomes 20 and 21 from the b37 reference public static final String b37_reference_20_21 = largeFileTestDir + "human_g1k_v37.20.21.fasta"; + public static final String b37_reference_20_21_gz = largeFileTestDir + "human_g1k_v37.20.21.fasta.gz"; + public static final String b37_2bit_reference_20_21 = largeFileTestDir + "human_g1k_v37.20.21.2bit"; public static final String b37_reference_20_21_img = largeFileTestDir + "human_g1k_v37.20.21.fasta.img"; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/BaseRecalibratorSparkIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/BaseRecalibratorSparkIntegrationTest.java index 88963255fb7..1c2ce65ea51 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/BaseRecalibratorSparkIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/BaseRecalibratorSparkIntegrationTest.java @@ -63,6 +63,7 @@ public Object[][] createBQSRTestData() { final String localResources = getResourceDir(); final String GRCh37Ref_chr2021 = b37_reference_20_21; + final String GRCh37Ref_chr2021_gz = b37_reference_20_21_gz; final String hiSeqBam_chr20 = localResources + WGS_B37_CH20_1M_1M1K_BAM; final String hiSeqBam_1read = localResources + "overlappingRead.bam"; final String dbSNPb37_chr20 = localResources + DBSNP_138_B37_CH20_1M_1M1K_VCF; @@ -87,6 +88,7 @@ public Object[][] createBQSRTestData() { // local input/computation/reference {new BQSRTest(GRCh37Ref_chr2021, hiSeqBam_1read, dbSNPb37_chr2021, "-indels --enable-baq", getResourceDir() + BQSRTestData.EXPECTED_WGS_B37_CH20_1READ_RECAL)}, {new BQSRTest(GRCh37Ref_chr2021, hiSeqBam_chr20, dbSNPb37_chr20, "-indels --enable-baq", getResourceDir() + BQSRTestData.EXPECTED_WGS_B37_CH20_1M_1M1K_RECAL)}, + {new BQSRTest(GRCh37Ref_chr2021_gz, hiSeqBam_chr20, dbSNPb37_chr20, "-indels --enable-baq", getResourceDir() + BQSRTestData.EXPECTED_WGS_B37_CH20_1M_1M1K_RECAL)}, {new BQSRTest(GRCh37Ref_chr2021, hiSeqBam_chr20, dbSNPb37_chr20, "", getResourceDir() + BQSRTestData.EXPECTED_WGS_B37_CH20_1M_1M1K_NOINDEL_NOBAQ_RECAL)}, {new BQSRTest(GRCh37Ref_chr2021, hiSeqBam_chr20, dbSNPb37_chr20, "-indels --enable-baq --indels-context-size 4", getResourceDir() + BQSRTestData.EXPECTED_WGS_B37_CH20_1M_1M1K_INDELS_CONTEXT_SIZE_4_RECAL)}, {new BQSRTest(GRCh37Ref_chr2021, hiSeqBam_chr20, dbSNPb37_chr20, "-indels --enable-baq --low-quality-tail 5", getResourceDir() + BQSRTestData.EXPECTED_WGS_B37_CH20_1M_1M1K_LOW_QUALITY_TAIL_5_RECAL)}, diff --git a/src/test/resources/large/human_g1k_v37.20.21.fasta.gz b/src/test/resources/large/human_g1k_v37.20.21.fasta.gz new file mode 100644 index 00000000000..86202a92a3f --- /dev/null +++ b/src/test/resources/large/human_g1k_v37.20.21.fasta.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd30f8fab9d4081e0fe150b310cb5f74d0d05b5aa0230e328dc51ddf0b0caf2f +size 29207154 diff --git a/src/test/resources/large/human_g1k_v37.20.21.fasta.gz.fai b/src/test/resources/large/human_g1k_v37.20.21.fasta.gz.fai new file mode 100644 index 00000000000..ad31c98204d --- /dev/null +++ b/src/test/resources/large/human_g1k_v37.20.21.fasta.gz.fai @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e5daa85c50bc06de53d26794dc02ed3bddcad083badae7a601e524982eeb58d +size 48 diff --git a/src/test/resources/large/human_g1k_v37.20.21.fasta.gz.gzi b/src/test/resources/large/human_g1k_v37.20.21.fasta.gz.gzi new file mode 100644 index 00000000000..3c679426b3c --- /dev/null +++ b/src/test/resources/large/human_g1k_v37.20.21.fasta.gz.gzi @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a18a249f2e11da8de4f25accd2afbda0168ed1cccd25edf0eacdb5b02176776 +size 27704