Skip to content

Commit

Permalink
working on fasta.gz support in gatk
Browse files Browse the repository at this point in the history
  • Loading branch information
lbergelson committed Aug 22, 2018
1 parent 401d217 commit c6bf892
Show file tree
Hide file tree
Showing 26 changed files with 318 additions and 281 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -410,14 +410,14 @@ public List<ActivityProfileState> getSupportingStates() {
/**
* See #getActiveRegionReference, but using the span including reads rather than the extended span
*/
public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader ) {
public byte[] getFullReference( final ReferenceSequenceFile referenceReader ) {
return getFullReference(referenceReader, 0);
}

/**
* See #getActiveRegionReference, but using the span including reads (optionally padded) rather than the extended span
*/
public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
public byte[] getFullReference( final ReferenceSequenceFile referenceReader, final int padding ) {
return getReference(referenceReader, padding, spanIncludingReads);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@
import org.broadinstitute.hellbender.utils.haplotype.Haplotype;
import org.broadinstitute.hellbender.utils.haplotype.HaplotypeBAMWriter;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.broadinstitute.hellbender.utils.read.*;
import org.broadinstitute.hellbender.utils.read.AlignmentUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.ReadCoordinateComparator;
import org.broadinstitute.hellbender.utils.read.ReadUtils;
import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAligner;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;

import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
import java.util.stream.Collectors;

Expand Down Expand Up @@ -147,12 +149,8 @@ public static SimpleInterval getPaddedReferenceLoc(final AssemblyRegion region,
}

public static CachingIndexedFastaSequenceFile createReferenceReader(final String reference) {
try {
// fasta reference reader to supplement the edges of the reference sequence
return new CachingIndexedFastaSequenceFile(IOUtils.getPath(reference));
} catch( FileNotFoundException e ) {
throw new UserException.CouldNotReadInputFile(IOUtils.getPath(reference), e);
}
return CachingIndexedFastaSequenceFile.checkAndCreate(IOUtils.getPath(reference));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.ArgumentCollection;
Expand All @@ -14,18 +13,14 @@
import org.broadinstitute.hellbender.cmdline.programgroups.ShortVariantDiscoveryProgramGroup;
import org.broadinstitute.hellbender.engine.*;
import org.broadinstitute.hellbender.engine.filters.ReadFilter;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.walkers.annotator.*;
import org.broadinstitute.hellbender.tools.walkers.annotator.Annotation;
import org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.hellbender.utils.io.IOUtils;

import java.io.FileNotFoundException;
import java.util.*;
import java.util.ArrayList;
import java.util.List;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.broadinstitute.hellbender.utils.variant.HomoSapiensConstants;
import java.nio.file.Path;
import java.util.Collection;
import java.util.List;


/**
Expand Down Expand Up @@ -233,14 +228,8 @@ public void onTraversalStart() {
}

private static CachingIndexedFastaSequenceFile getReferenceReader(ReferenceInputArgumentCollection referenceArguments) {
final CachingIndexedFastaSequenceFile referenceReader;
final Path reference = IOUtils.getPath(referenceArguments.getReferenceFileName());
try {
referenceReader = new CachingIndexedFastaSequenceFile(reference);
} catch (FileNotFoundException e) {
throw new UserException.CouldNotReadInputFile(reference, e);
}
return referenceReader;
return CachingIndexedFastaSequenceFile.checkAndCreate(reference);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.TextCigarCodec;
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.util.Tuple;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
Expand Down Expand Up @@ -60,7 +61,7 @@ public class OverhangFixingManager {
private final GATKReadWriter writer;

// fasta reference reader to check overhanging edges in the exome reference sequence
private final IndexedFastaSequenceFile referenceReader;
private final ReferenceSequenceFile referenceReader;

// the genome loc parser
private final GenomeLocParser genomeLocParser;
Expand Down Expand Up @@ -92,7 +93,7 @@ public class OverhangFixingManager {
public OverhangFixingManager(final SAMFileHeader header,
final GATKReadWriter writer,
final GenomeLocParser genomeLocParser,
final IndexedFastaSequenceFile referenceReader,
final ReferenceSequenceFile referenceReader,
final int maxRecordsInMemory,
final int maxMismatchesInOverhangs,
final int maxBasesInOverhangs,
Expand Down Expand Up @@ -469,7 +470,7 @@ public Splice(final String contig, final int start, final int end) {
loc = genomeLocParser.createGenomeLoc(contig, start, end);
}

public void initialize(final IndexedFastaSequenceFile referenceReader) {
public void initialize(final ReferenceSequenceFile referenceReader) {
reference = referenceReader.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases();
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
package org.broadinstitute.hellbender.tools.walkers.rnaseq;

import htsjdk.samtools.*;
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.TextCigarCodec;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import picard.cmdline.programgroups.ReadDataManipulationProgramGroup;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.engine.TwoPassReadWalker;
Expand All @@ -21,10 +22,13 @@
import org.broadinstitute.hellbender.utils.SATagBuilder;
import org.broadinstitute.hellbender.utils.clipping.ReadClipper;
import org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.hellbender.utils.read.*;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils;
import org.broadinstitute.hellbender.utils.read.CigarUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.SAMFileGATKReadWriter;
import picard.cmdline.programgroups.ReadDataManipulationProgramGroup;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
Expand Down Expand Up @@ -128,7 +132,7 @@ public boolean requiresReference() {

private SAMFileGATKReadWriter outputWriter;
private OverhangFixingManager overhangManager;
private IndexedFastaSequenceFile referenceReader;
private ReferenceSequenceFile referenceReader;
SAMFileHeader header;

@Override
Expand Down Expand Up @@ -164,15 +168,10 @@ public ReadTransformer makePostReadFilterTransformer(){
@Override
public void onTraversalStart() {
header = getHeaderForSAMWriter();
try {
referenceReader = new CachingIndexedFastaSequenceFile(referenceArguments.getReferencePath());
GenomeLocParser genomeLocParser = new GenomeLocParser(getBestAvailableSequenceDictionary());
outputWriter = createSAMWriter(IOUtils.getPath(OUTPUT), false);
overhangManager = new OverhangFixingManager(header, outputWriter, genomeLocParser, referenceReader, MAX_RECORDS_IN_MEMORY, MAX_MISMATCHES_IN_OVERHANG, MAX_BASES_TO_CLIP, doNotFixOverhangs, processSecondaryAlignments);

} catch (FileNotFoundException ex) {
throw new UserException.CouldNotReadInputFile(referenceArguments.getReferencePath(), ex);
}
referenceReader = CachingIndexedFastaSequenceFile.checkAndCreate(referenceArguments.getReferencePath());
GenomeLocParser genomeLocParser = new GenomeLocParser(getBestAvailableSequenceDictionary());
outputWriter = createSAMWriter(IOUtils.getPath(OUTPUT), false);
overhangManager = new OverhangFixingManager(header, outputWriter, genomeLocParser, referenceReader, MAX_RECORDS_IN_MEMORY, MAX_MISMATCHES_IN_OVERHANG, MAX_BASES_TO_CLIP, doNotFixOverhangs, processSecondaryAlignments);
}

@Override
Expand Down
Loading

0 comments on commit c6bf892

Please sign in to comment.