-
Notifications
You must be signed in to change notification settings - Fork 589
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Simplify HaplotypeBAMWriter code. #944 #5122
Changes from 3 commits
464a704
51315b7
5cbbce3
3348855
3bc40a3
1ba7de7
b5fa08f
222ecc0
893d557
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,29 +3,46 @@ | |
import htsjdk.samtools.SAMFileHeader; | ||
import htsjdk.samtools.SAMProgramRecord; | ||
import htsjdk.samtools.SAMReadGroupRecord; | ||
import java.nio.file.Path; | ||
import org.broadinstitute.hellbender.utils.read.SAMFileGATKReadWriter; | ||
|
||
|
||
import org.broadinstitute.hellbender.utils.Utils; | ||
import org.broadinstitute.hellbender.utils.read.GATKRead; | ||
import org.broadinstitute.hellbender.utils.read.ReadUtils; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
/** | ||
* Utility class that allows easy creation of destinations for the HaplotypeBAMWriters | ||
* Class used to direct output from a HaplotypeBAMWriter to a bam/sam file. | ||
* | ||
*/ | ||
public abstract class HaplotypeBAMDestination { | ||
|
||
public class HaplotypeBAMDestination { | ||
private final SAMFileGATKReadWriter samWriter; | ||
private final SAMFileHeader bamOutputHeader; | ||
private final String haplotypeReadGroupID; | ||
private final static String haplotypeSampleTag = "HC"; | ||
|
||
/** | ||
* Create a new HaplotypeBAMDestination | ||
* | ||
* @param sourceHeader SAMFileHeader used to seed the output SAMFileHeader for this destination. | ||
* @param haplotypeReadGroupID read group ID used when writing haplotypes as reads | ||
* @param outPath path where output is written (doesn't have to be local) | ||
* @param createBamOutIndex true to create an index file for the bamout | ||
* @param createBamOutMD5 true to create an md5 file for the bamout | ||
* @param sourceHeader SAMFileHeader used to seed the output SAMFileHeader for this destination, must not be null | ||
* @param haplotypeReadGroupID read group ID used when writing haplotypes as reads | ||
*/ | ||
protected HaplotypeBAMDestination(SAMFileHeader sourceHeader, final String haplotypeReadGroupID) { | ||
protected HaplotypeBAMDestination( | ||
final Path outPath, | ||
final boolean createBamOutIndex, | ||
final boolean createBamOutMD5, | ||
final SAMFileHeader sourceHeader, | ||
final String haplotypeReadGroupID) | ||
{ | ||
Utils.nonNull(outPath, "outputPath cannot be null"); | ||
Utils.nonNull(sourceHeader, "sourceHeader cannot be null"); | ||
Utils.nonNull(sourceHeader, "sourceHeader cannot be null"); | ||
Utils.nonNull(haplotypeReadGroupID, "haplotypeReadGroupID cannot be null"); | ||
this.haplotypeReadGroupID = haplotypeReadGroupID; | ||
|
@@ -45,14 +62,26 @@ protected HaplotypeBAMDestination(SAMFileHeader sourceHeader, final String haplo | |
bamOutputHeader.setReadGroups(readGroups); | ||
|
||
bamOutputHeader.addProgramRecord(new SAMProgramRecord("HalpotypeBAMWriter")); | ||
|
||
samWriter = new SAMFileGATKReadWriter(ReadUtils.createCommonSAMWriter( | ||
outPath, | ||
null, | ||
getBAMOutputHeader(), // use the header derived from the source header by HaplotypeBAMDestination | ||
false, | ||
createBamOutIndex, | ||
createBamOutMD5 | ||
)); | ||
} | ||
|
||
/** | ||
* Write a read to the output specified by this destination. | ||
* | ||
* @param read the read to write out | ||
*/ | ||
public abstract void add(final GATKRead read); | ||
public void add(final GATKRead read){ | ||
Utils.nonNull(read, "read cannot be null"); | ||
samWriter.addRead(read); | ||
}; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are some extraneous semicolons at the ends of method bodies (here and below). These are not needed now that the methods are no longer abstract. |
||
|
||
/** | ||
* Get the read group ID that is used by this writer when writing halpotypes as reads. | ||
|
@@ -69,10 +98,12 @@ protected HaplotypeBAMDestination(SAMFileHeader sourceHeader, final String haplo | |
public String getHaplotypeSampleTag() { return haplotypeSampleTag; } | ||
|
||
/** | ||
* Close the destination | ||
* | ||
* Close any resources associated with this destination. | ||
*/ | ||
abstract void close(); | ||
|
||
void close(){ | ||
samWriter.close(); | ||
}; | ||
|
||
/** | ||
* Get the SAMFileHeader that is used for writing the output for this destination. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,16 +11,16 @@ | |
import org.broadinstitute.hellbender.utils.Utils; | ||
import org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods; | ||
|
||
import java.io.File; | ||
import java.util.Collection; | ||
import java.util.LinkedHashSet; | ||
import java.util.function.Function; | ||
import java.util.Set; | ||
|
||
/** | ||
* A BAMWriter that aligns reads to haplotypes and emits their best alignments to a destination. | ||
* | ||
*/ | ||
public abstract class HaplotypeBAMWriter implements AutoCloseable { | ||
public class HaplotypeBAMWriter implements AutoCloseable { | ||
/** | ||
* Allows us to write out unique names for our synthetic haplotype reads | ||
*/ | ||
|
@@ -33,6 +33,7 @@ public abstract class HaplotypeBAMWriter implements AutoCloseable { | |
private static final int otherMQ = 0; | ||
|
||
protected final HaplotypeBAMDestination output; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. output can be made private now |
||
private WriterType writerType; | ||
private boolean writeHaplotypes = true; | ||
|
||
/** | ||
|
@@ -43,26 +44,14 @@ public enum WriterType { | |
* A mode that's for method developers. Writes out all of the possible | ||
* haplotypes considered, as well as reads aligned to each | ||
*/ | ||
ALL_POSSIBLE_HAPLOTYPES(AllHaplotypeBAMWriter::new), | ||
ALL_POSSIBLE_HAPLOTYPES, | ||
|
||
/** | ||
* A mode for users. Writes out the reads aligned only to the called | ||
* haplotypes. Useful to understand why the caller is calling what it is | ||
*/ | ||
CALLED_HAPLOTYPES(CalledHaplotypeBAMWriter::new); | ||
CALLED_HAPLOTYPES; | ||
|
||
final private Function<HaplotypeBAMDestination, HaplotypeBAMWriter> factory; | ||
|
||
WriterType(Function<HaplotypeBAMDestination, HaplotypeBAMWriter> factory) { | ||
this.factory = factory; | ||
} | ||
|
||
/** | ||
* Create an instance of the HaplotypeBAMWriter corresponding to this type. | ||
*/ | ||
public HaplotypeBAMWriter create(HaplotypeBAMDestination destination) { | ||
Utils.nonNull(destination, "destination cannot be null"); | ||
return factory.apply(destination); } | ||
} | ||
|
||
/** | ||
|
@@ -75,17 +64,14 @@ public HaplotypeBAMWriter create(HaplotypeBAMDestination destination) { | |
* @param sourceHeader the header of the input BAMs used to make calls, must not be null. | ||
* @return a new HaplotypeBAMWriter | ||
*/ | ||
public static HaplotypeBAMWriter create( | ||
public HaplotypeBAMWriter( | ||
final WriterType type, | ||
final Path outputPath, | ||
final boolean createBamOutIndex, | ||
final boolean createBamOutMD5, | ||
final SAMFileHeader sourceHeader) { | ||
Utils.nonNull(type, "type cannot be null"); | ||
Utils.nonNull(outputPath, "outputPath cannot be null"); | ||
Utils.nonNull(sourceHeader, "sourceHeader cannot be null"); | ||
|
||
return create(type, new SAMFileDestination(outputPath, createBamOutIndex, createBamOutMD5, sourceHeader, DEFAULT_HAPLOTYPE_READ_GROUP_ID)); | ||
this(type, new HaplotypeBAMDestination(outputPath, createBamOutIndex, createBamOutMD5, sourceHeader, DEFAULT_HAPLOTYPE_READ_GROUP_ID)); | ||
} | ||
|
||
/** | ||
|
@@ -97,22 +83,14 @@ public static HaplotypeBAMWriter create( | |
* | ||
* @return a new HaplotypeBAMWriter | ||
*/ | ||
public static HaplotypeBAMWriter create(final WriterType type, final HaplotypeBAMDestination destination) { | ||
public HaplotypeBAMWriter( | ||
final WriterType type, | ||
final HaplotypeBAMDestination destination) { | ||
|
||
Utils.nonNull(type, "type cannot be null"); | ||
Utils.nonNull(destination, "destination cannot be null"); | ||
|
||
return type.create(destination); | ||
} | ||
|
||
/** | ||
* Create a new HaplotypeBAMWriter writing its output to the given destination | ||
* | ||
* @param output the output destination, must not be null. Note that SAM writer associated with the destination must | ||
* have its presorted bit set to false, as reads may come in out of order during writing. | ||
*/ | ||
protected HaplotypeBAMWriter(final HaplotypeBAMDestination output) { | ||
Utils.nonNull(output, "output cannot be null"); | ||
this.output = output; | ||
this.writerType = type; | ||
this.output = destination; | ||
} | ||
|
||
/** | ||
|
@@ -125,18 +103,41 @@ public void close() { | |
|
||
/** | ||
* Write out a BAM representing for the haplotype caller at this site | ||
* writerType (ALL_POSSIBLE_HAPLOTYPES or CALLED_HAPLOTYPES) determines inputs to writeHaplotypesAsReads | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Lets fix up the javadoc here (the first sentence was incomplete before):
|
||
* | ||
* @param haplotypes a list of all possible haplotypes at this loc | ||
* @param paddedReferenceLoc the span of the based reference here | ||
* @param bestHaplotypes a list of the best (a subset of all) haplotypes that actually went forward into genotyping | ||
* @param calledHaplotypes a list of the haplotypes that where actually called as non-reference | ||
* @param readLikelihoods a map from sample -> likelihoods for each read for each of the best haplotypes | ||
*/ | ||
public abstract void writeReadsAlignedToHaplotypes(final Collection<Haplotype> haplotypes, | ||
final Locatable paddedReferenceLoc, | ||
final Collection<Haplotype> bestHaplotypes, | ||
final Set<Haplotype> calledHaplotypes, | ||
final ReadLikelihoods<Haplotype> readLikelihoods); | ||
public void writeReadsAlignedToHaplotypes(final Collection<Haplotype> haplotypes, | ||
final Locatable paddedReferenceLoc, | ||
final Collection<Haplotype> bestHaplotypes, | ||
final Set<Haplotype> calledHaplotypes, | ||
final ReadLikelihoods<Haplotype> readLikelihoods) { | ||
|
||
Utils.nonNull(haplotypes, "haplotypes cannot be null"); | ||
Utils.nonNull(paddedReferenceLoc, "paddedReferenceLoc cannot be null"); | ||
Utils.nonNull(calledHaplotypes, "calledHaplotypes cannot be null"); | ||
Utils.nonNull(readLikelihoods, "readLikelihoods cannot be null"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should add a null check for bestHaplotypes as well. Also, since we're now requiring calledHaplotypes to always be non-null (I think it used to depend on the writer type), lets update the javadoc for all of these args to say they must be non-null. |
||
|
||
if (calledHaplotypes.isEmpty() && writerType.equals(WriterType.CALLED_HAPLOTYPES)) { // only write out called haplotypes | ||
return; | ||
} | ||
|
||
Collection<Haplotype> haplotypesToWrite = writerType.equals(WriterType.CALLED_HAPLOTYPES) ? calledHaplotypes : haplotypes; | ||
Set<Haplotype> bestHaplotypesToWrite = writerType.equals(WriterType.CALLED_HAPLOTYPES) ? calledHaplotypes : new LinkedHashSet<>(bestHaplotypes); | ||
|
||
writeHaplotypesAsReads(haplotypesToWrite, bestHaplotypesToWrite, paddedReferenceLoc); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it would be much clearer to just have an if test after the Utils checks, with one branch for ALL and one for called CALLED. |
||
|
||
final int sampleCount = readLikelihoods.numberOfSamples(); | ||
for (int i = 0; i < sampleCount; i++) { | ||
for (final GATKRead read : readLikelihoods.sampleReads(i)) { | ||
writeReadAgainstHaplotype(read); | ||
} | ||
} | ||
} | ||
|
||
/** | ||
* Write out read aligned to haplotype to the BAM file | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you're checking
sourceHeader
twice.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, you're right. I will remove the second instance of this.