Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify HaplotypeBAMWriter code. #944 #5122

Merged
merged 9 commits into from
Aug 30, 2018
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ public static Optional<HaplotypeBAMWriter> createBamWriter(final AssemblyBasedCa
final boolean createBamOutMD5,
final SAMFileHeader header) {
return args.bamOutputPath != null ?
Optional.of(HaplotypeBAMWriter.create(args.bamWriterType, IOUtils.getPath(args.bamOutputPath), createBamOutIndex, createBamOutMD5, header)) :
Optional.of(new HaplotypeBAMWriter(args.bamWriterType, IOUtils.getPath(args.bamOutputPath), createBamOutIndex, createBamOutMD5, header)) :
Optional.empty();
}

Expand Down

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,46 @@
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMProgramRecord;
import htsjdk.samtools.SAMReadGroupRecord;
import java.nio.file.Path;
import org.broadinstitute.hellbender.utils.read.SAMFileGATKReadWriter;


import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.ReadUtils;

import java.util.ArrayList;
import java.util.List;

/**
* Utility class that allows easy creation of destinations for the HaplotypeBAMWriters
* Class used to direct output from a HaplotypeBAMWriter to a bam/sam file.
*
*/
public abstract class HaplotypeBAMDestination {

public class HaplotypeBAMDestination {
private final SAMFileGATKReadWriter samWriter;
private final SAMFileHeader bamOutputHeader;
private final String haplotypeReadGroupID;
private final static String haplotypeSampleTag = "HC";

/**
* Create a new HaplotypeBAMDestination
*
* @param sourceHeader SAMFileHeader used to seed the output SAMFileHeader for this destination.
* @param haplotypeReadGroupID read group ID used when writing haplotypes as reads
* @param outPath path where output is written (doesn't have to be local)
* @param createBamOutIndex true to create an index file for the bamout
* @param createBamOutMD5 true to create an md5 file for the bamout
* @param sourceHeader SAMFileHeader used to seed the output SAMFileHeader for this destination, must not be null
* @param haplotypeReadGroupID read group ID used when writing haplotypes as reads
*/
protected HaplotypeBAMDestination(SAMFileHeader sourceHeader, final String haplotypeReadGroupID) {
protected HaplotypeBAMDestination(
final Path outPath,
final boolean createBamOutIndex,
final boolean createBamOutMD5,
final SAMFileHeader sourceHeader,
final String haplotypeReadGroupID)
{
Utils.nonNull(outPath, "outputPath cannot be null");
Utils.nonNull(sourceHeader, "sourceHeader cannot be null");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you're checking sourceHeader twice.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, you're right. I will remove the second instance of this.

Utils.nonNull(sourceHeader, "sourceHeader cannot be null");
Utils.nonNull(haplotypeReadGroupID, "haplotypeReadGroupID cannot be null");
this.haplotypeReadGroupID = haplotypeReadGroupID;
Expand All @@ -45,14 +62,26 @@ protected HaplotypeBAMDestination(SAMFileHeader sourceHeader, final String haplo
bamOutputHeader.setReadGroups(readGroups);

bamOutputHeader.addProgramRecord(new SAMProgramRecord("HalpotypeBAMWriter"));

samWriter = new SAMFileGATKReadWriter(ReadUtils.createCommonSAMWriter(
outPath,
null,
getBAMOutputHeader(), // use the header derived from the source header by HaplotypeBAMDestination
false,
createBamOutIndex,
createBamOutMD5
));
}

/**
* Write a read to the output specified by this destination.
*
* @param read the read to write out
*/
public abstract void add(final GATKRead read);
public void add(final GATKRead read){
Utils.nonNull(read, "read cannot be null");
samWriter.addRead(read);
};
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are some extraneous semicolons at the ends of method bodies (here and below). These are not needed now that the methods are no longer abstract.


/**
* Get the read group ID that is used by this writer when writing halpotypes as reads.
Expand All @@ -69,10 +98,12 @@ protected HaplotypeBAMDestination(SAMFileHeader sourceHeader, final String haplo
public String getHaplotypeSampleTag() { return haplotypeSampleTag; }

/**
* Close the destination
*
* Close any resources associated with this destination.
*/
abstract void close();

void close(){
samWriter.close();
};

/**
* Get the SAMFileHeader that is used for writing the output for this destination.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods;

import java.io.File;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.function.Function;
import java.util.Set;

/**
* A BAMWriter that aligns reads to haplotypes and emits their best alignments to a destination.
*
*/
public abstract class HaplotypeBAMWriter implements AutoCloseable {
public class HaplotypeBAMWriter implements AutoCloseable {
/**
* Allows us to write out unique names for our synthetic haplotype reads
*/
Expand All @@ -33,6 +33,7 @@ public abstract class HaplotypeBAMWriter implements AutoCloseable {
private static final int otherMQ = 0;

protected final HaplotypeBAMDestination output;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

output can be made private now

private WriterType writerType;
private boolean writeHaplotypes = true;

/**
Expand All @@ -43,26 +44,14 @@ public enum WriterType {
* A mode that's for method developers. Writes out all of the possible
* haplotypes considered, as well as reads aligned to each
*/
ALL_POSSIBLE_HAPLOTYPES(AllHaplotypeBAMWriter::new),
ALL_POSSIBLE_HAPLOTYPES,

/**
* A mode for users. Writes out the reads aligned only to the called
* haplotypes. Useful to understand why the caller is calling what it is
*/
CALLED_HAPLOTYPES(CalledHaplotypeBAMWriter::new);
CALLED_HAPLOTYPES;

final private Function<HaplotypeBAMDestination, HaplotypeBAMWriter> factory;

WriterType(Function<HaplotypeBAMDestination, HaplotypeBAMWriter> factory) {
this.factory = factory;
}

/**
* Create an instance of the HaplotypeBAMWriter corresponding to this type.
*/
public HaplotypeBAMWriter create(HaplotypeBAMDestination destination) {
Utils.nonNull(destination, "destination cannot be null");
return factory.apply(destination); }
}

/**
Expand All @@ -75,17 +64,14 @@ public HaplotypeBAMWriter create(HaplotypeBAMDestination destination) {
* @param sourceHeader the header of the input BAMs used to make calls, must not be null.
* @return a new HaplotypeBAMWriter
*/
public static HaplotypeBAMWriter create(
public HaplotypeBAMWriter(
final WriterType type,
final Path outputPath,
final boolean createBamOutIndex,
final boolean createBamOutMD5,
final SAMFileHeader sourceHeader) {
Utils.nonNull(type, "type cannot be null");
Utils.nonNull(outputPath, "outputPath cannot be null");
Utils.nonNull(sourceHeader, "sourceHeader cannot be null");

return create(type, new SAMFileDestination(outputPath, createBamOutIndex, createBamOutMD5, sourceHeader, DEFAULT_HAPLOTYPE_READ_GROUP_ID));
this(type, new HaplotypeBAMDestination(outputPath, createBamOutIndex, createBamOutMD5, sourceHeader, DEFAULT_HAPLOTYPE_READ_GROUP_ID));
}

/**
Expand All @@ -97,22 +83,14 @@ public static HaplotypeBAMWriter create(
*
* @return a new HaplotypeBAMWriter
*/
public static HaplotypeBAMWriter create(final WriterType type, final HaplotypeBAMDestination destination) {
public HaplotypeBAMWriter(
final WriterType type,
final HaplotypeBAMDestination destination) {

Utils.nonNull(type, "type cannot be null");
Utils.nonNull(destination, "destination cannot be null");

return type.create(destination);
}

/**
* Create a new HaplotypeBAMWriter writing its output to the given destination
*
* @param output the output destination, must not be null. Note that SAM writer associated with the destination must
* have its presorted bit set to false, as reads may come in out of order during writing.
*/
protected HaplotypeBAMWriter(final HaplotypeBAMDestination output) {
Utils.nonNull(output, "output cannot be null");
this.output = output;
this.writerType = type;
this.output = destination;
}

/**
Expand All @@ -125,18 +103,41 @@ public void close() {

/**
* Write out a BAM representing for the haplotype caller at this site
* writerType (ALL_POSSIBLE_HAPLOTYPES or CALLED_HAPLOTYPES) determines inputs to writeHaplotypesAsReads
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lets fix up the javadoc here (the first sentence was incomplete before):

Write out a BAM representing the haplotypes at this site, based on the value for writerType used when the writer was constructed (ALL_POSSIBLE_HAPLOTYPES or CALLED_HAPLOTYPES).

*
* @param haplotypes a list of all possible haplotypes at this loc
* @param paddedReferenceLoc the span of the based reference here
* @param bestHaplotypes a list of the best (a subset of all) haplotypes that actually went forward into genotyping
* @param calledHaplotypes a list of the haplotypes that where actually called as non-reference
* @param readLikelihoods a map from sample -> likelihoods for each read for each of the best haplotypes
*/
public abstract void writeReadsAlignedToHaplotypes(final Collection<Haplotype> haplotypes,
final Locatable paddedReferenceLoc,
final Collection<Haplotype> bestHaplotypes,
final Set<Haplotype> calledHaplotypes,
final ReadLikelihoods<Haplotype> readLikelihoods);
public void writeReadsAlignedToHaplotypes(final Collection<Haplotype> haplotypes,
final Locatable paddedReferenceLoc,
final Collection<Haplotype> bestHaplotypes,
final Set<Haplotype> calledHaplotypes,
final ReadLikelihoods<Haplotype> readLikelihoods) {

Utils.nonNull(haplotypes, "haplotypes cannot be null");
Utils.nonNull(paddedReferenceLoc, "paddedReferenceLoc cannot be null");
Utils.nonNull(calledHaplotypes, "calledHaplotypes cannot be null");
Utils.nonNull(readLikelihoods, "readLikelihoods cannot be null");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should add a null check for bestHaplotypes as well. Also, since we're now requiring calledHaplotypes to always be non-null (I think it used to depend on the writer type), lets update the javadoc for all of these args to say they must be non-null.


if (calledHaplotypes.isEmpty() && writerType.equals(WriterType.CALLED_HAPLOTYPES)) { // only write out called haplotypes
return;
}

Collection<Haplotype> haplotypesToWrite = writerType.equals(WriterType.CALLED_HAPLOTYPES) ? calledHaplotypes : haplotypes;
Set<Haplotype> bestHaplotypesToWrite = writerType.equals(WriterType.CALLED_HAPLOTYPES) ? calledHaplotypes : new LinkedHashSet<>(bestHaplotypes);

writeHaplotypesAsReads(haplotypesToWrite, bestHaplotypesToWrite, paddedReferenceLoc);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be much clearer to just have an if test after the Utils checks, with one branch for ALL and one for called CALLED.


final int sampleCount = readLikelihoods.numberOfSamples();
for (int i = 0; i < sampleCount; i++) {
for (final GATKRead read : readLikelihoods.sampleReads(i)) {
writeReadAgainstHaplotype(read);
}
}
}

/**
* Write out read aligned to haplotype to the BAM file
Expand Down
Loading