Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

the long awaited FastaAlternateReferenceMaker #5549

Merged
merged 3 commits into from
Jan 29, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@

import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.reference.ReferenceSequence;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.iterators.ByteArrayIterator;

import java.util.Arrays;
import java.util.Iterator;

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package org.broadinstitute.hellbender.engine;

import org.broadinstitute.hellbender.engine.filters.CountingReadFilter;
import org.broadinstitute.hellbender.tools.examples.ExampleReferenceWalker;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.iterators.IntervalLocusIterator;

/**
* A reference walker is a tool which processes each base in a given reference. Each base can be processed individually
* or in a context of multiple bases in a window. Reads and Feature tracks can optionally be included as well. The
* reference bases to process can be subset to a given set of intervals.
*
* ReferenceWalker authors must implement the apply() method to process each position, and may optionally implement
* {@link #onTraversalStart()} and/or {@link #onTraversalSuccess()}.
*
* Tool authors may also implement {@link #getReferenceWindow(SimpleInterval)} to provide additional bases of context
* around each position.
*
* See the {@link ExampleReferenceWalker} walker for an example.
*/
public abstract class ReferenceWalker extends GATKTool {

@Override
public String getProgressMeterRecordLabel() { return "bases"; }

@Override
public final boolean requiresReference() { return true; }

/**
* Initialize data sources for traversal.
*
* Marked final so that tool authors don't override it. Tool authors should override onTraversalStart() instead.
*/
@Override
protected final void onStartup() {
super.onStartup();
}

/**
* Implementation of reference-locus-based traversal.
* Subclasses can override to provide own behavior but default implementation should be suitable for most uses.
*/
@Override
public void traverse() {
final CountingReadFilter readFilter = makeReadFilter();

for(final SimpleInterval locus : getIntervalIterator()){
final SimpleInterval referenceWindow = getReferenceWindow(locus);
final ReferenceContext referenceContext = new ReferenceContext(reference, locus, referenceWindow);
apply(referenceContext,
new ReadsContext(reads, referenceContext.getWindow(), readFilter), // Will create an empty ReadsContext if reads == null
new FeatureContext(features, referenceContext.getWindow())); // Will create an empty FeatureContext if features == null

progressMeter.update(referenceContext.getInterval());
};

}

/**
* Determine the window to use when creating the ReferenceContext in apply. This determines which reference bases are
* seen at each position, as well as which reads / features are considered to overlap the reference site.
*
* The default implementation returns the single reference locus passed that is passed in, but subclasses may override
* this method in order to change the window size.
*
* @param locus the current locus being processed in the traversal
* @return the window to use when creating the ReferenceContext that is passed into {@link #apply(ReferenceContext, ReadsContext, FeatureContext)}
*/
protected SimpleInterval getReferenceWindow(SimpleInterval locus){
return locus;
}

private Iterable<SimpleInterval> getIntervalIterator(){
return () -> new IntervalLocusIterator(getTraversalIntervals().iterator());
}

/**
* Process an individual reference locus (with optional contextual information). Must be implemented by tool authors.
* In general, tool authors should simply stream their output from apply(), and maintain as little internal state
* as possible.
*
* @param referenceContext Reference bases at the current locus. The window around the single locus is provided by {@link #getReferenceWindow(SimpleInterval)}
* Additional bases my be retrieved by invoking the by invoking {@link ReferenceContext#setWindow}
* on this object before calling {@link ReferenceContext#getBases}, however this will not effect
* the set of reads and features that are considered overlapping which are based on the preset
* window passed in.
* @param readsContext Reads spanning the current reference window. Will be an empty, but non-null, context object
* if there is no backing source of Read data (in which case all queries on it will return an
* empty List).
* @param featureContext Features spanning the current reference window. Will be an empty, but non-null, context object
* if there is no backing source of Feature data (in which case all queries on it will return an
* empty List).
*
*/
public abstract void apply(ReferenceContext referenceContext, ReadsContext readsContext, FeatureContext featureContext );

/**
* Shutdown data sources.
*
* Marked final so that tool authors don't override it. Tool authors should override {@link #onTraversalSuccess()} instead.
*/
@Override
protected final void onShutdown() {
// Overridden only to make final so that concrete tool implementations don't override
super.onShutdown();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package org.broadinstitute.hellbender.tools.examples;

import com.google.common.annotations.VisibleForTesting;
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.programgroups.ExampleProgramGroup;
import org.broadinstitute.hellbender.engine.*;
import org.broadinstitute.hellbender.utils.SimpleInterval;

import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

/**
* Counts the number of times each reference context is seen as well as how many times it's overlapped by reads and variants.
* Why you would want to do this is a mystery.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add: "A toy example to illustrate how to implement the ReferenceWalker interface"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added

*
* This is a toy example to illustrate how to implement a ReferenceWalker.
*/
@CommandLineProgramProperties(
summary = "Example of how to implement a ReferenceWalker that uses reads and features as well as a custom window",
oneLineSummary = "Example of how to implement a ReferenceWalker",
programGroup = ExampleProgramGroup.class,
omitFromCommandLine = true)
public class ExampleReferenceWalker extends ReferenceWalker {

@Argument(fullName = StandardArgumentDefinitions.VARIANT_LONG_NAME,
shortName = StandardArgumentDefinitions.VARIANT_SHORT_NAME,
doc="variants to count overlaps of")
private FeatureInput<VariantContext> variants;

@VisibleForTesting
final Map<String, OverlapCounts> contextCounts = new HashMap<>();

@Override
protected SimpleInterval getReferenceWindow(SimpleInterval locus) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add explanatory comment: "Add 1 base of padding to each side of each reference locus" (this is an example, after all!)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

// add one base of padding to each side of each reference locus.
return new SimpleInterval(locus.getContig(), Math.max(locus.getStart() -1, 1), locus.getStart()+1);
}

@Override
public void apply(ReferenceContext referenceContext, ReadsContext readsContext, FeatureContext featureContext) {
final byte[] bases = referenceContext.getBases();
final String baseString = new String(bases);
final OverlapCounts counts = contextCounts.getOrDefault(baseString, new OverlapCounts());
if(readsContext.iterator().hasNext()){
counts.overlappedByReads++;
}

if(!featureContext.getValues(variants).isEmpty()){
counts.overlappedByVariants++;
}

counts.timesSeen++;

contextCounts.put(baseString, counts);
}

@Override
public Object onTraversalSuccess(){
contextCounts.entrySet().stream()
.sorted(Comparator.comparing(Entry::getKey))
.forEachOrdered(entry -> System.out.println(entry.getKey() + " " + entry.getValue().toString()));
return 0;
}

@VisibleForTesting
static class OverlapCounts {
long timesSeen = 0;
long overlappedByReads = 0;
long overlappedByVariants = 0;

@Override
public String toString(){
return String.format("Seen: %d Reads %d Variants %d", timesSeen, overlappedByReads, overlappedByVariants);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package org.broadinstitute.hellbender.tools.walkers.fasta;

import com.google.common.annotations.VisibleForTesting;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.ReadsContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.engine.ReferenceWalker;
import picard.cmdline.programgroups.ReferenceProgramGroup;

@DocumentedFeature
@CommandLineProgramProperties(
oneLineSummary = "Count the numbers of each base in a reference file",
summary = "Count the number of times each individual base occurs in a reference file.",
programGroup = ReferenceProgramGroup.class
)
public class CountBasesInReference extends ReferenceWalker {
@VisibleForTesting
final long[] baseCounts = new long[256];

@Override
public void apply(ReferenceContext referenceContext, ReadsContext read, FeatureContext featureContext) {
baseCounts[referenceContext.getBase()]++;
}

@Override
public Object onTraversalSuccess(){
for (int i = 0; i < baseCounts.length; i++) {
final long count = baseCounts[i];
if (count > 0){
System.out.println((char)i + " : " + count );
}
}
return 0;
}
}
Loading