-
Notifications
You must be signed in to change notification settings - Fork 589
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
the long awaited FastaAlternateReferenceMaker #5549
Merged
Merged
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
107 changes: 107 additions & 0 deletions
107
src/main/java/org/broadinstitute/hellbender/engine/ReferenceWalker.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
package org.broadinstitute.hellbender.engine; | ||
|
||
import org.broadinstitute.hellbender.engine.filters.CountingReadFilter; | ||
import org.broadinstitute.hellbender.tools.examples.ExampleReferenceWalker; | ||
import org.broadinstitute.hellbender.utils.SimpleInterval; | ||
import org.broadinstitute.hellbender.utils.iterators.IntervalLocusIterator; | ||
|
||
/** | ||
* A reference walker is a tool which processes each base in a given reference. Each base can be processed individually | ||
* or in a context of multiple bases in a window. Reads and Feature tracks can optionally be included as well. The | ||
* reference bases to process can be subset to a given set of intervals. | ||
* | ||
* ReferenceWalker authors must implement the apply() method to process each position, and may optionally implement | ||
* {@link #onTraversalStart()} and/or {@link #onTraversalSuccess()}. | ||
* | ||
* Tool authors may also implement {@link #getReferenceWindow(SimpleInterval)} to provide additional bases of context | ||
* around each position. | ||
* | ||
* See the {@link ExampleReferenceWalker} walker for an example. | ||
*/ | ||
public abstract class ReferenceWalker extends GATKTool { | ||
|
||
@Override | ||
public String getProgressMeterRecordLabel() { return "bases"; } | ||
|
||
@Override | ||
public final boolean requiresReference() { return true; } | ||
|
||
/** | ||
* Initialize data sources for traversal. | ||
* | ||
* Marked final so that tool authors don't override it. Tool authors should override onTraversalStart() instead. | ||
*/ | ||
@Override | ||
protected final void onStartup() { | ||
super.onStartup(); | ||
} | ||
|
||
/** | ||
* Implementation of reference-locus-based traversal. | ||
* Subclasses can override to provide own behavior but default implementation should be suitable for most uses. | ||
*/ | ||
@Override | ||
public void traverse() { | ||
final CountingReadFilter readFilter = makeReadFilter(); | ||
|
||
for(final SimpleInterval locus : getIntervalIterator()){ | ||
final SimpleInterval referenceWindow = getReferenceWindow(locus); | ||
final ReferenceContext referenceContext = new ReferenceContext(reference, locus, referenceWindow); | ||
apply(referenceContext, | ||
new ReadsContext(reads, referenceContext.getWindow(), readFilter), // Will create an empty ReadsContext if reads == null | ||
new FeatureContext(features, referenceContext.getWindow())); // Will create an empty FeatureContext if features == null | ||
|
||
progressMeter.update(referenceContext.getInterval()); | ||
}; | ||
|
||
} | ||
|
||
/** | ||
* Determine the window to use when creating the ReferenceContext in apply. This determines which reference bases are | ||
* seen at each position, as well as which reads / features are considered to overlap the reference site. | ||
* | ||
* The default implementation returns the single reference locus passed that is passed in, but subclasses may override | ||
* this method in order to change the window size. | ||
* | ||
* @param locus the current locus being processed in the traversal | ||
* @return the window to use when creating the ReferenceContext that is passed into {@link #apply(ReferenceContext, ReadsContext, FeatureContext)} | ||
*/ | ||
protected SimpleInterval getReferenceWindow(SimpleInterval locus){ | ||
return locus; | ||
} | ||
|
||
private Iterable<SimpleInterval> getIntervalIterator(){ | ||
return () -> new IntervalLocusIterator(getTraversalIntervals().iterator()); | ||
} | ||
|
||
/** | ||
* Process an individual reference locus (with optional contextual information). Must be implemented by tool authors. | ||
* In general, tool authors should simply stream their output from apply(), and maintain as little internal state | ||
* as possible. | ||
* | ||
* @param referenceContext Reference bases at the current locus. The window around the single locus is provided by {@link #getReferenceWindow(SimpleInterval)} | ||
* Additional bases my be retrieved by invoking the by invoking {@link ReferenceContext#setWindow} | ||
* on this object before calling {@link ReferenceContext#getBases}, however this will not effect | ||
* the set of reads and features that are considered overlapping which are based on the preset | ||
* window passed in. | ||
* @param readsContext Reads spanning the current reference window. Will be an empty, but non-null, context object | ||
* if there is no backing source of Read data (in which case all queries on it will return an | ||
* empty List). | ||
* @param featureContext Features spanning the current reference window. Will be an empty, but non-null, context object | ||
* if there is no backing source of Feature data (in which case all queries on it will return an | ||
* empty List). | ||
* | ||
*/ | ||
public abstract void apply(ReferenceContext referenceContext, ReadsContext readsContext, FeatureContext featureContext ); | ||
|
||
/** | ||
* Shutdown data sources. | ||
* | ||
* Marked final so that tool authors don't override it. Tool authors should override {@link #onTraversalSuccess()} instead. | ||
*/ | ||
@Override | ||
protected final void onShutdown() { | ||
// Overridden only to make final so that concrete tool implementations don't override | ||
super.onShutdown(); | ||
} | ||
} |
81 changes: 81 additions & 0 deletions
81
src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleReferenceWalker.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
package org.broadinstitute.hellbender.tools.examples; | ||
|
||
import com.google.common.annotations.VisibleForTesting; | ||
import htsjdk.variant.variantcontext.VariantContext; | ||
import org.broadinstitute.barclay.argparser.Argument; | ||
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; | ||
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; | ||
import org.broadinstitute.hellbender.cmdline.programgroups.ExampleProgramGroup; | ||
import org.broadinstitute.hellbender.engine.*; | ||
import org.broadinstitute.hellbender.utils.SimpleInterval; | ||
|
||
import java.util.Comparator; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
import java.util.Map.Entry; | ||
|
||
/** | ||
* Counts the number of times each reference context is seen as well as how many times it's overlapped by reads and variants. | ||
* Why you would want to do this is a mystery. | ||
* | ||
* This is a toy example to illustrate how to implement a ReferenceWalker. | ||
*/ | ||
@CommandLineProgramProperties( | ||
summary = "Example of how to implement a ReferenceWalker that uses reads and features as well as a custom window", | ||
oneLineSummary = "Example of how to implement a ReferenceWalker", | ||
programGroup = ExampleProgramGroup.class, | ||
omitFromCommandLine = true) | ||
public class ExampleReferenceWalker extends ReferenceWalker { | ||
|
||
@Argument(fullName = StandardArgumentDefinitions.VARIANT_LONG_NAME, | ||
shortName = StandardArgumentDefinitions.VARIANT_SHORT_NAME, | ||
doc="variants to count overlaps of") | ||
private FeatureInput<VariantContext> variants; | ||
|
||
@VisibleForTesting | ||
final Map<String, OverlapCounts> contextCounts = new HashMap<>(); | ||
|
||
@Override | ||
protected SimpleInterval getReferenceWindow(SimpleInterval locus) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add explanatory comment: "Add 1 base of padding to each side of each reference locus" (this is an example, after all!) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok |
||
// add one base of padding to each side of each reference locus. | ||
return new SimpleInterval(locus.getContig(), Math.max(locus.getStart() -1, 1), locus.getStart()+1); | ||
} | ||
|
||
@Override | ||
public void apply(ReferenceContext referenceContext, ReadsContext readsContext, FeatureContext featureContext) { | ||
final byte[] bases = referenceContext.getBases(); | ||
final String baseString = new String(bases); | ||
final OverlapCounts counts = contextCounts.getOrDefault(baseString, new OverlapCounts()); | ||
if(readsContext.iterator().hasNext()){ | ||
counts.overlappedByReads++; | ||
} | ||
|
||
if(!featureContext.getValues(variants).isEmpty()){ | ||
counts.overlappedByVariants++; | ||
} | ||
|
||
counts.timesSeen++; | ||
|
||
contextCounts.put(baseString, counts); | ||
} | ||
|
||
@Override | ||
public Object onTraversalSuccess(){ | ||
contextCounts.entrySet().stream() | ||
.sorted(Comparator.comparing(Entry::getKey)) | ||
.forEachOrdered(entry -> System.out.println(entry.getKey() + " " + entry.getValue().toString())); | ||
return 0; | ||
} | ||
|
||
@VisibleForTesting | ||
static class OverlapCounts { | ||
long timesSeen = 0; | ||
long overlappedByReads = 0; | ||
long overlappedByVariants = 0; | ||
|
||
@Override | ||
public String toString(){ | ||
return String.format("Seen: %d Reads %d Variants %d", timesSeen, overlappedByReads, overlappedByVariants); | ||
} | ||
} | ||
} |
37 changes: 37 additions & 0 deletions
37
src/main/java/org/broadinstitute/hellbender/tools/walkers/fasta/CountBasesInReference.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
package org.broadinstitute.hellbender.tools.walkers.fasta; | ||
|
||
import com.google.common.annotations.VisibleForTesting; | ||
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; | ||
import org.broadinstitute.barclay.help.DocumentedFeature; | ||
import org.broadinstitute.hellbender.engine.FeatureContext; | ||
import org.broadinstitute.hellbender.engine.ReadsContext; | ||
import org.broadinstitute.hellbender.engine.ReferenceContext; | ||
import org.broadinstitute.hellbender.engine.ReferenceWalker; | ||
import picard.cmdline.programgroups.ReferenceProgramGroup; | ||
|
||
@DocumentedFeature | ||
@CommandLineProgramProperties( | ||
oneLineSummary = "Count the numbers of each base in a reference file", | ||
summary = "Count the number of times each individual base occurs in a reference file.", | ||
programGroup = ReferenceProgramGroup.class | ||
) | ||
public class CountBasesInReference extends ReferenceWalker { | ||
@VisibleForTesting | ||
final long[] baseCounts = new long[256]; | ||
|
||
@Override | ||
public void apply(ReferenceContext referenceContext, ReadsContext read, FeatureContext featureContext) { | ||
baseCounts[referenceContext.getBase()]++; | ||
} | ||
|
||
@Override | ||
public Object onTraversalSuccess(){ | ||
for (int i = 0; i < baseCounts.length; i++) { | ||
final long count = baseCounts[i]; | ||
if (count > 0){ | ||
System.out.println((char)i + " : " + count ); | ||
} | ||
} | ||
return 0; | ||
} | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add: "A toy example to illustrate how to implement the ReferenceWalker interface"
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added