Skip to content

Commit

Permalink
cleaned up SoftClippedReadFilter to conform to the logic for other filters
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesemery committed Jun 24, 2024
1 parent abef8e1 commit 2bd562e
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 54 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ private ReadFilterArgumentDefinitions(){}

public static final String KEEP_INTERVAL_NAME = "keep-intervals";

public static final String SOFT_CLIPPED_RATIO_THRESHOLD = "soft-clipped-ratio-threshold";
public static final String SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD = "soft-clipped-leading-trailing-ratio";
public static final String SOFT_CLIPPED_RATIO_THRESHOLD = "max-soft-clipped-ratio-threshold";
public static final String SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD = "max-soft-clipped-leading-trailing-ratio";

public static final String INVERT_SOFT_CLIP_RATIO_FILTER = "invert-soft-clip-ratio-filter";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,28 +25,28 @@ public final class SoftClippedReadFilter extends ReadFilter {
static final long serialVersionUID = 1L;
private final Logger logger = LogManager.getLogger(this.getClass());

@VisibleForTesting
@Argument(fullName = ReadFilterArgumentDefinitions.INVERT_SOFT_CLIP_RATIO_FILTER,
doc = "Inverts the results from this filter, causing all variants that would pass to fail and visa-versa.",
optional = true
)
boolean doInvertFilter = false;
// @VisibleForTesting
// @Argument(fullName = ReadFilterArgumentDefinitions.INVERT_SOFT_CLIP_RATIO_FILTER,
// doc = "Inverts the results from this filter, causing all variants that would pass to fail and visa-versa.",
// optional = true
// )
// boolean doInvertFilter = false;

@VisibleForTesting
@Argument(fullName = ReadFilterArgumentDefinitions.SOFT_CLIPPED_RATIO_THRESHOLD,
doc = "Threshold ratio of soft clipped bases (anywhere in the cigar string) to total bases in read for read to be filtered.",
optional = true,
mutex = { ReadFilterArgumentDefinitions.SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD }
)
Double minimumSoftClippedRatio = null;
Double maximumSoftClippedRatio = null;

@VisibleForTesting
@Argument(fullName = ReadFilterArgumentDefinitions.SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD,
doc = "Threshold ratio of soft clipped bases (leading / trailing the cigar string) to total bases in read for read to be filtered.",
optional = true,
mutex = {ReadFilterArgumentDefinitions.SOFT_CLIPPED_RATIO_THRESHOLD}
)
Double minimumLeadingTrailingSoftClippedRatio = null;
Double maximumLeadingTrailingSoftClippedRatio = null;

// Command line parser requires a no-arg constructor
public SoftClippedReadFilter() {}
Expand All @@ -63,7 +63,7 @@ private boolean testMinSoftClippedRatio(final GATKRead read) {

final double softClipRatio = ((double)numSoftClippedBases / (double)totalLength);

return softClipRatio > minimumSoftClippedRatio;
return softClipRatio < maximumSoftClippedRatio;
}

private boolean testMinLeadingTrailingSoftClippedRatio(final GATKRead read) {
Expand Down Expand Up @@ -92,22 +92,23 @@ private boolean testMinLeadingTrailingSoftClippedRatio(final GATKRead read) {
// Calculate the ratio:
final double softClipRatio = ((double)numLeadingTrailingSoftClippedBases / (double)totalLength);

return softClipRatio > minimumLeadingTrailingSoftClippedRatio;
return softClipRatio < maximumLeadingTrailingSoftClippedRatio;
}

@Override
// NOTE: for read filters we always return true if the read passes the filter, and false if it doesn't.
public boolean test(final GATKRead read) {

final boolean result;

// NOTE: Since we have mutex'd the args for the clipping ratios, we only need to see if they
// have been specified. If they have, that's the filter logic we're using.
// If we specified the clipping ratio, we use the overall soft-clipped ratio test:
if ( minimumSoftClippedRatio != null ) {
if ( maximumSoftClippedRatio != null ) {
result = testMinSoftClippedRatio(read);
}
// If we specified the leading/trailing clipping ratio, we use the leading/trailing soft-clipped ratio test:
else if ( minimumLeadingTrailingSoftClippedRatio != null ) {
else if ( maximumLeadingTrailingSoftClippedRatio != null ) {
result = testMinLeadingTrailingSoftClippedRatio(read);
}
else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@ public void testOverclippedSoftClipRatioFilter(final String cigarString,
final boolean expectedResult) {

final SoftClippedReadFilter filter = new SoftClippedReadFilter();
filter.minimumSoftClippedRatio = clipRatio;
filter.maximumSoftClippedRatio = clipRatio;

final GATKRead read = buildSAMRead(cigarString);
Assert.assertEquals(filter.test(read), expectedResult, cigarString);

filter.doInvertFilter = true;
Assert.assertEquals(filter.test(read), !expectedResult, "Inverted case: " + cigarString);
// filter.doInvertFilter = true;
// Assert.assertEquals(filter.test(read), !expectedResult, "Inverted case: " + cigarString);
}

@Test(dataProvider= "SoftClippedLeadingTrailingRatioDataProvider")
Expand All @@ -52,13 +52,13 @@ public void testSoftClippedLeadingTrailingRatioFilter(final String cigarString,
final boolean expectedResult) {

final SoftClippedReadFilter filter = new SoftClippedReadFilter();
filter.minimumLeadingTrailingSoftClippedRatio = clipRatio;
filter.maximumLeadingTrailingSoftClippedRatio = clipRatio;

final GATKRead read = buildSAMRead(cigarString);
Assert.assertEquals(filter.test(read), expectedResult, cigarString);

filter.doInvertFilter = true;
Assert.assertEquals(filter.test(read), !expectedResult, "Inverted case: " + cigarString);
// filter.doInvertFilter = true;
// Assert.assertEquals(filter.test(read), !expectedResult, "Inverted case: " + cigarString);
}

@DataProvider(name = "SoftClipRatioDataProvider")
Expand All @@ -67,25 +67,25 @@ public Iterator<Object[]> softClipRatioDataProvider() {

// ---------------------------------------
// Null / trivial cases:
testData.add(new Object[] { "", 0.1, false });
testData.add(new Object[] { "10H", 0.1, false });
testData.add(new Object[] { "", 0.1, true });
testData.add(new Object[] { "10H", 0.1, true });

// ---------------------------------------
// Soft clip ratio test:

testData.add(new Object[] { "1S1M1S17M", 0.2, false }); // 2/20 = .100
testData.add(new Object[] { "1S1M2S17M", 0.2, false }); // 3/21 = .143
testData.add(new Object[] { "1S1M3S17M", 0.2, false }); // 4/22 = .182
testData.add(new Object[] { "1S1M4S17M", 0.2, true }); // 5/23 = .217
testData.add(new Object[] { "1S1M5S17M", 0.2, true }); // 6/24 = .250
testData.add(new Object[] { "1S1M6S17M", 0.2, true }); // 7/25 = .280
testData.add(new Object[] { "1S1M1S17M", 0.2, true }); // 2/20 = .100
testData.add(new Object[] { "1S1M2S17M", 0.2, true }); // 3/21 = .143
testData.add(new Object[] { "1S1M3S17M", 0.2, true }); // 4/22 = .182
testData.add(new Object[] { "1S1M4S17M", 0.2, false }); // 5/23 = .217
testData.add(new Object[] { "1S1M5S17M", 0.2, false }); // 6/24 = .250
testData.add(new Object[] { "1S1M6S17M", 0.2, false }); // 7/25 = .280

// ---------------------------------------
// Soft clip placement:

testData.add(new Object[] { "101S100M", 0.5, true });
testData.add(new Object[] { "100M101S", 0.5, true });
testData.add(new Object[] { "25H20S10M20S10M20S10M20S10M20S10M20S25H", 0.5, true });
testData.add(new Object[] { "101S100M", 0.5, false });
testData.add(new Object[] { "100M101S", 0.5, false });
testData.add(new Object[] { "25H20S10M20S10M20S10M20S10M20S10M20S25H", 0.5, false });

return testData.iterator();
}
Expand All @@ -96,42 +96,42 @@ public Iterator<Object[]> softClippedLeadingTrailingRatioDataProvider() {

// ---------------------------------------
// Null / trivial cases:
testData.add(new Object[] { "", 0.1, false });
testData.add(new Object[] { "10H", 0.1, false });
testData.add(new Object[] { "", 0.1, true });
testData.add(new Object[] { "10H", 0.1, true });

// ---------------------------------------
// Soft clip ratio test:

// Non-leading/-trailing
testData.add(new Object[] { "1S1M1S17M", 0.2, false }); // 2/20 = .100
testData.add(new Object[] { "1S1M2S17M", 0.2, false }); // 3/21 = .143
testData.add(new Object[] { "1S1M3S17M", 0.2, false }); // 4/22 = .182
testData.add(new Object[] { "1S1M4S17M", 0.2, false }); // 5/23 = .217
testData.add(new Object[] { "1S1M5S17M", 0.2, false }); // 6/24 = .250
testData.add(new Object[] { "1S1M6S17M", 0.2, false }); // 7/25 = .280
testData.add(new Object[] { "1S1M1S17M", 0.2, true }); // 2/20 = .100
testData.add(new Object[] { "1S1M2S17M", 0.2, true }); // 3/21 = .143
testData.add(new Object[] { "1S1M3S17M", 0.2, true }); // 4/22 = .182
testData.add(new Object[] { "1S1M4S17M", 0.2, true }); // 5/23 = .217
testData.add(new Object[] { "1S1M5S17M", 0.2, true }); // 6/24 = .250
testData.add(new Object[] { "1S1M6S17M", 0.2, true }); // 7/25 = .280

// Leading:
testData.add(new Object[] { "2S1S1S16M", 0.2, false }); // 2/20 = .100
testData.add(new Object[] { "3S1S1S16M", 0.2, false }); // 3/21 = .143
testData.add(new Object[] { "4S1S1S16M", 0.2, false }); // 4/22 = .182
testData.add(new Object[] { "5S1S1S16M", 0.2, true }); // 5/23 = .217
testData.add(new Object[] { "6S1S1S16M", 0.2, true }); // 6/24 = .250
testData.add(new Object[] { "7S1S1S16M", 0.2, true }); // 7/25 = .280
testData.add(new Object[] { "2S1S1S16M", 0.2, true }); // 2/20 = .100
testData.add(new Object[] { "3S1S1S16M", 0.2, true }); // 3/21 = .143
testData.add(new Object[] { "4S1S1S16M", 0.2, true }); // 4/22 = .182
testData.add(new Object[] { "5S1S1S16M", 0.2, false }); // 5/23 = .217
testData.add(new Object[] { "6S1S1S16M", 0.2, false }); // 6/24 = .250
testData.add(new Object[] { "7S1S1S16M", 0.2, false }); // 7/25 = .280

// Trailing:
testData.add(new Object[] { "1M1S16M2S", 0.2, false }); // 2/20 = .100
testData.add(new Object[] { "1M1S16M3S", 0.2, false }); // 3/21 = .143
testData.add(new Object[] { "1M1S16M4S", 0.2, false }); // 4/22 = .182
testData.add(new Object[] { "1M1S16M5S", 0.2, true }); // 5/23 = .217
testData.add(new Object[] { "1M1S16M6S", 0.2, true }); // 6/24 = .250
testData.add(new Object[] { "1M1S16M7S", 0.2, true }); // 7/25 = .280
testData.add(new Object[] { "1M1S16M2S", 0.2, true }); // 2/20 = .100
testData.add(new Object[] { "1M1S16M3S", 0.2, true }); // 3/21 = .143
testData.add(new Object[] { "1M1S16M4S", 0.2, true }); // 4/22 = .182
testData.add(new Object[] { "1M1S16M5S", 0.2, false }); // 5/23 = .217
testData.add(new Object[] { "1M1S16M6S", 0.2, false }); // 6/24 = .250
testData.add(new Object[] { "1M1S16M7S", 0.2, false }); // 7/25 = .280

// ---------------------------------------
// Soft clip placement:

testData.add(new Object[] { "101S100M", 0.5, true });
testData.add(new Object[] { "100M101S", 0.5, true });
testData.add(new Object[] { "25H20S10M20S10M20S10M20S10M20S10M20S25H", 0.5, false });
testData.add(new Object[] { "101S100M", 0.5, false });
testData.add(new Object[] { "100M101S", 0.5, false });
testData.add(new Object[] { "25H20S10M20S10M20S10M20S10M20S10M20S25H", 0.5, true });

return testData.iterator();
}
Expand Down

0 comments on commit 2bd562e

Please sign in to comment.