Skip to content

Commit

Permalink
histogram update
Browse files Browse the repository at this point in the history
  • Loading branch information
dror27 committed Jan 1, 2024
1 parent d9ac0d3 commit 2a2665e
Show file tree
Hide file tree
Showing 2 changed files with 151 additions and 21 deletions.
62 changes: 41 additions & 21 deletions src/main/java/picard/analysis/CollectQualityYieldMetricsFlow.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,12 @@
import picard.flow.FlowBasedArgumentCollection;
import picard.flow.FlowBasedRead;
import picard.flow.FlowBasedReadUtils;
import picard.util.SeriesStats;
import picard.util.help.HelpConstants;

import java.io.File;
import java.util.Arrays;
import java.util.Vector;

/**
* Command line program to calculate quality yield metrics for flow based read files
Expand All @@ -59,11 +61,11 @@
)
@ExperimentalFeature
public class CollectQualityYieldMetricsFlow extends SinglePassSamProgram {
private static final int CYCLE_SIZE = 4;
private QualityYieldMetricsCollectorFlow collector = null;
public Histogram<Integer> qualityHistogram = new Histogram<>("KEY", "QUAL_COUNT");
public Histogram<Integer> flowAvgQualityHistogram = new Histogram<>("KEY", "AVG_FLOW_QUAL");
public int[] flowAvgQualityCount = null;
public long[] flowAvgQualitySum = null;

private Vector<SeriesStats> flowQualityStats = new Vector<>();

static final String USAGE_SUMMARY = "Collect metrics about reads that pass quality thresholds from flow based read files. ";
static final String USAGE_DETAILS = "This tool evaluates the overall quality of reads within a bam file containing one read group. " +
Expand Down Expand Up @@ -112,7 +114,7 @@ protected void finish() {
this.collector.finish();
this.collector.addMetricsToFile(metricsFile);
metricsFile.addHistogram(qualityHistogram);
metricsFile.addHistogram(flowAvgQualityHistogram);
this.collector.addHistograms(metricsFile);
metricsFile.write(OUTPUT);
}

Expand Down Expand Up @@ -148,7 +150,6 @@ public void acceptRecord(final SAMRecord rec, final ReferenceSequence ref) {
FlowBasedReadUtils.ReadGroupInfo info = FlowBasedReadUtils.getReadGroupInfo(rec.getHeader(), rec);
FlowBasedRead fread = new FlowBasedRead(rec, info.flowOrder, info.maxClass, fbargs);

final int length = rec.getReadLength();
metrics.TOTAL_READS++;
metrics.TOTAL_FLOWS += fread.getKey().length;

Expand All @@ -161,14 +162,10 @@ public void acceptRecord(final SAMRecord rec, final ReferenceSequence ref) {
// get flow quals
final byte[] quals = getFlowQualities(fread);

// make sure flowQualityCount/Sum are large enough
if ( flowAvgQualityCount == null ) {
flowAvgQualityCount = new int[quals.length];
flowAvgQualitySum = new long[quals.length];
} else if ( flowAvgQualityCount.length < quals.length ) {
flowAvgQualityCount = Arrays.copyOf(flowAvgQualityCount, quals.length);
flowAvgQualitySum = Arrays.copyOf(flowAvgQualitySum, quals.length);
}
// allocate cycle qual accounting
int cycleCount = (int)Math.ceil((float)quals.length / CYCLE_SIZE);
int cycleQualCount[] = new int[cycleCount];
int cycleQualSum[] = new int[cycleCount];

// add up quals, and quals >= 20
int flow = 0;
Expand All @@ -194,13 +191,23 @@ public void acceptRecord(final SAMRecord rec, final ReferenceSequence ref) {

// enter quality into histograms
qualityHistogram.increment(qual);
flowAvgQualityCount[flow]++;
flowAvgQualitySum[flow] += qual;

// enter quality into cycle stats
final int cycle = flow / CYCLE_SIZE;
cycleQualCount[cycle]++;
cycleQualSum[cycle] += qual;

// advance
flow++;
}

// make sure flowQualityCount/Sum are large enough and enter accounted values
while ( flowQualityStats.size() < cycleCount )
flowQualityStats.add(new SeriesStats());
for ( int cycle = 0 ; cycle < cycleCount ; cycle++ ) {
int id = !rec.getReadNegativeStrandFlag() ? cycle : (cycleCount - 1 - cycle);
flowQualityStats.get(id).add((double)cycleQualSum[cycle] / cycleQualCount[cycle]);
}
}

private byte[] getFlowQualities(FlowBasedRead fread) {
Expand All @@ -223,8 +230,6 @@ private byte[] getFlowQualities(FlowBasedRead fread) {
private double[] computeErrorProb(final FlowBasedRead flowRead) {

final int[] key = flowRead.getKey();
final byte[] flowOrder = flowRead.getFlowOrderArray();

final double[] probCol = new double[flowRead.getMaxHmer() + 1];
double[] result = new double[key.length];

Expand Down Expand Up @@ -252,15 +257,30 @@ public void finish() {
metrics.Q20_EQUIVALENT_YIELD = metrics.Q20_EQUIVALENT_YIELD / 20;
metrics.PF_Q20_EQUIVALENT_YIELD = metrics.PF_Q20_EQUIVALENT_YIELD / 20;
metrics.calculateDerivedFields();

for (int i = 0; i < flowAvgQualityCount.length ; i++ ) {
flowAvgQualityHistogram.increment(i, flowAvgQualityCount[i] != 0 ? ((double) flowAvgQualitySum[i] / flowAvgQualityCount[i]) : 0.0);
}
}

/** Adds the collected quality-yield metrics record to the given metrics file. */
public void addMetricsToFile(final MetricsFile<QualityYieldMetricsFlow, Integer> metricsFile) {
metricsFile.addMetric(metrics);
}

/**
 * Adds per-cycle flow quality histograms to the given metrics file: one
 * histogram each for the mean, median, 25th and 75th percentile of the
 * accumulated per-cycle quality statistics, keyed by cycle index.
 *
 * @param metricsFile metrics file to which the four histograms are appended
 */
public void addHistograms(MetricsFile<QualityYieldMetricsFlow, Integer> metricsFile) {

    final Histogram<Integer> meanHist = new Histogram<>("KEY", "MEAN_CYCLE_QUAL");
    final Histogram<Integer> medianHist = new Histogram<>("KEY", "MEDIAN_CYCLE_QUAL");
    final Histogram<Integer> q25Hist = new Histogram<>("KEY", "Q25_CYCLE_QUAL");
    final Histogram<Integer> q75Hist = new Histogram<>("KEY", "Q75_CYCLE_QUAL");

    // one histogram bin per cycle, in cycle order
    int cycle = 0;
    for (final SeriesStats stats : flowQualityStats) {
        meanHist.increment(cycle, stats.getMean());
        medianHist.increment(cycle, stats.getMedian());
        q25Hist.increment(cycle, stats.getPercentile(25));
        q75Hist.increment(cycle, stats.getPercentile(75));
        cycle++;
    }

    metricsFile.addHistogram(meanHist);
    metricsFile.addHistogram(medianHist);
    metricsFile.addHistogram(q25Hist);
    metricsFile.addHistogram(q75Hist);
}
}

/**
Expand Down
110 changes: 110 additions & 0 deletions src/main/java/picard/util/SeriesStats.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
package picard.util;

import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Accumulates summary statistics over a series of double values: count,
 * last value, min/max, mean, standard deviation and exact percentiles.
 * Values are stored in a sorted frequency map, so percentile queries scan
 * the distinct values in order.
 *
 * NOTE(review): despite the use of {@link AtomicInteger} as a bin counter,
 * this class is NOT thread-safe — the surrounding map and field updates are
 * unsynchronized.
 */
public class SeriesStats {

    // running aggregates
    private double last = Double.NaN;   // most recently added value
    private int count = 0;              // total number of values added
    private double sum = 0;             // running sum, used for the mean
    private double min = Double.NaN;
    private double max = Double.NaN;

    // value -> occurrence count, kept sorted by value for percentile scans
    private final SortedMap<Double, AtomicInteger> bins = new TreeMap<>();

    /**
     * Adds a value to the series.
     *
     * @param v the value to accumulate
     */
    public void add(final double v) {

        // update simple aggregates
        last = v;
        sum += v;
        if ( count > 0 ) {
            min = Math.min(min, v);
            max = Math.max(max, v);
        } else {
            // first value establishes both extremes
            min = max = v;
        }
        count++;

        // update the frequency bin for this exact value
        bins.computeIfAbsent(v, key -> new AtomicInteger()).incrementAndGet();
    }

    /** @return the last value added, or NaN if the series is empty */
    public double getLast() {
        return last;
    }

    /** @return the number of values added so far */
    public int getCount() {
        return count;
    }

    /** @return the minimum value, or NaN if the series is empty */
    public double getMin() {
        return (count != 0) ? min : Double.NaN;
    }

    /** @return the maximum value, or NaN if the series is empty */
    public double getMax() {
        return (count != 0) ? max : Double.NaN;
    }

    /** @return the number of distinct values seen */
    public int getUniq() {
        return bins.size();
    }

    /** @return the arithmetic mean, or NaN if the series is empty */
    public double getMean() {
        return (count != 0) ? (sum / count) : Double.NaN;
    }

    /** @return the median (50th percentile), or NaN if the series is empty */
    public double getMedian() {
        return getPercentile(50);
    }

    /**
     * Returns the requested percentile of the series using the nearest-rank
     * method: the value at (truncated) rank {@code count * percentile / 100}
     * in the sorted series.
     *
     * @param percentile percentile to query, expected in [0, 100]
     * @return the percentile value, or NaN if the series is empty
     */
    public double getPercentile(final double percentile) {
        if ( count == 0 ) {
            return Double.NaN;
        } else if ( count == 1 ) {
            return last;
        } else {

            final int percentileIndex = (int)(count * percentile / 100);

            // walk the bins in value order until the one covering the rank
            int index = 0;
            for ( final Map.Entry<Double, AtomicInteger> entry : bins.entrySet() ) {
                final int binSize = entry.getValue().get();
                if ( percentileIndex >= index && (percentileIndex < (index + binSize)) ) {
                    return entry.getKey();
                }
                index += binSize;
            }

            // rank beyond the last bin (e.g. percentile == 100): largest value
            return bins.lastKey();
        }
    }

    /**
     * Returns the population standard deviation of the series (the sum of
     * squared deviations is divided by {@code count}, not {@code count - 1}).
     *
     * @return the standard deviation, or NaN if the series is empty
     */
    public double getStd() {

        if (count == 0) {
            return Double.NaN;
        }

        final double mean = getMean();

        // sum of squared deviations, weighted by bin occupancy
        double vsum = 0;
        for (final Map.Entry<Double, AtomicInteger> entry : bins.entrySet()) {
            final int binSize = entry.getValue().get();
            vsum += (Math.pow(entry.getKey() - mean, 2) * binSize);
        }

        // variance -> standard deviation
        return Math.sqrt(vsum / count);
    }
}

0 comments on commit 2a2665e

Please sign in to comment.