Skip to content

Commit

Permalink
Only create final MatrixStatsResults on final reduction (#38130)
Browse files Browse the repository at this point in the history
MatrixStatsResults is the "final" result object, and runs an additional
computation in it's ctor to calculate covariance, etc.  This means
it should only run on the final reduction instead of on every reduce.
  • Loading branch information
polyfractal authored Feb 20, 2019
1 parent bdd3a82 commit ba98f67
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -244,12 +244,15 @@ public InternalAggregation doReduce(List<InternalAggregation> aggregations, Redu
}

RunningStats runningStats = new RunningStats();
for (int i=0; i < aggs.size(); ++i) {
runningStats.merge(((InternalMatrixStats) aggs.get(i)).stats);
for (InternalAggregation agg : aggs) {
runningStats.merge(((InternalMatrixStats) agg).stats);
}
MatrixStatsResults results = new MatrixStatsResults(runningStats);

return new InternalMatrixStats(name, results.getDocCount(), runningStats, results, pipelineAggregators(), getMetaData());
if (reduceContext.isFinalReduce()) {
MatrixStatsResults results = new MatrixStatsResults(runningStats);
return new InternalMatrixStats(name, results.getDocCount(), runningStats, results, pipelineAggregators(), getMetaData());
}
return new InternalMatrixStats(name, runningStats.docCount, runningStats, null, pipelineAggregators(), getMetaData());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ public void testNoData() throws Exception {
}
}

@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/37587")
public void testTwoFields() throws Exception {
String fieldA = "a";
MappedFieldType ftA = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
Expand Down Expand Up @@ -89,8 +88,49 @@ public void testTwoFields() throws Exception {
IndexSearcher searcher = new IndexSearcher(reader);
MatrixStatsAggregationBuilder aggBuilder = new MatrixStatsAggregationBuilder("my_agg")
.fields(Arrays.asList(fieldA, fieldB));
InternalMatrixStats stats = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, ftA, ftB);
InternalMatrixStats stats = search(searcher, new MatchAllDocsQuery(), aggBuilder, ftA, ftB);
// Since `search` doesn't do any reduction, and the InternalMatrixStats object will have a null `MatrixStatsResults`
// object. That is created during the final reduction, which also does a final round of computations
// So we have to create a MatrixStatsResults object here manually so that the final `compute()` is called
multiPassStats.assertNearlyEqual(new MatrixStatsResults(stats.getStats()));
}
}
}

public void testTwoFieldsReduce() throws Exception {
String fieldA = "a";
MappedFieldType ftA = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
ftA.setName(fieldA);
String fieldB = "b";
MappedFieldType ftB = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
ftB.setName(fieldB);

try (Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {

int numDocs = scaledRandomIntBetween(8192, 16384);
Double[] fieldAValues = new Double[numDocs];
Double[] fieldBValues = new Double[numDocs];
for (int docId = 0; docId < numDocs; docId++) {
Document document = new Document();
fieldAValues[docId] = randomDouble();
document.add(new SortedNumericDocValuesField(fieldA, NumericUtils.doubleToSortableLong(fieldAValues[docId])));

fieldBValues[docId] = randomDouble();
document.add(new SortedNumericDocValuesField(fieldB, NumericUtils.doubleToSortableLong(fieldBValues[docId])));
indexWriter.addDocument(document);
}

MultiPassStats multiPassStats = new MultiPassStats(fieldA, fieldB);
multiPassStats.computeStats(Arrays.asList(fieldAValues), Arrays.asList(fieldBValues));
try (IndexReader reader = indexWriter.getReader()) {
IndexSearcher searcher = new IndexSearcher(reader);
MatrixStatsAggregationBuilder aggBuilder = new MatrixStatsAggregationBuilder("my_agg")
.fields(Arrays.asList(fieldA, fieldB));
InternalMatrixStats stats = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, ftA, ftB);
// Unlike testTwoFields, `searchAndReduce` will execute reductions so the `MatrixStatsResults` object
// will be populated and fully computed. We should use that value directly to test against
multiPassStats.assertNearlyEqual(stats);
assertTrue(MatrixAggregationInspectionHelper.hasValue(stats));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,30 @@ void assertNearlyEqual(MatrixStatsResults stats) {
assertTrue(nearlyEqual(correlations.get(fieldBKey).get(fieldAKey), stats.getCorrelation(fieldBKey, fieldAKey), 1e-7));
}

void assertNearlyEqual(InternalMatrixStats stats) {
assertEquals(count, stats.getDocCount());
assertEquals(count, stats.getFieldCount(fieldAKey));
assertEquals(count, stats.getFieldCount(fieldBKey));
// means
assertTrue(nearlyEqual(means.get(fieldAKey), stats.getMean(fieldAKey), 1e-7));
assertTrue(nearlyEqual(means.get(fieldBKey), stats.getMean(fieldBKey), 1e-7));
// variances
assertTrue(nearlyEqual(variances.get(fieldAKey), stats.getVariance(fieldAKey), 1e-7));
assertTrue(nearlyEqual(variances.get(fieldBKey), stats.getVariance(fieldBKey), 1e-7));
// skewness (multi-pass is more susceptible to round-off error so we need to slightly relax the tolerance)
assertTrue(nearlyEqual(skewness.get(fieldAKey), stats.getSkewness(fieldAKey), 1e-4));
assertTrue(nearlyEqual(skewness.get(fieldBKey), stats.getSkewness(fieldBKey), 1e-4));
// kurtosis (multi-pass is more susceptible to round-off error so we need to slightly relax the tolerance)
assertTrue(nearlyEqual(kurtosis.get(fieldAKey), stats.getKurtosis(fieldAKey), 1e-4));
assertTrue(nearlyEqual(kurtosis.get(fieldBKey), stats.getKurtosis(fieldBKey), 1e-4));
// covariances
assertTrue(nearlyEqual(covariances.get(fieldAKey).get(fieldBKey),stats.getCovariance(fieldAKey, fieldBKey), 1e-7));
assertTrue(nearlyEqual(covariances.get(fieldBKey).get(fieldAKey),stats.getCovariance(fieldBKey, fieldAKey), 1e-7));
// correlation
assertTrue(nearlyEqual(correlations.get(fieldAKey).get(fieldBKey), stats.getCorrelation(fieldAKey, fieldBKey), 1e-7));
assertTrue(nearlyEqual(correlations.get(fieldBKey).get(fieldAKey), stats.getCorrelation(fieldBKey, fieldAKey), 1e-7));
}

private static boolean nearlyEqual(double a, double b, double epsilon) {
final double absA = Math.abs(a);
final double absB = Math.abs(b);
Expand Down

0 comments on commit ba98f67

Please sign in to comment.