Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Only create MatrixStatsResults on final reduction #38130

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -244,12 +244,15 @@ public InternalAggregation doReduce(List<InternalAggregation> aggregations, Redu
}

RunningStats runningStats = new RunningStats();
for (int i=0; i < aggs.size(); ++i) {
runningStats.merge(((InternalMatrixStats) aggs.get(i)).stats);
for (InternalAggregation agg : aggs) {
runningStats.merge(((InternalMatrixStats) agg).stats);
}
MatrixStatsResults results = new MatrixStatsResults(runningStats);

return new InternalMatrixStats(name, results.getDocCount(), runningStats, results, pipelineAggregators(), getMetaData());
if (reduceContext.isFinalReduce()) {
MatrixStatsResults results = new MatrixStatsResults(runningStats);
return new InternalMatrixStats(name, results.getDocCount(), runningStats, results, pipelineAggregators(), getMetaData());
}
return new InternalMatrixStats(name, runningStats.docCount, runningStats, null, pipelineAggregators(), getMetaData());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ public void testNoData() throws Exception {
}
}

@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/37587")
public void testTwoFields() throws Exception {
String fieldA = "a";
MappedFieldType ftA = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
Expand Down Expand Up @@ -89,8 +88,49 @@ public void testTwoFields() throws Exception {
IndexSearcher searcher = new IndexSearcher(reader);
MatrixStatsAggregationBuilder aggBuilder = new MatrixStatsAggregationBuilder("my_agg")
.fields(Arrays.asList(fieldA, fieldB));
InternalMatrixStats stats = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, ftA, ftB);
InternalMatrixStats stats = search(searcher, new MatchAllDocsQuery(), aggBuilder, ftA, ftB);
// Since `search` doesn't do any reduction, and the InternalMatrixStats object will have a null `MatrixStatsResults`
// object. That is created during the final reduction, which also does a final round of computations
// So we have to create a MatrixStatsResults object here manually so that the final `compute()` is called
multiPassStats.assertNearlyEqual(new MatrixStatsResults(stats.getStats()));
}
}
}

public void testTwoFieldsReduce() throws Exception {
String fieldA = "a";
MappedFieldType ftA = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
ftA.setName(fieldA);
String fieldB = "b";
MappedFieldType ftB = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
ftB.setName(fieldB);

try (Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {

int numDocs = scaledRandomIntBetween(8192, 16384);
Double[] fieldAValues = new Double[numDocs];
Double[] fieldBValues = new Double[numDocs];
for (int docId = 0; docId < numDocs; docId++) {
Document document = new Document();
fieldAValues[docId] = randomDouble();
document.add(new SortedNumericDocValuesField(fieldA, NumericUtils.doubleToSortableLong(fieldAValues[docId])));

fieldBValues[docId] = randomDouble();
document.add(new SortedNumericDocValuesField(fieldB, NumericUtils.doubleToSortableLong(fieldBValues[docId])));
indexWriter.addDocument(document);
}

MultiPassStats multiPassStats = new MultiPassStats(fieldA, fieldB);
multiPassStats.computeStats(Arrays.asList(fieldAValues), Arrays.asList(fieldBValues));
try (IndexReader reader = indexWriter.getReader()) {
IndexSearcher searcher = new IndexSearcher(reader);
MatrixStatsAggregationBuilder aggBuilder = new MatrixStatsAggregationBuilder("my_agg")
.fields(Arrays.asList(fieldA, fieldB));
InternalMatrixStats stats = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, ftA, ftB);
// Unlike testTwoFields, `searchAndReduce` will execute reductions so the `MatrixStatsResults` object
// will be populated and fully computed. We should use that value directly to test against
multiPassStats.assertNearlyEqual(stats);
assertTrue(MatrixAggregationInspectionHelper.hasValue(stats));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,30 @@ void assertNearlyEqual(MatrixStatsResults stats) {
assertTrue(nearlyEqual(correlations.get(fieldBKey).get(fieldAKey), stats.getCorrelation(fieldBKey, fieldAKey), 1e-7));
}

void assertNearlyEqual(InternalMatrixStats stats) {
assertEquals(count, stats.getDocCount());
assertEquals(count, stats.getFieldCount(fieldAKey));
assertEquals(count, stats.getFieldCount(fieldBKey));
// means
assertTrue(nearlyEqual(means.get(fieldAKey), stats.getMean(fieldAKey), 1e-7));
assertTrue(nearlyEqual(means.get(fieldBKey), stats.getMean(fieldBKey), 1e-7));
// variances
assertTrue(nearlyEqual(variances.get(fieldAKey), stats.getVariance(fieldAKey), 1e-7));
assertTrue(nearlyEqual(variances.get(fieldBKey), stats.getVariance(fieldBKey), 1e-7));
// skewness (multi-pass is more susceptible to round-off error so we need to slightly relax the tolerance)
assertTrue(nearlyEqual(skewness.get(fieldAKey), stats.getSkewness(fieldAKey), 1e-4));
assertTrue(nearlyEqual(skewness.get(fieldBKey), stats.getSkewness(fieldBKey), 1e-4));
// kurtosis (multi-pass is more susceptible to round-off error so we need to slightly relax the tolerance)
assertTrue(nearlyEqual(kurtosis.get(fieldAKey), stats.getKurtosis(fieldAKey), 1e-4));
assertTrue(nearlyEqual(kurtosis.get(fieldBKey), stats.getKurtosis(fieldBKey), 1e-4));
// covariances
assertTrue(nearlyEqual(covariances.get(fieldAKey).get(fieldBKey),stats.getCovariance(fieldAKey, fieldBKey), 1e-7));
assertTrue(nearlyEqual(covariances.get(fieldBKey).get(fieldAKey),stats.getCovariance(fieldBKey, fieldAKey), 1e-7));
// correlation
assertTrue(nearlyEqual(correlations.get(fieldAKey).get(fieldBKey), stats.getCorrelation(fieldAKey, fieldBKey), 1e-7));
assertTrue(nearlyEqual(correlations.get(fieldBKey).get(fieldAKey), stats.getCorrelation(fieldBKey, fieldAKey), 1e-7));
}

private static boolean nearlyEqual(double a, double b, double epsilon) {
final double absA = Math.abs(a);
final double absB = Math.abs(b);
Expand Down