[ML] Account for gaps in data counts after job is reopened (#30294)
This commit fixes an issue with the data diagnostics where empty buckets are not reported even though they should be. Once a job is reopened, the diagnostics are not initialized from the current data counts (in particular the latest record timestamp). As a result, if the data sent after reopening has a time gap relative to the data sent before, that gap is not accounted for in the empty bucket count. This commit fixes that by initializing the diagnostics with the current data counts. Closes #30080
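The gist of the change described above is that, on reopen, the diagnostics should be seeded with the latest record timestamp from the job's persisted data counts, so that a gap spanning the close/reopen is still counted as empty buckets. The sketch below only illustrates that idea under stated assumptions: the class and member names (GapDiagnosticsSketch, checkRecord, and so on) are invented for the example and are not the actual Elasticsearch code, which initializes its diagnostics from the job's current DataCounts as the commit message says.

// Illustrative sketch only; names are assumptions made for this example.
import java.util.concurrent.TimeUnit;

class GapDiagnosticsSketch {

    private final long bucketSpanMs;
    private long latestRecordTimeMs;   // seeded from the persisted data counts on reopen
    private long emptyBucketCount;

    GapDiagnosticsSketch(long bucketSpanMs, Long persistedLatestRecordTimeMs) {
        this.bucketSpanMs = bucketSpanMs;
        // The essence of the fix: start from the latest record time the job had already
        // seen before it was closed, rather than starting as if no data had been seen.
        this.latestRecordTimeMs = persistedLatestRecordTimeMs == null ? -1 : persistedLatestRecordTimeMs;
    }

    void checkRecord(long recordTimeMs) {
        if (latestRecordTimeMs >= 0 && recordTimeMs > latestRecordTimeMs) {
            // Whole buckets skipped between the previous record and this one are empty,
            // including buckets in a gap that spans a close/reopen of the job.
            long skippedBuckets = recordTimeMs / bucketSpanMs - latestRecordTimeMs / bucketSpanMs - 1;
            if (skippedBuckets > 0) {
                emptyBucketCount += skippedBuckets;
            }
        }
        latestRecordTimeMs = Math.max(latestRecordTimeMs, recordTimeMs);
    }

    long emptyBucketCount() {
        return emptyBucketCount;
    }

    public static void main(String[] args) {
        long hourMs = TimeUnit.HOURS.toMillis(1);
        // The job last saw a record at 09:00 before it was closed; after reopening,
        // data resumes at 20:00, leaving buckets 10-19 empty.
        GapDiagnosticsSketch diagnostics = new GapDiagnosticsSketch(hourMs, 9 * hourMs);
        diagnostics.checkRecord(20 * hourMs);
        System.out.println(diagnostics.emptyBucketCount()); // prints 10
    }
}

Without the seeding step in the constructor, a reopened process would treat the first record after the gap as the first record ever seen and the skipped buckets would never be counted, which is the behaviour the integration test below guards against.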
1 parent: ccd791b
Commit: 3b260dc
Showing 7 changed files with 129 additions and 20 deletions.
...native-tests/src/test/java/org/elasticsearch/xpack/ml/integration/ReopenJobWithGapIT.java (93 additions, 0 deletions)
@@ -0,0 +1,93 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.ml.integration;

import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.xpack.core.ml.action.GetBucketsAction;
import org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
import org.elasticsearch.xpack.core.ml.job.config.Detector;
import org.elasticsearch.xpack.core.ml.job.config.Job;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.DataCounts;
import org.junit.After;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import static org.hamcrest.Matchers.equalTo;

/**
 * Tests that after reopening a job and sending more
 * data after a gap, data counts are reported correctly.
 */
public class ReopenJobWithGapIT extends MlNativeAutodetectIntegTestCase {

    private static final String JOB_ID = "reopen-job-with-gap-test";
    private static final long BUCKET_SPAN_SECONDS = 3600;

    @After
    public void cleanUpTest() {
        cleanUp();
    }

    public void test() throws Exception {
        AnalysisConfig.Builder analysisConfig = new AnalysisConfig.Builder(
                Collections.singletonList(new Detector.Builder("count", null).build()));
        analysisConfig.setBucketSpan(TimeValue.timeValueSeconds(BUCKET_SPAN_SECONDS));
        DataDescription.Builder dataDescription = new DataDescription.Builder();
        dataDescription.setTimeFormat("epoch");
        Job.Builder job = new Job.Builder(JOB_ID);
        job.setAnalysisConfig(analysisConfig);
        job.setDataDescription(dataDescription);

        registerJob(job);
        putJob(job);
        openJob(job.getId());

        long timestamp = 1483228800L; // 2017-01-01T00:00:00Z
        List<String> data = new ArrayList<>();
        for (int i = 0; i < 10; i++) {
            data.add(createJsonRecord(createRecord(timestamp)));
            timestamp += BUCKET_SPAN_SECONDS;
        }

        postData(job.getId(), data.stream().collect(Collectors.joining()));
        flushJob(job.getId(), true);
        closeJob(job.getId());

        GetBucketsAction.Request request = new GetBucketsAction.Request(job.getId());
        request.setExcludeInterim(true);
        assertThat(client().execute(GetBucketsAction.INSTANCE, request).actionGet().getBuckets().count(), equalTo(9L));
        assertThat(getJobStats(job.getId()).get(0).getDataCounts().getBucketCount(), equalTo(9L));

        timestamp += 10 * BUCKET_SPAN_SECONDS;
        data = new ArrayList<>();
        for (int i = 0; i < 10; i++) {
            data.add(createJsonRecord(createRecord(timestamp)));
            timestamp += BUCKET_SPAN_SECONDS;
        }

        openJob(job.getId());
        postData(job.getId(), data.stream().collect(Collectors.joining()));
        flushJob(job.getId(), true);
        closeJob(job.getId());

        assertThat(client().execute(GetBucketsAction.INSTANCE, request).actionGet().getBuckets().count(), equalTo(29L));
        DataCounts dataCounts = getJobStats(job.getId()).get(0).getDataCounts();
        assertThat(dataCounts.getBucketCount(), equalTo(29L));
        assertThat(dataCounts.getEmptyBucketCount(), equalTo(10L));
    }

    private static Map<String, Object> createRecord(long timestamp) {
        Map<String, Object> record = new HashMap<>();
        record.put("time", timestamp);
        return record;
    }
}
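For reference, the expected values in the assertions follow from the timestamps the test uses: the first batch of 10 hourly records fills buckets 0-9, the skipped interval between the two batches covers buckets 10-19, and the second batch fills buckets 20-29. The 10 buckets of the gap are exactly the empty buckets the final assertion expects, and the total comes to 29 rather than 30 because the bucket containing the latest record is still in progress and is not included in the count (which is also why the first close reports 9 buckets rather than 10).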