Skip to content

Commit

Permalink
[ML] fix array oob in IDGenerator and adjust format for mapping (#41703)
Browse files Browse the repository at this point in the history
* [ML] fix array oob in IDGenerator and adjust format for mapping

* Update DataFramePivotRestIT.java
  • Loading branch information
benwtrent authored May 1, 2019
1 parent 7639e6a commit 80f8943
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,9 @@ public void testDateHistogramPivot() throws Exception {

config += " \"pivot\": {"
+ " \"group_by\": {"
+ " \"by_day\": {"
+ " \"by_hr\": {"
+ " \"date_histogram\": {"
+ " \"interval\": \"1d\",\"field\":\"timestamp\",\"format\":\"yyyy-MM-DD\""
+ " \"interval\": \"1h\",\"field\":\"timestamp\",\"format\":\"yyyy-MM-DD_HH\""
+ " } } },"
+ " \"aggregations\": {"
+ " \"avg_rating\": {"
Expand All @@ -232,10 +232,9 @@ public void testDateHistogramPivot() throws Exception {
startAndWaitForTransform(transformId, dataFrameIndex, BASIC_AUTH_VALUE_DATA_FRAME_ADMIN_WITH_SOME_DATA_ACCESS);
assertTrue(indexExists(dataFrameIndex));

// we expect 21 documents as there shall be 21 days worth of docs
Map<String, Object> indexStats = getAsMap(dataFrameIndex + "/_stats");
assertEquals(21, XContentMapValues.extractValue("_all.total.docs.count", indexStats));
assertOnePivotValue(dataFrameIndex + "/_search?q=by_day:2017-01-15", 3.82);
assertEquals(104, XContentMapValues.extractValue("_all.total.docs.count", indexStats));
assertOnePivotValue(dataFrameIndex + "/_search?q=by_hr:1484499600000", 4.0833333333);
}

@SuppressWarnings("unchecked")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,14 +89,20 @@ protected void createReviewsIndex() throws IOException {
// create index
final StringBuilder bulk = new StringBuilder();
int day = 10;
int hour = 10;
int min = 10;
for (int i = 0; i < numDocs; i++) {
bulk.append("{\"index\":{\"_index\":\"" + REVIEWS_INDEX_NAME + "\"}}\n");
long user = Math.round(Math.pow(i * 31 % 1000, distributionTable[i % distributionTable.length]) % 27);
int stars = distributionTable[(i * 33) % distributionTable.length];
long business = Math.round(Math.pow(user * stars, distributionTable[i % distributionTable.length]) % 13);
int hour = randomIntBetween(10, 20);
int min = randomIntBetween(30, 59);
int sec = randomIntBetween(30, 59);
if (i % 12 == 0) {
hour = 10 + (i % 13);
}
if (i % 5 == 0) {
min = 10 + (i % 49);
}
int sec = 10 + (i % 49);

String date_string = "2017-01-" + day + "T" + hour + ":" + min + ":" + sec + "Z";
bulk.append("{\"user_id\":\"")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import org.elasticsearch.xpack.core.dataframe.DataFrameField;
import org.elasticsearch.xpack.core.dataframe.DataFrameMessages;
import org.elasticsearch.xpack.core.dataframe.transforms.DataFrameTransformConfig;
import org.elasticsearch.xpack.core.dataframe.transforms.pivot.DateHistogramGroupSource;
import org.elasticsearch.xpack.core.dataframe.transforms.pivot.SingleGroupSource;

import java.io.IOException;
import java.util.Map;
Expand All @@ -31,7 +33,9 @@ public final class DataframeIndex {

private static final String PROPERTIES = "properties";
private static final String TYPE = "type";
private static final String FORMAT = "format";
private static final String META = "_meta";
private static final String DEFAULT_TIME_FORMAT = "strict_date_optional_time||epoch_millis";

private DataframeIndex() {
}
Expand All @@ -44,7 +48,9 @@ public static void createDestinationIndex(Client client, DataFrameTransformConfi
request.settings(Settings.builder() // <1>
.put("index.number_of_shards", 1).put("index.number_of_replicas", 0));

request.mapping(SINGLE_MAPPING_NAME, createMappingXContent(mappings, transformConfig.getId()));
request.mapping(SINGLE_MAPPING_NAME, createMappingXContent(mappings,
transformConfig.getPivotConfig().getGroupConfig().getGroups(),
transformConfig.getId()));

client.execute(CreateIndexAction.INSTANCE, request, ActionListener.wrap(createIndexResponse -> {
listener.onResponse(true);
Expand All @@ -56,14 +62,29 @@ public static void createDestinationIndex(Client client, DataFrameTransformConfi
}));
}

private static XContentBuilder createMappingXContent(Map<String, String> mappings, String id) {
private static XContentBuilder createMappingXContent(Map<String, String> mappings,
Map<String, SingleGroupSource> groupSources,
String id) {
try {
XContentBuilder builder = jsonBuilder().startObject();
builder.startObject(SINGLE_MAPPING_NAME);
addMetaData(builder, id);
builder.startObject(PROPERTIES);
for (Entry<String, String> field : mappings.entrySet()) {
builder.startObject(field.getKey()).field(TYPE, field.getValue()).endObject();
String fieldName = field.getKey();
String fieldType = field.getValue();

builder.startObject(fieldName);
builder.field(TYPE, fieldType);

SingleGroupSource groupSource = groupSources.get(fieldName);
if (groupSource instanceof DateHistogramGroupSource) {
String format = ((DateHistogramGroupSource) groupSource).getFormat();
if (format != null) {
builder.field(FORMAT, DEFAULT_TIME_FORMAT + "||" + format);
}
}
builder.endObject();
}
builder.endObject(); // properties
builder.endObject(); // _doc type
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
*/
public final class IDGenerator {
private static final byte[] NULL_VALUE = "__NULL_VALUE__".getBytes(StandardCharsets.UTF_8);
private static final byte[] EMPTY_VALUE = "__EMPTY_VALUE__".getBytes(StandardCharsets.UTF_8);
private static final byte DELIM = '$';
private static final long SEED = 19;
private static final int MAX_FIRST_BYTES = 5;
Expand Down Expand Up @@ -57,7 +58,9 @@ public String getID() {

for (Object value : objectsForIDGeneration.values()) {
byte[] v = getBytes(value);

if (v.length == 0) {
v = EMPTY_VALUE;
}
buffer.append(v, 0, v.length);
buffer.append(DELIM);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ public void testSupportedTypes() {
assertNotEquals(id, idGen.getID());
idGen.add("key6", 13);
assertNotEquals(id, idGen.getID());
id = idGen.getID();
idGen.add("key7", "");
assertNotEquals(id, idGen.getID());
}

public void testOrderIndependence() {
Expand Down

0 comments on commit 80f8943

Please sign in to comment.