Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML] Add earliest and latest timestamps to field stats #42890

Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,14 @@ public class FieldStats implements ToXContentObject {
public static final ParseField MAX_VALUE = new ParseField("max_value");
public static final ParseField MEAN_VALUE = new ParseField("mean_value");
public static final ParseField MEDIAN_VALUE = new ParseField("median_value");
public static final ParseField EARLIEST = new ParseField("earliest");
public static final ParseField LATEST = new ParseField("latest");
public static final ParseField TOP_HITS = new ParseField("top_hits");

@SuppressWarnings("unchecked")
public static final ConstructingObjectParser<FieldStats, Void> PARSER = new ConstructingObjectParser<>("field_stats", true,
a -> new FieldStats((long) a[0], (int) a[1], (Double) a[2], (Double) a[3], (Double) a[4], (Double) a[5],
(List<Map<String, Object>>) a[6]));
(String) a[6], (String) a[7], (List<Map<String, Object>>) a[8]));

static {
PARSER.declareLong(ConstructingObjectParser.constructorArg(), COUNT);
Expand All @@ -52,6 +54,8 @@ public class FieldStats implements ToXContentObject {
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MAX_VALUE);
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEAN_VALUE);
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEDIAN_VALUE);
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), EARLIEST);
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), LATEST);
PARSER.declareObjectArray(ConstructingObjectParser.optionalConstructorArg(), (p, c) -> p.mapOrdered(), TOP_HITS);
}

Expand All @@ -61,16 +65,20 @@ public class FieldStats implements ToXContentObject {
private final Double maxValue;
private final Double meanValue;
private final Double medianValue;
private final String earliestTimestamp;
private final String latestTimestamp;
private final List<Map<String, Object>> topHits;

FieldStats(long count, int cardinality, Double minValue, Double maxValue, Double meanValue, Double medianValue,
List<Map<String, Object>> topHits) {
String earliestTimestamp, String latestTimestamp, List<Map<String, Object>> topHits) {
this.count = count;
this.cardinality = cardinality;
this.minValue = minValue;
this.maxValue = maxValue;
this.meanValue = meanValue;
this.medianValue = medianValue;
this.earliestTimestamp = earliestTimestamp;
this.latestTimestamp = latestTimestamp;
this.topHits = (topHits == null) ? Collections.emptyList() : Collections.unmodifiableList(topHits);
}

Expand Down Expand Up @@ -98,6 +106,14 @@ public Double getMedianValue() {
return medianValue;
}

/**
 * Returns the earliest timestamp recorded for the field, as a string.
 *
 * @return the value of the {@code earliest} field, or {@code null} if none was supplied
 */
public String getEarliestTimestamp() {
return earliestTimestamp;
}

/**
 * Returns the latest timestamp recorded for the field, as a string.
 *
 * @return the value of the {@code latest} field, or {@code null} if none was supplied
 */
public String getLatestTimestamp() {
return latestTimestamp;
}

/**
 * Returns the top hits for the field, each hit represented as a map.
 *
 * @return the list stored in {@code topHits}
 */
public List<Map<String, Object>> getTopHits() {
return topHits;
}
Expand All @@ -120,6 +136,12 @@ public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params par
if (medianValue != null) {
builder.field(MEDIAN_VALUE.getPreferredName(), toIntegerIfInteger(medianValue));
}
if (earliestTimestamp != null) {
builder.field(EARLIEST.getPreferredName(), earliestTimestamp);
}
if (latestTimestamp != null) {
builder.field(LATEST.getPreferredName(), latestTimestamp);
}
if (topHits.isEmpty() == false) {
builder.field(TOP_HITS.getPreferredName(), topHits);
}
Expand All @@ -140,7 +162,7 @@ static Number toIntegerIfInteger(double d) {
@Override
public int hashCode() {

return Objects.hash(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
return Objects.hash(count, cardinality, minValue, maxValue, meanValue, medianValue, earliestTimestamp, latestTimestamp, topHits);
}

@Override
Expand All @@ -161,6 +183,8 @@ public boolean equals(Object other) {
Objects.equals(this.maxValue, that.maxValue) &&
Objects.equals(this.meanValue, that.meanValue) &&
Objects.equals(this.medianValue, that.medianValue) &&
Objects.equals(this.earliestTimestamp, that.earliestTimestamp) &&
Objects.equals(this.latestTimestamp, that.latestTimestamp) &&
Objects.equals(this.topHits, that.topHits);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ static FieldStats createTestFieldStats() {
Double maxValue = null;
Double meanValue = null;
Double medianValue = null;
String earliestTimestamp = null;
String latestTimestamp = null;
boolean isMetric = randomBoolean();
if (isMetric) {
if (randomBoolean()) {
Expand All @@ -54,6 +56,12 @@ static FieldStats createTestFieldStats() {
}
meanValue = randomDouble();
medianValue = randomDouble();
} else {
boolean isDate = randomBoolean();
if (isDate) {
earliestTimestamp = randomAlphaOfLength(20);
latestTimestamp = randomAlphaOfLength(20);
}
}

List<Map<String, Object>> topHits = new ArrayList<>();
Expand All @@ -68,7 +76,7 @@ static FieldStats createTestFieldStats() {
topHits.add(topHit);
}

return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, earliestTimestamp, latestTimestamp, topHits);
}

@Override
Expand Down
10 changes: 10 additions & 0 deletions docs/reference/ml/apis/find-file-structure.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,8 @@ If the request does not encounter errors, you receive the following result:
"release_date" : {
"count" : 24,
"cardinality" : 20,
"earliest" : "1932-06-01",
"latest" : "2011-06-02",
"top_hits" : [
{
"value" : "1985-06-01",
Expand Down Expand Up @@ -1152,6 +1154,8 @@ If the request does not encounter errors, you receive the following result:
"tpep_dropoff_datetime" : {
"count" : 19998,
"cardinality" : 9066,
"earliest" : "2018-05-31 06:18:15",
"latest" : "2018-06-02 02:25:44",
"top_hits" : [
{
"value" : "2018-06-01 01:12:12",
Expand Down Expand Up @@ -1198,6 +1202,8 @@ If the request does not encounter errors, you receive the following result:
"tpep_pickup_datetime" : {
"count" : 19998,
"cardinality" : 8760,
"earliest" : "2018-05-31 06:08:31",
"latest" : "2018-06-02 01:21:21",
"top_hits" : [
{
"value" : "2018-06-01 00:01:23",
Expand Down Expand Up @@ -1457,6 +1463,8 @@ this:
"timestamp" : {
"count" : 53,
"cardinality" : 28,
"earliest" : "2018-09-27T14:39:28,518",
"latest" : "2018-09-27T14:39:37,012",
"top_hits" : [
{
"value" : "2018-09-27T14:39:29,859",
Expand Down Expand Up @@ -1719,6 +1727,8 @@ this:
"timestamp" : {
"count" : 53,
"cardinality" : 28,
"earliest" : "2018-09-27T14:39:28,518",
"latest" : "2018-09-27T14:39:37,012",
"top_hits" : [
{
"value" : "2018-09-27T14:39:29,859",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
*/
package org.elasticsearch.xpack.core.ml.filestructurefinder;

import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
Expand All @@ -27,12 +29,14 @@ public class FieldStats implements ToXContentObject, Writeable {
static final ParseField MAX_VALUE = new ParseField("max_value");
static final ParseField MEAN_VALUE = new ParseField("mean_value");
static final ParseField MEDIAN_VALUE = new ParseField("median_value");
static final ParseField EARLIEST = new ParseField("earliest");
static final ParseField LATEST = new ParseField("latest");
static final ParseField TOP_HITS = new ParseField("top_hits");

@SuppressWarnings("unchecked")
public static final ConstructingObjectParser<FieldStats, Void> PARSER = new ConstructingObjectParser<>("field_stats", false,
a -> new FieldStats((long) a[0], (int) a[1], (Double) a[2], (Double) a[3], (Double) a[4], (Double) a[5],
(List<Map<String, Object>>) a[6]));
(String) a[6], (String) a[7], (List<Map<String, Object>>) a[8]));

static {
PARSER.declareLong(ConstructingObjectParser.constructorArg(), COUNT);
Expand All @@ -41,6 +45,8 @@ public class FieldStats implements ToXContentObject, Writeable {
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MAX_VALUE);
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEAN_VALUE);
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEDIAN_VALUE);
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), EARLIEST);
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), LATEST);
PARSER.declareObjectArray(ConstructingObjectParser.optionalConstructorArg(), (p, c) -> p.mapOrdered(), TOP_HITS);
}

Expand All @@ -50,20 +56,33 @@ public class FieldStats implements ToXContentObject, Writeable {
private final Double maxValue;
private final Double meanValue;
private final Double medianValue;
private final String earliestTimestamp;
private final String latestTimestamp;
private final List<Map<String, Object>> topHits;

public FieldStats(long count, int cardinality, List<Map<String, Object>> topHits) {
this(count, cardinality, null, null, null, null, topHits);
this(count, cardinality, null, null, null, null, null, null, topHits);
}

/**
 * Creates field stats that carry earliest/latest timestamps but no numeric
 * statistics: min, max, mean and median are all passed on as {@code null}.
 *
 * @param count             value stored as {@code count}
 * @param cardinality       value stored as {@code cardinality}
 * @param earliestTimestamp earliest timestamp as a string; may be {@code null}
 * @param latestTimestamp   latest timestamp as a string; may be {@code null}
 * @param topHits           top hits, one map per hit; may be {@code null}
 */
public FieldStats(long count, int cardinality, String earliestTimestamp, String latestTimestamp, List<Map<String, Object>> topHits) {
this(count, cardinality, null, null, null, null, earliestTimestamp, latestTimestamp, topHits);
}

/**
 * Creates field stats that carry numeric statistics but no timestamps:
 * the earliest and latest timestamps are both passed on as {@code null}.
 *
 * @param count       value stored as {@code count}
 * @param cardinality value stored as {@code cardinality}
 * @param minValue    minimum value; may be {@code null}
 * @param maxValue    maximum value; may be {@code null}
 * @param meanValue   mean value; may be {@code null}
 * @param medianValue median value; may be {@code null}
 * @param topHits     top hits, one map per hit; may be {@code null}
 */
public FieldStats(long count, int cardinality, Double minValue, Double maxValue, Double meanValue, Double medianValue,
List<Map<String, Object>> topHits) {
this(count, cardinality, minValue, maxValue, meanValue, medianValue, null, null, topHits);
}

/**
 * Package-private constructor that sets every field; the public constructors
 * visible alongside it delegate here, as does the parser.
 *
 * @param count             value stored as {@code count}
 * @param cardinality       value stored as {@code cardinality}
 * @param minValue          minimum value; may be {@code null}
 * @param maxValue          maximum value; may be {@code null}
 * @param meanValue         mean value; may be {@code null}
 * @param medianValue       median value; may be {@code null}
 * @param earliestTimestamp earliest timestamp as a string; may be {@code null}
 * @param latestTimestamp   latest timestamp as a string; may be {@code null}
 * @param topHits           top hits, one map per hit; {@code null} is replaced by an empty list
 */
FieldStats(long count, int cardinality, Double minValue, Double maxValue, Double meanValue, Double medianValue,
String earliestTimestamp, String latestTimestamp, List<Map<String, Object>> topHits) {
this.count = count;
this.cardinality = cardinality;
this.minValue = minValue;
this.maxValue = maxValue;
this.meanValue = meanValue;
this.medianValue = medianValue;
this.earliestTimestamp = earliestTimestamp;
this.latestTimestamp = latestTimestamp;
// Null becomes an empty list so the field is never null here; a non-null list is wrapped in an
// unmodifiable view (not copied), so later changes to the caller's list remain visible through it.
this.topHits = (topHits == null) ? Collections.emptyList() : Collections.unmodifiableList(topHits);
}

Expand All @@ -74,6 +93,13 @@ public FieldStats(StreamInput in) throws IOException {
maxValue = in.readOptionalDouble();
meanValue = in.readOptionalDouble();
medianValue = in.readOptionalDouble();
if (in.getVersion().onOrAfter(Version.V_7_3_0)) {
earliestTimestamp = in.readOptionalString();
latestTimestamp = in.readOptionalString();
} else {
earliestTimestamp = null;
latestTimestamp = null;
}
topHits = in.readList(StreamInput::readMap);
}

Expand All @@ -85,6 +111,10 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeOptionalDouble(maxValue);
out.writeOptionalDouble(meanValue);
out.writeOptionalDouble(medianValue);
if (out.getVersion().onOrAfter(Version.V_7_3_0)) {
out.writeOptionalString(earliestTimestamp);
out.writeOptionalString(latestTimestamp);
}
out.writeCollection(topHits, StreamOutput::writeMap);
}

Expand Down Expand Up @@ -112,6 +142,14 @@ public Double getMedianValue() {
return medianValue;
}

/**
 * Returns the earliest timestamp recorded for the field, as a string.
 *
 * @return the earliest timestamp, or {@code null} if none was set (for example when the
 *         instance was read from a stream older than the version that carries this field)
 */
public String getEarliestTimestamp() {
return earliestTimestamp;
}

/**
 * Returns the latest timestamp recorded for the field, as a string.
 *
 * @return the latest timestamp, or {@code null} if none was set (for example when the
 *         instance was read from a stream older than the version that carries this field)
 */
public String getLatestTimestamp() {
return latestTimestamp;
}

/**
 * Returns the top hits for the field, each hit represented as a map.
 *
 * @return the list stored in {@code topHits}
 */
public List<Map<String, Object>> getTopHits() {
return topHits;
}
Expand All @@ -134,6 +172,12 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
if (medianValue != null) {
builder.field(MEDIAN_VALUE.getPreferredName(), toIntegerIfInteger(medianValue));
}
if (earliestTimestamp != null) {
builder.field(EARLIEST.getPreferredName(), earliestTimestamp);
}
if (latestTimestamp != null) {
builder.field(LATEST.getPreferredName(), latestTimestamp);
}
if (topHits.isEmpty() == false) {
builder.field(TOP_HITS.getPreferredName(), topHits);
}
Expand All @@ -154,7 +198,7 @@ public static Number toIntegerIfInteger(double d) {
@Override
public int hashCode() {

return Objects.hash(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
return Objects.hash(count, cardinality, minValue, maxValue, meanValue, medianValue, earliestTimestamp, latestTimestamp, topHits);
}

@Override
Expand All @@ -175,6 +219,13 @@ public boolean equals(Object other) {
Objects.equals(this.maxValue, that.maxValue) &&
Objects.equals(this.meanValue, that.meanValue) &&
Objects.equals(this.medianValue, that.medianValue) &&
Objects.equals(this.earliestTimestamp, that.earliestTimestamp) &&
Objects.equals(this.latestTimestamp, that.latestTimestamp) &&
Objects.equals(this.topHits, that.topHits);
}

/**
 * Returns a string form of this object produced by {@code Strings.toString(this)}.
 * NOTE(review): presumably this renders the {@code toXContent} output as JSON; confirm
 * against the {@code Strings} helper.
 */
@Override
public String toString() {
return Strings.toString(this);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ static FieldStats createTestFieldStats() {
Double maxValue = null;
Double meanValue = null;
Double medianValue = null;
String earliestTimestamp = null;
String latestTimestamp = null;
boolean isMetric = randomBoolean();
if (isMetric) {
if (randomBoolean()) {
Expand All @@ -41,6 +43,12 @@ static FieldStats createTestFieldStats() {
}
meanValue = randomDouble();
medianValue = randomDouble();
} else {
boolean isDate = randomBoolean();
if (isDate) {
earliestTimestamp = randomAlphaOfLength(20);
latestTimestamp = randomAlphaOfLength(20);
}
}

List<Map<String, Object>> topHits = new ArrayList<>();
Expand All @@ -55,7 +63,7 @@ static FieldStats createTestFieldStats() {
topHits.add(topHit);
}

return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, earliestTimestamp, latestTimestamp, topHits);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,7 @@ static DelimitedFileStructureFinder makeDelimitedFileStructureFinder(List<String

SortedMap<String, Object> mappings = mappingsAndFieldStats.v1();
if (timeField != null) {
mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD,
Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "date"));
mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, FileStructureUtils.DATE_MAPPING_WITHOUT_FORMAT);
}

if (mappingsAndFieldStats.v2() != null) {
Expand Down
Loading