-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
I have create two ways to get aggregation data from a search response. 1) End user can provide a map of names to the aggregation types expected to be returned. This takes the place of the _type metadata returned in faceting. This map does not need to be in any particular order, but each aggregation will search through and see if it has any matching sub-aggregations matching the names present and populate them as needed. It is important to note that using this method we will ONLY return aggregations specified by the user in this map. 2) Type invariant aggregations can be requested. This method will simply return a list of TypeInvariantAggregations. This is recommended if the user is unsure of the Aggregations supplied, or simply wants a representation of all data returned in the aggregation. This module introduces some danger and requires responsibility on the user's part. The intended use is to call the getAggregationFields() method to get a list of all AggregationFields contained. The user is only assured that those particular methods will return non-null results. **TODO** * Polish off these methods-- this is meant to be a rough draft of the Aggregations module in order to get rapid feedback * Add testing over the Aggregation module
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
package io.searchbox.core.search.aggregation; | ||
|
||
import com.google.gson.JsonObject; | ||
|
||
import java.lang.reflect.Constructor; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
/** | ||
* @author cfstout | ||
*/ | ||
|
||
public abstract class Aggregation { | ||
This comment has been minimized.
Sorry, something went wrong.
kramer
|
||
|
||
protected String name; | ||
protected List<Aggregation> subAggregations; | ||
|
||
public String getName() { | ||
return name; | ||
} | ||
|
||
public List<Aggregation> getSubAggregations() { | ||
return subAggregations; | ||
} | ||
|
||
public boolean hasSubAggregations() { | ||
return !(subAggregations == null || subAggregations.isEmpty()); | ||
} | ||
|
||
public <T extends Aggregation> void addSubAggregations(JsonObject rootAggregation, Map<String, Class<T>> nameToTypeMap) { | ||
This comment has been minimized.
Sorry, something went wrong.
kramer
|
||
for (String nameCandidate : nameToTypeMap.keySet()) { | ||
if (rootAggregation.has(nameCandidate)) { | ||
try { | ||
Class<T> type = nameToTypeMap.get(name); | ||
Constructor<T> c = type.getConstructor(String.class, JsonObject.class, Map.class); | ||
this.subAggregations.add(c.newInstance(nameCandidate, rootAggregation.get(nameCandidate).getAsJsonObject(), nameToTypeMap)); | ||
This comment has been minimized.
Sorry, something went wrong.
kramer
|
||
} catch (Exception e) { | ||
throw new RuntimeException(e); | ||
This comment has been minimized.
Sorry, something went wrong.
kramer
|
||
} | ||
} | ||
} | ||
} | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
package io.searchbox.core.search.aggregation; | ||
|
||
import java.util.List; | ||
import java.util.Map; | ||
|
||
/** | ||
* @author cfstout | ||
*/ | ||
public enum AggregationField { | ||
VALUE("value"), | ||
BUCKETS("buckets"), | ||
KEY("key"), //Can be String or Long | ||
KEY_AS_STRING("key_as_string"), | ||
DOC_COUNT("doc_count"), | ||
FROM("from"), | ||
TO("to"), | ||
FROM_AS_STRING("from_as_string"), | ||
TO_AS_STRING("to_as_string"), | ||
SUM_OF_SQUARES("sum_of_squares"), | ||
VARIANCE("variance"), | ||
STD_DEVIATION("std_deviation"), | ||
STD_DEVIATION_BOUNDS("std_deviation_bounds"), | ||
BOUNDS("bounds"), | ||
UNIT("unit"), | ||
VALUES("values"), | ||
SCORE("score"), | ||
BG_COUNT("bg_count"), //Background Count | ||
COUNT("count"), | ||
MIN("min"), | ||
MAX("max"), | ||
AVG("avg"), | ||
SUM("sum"); | ||
|
||
private final String field; | ||
|
||
private AggregationField(String s) { | ||
field = s; | ||
} | ||
|
||
public String toString() { | ||
return field; | ||
} | ||
|
||
public boolean equals(String s) { | ||
return s.equals(toString()); | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
package io.searchbox.core.search.aggregation; | ||
|
||
import com.google.gson.JsonObject; | ||
|
||
import java.lang.reflect.Constructor; | ||
import java.util.Map; | ||
|
||
/** | ||
* @author cfstout | ||
*/ | ||
public class AvgAggregation extends Aggregation { | ||
|
||
public static final String TYPE = "avg"; | ||
|
||
private Double avg; | ||
|
||
public <T extends Aggregation> AvgAggregation(String name, JsonObject avgAggregation, Map<String, Class<T>> nameToTypeMap) { | ||
this.name = name; | ||
avg = avgAggregation.get("value").getAsDouble(); | ||
This comment has been minimized.
Sorry, something went wrong.
kramer
|
||
addSubAggregations(avgAggregation, nameToTypeMap); | ||
} | ||
|
||
public Double getAvg() { | ||
return avg; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
package io.searchbox.core.search.aggregation; | ||
|
||
import com.google.gson.JsonObject; | ||
|
||
import java.util.Map; | ||
|
||
/** | ||
* @author cfstout | ||
*/ | ||
public class CardinalityAggregation extends Aggregation{ | ||
|
||
public static final String TYPE = "cardinality"; | ||
|
||
private Long cardinality; | ||
|
||
public <T extends Aggregation> CardinalityAggregation(String name, JsonObject cardinalityAggregation, Map<String, Class<T>> nameToTypeMap) { | ||
this.name = name; | ||
cardinality = cardinalityAggregation.get("value").getAsLong(); | ||
addSubAggregations(cardinalityAggregation, nameToTypeMap); | ||
} | ||
|
||
public Long getCardinality() { | ||
return cardinality; | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
package io.searchbox.core.search.aggregation; | ||
|
||
import com.google.gson.JsonElement; | ||
import com.google.gson.JsonObject; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
/** | ||
* @author cfstout | ||
*/ | ||
public class DateHistogramAggregation extends Aggregation{ | ||
|
||
public static final String TYPE = "date_histogram"; | ||
|
||
private List<DateHistogram> dateHistograms; | ||
|
||
public <T extends Aggregation> DateHistogramAggregation(String name, JsonObject dateHistogramAggregation, Map<String, Class<T>> nameToTypeMap) { | ||
this.name = name; | ||
dateHistograms = new ArrayList<DateHistogram>(); | ||
for (JsonElement bucket : dateHistogramAggregation.get("buckets").getAsJsonArray()) { | ||
JsonElement time = bucket.getAsJsonObject().get("key"); | ||
JsonElement timeAsString = bucket.getAsJsonObject().get("key_as_string"); | ||
JsonElement count = bucket.getAsJsonObject().get("doc_count"); | ||
DateHistogram histogram = new DateHistogram(time.getAsLong(), timeAsString.getAsString(), count.getAsLong()); | ||
dateHistograms.add(histogram); | ||
} | ||
addSubAggregations(dateHistogramAggregation, nameToTypeMap); | ||
} | ||
|
||
public List<DateHistogram> getDateHistograms() { | ||
return dateHistograms; | ||
} | ||
|
||
public class DateHistogram extends HistogramAggregation.Histogram { | ||
|
||
private String timeAsString; | ||
|
||
DateHistogram(Long time, String timeAsString, Long count) { | ||
super(time, count); | ||
this.timeAsString = timeAsString; | ||
} | ||
|
||
public Long getTime() { | ||
return getKey(); | ||
} | ||
|
||
public String getTimeAsString() { | ||
return timeAsString; | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
package io.searchbox.core.search.aggregation; | ||
|
||
import com.google.gson.JsonElement; | ||
import com.google.gson.JsonObject; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
/** | ||
* @author cfstout | ||
*/ | ||
public class DateRangeAggregation extends Aggregation { | ||
|
||
public static final String TYPE = "date_range"; | ||
|
||
private List<DateRange> ranges; | ||
|
||
public <T extends Aggregation> DateRangeAggregation(String name, JsonObject dateRangeAggregation, Map<String, Class<T>> nameToTypeMap) { | ||
this.name = name; | ||
ranges = new ArrayList<DateRange>(); | ||
//todo support keyed:true as well | ||
for (JsonElement bucketv : dateRangeAggregation.get("buckets").getAsJsonArray()) { | ||
JsonObject bucket = bucketv.getAsJsonObject(); | ||
DateRange range = new DateRange( | ||
bucket.has("from") ? bucket.get("from").getAsDouble() : null, | ||
bucket.has("to") ? bucket.get("to").getAsDouble() : null, | ||
bucket.has("doc_count") ? bucket.get("doc_count").getAsLong() : null, | ||
bucket.has("from_as_string") ? bucket.get("from_as_string").getAsString() : null, | ||
bucket.has("to_as_string") ? bucket.get("to_as_string").getAsString() : null); | ||
ranges.add(range); | ||
} | ||
addSubAggregations(dateRangeAggregation, nameToTypeMap); | ||
} | ||
|
||
public List<DateRange> getRanges() { | ||
return ranges; | ||
} | ||
|
||
public class DateRange extends RangeAggregation.Range { | ||
private String fromAsString; | ||
private String toAsString; | ||
|
||
public DateRange(Double from, Double to, Long count, String fromString, String toString){ | ||
super(from, to, count); | ||
this.fromAsString = fromString; | ||
this.toAsString = toString; | ||
} | ||
|
||
public String getFromAsString() { | ||
return fromAsString; | ||
} | ||
|
||
public String getToAsString() { | ||
return toAsString; | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
package io.searchbox.core.search.aggregation; | ||
|
||
/** | ||
* @author cfstout | ||
*/ | ||
public class DeviationBound { | ||
private Double upper; | ||
private Double lower; | ||
|
||
public DeviationBound(Double upper, Double lower) { | ||
this.upper = upper; | ||
this.lower = lower; | ||
} | ||
|
||
public Double getUpper() { | ||
return upper; | ||
} | ||
|
||
public Double getLower() { | ||
return lower; | ||
} | ||
} |
6 comments
on commit 9a29e31
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The way the user is currently expected to provide a full and flat map of name-to-types for sub aggregations seems unintuitive (in getAggregations(Map<String, Class<T>> nameToTypeMap)
). One alternative is to for user to get them in levels (or on demand) as such:
- rename/refactor
addSubAggregations
togetSubAggregations
where the only parameter is name-to-type map for aggregations in the next level (keep root json internally etc.). So it is called by user if/when it is needed and not during initialization. - then we can also provide (convenience) getter for single aggregation in
SearchResult
e.g.:public <T extends Aggregation> T getAggregation(String aggName, Class<T> aggType)
and then even more convenientpublic AvgAggregation getAvgAggregation(String aggName)
As for the TypeInvariant
class and getter: I'd vote against supporting that case as I believe the added value is too little.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes I definitely agree. It makes sense that the user should know the expected structure of Aggregations, and that should be easy to refactor from the current state.
And to clarify, you are suggesting adding getters for every Aggregation type in the SearchResult
class? So then we would have the generic, getAggregation(String aggName, Class<T> aggType)
which would then call the correct method for that specific aggType?
And the more I think about it, I definitely agree with removing the TypeInvariant
class. Providing the name and expected type seems much more in line with other behavior in the code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
And to clarify, you are suggesting adding getters for every Aggregation type in the SearchResult class?
Yes, well in my idea we'd have 3 (kind of) getters in SearchResult
:
getAggregations(Map<String, Class<T>> nameToTypeMap)
will return all root level aggregations (can call 2 for each map entry and return the results in a collection)getAggregation(String aggName, Class<T> aggType)
returns a single root level aggregation (allows us to separate single aggr parsing logic from looping as well as providing a useful unit operation to user)getXAggregation(String aggName)
returns a single root level aggregation where the aggregation type (class) is implied by the method name (can simply call 2 with proper arguments - pure convenience method) and yes we would have a lot of these (each corresponding to one of the Aggregation types/classes)
Then user needs to go through the returned Aggregation
instance to get sub aggregations - where we provide above 3 (kinds of) getters again. Ok, since at this point we would like to reuse the getter code, we need to think of a better structure 😄 ... Something like:
- Make
Aggregation
concrete and place 3 getters in it. - Now
Aggregation
has no (public) data field and it's just a gateway to get "sub"aggregations; we would use this to represent the root aggregations node. SearchResult
has only the methodpublic Aggregation getAggregations()
- User can now call
searchResult.getAggregations().getTermsAggregation("myTA")
to get a root level aggregation andsearchResult.getAggregations().getTermsAggregation("myTA").getGlobalAggregation("myGA")
to get a sub aggregation.
p.s.: this was a thought in progress, feel free to attack it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think my last question is just what you would recommend for default behavior on errors. Two major situations I'm thinking of would be things like
- calling
getAvgAggregation("myAvgAgg")
when"myAvgAgg"
is not in the json node - calling `getAvgAggregation("myAvgAgg") when the type of that aggregation is actually something else
I would think for 1 it would be ok to fail silently-- just return a null aggregation. I will make sure to document if that is possible.
2 is more tricky. The constructors now assume that all of these fields are present, as if they are actually of that type they will be. Is it ok to make that assumption? Or should we check for existence of these fields before trying to get them in each case? And if they don't exist should we throw an exception? Just set the value to null?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- Case 1: Behaviour should be just returning
null
(mind you not "null Aggregation object" which would be null-object-pattern way which Jest did not use so far). - Case 2: Tricky indeed. I'd say go for the best effort plan: extract what we can into the given aggregation type object and leave the fields that couldn't be found with null values (so we do null checks during json parsing) (debug level logging for such fields please). Proactive verification as you described sounds too much like going out of way here to me.
Other notes: Please log where necessary (without getting too chatty on >=INFO level). For min/max/avg/sum we probably can use just a single class called SingleValueAggregation
, can't we?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes good idea on that, though I think that that should be an Abstract class that all of the min/max/avg/sum extend. Even though their won't be any actual code there seems that those should be the objects we create to keep with ES standards.
Since an aggregation result can only be produced through Aggregation action (class) we can extract these two methods to a new class
AggregationResult extends SearchResult
.edit: i was mistaken. we cant extract this unless we have a special action for aggregation (which we dont have/need atm).