Skip to content

Commit

Permalink
geotile_grid implementation (#37842)
Browse files Browse the repository at this point in the history
Implements `geotile_grid` aggregation

This patch refactors previous implementation #30240

This code uses the same base classes as `geohash_grid` agg, but uses a different hashing
algorithm to allow zoom consistency.  Each grid bucket is aligned to Web Mercator tiles.
  • Loading branch information
nyurik authored Feb 1, 2019
1 parent 6c1e9fa commit f3cde06
Show file tree
Hide file tree
Showing 22 changed files with 1,310 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@
import org.elasticsearch.search.aggregations.bucket.filter.ParsedFilters;
import org.elasticsearch.search.aggregations.bucket.geogrid.GeoHashGridAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.geogrid.ParsedGeoHashGrid;
import org.elasticsearch.search.aggregations.bucket.geogrid.ParsedGeoTileGrid;
import org.elasticsearch.search.aggregations.bucket.geogrid.GeoTileGridAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.global.ParsedGlobal;
import org.elasticsearch.search.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder;
Expand Down Expand Up @@ -1760,6 +1762,7 @@ static List<NamedXContentRegistry.Entry> getDefaultNamedXContents() {
map.put(FilterAggregationBuilder.NAME, (p, c) -> ParsedFilter.fromXContent(p, (String) c));
map.put(InternalSampler.PARSER_NAME, (p, c) -> ParsedSampler.fromXContent(p, (String) c));
map.put(GeoHashGridAggregationBuilder.NAME, (p, c) -> ParsedGeoHashGrid.fromXContent(p, (String) c));
map.put(GeoTileGridAggregationBuilder.NAME, (p, c) -> ParsedGeoTileGrid.fromXContent(p, (String) c));
map.put(RangeAggregationBuilder.NAME, (p, c) -> ParsedRange.fromXContent(p, (String) c));
map.put(DateRangeAggregationBuilder.NAME, (p, c) -> ParsedDateRange.fromXContent(p, (String) c));
map.put(GeoDistanceAggregationBuilder.NAME, (p, c) -> ParsedGeoDistance.fromXContent(p, (String) c));
Expand Down
185 changes: 185 additions & 0 deletions docs/reference/aggregations/bucket/geotilegrid-aggregation.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
[[search-aggregations-bucket-geotilegrid-aggregation]]
=== GeoTile Grid Aggregation

A multi-bucket aggregation that works on `geo_point` fields and groups points into
buckets that represent cells in a grid. The resulting grid can be sparse and only
contains cells that have matching data. Each cell corresponds to a
https://en.wikipedia.org/wiki/Tiled_web_map[map tile] as used by many online map
sites. Each cell is labeled using a "{zoom}/{x}/{y}" format, where zoom is equal
to the user-specified precision.

* High precision keys have a larger range for x and y, and represent tiles that
cover only a small area.
* Low precision keys have a smaller range for x and y, and represent tiles that
each cover a large area.

See https://wiki.openstreetmap.org/wiki/Zoom_levels[Zoom level documentation]
on how precision (zoom) correlates to size on the ground. Precision for this
aggregation can be between 0 and 29, inclusive.

WARNING: The highest-precision geotile of length 29 produces cells that cover
less than a 10cm by 10cm of land and so high-precision requests can be very
costly in terms of RAM and result sizes. Please see the example below on how
to first filter the aggregation to a smaller geographic area before requesting
high-levels of detail.

The specified field must be of type `geo_point` (which can only be set
explicitly in the mappings) and it can also hold an array of `geo_point`
fields, in which case all points will be taken into account during aggregation.


==== Simple low-precision request

[source,js]
--------------------------------------------------
PUT /museums
{
"mappings": {
"properties": {
"location": {
"type": "geo_point"
}
}
}
}
POST /museums/_bulk?refresh
{"index":{"_id":1}}
{"location": "52.374081,4.912350", "name": "NEMO Science Museum"}
{"index":{"_id":2}}
{"location": "52.369219,4.901618", "name": "Museum Het Rembrandthuis"}
{"index":{"_id":3}}
{"location": "52.371667,4.914722", "name": "Nederlands Scheepvaartmuseum"}
{"index":{"_id":4}}
{"location": "51.222900,4.405200", "name": "Letterenhuis"}
{"index":{"_id":5}}
{"location": "48.861111,2.336389", "name": "Musée du Louvre"}
{"index":{"_id":6}}
{"location": "48.860000,2.327000", "name": "Musée d'Orsay"}
POST /museums/_search?size=0
{
"aggregations" : {
"large-grid" : {
"geotile_grid" : {
"field" : "location",
"precision" : 8
}
}
}
}
--------------------------------------------------
// CONSOLE

Response:

[source,js]
--------------------------------------------------
{
...
"aggregations": {
"large-grid": {
"buckets": [
{
"key" : "8/131/84",
"doc_count" : 3
},
{
"key" : "8/129/88",
"doc_count" : 2
},
{
"key" : "8/131/85",
"doc_count" : 1
}
]
}
}
}
--------------------------------------------------
// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/]

==== High-precision requests

When requesting detailed buckets (typically for displaying a "zoomed in" map)
a filter like <<query-dsl-geo-bounding-box-query,geo_bounding_box>> should be
applied to narrow the subject area otherwise potentially millions of buckets
will be created and returned.

[source,js]
--------------------------------------------------
POST /museums/_search?size=0
{
"aggregations" : {
"zoomed-in" : {
"filter" : {
"geo_bounding_box" : {
"location" : {
"top_left" : "52.4, 4.9",
"bottom_right" : "52.3, 5.0"
}
}
},
"aggregations":{
"zoom1":{
"geotile_grid" : {
"field": "location",
"precision": 22
}
}
}
}
}
}
--------------------------------------------------
// CONSOLE
// TEST[continued]

[source,js]
--------------------------------------------------
{
...
"aggregations" : {
"zoomed-in" : {
"doc_count" : 3,
"zoom1" : {
"buckets" : [
{
"key" : "22/2154412/1378379",
"doc_count" : 1
},
{
"key" : "22/2154385/1378332",
"doc_count" : 1
},
{
"key" : "22/2154259/1378425",
"doc_count" : 1
}
]
}
}
}
}
--------------------------------------------------
// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/]


==== Options

[horizontal]
field:: Mandatory. The name of the field indexed with GeoPoints.

precision:: Optional. The integer zoom of the key used to define
cells/buckets in the results. Defaults to 7.
Values outside of [0,29] will be rejected.

size:: Optional. The maximum number of geohash buckets to return
(defaults to 10,000). When results are trimmed, buckets are
prioritised based on the volumes of documents they contain.

shard_size:: Optional. To allow for more accurate counting of the top cells
returned in the final result the aggregation defaults to
returning `max(10,(size x number-of-shards))` buckets from each
shard. If this heuristic is undesirable, the number considered
from each shard can be over-ridden using this parameter.
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
setup:
- skip:
version: " - 6.99.99"
reason: "added in 7.0.0"
- do:
indices.create:
include_type_name: false
index: test_1
body:
settings:
number_of_replicas: 0
mappings:
properties:
location:
type: geo_point

---
"Basic test":
- do:
bulk:
refresh: true
body:
- index:
_index: test_1
_id: 1
- location: "52.374081,4.912350"
- index:
_index: test_1
_id: 2
- location: "52.369219,4.901618"
- index:
_index: test_1
_id: 3
- location: "52.371667,4.914722"
- index:
_index: test_1
_id: 4
- location: "51.222900,4.405200"
- index:
_index: test_1
_id: 5
- location: "48.861111,2.336389"
- index:
_index: test_1
_id: 6
- location: "48.860000,2.327000"

- do:
search:
rest_total_hits_as_int: true
body:
aggregations:
grid:
geotile_grid:
field: location
precision: 8


- match: { hits.total: 6 }
- match: { aggregations.grid.buckets.0.key: "8/131/84" }
- match: { aggregations.grid.buckets.0.doc_count: 3 }
- match: { aggregations.grid.buckets.1.key: "8/129/88" }
- match: { aggregations.grid.buckets.1.doc_count: 2 }
- match: { aggregations.grid.buckets.2.key: "8/131/85" }
- match: { aggregations.grid.buckets.2.doc_count: 1 }
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@
import org.elasticsearch.search.aggregations.bucket.filter.InternalFilters;
import org.elasticsearch.search.aggregations.bucket.geogrid.GeoHashGridAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.geogrid.InternalGeoHashGrid;
import org.elasticsearch.search.aggregations.bucket.geogrid.GeoTileGridAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.geogrid.InternalGeoTileGrid;
import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.global.InternalGlobal;
import org.elasticsearch.search.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder;
Expand Down Expand Up @@ -422,6 +424,8 @@ private void registerAggregations(List<SearchPlugin> plugins) {
GeoDistanceAggregationBuilder::parse).addResultReader(InternalGeoDistance::new));
registerAggregation(new AggregationSpec(GeoHashGridAggregationBuilder.NAME, GeoHashGridAggregationBuilder::new,
GeoHashGridAggregationBuilder::parse).addResultReader(InternalGeoHashGrid::new));
registerAggregation(new AggregationSpec(GeoTileGridAggregationBuilder.NAME, GeoTileGridAggregationBuilder::new,
GeoTileGridAggregationBuilder::parse).addResultReader(InternalGeoTileGrid::new));
registerAggregation(new AggregationSpec(NestedAggregationBuilder.NAME, NestedAggregationBuilder::new,
NestedAggregationBuilder::parse).addResultReader(InternalNested::new));
registerAggregation(new AggregationSpec(ReverseNestedAggregationBuilder.NAME, ReverseNestedAggregationBuilder::new,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator.KeyedFilter;
import org.elasticsearch.search.aggregations.bucket.geogrid.InternalGeoHashGrid;
import org.elasticsearch.search.aggregations.bucket.geogrid.GeoHashGridAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.geogrid.InternalGeoTileGrid;
import org.elasticsearch.search.aggregations.bucket.geogrid.GeoTileGridAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.global.Global;
import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder;
Expand Down Expand Up @@ -250,6 +252,13 @@ public static GeoHashGridAggregationBuilder geohashGrid(String name) {
return new GeoHashGridAggregationBuilder(name);
}

/**
* Create a new {@link InternalGeoTileGrid} aggregation with the given name.
*/
public static GeoTileGridAggregationBuilder geotileGrid(String name) {
return new GeoTileGridAggregationBuilder(name);
}

/**
* Create a new {@link SignificantTerms} aggregation with the given name.
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.aggregations.bucket.geogrid;

import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory;
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.Map;

public class GeoTileGridAggregationBuilder extends GeoGridAggregationBuilder {
public static final String NAME = "geotile_grid";
private static final int DEFAULT_PRECISION = 7;
private static final int DEFAULT_MAX_NUM_CELLS = 10000;

private static final ObjectParser<GeoGridAggregationBuilder, Void> PARSER = createParser(NAME, GeoTileUtils::parsePrecision);

public GeoTileGridAggregationBuilder(String name) {
super(name);
precision(DEFAULT_PRECISION);
size(DEFAULT_MAX_NUM_CELLS);
shardSize = -1;
}

public GeoTileGridAggregationBuilder(StreamInput in) throws IOException {
super(in);
}

@Override
public GeoGridAggregationBuilder precision(int precision) {
this.precision = GeoTileUtils.checkPrecisionRange(precision);
return this;
}

@Override
protected ValuesSourceAggregatorFactory<ValuesSource.GeoPoint, ?> createFactory(
String name, ValuesSourceConfig<ValuesSource.GeoPoint> config, int precision, int requiredSize, int shardSize,
SearchContext context, AggregatorFactory<?> parent, AggregatorFactories.Builder subFactoriesBuilder,
Map<String, Object> metaData
) throws IOException {
return new GeoTileGridAggregatorFactory(name, config, precision, requiredSize, shardSize, context, parent,
subFactoriesBuilder, metaData);
}

private GeoTileGridAggregationBuilder(GeoTileGridAggregationBuilder clone, AggregatorFactories.Builder factoriesBuilder,
Map<String, Object> metaData) {
super(clone, factoriesBuilder, metaData);
}

@Override
protected AggregationBuilder shallowCopy(AggregatorFactories.Builder factoriesBuilder, Map<String, Object> metaData) {
return new GeoTileGridAggregationBuilder(this, factoriesBuilder, metaData);
}

public static GeoGridAggregationBuilder parse(String aggregationName, XContentParser parser) throws IOException {
return PARSER.parse(parser, new GeoTileGridAggregationBuilder(aggregationName), null);
}

@Override
public String getType() {
return NAME;
}
}
Loading

0 comments on commit f3cde06

Please sign in to comment.