diff --git a/README.md b/README.md
index 3c87de928..474dcb071 100644
--- a/README.md
+++ b/README.md
@@ -43,7 +43,8 @@ The API recognizes the following properties under the top-level `api` key in you
|---|---|---|---|
|`services`|*no*||Service definitions for [point-in-polygon](https://github.com/pelias/pip-service), [libpostal](https://github.com/whosonfirst/go-whosonfirst-libpostal), [placeholder](https://github.com/pelias/placeholder), and [interpolation](https://github.com/pelias/interpolation) services. For a description of when different Pelias services are recommended or required, see our [services documentation](https://github.com/pelias/documentation/blob/master/services.md).|
|`defaultParameters.focus.point.lon`
`defaultParameters.focus.point.lat`|no | |default coordinates for focus point
-|`targets.layers_by_source`
`targets.source_aliases`
`targets.layer_aliases`|no | |custom values for which `sources` and `layers` the API accepts (See more info in the [Custom sources and layers](#custom-sources-and-layers) section below).
+|`targets.auto_discover`|no|false|Should `sources` and `layers` be automatically discovered by querying elasticsearch at process startup. (See more info in the [Custom sources and layers](#custom-sources-and-layers) section below).
+|`targets.layers_by_source`
`targets.source_aliases`
`targets.layer_aliases`|no | |custom values for which `sources` and `layers` the API accepts (See more info in the [Custom sources and layers](#custom-sources-and-layers) section below). We recommend using the `targets.auto_discover:true` configuration instead of setting these manually.
|`customBoosts` | no | `{}` | Allows configuring boosts for specific sources and layers, in order to influence result order. See [Configurable Boosts](#custom-boosts) below for details |
|`autocomplete.exclude_address_length` | no | 0 | As a performance optimization, this optional parameter allows excluding address results for queries below the configured length. Addresses are usually the bulk of the records in Elasticsearch, and searching across all of them for very short text inputs can be slow, with little benefit. Consider setting this to 1 or 2 if you have several million addresses in Pelias. |
|`indexName`|*no*|*pelias*|name of the Elasticsearch index to be used when building queries|
@@ -88,7 +89,20 @@ The `timeout` and `retry` values, as show in in the `pip` service section, are o
### Custom sources and layers
-Pelias allows importing your own data with custom values for `source` and `layer`, however you MUST tell Pelias about them via `pelias.json` using the `targets.layers_by_source`, `targets.source_aliases` and `targets.layer_aliases` configuration parameters.
+Pelias allows importing your own data with custom values for `source` and `layer`.
+
+Custom sources and layers are not automatically detected, you MUST set `targets.auto_discover` to `true` in your `pelias.json` to make Pelias aware of them.
+
+The `auto_discover` functionality sends a request to elasticsearch in order to automatically discover sources and layers from elasticsearch when the API server starts-up.
+
+Be aware that the query sent to Elasticsearch can take several seconds to execute the first time against a large index, potentially impacting the performance of other queries hitting Elasticsearch at the same time. The query is cached in Elasticsearch for subsequent requests.
+
+If you are importing custom layers and are running a city or small region sized build then the impact of this query will likely be negligible, you can safely use `targets.auto_discover:true`.
+
+For advanced users running a full-planet build with custom layers or sources, and also concerned about this start-up delay, you have two options:
+
+1. execute the `auto_discover` query once manually to prime the cache **or**
+2. set `targets.auto_discover: false` and manually define the layers as documented below.
#### `layers_by_source`
diff --git a/helper/TypeMapping.js b/helper/TypeMapping.js
index 135045d08..b074fcaa7 100644
--- a/helper/TypeMapping.js
+++ b/helper/TypeMapping.js
@@ -57,32 +57,45 @@ TypeMapping.addStandardTargetsToAliases = function(standard, aliases) {
// source alias setter
TypeMapping.prototype.setSourceAliases = function( aliases ){
- this.source_aliases = aliases;
+ safeReplace(this.source_aliases, aliases);
};
// layers-by-source alias setter
TypeMapping.prototype.setLayersBySource = function( lbs ){
- this.layers_by_source = lbs;
+ safeReplace(this.layers_by_source, lbs);
};
// layer alias setter
TypeMapping.prototype.setLayerAliases = function( aliases ){
- this.layer_aliases = aliases;
+ safeReplace(this.layer_aliases, aliases);
};
// canonical sources setter
TypeMapping.prototype.setCanonicalSources = function( sources ){
- this.canonical_sources = sources;
+ safeReplace(this.canonical_sources, sources);
};
// generate mappings after setters have been run
TypeMapping.prototype.generateMappings = function(){
- this.sources = Object.keys( this.layers_by_source );
- this.source_mapping = TypeMapping.addStandardTargetsToAliases(this.sources, this.source_aliases);
- this.layers = _.uniq(Object.keys(this.layers_by_source).reduce(function(acc, key) {
- return acc.concat(this.layers_by_source[key]);
- }.bind(this), []));
- this.layer_mapping = TypeMapping.addStandardTargetsToAliases(this.layers, this.layer_aliases);
+
+ safeReplace(this.sources, Object.keys(this.layers_by_source) );
+
+ safeReplace(
+ this.source_mapping,
+ TypeMapping.addStandardTargetsToAliases(this.sources, this.source_aliases)
+ );
+
+ safeReplace(
+ this.layers,
+ _.uniq(Object.keys(this.layers_by_source).reduce(function (acc, key) {
+ return acc.concat(this.layers_by_source[key]);
+ }.bind(this), []))
+ );
+
+ safeReplace(
+ this.layer_mapping,
+ TypeMapping.addStandardTargetsToAliases(this.layers, this.layer_aliases)
+ );
};
// generate a list of all layers which are part of the canonical Pelias configuration
@@ -133,4 +146,16 @@ TypeMapping.prototype.load = function( done ){
loadFromElasticsearch(this, done);
};
+// replace the contents of an object or array
+// while maintaining the original pointer reference
+function safeReplace(reference, replacement){
+ if (_.isObject(reference) && _.isObject(replacement) ){
+ for( let attr in reference ){ delete reference[attr]; }
+ for (let attr in replacement) { reference[attr] = replacement[attr]; }
+ } else if (_.isArray(reference) && _.isArray(replacement)) {
+ reference.length = 0;
+ replacement.forEach(el => reference.push(el));
+ }
+}
+
module.exports = TypeMapping;
diff --git a/helper/type_mapping_discovery.js b/helper/type_mapping_discovery.js
index e4abd5684..1a3a57512 100644
--- a/helper/type_mapping_discovery.js
+++ b/helper/type_mapping_discovery.js
@@ -64,6 +64,9 @@ module.exports = (tm, done) => {
logger.info( 'total sources', sources.length );
logger.info( 'successfully discovered type mapping from elasticsearch' );
tm.setLayersBySource( layersBySource );
+
+ // (re)generate the mappings
+ tm.generateMappings();
}
}
diff --git a/sanitizer/_address_layer_filter.js b/sanitizer/_address_layer_filter.js
index a83f4b0d7..87c4be5a4 100644
--- a/sanitizer/_address_layer_filter.js
+++ b/sanitizer/_address_layer_filter.js
@@ -23,15 +23,9 @@ const check = require('check-types');
const ADDRESS_FILTER_WARNING = 'performance optimization: excluding \'address\' layer';
-function _setup(layersBySource) {
-
- // generate a deduplicated list of all layers except 'address'
- let layers = Object.keys(layersBySource).reduce((l, key) => l.concat(layersBySource[key]), []);
- layers = _.uniq(layers); // dedupe
- layers = layers.filter(item => item !== 'address'); // exclude 'address'
+function _setup(tm) {
return {
- layers: layers,
sanitize: function _sanitize(__, clean) {
// error & warning messages
@@ -81,7 +75,7 @@ function _setup(layersBySource) {
// handle the common case where neither source nor layers were specified
if (!check.array(clean.sources) || !check.nonEmptyArray(clean.sources)) {
- clean.layers = layers;
+ clean.layers = tm.layers.filter(item => item !== 'address'); // exclude 'address'
messages.warnings.push(ADDRESS_FILTER_WARNING);
}
@@ -89,7 +83,7 @@ function _setup(layersBySource) {
else if (check.array(clean.sources)) {
// we need to create a list of layers for the specified sources
- let sourceLayers = clean.sources.reduce((l, key) => l.concat(layersBySource[key] || []), []);
+ let sourceLayers = clean.sources.reduce((l, key) => l.concat(tm.layers_by_source[key] || []), []);
sourceLayers = _.uniq(sourceLayers); // dedupe
// if the sources specified do not have any addresses or if removing the
diff --git a/sanitizer/_targets.js b/sanitizer/_targets.js
index 180f9442f..36303246a 100644
--- a/sanitizer/_targets.js
+++ b/sanitizer/_targets.js
@@ -8,8 +8,7 @@ function getValidKeys(mapping) {
function _setup( paramName, targetMap ) {
const opts = {
paramName: paramName,
- targetMap: targetMap,
- targetMapKeysString: getValidKeys(targetMap)
+ targetMap: targetMap
};
return {
@@ -27,7 +26,7 @@ function _setup( paramName, targetMap ) {
// param must be a valid non-empty string
if( !check.nonEmptyString( targetsString ) ){
messages.errors.push(
- opts.paramName + ' parameter cannot be an empty string. Valid options: ' + opts.targetMapKeysString
+ opts.paramName + ' parameter cannot be an empty string. Valid options: ' + getValidKeys(opts.targetMap)
);
}
else {
@@ -42,7 +41,7 @@ function _setup( paramName, targetMap ) {
return !opts.targetMap.hasOwnProperty(target);
}).forEach( function( target ){
messages.errors.push(
- '\'' + target + '\' is an invalid ' + opts.paramName + ' parameter. Valid options: ' + opts.targetMapKeysString
+ '\'' + target + '\' is an invalid ' + opts.paramName + ' parameter. Valid options: ' + getValidKeys(opts.targetMap)
);
});
@@ -61,7 +60,7 @@ function _setup( paramName, targetMap ) {
// string is empty
else if( check.string( targetsString ) ){
messages.errors.push(
- opts.paramName + ' parameter cannot be an empty string. Valid options: ' + opts.targetMapKeysString
+ opts.paramName + ' parameter cannot be an empty string. Valid options: ' + getValidKeys(opts.targetMap)
);
}
diff --git a/sanitizer/autocomplete.js b/sanitizer/autocomplete.js
index 5618e8322..e987da2da 100644
--- a/sanitizer/autocomplete.js
+++ b/sanitizer/autocomplete.js
@@ -11,7 +11,7 @@ module.exports.middleware = (_api_pelias_config) => {
size: require('../sanitizer/_size')(/* use defaults*/),
layers: require('../sanitizer/_targets')('layers', type_mapping.layer_mapping),
sources: require('../sanitizer/_targets')('sources', type_mapping.source_mapping),
- address_layer_filter: require('../sanitizer/_address_layer_filter')(type_mapping.layers_by_source),
+ address_layer_filter: require('../sanitizer/_address_layer_filter')(type_mapping),
// depends on the layers and sources sanitizers, must be run after them
sources_and_layers: require('../sanitizer/_sources_and_layers')(),
private: require('../sanitizer/_flag_bool')('private', false),
diff --git a/sanitizer/search.js b/sanitizer/search.js
index a307408db..1ba89e1ec 100644
--- a/sanitizer/search.js
+++ b/sanitizer/search.js
@@ -9,7 +9,7 @@ module.exports.middleware = (_api_pelias_config) => {
size: require('../sanitizer/_size')(/* use defaults*/),
layers: require('../sanitizer/_targets')('layers', type_mapping.layer_mapping),
sources: require('../sanitizer/_targets')('sources', type_mapping.source_mapping),
- address_layer_filter: require('../sanitizer/_address_layer_filter')(type_mapping.layers_by_source),
+ address_layer_filter: require('../sanitizer/_address_layer_filter')(type_mapping),
// depends on the layers and sources sanitizers, must be run after them
sources_and_layers: require('../sanitizer/_sources_and_layers')(),
private: require('../sanitizer/_flag_bool')('private', false),
diff --git a/test/unit/helper/TypeMapping.js b/test/unit/helper/TypeMapping.js
index b4e454758..7032f948b 100644
--- a/test/unit/helper/TypeMapping.js
+++ b/test/unit/helper/TypeMapping.js
@@ -78,9 +78,10 @@ module.exports.tests.addStandardTargetsToAliases = function(test) {
module.exports.tests.setSourceAliases = function(test) {
test('setter setSourceAliases', function(t) {
var tm = new TypeMapping();
- t.deepEqual(tm.source_aliases, {});
+ var ref = tm.source_aliases; // save inital pointer reference
+ t.deepEqual(ref, {});
tm.setSourceAliases({ foo: ['foo', 'bar'] });
- t.deepEqual(tm.source_aliases, { foo: ['foo', 'bar'] });
+ t.deepEqual(ref, { foo: ['foo', 'bar'] });
t.end();
});
};
@@ -88,9 +89,10 @@ module.exports.tests.setSourceAliases = function(test) {
module.exports.tests.setLayersBySource = function(test) {
test('setter setLayersBySource', function(t) {
var tm = new TypeMapping();
- t.deepEqual(tm.layers_by_source, {});
+ var ref = tm.layers_by_source; // save inital pointer reference
+ t.deepEqual(ref, {});
tm.setLayersBySource({ foo: ['foo', 'bar'] });
- t.deepEqual(tm.layers_by_source, { foo: ['foo', 'bar'] });
+ t.deepEqual(ref, { foo: ['foo', 'bar'] });
t.end();
});
};
@@ -98,9 +100,10 @@ module.exports.tests.setLayersBySource = function(test) {
module.exports.tests.setLayerAliases = function(test) {
test('setter setLayerAliases', function(t) {
var tm = new TypeMapping();
- t.deepEqual(tm.layer_aliases, {});
+ var ref = tm.layer_aliases; // save inital pointer reference
+ t.deepEqual(ref, {});
tm.setLayerAliases({ foo: ['foo', 'bar'] });
- t.deepEqual(tm.layer_aliases, { foo: ['foo', 'bar'] });
+ t.deepEqual(ref, { foo: ['foo', 'bar'] });
t.end();
});
};
@@ -121,32 +124,36 @@ module.exports.tests.generateMappings = function(test) {
});
test('generateMappings - sources', function(t) {
var tm = new TypeMapping();
+ var ref = tm.sources; // save inital pointer reference
tm.layers_by_source = { foo: ['foo'], faz: ['faz'] };
tm.generateMappings();
- t.deepEqual(tm.sources, ['foo', 'faz']);
+ t.deepEqual(ref, ['foo', 'faz']);
t.end();
});
test('generateMappings - source_mapping', function(t) {
var tm = new TypeMapping();
+ var ref = tm.source_mapping; // save inital pointer reference
tm.layers_by_source = { foo: ['foo'], faz: ['faz'] };
tm.source_aliases = { foo: ['foo','f'], bar: ['bar', 'b'], baz: ['baz'] };
tm.generateMappings();
- t.deepEqual(tm.source_mapping, { foo: ['foo', 'f'], bar: ['bar', 'b'], baz: ['baz'], faz: ['faz'] });
+ t.deepEqual(ref, { foo: ['foo', 'f'], bar: ['bar', 'b'], baz: ['baz'], faz: ['faz'] });
t.end();
});
test('generateMappings - layers', function(t) {
var tm = new TypeMapping();
+ var ref = tm.layers; // save inital pointer reference
tm.layers_by_source = { foo: ['foo'], faz: ['faz'] };
tm.generateMappings();
- t.deepEqual(tm.layers, ['foo','faz']);
+ t.deepEqual(ref, ['foo','faz']);
t.end();
});
test('generateMappings - layer_mapping', function(t) {
var tm = new TypeMapping();
+ var ref = tm.layer_mapping; // save inital pointer reference
tm.layers_by_source = { foo: ['foo'], faz: ['faz'] };
tm.layer_aliases = { foo: ['foo','f'], bar: ['bar', 'b'], baz: ['baz'] };
tm.generateMappings();
- t.deepEqual(tm.layer_mapping, { foo: ['foo', 'f'], bar: ['bar', 'b'], baz: ['baz'], faz: ['faz'] });
+ t.deepEqual(ref, { foo: ['foo', 'f'], bar: ['bar', 'b'], baz: ['baz'], faz: ['faz'] });
t.end();
});
};
diff --git a/test/unit/sanitizer/_address_layer_filter.js b/test/unit/sanitizer/_address_layer_filter.js
index fb9e4939e..5543a7293 100644
--- a/test/unit/sanitizer/_address_layer_filter.js
+++ b/test/unit/sanitizer/_address_layer_filter.js
@@ -1,25 +1,15 @@
const sanitizer = require('../../../sanitizer/_address_layer_filter');
+const TypeMapping = require('../../../helper/TypeMapping');
const NO_MESSAGES = { errors: [], warnings: [] };
const STD_MESSAGES = { errors: [], warnings: ['performance optimization: excluding \'address\' layer'] };
module.exports.tests = {};
-module.exports.tests.enumerate_layers = function (test, common) {
- test('enumerate layers', (t) => {
- let m = { A: ['A', 'B'], B: ['B'], C: ['B', 'C'] };
- t.deepEqual(sanitizer(m).layers, ['A','B','C']);
- t.end();
- });
- test('enumerate layers - exclude "address"', (t) => {
- let m = { A: ['A', 'B', 'address'], B: ['B'], C: ['B', 'address', 'C'] };
- t.deepEqual(sanitizer(m).layers, ['A', 'B', 'C']);
- t.end();
- });
-};
-
module.exports.tests.sanitize = function (test, common) {
- let m = { A: ['A'], B: ['B'], C: ['C'] };
- let s = sanitizer(m);
+ let tm = new TypeMapping();
+ tm.setLayersBySource({ A: ['A'], B: ['B'], C: ['C'] });
+ tm.generateMappings();
+ let s = sanitizer(tm);
test('sanitize - do nothing if clean.layers already specified', (t) => {
let clean = { text: '1 example', layers: ['not empty'] };
@@ -71,8 +61,10 @@ module.exports.tests.sanitize = function (test, common) {
});
test('sanitize - reduce target layers when clean.sources specified', (t) => {
- let m = { A: ['A', 'address'], B: ['B', 'address'], C: ['C'] };
- let s = sanitizer(m);
+ let tm = new TypeMapping();
+ tm.setLayersBySource({ A: ['A', 'address'], B: ['B', 'address'], C: ['C'] });
+ tm.generateMappings();
+ let s = sanitizer(tm);
let clean = { text: 'foo', sources: [ 'A', 'C' ] };
t.deepEqual(s.sanitize(null, clean), STD_MESSAGES);
@@ -81,8 +73,10 @@ module.exports.tests.sanitize = function (test, common) {
});
test('sanitize - do nothing for sources which do not have any addresses', (t) => {
- let m = { A: ['address'], B: ['address'] };
- let s = sanitizer(m);
+ let tm = new TypeMapping();
+ tm.setLayersBySource({ A: ['address'], B: ['address'] });
+ tm.generateMappings();
+ let s = sanitizer(tm);
let clean = { text: 'foo', sources: ['A', 'B'] };
t.deepEqual(s.sanitize(null, clean), NO_MESSAGES);
@@ -92,8 +86,10 @@ module.exports.tests.sanitize = function (test, common) {
};
module.exports.tests.tricky_inputs = function (test, common) {
- let m = { A: ['A'], B: ['B'], C: ['C'] };
- let s = sanitizer(m);
+ let tm = new TypeMapping();
+ tm.setLayersBySource({ A: ['A'], B: ['B'], C: ['C'] });
+ tm.generateMappings();
+ let s = sanitizer(tm);
test('tricky inputs - extra whitespace', (t) => {
let clean = { text: ' \t\n 12 \n\t ' };
@@ -112,8 +108,10 @@ module.exports.tests.tricky_inputs = function (test, common) {
// handle cases where a parser has run and removed admin tokens
module.exports.tests.parsed_text = function (test, common) {
- let m = { A: ['A'], B: ['B'], C: ['C'] };
- let s = sanitizer(m);
+ let tm = new TypeMapping();
+ tm.setLayersBySource({ A: ['A'], B: ['B'], C: ['C'] });
+ tm.generateMappings();
+ let s = sanitizer(tm);
test('naive parser - apply filter due to comma being present', (t) => {
let clean = { text: 'A', parsed_text: { name: '1', admin_parts: 'Avenue' } };