Skip to content

Commit

Permalink
feat(dedupe): treat all non-canonical layers as analogous to a venue…
Browse files Browse the repository at this point in the history
…, prefer non-canonical records
  • Loading branch information
missinglink authored and orangejulius committed Oct 29, 2018
1 parent c458ea7 commit f1c5e8d
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 8 deletions.
19 changes: 19 additions & 0 deletions helper/TypeMapping.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
const _ = require('lodash');
const elasticsearch = require('elasticsearch');

// a list of the canonical sources included in the default Pelias configuration
const CANONICAL_SOURCES = ['whosonfirst', 'openstreetmap', 'openaddresses', 'geonames'];

var TypeMapping = function(){

// A list of all sources
Expand Down Expand Up @@ -75,6 +78,22 @@ TypeMapping.prototype.generateMappings = function(){
this.layer_mapping = TypeMapping.addStandardTargetsToAliases(this.layers, this.layer_aliases);
};

/**
 * Return a list of all sources which are part of the canonical Pelias configuration.
 *
 * A shallow copy is returned so that callers cannot accidentally mutate the
 * module-level CANONICAL_SOURCES list shared by every consumer.
 *
 * @returns {string[]} a new array containing the canonical source names
 */
TypeMapping.prototype.getCanonicalSources = function(){
  return CANONICAL_SOURCES.slice();
};

/**
 * Generate a list of all layers which are part of the canonical Pelias configuration.
 *
 * Walks this.layers_by_source, keeps only the entries whose source name is
 * listed in CANONICAL_SOURCES and returns their layers with duplicates removed
 * (the first occurrence of each layer wins).
 *
 * @returns {string[]} de-duplicated list of layers belonging to canonical sources
 */
TypeMapping.prototype.getCanonicalLayers = function(){
  let collected = [];
  for( const sourceName in this.layers_by_source ){
    // skip any source which is not part of the canonical set
    if( !_.includes( CANONICAL_SOURCES, sourceName ) ){ continue; }
    collected = collected.concat( this.layers_by_source[sourceName] );
  }
  return _.uniq( collected );
};

// load values from targets block
TypeMapping.prototype.loadTargets = function( targetsBlock ){

Expand Down
11 changes: 10 additions & 1 deletion helper/diffPlaces.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,22 @@
const _ = require('lodash');
const placeTypes = require('./placeTypes');
const canonicalLayers = require('../helper/type_mapping').getCanonicalLayers();
const field = require('../helper/fieldValue');

/**
 * Compare the layer properties if they exist.
 *
 * Layers which are not part of the canonical layer list are considered to be
 * analogous to the 'venue' layer, so a venue and a custom-layer record (or two
 * records from different custom layers) are NOT reported as different.
 *
 * @param {object} item1 - first record to compare
 * @param {object} item2 - second record to compare
 * @returns {boolean} false if the layers are the same or analogous, else true
 */
function isLayerDifferent(item1, item2){
  // identical (or absent) layer properties are never considered different
  if( !isPropertyDifferent(item1, item2, 'layer') ){ return false; }

  // consider all custom layers to be analogous to a venue
  const isVenueLike = function( layer ){
    return layer === 'venue' || !_.includes( canonicalLayers, layer );
  };

  return !( isVenueLike( item1.layer ) && isVenueLike( item2.layer ) );
}

/**
Expand Down
5 changes: 5 additions & 0 deletions middleware/dedupe.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
const logger = require('pelias-logger').get('api');
const _ = require('lodash');
const isDifferent = require('../helper/diffPlaces').isDifferent;
const canonicalSources = require('../helper/type_mapping').getCanonicalSources();
const field = require('../helper/fieldValue');

function dedupeResults(req, res, next) {
Expand Down Expand Up @@ -74,6 +75,10 @@ function isPreferred(existingHit, candidateHit) {
if( !_.has(existingHit, 'address_parts.zip') &&
_.has(candidateHit, 'address_parts.zip') ){ return true; }

// prefer non-canonical sources over canonical ones
if( !_.includes(canonicalSources, candidateHit.source) &&
_.includes(canonicalSources, existingHit.source) ){ return true; }

// prefer certain sources over others
switch( existingHit.source ){
// sources are the same
Expand Down
32 changes: 32 additions & 0 deletions test/unit/fixture/dedupe_elasticsearch_custom_layer_results.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Fixture: two records which share the same default name and the same parent
// hierarchy but differ in layer, source and confidence.
module.exports = [
// record on the 'venue' layer from the 'openstreetmap' source
{
'_id': '101914069',
'layer': 'venue',
'source': 'openstreetmap',
'name': {
'default': 'Nike World Headquarters'
},
'parent': {
'country_a': ['USA'],
'country': ['United States'],
'region': ['Oregon'],
'region_id': ['85688513']
},
'confidence': 0.98
},
// record on the 'major_employer' layer from the 'transit' source
{
'_id': '2456::trimet::major_employer',
'layer': 'major_employer',
'source': 'transit',
'name': {
'default': 'Nike World Headquarters'
},
'parent': {
'country_a': ['USA'],
'country': ['United States'],
'region': ['Oregon'],
'region_id': ['85688513']
},
'confidence': 0.50
}
];
34 changes: 27 additions & 7 deletions test/unit/middleware/dedupe.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
var data = require('../fixture/dedupe_elasticsearch_results');
var nonAsciiData = require('../fixture/dedupe_elasticsearch_nonascii_results');
var customLayerData = require('../fixture/dedupe_elasticsearch_custom_layer_results');
var dedupe = require('../../../middleware/dedupe')();

module.exports.tests = {};
Expand Down Expand Up @@ -56,10 +57,29 @@ module.exports.tests.dedupe = function(test, common) {
t.end();
});
});

test('deduplicate between custom layers and venue layers', function(t) {
  // the second fixture entry is the non-canonical record which should survive
  var nonCanonicalRecord = customLayerData[1];
  var req = { clean: { size: 20 } };
  var res = { data: customLayerData };

  dedupe(req, res, function () {
    // the two fixture records must collapse into a single result
    t.equal(res.data.length, 1, 'only one result displayed');
    t.equal(res.data[0], nonCanonicalRecord, 'non-canonical data is preferred');
    t.end();
  });
});
};

module.exports.tests.trump = function(test, common) {
test('whosonfirst trumps geonames, replace', function (t) {

module.exports.tests.priority = function(test, common) {
test('whosonfirst takes priority over geonames, replace', function (t) {
var req = {
clean: {
text: 'Lancaster',
Expand Down Expand Up @@ -91,7 +111,7 @@ module.exports.tests.trump = function(test, common) {
});
});

test('whosonfirst trumps geonames, no replace', function (t) {
test('whosonfirst takes priority over geonames, no replace', function (t) {
var req = {
clean: {
text: 'Lancaster',
Expand Down Expand Up @@ -123,7 +143,7 @@ module.exports.tests.trump = function(test, common) {
});
});

test('openstreetmap trumps whosonfirst venues', function (t) {
test('openstreetmap takes priority over whosonfirst venues', function (t) {
var req = {
clean: {
text: 'Lancaster Dairy Farm',
Expand Down Expand Up @@ -155,7 +175,7 @@ module.exports.tests.trump = function(test, common) {
});
});

test('openaddresses trumps openstreetmap', function (t) {
test('openaddresses takes priority over openstreetmap', function (t) {
var req = {
clean: {
text: '100 Main St',
Expand Down Expand Up @@ -187,7 +207,7 @@ module.exports.tests.trump = function(test, common) {
});
});

test('openaddresses with zip trumps openaddresses without zip', function (t) {
test('openaddresses with zip takes priority over openaddresses without zip', function (t) {
var req = {
clean: {
text: '100 Main St',
Expand Down Expand Up @@ -223,7 +243,7 @@ module.exports.tests.trump = function(test, common) {
});
});

test('osm with zip trumps openaddresses without zip', function (t) {
test('osm with zip takes priority over openaddresses without zip', function (t) {
var req = {
clean: {
text: '100 Main St',
Expand Down

0 comments on commit f1c5e8d

Please sign in to comment.