Skip to content

Commit

Permalink
feat(dedupe): treat all non-canonical layers as analogous to a venue
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink committed Oct 29, 2018
1 parent a4c5462 commit 6599280
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 8 deletions.
15 changes: 15 additions & 0 deletions helper/TypeMapping.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
const _ = require('lodash');
const elasticsearch = require('elasticsearch');

// the sources which ship with the default Pelias configuration;
// getCanonicalLayers() only collects layers belonging to these sources
const CANONICAL_SOURCES = [
  'whosonfirst',
  'openstreetmap',
  'openaddresses',
  'geonames'
];

var TypeMapping = function(){

// A list of all sources
Expand Down Expand Up @@ -75,6 +78,18 @@ TypeMapping.prototype.generateMappings = function(){
this.layer_mapping = TypeMapping.addStandardTargetsToAliases(this.layers, this.layer_aliases);
};


// collect the layers of every canonical source (see CANONICAL_SOURCES) into a
// single list without duplicates; layers which only belong to custom sources
// are left out.
TypeMapping.prototype.getCanonicalLayers = function(){
  var layersBySource = this.layers_by_source;
  var collected = [];

  for( var sourceName in layersBySource ){
    // skip any source which is not part of the canonical configuration
    if( !_.includes( CANONICAL_SOURCES, sourceName ) ){ continue; }
    collected = collected.concat( layersBySource[sourceName] );
  }

  // de-duplicate once at the end; the first occurrence of each layer is kept
  return _.uniq( collected );
};

// load values from targets block
TypeMapping.prototype.loadTargets = function( targetsBlock ){

Expand Down
11 changes: 10 additions & 1 deletion helper/diffPlaces.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,22 @@
const _ = require('lodash');
const placeTypes = require('./placeTypes');
const canonicalLayers = require('../helper/type_mapping').getCanonicalLayers();
const field = require('../helper/fieldValue');

/**
 * Compare the layer properties if they exist.
 *
 * Custom (non-canonical) layers are considered analogous to the 'venue'
 * layer: when each item is either a 'venue' or belongs to a layer which is
 * not listed in canonicalLayers, the pair is NOT reported as different.
 *
 * @param {object} item1 - first record, its `layer` property is inspected
 * @param {object} item2 - second record, its `layer` property is inspected
 * @returns {boolean} false if the layers are the same or analogous, else true.
 */
function isLayerDifferent(item1, item2){
  if( isPropertyDifferent(item1, item2, 'layer') ){
    // consider all custom layers to be analogous to a venue
    if( ( item1.layer === 'venue' || !_.includes( canonicalLayers, item1.layer ) ) &&
        ( item2.layer === 'venue' || !_.includes( canonicalLayers, item2.layer ) ) ){
      return false;
    }
    // at least one side is a canonical, non-venue layer: genuinely different
    return true;
  }
  return false;
}

/**
Expand Down
32 changes: 32 additions & 0 deletions test/unit/fixture/dedupe_elasticsearch_custom_layer_results.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Fixture: two records which share the same default name and the same parent
// hierarchy but differ in layer and source. The first sits on a custom
// 'major_employer' layer, the second on the 'venue' layer.
module.exports = [
// record from the 'transit' source on the custom 'major_employer' layer
{
'_id': '2456::trimet::major_employer',
'layer': 'major_employer',
'source': 'transit',
'name': {
'default': 'Nike World Headquarters'
},
'parent': {
'country_a': ['USA'],
'country': ['United States'],
'region': ['Oregon'],
'region_id': ['85688513']
},
'confidence': 0.963
},
// record from the 'openstreetmap' source on the 'venue' layer; name and
// parent fields are identical to the record above
{
'_id': '101914069',
'layer': 'venue',
'source': 'openstreetmap',
'name': {
'default': 'Nike World Headquarters'
},
'parent': {
'country_a': ['USA'],
'country': ['United States'],
'region': ['Oregon'],
'region_id': ['85688513']
},
'confidence': 0.962
}
];
32 changes: 25 additions & 7 deletions test/unit/middleware/dedupe.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
var data = require('../fixture/dedupe_elasticsearch_results');
var nonAsciiData = require('../fixture/dedupe_elasticsearch_nonascii_results');
var customLayerData = require('../fixture/dedupe_elasticsearch_custom_layer_results');
var dedupe = require('../../../middleware/dedupe')();

module.exports.tests = {};
Expand Down Expand Up @@ -56,10 +57,27 @@ module.exports.tests.dedupe = function(test, common) {
t.end();
});
});

test('deduplicate custom layers and venue layers', function(t) {
  // the fixture holds one custom-layer record and one venue record
  var request = { clean: { size: 20 } };
  var response = { data: customLayerData };

  // invoked by the middleware once it has finished processing the response
  function onDeduped() {
    t.equal(response.data.length, 1, 'only one result displayed');
    t.end();
  }

  dedupe(request, response, onDeduped);
});
};

module.exports.tests.trump = function(test, common) {
test('whosonfirst trumps geonames, replace', function (t) {

module.exports.tests.priority = function(test, common) {
test('whosonfirst prioritizes geonames, replace', function (t) {
var req = {
clean: {
text: 'Lancaster',
Expand Down Expand Up @@ -91,7 +109,7 @@ module.exports.tests.trump = function(test, common) {
});
});

test('whosonfirst trumps geonames, no replace', function (t) {
test('whosonfirst prioritizes geonames, no replace', function (t) {
var req = {
clean: {
text: 'Lancaster',
Expand Down Expand Up @@ -123,7 +141,7 @@ module.exports.tests.trump = function(test, common) {
});
});

test('openstreetmap trumps whosonfirst venues', function (t) {
test('openstreetmap prioritizes whosonfirst venues', function (t) {
var req = {
clean: {
text: 'Lancaster Dairy Farm',
Expand Down Expand Up @@ -155,7 +173,7 @@ module.exports.tests.trump = function(test, common) {
});
});

test('openaddresses trumps openstreetmap', function (t) {
test('openaddresses prioritizes openstreetmap', function (t) {
var req = {
clean: {
text: '100 Main St',
Expand Down Expand Up @@ -187,7 +205,7 @@ module.exports.tests.trump = function(test, common) {
});
});

test('openaddresses with zip trumps openaddresses without zip', function (t) {
test('openaddresses with zip prioritizes openaddresses without zip', function (t) {
var req = {
clean: {
text: '100 Main St',
Expand Down Expand Up @@ -223,7 +241,7 @@ module.exports.tests.trump = function(test, common) {
});
});

test('osm with zip trumps openaddresses without zip', function (t) {
test('osm with zip prioritizes openaddresses without zip', function (t) {
var req = {
clean: {
text: '100 Main St',
Expand Down

0 comments on commit 6599280

Please sign in to comment.