Skip to content

Commit

Permalink
feat(dedupe): improved handling of cases where "name", "parent" or "a…
Browse files Browse the repository at this point in the history
…ddress_parts" propertiees are not set
  • Loading branch information
missinglink committed Oct 30, 2018
1 parent 1ca4b04 commit bbb1d6d
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 47 deletions.
93 changes: 49 additions & 44 deletions helper/diffPlaces.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,25 +27,27 @@ function isParentHierarchyDifferent(item1, item2){
let parent1 = _.get(item1, 'parent');
let parent2 = _.get(item2, 'parent');

// if neither object has parent info, we consider them the same
if( !parent1 && !parent2 ){ return false; }
// check if these are plain 'ol javascript objects
let isPojo1 = _.isPlainObject(parent1);
let isPojo2 = _.isPlainObject(parent2);

// both have parent info
if( _.isPlainObject(parent1) && _.isPlainObject(parent2) ){
// if neither object has parent info, we consider them the same
if( !isPojo1 && !isPojo2 ){ return false; }

// iterate over all the placetypes, comparing between items
return placeTypes.some( placeType => {
// if only one has parent info, we consider them the same
// note: this really shouldn't happen as at least on parent should exist
if( !isPojo1 || !isPojo2 ){ return false; }

// skip the parent field corresponding to the item placetype
if( placeType === item1.layer ){ return false; }
// else both have parent info
// iterate over all the placetypes, comparing between items
return placeTypes.some( placeType => {

// ensure the parent ids are the same for all placetypes
return isPropertyDifferent( item1.parent, item2.parent, placeType + '_id' );
});
}
// skip the parent field corresponding to the item placetype
if( placeType === item1.layer ){ return false; }

// if one has parent info and the other doesn't, we consider them different
return true;
// ensure the parent ids are the same for all placetypes
return isPropertyDifferent( item1.parent, item2.parent, placeType + '_id' );
});
}

/**
Expand All @@ -56,27 +58,29 @@ function isNameDifferent(item1, item2){
let names1 = _.get(item1, 'name');
let names2 = _.get(item2, 'name');

// if neither object has name info, we consider them the same
if( !names1 && !names2 ){ return false; }
// check if these are plain 'ol javascript objects
let isPojo1 = _.isPlainObject(names1);
let isPojo2 = _.isPlainObject(names2);

// if both have name info
if( _.isPlainObject(names1) && _.isPlainObject(names2) ){
// if neither object has name info, we consider them the same
if( !isPojo1 && !isPojo2 ){ return false; }

// iterate over all the languages in item1, comparing between items
return Object.keys(names1).some( lang => {
// if only one has name info, we consider them the same
// note: this really shouldn't happen as name is a mandatory field
if( !isPojo1 || !isPojo2 ){ return false; }

// do not consider absence of an additional name as a difference
// but strictly enfore that 'default' must be present and match
if( _.has(names2, lang) || lang === 'default' ){
// else both have name info
// iterate over all the languages in item1, comparing between items
return Object.keys(names1).some( lang => {

// do not consider absence of an additional name as a difference
return isPropertyDifferent(names1, names2, lang);
}
});
}
// do not consider absence of an additional name as a difference
// but strictly enfore that 'default' must be present and match
if( _.has(names2, lang) || lang === 'default' ){

// if one has name info and the other doesn't, we consider them different
return true;
// do not consider absence of an additional name as a difference
return isPropertyDifferent(names1, names2, lang);
}
});
}

/**
Expand All @@ -87,26 +91,27 @@ function isAddressDifferent(item1, item2){
let address1 = _.get(item1, 'address_parts');
let address2 = _.get(item2, 'address_parts');

// if neither object has address info, we consider them the same
if( !address1 && !address2 ){ return false; }
// check if these are plain 'ol javascript objects
let isPojo1 = _.isPlainObject(address1);
let isPojo2 = _.isPlainObject(address2);

// if both have address info
if( _.isPlainObject(address1) && _.isPlainObject(address2) ){
// if neither object has address info, we consider them the same
if( !isPojo1 && !isPojo2 ){ return false; }

if( isPropertyDifferent(address1, address2, 'number') ){ return true; }
if( isPropertyDifferent(address1, address2, 'street') ){ return true; }
// if only one has address info, we consider them the same
if( !isPojo1 || !isPojo2 ){ return false; }

// only compare zip if both records have it, otherwise just ignore and assume it's the same
// since by this time we've already compared parent hierarchies
if( _.has(address1, 'zip') && _.has(address2, 'zip') ){
if( isPropertyDifferent(address1, address2, 'zip') ){ return true; }
}
// else both have address info
if( isPropertyDifferent(address1, address2, 'number') ){ return true; }
if( isPropertyDifferent(address1, address2, 'street') ){ return true; }

return false;
// only compare zip if both records have it, otherwise just ignore and assume it's the same
// since by this time we've already compared parent hierarchies
if( _.has(address1, 'zip') && _.has(address2, 'zip') ){
if( isPropertyDifferent(address1, address2, 'zip') ){ return true; }
}

// one has address and the other doesn't, different!
return true;
return false;
}

/**
Expand Down
6 changes: 3 additions & 3 deletions test/unit/middleware/dedupe.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
var data = require('../fixture/dedupe_elasticsearch_results');
var nonAsciiData = require('../fixture/dedupe_elasticsearch_nonascii_results');
var customLayerData = require('../fixture/dedupe_elasticsearch_custom_layer_results');
var onlyPostalcodeDiffers = require('../fixture/dedupe_only_postalcode_differs');
var onlyPostalcodeDiffersData = require('../fixture/dedupe_only_postalcode_differs');
var dedupe = require('../../../middleware/dedupe')();

module.exports.tests = {};
Expand Down Expand Up @@ -84,9 +84,9 @@ module.exports.tests.dedupe = function(test, common) {
}
};
var res = {
data: onlyPostalcodeDiffers
data: onlyPostalcodeDiffersData
};
var expected = onlyPostalcodeDiffers[1]; // non-canonical record
var expected = onlyPostalcodeDiffersData[1]; // record with postcode

dedupe(req, res, function () {
t.equal(res.data.length, 1, 'only one result displayed');
Expand Down

0 comments on commit bbb1d6d

Please sign in to comment.