Skip to content

Commit

Permalink
Merge pull request #92 from pelias/exclude-zero-house-numbers
Browse files Browse the repository at this point in the history
filter out '0' house numbers in US/CA
  • Loading branch information
trescube committed Apr 18, 2016
2 parents 3a37ebe + 4ce5c21 commit 4b464e1
Show file tree
Hide file tree
Showing 5 changed files with 150 additions and 1 deletion.
2 changes: 2 additions & 0 deletions lib/importPipeline.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ var deduplicatorStream = require('./streams/deduplicatorStream');
var recordStream = require('./streams/recordStream');
var model = require( 'pelias-model' );
var peliasDbclient = require( 'pelias-dbclient' );
var isUSorCAHouseNumberZero = require( './streams/isUSorCAHouseNumberZero' );

/**
* Import all OpenAddresses CSV files in a directory into Pelias elasticsearch.
Expand All @@ -29,6 +30,7 @@ function createFullImportPipeline( files, opts, finalStream ){
recordStream.create(files, opts.dirPath)
.pipe(deduplicatorStream.create(opts.deduplicate))
.pipe(adminLookupStream.create(opts.adminValues, peliasConfig))
.pipe(isUSorCAHouseNumberZero.create())
.pipe(model.createDocumentMapperStream())
.pipe(finalStream);
}
Expand Down
16 changes: 16 additions & 0 deletions lib/streams/isUSorCAHouseNumberZero.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
var filter = require('through2-filter');
var _ = require('lodash');

/**
 * Tells whether a record's parent country code list is exactly ['USA'] or
 * exactly ['CAN'].
 *
 * @param {object} record - record exposing `parent.country_a`
 * @returns {boolean} true only when `parent.country_a` deep-equals a
 *   single-element array holding 'USA' or 'CAN'
 */
function isUSorCA(record) {
  var countryCodes = record.parent.country_a;

  // compare against one-element arrays, so a list holding several codes
  // (or no list at all) never matches
  return ['USA', 'CAN'].some(function(code) {
    return _.isEqual(countryCodes, [code]);
  });
}

/**
 * Builds an object-mode filter stream (through2-filter) that drops records
 * whose house number is the string '0' when the record is in the USA or
 * Canada; every other record is passed along.
 *
 * @returns {object} the stream produced by `filter.obj`
 */
module.exports.create = function create() {
  return filter.obj(function(record) {
    // keep the record unless it is both a '0' house number and in US/CA;
    // the country check only runs for '0' house numbers
    return record.address_parts.number !== '0' || !isUSorCA(record);
  });
};
4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"combined-stream": "1.0.5",
"csv-parse": "1.0.4",
"glob": "^7.0.0",
"jshint": "^2.9.1",
"lodash": "^4.10.0",
"minimist": "1.2.0",
"pelias-address-deduplicator": "^1.x.x",
Expand All @@ -15,7 +16,8 @@
"pelias-logger": "^0.x.x",
"pelias-model": "3.1.0",
"pelias-wof-admin-lookup": "^1.5.0",
"through2": "2.0.1"
"through2": "2.0.1",
"through2-filter": "^2.0.0"
},
"devDependencies": {
"deep-diff": "^0.3.3",
Expand Down
128 changes: 128 additions & 0 deletions test/streams/isUSorCAHouseNumberZero.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
var tape = require('tape');
var event_stream = require('event-stream');

var isUSorCAHouseNumberZero = require('../../lib/streams/isUSorCAHouseNumberZero');

/**
 * Test helper: feeds `input` through `testedStream` and hands whatever comes
 * out the far end to `callback` via event-stream's writeArray.
 *
 * @param {Array} input - items to emit into the stream under test
 * @param {object} testedStream - the stream being exercised
 * @param {function} callback - passed straight to `event_stream.writeArray`
 */
function test_stream(input, testedStream, callback) {
  var source = event_stream.readArray(input);
  var sink = event_stream.writeArray(callback);

  var underTest = source.pipe(testedStream);
  underTest.pipe(sink);
}

/**
 * Specs for the isUSorCAHouseNumberZero filter stream. Each case pipes a
 * single record through a freshly created filter and checks whether the
 * record comes out the other side (`kept: true`) or is dropped
 * (`kept: false`).
 */
tape('isUSorCAHouseNumberZero', function(t) {
  var cases = [
    {
      name: 'non-0 house number in USA should return true',
      country: 'USA',
      number: '17',
      kept: true
    },
    {
      name: 'non-0 house number in CAN should return true',
      country: 'CAN',
      number: '17',
      kept: true
    },
    {
      name: 'non-0 house number in non-USA/CAN should return true',
      country: 'GBR',
      number: '17',
      kept: true
    },
    {
      name: '0 house number in USA should return false',
      country: 'USA',
      number: '0',
      kept: false
    },
    {
      name: '0 house number in CAN should return false',
      country: 'CAN',
      number: '0',
      kept: false
    },
    {
      name: '0 house number in non-USA/CAN should return true',
      country: 'GBR',
      number: '0',
      kept: true
    }
  ];

  cases.forEach(function(testCase) {
    t.test(testCase.name, function(t) {
      var record = {
        parent: {
          country_a: [testCase.country]
        },
        address_parts: {
          number: testCase.number
        }
      };

      // a kept record is emitted unchanged; a dropped record yields no output
      var expected = testCase.kept ? [record] : [];

      // the assertion message must describe the expected outcome, so the
      // dropped cases report 'false' rather than 'true'
      var message = testCase.kept ?
        'should have returned true' :
        'should have returned false';

      var filter = isUSorCAHouseNumberZero.create();

      test_stream([record], filter, function(err, actual) {
        t.deepEqual(actual, expected, message);
        t.end();
      });

    });
  });

});
1 change: 1 addition & 0 deletions test/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ require( './isValidCsvRecord' );
require( './streams/cleanupStream' );
require( './streams/documentStream' );
require( './streams/recordStream' );
require( './streams/isUSorCAHouseNumberZero' );

0 comments on commit 4b464e1

Please sign in to comment.