Skip to content

Commit

Permalink
Merge pull request #310 from pelias/leading-zeros
Browse files (browse the repository at this point in the history)
Stop removing leading 0s in housenumbers; only log when the entire housenumber reduces to 0
  • Loading branch information
Diana Shkolnikov authored Nov 10, 2017
2 parents 077bffe + 6dcd42f commit 9a2395b
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 19 deletions.
13 changes: 9 additions & 4 deletions lib/streams/cleanupStream.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
var through2 = require( 'through2' );
var _ = require('lodash');
const logger = require('pelias-logger').get('openaddresses');
const through2 = require( 'through2' );
const _ = require('lodash');

var cleanup = require( '../cleanup' );
const cleanup = require( '../cleanup' );

/*
* create a stream that performs any needed cleanup on a record
Expand All @@ -19,7 +20,11 @@ function createCleanupStream() {
}
});

record.NUMBER = _.trimStart(record.NUMBER, '0');
// track addresses where the entire housenumber can be reduced to 0
const trimmedNumber = _.trimStart(record.NUMBER, '0');
if (_.isEmpty(trimmedNumber)) {
logger.info('[cleanup_stream] housenumber==0');
}

next(null, record);
});
Expand Down
84 changes: 78 additions & 6 deletions test/data/expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -77,19 +77,67 @@
"source_id": "data/input_file_1:1"
}
},
{
"_index": "pelias",
"_type": "address",
"_id": "data/input_file_1:2",
"data": {
"name": {
"default": "0 Main St"
},
"phrase": {
"default": "0 Main St"
},
"address_parts": {
"number": "0",
"street": "Main St"
},
"center_point": {
"lon": 41.414141,
"lat": 14.141414
},
"source": "openaddresses",
"layer": "address",
"source_id": "data/input_file_1:2"
}
},
{
"_index": "pelias",
"_type": "address",
"_id": "data/input_file_1:3",
"data": {
"name": {
"default": "00 Elm St"
},
"phrase": {
"default": "00 Elm St"
},
"address_parts": {
"number": "00",
"street": "Elm St"
},
"center_point": {
"lon": 51.515151,
"lat": 15.151515
},
"source": "openaddresses",
"layer": "address",
"source_id": "data/input_file_1:3"
}
},
{
"_index": "pelias",
"_type": "address",
"_id": "data/input_file_1:4",
"data": {
"name": {
"default": "300 Main St"
"default": "00300 Main St"
},
"phrase": {
"default": "300 Main St"
"default": "00300 Main St"
},
"address_parts": {
"number": "300",
"number": "00300",
"street": "Main St"
},
"center_point": {
Expand Down Expand Up @@ -125,19 +173,43 @@
"source_id": "data/input_file_2:0"
}
},
{
"_index": "pelias",
"_type": "address",
"_id": "data/input_file_2:1",
"data": {
"name": {
"default": "00000 Vireo Rd"
},
"phrase": {
"default": "00000 Vireo Rd"
},
"address_parts": {
"number": "00000",
"street": "Vireo Rd"
},
"center_point": {
"lon": 81.818181,
"lat": 18.181818
},
"source": "openaddresses",
"layer": "address",
"source_id": "data/input_file_2:1"
}
},
{
"_index": "pelias",
"_type": "address",
"_id": "data/input_file_2:2",
"data": {
"name": {
"default": "500 Calle De Lago"
"default": "00500 Calle De Lago"
},
"phrase": {
"default": "500 Calle De Lago"
"default": "00500 Calle De Lago"
},
"address_parts": {
"number": "500",
"number": "00500",
"street": "Calle De Lago"
},
"center_point": {
Expand Down
6 changes: 3 additions & 3 deletions test/data/input_file_1.csv
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
LON,LAT,NUMBER,STREET,UNIT,CITY,DISTRICT,REGION,POSTCODE,ID
21.212121,12.121212,100,Main St,,input city,input district,input region,input postcode,GOOD RECORD
31.313131,13.131313, 200 , Main St ,,,,,,GOOD RECORD WITH FIELD TRIMMING
41.414141,14.141414,0,Main St,,,,,,WILL BE IGNORED SINCE NUMBER=0
51.515151,15.151515,00,Elm St,,,,,,WILL BE IGNORED SINCE NUMBER IS REDUCEABLE TO 0
61.616161,16.161616,00300,Main St,,,,,,REMOVES LEADING ZEROES
41.414141,14.141414,0,Main St,,,,,,WILL BE LOGGED BUT NOT SKIPPED (NUMBER IS REDUCEABLE TO 0)
51.515151,15.151515,00,Elm St,,,,,,WILL BE LOGGED BUT NOT SKIPPED (NUMBER IS REDUCEABLE TO 0)
61.616161,16.161616,00300,Main St,,,,,,MAINTAINS LEADING ZEROES
4 changes: 2 additions & 2 deletions test/data/input_file_2.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
LON,LAT,NUMBER,STREET,UNIT,CITY,DISTRICT,REGION,POSTCODE,ID
71.717171,17.171717,400,Vireo Rd,,,,,,GOOD RECORD
81.818181,18.181818,00000,Vireo Rd,,,,,,WILL BE IGNORED SINCE NUMBER IS REDUCEABLE TO 0
91.919191,19.191919,00500,Calle de Lago,,,,,,REMOVES LEADING ZEROES
81.818181,18.181818,00000,Vireo Rd,,,,,,WILL BE LOGGED BUT NOT SKIPPED (NUMBER IS REDUCEABLE TO 0)
91.919191,19.191919,00500,Calle de Lago,,,,,,MAINTAINS LEADING ZEROES
92.929292,29.292929,00500,Calle de Lago,,,,,,THIS GETS FILTERED OUT BY DEDUPE
8 changes: 4 additions & 4 deletions test/streams/cleanupStream.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ tape( 'cleanupStream trims whitespace from all fields', function(test) {
});
});

tape( 'cleanupStream trims leading 0\'s from house numbers', function(test) {
tape( 'cleanupStream does NOT trim leading 0\'s from house numbers', function(test) {
var inputs = [
{
NUMBER: ' 0030 ',
Expand All @@ -50,11 +50,11 @@ tape( 'cleanupStream trims leading 0\'s from house numbers', function(test) {

var expecteds = [
{
NUMBER: '30',
NUMBER: '0030',
STREET: 'Street'
},
{
NUMBER: '34560',
NUMBER: '0034560',
STREET: 'Street'
},
{
Expand All @@ -66,7 +66,7 @@ tape( 'cleanupStream trims leading 0\'s from house numbers', function(test) {
var cleanupStream = CleanupStream.create();

test_stream(inputs, cleanupStream, function(err, actual) {
test.deepEqual(actual, expecteds, 'leading 0\'s should have been trimmed from NUMBER');
test.deepEqual(actual, expecteds, 'leading 0\'s should not have been trimmed from NUMBER');
test.end();
});

Expand Down

0 comments on commit 9a2395b

Please sign in to comment.