Skip to content

Commit

Permalink
feat(unit-number-extractor): add new mapper stream to separate concat…
Browse files Browse the repository at this point in the history
…enated unit numbers
  • Loading branch information
missinglink committed Feb 3, 2022
1 parent 1fbe944 commit 163e43f
Show file tree
Hide file tree
Showing 4 changed files with 235 additions and 1 deletion.
4 changes: 3 additions & 1 deletion lib/streams/recordStream.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ const ContentHashStream = require('./contentHashStream');
const ValidRecordFilterStream = require('./validRecordFilterStream');
const DocumentStream = require('./documentStream');
const gnafMapperStreamFactory = require('./gnafMapperStream');
const unitSplittingMapperStreamFactory = require('./unitSplittingMapperStream');

/*
* Construct a suitable id prefix for a CSV file given
Expand Down Expand Up @@ -65,7 +66,8 @@ function createRecordStream( filePath, dirPath ){
.pipe( validRecordFilterStream )
.pipe( cleanupStream )
.pipe( documentStream )
.pipe( gnafMapperStreamFactory() );
.pipe( gnafMapperStreamFactory() )
.pipe( unitSplittingMapperStreamFactory() );
}

function geojsonStream(stream) {
Expand Down
71 changes: 71 additions & 0 deletions lib/streams/unitSplittingMapperStream.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/**
The unit splitting mapper is responsible for detecting when the address.number
field contains the concatenation of the unit and the housenumber.
In this case we attempt to split the two terms into their constituent parts.
note: Addressing formats vary between countries, so it's unlikely that a pattern
which works for one country will also work internationally. For this reason this
mapper accepts a country code which can be used to select the appropriate pattern(s).
Feel free to make changes to this mapping file!
**/

const _ = require('lodash');
const through = require('through2');
const logger = require('pelias-logger').get('openstreetmap');
const mappers = {};

// Australasian Unit Number Mapper
// https://auspost.com.au/content/dam/auspost_corp/media/documents/Appendix-01.pdf
// https://www.nzpost.co.nz/sites/nz/files/2021-10/adv358-address-standards.pdf
/**
 * Splits a concatenated unit + housenumber held in the 'number' address field
 * into separate 'unit' and 'number' address fields.
 *
 * Nothing is written when the 'number' value is not a string, is shorter than
 * three characters, or matches none of the supported patterns.
 *
 * @param {object} doc - document exposing getAddress(key) and setAddress(key, value)
 * @returns {undefined}
 */
const australasian = (doc) => {
  const raw = doc.getAddress('number');

  // the shortest value either pattern can match is three characters, eg. '2/4'
  const isCandidate = _.isString(raw) && raw.length >= 3;
  if (!isCandidate) { return; }

  // patterns are tried in order; in each one the final two capture groups
  // are the unit followed by the housenumber
  const patterns = [
    // 2/14
    /^(\d+)\/(\d+)$/,
    // Flat 2 14 | F 2 14 | Unit 2 14 | APT 2 14
    /^(flat|f|unit|apartment|apt)\s+(\d+)\s+(\d+)$/i
  ];

  for (const pattern of patterns) {
    const found = raw.match(pattern);
    if (!found) { continue; }

    const [unit, housenumber] = found.slice(-2);
    doc.setAddress('unit', unit);
    doc.setAddress('number', housenumber);
    return;
  }
};

// register the Australasian mapper under each country code it applies to
for (const countryCode of ['AU', 'NZ']) {
  mappers[countryCode] = australasian;
}

module.exports = function () {
return through.obj((doc, enc, next) => {
try {
// only applies to records with a 'number' set and no 'unit' set (yet).
if (doc.hasAddress('number') && !doc.hasAddress('unit')) {

// select the appropriate mapper based on country code
const mapper = _.get(mappers, doc.getMeta('country_code'));
if (_.isFunction(mapper)) {

// run the country-specific mapper
mapper(doc);
}
}
}

catch (e) {
logger.error('unit_mapper error');
logger.error(e.stack);
logger.error(JSON.stringify(doc, null, 2));
}

return next(null, doc);
});
};
160 changes: 160 additions & 0 deletions test/streams/unitSplittingMapperStream.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
var tape = require('tape');
const through = require('through2');
const mapper = require('../../lib/streams/unitSplittingMapperStream');
const Document = require('pelias-model').Document;

module.exports.tests = {};

// test exports
/**
 * Interface checks: the module exports a factory function, and the factory
 * returns an object carrying both _read and _write functions.
 */
module.exports.tests.interface = (test) => {
  test('interface: factory', (t) => {
    const exportedType = typeof mapper;
    t.equal(exportedType, 'function', 'stream factory');
    t.end();
  });

  test('interface: stream', (t) => {
    const instance = mapper();
    const expectations = [
      [typeof instance, 'object', 'valid stream'],
      [typeof instance._read, 'function', 'valid readable'],
      [typeof instance._write, 'function', 'valid writeable']
    ];

    for (const [actual, expected, message] of expectations) {
      t.equal(actual, expected, message);
    }
    t.end();
  });
};

// ===================== australasian unit number mapping ======================

/**
 * AU document whose address number is '2/14': the stream output is expected
 * to carry unit '2' and number '14', with name and street left as they were.
 */
module.exports.tests.australasian_solidus = (test) => {
  const fixture = new Document('oa', 'example', 1);
  fixture.setName('default', '2/14 Smith Street');
  fixture.setAddress('number', '2/14');
  fixture.setAddress('street', 'Smith Street');
  fixture.setMeta('country_code', 'AU');

  test('maps - split unit from housenumber', (t) => {
    const unitSplitter = mapper();

    // assertions run on whatever document comes out of the mapper stream
    const sink = through.obj((mapped, enc, done) => {
      t.deepEqual(mapped.getName('default'), '2/14 Smith Street', 'unchanged');
      t.deepEqual(mapped.getAddress('unit'), '2', 'mapped');
      t.deepEqual(mapped.getAddress('number'), '14', 'mapped');
      t.deepEqual(mapped.getAddress('street'), 'Smith Street', 'unchanged');
      t.end();
      done();
    });

    unitSplitter.pipe(sink);
    unitSplitter.write(fixture);
  });
};

// Flat 2 14 | F 2 14 | Unit 2 14 | APT 2 14

/**
 * AU document whose address number is 'Flat 2 14': the stream output is
 * expected to carry unit '2' and number '14', with name and street unchanged.
 */
module.exports.tests.australasian_flat_prefix = (test) => {
  const fixture = new Document('oa', 'example', 1);
  fixture.setName('default', 'Flat 2 14 Smith Street');
  fixture.setAddress('number', 'Flat 2 14');
  fixture.setAddress('street', 'Smith Street');
  fixture.setMeta('country_code', 'AU');

  test('maps - split unit from housenumber', (t) => {
    const unitSplitter = mapper();

    // assertions run on whatever document comes out of the mapper stream
    const sink = through.obj((mapped, enc, done) => {
      t.deepEqual(mapped.getName('default'), 'Flat 2 14 Smith Street', 'unchanged');
      t.deepEqual(mapped.getAddress('unit'), '2', 'mapped');
      t.deepEqual(mapped.getAddress('number'), '14', 'mapped');
      t.deepEqual(mapped.getAddress('street'), 'Smith Street', 'unchanged');
      t.end();
      done();
    });

    unitSplitter.pipe(sink);
    unitSplitter.write(fixture);
  });
};

/**
 * AU document whose address number is 'F 2 14': the stream output is
 * expected to carry unit '2' and number '14', with name and street unchanged.
 */
module.exports.tests.australasian_flat_prefix_abbreviated = (test) => {
  const fixture = new Document('oa', 'example', 1);
  fixture.setName('default', 'F 2 14 Smith Street');
  fixture.setAddress('number', 'F 2 14');
  fixture.setAddress('street', 'Smith Street');
  fixture.setMeta('country_code', 'AU');

  test('maps - split unit from housenumber', (t) => {
    const unitSplitter = mapper();

    // assertions run on whatever document comes out of the mapper stream
    const sink = through.obj((mapped, enc, done) => {
      t.deepEqual(mapped.getName('default'), 'F 2 14 Smith Street', 'unchanged');
      t.deepEqual(mapped.getAddress('unit'), '2', 'mapped');
      t.deepEqual(mapped.getAddress('number'), '14', 'mapped');
      t.deepEqual(mapped.getAddress('street'), 'Smith Street', 'unchanged');
      t.end();
      done();
    });

    unitSplitter.pipe(sink);
    unitSplitter.write(fixture);
  });
};

/**
 * AU document whose address number is 'Unit 2 14': the stream output is
 * expected to carry unit '2' and number '14', with name and street unchanged.
 */
module.exports.tests.australasian_unit_prefix = (test) => {
  const fixture = new Document('oa', 'example', 1);
  fixture.setName('default', 'Unit 2 14 Smith Street');
  fixture.setAddress('number', 'Unit 2 14');
  fixture.setAddress('street', 'Smith Street');
  fixture.setMeta('country_code', 'AU');

  test('maps - split unit from housenumber', (t) => {
    const unitSplitter = mapper();

    // assertions run on whatever document comes out of the mapper stream
    const sink = through.obj((mapped, enc, done) => {
      t.deepEqual(mapped.getName('default'), 'Unit 2 14 Smith Street', 'unchanged');
      t.deepEqual(mapped.getAddress('unit'), '2', 'mapped');
      t.deepEqual(mapped.getAddress('number'), '14', 'mapped');
      t.deepEqual(mapped.getAddress('street'), 'Smith Street', 'unchanged');
      t.end();
      done();
    });

    unitSplitter.pipe(sink);
    unitSplitter.write(fixture);
  });
};

/**
 * AU document whose address number is 'Apartment 2 14': the stream output is
 * expected to carry unit '2' and number '14', with name and street unchanged.
 */
module.exports.tests.australasian_apartment_prefix = (test) => {
  const fixture = new Document('oa', 'example', 1);
  fixture.setName('default', 'Apartment 2 14 Smith Street');
  fixture.setAddress('number', 'Apartment 2 14');
  fixture.setAddress('street', 'Smith Street');
  fixture.setMeta('country_code', 'AU');

  test('maps - split unit from housenumber', (t) => {
    const unitSplitter = mapper();

    // assertions run on whatever document comes out of the mapper stream
    const sink = through.obj((mapped, enc, done) => {
      t.deepEqual(mapped.getName('default'), 'Apartment 2 14 Smith Street', 'unchanged');
      t.deepEqual(mapped.getAddress('unit'), '2', 'mapped');
      t.deepEqual(mapped.getAddress('number'), '14', 'mapped');
      t.deepEqual(mapped.getAddress('street'), 'Smith Street', 'unchanged');
      t.end();
      done();
    });

    unitSplitter.pipe(sink);
    unitSplitter.write(fixture);
  });
};

/**
 * AU document whose address number is 'APT 2 14': the stream output is
 * expected to carry unit '2' and number '14', with name and street unchanged.
 */
module.exports.tests.australasian_apartment_prefix_abbreviated = (test) => {
  const fixture = new Document('oa', 'example', 1);
  fixture.setName('default', 'APT 2 14 Smith Street');
  fixture.setAddress('number', 'APT 2 14');
  fixture.setAddress('street', 'Smith Street');
  fixture.setMeta('country_code', 'AU');

  test('maps - split unit from housenumber', (t) => {
    const unitSplitter = mapper();

    // assertions run on whatever document comes out of the mapper stream
    const sink = through.obj((mapped, enc, done) => {
      t.deepEqual(mapped.getName('default'), 'APT 2 14 Smith Street', 'unchanged');
      t.deepEqual(mapped.getAddress('unit'), '2', 'mapped');
      t.deepEqual(mapped.getAddress('number'), '14', 'mapped');
      t.deepEqual(mapped.getAddress('street'), 'Smith Street', 'unchanged');
      t.end();
      done();
    });

    unitSplitter.pipe(sink);
    unitSplitter.write(fixture);
  });
};


/**
 * Registers a test case with tape, prefixing its name with this suite's label.
 *
 * @param {string} name - test case name, appended to the suite prefix
 * @param {Function} testFunction - test body handed through to tape
 * @returns {*} whatever tape returns
 */
function test(name, testFunction) {
  const prefixedName = 'unit_splitting_mapper: ' + name;
  return tape(prefixedName, testFunction);
}

// invoke every registered test group, handing each one the prefixed test helper
Object.keys(module.exports.tests).forEach((groupName) => {
  module.exports.tests[groupName](test);
});
1 change: 1 addition & 0 deletions test/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ require( './streams/gnafMapperStream' );
require( './streams/germanicAbbreviationStream');
require( './streams/isUSorCAHouseNumberZero' );
require( './streams/recordStream' );
require( './streams/unitSplittingMapperStream' );

0 comments on commit 163e43f

Please sign in to comment.