-
-
Notifications
You must be signed in to change notification settings - Fork 44
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(unit-number-extractor): add new mapper stream to separate concatenated unit numbers
- Loading branch information
1 parent
1fbe944
commit 163e43f
Showing
4 changed files
with
235 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
/** | ||
The unit splitting mapper is responsible for detecting when the address.number | ||
field contains the concatenation of the unit and the housenumber. | ||
In this case we attempt to split the two terms into their constituent parts. | ||
note: Addressing formats vary between countries, so it's unlikely that a pattern | ||
which works for one country will also work internationally. For this reason this | ||
mapper accepts a country code which can be used to select the appropriate pattern(s). | ||
Feel free to make changes to this mapping file! | ||
**/ | ||
|
||
const _ = require('lodash'); | ||
const through = require('through2'); | ||
const logger = require('pelias-logger').get('openstreetmap'); | ||
const mappers = {}; | ||
|
||
// Australasian Unit Number Mapper
// https://auspost.com.au/content/dam/auspost_corp/media/documents/Appendix-01.pdf
// https://www.nzpost.co.nz/sites/nz/files/2021-10/adv358-address-standards.pdf
//
// Splits an address 'number' value which holds a unit number and a housenumber
// concatenated together, writing the parts to address 'unit' and 'number'.
// The document is modified in place; it is left untouched when the value is
// not a string or does not match one of the known patterns.
//
// @param {Object} doc - document exposing getAddress(key) and setAddress(key, value)
// @returns {undefined}
const australasian = (doc) => {
  const raw = doc.getAddress('number');
  if (!_.isString(raw)) { return; }

  // surrounding whitespace carries no meaning; the shortest splittable
  // value is three characters long (eg. '1/2')
  const number = raw.trim();
  if (number.length < 3) { return; }

  // 2/14 | 2 / 14
  // whitespace on either side of the solidus is tolerated
  const solidus = number.match(/^(\d+)\s*\/\s*(\d+)$/);
  if (solidus) {
    doc.setAddress('unit', solidus[1]);
    doc.setAddress('number', solidus[2]);
    return;
  }

  // Flat 2 14 | F 2 14 | Unit 2 14 | APT 2 14
  // the unit designator is matched case-insensitively and then discarded
  const verbose = number.match(/^(flat|f|unit|apartment|apt)\s+(\d+)\s+(\d+)$/i);
  if (verbose) {
    doc.setAddress('unit', verbose[2]);
    doc.setAddress('number', verbose[3]);
    return;
  }
};
|
||
// associate mappers with country codes
// (Australia and New Zealand share the same unit number conventions)
Object.assign(mappers, {
  AU: australasian,
  NZ: australasian
});
|
||
module.exports = function () { | ||
return through.obj((doc, enc, next) => { | ||
try { | ||
// only applies to records with a 'number' set and no 'unit' set (yet). | ||
if (doc.hasAddress('number') && !doc.hasAddress('unit')) { | ||
|
||
// select the appropriate mapper based on country code | ||
const mapper = _.get(mappers, doc.getMeta('country_code')); | ||
if (_.isFunction(mapper)) { | ||
|
||
// run the country-specific mapper | ||
mapper(doc); | ||
} | ||
} | ||
} | ||
|
||
catch (e) { | ||
logger.error('unit_mapper error'); | ||
logger.error(e.stack); | ||
logger.error(JSON.stringify(doc, null, 2)); | ||
} | ||
|
||
return next(null, doc); | ||
}); | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
var tape = require('tape'); | ||
const through = require('through2'); | ||
const mapper = require('../../lib/streams/unitSplittingMapperStream'); | ||
const Document = require('pelias-model').Document; | ||
|
||
module.exports.tests = {}; | ||
|
||
// test exports | ||
// checks the module exports a factory which produces a duplex-style stream
module.exports.tests.interface = function (test) {
  test('interface: factory', (t) => {
    t.equal(typeof mapper, 'function', 'stream factory');
    t.end();
  });

  test('interface: stream', (t) => {
    const instance = mapper();
    t.equal(typeof instance, 'object', 'valid stream');
    t.equal(typeof instance._read, 'function', 'valid readable');
    t.equal(typeof instance._write, 'function', 'valid writeable');
    t.end();
  });
};
|
||
// ===================== australasian unit number mapping ====================== | ||
|
||
// 2/14
module.exports.tests.australasian_solidus = function (test) {
  // fixture: an AU record whose 'number' holds unit and housenumber joined by a solidus
  const input = new Document('oa', 'example', 1);
  input.setName('default', '2/14 Smith Street');
  input.setAddress('number', '2/14');
  input.setAddress('street', 'Smith Street');
  input.setMeta('country_code', 'AU');

  test('maps - split unit from housenumber', (t) => {
    // collects the document emitted by the mapper and asserts on its fields
    const sink = through.obj((actual, enc, next) => {
      t.deepEqual(actual.getName('default'), '2/14 Smith Street', 'unchanged');
      t.deepEqual(actual.getAddress('unit'), '2', 'mapped');
      t.deepEqual(actual.getAddress('number'), '14', 'mapped');
      t.deepEqual(actual.getAddress('street'), 'Smith Street', 'unchanged');
      t.end();
      next();
    });

    const stream = mapper();
    stream.pipe(sink);
    stream.write(input);
  });
};
|
||
// Flat 2 14 | F 2 14 | Unit 2 14 | APT 2 14 | ||
|
||
module.exports.tests.australasian_flat_prefix = function (test) {
  // fixture: an AU record whose 'number' uses the verbose 'Flat' designator
  const input = new Document('oa', 'example', 1);
  input.setName('default', 'Flat 2 14 Smith Street');
  input.setAddress('number', 'Flat 2 14');
  input.setAddress('street', 'Smith Street');
  input.setMeta('country_code', 'AU');

  test('maps - split unit from housenumber', (t) => {
    // collects the document emitted by the mapper and asserts on its fields
    const sink = through.obj((actual, enc, next) => {
      t.deepEqual(actual.getName('default'), 'Flat 2 14 Smith Street', 'unchanged');
      t.deepEqual(actual.getAddress('unit'), '2', 'mapped');
      t.deepEqual(actual.getAddress('number'), '14', 'mapped');
      t.deepEqual(actual.getAddress('street'), 'Smith Street', 'unchanged');
      t.end();
      next();
    });

    const stream = mapper();
    stream.pipe(sink);
    stream.write(input);
  });
};
|
||
module.exports.tests.australasian_flat_prefix_abbreviated = function (test) {
  // fixture: an AU record whose 'number' uses the abbreviated 'F' designator
  const input = new Document('oa', 'example', 1);
  input.setName('default', 'F 2 14 Smith Street');
  input.setAddress('number', 'F 2 14');
  input.setAddress('street', 'Smith Street');
  input.setMeta('country_code', 'AU');

  test('maps - split unit from housenumber', (t) => {
    // collects the document emitted by the mapper and asserts on its fields
    const sink = through.obj((actual, enc, next) => {
      t.deepEqual(actual.getName('default'), 'F 2 14 Smith Street', 'unchanged');
      t.deepEqual(actual.getAddress('unit'), '2', 'mapped');
      t.deepEqual(actual.getAddress('number'), '14', 'mapped');
      t.deepEqual(actual.getAddress('street'), 'Smith Street', 'unchanged');
      t.end();
      next();
    });

    const stream = mapper();
    stream.pipe(sink);
    stream.write(input);
  });
};
|
||
module.exports.tests.australasian_unit_prefix = function (test) {
  // fixture: an AU record whose 'number' uses the 'Unit' designator
  const input = new Document('oa', 'example', 1);
  input.setName('default', 'Unit 2 14 Smith Street');
  input.setAddress('number', 'Unit 2 14');
  input.setAddress('street', 'Smith Street');
  input.setMeta('country_code', 'AU');

  test('maps - split unit from housenumber', (t) => {
    // collects the document emitted by the mapper and asserts on its fields
    const sink = through.obj((actual, enc, next) => {
      t.deepEqual(actual.getName('default'), 'Unit 2 14 Smith Street', 'unchanged');
      t.deepEqual(actual.getAddress('unit'), '2', 'mapped');
      t.deepEqual(actual.getAddress('number'), '14', 'mapped');
      t.deepEqual(actual.getAddress('street'), 'Smith Street', 'unchanged');
      t.end();
      next();
    });

    const stream = mapper();
    stream.pipe(sink);
    stream.write(input);
  });
};
|
||
module.exports.tests.australasian_apartment_prefix = function (test) {
  // fixture: an AU record whose 'number' uses the 'Apartment' designator
  const input = new Document('oa', 'example', 1);
  input.setName('default', 'Apartment 2 14 Smith Street');
  input.setAddress('number', 'Apartment 2 14');
  input.setAddress('street', 'Smith Street');
  input.setMeta('country_code', 'AU');

  test('maps - split unit from housenumber', (t) => {
    // collects the document emitted by the mapper and asserts on its fields
    const sink = through.obj((actual, enc, next) => {
      t.deepEqual(actual.getName('default'), 'Apartment 2 14 Smith Street', 'unchanged');
      t.deepEqual(actual.getAddress('unit'), '2', 'mapped');
      t.deepEqual(actual.getAddress('number'), '14', 'mapped');
      t.deepEqual(actual.getAddress('street'), 'Smith Street', 'unchanged');
      t.end();
      next();
    });

    const stream = mapper();
    stream.pipe(sink);
    stream.write(input);
  });
};
|
||
module.exports.tests.australasian_apartment_prefix_abbreviated = function (test) {
  // fixture: an AU record whose 'number' uses the upper-case 'APT' designator
  const input = new Document('oa', 'example', 1);
  input.setName('default', 'APT 2 14 Smith Street');
  input.setAddress('number', 'APT 2 14');
  input.setAddress('street', 'Smith Street');
  input.setMeta('country_code', 'AU');

  test('maps - split unit from housenumber', (t) => {
    // collects the document emitted by the mapper and asserts on its fields
    const sink = through.obj((actual, enc, next) => {
      t.deepEqual(actual.getName('default'), 'APT 2 14 Smith Street', 'unchanged');
      t.deepEqual(actual.getAddress('unit'), '2', 'mapped');
      t.deepEqual(actual.getAddress('number'), '14', 'mapped');
      t.deepEqual(actual.getAddress('street'), 'Smith Street', 'unchanged');
      t.end();
      next();
    });

    const stream = mapper();
    stream.pipe(sink);
    stream.write(input);
  });
};
|
||
|
||
// registers a tape test case, prefixing its name with this suite's label
function test(name, testFunction) {
  const label = `unit_splitting_mapper: ${name}`;
  return tape(label, testFunction);
}
|
||
// invoke every exported test group, handing each the prefixed tape wrapper
const suites = module.exports.tests;
for (const suiteName in suites) {
  const register = suites[suiteName];
  register(test);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters