From 8af64f309886ddc6e37ed18b6557e75ab9a19c4b Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Thu, 2 May 2019 16:13:50 +0200
Subject: [PATCH 01/55] feat(parser): replace addressit with pelias native
 parser

---
 controller/predicates/is_addressit_parse.js   |  14 -
 controller/predicates/is_pelias_parse.js      |  14 +
 package.json                                  |   1 +
 query/autocomplete.js                         |   2 +-
 query/search_addressit.js                     |   6 +-
 query/text_parser_pelias.js                   |  88 ++++
 routes/v1.js                                  |  12 +-
 sanitizer/_text_pelias_parser.js              | 133 ++++++
 sanitizer/_tokenizer.js                       |   6 +-
 sanitizer/autocomplete.js                     |   2 +-
 ...addressit.js => defer_to_pelias_parser.js} |   2 +-
 ..._addressit_parse.js => is_pelias_parse.js} |  24 +-
 .../fixture/search_full_address_original.js   |  20 +-
 .../search_partial_address_original.js        |   9 -
 .../search_regions_address_original.js        |  11 +-
 test/unit/query/autocomplete.js               |   5 +-
 test/unit/query/search_addressit.js           |  24 +-
 test/unit/run.js                              |   5 +-
 test/unit/sanitizer/_text_pelias_parser.js    | 419 ++++++++++++++++++
 test/unit/sanitizer/_tokenizer.js             |   8 +-
 test/unit/sanitizer/autocomplete.js           |   6 +-
 ...addressit.js => defer_to_pelias_parser.js} |  28 +-
 22 files changed, 723 insertions(+), 116 deletions(-)
 delete mode 100644 controller/predicates/is_addressit_parse.js
 create mode 100644 controller/predicates/is_pelias_parse.js
 create mode 100644 query/text_parser_pelias.js
 create mode 100644 sanitizer/_text_pelias_parser.js
 rename sanitizer/{defer_to_addressit.js => defer_to_pelias_parser.js} (92%)
 rename test/unit/controller/predicates/{is_addressit_parse.js => is_pelias_parse.js} (59%)
 create mode 100644 test/unit/sanitizer/_text_pelias_parser.js
 rename test/unit/sanitizer/{defer_to_addressit.js => defer_to_pelias_parser.js} (73%)

diff --git a/controller/predicates/is_addressit_parse.js b/controller/predicates/is_addressit_parse.js
deleted file mode 100644
index 288b173cb..000000000
--- a/controller/predicates/is_addressit_parse.js
+++ /dev/null
@@ -1,14 +0,0 @@
-const _ = require('lodash');
-const Debug = require('../../helper/debug');
-const debugLog = new Debug('controller:predicates:is_addressit_parse');
-const stackTraceLine = require('../../helper/stackTraceLine');
-
-// returns true IFF req.clean.parser is addressit
-module.exports = (req, res) => {
-  const is_addressit_parse = _.get(req, 'clean.parser') === 'addressit';
-  debugLog.push(req, () => ({
-    reply: is_addressit_parse,
-    stack_trace: stackTraceLine()
-  }));
-  return is_addressit_parse;
-};
diff --git a/controller/predicates/is_pelias_parse.js b/controller/predicates/is_pelias_parse.js
new file mode 100644
index 000000000..79a6149ad
--- /dev/null
+++ b/controller/predicates/is_pelias_parse.js
@@ -0,0 +1,14 @@
+const _ = require('lodash');
+const Debug = require('../../helper/debug');
+const debugLog = new Debug('controller:predicates:is_pelias_parse');
+const stackTraceLine = require('../../helper/stackTraceLine');
+
+// returns true IFF req.clean.parser is pelias
+module.exports = (req, res) => {
+  const is_pelias_parse = _.get(req, 'clean.parser') === 'pelias';
+  debugLog.push(req, () => ({
+    reply: is_pelias_parse,
+    stack_trace: stackTraceLine()
+  }));
+  return is_pelias_parse;
+};
diff --git a/package.json b/package.json
index 0b5102519..359beb0f0 100644
--- a/package.json
+++ b/package.json
@@ -56,6 +56,7 @@
     "pelias-logger": "^1.2.0",
     "pelias-microservice-wrapper": "^1.7.0",
     "pelias-model": "^7.0.0",
+    "pelias-parser": "^1.2.0",
     "pelias-query": "^9.14.0",
     "pelias-sorting": "^1.2.0",
     "predicates": "^2.0.0",
diff --git a/query/autocomplete.js b/query/autocomplete.js
index c2ee3b5fe..446e5e3fe 100644
--- a/query/autocomplete.js
+++ b/query/autocomplete.js
@@ -1,6 +1,6 @@
 const peliasQuery = require('pelias-query');
 const defaults = require('./autocomplete_defaults');
-const textParser = require('./text_parser_addressit');
+const textParser = require('./text_parser_pelias');
 const check = require('check-types');
 const logger = require('pelias-logger').get('api');
 const config = require('pelias-config').generate();
diff --git a/query/search_addressit.js b/query/search_addressit.js
index 04823dd80..00d3acf12 100644
--- a/query/search_addressit.js
+++ b/query/search_addressit.js
@@ -1,6 +1,6 @@
 const peliasQuery = require('pelias-query');
 const defaults = require('./search_defaults');
-const textParser = require('./text_parser_addressit');
+const textParser = require('./text_parser_pelias');
 const check = require('check-types');
 const logger = require('pelias-logger').get('api');
 const config = require('pelias-config').generate().api;
@@ -37,8 +37,8 @@ query.score( peliasQuery.view.address('postcode') );
 // country_a and region_a are left as matches here because the text-analyzer
 // can sometimes detect them, in which case a query more specific than a
 // multi_match is appropriate.
-query.score( peliasQuery.view.admin('country_a') );
-query.score( peliasQuery.view.admin('region_a') );
+// query.score( peliasQuery.view.admin('country_a') );
+// query.score( peliasQuery.view.admin('region_a') );
 query.score( peliasQuery.view.admin_multi_match(adminFields, 'peliasAdmin') );
 query.score( views.custom_boosts( config.customBoosts ) );
 
diff --git a/query/text_parser_pelias.js b/query/text_parser_pelias.js
new file mode 100644
index 000000000..394b80d3d
--- /dev/null
+++ b/query/text_parser_pelias.js
@@ -0,0 +1,88 @@
+const _ = require('lodash');
+const logger = require('pelias-logger').get('api');
+const placeTypes = require('../helper/placeTypes');
+
+/*
+This list should only contain admin fields we are comfortable matching in the case
+when we can't identify parts of an address. This shouldn't contain fields like country_a
+or postalcode because we should only try to match those when we're sure that's what they are.
+ */
+const adminFields = placeTypes.concat([
+  'region_a'
+]);
+
+// all the address parsing logic
+function addParsedVariablesToQueryVariables(clean, vs) {
+  // ==== add parsed matches [address components] ====
+
+  // prefix (any unparsed text before any matched fields)
+  if (!_.isEmpty(clean.parsed_text.name)) {
+    vs.var('input:query', clean.parsed_text.name);
+  }
+
+  // housenumber
+  if (!_.isEmpty(clean.parsed_text.housenumber)) {
+    vs.var('input:housenumber', clean.parsed_text.housenumber);
+  }
+
+  // street name
+  if (!_.isEmpty(clean.parsed_text.street)) {
+    vs.var('input:street', clean.parsed_text.street);
+  }
+
+  // cross street name
+  if (!_.isEmpty(clean.parsed_text.cross_street)) {
+    vs.var('input:cross_street', clean.parsed_text.cross_street);
+  }
+
+  // postcode
+  if (!_.isEmpty(clean.parsed_text.postcode)) {
+    vs.var('input:postcode', clean.parsed_text.postcode);
+  }
+
+  // ==== legacy components ====
+  // @todo: can we remove this functionality?
+
+  // is the 'name' label set?
+  if (clean.parsed_text.name) {
+    vs.var('input:name', clean.parsed_text.name);
+  }
+  else {
+    // is it a street address?
+    var isStreetAddress = !_.isEmpty(clean.parsed_text.housenumber) && !_.isEmpty(clean.parsed_text.street);
+    if (isStreetAddress) {
+      vs.var('input:name', clean.parsed_text.housenumber + ' ' + clean.parsed_text.street);
+    }
+  }
+
+  // ==== add parsed matches [admin components] ====
+
+  // // locality
+  // if (!_.isEmpty(clean.parsed_text.locality)) {
+  //   vs.var('input:locality', clean.parsed_text.locality);
+  // }
+
+  // // region
+  // if (!_.isEmpty(clean.parsed_text.region)) {
+  //   vs.var('input:region', clean.parsed_text.region);
+  // }
+
+  // // country
+  // if (!_.isEmpty(clean.parsed_text.country)) {
+  //   vs.var('input:country', clean.parsed_text.country);
+  // }
+
+  // postfix
+  if (!_.isEmpty(clean.parsed_text.admin_parts)) {
+    // assign postfix to any admin fields which currently don't have a value assigned.
+    
+    // cycle through fields and set fields which are still currently unset
+    adminFields.forEach(key => {
+      if (!vs.isset('input:' + key)) {
+        vs.var('input:' + key, clean.parsed_text.admin_parts);
+      }
+    });
+  }
+}
+
+module.exports = addParsedVariablesToQueryVariables;
diff --git a/routes/v1.js b/routes/v1.js
index 29895ea3e..535203a49 100644
--- a/routes/v1.js
+++ b/routes/v1.js
@@ -11,7 +11,7 @@ var sanitizers = {
   autocomplete: require('../sanitizer/autocomplete'),
   place: require('../sanitizer/place'),
   search: require('../sanitizer/search'),
-  defer_to_addressit: require('../sanitizer/defer_to_addressit'),
+  defer_to_pelias_parser: require('../sanitizer/defer_to_pelias_parser'),
   structured_geocoding: require('../sanitizer/structured_geocoding'),
   reverse: require('../sanitizer/reverse'),
   nearby: require('../sanitizer/nearby')
@@ -74,7 +74,7 @@ const hasRequestErrors = require('../controller/predicates/has_request_errors');
 const isCoarseReverse = require('../controller/predicates/is_coarse_reverse');
 const isAdminOnlyAnalysis = require('../controller/predicates/is_admin_only_analysis');
 const hasResultsAtLayers = require('../controller/predicates/has_results_at_layers');
-const isAddressItParse = require('../controller/predicates/is_addressit_parse');
+const isPeliasItParse = require('../controller/predicates/is_pelias_parse');
 const hasRequestCategories = require('../controller/predicates/has_request_parameter')('categories');
 const isOnlyNonAdminLayers = require('../controller/predicates/is_only_non_admin_layers');
 const isRequestLayersAnyAddressRelated = require('../controller/predicates/is_request_layers_any_address_related');
@@ -224,8 +224,8 @@ function addRoutes(app, peliasConfig) {
     not(placeholderShouldHaveExecuted)
   );
 
-  // defer to addressit for analysis IF there's no response AND placeholder should not have executed
-  const shouldDeferToAddressIt = all(
+  // defer to pelias parser for analysis IF there are no request errors AND no response data
+  const shouldDeferToPeliasParser = all(
     not(hasRequestErrors),
     not(hasResponseData)
   );
@@ -233,7 +233,7 @@ function addRoutes(app, peliasConfig) {
   // call search addressit query if addressit was the parser
   const searchAddressitShouldExecute = all(
     not(hasRequestErrors),
-    isAddressItParse
+    isPeliasItParse
   );
 
   // get language adjustments if:
@@ -291,7 +291,7 @@ function addRoutes(app, peliasConfig) {
       // try 3 different query types: address search using ids, cascading fallback, addressit
       controllers.search(peliasConfig.api, esclient, queries.address_using_ids, searchWithIdsShouldExecute),
       controllers.search(peliasConfig.api, esclient, queries.cascading_fallback, fallbackQueryShouldExecute),
-      sanitizers.defer_to_addressit(shouldDeferToAddressIt), //run additional sanitizers needed for addressit parser
+      sanitizers.defer_to_pelias_parser(shouldDeferToPeliasParser), //run additional sanitizers needed for pelias parser
       controllers.search(peliasConfig.api, esclient, queries.search_addressit, searchAddressitShouldExecute),
       postProc.trimByGranularity(),
       postProc.distances('focus.point.'),
diff --git a/sanitizer/_text_pelias_parser.js b/sanitizer/_text_pelias_parser.js
new file mode 100644
index 000000000..6c56df7af
--- /dev/null
+++ b/sanitizer/_text_pelias_parser.js
@@ -0,0 +1,133 @@
+const Tokenizer = require('pelias-parser/tokenization/Tokenizer');
+const Solution = require('pelias-parser/solver/Solution');
+const AddressParser = require('pelias-parser/parser/AddressParser');
+const parser = new AddressParser();
+const _ = require('lodash');
+
+/**
+  this module provides fulltext parsing using the pelias/parser module.
+  see: https://github.com/pelias/parser
+
+  'pelias parser' provides the following fields:
+  'name',
+  'housenumber', 'street', 'cross_street', 'postcode',
+  'locality', 'region', 'country',
+  'admin_parts'
+**/
+
+// Validates raw.text; on success sets clean.text, clean.parser and clean.parsed_text.
+function _sanitize (raw, clean) {
+  // error & warning messages returned to the caller
+  const messages = { errors: [], warnings: [] };
+
+  // _.trim() coerces any value to a string, so the type check must inspect raw.text itself
+  const text = _.isString(raw.text) ? _.trim(raw.text) : '';
+  if (_.isEmpty(text)) {
+    messages.errors.push('invalid param \'text\': text length, must be >0');
+  }
+
+  // valid input 'text'
+  else {
+    // parse text with pelias/parser
+    clean.text = text;
+    clean.parser = 'pelias';
+    clean.parsed_text = parse(clean);
+  }
+
+  return messages;
+}
+
+function parse (clean) {
+  // parse text
+  const t = new Tokenizer(clean.text);
+  parser.classify(t);
+  parser.solve(t);
+
+  // only use the first solution generated
+  // @todo: we could expand this in the future to accommodate more solutions
+  let solution = new Solution();
+  if (t.solution.length) { solution = t.solution[0]; }
+
+  // 1. map the output of the parser in to parsed_text
+  let parsed_text = {};
+
+  solution.pair.forEach(p => {
+    let field = p.classification.label;
+
+    // handle intersections
+    if (field === 'street') {
+      field = (!parsed_text.street) ? 'street' : 'cross_street';
+    }
+
+    // set field
+    parsed_text[field] = p.span.body;
+  });
+
+  // 2. find any unclassified characters:
+
+  // generate a classification mask, eg:
+  // 'Foo Cafe 10 Main St London 10010 Earth'
+  // '         NN SSSSSSS AAAAAA PPPPP      '
+  let mask = solution.mask(t);
+
+  // the entire input text as seen by the parser with any postcode classification(s) removed
+  let body = t.span.body.split('')
+    .map((c, i) => (mask[i] !== 'P') ? c : ' ')
+    .join('');
+
+  // scan through the input text and 'bucket' characters in to one of two buckets:
+  // prefix: all unparsed characters that came before any parsed fields
+  // postfix: all characters (minus postcodes) from the first admin field to the end of the string
+
+  // set cursor to the first classified character
+  let cursor = mask.search(/\S/);
+  if (cursor === -1) { cursor = body.length; }
+  let prefix = _.trim(body.substr(0, cursor), ' ,');
+
+  // set cursor to the first character of the first classified admin field
+  cursor = mask.indexOf('A');
+  if (cursor === -1) { cursor = body.length; }
+  let postfix = _.trim(body.substr(cursor), ' ,');
+
+  // clean up spacing around commas
+  prefix = prefix.split(/[,\n\t]/).join(', ');
+  postfix = postfix.split(/[,\n\t]/).join(', ');
+
+  // squash multiple adjacent whitespace characters into a single space
+  prefix = prefix.replace(/\s\s+/g, ' ').trim();
+  postfix = postfix.replace(/\s\s+/g, ' ').trim();
+
+  // handle the case where 'parsed_text' is completely empty
+  // ie. the parser was not able to classify anything at all
+  // note: this is common for venue names
+  if (Object.keys(parsed_text).length === 0) {
+    if (prefix.length && !postfix.length) {
+      // if the prefix contains a comma then keep only the
+      // first comma-separated part as the prefix (the name)
+      // and move the remaining parts into the postfix
+      // eg. 'Friendly Cafe, Footown'
+      // note: this is how the old 'naive' parser worked
+      let split = prefix.split(',');
+      if (split.length > 1) {
+        prefix = split[0].trim();
+        postfix = split.slice(1).join(', ').trim();
+      }
+    }
+  }
+
+  // 3. store the unparsed characters in fields which can be used for querying
+  if (prefix.length) { parsed_text.name = prefix; }
+  if (postfix.length) { parsed_text.admin_parts = postfix; }
+
+  return parsed_text;
+}
+
+function _expected () {
+  return [{ name: 'text' }];
+}
+
+// export function
+module.exports = () => ({
+  sanitize: _sanitize,
+  expected: _expected
+});
diff --git a/sanitizer/_tokenizer.js b/sanitizer/_tokenizer.js
index 966b88be4..cc02e42a1 100644
--- a/sanitizer/_tokenizer.js
+++ b/sanitizer/_tokenizer.js
@@ -36,14 +36,14 @@ function _sanitize( raw, clean ){
 
     // else handle the case where parsed_text.street was produced but
     // no parsed_text.name is produced.
-    // additionally, handle the case where parsed_text.number is present
+    // additionally, handle the case where parsed_text.housenumber is present
     // note: the addressit module may also produce parsed_text.unit info
     // for now, we discard that information as we don't have an appropriate
     else if( _.has(clean.parsed_text, 'street') ){
       text = [
-        clean.parsed_text.number,
+        clean.parsed_text.housenumber,
         clean.parsed_text.street
-      ].filter(function(el){return el;})
+      ].filter((el) => el)
       .join(' '); // remove empty elements
     }
   }
diff --git a/sanitizer/autocomplete.js b/sanitizer/autocomplete.js
index e987da2da..1abfb7cad 100644
--- a/sanitizer/autocomplete.js
+++ b/sanitizer/autocomplete.js
@@ -6,7 +6,7 @@ module.exports.middleware = (_api_pelias_config) => {
   var sanitizers = {
       singleScalarParameters: require('../sanitizer/_single_scalar_parameters')(),
       debug: require('../sanitizer/_debug')(),
-      text: require('../sanitizer/_text_addressit')(),
+      text: require('../sanitizer/_text_pelias_parser')(),
       tokenizer: require('../sanitizer/_tokenizer')(),
       size: require('../sanitizer/_size')(/* use defaults*/),
       layers: require('../sanitizer/_targets')('layers', type_mapping.layer_mapping),
diff --git a/sanitizer/defer_to_addressit.js b/sanitizer/defer_to_pelias_parser.js
similarity index 92%
rename from sanitizer/defer_to_addressit.js
rename to sanitizer/defer_to_pelias_parser.js
index 371687046..4ff8bdb7b 100644
--- a/sanitizer/defer_to_addressit.js
+++ b/sanitizer/defer_to_pelias_parser.js
@@ -1,7 +1,7 @@
 const sanitizeAll = require('../sanitizer/sanitizeAll'),
     sanitizers = {
       debug: require('../sanitizer/_debug')(),
-      text: require('../sanitizer/_text_addressit')()
+      text: require('../sanitizer/_text_pelias_parser')()
     };
 
 const logger = require('pelias-logger').get('api');
diff --git a/test/unit/controller/predicates/is_addressit_parse.js b/test/unit/controller/predicates/is_pelias_parse.js
similarity index 59%
rename from test/unit/controller/predicates/is_addressit_parse.js
rename to test/unit/controller/predicates/is_pelias_parse.js
index 219c36fff..07fecefdf 100644
--- a/test/unit/controller/predicates/is_addressit_parse.js
+++ b/test/unit/controller/predicates/is_pelias_parse.js
@@ -1,24 +1,24 @@
 const _ = require('lodash');
-const is_addressit_parse = require('../../../../controller/predicates/is_addressit_parse');
+const is_pelias_parse = require('../../../../controller/predicates/is_pelias_parse');
 
 module.exports.tests = {};
 
 module.exports.tests.interface = (test, common) => {
   test('valid interface', t => {
-    t.ok(_.isFunction(is_addressit_parse), 'is_addressit_parse is a function');
+    t.ok(_.isFunction(is_pelias_parse), 'is_pelias_parse is a function');
     t.end();
   });
 };
 
 module.exports.tests.true_conditions = (test, common) => {
-  test('request.clean.parser=addressit should return true', t => {
+  test('request.clean.parser=pelias should return true', t => {
     const req = {
       clean: {
-        parser: 'addressit'
+        parser: 'pelias'
       }
     };
 
-    t.ok(is_addressit_parse(req));
+    t.ok(is_pelias_parse(req));
     t.end();
 
   });
@@ -27,14 +27,14 @@ module.exports.tests.true_conditions = (test, common) => {
 
 module.exports.tests.false_conditions = (test, common) => {
   test('undefined request should return false', t => {
-    t.notOk(is_addressit_parse(undefined));
+    t.notOk(is_pelias_parse(undefined));
     t.end();
   });
 
   test('undefined request.clean should return false', t => {
     const req = {};
 
-    t.notOk(is_addressit_parse(req));
+    t.notOk(is_pelias_parse(req));
     t.end();
   });
 
@@ -43,18 +43,18 @@ module.exports.tests.false_conditions = (test, common) => {
       clean: {}
     };
 
-    t.notOk(is_addressit_parse(req));
+    t.notOk(is_pelias_parse(req));
     t.end();
   });
 
-  test('non-\'addressit\' request.clean.parser should return false', t => {
+  test('non-\'pelias\' request.clean.parser should return false', t => {
     const req = {
       clean: {
-        parser: 'not addressit'
+        parser: 'not pelias'
       }
     };
 
-    t.notOk(is_addressit_parse(req));
+    t.notOk(is_pelias_parse(req));
     t.end();
   });
 
@@ -62,7 +62,7 @@ module.exports.tests.false_conditions = (test, common) => {
 
 module.exports.all = (tape, common) => {
   function test(name, testFunction) {
-    return tape(`GET /is_addressit_parse ${name}`, testFunction);
+    return tape(`GET /is_pelias_parse ${name}`, testFunction);
   }
 
   for( const testCase in module.exports.tests ){
diff --git a/test/unit/fixture/search_full_address_original.js b/test/unit/fixture/search_full_address_original.js
index e400def60..9caff384b 100644
--- a/test/unit/fixture/search_full_address_original.js
+++ b/test/unit/fixture/search_full_address_original.js
@@ -105,24 +105,6 @@ module.exports = {
             'analyzer': vs['address:postcode:analyzer']
           }
         }
-      }, {
-        'match': {
-          'parent.country_a': {
-            'query': 'USA',
-            'cutoff_frequency': 0.01,
-            'boost': vs['admin:country_a:boost'],
-            'analyzer': vs['admin:country_a:analyzer']
-          }
-        }
-      }, {
-        'match': {
-          'parent.region_a': {
-            'query': 'NY',
-            'cutoff_frequency': 0.01,
-            'boost': vs['admin:region_a:boost'],
-            'analyzer': vs['admin:region_a:analyzer']
-          }
-        }
       }, {
         'multi_match': {
             'fields': [
@@ -135,7 +117,7 @@ module.exports = {
               'parent.neighbourhood^1',
               'parent.region_a^1'
             ],
-            'query': 'new york',
+            'query': 'new york ny US',
             'analyzer': 'peliasAdmin',
             'cutoff_frequency': 0.01
         }
diff --git a/test/unit/fixture/search_partial_address_original.js b/test/unit/fixture/search_partial_address_original.js
index 57bd61f84..f808896d4 100644
--- a/test/unit/fixture/search_partial_address_original.js
+++ b/test/unit/fixture/search_partial_address_original.js
@@ -77,15 +77,6 @@ module.exports = {
             'weight': 2
           }]
         }
-      }, {
-        'match': {
-          'parent.region_a': {
-            'analyzer': 'peliasAdmin',
-            'boost': 1,
-            'cutoff_frequency': 0.01,
-            'query': 'NY'
-          }
-        }
       }, {
         'multi_match': {
             'fields': [
diff --git a/test/unit/fixture/search_regions_address_original.js b/test/unit/fixture/search_regions_address_original.js
index 9d05aaefa..c5053a037 100644
--- a/test/unit/fixture/search_regions_address_original.js
+++ b/test/unit/fixture/search_regions_address_original.js
@@ -95,15 +95,6 @@ module.exports = {
             'analyzer': vs['address:street:analyzer']
           }
         }
-      }, {
-        'match': {
-          'parent.region_a': {
-            'query': 'NY',
-            'cutoff_frequency': 0.01,
-            'boost': vs['admin:region_a:boost'],
-            'analyzer': vs['admin:region_a:analyzer']
-          }
-        }
       }, {
         'multi_match': {
             'fields': [
@@ -116,7 +107,7 @@ module.exports = {
               'parent.neighbourhood^1',
               'parent.region_a^1'
             ],
-            'query': 'manhattan',
+            'query': 'manhattan ny',
             'analyzer': 'peliasAdmin',
             'cutoff_frequency': 0.01
         }
diff --git a/test/unit/query/autocomplete.js b/test/unit/query/autocomplete.js
index 16d16f4b6..6b324c898 100644
--- a/test/unit/query/autocomplete.js
+++ b/test/unit/query/autocomplete.js
@@ -57,7 +57,6 @@ module.exports.tests.query = function(test, common) {
       text: 'one two, three',
       parsed_text: {
         name: 'one two',
-        regions: [ 'one two', 'three' ],
         admin_parts: 'three'
       },
       tokens: ['one','two'],
@@ -258,9 +257,9 @@ module.exports.tests.query = function(test, common) {
     var query = generate({
       text: 'k road, laird',
       parsed_text: {
-        name: 'k road',
         street: 'k road',
-        regions: [ 'laird' ]
+        locality: 'laird',
+        admin_parts: 'laird'
       },
       tokens: ['k', 'road'],
       tokens_complete: ['k', 'road'],
diff --git a/test/unit/query/search_addressit.js b/test/unit/query/search_addressit.js
index 2d1180e23..401d282e2 100644
--- a/test/unit/query/search_addressit.js
+++ b/test/unit/query/search_addressit.js
@@ -106,12 +106,12 @@ module.exports.tests.query = function(test, common) {
       layers: [ 'address', 'venue', 'country', 'region', 'county', 'neighbourhood', 'locality', 'localadmin' ],
       querySize: 10,
       parsed_text: {
-        number: '123',
+        housenumber: '123',
         street: 'main st',
-        state: 'NY',
-        country: 'USA',
-        postalcode: '10010',
-        regions: [ 'new york' ]
+        region: 'new york',
+        locality: 'ny',
+        postcode: '10010',
+        admin_parts: 'new york ny US'
       }
     });
 
@@ -127,9 +127,9 @@ module.exports.tests.query = function(test, common) {
     var query = generate({ text: 'soho grand, new york',
       layers: [ 'address', 'venue', 'country', 'region', 'county', 'neighbourhood', 'locality', 'localadmin' ],
       querySize: 10,
-      parsed_text: { name: 'soho grand',
-        state: 'NY',
-        regions: [ 'soho grand' ],
+      parsed_text: {
+        name: 'soho grand',
+        region: 'new york',
         admin_parts: 'new york'
       }
     });
@@ -146,10 +146,12 @@ module.exports.tests.query = function(test, common) {
     var query = generate({ text: '1 water st manhattan ny',
       layers: [ 'address', 'venue', 'country', 'region', 'county', 'neighbourhood', 'locality', 'localadmin' ],
       querySize: 10,
-      parsed_text: { number: '1',
+      parsed_text: {
+        housenumber: '1',
         street: 'water st',
-        state: 'NY',
-        regions: [ 'manhattan' ]
+        locality: 'manhattan',
+        region: 'ny',
+        admin_parts: 'manhattan ny'
       }
     });
 
diff --git a/test/unit/run.js b/test/unit/run.js
index a4f826991..1d38d5ac6 100644
--- a/test/unit/run.js
+++ b/test/unit/run.js
@@ -24,7 +24,7 @@ var tests = [
   require('./controller/predicates/has_results_at_layers'),
   require('./controller/predicates/has_request_parameter'),
   require('./controller/predicates/has_request_errors'),
-  require('./controller/predicates/is_addressit_parse'),
+  require('./controller/predicates/is_pelias_parse'),
   require('./controller/predicates/is_admin_only_analysis'),
   require('./controller/predicates/is_coarse_reverse'),
   require('./controller/predicates/is_only_non_admin_layers'),
@@ -98,6 +98,7 @@ var tests = [
   require('./sanitizer/_synthesize_analysis'),
   require('./sanitizer/_text'),
   require('./sanitizer/_text_addressit'),
+  require('./sanitizer/_text_pelias_parser'),
   require('./sanitizer/_tokenizer'),
   require('./sanitizer/_categories'),
   require('./sanitizer/_boundary_gid'),
@@ -108,7 +109,7 @@ var tests = [
   require('./sanitizer/reverse'),
   require('./sanitizer/sanitizeAll'),
   require('./sanitizer/search'),
-  require('./sanitizer/defer_to_addressit'),
+  require('./sanitizer/defer_to_pelias_parser'),
   require('./sanitizer/wrap'),
   require('./service/configurations/Interpolation'),
   require('./service/configurations/Language'),
diff --git a/test/unit/sanitizer/_text_pelias_parser.js b/test/unit/sanitizer/_text_pelias_parser.js
new file mode 100644
index 000000000..9a40ef7fc
--- /dev/null
+++ b/test/unit/sanitizer/_text_pelias_parser.js
@@ -0,0 +1,419 @@
+var sanitizer = require('../../../sanitizer/_text_pelias_parser')();
+var type_mapping = require('../../../helper/type_mapping');
+
+module.exports.tests = {};
+
+module.exports.tests.text_parser = function (test, common) {
+  test('short input text has admin layers set ', function (t) {
+    var raw = {
+      text: 'emp'  //start of empire state building
+    };
+    var clean = {
+    };
+
+    var messages = sanitizer.sanitize(raw, clean);
+
+    t.deepEquals(messages.errors, [], 'no errors');
+    t.deepEquals(messages.warnings, [], 'no warnings');
+
+    t.end();
+  });
+
+  var usQueries = [
+    { name: 'soho', admin_parts: 'new york', region: 'NY' },
+    { name: '123 main', admin_parts: 'new york', region: 'NY' }
+  ];
+
+  usQueries.forEach(function (query) {
+    test('naive parsing ' + query, function (t) {
+      var raw = {
+        text: query.name + ', ' + query.admin_parts
+      };
+      var clean = {};
+
+      var expected_clean = {
+        text: raw.text.trim(),
+        parser: 'pelias',
+        parsed_text: {
+          name: query.name,
+          region: query.admin_parts,
+          admin_parts: query.admin_parts
+        }
+      };
+
+      var messages = sanitizer.sanitize(raw, clean);
+
+      t.deepEqual(messages, { errors: [], warnings: [] });
+      t.deepEqual(clean, expected_clean);
+      t.end();
+
+    });
+
+    test('naive parsing ' + query + ' without spaces', function (t) {
+      var raw = {
+        text: query.name + ',' + query.admin_parts
+      };
+      var clean = {};
+
+      var expected_clean = {
+        text: raw.text.trim(),
+        parser: 'pelias',
+        parsed_text: {
+          name: query.name,
+          region: query.admin_parts,
+          admin_parts: query.admin_parts
+        }
+      };
+
+      var messages = sanitizer.sanitize(raw, clean);
+
+      t.deepEqual(messages, { errors: [], warnings: [] });
+      t.deepEqual(clean, expected_clean);
+      t.end();
+
+    });
+
+    test('naive parsing ' + query + ' with leading and trailing junk', function (t) {
+      var raw = {
+        text: ' , ' + query.name + ',' + query.admin_parts + ' , '
+      };
+      var clean = {};
+
+      var expected_clean = {
+        text: raw.text.trim(),
+        parser: 'pelias',
+        parsed_text: {
+          name: query.name,
+          region: query.admin_parts,
+          admin_parts: query.admin_parts
+        }
+      };
+
+      var messages = sanitizer.sanitize(raw, clean);
+
+      t.deepEqual(messages, { errors: [], warnings: [] });
+      t.deepEqual(clean, expected_clean);
+      t.end();
+
+    });
+  });
+
+  var nonUSQueries = [
+    { name: 'chelsea', admin_parts: 'london' },
+  ];
+
+  nonUSQueries.forEach(function (query) {
+    test('naive parsing ' + query, function (t) {
+      var raw = {
+        text: query.name + ', ' + query.admin_parts
+      };
+      var clean = {};
+
+      var expected_clean = {
+        text: query.name + ', ' + query.admin_parts,
+        parser: 'pelias',
+        parsed_text: {
+          locality: query.name,
+          admin_parts: query.name + ', ' + query.admin_parts
+        }
+      };
+
+      var messages = sanitizer.sanitize(raw, clean);
+
+      t.deepEqual(messages, { errors: [], warnings: [] });
+      t.deepEqual(clean, expected_clean);
+      t.end();
+
+    });
+
+    test('naive parsing ' + query + ' without spaces', function (t) {
+      var raw = {
+        text: query.name + ',' + query.admin_parts
+      };
+      var clean = {};
+
+      var expected_clean = {
+        text: query.name + ',' + query.admin_parts,
+        parser: 'pelias',
+        parsed_text: {
+          locality: query.name,
+          admin_parts: query.name + ', ' + query.admin_parts
+        }
+      };
+
+      var messages = sanitizer.sanitize(raw, clean);
+
+      t.deepEqual(messages, { errors: [], warnings: [] });
+      t.deepEqual(clean, expected_clean);
+      t.end();
+
+    });
+
+  });
+
+  test('query with one token', function (t) {
+    var raw = {
+      text: 'yugolsavia'
+    };
+    var clean = {};
+    clean.parsed_text = 'this should be removed';
+
+    var expected_clean = {
+      parser: 'pelias',
+      text: 'yugolsavia',
+      parsed_text: {
+        name: 'yugolsavia'
+      }
+    };
+
+    var messages = sanitizer.sanitize(raw, clean);
+
+    t.deepEqual(messages, { errors: [], warnings: [] });
+    t.deepEqual(clean, expected_clean);
+    t.end();
+
+  });
+
+  test('query with two tokens, no numbers', function (t) {
+    var raw = {
+      text: 'small town'
+    };
+    var clean = {};
+    clean.parsed_text = 'this should be removed';
+
+    var expected_clean = {
+      parser: 'pelias',
+      text: 'small town',
+      parsed_text: {
+        name: 'small town'
+      }
+    };
+
+    var messages = sanitizer.sanitize(raw, clean);
+
+    t.deepEqual(messages, { errors: [], warnings: [] });
+    t.deepEqual(clean, expected_clean);
+    t.end();
+
+  });
+
+  test('query with two tokens, number first', function (t) {
+    var raw = {
+      text: '123 main'
+    };
+    var clean = {};
+    clean.parsed_text = 'this should be removed';
+
+    var expected_clean = {
+      parser: 'pelias',
+      text: '123 main',
+      parsed_text: {
+        name: '123 main'
+      }
+    };
+
+    var messages = sanitizer.sanitize(raw, clean);
+
+    t.deepEqual(messages, { errors: [], warnings: [] });
+    t.deepEqual(clean, expected_clean);
+    t.end();
+
+  });
+
+  test('query with two tokens, number second', function (t) {
+    var raw = {
+      text: 'main 123'
+    };
+    var clean = {};
+    clean.parsed_text = 'this should be removed';
+
+    var expected_clean = {
+      parser: 'pelias',
+      text: 'main 123',
+      parsed_text: {
+        name: 'main 123'
+      }
+    };
+
+    var messages = sanitizer.sanitize(raw, clean);
+
+    t.deepEqual(messages, { errors: [], warnings: [] });
+    t.deepEqual(clean, expected_clean);
+    t.end();
+
+  });
+
+  test('query with many tokens', function (t) {
+    var raw = {
+      text: 'main particle new york'
+    };
+    var clean = {};
+    clean.parsed_text = 'this should be removed';
+
+    var expected_clean = {
+      text: 'main particle new york',
+      parser: 'pelias',
+      parsed_text: {
+        name: 'main particle',
+        region: 'new york',
+        admin_parts: 'new york'
+      }
+    };
+
+    var messages = sanitizer.sanitize(raw, clean);
+
+    t.deepEqual(messages, { errors: [], warnings: [] });
+    t.deepEqual(clean, expected_clean);
+    t.end();
+
+  });
+
+  test('valid address, house number', function (t) {
+    var raw = {
+      text: '123 main st new york ny'
+    };
+    var clean = {};
+
+    var expected_clean = {
+      text: '123 main st new york ny',
+      parser: 'pelias',
+      parsed_text: {
+        housenumber: '123',
+        street: 'main st',
+        region: 'new york',
+        locality: 'ny',
+        admin_parts: 'new york ny'
+      }
+    };
+
+    var messages = sanitizer.sanitize(raw, clean);
+
+    t.deepEqual(messages, { errors: [], warnings: [] });
+    t.deepEqual(clean, expected_clean);
+    t.end();
+
+  });
+
+  test('valid address, zipcode', function (t) {
+    var raw = {
+      text: '123 main st new york ny 10010'
+    };
+    var clean = {};
+
+    var expected_clean = {
+      text: '123 main st new york ny 10010',
+      parser: 'pelias',
+      parsed_text: {
+        housenumber: '123',
+        street: 'main st',
+        region: 'new york',
+        locality: 'ny',
+        postcode: '10010',
+        admin_parts: 'new york ny'
+      }
+    };
+
+    var messages = sanitizer.sanitize(raw, clean);
+
+    t.deepEqual(messages, { errors: [], warnings: [] });
+    t.deepEqual(clean, expected_clean);
+    t.end();
+  });
+
+  test('valid address with leading 0s in zipcode', function (t) {
+    var raw = {
+      text: '339 W Main St, Cheshire, 06410'
+    };
+    var clean = {};
+
+    var expected_clean = {
+      text: '339 W Main St, Cheshire, 06410',
+      parser: 'pelias',
+      parsed_text: {
+        housenumber: '339',
+        street: 'W Main St',
+        postcode: '06410',
+        region: 'Cheshire',
+        admin_parts: 'Cheshire'
+      }
+    };
+
+    var messages = sanitizer.sanitize(raw, clean);
+
+    t.deepEqual(messages, { errors: [], warnings: [] });
+    t.deepEqual(clean, expected_clean);
+    t.end();
+  });
+
+  test('valid address without spaces after commas', function (t) {
+    var raw = {
+      text: '339 W Main St,Lancaster,PA'
+    };
+    var clean = {};
+
+    var expected_clean = {
+      text: '339 W Main St,Lancaster,PA',
+      parser: 'pelias',
+      parsed_text: {
+        housenumber: '339',
+        street: 'W Main St',
+        locality: 'Lancaster',
+        region: 'PA',
+        admin_parts: 'Lancaster, PA'
+      }
+    };
+
+    var messages = sanitizer.sanitize(raw, clean);
+
+    t.deepEqual(messages, { errors: [], warnings: [] });
+    t.deepEqual(clean, expected_clean);
+    t.end();
+
+  });
+
+  test('whitespace-only input counts as empty', (t) => {
+    const raw = { text: ' ' };
+    const clean = {};
+
+    const expected_clean = {};
+
+    const messages = sanitizer.sanitize(raw, clean);
+
+    t.deepEquals(clean, expected_clean);
+    t.deepEquals(messages.errors, ['invalid param \'text\': text length, must be >0']);
+    t.deepEquals(messages.warnings, [], 'no warnings');
+    t.end();
+  });
+
+  test('return an array of expected parameters in object form for validation', (t) => {
+    const expected = [{ name: 'text' }];
+    const validParameters = sanitizer.expected();
+    t.deepEquals(validParameters, expected);
+    t.end();
+  });
+
+  test('Australia - state only', (t) => {
+    const raw = { text: 'NSW' };
+    const clean = {};
+    const expected_clean = { text: 'NSW', parser: 'pelias', parsed_text: {
+      region: 'NSW',
+      admin_parts: 'NSW'
+    }};
+    const messages = sanitizer.sanitize(raw, clean);
+
+    t.deepEquals(clean, expected_clean);
+    t.deepEquals(messages.errors, []);
+    t.deepEquals(messages.warnings, [], 'no warnings');
+    t.end();
+  });
+};
+
+module.exports.all = function (tape, common) {
+  function test(name, testFunction) {
+    return tape('sanitizer _text_pelias_parser: ' + name, testFunction);
+  }
+
+  for (var testCase in module.exports.tests) {
+    module.exports.tests[testCase](test, common);
+  }
+};
diff --git a/test/unit/sanitizer/_tokenizer.js b/test/unit/sanitizer/_tokenizer.js
index 3de769d5c..181a32106 100644
--- a/test/unit/sanitizer/_tokenizer.js
+++ b/test/unit/sanitizer/_tokenizer.js
@@ -183,12 +183,12 @@ module.exports.tests.sanity_checks = function(test, common) {
   });
   test('favor clean.parsed_text street data over clean.text', function(t) {
 
-    var clean = { parsed_text: { number: '190', street: 'foo st' }, text: 'bar' };
+    var clean = { parsed_text: { housenumber: '190', street: 'foo st' }, text: 'bar' };
     var messages = sanitizer.sanitize({}, clean);
 
     // favor clean.parsed_text.name over clean.text
-    t.deepEquals(clean.tokens, [ '190', 'foo', 'st' ], 'use street name + number');
-    t.deepEquals(clean.tokens_complete, [ '190', 'foo', 'st' ], 'use street name + number');
+    t.deepEquals(clean.tokens, [ '190', 'foo', 'st' ], 'use street name + housenumber');
+    t.deepEquals(clean.tokens_complete, [ '190', 'foo', 'st' ], 'use street name + housenumber');
     t.deepEquals(clean.tokens_incomplete, [], 'no tokens');
 
     // no errors/warnings produced
@@ -199,7 +199,7 @@ module.exports.tests.sanity_checks = function(test, common) {
   });
   test('favor clean.parsed_text.name over clean.parsed_text street data', function(t) {
 
-    var clean = { parsed_text: { number: '190', street: 'foo st', name: 'foo' }, text: 'bar' };
+    var clean = { parsed_text: { housenumber: '190', street: 'foo st', name: 'foo' }, text: 'bar' };
     var messages = sanitizer.sanitize({}, clean);
 
     // favor clean.parsed_text.name over all other variables
diff --git a/test/unit/sanitizer/autocomplete.js b/test/unit/sanitizer/autocomplete.js
index 1ff106dcd..2f7227a61 100644
--- a/test/unit/sanitizer/autocomplete.js
+++ b/test/unit/sanitizer/autocomplete.js
@@ -24,10 +24,10 @@ module.exports.tests.sanitizers = function(test, common) {
           }
         };
       },
-      '../sanitizer/_text_addressit': function () {
+      '../sanitizer/_text_pelias_parser': function () {
         return {
           sanitize: () => {
-            called_sanitizers.push('_text_addressit');
+            called_sanitizers.push('_text_pelias_parser');
             return { errors: [], warnings: [] };
           }
         };
@@ -142,7 +142,7 @@ module.exports.tests.sanitizers = function(test, common) {
     const expected_sanitizers = [
       '_single_scalar_parameters',
       '_debug',
-      '_text_addressit',
+      '_text_pelias_parser',
       '_tokenizer',
       '_size',
       '_targets/layers',
diff --git a/test/unit/sanitizer/defer_to_addressit.js b/test/unit/sanitizer/defer_to_pelias_parser.js
similarity index 73%
rename from test/unit/sanitizer/defer_to_addressit.js
rename to test/unit/sanitizer/defer_to_pelias_parser.js
index d649c2970..d245f078b 100644
--- a/test/unit/sanitizer/defer_to_addressit.js
+++ b/test/unit/sanitizer/defer_to_pelias_parser.js
@@ -11,11 +11,11 @@ module.exports.tests.sanitize = (test, common) => {
 
     // rather than re-verify the functionality of all the sanitizers, this test just verifies that they
     //  were all called correctly
-    const defer_to_addressit = proxyquire('../../../sanitizer/defer_to_addressit', {
-      '../sanitizer/_text_addressit': function () {
+    const defer_to_pelias_parser = proxyquire('../../../sanitizer/defer_to_pelias_parser', {
+      '../sanitizer/_text_pelias_parser': function () {
         return {
           sanitize: () => {
-            t.fail('_text_addressit should not have been called');
+            t.fail('_text_pelias_parser should not have been called');
           }
         };
       },
@@ -29,25 +29,25 @@ module.exports.tests.sanitize = (test, common) => {
       }
     })(() => false);
 
-    defer_to_addressit({}, {}, () => {
+    defer_to_pelias_parser({}, {}, () => {
       t.equals(logger.getInfoMessages().length, 0);
       t.end();
     });
 
   });
 
-  test('verify that _text_addressit sanitizer was called when should_execute returns true', (t) => {
+  test('verify that _text_pelias_parser sanitizer was called when should_execute returns true', (t) => {
     t.plan(2);
 
     const logger = mock_logger();
 
     // rather than re-verify the functionality of all the sanitizers, this test just verifies that they
     //  were all called correctly
-    const defer_to_addressit = proxyquire('../../../sanitizer/defer_to_addressit', {
-      '../sanitizer/_text_addressit': function () {
+    const defer_to_pelias_parser = proxyquire('../../../sanitizer/defer_to_pelias_parser', {
+      '../sanitizer/_text_pelias_parser': function () {
         return {
           sanitize: () => {
-            t.pass('_text_addressit should have been called');
+            t.pass('_text_pelias_parser should have been called');
             return { errors: [], warnings: [] };
           }
         };
@@ -73,7 +73,7 @@ module.exports.tests.sanitize = (test, common) => {
       }
     };
 
-    defer_to_addressit(req, {}, () => {
+    defer_to_pelias_parser(req, {}, () => {
       t.end();
     });
 
@@ -86,11 +86,11 @@ module.exports.tests.sanitize = (test, common) => {
 
     // rather than re-verify the functionality of all the sanitizers, this test just verifies that they
     //  were all called correctly
-    const defer_to_addressit = proxyquire('../../../sanitizer/defer_to_addressit', {
-      '../sanitizer/_text_addressit': function () {
+    const defer_to_pelias_parser = proxyquire('../../../sanitizer/defer_to_pelias_parser', {
+      '../sanitizer/_text_pelias_parser': function () {
         return {
           sanitize: () => {
-            t.pass('_text_addressit should have been called');
+            t.pass('_text_pelias_parser should have been called');
             return { errors: [], warnings: [] };
           }
         };
@@ -113,7 +113,7 @@ module.exports.tests.sanitize = (test, common) => {
       }
     };
 
-    defer_to_addressit(req, {}, () => {
+    defer_to_pelias_parser(req, {}, () => {
       t.deepEquals(logger.getInfoMessages(), []);
       t.end();
     });
@@ -124,7 +124,7 @@ module.exports.tests.sanitize = (test, common) => {
 module.exports.all = function (tape, common) {
 
   function test(name, testFunction) {
-    return tape(`SANITIZE /defer_to_addressit ${name}`, testFunction);
+    return tape(`SANITIZE /defer_to_pelias_parser ${name}`, testFunction);
   }
 
   for( var testCase in module.exports.tests ){

From 62d23e2ceac5a75243ba8c4288ceae9eb7616794 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Thu, 2 May 2019 16:34:29 +0200
Subject: [PATCH 02/55] feat(parser): improved postfix cursor position for text
 with no admin classification

---
 sanitizer/_text_pelias_parser.js | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/sanitizer/_text_pelias_parser.js b/sanitizer/_text_pelias_parser.js
index 6c56df7af..c4cc7199c 100644
--- a/sanitizer/_text_pelias_parser.js
+++ b/sanitizer/_text_pelias_parser.js
@@ -77,16 +77,23 @@ function parse (clean) {
 
   // scan through the input text and 'bucket' characters in to one of two buckets:
   // prefix: all unparsed characters that came before any parsed fields
-  // postfix: all unparsed characters from the first admin field to the end of the string
+  // postfix: all characters from the first admin field to the end of the string
 
   // set cursor to the first classified character
   let cursor = mask.search(/\S/);
   if (cursor === -1) { cursor = body.length; }
   let prefix = _.trim(body.substr(0, cursor), ' ,');
 
-  // set cursor to the first character of the first classified admin field
-  cursor = mask.indexOf('A');
-  if (cursor === -1) { cursor = body.length; }
+  // solution includes address classification
+  // set cursor after the last classified address character
+  if (mask.search(/[NS]/) > -1) {
+    cursor = Math.max(mask.lastIndexOf('N'), mask.lastIndexOf('S')) + 1;
+  }
+  // solution includes admin classification
+  // set cursor to the first classified admin character
+  else if( mask.includes('A') ){ cursor = mask.indexOf('A'); }
+  // else set cursor to end-of-text
+  else { cursor = body.length; }
   let postfix = _.trim(body.substr(cursor), ' ,');
 
   // clean up spacing around commas

From 463245246595d9069a449c0f730b1e9407756b2e Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Fri, 3 May 2019 13:12:11 +0200
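Subject: [PATCH 03/55] feat(parser): pelias/parser improvements

* bump pelias-parser from ^1.2.0 to ^1.3.0
* rename parsed_text.admin_parts to parsed_text.admin
* add parsed_text.subject, the text which targets the 'name.*' fields:
  housenumber + street, else street, name, postcode, locality, region,
  country, falling back to the whole input body
* query: set 'input:name' from subject (replacing 'input:query' from
  name) and remove the legacy name / housenumber + street block
* tokenizer: tokenize subject instead of name or housenumber + street
* tests: update fixtures and expectations for the new fields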
---
 package.json                               |  2 +-
 query/text_parser_pelias.js                | 25 ++------
 sanitizer/_text_pelias_parser.js           | 46 +++++++++++--
 sanitizer/_tokenizer.js                    | 17 +----
 test/unit/query/autocomplete.js            |  6 +-
 test/unit/query/search_addressit.js        |  9 ++-
 test/unit/sanitizer/_text_pelias_parser.js | 75 +++++++++++++---------
 test/unit/sanitizer/_tokenizer.js          | 10 ++-
 8 files changed, 112 insertions(+), 78 deletions(-)

diff --git a/package.json b/package.json
index 359beb0f0..0ac55677a 100644
--- a/package.json
+++ b/package.json
@@ -56,7 +56,7 @@
     "pelias-logger": "^1.2.0",
     "pelias-microservice-wrapper": "^1.7.0",
     "pelias-model": "^7.0.0",
-    "pelias-parser": "^1.2.0",
+    "pelias-parser": "^1.3.0",
     "pelias-query": "^9.14.0",
     "pelias-sorting": "^1.2.0",
     "predicates": "^2.0.0",
diff --git a/query/text_parser_pelias.js b/query/text_parser_pelias.js
index 394b80d3d..779a3895b 100644
--- a/query/text_parser_pelias.js
+++ b/query/text_parser_pelias.js
@@ -15,9 +15,9 @@ const adminFields = placeTypes.concat([
 function addParsedVariablesToQueryVariables(clean, vs) {
   // ==== add parsed matches [address components] ====
 
-  // prefix (any unparsed text before any matched fields)
-  if (!_.isEmpty(clean.parsed_text.name)) {
-    vs.var('input:query', clean.parsed_text.name);
+  // name
+  if (!_.isEmpty(clean.parsed_text.subject)) {
+    vs.var('input:name', clean.parsed_text.subject);
   }
 
   // housenumber
@@ -40,21 +40,6 @@ function addParsedVariablesToQueryVariables(clean, vs) {
     vs.var('input:postcode', clean.parsed_text.postcode);
   }
 
-  // ==== legacy components ====
-  // @todo: can we remove this functionality?
-
-  // is the 'name' label set?
-  if (clean.parsed_text.name) {
-    vs.var('input:name', clean.parsed_text.name);
-  }
-  else {
-    // is it a street address?
-    var isStreetAddress = !_.isEmpty(clean.parsed_text.housenumber) && !_.isEmpty(clean.parsed_text.street);
-    if (isStreetAddress) {
-      vs.var('input:name', clean.parsed_text.housenumber + ' ' + clean.parsed_text.street);
-    }
-  }
-
   // ==== add parsed matches [admin components] ====
 
   // // locality
@@ -73,13 +58,13 @@ function addParsedVariablesToQueryVariables(clean, vs) {
   // }
 
   // postfix
-  if (!_.isEmpty(clean.parsed_text.admin_parts)) {
+  if (!_.isEmpty(clean.parsed_text.admin)) {
     // assign postfix to any admin fields which currently don't have a value assigned.
     
     // cycle through fields and set fields which are still currently unset
     adminFields.forEach(key => {
       if (!vs.isset('input:' + key)) {
-        vs.var('input:' + key, clean.parsed_text.admin_parts);
+        vs.var('input:' + key, clean.parsed_text.admin);
       }
     });
   }
diff --git a/sanitizer/_text_pelias_parser.js b/sanitizer/_text_pelias_parser.js
index c4cc7199c..c2426480d 100644
--- a/sanitizer/_text_pelias_parser.js
+++ b/sanitizer/_text_pelias_parser.js
@@ -12,7 +12,7 @@ const _ = require('lodash');
   'name',
   'housenumber', 'street', 'postcode',
   'locality', 'region', 'country',
-  'admin_parts'
+  'admin'
 **/
 
 // validate texts, convert types and apply defaults
@@ -49,7 +49,7 @@ function parse (clean) {
   if (t.solution.length) { solution = t.solution[0]; }
 
   // 1. map the output of the parser in to parsed_text
-  let parsed_text = {};
+  let parsed_text = { subject: undefined };
 
   solution.pair.forEach(p => {
     let field = p.classification.label;
@@ -107,7 +107,8 @@ function parse (clean) {
   // handle the case where 'parsed_text' is completely empty
   // ie. the parser was not able to classify anything at all
   // note: this is common for venue names
-  if (Object.keys(parsed_text).length === 0) {
+  // note: length == 1 accounts for 'subject'
+  if (Object.keys(parsed_text).length === 1) {
     if (prefix.length && !postfix.length) {
       // if the prefix contains a comma
       // then only use the first part for the prefix for the
@@ -124,7 +125,44 @@ function parse (clean) {
 
   // 3. store the unparsed characters in fields which can be used for querying
   if (prefix.length) { parsed_text.name = prefix; }
-  if (postfix.length) { parsed_text.admin_parts = postfix; }
+  if (postfix.length) { parsed_text.admin = postfix; }
+
+  // 4. set 'subject', this is the text which will target the 'name.*'
+  // fields in elasticsearch queries
+
+  // an address query
+  if (!_.isEmpty(parsed_text.housenumber) && !_.isEmpty(parsed_text.street)) {
+    parsed_text.subject = `${parsed_text.housenumber} ${parsed_text.street}`;
+  }
+  // a street query
+  else if (!_.isEmpty(parsed_text.street)) {
+    parsed_text.subject = parsed_text.street;
+  }
+  // query with a name such as a venue query
+  else if (!_.isEmpty(parsed_text.name)){
+    parsed_text.subject = parsed_text.name;
+  }
+  // a postcode query
+  else if (!_.isEmpty(parsed_text.postcode)) {
+    parsed_text.subject = parsed_text.postcode;
+  }
+  // a locality query
+  else if (!_.isEmpty(parsed_text.locality)) {
+    parsed_text.subject = parsed_text.locality;
+  }
+  // a region query
+  else if (!_.isEmpty(parsed_text.region)) {
+    parsed_text.subject = parsed_text.region;
+  }
+  // a country query
+  else if (!_.isEmpty(parsed_text.country)) {
+    parsed_text.subject = parsed_text.country;
+  }
+  
+  // unknown query type
+  else {
+    parsed_text.subject = t.span.body;
+  }
 
   return parsed_text;
 }
diff --git a/sanitizer/_tokenizer.js b/sanitizer/_tokenizer.js
index cc02e42a1..4dbfbd9a9 100644
--- a/sanitizer/_tokenizer.js
+++ b/sanitizer/_tokenizer.js
@@ -30,21 +30,8 @@ function _sanitize( raw, clean ){
     inputParserRanSuccessfully = true;
 
     // parsed_text.name is set, this is the highest priority, use this string
-    if( _.has(clean.parsed_text, 'name') ){
-      text = clean.parsed_text.name; // use this string instead
-    }
-
-    // else handle the case where parsed_text.street was produced but
-    // no parsed_text.name is produced.
-    // additionally, handle the case where parsed_text.housenumber is present
-    // note: the addressit module may also produce parsed_text.unit info
-    // for now, we discard that information as we don't have an appropriate
-    else if( _.has(clean.parsed_text, 'street') ){
-      text = [
-        clean.parsed_text.housenumber,
-        clean.parsed_text.street
-      ].filter((el) => el)
-      .join(' '); // remove empty elements
+    if( _.has(clean.parsed_text, 'subject') ){
+      text = clean.parsed_text.subject; // use this string instead
     }
   }
 
diff --git a/test/unit/query/autocomplete.js b/test/unit/query/autocomplete.js
index 6b324c898..fee51406b 100644
--- a/test/unit/query/autocomplete.js
+++ b/test/unit/query/autocomplete.js
@@ -56,8 +56,9 @@ module.exports.tests.query = function(test, common) {
     var query = generate({
       text: 'one two, three',
       parsed_text: {
+        subject: 'one two',
         name: 'one two',
-        admin_parts: 'three'
+        admin: 'three'
       },
       tokens: ['one','two'],
       tokens_complete: ['one','two'],
@@ -257,9 +258,10 @@ module.exports.tests.query = function(test, common) {
     var query = generate({
       text: 'k road, laird',
       parsed_text: {
+        subject: 'k road',
         street: 'k road',
         locality: 'laird',
-        admin_parts: 'laird'
+        admin: 'laird'
       },
       tokens: ['k', 'road'],
       tokens_complete: ['k', 'road'],
diff --git a/test/unit/query/search_addressit.js b/test/unit/query/search_addressit.js
index 401d282e2..88b7a0655 100644
--- a/test/unit/query/search_addressit.js
+++ b/test/unit/query/search_addressit.js
@@ -106,12 +106,13 @@ module.exports.tests.query = function(test, common) {
       layers: [ 'address', 'venue', 'country', 'region', 'county', 'neighbourhood', 'locality', 'localadmin' ],
       querySize: 10,
       parsed_text: {
+        subject: '123 main st',
         housenumber: '123',
         street: 'main st',
         region: 'new york',
         locality: 'ny',
         postcode: '10010',
-        admin_parts: 'new york ny US'
+        admin: 'new york ny US'
       }
     });
 
@@ -128,9 +129,10 @@ module.exports.tests.query = function(test, common) {
       layers: [ 'address', 'venue', 'country', 'region', 'county', 'neighbourhood', 'locality', 'localadmin' ],
       querySize: 10,
       parsed_text: {
+        subject: 'soho grand',
         name: 'soho grand',
         region: 'new york',
-        admin_parts: 'new york'
+        admin: 'new york'
       }
     });
 
@@ -147,11 +149,12 @@ module.exports.tests.query = function(test, common) {
       layers: [ 'address', 'venue', 'country', 'region', 'county', 'neighbourhood', 'locality', 'localadmin' ],
       querySize: 10,
       parsed_text: {
+        subject: '1 water st',
         housenumber: '1',
         street: 'water st',
         locality: 'manhattan',
         region: 'ny',
-        admin_parts: 'manhattan ny'
+        admin: 'manhattan ny'
       }
     });
 
diff --git a/test/unit/sanitizer/_text_pelias_parser.js b/test/unit/sanitizer/_text_pelias_parser.js
index 9a40ef7fc..3a0fd3ea1 100644
--- a/test/unit/sanitizer/_text_pelias_parser.js
+++ b/test/unit/sanitizer/_text_pelias_parser.js
@@ -20,14 +20,14 @@ module.exports.tests.text_parser = function (test, common) {
   });
 
   var usQueries = [
-    { name: 'soho', admin_parts: 'new york', region: 'NY' },
-    { name: '123 main', admin_parts: 'new york', region: 'NY' }
+    { name: 'soho', admin: 'new york', region: 'NY' },
+    { name: '123 main', admin: 'new york', region: 'NY' }
   ];
 
   usQueries.forEach(function (query) {
     test('naive parsing ' + query, function (t) {
       var raw = {
-        text: query.name + ', ' + query.admin_parts
+        text: query.name + ', ' + query.admin
       };
       var clean = {};
 
@@ -35,9 +35,10 @@ module.exports.tests.text_parser = function (test, common) {
         text: raw.text.trim(),
         parser: 'pelias',
         parsed_text: {
+          subject: query.name,
           name: query.name,
-          region: query.admin_parts,
-          admin_parts: query.admin_parts
+          locality: query.admin,
+          admin: query.admin
         }
       };
 
@@ -51,7 +52,7 @@ module.exports.tests.text_parser = function (test, common) {
 
     test('naive parsing ' + query + ' without spaces', function (t) {
       var raw = {
-        text: query.name + ',' + query.admin_parts
+        text: query.name + ',' + query.admin
       };
       var clean = {};
 
@@ -59,9 +60,10 @@ module.exports.tests.text_parser = function (test, common) {
         text: raw.text.trim(),
         parser: 'pelias',
         parsed_text: {
+          subject: query.name,
           name: query.name,
-          region: query.admin_parts,
-          admin_parts: query.admin_parts
+          locality: query.admin,
+          admin: query.admin
         }
       };
 
@@ -75,7 +77,7 @@ module.exports.tests.text_parser = function (test, common) {
 
     test('naive parsing ' + query + ' with leading and trailing junk', function (t) {
       var raw = {
-        text: ' , ' + query.name + ',' + query.admin_parts + ' , '
+        text: ' , ' + query.name + ',' + query.admin + ' , '
       };
       var clean = {};
 
@@ -83,9 +85,10 @@ module.exports.tests.text_parser = function (test, common) {
         text: raw.text.trim(),
         parser: 'pelias',
         parsed_text: {
+          subject: query.name,
           name: query.name,
-          region: query.admin_parts,
-          admin_parts: query.admin_parts
+          locality: query.admin,
+          admin: query.admin
         }
       };
 
@@ -99,22 +102,23 @@ module.exports.tests.text_parser = function (test, common) {
   });
 
   var nonUSQueries = [
-    { name: 'chelsea', admin_parts: 'london' },
+    { name: 'chelsea', admin: 'london' },
   ];
 
   nonUSQueries.forEach(function (query) {
     test('naive parsing ' + query, function (t) {
       var raw = {
-        text: query.name + ', ' + query.admin_parts
+        text: query.name + ', ' + query.admin
       };
       var clean = {};
 
       var expected_clean = {
-        text: query.name + ', ' + query.admin_parts,
+        text: query.name + ', ' + query.admin,
         parser: 'pelias',
         parsed_text: {
+          subject: query.name,
           locality: query.name,
-          admin_parts: query.name + ', ' + query.admin_parts
+          admin: query.name + ', ' + query.admin
         }
       };
 
@@ -128,16 +132,17 @@ module.exports.tests.text_parser = function (test, common) {
 
     test('naive parsing ' + query + ' without spaces', function (t) {
       var raw = {
-        text: query.name + ',' + query.admin_parts
+        text: query.name + ',' + query.admin
       };
       var clean = {};
 
       var expected_clean = {
-        text: query.name + ',' + query.admin_parts,
+        text: query.name + ',' + query.admin,
         parser: 'pelias',
         parsed_text: {
+          subject: query.name,
           locality: query.name,
-          admin_parts: query.name + ', ' + query.admin_parts
+          admin: query.name + ', ' + query.admin
         }
       };
 
@@ -162,6 +167,7 @@ module.exports.tests.text_parser = function (test, common) {
       parser: 'pelias',
       text: 'yugolsavia',
       parsed_text: {
+        subject: 'yugolsavia',
         name: 'yugolsavia'
       }
     };
@@ -185,6 +191,7 @@ module.exports.tests.text_parser = function (test, common) {
       parser: 'pelias',
       text: 'small town',
       parsed_text: {
+        subject: 'small town',
         name: 'small town'
       }
     };
@@ -208,6 +215,7 @@ module.exports.tests.text_parser = function (test, common) {
       parser: 'pelias',
       text: '123 main',
       parsed_text: {
+        subject: '123 main',
         name: '123 main'
       }
     };
@@ -231,6 +239,7 @@ module.exports.tests.text_parser = function (test, common) {
       parser: 'pelias',
       text: 'main 123',
       parsed_text: {
+        subject: 'main 123',
         name: 'main 123'
       }
     };
@@ -254,9 +263,10 @@ module.exports.tests.text_parser = function (test, common) {
       text: 'main particle new york',
       parser: 'pelias',
       parsed_text: {
+        subject: 'main particle',
         name: 'main particle',
-        region: 'new york',
-        admin_parts: 'new york'
+        locality: 'new york',
+        admin: 'new york'
       }
     };
 
@@ -278,11 +288,12 @@ module.exports.tests.text_parser = function (test, common) {
       text: '123 main st new york ny',
       parser: 'pelias',
       parsed_text: {
+        subject: '123 main st',
         housenumber: '123',
         street: 'main st',
-        region: 'new york',
-        locality: 'ny',
-        admin_parts: 'new york ny'
+        locality: 'new york',
+        region: 'ny',
+        admin: 'new york ny'
       }
     };
 
@@ -304,12 +315,13 @@ module.exports.tests.text_parser = function (test, common) {
       text: '123 main st new york ny 10010',
       parser: 'pelias',
       parsed_text: {
+        subject: '123 main st',
         housenumber: '123',
         street: 'main st',
-        region: 'new york',
-        locality: 'ny',
+        locality: 'new york',
+        region: 'ny',
         postcode: '10010',
-        admin_parts: 'new york ny'
+        admin: 'new york ny'
       }
     };
 
@@ -330,11 +342,12 @@ module.exports.tests.text_parser = function (test, common) {
       text: '339 W Main St, Cheshire, 06410',
       parser: 'pelias',
       parsed_text: {
+        subject: '339 W Main St',
         housenumber: '339',
         street: 'W Main St',
+        locality: 'Cheshire',
         postcode: '06410',
-        region: 'Cheshire',
-        admin_parts: 'Cheshire'
+        admin: 'Cheshire'
       }
     };
 
@@ -355,11 +368,12 @@ module.exports.tests.text_parser = function (test, common) {
       text: '339 W Main St,Lancaster,PA',
       parser: 'pelias',
       parsed_text: {
+        subject: '339 W Main St',
         housenumber: '339',
         street: 'W Main St',
         locality: 'Lancaster',
         region: 'PA',
-        admin_parts: 'Lancaster, PA'
+        admin: 'Lancaster, PA'
       }
     };
 
@@ -396,8 +410,9 @@ module.exports.tests.text_parser = function (test, common) {
     const raw = { text: 'NSW' };
     const clean = {};
     const expected_clean = { text: 'NSW', parser: 'pelias', parsed_text: {
+      subject: 'NSW',
       region: 'NSW',
-      admin_parts: 'NSW'
+      admin: 'NSW'
     }};
     const messages = sanitizer.sanitize(raw, clean);
 
diff --git a/test/unit/sanitizer/_tokenizer.js b/test/unit/sanitizer/_tokenizer.js
index 181a32106..f02de49f7 100644
--- a/test/unit/sanitizer/_tokenizer.js
+++ b/test/unit/sanitizer/_tokenizer.js
@@ -167,7 +167,7 @@ module.exports.tests.sanity_checks = function(test, common) {
   });
   test('favor clean.parsed_text.name over clean.text', function(t) {
 
-    var clean = { parsed_text: { name: 'foo' }, text: 'bar' };
+    var clean = { parsed_text: { subject: 'foo' }, text: 'bar' };
     var messages = sanitizer.sanitize({}, clean);
 
     // favor clean.parsed_text.name over clean.text
@@ -183,7 +183,9 @@ module.exports.tests.sanity_checks = function(test, common) {
   });
   test('favor clean.parsed_text street data over clean.text', function(t) {
 
-    var clean = { parsed_text: { housenumber: '190', street: 'foo st' }, text: 'bar' };
+    var clean = { parsed_text: {
+      housenumber: '190', street: 'foo st', subject: '190 foo st'
+    }, text: 'bar' };
     var messages = sanitizer.sanitize({}, clean);
 
     // favor clean.parsed_text.name over clean.text
@@ -199,7 +201,9 @@ module.exports.tests.sanity_checks = function(test, common) {
   });
   test('favor clean.parsed_text.name over clean.parsed_text street data', function(t) {
 
-    var clean = { parsed_text: { housenumber: '190', street: 'foo st', name: 'foo' }, text: 'bar' };
+    var clean = { parsed_text: {
+      housenumber: '190', street: 'foo st', subject: 'foo'
+    }, text: 'bar' };
     var messages = sanitizer.sanitize({}, clean);
 
     // favor clean.parsed_text.name over all other variables

From 10c98890291fb42a44307b84faea22cc39432003 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Fri, 3 May 2019 15:44:19 +0200
Subject: [PATCH 04/55] feat(parser): bump pelias/parser version

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 0ac55677a..f76b35ca8 100644
--- a/package.json
+++ b/package.json
@@ -56,7 +56,7 @@
     "pelias-logger": "^1.2.0",
     "pelias-microservice-wrapper": "^1.7.0",
     "pelias-model": "^7.0.0",
-    "pelias-parser": "^1.3.0",
+    "pelias-parser": "^1.4.0",
     "pelias-query": "^9.14.0",
     "pelias-sorting": "^1.2.0",
     "predicates": "^2.0.0",

From 559fdb041d78237d0663f10bf74dd1bde2f11817 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Mon, 13 May 2019 14:35:03 +0200
Subject: [PATCH 05/55] feat(parser): bump pelias/parser version

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index f76b35ca8..bc8cf457e 100644
--- a/package.json
+++ b/package.json
@@ -56,7 +56,7 @@
     "pelias-logger": "^1.2.0",
     "pelias-microservice-wrapper": "^1.7.0",
     "pelias-model": "^7.0.0",
-    "pelias-parser": "^1.4.0",
+    "pelias-parser": "^1.9.0",
     "pelias-query": "^9.14.0",
     "pelias-sorting": "^1.2.0",
     "predicates": "^2.0.0",

From 30a62866828a3e840ba8c21014dea876d05ab019 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Tue, 14 May 2019 15:58:13 +0200
Subject: [PATCH 06/55] feat(parser): bump pelias/parser version

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index bc8cf457e..382e43f1b 100644
--- a/package.json
+++ b/package.json
@@ -56,7 +56,7 @@
     "pelias-logger": "^1.2.0",
     "pelias-microservice-wrapper": "^1.7.0",
     "pelias-model": "^7.0.0",
-    "pelias-parser": "^1.9.0",
+    "pelias-parser": "^1.12.0",
     "pelias-query": "^9.14.0",
     "pelias-sorting": "^1.2.0",
     "predicates": "^2.0.0",

From b24bf31ee9e2463a02630eeb05519757173b7a9a Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Tue, 14 May 2019 18:01:02 +0200
Subject: [PATCH 07/55] feat(parser): bump pelias/parser version

---
 package.json                               |   2 +-
 sanitizer/_text_pelias_parser.js           |   8 +-
 test/unit/sanitizer/_text_pelias_parser.js | 523 ++++++---------------
 3 files changed, 151 insertions(+), 382 deletions(-)

diff --git a/package.json b/package.json
index 382e43f1b..ca0a0d883 100644
--- a/package.json
+++ b/package.json
@@ -56,7 +56,7 @@
     "pelias-logger": "^1.2.0",
     "pelias-microservice-wrapper": "^1.7.0",
     "pelias-model": "^7.0.0",
-    "pelias-parser": "^1.12.0",
+    "pelias-parser": "^1.13.0",
     "pelias-query": "^9.14.0",
     "pelias-sorting": "^1.2.0",
     "predicates": "^2.0.0",
diff --git a/sanitizer/_text_pelias_parser.js b/sanitizer/_text_pelias_parser.js
index c2426480d..607d1aefb 100644
--- a/sanitizer/_text_pelias_parser.js
+++ b/sanitizer/_text_pelias_parser.js
@@ -100,10 +100,6 @@ function parse (clean) {
   prefix = prefix.split(/[,\n\t]/).join(', ');
   postfix = postfix.split(/[,\n\t]/).join(', ');
 
-  // squash multiple adjacent whitespace characters into a single space
-  prefix = prefix.replace(/\s\s+/g, ' ').trim();
-  postfix = postfix.replace(/\s\s+/g, ' ').trim();
-
   // handle the case where 'parsed_text' is completely empty
   // ie. the parser was not able to classify anything at all
   // note: this is common for venue names
@@ -123,6 +119,10 @@ function parse (clean) {
     }
   }
 
+  // squash multiple adjacent whitespace characters into a single space
+  prefix = prefix.replace(/\s+/g, ' ').trim();
+  postfix = postfix.replace(/\s+/g, ' ').trim();
+
   // 3. store the unparsed characters in fields which can be used for querying
   if (prefix.length) { parsed_text.name = prefix; }
   if (postfix.length) { parsed_text.admin = postfix; }
diff --git a/test/unit/sanitizer/_text_pelias_parser.js b/test/unit/sanitizer/_text_pelias_parser.js
index 3a0fd3ea1..e49dd60b1 100644
--- a/test/unit/sanitizer/_text_pelias_parser.js
+++ b/test/unit/sanitizer/_text_pelias_parser.js
@@ -19,370 +19,155 @@ module.exports.tests.text_parser = function (test, common) {
     t.end();
   });
 
-  var usQueries = [
-    { name: 'soho', admin: 'new york', region: 'NY' },
-    { name: '123 main', admin: 'new york', region: 'NY' }
-  ];
-
-  usQueries.forEach(function (query) {
-    test('naive parsing ' + query, function (t) {
-      var raw = {
-        text: query.name + ', ' + query.admin
-      };
-      var clean = {};
-
-      var expected_clean = {
-        text: raw.text.trim(),
-        parser: 'pelias',
-        parsed_text: {
-          subject: query.name,
-          name: query.name,
-          locality: query.admin,
-          admin: query.admin
-        }
-      };
-
-      var messages = sanitizer.sanitize(raw, clean);
-
-      t.deepEqual(messages, { errors: [], warnings: [] });
-      t.deepEqual(clean, expected_clean);
-      t.end();
-
-    });
-
-    test('naive parsing ' + query + ' without spaces', function (t) {
-      var raw = {
-        text: query.name + ',' + query.admin
-      };
-      var clean = {};
-
-      var expected_clean = {
-        text: raw.text.trim(),
-        parser: 'pelias',
-        parsed_text: {
-          subject: query.name,
-          name: query.name,
-          locality: query.admin,
-          admin: query.admin
-        }
-      };
-
-      var messages = sanitizer.sanitize(raw, clean);
-
-      t.deepEqual(messages, { errors: [], warnings: [] });
-      t.deepEqual(clean, expected_clean);
-      t.end();
-
-    });
-
-    test('naive parsing ' + query + ' with leading and trailing junk', function (t) {
-      var raw = {
-        text: ' , ' + query.name + ',' + query.admin + ' , '
-      };
-      var clean = {};
-
-      var expected_clean = {
-        text: raw.text.trim(),
-        parser: 'pelias',
-        parsed_text: {
-          subject: query.name,
-          name: query.name,
-          locality: query.admin,
-          admin: query.admin
-        }
-      };
-
-      var messages = sanitizer.sanitize(raw, clean);
-
-      t.deepEqual(messages, { errors: [], warnings: [] });
-      t.deepEqual(clean, expected_clean);
-      t.end();
-
-    });
-  });
-
-  var nonUSQueries = [
-    { name: 'chelsea', admin: 'london' },
-  ];
-
-  nonUSQueries.forEach(function (query) {
-    test('naive parsing ' + query, function (t) {
-      var raw = {
-        text: query.name + ', ' + query.admin
-      };
-      var clean = {};
-
-      var expected_clean = {
-        text: query.name + ', ' + query.admin,
-        parser: 'pelias',
-        parsed_text: {
-          subject: query.name,
-          locality: query.name,
-          admin: query.name + ', ' + query.admin
-        }
-      };
-
-      var messages = sanitizer.sanitize(raw, clean);
-
-      t.deepEqual(messages, { errors: [], warnings: [] });
-      t.deepEqual(clean, expected_clean);
-      t.end();
-
-    });
-
-    test('naive parsing ' + query + ' without spaces', function (t) {
-      var raw = {
-        text: query.name + ',' + query.admin
-      };
-      var clean = {};
-
-      var expected_clean = {
-        text: query.name + ',' + query.admin,
-        parser: 'pelias',
-        parsed_text: {
-          subject: query.name,
-          locality: query.name,
-          admin: query.name + ', ' + query.admin
+  let cases = [];
+  
+  // USA queries
+  cases.push(['soho, new york, NY', {
+    subject: 'soho',
+    name: 'soho',
+    locality: 'new york',
+    region: 'NY',
+    admin: 'new york, NY'
+  }]);
+  cases.push(['123 main st, new york, NY', {
+    subject: '123 main st',
+    housenumber: '123',
+    street: 'main st',
+    locality: 'new york',
+    region: 'NY',
+    admin: 'new york, NY'
+  }]);
+
+  // GBR queries
+  cases.push(['chelsea, london', {
+    subject: 'chelsea',
+    locality: 'chelsea',
+    admin: 'chelsea, london'
+  }]);
+
+  // Query with one token
+  cases.push(['yugolsavia', {
+    subject: 'yugolsavia',
+    name: 'yugolsavia'
+  }]);
+
+  // Query with two tokens, no numbers
+  cases.push(['small town', {
+    subject: 'small town',
+    name: 'small town'
+  }]);
+
+  // Query with two tokens, number first
+  cases.push(['123 main', {
+    subject: '123 main',
+    name: '123 main'
+  }]);
+
+  // Query with two tokens, number second
+  cases.push(['main 123', {
+    subject: 'main 123',
+    name: 'main 123'
+  }]);
+
+  // Query with many tokens
+  cases.push(['main particle new york', {
+    subject: 'main particle',
+    name: 'main particle',
+    locality: 'new york',
+    admin: 'new york'
+  }]);
+
+  // Valid address with housenumber
+  cases.push(['123 main st new york ny', {
+    subject: '123 main st',
+    housenumber: '123',
+    street: 'main st',
+    locality: 'new york',
+    region: 'ny',
+    admin: 'new york ny'
+  }]);
+
+  // Valid address with postcode
+  cases.push(['123 main st new york ny 10010', {
+    subject: '123 main st',
+    housenumber: '123',
+    street: 'main st',
+    locality: 'new york',
+    region: 'ny',
+    postcode: '10010',
+    admin: 'new york ny'
+  }]);
+
+  // Valid address with leading 0 in postcode
+  cases.push(['339 W Main St, Cheshire, 06410', {
+    subject: '339 W Main St',
+    housenumber: '339',
+    street: 'W Main St',
+    locality: 'Cheshire',
+    postcode: '06410',
+    admin: 'Cheshire'
+  }]);
+
+  // Valid address with no spaces after comma
+  cases.push(['339 W Main St,Lancaster,PA', {
+    subject: '339 W Main St',
+    housenumber: '339',
+    street: 'W Main St',
+    locality: 'Lancaster',
+    region: 'PA',
+    admin: 'Lancaster, PA'
+  }]);
+
+  // Valid address without commas
+  cases.push(['123 main st new york ny', {
+    subject: '123 main st',
+    housenumber: '123',
+    street: 'main st',
+    locality: 'new york',
+    region: 'ny',
+    admin: 'new york ny'
+  }]);
+
+  // AUS - state only
+  cases.push(['NSW', {
+    subject: 'NSW',
+    region: 'NSW',
+    admin: 'NSW'
+  }]);
+
+  cases.forEach(testcase => {
+    let input = testcase[0];
+    let expected = testcase[1];
+
+    function assert(label, replacement, replaceAdmin) {
+      let text = input.replace(/\s+/, ' ');
+      let clone = Object.assign({}, expected);
+      if (Array.isArray(replacement) && replacement.length === 2) {
+        text = text.replace(replacement[0], replacement[1]);
+        if (replaceAdmin === true && clone.admin) {
+          clone.admin = clone.admin.replace(replacement[0], replacement[1]).trim();
         }
-      };
-
-      var messages = sanitizer.sanitize(raw, clean);
-
-      t.deepEqual(messages, { errors: [], warnings: [] });
-      t.deepEqual(clean, expected_clean);
-      t.end();
-
-    });
-
-  });
-
-  test('query with one token', function (t) {
-    var raw = {
-      text: 'yugolsavia'
-    };
-    var clean = {};
-    clean.parsed_text = 'this should be removed';
-
-    var expected_clean = {
-      parser: 'pelias',
-      text: 'yugolsavia',
-      parsed_text: {
-        subject: 'yugolsavia',
-        name: 'yugolsavia'
-      }
-    };
-
-    var messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEqual(messages, { errors: [], warnings: [] });
-    t.deepEqual(clean, expected_clean);
-    t.end();
-
-  });
-
-  test('query with two tokens, no numbers', function (t) {
-    var raw = {
-      text: 'small town'
-    };
-    var clean = {};
-    clean.parsed_text = 'this should be removed';
-
-    var expected_clean = {
-      parser: 'pelias',
-      text: 'small town',
-      parsed_text: {
-        subject: 'small town',
-        name: 'small town'
-      }
-    };
-
-    var messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEqual(messages, { errors: [], warnings: [] });
-    t.deepEqual(clean, expected_clean);
-    t.end();
-
-  });
-
-  test('query with two tokens, number first', function (t) {
-    var raw = {
-      text: '123 main'
-    };
-    var clean = {};
-    clean.parsed_text = 'this should be removed';
-
-    var expected_clean = {
-      parser: 'pelias',
-      text: '123 main',
-      parsed_text: {
-        subject: '123 main',
-        name: '123 main'
-      }
-    };
-
-    var messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEqual(messages, { errors: [], warnings: [] });
-    t.deepEqual(clean, expected_clean);
-    t.end();
-
-  });
-
-  test('query with two tokens, number second', function (t) {
-    var raw = {
-      text: 'main 123'
-    };
-    var clean = {};
-    clean.parsed_text = 'this should be removed';
-
-    var expected_clean = {
-      parser: 'pelias',
-      text: 'main 123',
-      parsed_text: {
-        subject: 'main 123',
-        name: 'main 123'
-      }
-    };
-
-    var messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEqual(messages, { errors: [], warnings: [] });
-    t.deepEqual(clean, expected_clean);
-    t.end();
-
-  });
-
-  test('query with many tokens', function (t) {
-    var raw = {
-      text: 'main particle new york'
-    };
-    var clean = {};
-    clean.parsed_text = 'this should be removed';
-
-    var expected_clean = {
-      text: 'main particle new york',
-      parser: 'pelias',
-      parsed_text: {
-        subject: 'main particle',
-        name: 'main particle',
-        locality: 'new york',
-        admin: 'new york'
-      }
-    };
-
-    var messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEqual(messages, { errors: [], warnings: [] });
-    t.deepEqual(clean, expected_clean);
-    t.end();
-
-  });
-
-  test('valid address, house number', function (t) {
-    var raw = {
-      text: '123 main st new york ny'
-    };
-    var clean = {};
-
-    var expected_clean = {
-      text: '123 main st new york ny',
-      parser: 'pelias',
-      parsed_text: {
-        subject: '123 main st',
-        housenumber: '123',
-        street: 'main st',
-        locality: 'new york',
-        region: 'ny',
-        admin: 'new york ny'
-      }
-    };
-
-    var messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEqual(messages, { errors: [], warnings: [] });
-    t.deepEqual(clean, expected_clean);
-    t.end();
-
-  });
-
-  test('valid address, zipcode', function (t) {
-    var raw = {
-      text: '123 main st new york ny 10010'
-    };
-    var clean = {};
-
-    var expected_clean = {
-      text: '123 main st new york ny 10010',
-      parser: 'pelias',
-      parsed_text: {
-        subject: '123 main st',
-        housenumber: '123',
-        street: 'main st',
-        locality: 'new york',
-        region: 'ny',
-        postcode: '10010',
-        admin: 'new york ny'
-      }
-    };
-
-    var messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEqual(messages, { errors: [], warnings: [] });
-    t.deepEqual(clean, expected_clean);
-    t.end();
-  });
-
-  test('valid address with leading 0s in zipcode', function (t) {
-    var raw = {
-      text: '339 W Main St, Cheshire, 06410'
-    };
-    var clean = {};
-
-    var expected_clean = {
-      text: '339 W Main St, Cheshire, 06410',
-      parser: 'pelias',
-      parsed_text: {
-        subject: '339 W Main St',
-        housenumber: '339',
-        street: 'W Main St',
-        locality: 'Cheshire',
-        postcode: '06410',
-        admin: 'Cheshire'
       }
-    };
-
-    var messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEqual(messages, { errors: [], warnings: [] });
-    t.deepEqual(clean, expected_clean);
-    t.end();
-  });
-
-  test('valid address without spaces after commas', function (t) {
-    var raw = {
-      text: '339 W Main St,Lancaster,PA'
-    };
-    var clean = {};
-
-    var expected_clean = {
-      text: '339 W Main St,Lancaster,PA',
-      parser: 'pelias',
-      parsed_text: {
-        subject: '339 W Main St',
-        housenumber: '339',
-        street: 'W Main St',
-        locality: 'Lancaster',
-        region: 'PA',
-        admin: 'Lancaster, PA'
+      if (clone.admin) {
+        clone.admin = clone.admin.replace(/\s+/g, ' ').trim();
       }
-    };
-
-    var messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEqual(messages, { errors: [], warnings: [] });
-    t.deepEqual(clean, expected_clean);
-    t.end();
-
+      test(`${label}: ${text}`, t => {
+        let raw = { text: text };
+        let clean = { parsed_text: 'this should be removed' };
+        let messages = sanitizer.sanitize(raw, clean);
+
+        t.deepEqual(messages, { errors: [], warnings: [] }, 'messages');
+        t.equal(clean.text, raw.text.trim(), 'text');
+        t.equal(clean.parser, 'pelias', 'parser');
+        t.deepEqual(clean.parsed_text, clone, `${label}: ${text}`);
+        t.end();
+      });
+    }
+
+    assert('literal');
+    assert('no commas', [/,/g, ' '], true);
+    assert('no space after comma', [/,\s+/g, ',']);
+    assert('leading and trailing junk', [/^(.+)$/g, ' , $1 , ']);
   });
 
   test('whitespace-only input counts as empty', (t) => {
@@ -405,22 +190,6 @@ module.exports.tests.text_parser = function (test, common) {
     t.deepEquals(validParameters, expected);
     t.end();
   });
-
-  test('Australia - state only', (t) => {
-    const raw = { text: 'NSW' };
-    const clean = {};
-    const expected_clean = { text: 'NSW', parser: 'pelias', parsed_text: {
-      subject: 'NSW',
-      region: 'NSW',
-      admin: 'NSW'
-    }};
-    const messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEquals(clean, expected_clean);
-    t.deepEquals(messages.errors, []);
-    t.deepEquals(messages.warnings, [], 'no warnings');
-    t.end();
-  });
 };
 
 module.exports.all = function (tape, common) {

From 2d82d38a6a9abefa8b75780cc1b98af82c7f9fae Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Wed, 15 May 2019 13:19:01 +0200
Subject: [PATCH 08/55] feat(parser): updates to tokenizer sanitizer

---
 sanitizer/_tokenizer.js                    | 13 +++++---
 test/unit/sanitizer/_text_pelias_parser.js |  1 +
 test/unit/sanitizer/_tokenizer.js          | 36 ++++++----------------
 3 files changed, 20 insertions(+), 30 deletions(-)

diff --git a/sanitizer/_tokenizer.js b/sanitizer/_tokenizer.js
index 4dbfbd9a9..c3205ed72 100644
--- a/sanitizer/_tokenizer.js
+++ b/sanitizer/_tokenizer.js
@@ -22,16 +22,21 @@ function _sanitize( raw, clean ){
   var text = clean.text;
 
   // a boolean to track whether the input parser successfully ran; or not.
-  var inputParserRanSuccessfully = false;
+  var parserConsumedAllTokens = false;
 
   // if the text parser has run then we only tokenize the 'name' section
   // of the 'parsed_text' object, ignoring the 'admin' parts.
   if( _.isPlainObject(clean, 'parsed_text') && !_.isEmpty(clean.parsed_text) ) {
-    inputParserRanSuccessfully = true;
-
     // parsed_text.name is set, this is the highest priority, use this string
     if( _.has(clean.parsed_text, 'subject') ){
       text = clean.parsed_text.subject; // use this string instead
+
+      // when both housenumber and street fields are present then the pelias parser
+      // will simply set $subject to be a concatenation of these fields.
+      // in this case we can be sure that all tokens were complete
+      if (_.has(clean.parsed_text, 'housenumber') && _.has(clean.parsed_text, 'street')){
+        parserConsumedAllTokens = true;
+      }
     }
   }
 
@@ -66,7 +71,7 @@ function _sanitize( raw, clean ){
   if( clean.tokens.length ){
 
     // if all the tokens are complete, simply copy them from clean.tokens
-    if( inputParserRanSuccessfully ){
+    if( parserConsumedAllTokens ){
 
       // all these tokens are complete!
       clean.tokens_complete = clean.tokens.slice();
diff --git a/test/unit/sanitizer/_text_pelias_parser.js b/test/unit/sanitizer/_text_pelias_parser.js
index e49dd60b1..ddc32bee2 100644
--- a/test/unit/sanitizer/_text_pelias_parser.js
+++ b/test/unit/sanitizer/_text_pelias_parser.js
@@ -29,6 +29,7 @@ module.exports.tests.text_parser = function (test, common) {
     region: 'NY',
     admin: 'new york, NY'
   }]);
+
   cases.push(['123 main st, new york, NY', {
     subject: '123 main st',
     housenumber: '123',
diff --git a/test/unit/sanitizer/_tokenizer.js b/test/unit/sanitizer/_tokenizer.js
index f02de49f7..a0eb2371e 100644
--- a/test/unit/sanitizer/_tokenizer.js
+++ b/test/unit/sanitizer/_tokenizer.js
@@ -149,7 +149,7 @@ module.exports.tests.sanity_checks = function(test, common) {
 
     t.end();
   });
-  test('clean.parsed_text set but clean.parsed_text.name invalid', function(t) {
+  test('clean.parsed_text set but clean.parsed_text.subject invalid', function(t) {
 
     var clean = { parsed_text: { text: {} } };
     var messages = sanitizer.sanitize({}, clean);
@@ -165,15 +165,15 @@ module.exports.tests.sanity_checks = function(test, common) {
 
     t.end();
   });
-  test('favor clean.parsed_text.name over clean.text', function(t) {
+  test('favor clean.parsed_text.subject over clean.text', function(t) {
 
     var clean = { parsed_text: { subject: 'foo' }, text: 'bar' };
     var messages = sanitizer.sanitize({}, clean);
 
-    // favor clean.parsed_text.name over clean.text
-    t.deepEquals(clean.tokens, [ 'foo' ], 'use clean.parsed_text.name');
-    t.deepEquals(clean.tokens_complete, [ 'foo' ], 'use clean.parsed_text.name');
-    t.deepEquals(clean.tokens_incomplete, [], 'no tokens');
+    // favor clean.parsed_text.subject over clean.text
+    t.deepEquals(clean.tokens, [ 'foo' ], 'use clean.parsed_text.subject');
+    t.deepEquals(clean.tokens_complete, [], 'complete');
+    t.deepEquals(clean.tokens_incomplete, [ 'foo' ], 'incomplete');
 
     // no errors/warnings produced
     t.deepEquals(messages.errors, [], 'no errors');
@@ -184,11 +184,13 @@ module.exports.tests.sanity_checks = function(test, common) {
   test('favor clean.parsed_text street data over clean.text', function(t) {
 
     var clean = { parsed_text: {
-      housenumber: '190', street: 'foo st', subject: '190 foo st'
+      subject: '190 foo st',
+      housenumber: '190',
+      street: 'foo st'
     }, text: 'bar' };
     var messages = sanitizer.sanitize({}, clean);
 
-    // favor clean.parsed_text.name over clean.text
+    // favor clean.parsed_text.subject over clean.text
     t.deepEquals(clean.tokens, [ '190', 'foo', 'st' ], 'use street name + housenumber');
     t.deepEquals(clean.tokens_complete, [ '190', 'foo', 'st' ], 'use street name + housenumber');
     t.deepEquals(clean.tokens_incomplete, [], 'no tokens');
@@ -197,24 +199,6 @@ module.exports.tests.sanity_checks = function(test, common) {
     t.deepEquals(messages.errors, [], 'no errors');
     t.deepEquals(messages.warnings, [], 'no warnings');
 
-    t.end();
-  });
-  test('favor clean.parsed_text.name over clean.parsed_text street data', function(t) {
-
-    var clean = { parsed_text: {
-      housenumber: '190', street: 'foo st', subject: 'foo'
-    }, text: 'bar' };
-    var messages = sanitizer.sanitize({}, clean);
-
-    // favor clean.parsed_text.name over all other variables
-    t.deepEquals(clean.tokens, [ 'foo' ], 'use clean.parsed_text.name');
-    t.deepEquals(clean.tokens_complete, [ 'foo' ], 'use clean.parsed_text.name');
-    t.deepEquals(clean.tokens_incomplete, [], 'no tokens');
-
-    // no errors/warnings produced
-    t.deepEquals(messages.errors, [], 'no errors');
-    t.deepEquals(messages.warnings, [], 'no warnings');
-
     t.end();
   });
 };

From 4de4983f2c810fcd007eca3f5d31d635ed00f7fb Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Wed, 15 May 2019 13:20:03 +0200
Subject: [PATCH 09/55] typo

---
 sanitizer/_tokenizer.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sanitizer/_tokenizer.js b/sanitizer/_tokenizer.js
index c3205ed72..4017576e5 100644
--- a/sanitizer/_tokenizer.js
+++ b/sanitizer/_tokenizer.js
@@ -24,10 +24,10 @@ function _sanitize( raw, clean ){
   // a boolean to track whether the input parser successfully ran; or not.
   var parserConsumedAllTokens = false;
 
-  // if the text parser has run then we only tokenize the 'name' section
+  // if the text parser has run then we only tokenize the 'subject' section
   // of the 'parsed_text' object, ignoring the 'admin' parts.
   if( _.isPlainObject(clean, 'parsed_text') && !_.isEmpty(clean.parsed_text) ) {
-    // parsed_text.name is set, this is the highest priority, use this string
+    // parsed_text.subject is set, this is the highest priority, use this string
     if( _.has(clean.parsed_text, 'subject') ){
       text = clean.parsed_text.subject; // use this string instead
 

From 88e2390aaa4961e57a9458bfda73d1d43ba154c2 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Wed, 15 May 2019 13:34:28 +0200
Subject: [PATCH 10/55] feat(parser): stricter tokenization of exact matching
 admin queries

---
 sanitizer/_tokenizer.js | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sanitizer/_tokenizer.js b/sanitizer/_tokenizer.js
index 4017576e5..e1cec8f91 100644
--- a/sanitizer/_tokenizer.js
+++ b/sanitizer/_tokenizer.js
@@ -37,6 +37,14 @@ function _sanitize( raw, clean ){
       if (_.has(clean.parsed_text, 'housenumber') && _.has(clean.parsed_text, 'street')){
         parserConsumedAllTokens = true;
       }
+    
+      // when $subject exactly equals one of the admin fields
+      else if (
+        text === clean.parsed_text.locality ||
+        text === clean.parsed_text.region ||
+        text === clean.parsed_text.country) {
+        parserConsumedAllTokens = true;
+      }
     }
   }
 

From 308df52a91b1893a9ab3cf05ba0f1b0e7f175504 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Wed, 15 May 2019 14:51:07 +0200
Subject: [PATCH 11/55] feat(parser): switch to using multi_match for admin
 subqueries

---
 query/autocomplete.js                         | 15 ++--
 .../autocomplete_linguistic_with_admin.js     | 90 +++----------------
 .../autocomplete_single_character_street.js   | 86 ++++--------------
 3 files changed, 34 insertions(+), 157 deletions(-)

diff --git a/query/autocomplete.js b/query/autocomplete.js
index 446e5e3fe..5d786d733 100644
--- a/query/autocomplete.js
+++ b/query/autocomplete.js
@@ -4,6 +4,7 @@ const textParser = require('./text_parser_pelias');
 const check = require('check-types');
 const logger = require('pelias-logger').get('api');
 const config = require('pelias-config').generate();
+const placeTypes = require('../helper/placeTypes');
 
 // additional views (these may be merged in to pelias/query at a later date)
 var views = {
@@ -17,6 +18,10 @@ var views = {
   focus_point_filter:         require('./view/focus_point_distance_filter')
 };
 
+// region_a is also an admin field. pelias/parser tries to detect
+// region_a, in which case we use a match query specifically for it.
+var adminFields = placeTypes.concat(['region_a']);
+
 //------------------------------
 // autocomplete query
 //------------------------------
@@ -32,15 +37,7 @@ query.score( peliasQuery.view.address('street') );
 query.score( peliasQuery.view.address('postcode') );
 
 // admin components
-query.score( peliasQuery.view.admin('country') );
-query.score( peliasQuery.view.admin('country_a') );
-query.score( peliasQuery.view.admin('region') );
-query.score( peliasQuery.view.admin('region_a') );
-query.score( peliasQuery.view.admin('county') );
-query.score( peliasQuery.view.admin('borough') );
-query.score( peliasQuery.view.admin('localadmin') );
-query.score( peliasQuery.view.admin('locality') );
-query.score( peliasQuery.view.admin('neighbourhood') );
+query.score( peliasQuery.view.admin_multi_match(adminFields, 'peliasAdmin') );
 
 // scoring boost
 query.score( views.boost_exact_matches );
diff --git a/test/unit/fixture/autocomplete_linguistic_with_admin.js b/test/unit/fixture/autocomplete_linguistic_with_admin.js
index 54437fa07..412711ecc 100644
--- a/test/unit/fixture/autocomplete_linguistic_with_admin.js
+++ b/test/unit/fixture/autocomplete_linguistic_with_admin.js
@@ -17,83 +17,19 @@ module.exports = {
       ],
       'should': [
         {
-          'match': {
-            'parent.country.ngram': {
-              'analyzer': 'peliasAdmin',
-              'boost': 800,
-              'cutoff_frequency': 0.01,
-              'query': 'three'
-            }
-          }
-        },
-        {
-          'match': {
-            'parent.region.ngram': {
-              'analyzer': 'peliasAdmin',
-              'cutoff_frequency': 0.01,
-              'boost': 600,
-              'query': 'three'
-            }
-          }
-        },
-        {
-          'match': {
-            'parent.region_a.ngram': {
-              'analyzer': 'peliasAdmin',
-              'cutoff_frequency': 0.01,
-              'boost': 600,
-              'query': 'three'
-            }
-          }
-        },
-        {
-          'match': {
-            'parent.county.ngram': {
-              'analyzer': 'peliasAdmin',
-              'cutoff_frequency': 0.01,
-              'boost': 400,
-              'query': 'three'
-            }
-          }
-        },
-        {
-          'match': {
-            'parent.borough.ngram': {
-              'analyzer': 'peliasAdmin',
-              'cutoff_frequency': 0.01,
-              'boost': 600,
-              'query': 'three'
-            }
-          }
-        },
-        {
-          'match': {
-            'parent.localadmin.ngram': {
-              'analyzer': 'peliasAdmin',
-              'cutoff_frequency': 0.01,
-              'boost': 200,
-              'query': 'three'
-            }
-          }
-        },
-        {
-          'match': {
-            'parent.locality.ngram': {
-              'analyzer': 'peliasAdmin',
-              'cutoff_frequency': 0.01,
-              'boost': 200,
-              'query': 'three'
-            }
-          }
-        },
-        {
-          'match': {
-            'parent.neighbourhood.ngram': {
-              'analyzer': 'peliasAdmin',
-              'cutoff_frequency': 0.01,
-              'boost': 200,
-              'query': 'three'
-            }
+          'multi_match': {
+            'fields': [
+              'parent.country.ngram^800',
+              'parent.region.ngram^600',
+              'parent.county.ngram^400',
+              'parent.localadmin.ngram^200',
+              'parent.locality.ngram^200',
+              'parent.borough.ngram^600',
+              'parent.neighbourhood.ngram^200',
+              'parent.region_a.ngram^600'
+            ],
+            'query': 'three',
+            'analyzer': 'peliasAdmin'
           }
         },
         {
diff --git a/test/unit/fixture/autocomplete_single_character_street.js b/test/unit/fixture/autocomplete_single_character_street.js
index a625e5e04..756ccb0f3 100644
--- a/test/unit/fixture/autocomplete_single_character_street.js
+++ b/test/unit/fixture/autocomplete_single_character_street.js
@@ -23,77 +23,21 @@ module.exports = {
               'analyzer': 'peliasStreet'
             }
           }
-        }, {
-          'match': {
-            'parent.country.ngram': {
-              'query': 'laird',
-              'cutoff_frequency': 0.01,
-              'boost': 800,
-              'analyzer': 'peliasAdmin'
-            }
-          }
-        }, {
-          'match': {
-            'parent.region.ngram': {
-              'query': 'laird',
-              'cutoff_frequency': 0.01,
-              'boost': 600,
-              'analyzer': 'peliasAdmin'
-            }
-          }
-        }, {
-          'match': {
-            'parent.region_a.ngram': {
-              'query': 'laird',
-              'cutoff_frequency': 0.01,
-              'boost': 600,
-              'analyzer': 'peliasAdmin'
-            }
-          }
-        }, {
-          'match': {
-            'parent.county.ngram': {
-              'query': 'laird',
-              'cutoff_frequency': 0.01,
-              'boost': 400,
-              'analyzer': 'peliasAdmin'
-            }
-          }
-        }, {
-          'match': {
-            'parent.borough.ngram': {
-              'analyzer': 'peliasAdmin',
-              'cutoff_frequency': 0.01,
-              'boost': 600,
-              'query': 'laird'
-            }
-          }
-        }, {
-          'match': {
-            'parent.localadmin.ngram': {
-              'query': 'laird',
-              'cutoff_frequency': 0.01,
-              'boost': 200,
-              'analyzer': 'peliasAdmin'
-            }
-          }
-        }, {
-          'match': {
-            'parent.locality.ngram': {
-              'query': 'laird',
-              'cutoff_frequency': 0.01,
-              'boost': 200,
-              'analyzer': 'peliasAdmin'
-            }
-          }
-        }, {
-          'match': {
-            'parent.neighbourhood.ngram': {
-              'query': 'laird',
-              'cutoff_frequency': 0.01,
-              'boost': 200,
-              'analyzer': 'peliasAdmin'
-            }
+        },
+        {
+          'multi_match': {
+            'fields': [
+              'parent.country.ngram^800',
+              'parent.region.ngram^600',
+              'parent.county.ngram^400',
+              'parent.localadmin.ngram^200',
+              'parent.locality.ngram^200',
+              'parent.borough.ngram^600',
+              'parent.neighbourhood.ngram^200',
+              'parent.region_a.ngram^600'
+            ],
+            'query': 'laird',
+            'analyzer': 'peliasAdmin'
           }
         },
         {

From f5dcf3cb9ff19aae3499b238df51fee0a9fd773b Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Wed, 15 May 2019 16:14:53 +0200
Subject: [PATCH 12/55] feat(admin_subqueries): test cross_fields query

---
 query/autocomplete_defaults.js                            | 3 ++-
 test/unit/fixture/autocomplete_linguistic_with_admin.js   | 4 +++-
 test/unit/fixture/autocomplete_single_character_street.js | 4 +++-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js
index c9c3a9291..0609b550e 100644
--- a/query/autocomplete_defaults.js
+++ b/query/autocomplete_defaults.js
@@ -51,7 +51,8 @@ module.exports = _.merge({}, peliasQuery.defaults, {
   'address:postcode:boost': 2000,
   'address:postcode:cutoff_frequency': 0.01,
 
-  // generic multi_match cutoff_frequency
+  // generic multi_match config
+  'multi_match:type': 'cross_fields',
   'multi_match:cutoff_frequency': 0.01,
 
   'admin:country_a:analyzer': 'standard',
diff --git a/test/unit/fixture/autocomplete_linguistic_with_admin.js b/test/unit/fixture/autocomplete_linguistic_with_admin.js
index 412711ecc..cc5f18911 100644
--- a/test/unit/fixture/autocomplete_linguistic_with_admin.js
+++ b/test/unit/fixture/autocomplete_linguistic_with_admin.js
@@ -29,7 +29,9 @@ module.exports = {
               'parent.region_a.ngram^600'
             ],
             'query': 'three',
-            'analyzer': 'peliasAdmin'
+            'analyzer': 'peliasAdmin',
+            'type': 'cross_fields',
+            'cutoff_frequency': 0.01
           }
         },
         {
diff --git a/test/unit/fixture/autocomplete_single_character_street.js b/test/unit/fixture/autocomplete_single_character_street.js
index 756ccb0f3..61410114e 100644
--- a/test/unit/fixture/autocomplete_single_character_street.js
+++ b/test/unit/fixture/autocomplete_single_character_street.js
@@ -37,7 +37,9 @@ module.exports = {
               'parent.region_a.ngram^600'
             ],
             'query': 'laird',
-            'analyzer': 'peliasAdmin'
+            'analyzer': 'peliasAdmin',
+            'type': 'cross_fields',
+            'cutoff_frequency': 0.01
           }
         },
         {

From 58f8171b4e149464d87019dd2592295732e82680 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Wed, 15 May 2019 16:46:38 +0200
Subject: [PATCH 13/55] feat(admin_subqueries): test operator:and query

---
 query/autocomplete_defaults.js                            | 1 +
 test/unit/fixture/autocomplete_linguistic_with_admin.js   | 1 +
 test/unit/fixture/autocomplete_single_character_street.js | 1 +
 3 files changed, 3 insertions(+)

diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js
index 0609b550e..35aa6c80c 100644
--- a/query/autocomplete_defaults.js
+++ b/query/autocomplete_defaults.js
@@ -53,6 +53,7 @@ module.exports = _.merge({}, peliasQuery.defaults, {
 
   // generic multi_match config
   'multi_match:type': 'cross_fields',
+  'multi_match:operator': 'and',
   'multi_match:cutoff_frequency': 0.01,
 
   'admin:country_a:analyzer': 'standard',
diff --git a/test/unit/fixture/autocomplete_linguistic_with_admin.js b/test/unit/fixture/autocomplete_linguistic_with_admin.js
index cc5f18911..245d4f0f5 100644
--- a/test/unit/fixture/autocomplete_linguistic_with_admin.js
+++ b/test/unit/fixture/autocomplete_linguistic_with_admin.js
@@ -31,6 +31,7 @@ module.exports = {
             'query': 'three',
             'analyzer': 'peliasAdmin',
             'type': 'cross_fields',
+            'operator': 'and',
             'cutoff_frequency': 0.01
           }
         },
diff --git a/test/unit/fixture/autocomplete_single_character_street.js b/test/unit/fixture/autocomplete_single_character_street.js
index 61410114e..6a0e4cd48 100644
--- a/test/unit/fixture/autocomplete_single_character_street.js
+++ b/test/unit/fixture/autocomplete_single_character_street.js
@@ -39,6 +39,7 @@ module.exports = {
             'query': 'laird',
             'analyzer': 'peliasAdmin',
             'type': 'cross_fields',
+            'operator': 'and',
             'cutoff_frequency': 0.01
           }
         },

From cd2f159d05aa13683c2cf191a90e1ceeb150012d Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Wed, 15 May 2019 16:53:14 +0200
Subject: [PATCH 14/55] feat(admin_subqueries): set all boosts to 1

---
 query/autocomplete_defaults.js                 | 18 +++++++++---------
 .../autocomplete_linguistic_with_admin.js      | 16 ++++++++--------
 .../autocomplete_single_character_street.js    | 16 ++++++++--------
 3 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js
index 35aa6c80c..980c94dc9 100644
--- a/query/autocomplete_defaults.js
+++ b/query/autocomplete_defaults.js
@@ -58,47 +58,47 @@ module.exports = _.merge({}, peliasQuery.defaults, {
 
   'admin:country_a:analyzer': 'standard',
   'admin:country_a:field': 'parent.country_a.ngram',
-  'admin:country_a:boost': 1000,
+  'admin:country_a:boost': 1,
   'admin:country_a:cutoff_frequency': 0.01,
 
   'admin:country:analyzer': 'peliasAdmin',
   'admin:country:field': 'parent.country.ngram',
-  'admin:country:boost': 800,
+  'admin:country:boost': 1,
   'admin:country:cutoff_frequency': 0.01,
 
   'admin:region:analyzer': 'peliasAdmin',
   'admin:region:field': 'parent.region.ngram',
-  'admin:region:boost': 600,
+  'admin:region:boost': 1,
   'admin:region:cutoff_frequency': 0.01,
 
   'admin:region_a:analyzer': 'peliasAdmin',
   'admin:region_a:field': 'parent.region_a.ngram',
-  'admin:region_a:boost': 600,
+  'admin:region_a:boost': 1,
   'admin:region_a:cutoff_frequency': 0.01,
 
   'admin:county:analyzer': 'peliasAdmin',
   'admin:county:field': 'parent.county.ngram',
-  'admin:county:boost': 400,
+  'admin:county:boost': 1,
   'admin:county:cutoff_frequency': 0.01,
 
   'admin:localadmin:analyzer': 'peliasAdmin',
   'admin:localadmin:field': 'parent.localadmin.ngram',
-  'admin:localadmin:boost': 200,
+  'admin:localadmin:boost': 1,
   'admin:localadmin:cutoff_frequency': 0.01,
 
   'admin:locality:analyzer': 'peliasAdmin',
   'admin:locality:field': 'parent.locality.ngram',
-  'admin:locality:boost': 200,
+  'admin:locality:boost': 1,
   'admin:locality:cutoff_frequency': 0.01,
 
   'admin:neighbourhood:analyzer': 'peliasAdmin',
   'admin:neighbourhood:field': 'parent.neighbourhood.ngram',
-  'admin:neighbourhood:boost': 200,
+  'admin:neighbourhood:boost': 1,
   'admin:neighbourhood:cutoff_frequency': 0.01,
 
   'admin:borough:analyzer': 'peliasAdmin',
   'admin:borough:field': 'parent.borough.ngram',
-  'admin:borough:boost': 600,
+  'admin:borough:boost': 1,
   'admin:borough:cutoff_frequency': 0.01,
 
   'popularity:field': 'popularity',
diff --git a/test/unit/fixture/autocomplete_linguistic_with_admin.js b/test/unit/fixture/autocomplete_linguistic_with_admin.js
index 245d4f0f5..75cfc6d17 100644
--- a/test/unit/fixture/autocomplete_linguistic_with_admin.js
+++ b/test/unit/fixture/autocomplete_linguistic_with_admin.js
@@ -19,14 +19,14 @@ module.exports = {
         {
           'multi_match': {
             'fields': [
-              'parent.country.ngram^800',
-              'parent.region.ngram^600',
-              'parent.county.ngram^400',
-              'parent.localadmin.ngram^200',
-              'parent.locality.ngram^200',
-              'parent.borough.ngram^600',
-              'parent.neighbourhood.ngram^200',
-              'parent.region_a.ngram^600'
+              'parent.country.ngram^1',
+              'parent.region.ngram^1',
+              'parent.county.ngram^1',
+              'parent.localadmin.ngram^1',
+              'parent.locality.ngram^1',
+              'parent.borough.ngram^1',
+              'parent.neighbourhood.ngram^1',
+              'parent.region_a.ngram^1'
             ],
             'query': 'three',
             'analyzer': 'peliasAdmin',
diff --git a/test/unit/fixture/autocomplete_single_character_street.js b/test/unit/fixture/autocomplete_single_character_street.js
index 6a0e4cd48..1f62be732 100644
--- a/test/unit/fixture/autocomplete_single_character_street.js
+++ b/test/unit/fixture/autocomplete_single_character_street.js
@@ -27,14 +27,14 @@ module.exports = {
         {
           'multi_match': {
             'fields': [
-              'parent.country.ngram^800',
-              'parent.region.ngram^600',
-              'parent.county.ngram^400',
-              'parent.localadmin.ngram^200',
-              'parent.locality.ngram^200',
-              'parent.borough.ngram^600',
-              'parent.neighbourhood.ngram^200',
-              'parent.region_a.ngram^600'
+              'parent.country.ngram^1',
+              'parent.region.ngram^1',
+              'parent.county.ngram^1',
+              'parent.localadmin.ngram^1',
+              'parent.locality.ngram^1',
+              'parent.borough.ngram^1',
+              'parent.neighbourhood.ngram^1',
+              'parent.region_a.ngram^1'
             ],
             'query': 'laird',
             'analyzer': 'peliasAdmin',

From 22d69aabf5e7a1edb022c552d143bf0455fa559c Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Wed, 15 May 2019 17:14:16 +0200
Subject: [PATCH 15/55] feat(admin_subqueries): add locality_a and country_a to
 multi_match

---
 query/autocomplete.js                                     | 5 ++---
 query/autocomplete_defaults.js                            | 5 +++++
 query/text_parser_pelias.js                               | 1 +
 test/unit/fixture/autocomplete_linguistic_with_admin.js   | 4 +++-
 test/unit/fixture/autocomplete_single_character_street.js | 4 +++-
 5 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/query/autocomplete.js b/query/autocomplete.js
index 5d786d733..a0e24db0b 100644
--- a/query/autocomplete.js
+++ b/query/autocomplete.js
@@ -18,9 +18,8 @@ var views = {
   focus_point_filter:         require('./view/focus_point_distance_filter')
 };
 
-// region_a is also an admin field. pelias/parser tries to detect
-// region_a, in which case we use a match query specifically for it.
-var adminFields = placeTypes.concat(['region_a']);
+// add abbreviations for the fields pelias/parser is able to detect.
+var adminFields = placeTypes.concat(['locality_a', 'region_a', 'country_a']);
 
 //------------------------------
 // autocomplete query
diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js
index 980c94dc9..f0b835fc9 100644
--- a/query/autocomplete_defaults.js
+++ b/query/autocomplete_defaults.js
@@ -91,6 +91,11 @@ module.exports = _.merge({}, peliasQuery.defaults, {
   'admin:locality:boost': 1,
   'admin:locality:cutoff_frequency': 0.01,
 
+  'admin:locality_a:analyzer': 'peliasAdmin',
+  'admin:locality_a:field': 'parent.locality_a.ngram',
+  'admin:locality_a:boost': 1,
+  'admin:locality_a:cutoff_frequency': 0.01,
+
   'admin:neighbourhood:analyzer': 'peliasAdmin',
   'admin:neighbourhood:field': 'parent.neighbourhood.ngram',
   'admin:neighbourhood:boost': 1,
diff --git a/query/text_parser_pelias.js b/query/text_parser_pelias.js
index 779a3895b..141ad2df6 100644
--- a/query/text_parser_pelias.js
+++ b/query/text_parser_pelias.js
@@ -65,6 +65,7 @@ function addParsedVariablesToQueryVariables(clean, vs) {
     adminFields.forEach(key => {
       if (!vs.isset('input:' + key)) {
         vs.var('input:' + key, clean.parsed_text.admin);
+        vs.var('input:' + key + '_a', clean.parsed_text.admin);
       }
     });
   }
diff --git a/test/unit/fixture/autocomplete_linguistic_with_admin.js b/test/unit/fixture/autocomplete_linguistic_with_admin.js
index 75cfc6d17..9f37c2bac 100644
--- a/test/unit/fixture/autocomplete_linguistic_with_admin.js
+++ b/test/unit/fixture/autocomplete_linguistic_with_admin.js
@@ -26,7 +26,9 @@ module.exports = {
               'parent.locality.ngram^1',
               'parent.borough.ngram^1',
               'parent.neighbourhood.ngram^1',
-              'parent.region_a.ngram^1'
+              'parent.locality_a.ngram^1',
+              'parent.region_a.ngram^1',
+              'parent.country_a.ngram^1'
             ],
             'query': 'three',
             'analyzer': 'peliasAdmin',
diff --git a/test/unit/fixture/autocomplete_single_character_street.js b/test/unit/fixture/autocomplete_single_character_street.js
index 1f62be732..8d0b3b277 100644
--- a/test/unit/fixture/autocomplete_single_character_street.js
+++ b/test/unit/fixture/autocomplete_single_character_street.js
@@ -34,7 +34,9 @@ module.exports = {
               'parent.locality.ngram^1',
               'parent.borough.ngram^1',
               'parent.neighbourhood.ngram^1',
-              'parent.region_a.ngram^1'
+              'parent.locality_a.ngram^1',
+              'parent.region_a.ngram^1',
+              'parent.country_a.ngram^1'
             ],
             'query': 'laird',
             'analyzer': 'peliasAdmin',

From 79c5c4557403ab06c876493aaef5dcd389ad2a86 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Wed, 15 May 2019 17:20:42 +0200
Subject: [PATCH 16/55] feat(admin_subqueries): revert to operator:or

---
 query/autocomplete_defaults.js                            | 1 -
 test/unit/fixture/autocomplete_linguistic_with_admin.js   | 1 -
 test/unit/fixture/autocomplete_single_character_street.js | 1 -
 3 files changed, 3 deletions(-)

diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js
index f0b835fc9..ba7d54b79 100644
--- a/query/autocomplete_defaults.js
+++ b/query/autocomplete_defaults.js
@@ -53,7 +53,6 @@ module.exports = _.merge({}, peliasQuery.defaults, {
 
   // generic multi_match config
   'multi_match:type': 'cross_fields',
-  'multi_match:operator': 'and',
   'multi_match:cutoff_frequency': 0.01,
 
   'admin:country_a:analyzer': 'standard',
diff --git a/test/unit/fixture/autocomplete_linguistic_with_admin.js b/test/unit/fixture/autocomplete_linguistic_with_admin.js
index 9f37c2bac..95b5c4404 100644
--- a/test/unit/fixture/autocomplete_linguistic_with_admin.js
+++ b/test/unit/fixture/autocomplete_linguistic_with_admin.js
@@ -33,7 +33,6 @@ module.exports = {
             'query': 'three',
             'analyzer': 'peliasAdmin',
             'type': 'cross_fields',
-            'operator': 'and',
             'cutoff_frequency': 0.01
           }
         },
diff --git a/test/unit/fixture/autocomplete_single_character_street.js b/test/unit/fixture/autocomplete_single_character_street.js
index 8d0b3b277..ccea321b9 100644
--- a/test/unit/fixture/autocomplete_single_character_street.js
+++ b/test/unit/fixture/autocomplete_single_character_street.js
@@ -41,7 +41,6 @@ module.exports = {
             'query': 'laird',
             'analyzer': 'peliasAdmin',
             'type': 'cross_fields',
-            'operator': 'and',
             'cutoff_frequency': 0.01
           }
         },

From f31d5ae4484745a8da6093eda66a4caa02b3ef1b Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Wed, 15 May 2019 17:31:56 +0200
Subject: [PATCH 17/55] feat(admin_subqueries): remove cutoff_frequency

---
 query/autocomplete_defaults.js                            | 1 -
 test/unit/fixture/autocomplete_linguistic_with_admin.js   | 3 +--
 test/unit/fixture/autocomplete_single_character_street.js | 3 +--
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js
index ba7d54b79..ea121e055 100644
--- a/query/autocomplete_defaults.js
+++ b/query/autocomplete_defaults.js
@@ -53,7 +53,6 @@ module.exports = _.merge({}, peliasQuery.defaults, {
 
   // generic multi_match config
   'multi_match:type': 'cross_fields',
-  'multi_match:cutoff_frequency': 0.01,
 
   'admin:country_a:analyzer': 'standard',
   'admin:country_a:field': 'parent.country_a.ngram',
diff --git a/test/unit/fixture/autocomplete_linguistic_with_admin.js b/test/unit/fixture/autocomplete_linguistic_with_admin.js
index 95b5c4404..f737a52b9 100644
--- a/test/unit/fixture/autocomplete_linguistic_with_admin.js
+++ b/test/unit/fixture/autocomplete_linguistic_with_admin.js
@@ -32,8 +32,7 @@ module.exports = {
             ],
             'query': 'three',
             'analyzer': 'peliasAdmin',
-            'type': 'cross_fields',
-            'cutoff_frequency': 0.01
+            'type': 'cross_fields'
           }
         },
         {
diff --git a/test/unit/fixture/autocomplete_single_character_street.js b/test/unit/fixture/autocomplete_single_character_street.js
index ccea321b9..978f0ddc3 100644
--- a/test/unit/fixture/autocomplete_single_character_street.js
+++ b/test/unit/fixture/autocomplete_single_character_street.js
@@ -40,8 +40,7 @@ module.exports = {
             ],
             'query': 'laird',
             'analyzer': 'peliasAdmin',
-            'type': 'cross_fields',
-            'cutoff_frequency': 0.01
+            'type': 'cross_fields'
           }
         },
         {

From 325c0706bdd6c0856e45da734127b6cdf864a542 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Wed, 15 May 2019 17:50:12 +0200
Subject: [PATCH 18/55] feat(admin_subqueries): move admin matching to MUST
 condition

---
 query/autocomplete.js                         |  6 +--
 .../autocomplete_linguistic_with_admin.js     |  8 ++--
 .../autocomplete_single_character_street.js   | 37 +++++++++----------
 3 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/query/autocomplete.js b/query/autocomplete.js
index a0e24db0b..8b967c2a5 100644
--- a/query/autocomplete.js
+++ b/query/autocomplete.js
@@ -30,14 +30,14 @@ var query = new peliasQuery.layout.FilteredBooleanQuery();
 query.score( views.phrase_first_tokens_only, 'must' );
 query.score( views.ngrams_last_token_only, 'must' );
 
+// admin components
+query.score(peliasQuery.view.admin_multi_match(adminFields, 'peliasAdmin'), 'must');
+
 // address components
 query.score( peliasQuery.view.address('housenumber') );
 query.score( peliasQuery.view.address('street') );
 query.score( peliasQuery.view.address('postcode') );
 
-// admin components
-query.score( peliasQuery.view.admin_multi_match(adminFields, 'peliasAdmin') );
-
 // scoring boost
 query.score( views.boost_exact_matches );
 query.score( peliasQuery.view.focus( views.ngrams_strict ) );
diff --git a/test/unit/fixture/autocomplete_linguistic_with_admin.js b/test/unit/fixture/autocomplete_linguistic_with_admin.js
index f737a52b9..bb58888ba 100644
--- a/test/unit/fixture/autocomplete_linguistic_with_admin.js
+++ b/test/unit/fixture/autocomplete_linguistic_with_admin.js
@@ -13,9 +13,7 @@ module.exports = {
               'query': 'one two'
             }
           }
-        }
-      ],
-      'should': [
+        },
         {
           'multi_match': {
             'fields': [
@@ -34,7 +32,9 @@ module.exports = {
             'analyzer': 'peliasAdmin',
             'type': 'cross_fields'
           }
-        },
+        }
+      ],
+      'should': [
         {
           'match': {
             'phrase.default': {
diff --git a/test/unit/fixture/autocomplete_single_character_street.js b/test/unit/fixture/autocomplete_single_character_street.js
index 978f0ddc3..90a481394 100644
--- a/test/unit/fixture/autocomplete_single_character_street.js
+++ b/test/unit/fixture/autocomplete_single_character_street.js
@@ -12,6 +12,24 @@ module.exports = {
             'query': 'k road'
           }
         }
+      }, {
+        'multi_match': {
+          'fields': [
+            'parent.country.ngram^1',
+            'parent.region.ngram^1',
+            'parent.county.ngram^1',
+            'parent.localadmin.ngram^1',
+            'parent.locality.ngram^1',
+            'parent.borough.ngram^1',
+            'parent.neighbourhood.ngram^1',
+            'parent.locality_a.ngram^1',
+            'parent.region_a.ngram^1',
+            'parent.country_a.ngram^1'
+          ],
+          'query': 'laird',
+          'analyzer': 'peliasAdmin',
+          'type': 'cross_fields'
+        }
       }],
       'should':[
         {
@@ -24,25 +42,6 @@ module.exports = {
             }
           }
         },
-        {
-          'multi_match': {
-            'fields': [
-              'parent.country.ngram^1',
-              'parent.region.ngram^1',
-              'parent.county.ngram^1',
-              'parent.localadmin.ngram^1',
-              'parent.locality.ngram^1',
-              'parent.borough.ngram^1',
-              'parent.neighbourhood.ngram^1',
-              'parent.locality_a.ngram^1',
-              'parent.region_a.ngram^1',
-              'parent.country_a.ngram^1'
-            ],
-            'query': 'laird',
-            'analyzer': 'peliasAdmin',
-            'type': 'cross_fields'
-          }
-        },
         {
           'match': {
             'phrase.default': {

From 5e82ec692d8c47bfe5fed341e50e29c3aebd9ea4 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Thu, 16 May 2019 08:18:38 +0200
Subject: [PATCH 19/55] feat(tokenizer): consider query as complete if the
 final char is a numeral

---
 sanitizer/_tokenizer.js           |  8 +++++-
 test/unit/sanitizer/_tokenizer.js | 41 ++++++++++++++++++++++++++++---
 2 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/sanitizer/_tokenizer.js b/sanitizer/_tokenizer.js
index e1cec8f91..56b41b7c3 100644
--- a/sanitizer/_tokenizer.js
+++ b/sanitizer/_tokenizer.js
@@ -48,6 +48,12 @@ function _sanitize( raw, clean ){
     }
   }
 
+  // if the final character is a numeral then consider all tokens
+  // as complete in order to avoid prefix matching numerals.
+  if (/[0-9]$/.test(text) ) {
+    parserConsumedAllTokens = true;
+  }
+
   // always set 'clean.tokens*' arrays for consistency and to avoid upstream errors.
   clean.tokens = [];
   clean.tokens_complete = [];
@@ -61,7 +67,7 @@ function _sanitize( raw, clean ){
     // see: settings.analysis.tokenizer.peliasNameTokenizer
     clean.tokens = text
       .split(/[\s,\\\/]+/) // split on delimeters
-      .filter(function(el){return el;}); // remove empty elements
+      .filter(el => el); // remove empty elements
   } else {
     // text is empty, this sanitizer should be a no-op
     return messages;
diff --git a/test/unit/sanitizer/_tokenizer.js b/test/unit/sanitizer/_tokenizer.js
index a0eb2371e..6ead9f76b 100644
--- a/test/unit/sanitizer/_tokenizer.js
+++ b/test/unit/sanitizer/_tokenizer.js
@@ -428,13 +428,11 @@ module.exports.tests.final_token_single_gram = function(test, common) {
 
     // all but last token marked as 'complete'
     t.deepEquals(clean.tokens_complete, [
-      'grolmanstrasse',
+      'grolmanstrasse', '1'
     ], 'tokens produced');
 
     // last token marked as 'incomplete'
-    t.deepEquals(clean.tokens_incomplete, [
-      '1'
-    ], 'tokens produced');
+    t.deepEquals(clean.tokens_incomplete, [], 'tokens produced');
 
     // no errors/warnings produced
     t.deepEquals(messages.errors, [], 'no errors');
@@ -534,6 +532,41 @@ module.exports.tests.mixed_delimiter = function(test, common) {
   });
 };
 
+module.exports.tests.numeric_final_char = function (test, common) {
+  test('numeric final char, single token', function (t) {
+
+    var clean = { text: '7-11', parsed_text: { subject: '7-11' } };
+    var messages = sanitizer.sanitize({}, clean);
+
+    // text ends in a numeral: the single token is expected to be complete, none incomplete
+    t.deepEquals(clean.tokens, ['7-11'], 'tokens produced');
+    t.deepEquals(clean.tokens_complete, ['7-11'], 'complete');
+    t.deepEquals(clean.tokens_incomplete, [], 'incomplete');
+
+    // the sanitizer is expected to report neither errors nor warnings
+    t.deepEquals(messages.errors, [], 'no errors');
+    t.deepEquals(messages.warnings, [], 'no warnings');
+
+    t.end();
+  });
+  test('numeric final char, multiple token', function (t) {
+
+    var clean = { text: 'stop 3', parsed_text: { subject: 'stop 3' } };
+    var messages = sanitizer.sanitize({}, clean);
+
+    // text ends in a numeral: both tokens are expected to be complete, none incomplete
+    t.deepEquals(clean.tokens, ['stop', '3'], 'tokens produced');
+    t.deepEquals(clean.tokens_complete, ['stop', '3'], 'complete');
+    t.deepEquals(clean.tokens_incomplete, [], 'incomplete');
+
+    // the sanitizer is expected to report neither errors nor warnings
+    t.deepEquals(messages.errors, [], 'no errors');
+    t.deepEquals(messages.warnings, [], 'no warnings');
+
+    t.end();
+  });
+};
+
 module.exports.all = function (tape, common) {
   function test(name, testFunction) {
     return tape('sanitizeR _tokenizer: ' + name, testFunction);

From 74a337df914ac067660709489580f462cd261f3e Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Thu, 16 May 2019 10:16:04 +0200
Subject: [PATCH 20/55] feat(autocomplete): test removing exact_matching
 subquery

---
 query/autocomplete.js                                |  1 -
 test/unit/fixture/autocomplete_custom_boosts.json    | 12 ------------
 .../fixture/autocomplete_linguistic_final_token.js   | 11 -----------
 .../autocomplete_linguistic_multiple_tokens.js       | 12 ------------
 .../fixture/autocomplete_linguistic_with_admin.js    | 12 ------------
 .../fixture/autocomplete_single_character_street.js  | 12 ------------
 .../autocomplete_token_matching_permutations.js      | 12 ++----------
 test/unit/query/autocomplete_with_custom_boosts.js   |  2 +-
 8 files changed, 3 insertions(+), 71 deletions(-)

diff --git a/query/autocomplete.js b/query/autocomplete.js
index 8b967c2a5..8ad2606d6 100644
--- a/query/autocomplete.js
+++ b/query/autocomplete.js
@@ -39,7 +39,6 @@ query.score( peliasQuery.view.address('street') );
 query.score( peliasQuery.view.address('postcode') );
 
 // scoring boost
-query.score( views.boost_exact_matches );
 query.score( peliasQuery.view.focus( views.ngrams_strict ) );
 query.score( peliasQuery.view.popularity( views.pop_subquery ) );
 query.score( peliasQuery.view.population( views.pop_subquery ) );
diff --git a/test/unit/fixture/autocomplete_custom_boosts.json b/test/unit/fixture/autocomplete_custom_boosts.json
index 361de970f..6bf9b7994 100644
--- a/test/unit/fixture/autocomplete_custom_boosts.json
+++ b/test/unit/fixture/autocomplete_custom_boosts.json
@@ -18,18 +18,6 @@
           }
         ],
         "should": [
-          {
-            "match": {
-              "phrase.default": {
-                "analyzer": "peliasPhrase",
-                "cutoff_frequency": 0.01,
-                "type": "phrase",
-                "boost": 1,
-                "slop": 3,
-                "query": "foo"
-              }
-            }
-          },
           {
             "function_score": {
               "query": {
diff --git a/test/unit/fixture/autocomplete_linguistic_final_token.js b/test/unit/fixture/autocomplete_linguistic_final_token.js
index 5967f9b08..5bfab75d6 100644
--- a/test/unit/fixture/autocomplete_linguistic_final_token.js
+++ b/test/unit/fixture/autocomplete_linguistic_final_token.js
@@ -14,17 +14,6 @@ module.exports = {
         }
       }],
       'should':[{
-        'match': {
-          'phrase.default': {
-            'analyzer': 'peliasPhrase',
-            'cutoff_frequency': 0.01,
-            'boost': 1,
-            'slop': 3,
-            'query': 'one',
-            'type': 'phrase'
-          }
-        }
-      },{
         'function_score': {
           'query': {
             'match_all': {}
diff --git a/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js b/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js
index d6fb9275f..679fa5aa8 100644
--- a/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js
+++ b/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js
@@ -31,18 +31,6 @@ module.exports = {
         }
       }],
       'should':[
-        {
-          'match': {
-            'phrase.default': {
-              'analyzer' : 'peliasPhrase',
-              'type' : 'phrase',
-              'boost' : 1,
-              'slop' : 3,
-              'cutoff_frequency': 0.01,
-              'query' : 'one two'
-            }
-          }
-        },
         {
         'function_score': {
           'query': {
diff --git a/test/unit/fixture/autocomplete_linguistic_with_admin.js b/test/unit/fixture/autocomplete_linguistic_with_admin.js
index bb58888ba..e746149ae 100644
--- a/test/unit/fixture/autocomplete_linguistic_with_admin.js
+++ b/test/unit/fixture/autocomplete_linguistic_with_admin.js
@@ -35,18 +35,6 @@ module.exports = {
         }
       ],
       'should': [
-        {
-          'match': {
-            'phrase.default': {
-              'analyzer' : 'peliasPhrase',
-              'cutoff_frequency': 0.01,
-              'type' : 'phrase',
-              'boost' : 1,
-              'slop' : 3,
-              'query' : 'one two'
-            }
-          }
-        },
         {
           'function_score': {
             'query': {
diff --git a/test/unit/fixture/autocomplete_single_character_street.js b/test/unit/fixture/autocomplete_single_character_street.js
index 90a481394..6fa061ed4 100644
--- a/test/unit/fixture/autocomplete_single_character_street.js
+++ b/test/unit/fixture/autocomplete_single_character_street.js
@@ -42,18 +42,6 @@ module.exports = {
             }
           }
         },
-        {
-          'match': {
-            'phrase.default': {
-              'analyzer' : 'peliasPhrase',
-              'type' : 'phrase',
-              'boost' : 1,
-              'slop' : 3,
-              'cutoff_frequency': 0.01,
-              'query' : 'k road'
-            }
-          }
-        },
         {
         'function_score': {
           'query': {
diff --git a/test/unit/query/autocomplete_token_matching_permutations.js b/test/unit/query/autocomplete_token_matching_permutations.js
index 0806014a9..069465ed8 100644
--- a/test/unit/query/autocomplete_token_matching_permutations.js
+++ b/test/unit/query/autocomplete_token_matching_permutations.js
@@ -17,8 +17,7 @@ const defaults = new peliasQuery.Vars( require('../../../query/autocomplete_defa
 const views = {
   ngrams_last_token_only:     require('../../../query/view/ngrams_last_token_only'),
   phrase_first_tokens_only:   require('../../../query/view/phrase_first_tokens_only'),
-  pop_subquery:               require('../../../query/view/pop_subquery'),
-  boost_exact_matches:        require('../../../query/view/boost_exact_matches')
+  pop_subquery:               require('../../../query/view/pop_subquery')
 };
 
 module.exports.tests = {};
@@ -44,7 +43,7 @@ function assert( t, actual, expected, debug ){
   }
 
   t.deepEqual(_actual.type, 'autocomplete', 'query type set');
-  t.deepEqual(_actual.body.query.bool, _expected);
+  t.deepEqual(_actual.body.query.bool, _expected, 'autocomplete_token_matching_permutations');
   t.end();
 }
 
@@ -83,7 +82,6 @@ module.exports.tests.single_token = function(test, common) {
     assert( t, generate( clean ), {
       must: [ views.phrase_first_tokens_only( vs ) ],
       should: [
-        views.boost_exact_matches( vs ),
         peliasQuery.view.popularity( views.pop_subquery )( vs ),
         peliasQuery.view.population( views.pop_subquery )( vs )
       ]
@@ -124,7 +122,6 @@ module.exports.tests.single_token = function(test, common) {
     assert( t, generate( clean ), {
       must: [ views.phrase_first_tokens_only( vs ) ],
       should: [
-        views.boost_exact_matches( vs ),
         peliasQuery.view.popularity( views.pop_subquery )( vs ),
         peliasQuery.view.population( views.pop_subquery )( vs )
       ]
@@ -165,7 +162,6 @@ module.exports.tests.single_token = function(test, common) {
     assert( t, generate( clean ), {
       must: [ views.phrase_first_tokens_only( vs ) ],
       should: [
-        views.boost_exact_matches( vs ),
         peliasQuery.view.popularity( views.pop_subquery )( vs ),
         peliasQuery.view.population( views.pop_subquery )( vs )
       ]
@@ -191,7 +187,6 @@ module.exports.tests.multiple_tokens = function(test, common) {
         views.ngrams_last_token_only( vs )
       ],
       should: [
-        views.boost_exact_matches( vs ),
         peliasQuery.view.popularity( views.pop_subquery )( vs ),
         peliasQuery.view.population( views.pop_subquery )( vs )
       ]
@@ -214,7 +209,6 @@ module.exports.tests.multiple_tokens = function(test, common) {
         views.phrase_first_tokens_only( vs )
       ],
       should: [
-        views.boost_exact_matches( vs ),
         peliasQuery.view.popularity( views.pop_subquery )( vs ),
         peliasQuery.view.population( views.pop_subquery )( vs )
       ]
@@ -238,7 +232,6 @@ module.exports.tests.multiple_tokens = function(test, common) {
         views.ngrams_last_token_only( vs )
       ],
       should: [
-        views.boost_exact_matches( vs ),
         peliasQuery.view.popularity( views.pop_subquery )( vs ),
         peliasQuery.view.population( views.pop_subquery )( vs )
       ]
@@ -261,7 +254,6 @@ module.exports.tests.multiple_tokens = function(test, common) {
         views.phrase_first_tokens_only( vs )
       ],
       should: [
-        views.boost_exact_matches( vs ),
         peliasQuery.view.popularity( views.pop_subquery )( vs ),
         peliasQuery.view.population( views.pop_subquery )( vs )
       ]
diff --git a/test/unit/query/autocomplete_with_custom_boosts.js b/test/unit/query/autocomplete_with_custom_boosts.js
index 01c292757..3cc38ef8d 100644
--- a/test/unit/query/autocomplete_with_custom_boosts.js
+++ b/test/unit/query/autocomplete_with_custom_boosts.js
@@ -40,7 +40,7 @@ module.exports.tests.query = function(test, common) {
 
     const actual_query = JSON.parse( JSON.stringify( autocomplete_query_module(clean) ) );
 
-    t.deepEqual(actual_query, expected_query, 'query as expected');
+    t.deepEqual(actual_query, expected_query, 'autocomplete_custom_boosts');
     t.pass();
     t.end();
   });

From e0b8a6b350c8d5fd054015fef8f3234f8e1d46ce Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Thu, 16 May 2019 10:19:24 +0200
Subject: [PATCH 21/55] feat(admin_subqueries): add cutoff_frequency

---
 query/autocomplete_defaults.js                            | 1 +
 test/unit/fixture/autocomplete_linguistic_with_admin.js   | 3 ++-
 test/unit/fixture/autocomplete_single_character_street.js | 3 ++-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js
index ea121e055..ba7d54b79 100644
--- a/query/autocomplete_defaults.js
+++ b/query/autocomplete_defaults.js
@@ -53,6 +53,7 @@ module.exports = _.merge({}, peliasQuery.defaults, {
 
   // generic multi_match config
   'multi_match:type': 'cross_fields',
+  'multi_match:cutoff_frequency': 0.01,
 
   'admin:country_a:analyzer': 'standard',
   'admin:country_a:field': 'parent.country_a.ngram',
diff --git a/test/unit/fixture/autocomplete_linguistic_with_admin.js b/test/unit/fixture/autocomplete_linguistic_with_admin.js
index e746149ae..2d74a79a8 100644
--- a/test/unit/fixture/autocomplete_linguistic_with_admin.js
+++ b/test/unit/fixture/autocomplete_linguistic_with_admin.js
@@ -30,7 +30,8 @@ module.exports = {
             ],
             'query': 'three',
             'analyzer': 'peliasAdmin',
-            'type': 'cross_fields'
+            'type': 'cross_fields',
+            'cutoff_frequency': 0.01
           }
         }
       ],
diff --git a/test/unit/fixture/autocomplete_single_character_street.js b/test/unit/fixture/autocomplete_single_character_street.js
index 6fa061ed4..5aeb95275 100644
--- a/test/unit/fixture/autocomplete_single_character_street.js
+++ b/test/unit/fixture/autocomplete_single_character_street.js
@@ -28,7 +28,8 @@ module.exports = {
           ],
           'query': 'laird',
           'analyzer': 'peliasAdmin',
-          'type': 'cross_fields'
+          'type': 'cross_fields',
+          'cutoff_frequency': 0.01
         }
       }],
       'should':[

From 235fafef2499c61fa8a0cb468fa148b003421a9c Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Thu, 16 May 2019 10:51:09 +0200
Subject: [PATCH 22/55] feat(pelias_parser): admin queries - remove subject
 from admin subquery

---
 sanitizer/_text_pelias_parser.js           | 30 ++++++++++++++++++++++
 test/unit/sanitizer/_text_pelias_parser.js | 20 ++++++++++++---
 2 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/sanitizer/_text_pelias_parser.js b/sanitizer/_text_pelias_parser.js
index 607d1aefb..ed3e1a738 100644
--- a/sanitizer/_text_pelias_parser.js
+++ b/sanitizer/_text_pelias_parser.js
@@ -149,14 +149,44 @@ function parse (clean) {
   // a locality query
   else if (!_.isEmpty(parsed_text.locality)) {
     parsed_text.subject = parsed_text.locality;
+
+    // remove the locality name from $admin
+    if ( parsed_text.admin ) {
+      let width = parsed_text.subject.length;
+      let cut = parsed_text.admin.substr(0, width);
+      if( cut === parsed_text.subject ){
+        parsed_text.admin = _.trim(parsed_text.admin.substr(width), ', ');
+        if( !parsed_text.admin.length ){ delete parsed_text.admin; }
+      }
+    }
   }
   // a region query
   else if (!_.isEmpty(parsed_text.region)) {
     parsed_text.subject = parsed_text.region;
+
+    // remove the region name from $admin
+    if (parsed_text.admin) {
+      let width = parsed_text.subject.length;
+      let cut = parsed_text.admin.substr(0, width);
+      if (cut === parsed_text.subject) {
+        parsed_text.admin = _.trim(parsed_text.admin.substr(width), ', ');
+        if( !parsed_text.admin.length ){ delete parsed_text.admin; }
+      }
+    }
   }
   // a country query
   else if (!_.isEmpty(parsed_text.country)) {
     parsed_text.subject = parsed_text.country;
+
+    // remove the country name from $admin
+    if (parsed_text.admin) {
+      let width = parsed_text.subject.length;
+      let cut = parsed_text.admin.substr(0, width);
+      if (cut === parsed_text.subject) {
+        parsed_text.admin = _.trim(parsed_text.admin.substr(width), ', ');
+        if (!parsed_text.admin.length) { delete parsed_text.admin; }
+      }
+    }
   }
   
   // unknown query type
diff --git a/test/unit/sanitizer/_text_pelias_parser.js b/test/unit/sanitizer/_text_pelias_parser.js
index ddc32bee2..79e30f697 100644
--- a/test/unit/sanitizer/_text_pelias_parser.js
+++ b/test/unit/sanitizer/_text_pelias_parser.js
@@ -43,7 +43,7 @@ module.exports.tests.text_parser = function (test, common) {
   cases.push(['chelsea, london', {
     subject: 'chelsea',
     locality: 'chelsea',
-    admin: 'chelsea, london'
+    admin: 'london'
   }]);
 
   // Query with one token
@@ -132,8 +132,22 @@ module.exports.tests.text_parser = function (test, common) {
   // AUS - state only
   cases.push(['NSW', {
     subject: 'NSW',
-    region: 'NSW',
-    admin: 'NSW'
+    region: 'NSW'
+  }]);
+
+  // when admin name is $subject it should
+  // be removed from $admin
+  cases.push(['paris texas', {
+    subject: 'paris',
+    locality: 'paris',
+    region: 'texas',
+    admin: 'texas'
+  }]);
+  cases.push(['rome italy', {
+    subject: 'rome',
+    locality: 'rome',
+    country: 'italy',
+    admin: 'italy'
   }]);
 
   cases.forEach(testcase => {

From f2816882e15ac379033c1e0be75b3d5f3d4168af Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Thu, 16 May 2019 11:10:11 +0200
Subject: [PATCH 23/55] feat(admin_subqueries): remove cutoff_frequency

---
 query/autocomplete_defaults.js                            | 5 ++++-
 test/unit/fixture/autocomplete_linguistic_with_admin.js   | 3 +--
 test/unit/fixture/autocomplete_single_character_street.js | 3 +--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js
index ba7d54b79..121d64284 100644
--- a/query/autocomplete_defaults.js
+++ b/query/autocomplete_defaults.js
@@ -53,7 +53,10 @@ module.exports = _.merge({}, peliasQuery.defaults, {
 
   // generic multi_match config
   'multi_match:type': 'cross_fields',
-  'multi_match:cutoff_frequency': 0.01,
+
+  // setting 'cutoff_frequency' will result in very common
+  // terms such as country not scoring at all
+  // 'multi_match:cutoff_frequency': 0.01,
 
   'admin:country_a:analyzer': 'standard',
   'admin:country_a:field': 'parent.country_a.ngram',
diff --git a/test/unit/fixture/autocomplete_linguistic_with_admin.js b/test/unit/fixture/autocomplete_linguistic_with_admin.js
index 2d74a79a8..e746149ae 100644
--- a/test/unit/fixture/autocomplete_linguistic_with_admin.js
+++ b/test/unit/fixture/autocomplete_linguistic_with_admin.js
@@ -30,8 +30,7 @@ module.exports = {
             ],
             'query': 'three',
             'analyzer': 'peliasAdmin',
-            'type': 'cross_fields',
-            'cutoff_frequency': 0.01
+            'type': 'cross_fields'
           }
         }
       ],
diff --git a/test/unit/fixture/autocomplete_single_character_street.js b/test/unit/fixture/autocomplete_single_character_street.js
index 5aeb95275..6fa061ed4 100644
--- a/test/unit/fixture/autocomplete_single_character_street.js
+++ b/test/unit/fixture/autocomplete_single_character_street.js
@@ -28,8 +28,7 @@ module.exports = {
           ],
           'query': 'laird',
           'analyzer': 'peliasAdmin',
-          'type': 'cross_fields',
-          'cutoff_frequency': 0.01
+          'type': 'cross_fields'
         }
       }],
       'should':[

From cfbd5f7f0189048a644e6837860162e261d2195f Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Thu, 16 May 2019 11:12:15 +0200
Subject: [PATCH 24/55] feat(autocomplete): use phrase index for complete
 tokens

---
 query/autocomplete_defaults.js                               | 2 +-
 test/unit/fixture/autocomplete_custom_boosts.json            | 2 +-
 test/unit/fixture/autocomplete_linguistic_final_token.js     | 2 +-
 test/unit/fixture/autocomplete_linguistic_multiple_tokens.js | 2 +-
 test/unit/fixture/autocomplete_linguistic_with_admin.js      | 2 +-
 test/unit/fixture/autocomplete_single_character_street.js    | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js
index 121d64284..b5a5ed2bf 100644
--- a/query/autocomplete_defaults.js
+++ b/query/autocomplete_defaults.js
@@ -22,7 +22,7 @@ module.exports = _.merge({}, peliasQuery.defaults, {
   'ngram:cutoff_frequency': 0.01,
 
   'phrase:analyzer': 'peliasQuery',
-  'phrase:field': 'name.default',
+  'phrase:field': 'phrase.default',
   'phrase:boost': 1,
   'phrase:slop': 3,
   'phrase:cutoff_frequency': 0.01,
diff --git a/test/unit/fixture/autocomplete_custom_boosts.json b/test/unit/fixture/autocomplete_custom_boosts.json
index 6bf9b7994..a29ceab55 100644
--- a/test/unit/fixture/autocomplete_custom_boosts.json
+++ b/test/unit/fixture/autocomplete_custom_boosts.json
@@ -6,7 +6,7 @@
         "must": [
           {
             "match": {
-              "name.default": {
+              "phrase.default": {
                 "analyzer": "peliasQuery",
                 "cutoff_frequency": 0.01,
                 "type": "phrase",
diff --git a/test/unit/fixture/autocomplete_linguistic_final_token.js b/test/unit/fixture/autocomplete_linguistic_final_token.js
index 5bfab75d6..a55f842a9 100644
--- a/test/unit/fixture/autocomplete_linguistic_final_token.js
+++ b/test/unit/fixture/autocomplete_linguistic_final_token.js
@@ -3,7 +3,7 @@ module.exports = {
     'bool': {
       'must': [{
         'match': {
-          'name.default': {
+          'phrase.default': {
             'analyzer': 'peliasQuery',
             'cutoff_frequency': 0.01,
             'boost': 1,
diff --git a/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js b/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js
index 679fa5aa8..5e8db15e1 100644
--- a/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js
+++ b/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js
@@ -3,7 +3,7 @@ module.exports = {
     'bool': {
       'must': [{
         'match': {
-          'name.default': {
+          'phrase.default': {
             'analyzer': 'peliasQuery',
             'type': 'phrase',
             'boost': 1,
diff --git a/test/unit/fixture/autocomplete_linguistic_with_admin.js b/test/unit/fixture/autocomplete_linguistic_with_admin.js
index e746149ae..088b8da65 100644
--- a/test/unit/fixture/autocomplete_linguistic_with_admin.js
+++ b/test/unit/fixture/autocomplete_linguistic_with_admin.js
@@ -4,7 +4,7 @@ module.exports = {
       'must': [
         {
           'match': {
-            'name.default': {
+            'phrase.default': {
               'analyzer': 'peliasQuery',
               'type': 'phrase',
               'boost': 1,
diff --git a/test/unit/fixture/autocomplete_single_character_street.js b/test/unit/fixture/autocomplete_single_character_street.js
index 6fa061ed4..a4ea7695a 100644
--- a/test/unit/fixture/autocomplete_single_character_street.js
+++ b/test/unit/fixture/autocomplete_single_character_street.js
@@ -3,7 +3,7 @@ module.exports = {
     'bool': {
       'must': [{
         'match': {
-          'name.default': {
+          'phrase.default': {
             'analyzer': 'peliasQuery',
             'cutoff_frequency': 0.01,
             'type': 'phrase',

From 68a0776db98ae728b62d1aaf4e16e05553257894 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Thu, 16 May 2019 11:34:28 +0200
Subject: [PATCH 25/55] feat(parser): remove parsed_text.name

---
 sanitizer/_text_pelias_parser.js           |  8 ++++----
 test/unit/sanitizer/_text_pelias_parser.js | 14 ++++----------
 2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/sanitizer/_text_pelias_parser.js b/sanitizer/_text_pelias_parser.js
index ed3e1a738..722dd03b7 100644
--- a/sanitizer/_text_pelias_parser.js
+++ b/sanitizer/_text_pelias_parser.js
@@ -124,7 +124,7 @@ function parse (clean) {
   postfix = postfix.replace(/\s+/g, ' ').trim();
 
   // 3. store the unparsed characters in fields which can be used for querying
-  if (prefix.length) { parsed_text.name = prefix; }
+  // if (prefix.length) { parsed_text.name = prefix; }
   if (postfix.length) { parsed_text.admin = postfix; }
 
   // 4. set 'subject', this is the text which will target the 'name.*'
@@ -138,9 +138,9 @@ function parse (clean) {
   else if (!_.isEmpty(parsed_text.street)) {
     parsed_text.subject = parsed_text.street;
   }
-  // query with a name such as a venue query
-  else if (!_.isEmpty(parsed_text.name)){
-    parsed_text.subject = parsed_text.name;
+  // query with a $prefix such as a venue query
+  else if (!_.isEmpty(prefix)){
+    parsed_text.subject = prefix;
   }
   // a postcode query
   else if (!_.isEmpty(parsed_text.postcode)) {
diff --git a/test/unit/sanitizer/_text_pelias_parser.js b/test/unit/sanitizer/_text_pelias_parser.js
index 79e30f697..462572ce8 100644
--- a/test/unit/sanitizer/_text_pelias_parser.js
+++ b/test/unit/sanitizer/_text_pelias_parser.js
@@ -24,7 +24,6 @@ module.exports.tests.text_parser = function (test, common) {
   // USA queries
   cases.push(['soho, new york, NY', {
     subject: 'soho',
-    name: 'soho',
     locality: 'new york',
     region: 'NY',
     admin: 'new york, NY'
@@ -48,32 +47,27 @@ module.exports.tests.text_parser = function (test, common) {
 
   // Query with one token
   cases.push(['yugolsavia', {
-    subject: 'yugolsavia',
-    name: 'yugolsavia'
+    subject: 'yugolsavia'
   }]);
 
   // Query with two tokens, no numbers
   cases.push(['small town', {
-    subject: 'small town',
-    name: 'small town'
+    subject: 'small town'
   }]);
 
   // Query with two tokens, number first
   cases.push(['123 main', {
-    subject: '123 main',
-    name: '123 main'
+    subject: '123 main'
   }]);
 
   // Query with two tokens, number second
   cases.push(['main 123', {
-    subject: 'main 123',
-    name: 'main 123'
+    subject: 'main 123'
   }]);
 
   // Query with many tokens
   cases.push(['main particle new york', {
     subject: 'main particle',
-    name: 'main particle',
     locality: 'new york',
     admin: 'new york'
   }]);

From 9258f2e264a3f87fee783301acb7cf7fe29f5964 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Thu, 16 May 2019 11:38:34 +0200
Subject: [PATCH 26/55] feat(parser): do not consider address parses as safe to
 use with an ngrams index due to parses potentially containing partial
 suffixes

---
 sanitizer/_tokenizer.js           | 11 ++++-------
 test/unit/sanitizer/_tokenizer.js |  4 ++--
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/sanitizer/_tokenizer.js b/sanitizer/_tokenizer.js
index 56b41b7c3..3b9bb9c7e 100644
--- a/sanitizer/_tokenizer.js
+++ b/sanitizer/_tokenizer.js
@@ -31,15 +31,12 @@ function _sanitize( raw, clean ){
     if( _.has(clean.parsed_text, 'subject') ){
       text = clean.parsed_text.subject; // use this string instead
 
-      // when both housenumber and street fields are present then the pelias parser
-      // will simply set $subject to be a concatination of these fields.
-      // in this case we can be sure that all tokens were complete
-      if (_.has(clean.parsed_text, 'housenumber') && _.has(clean.parsed_text, 'street')){
-        parserConsumedAllTokens = true;
-      }
+      // note: we cannot be sure that the input is complete if a street is
+      // detected because the parser will detect partially completed suffixes
+      // which are not safe to match against an ngrams index
     
       // when $subject exactly equals one of the admin fields
-      else if (
+      if (
         text === clean.parsed_text.locality ||
         text === clean.parsed_text.region ||
         text === clean.parsed_text.country) {
diff --git a/test/unit/sanitizer/_tokenizer.js b/test/unit/sanitizer/_tokenizer.js
index 6ead9f76b..626639911 100644
--- a/test/unit/sanitizer/_tokenizer.js
+++ b/test/unit/sanitizer/_tokenizer.js
@@ -192,8 +192,8 @@ module.exports.tests.sanity_checks = function(test, common) {
 
     // favor clean.parsed_text.subject over clean.text
     t.deepEquals(clean.tokens, [ '190', 'foo', 'st' ], 'use street name + housenumber');
-    t.deepEquals(clean.tokens_complete, [ '190', 'foo', 'st' ], 'use street name + housenumber');
-    t.deepEquals(clean.tokens_incomplete, [], 'no tokens');
+    t.deepEquals(clean.tokens_complete, [ '190', 'foo' ], 'complete');
+    t.deepEquals(clean.tokens_incomplete, [ 'st' ], 'incomplete');
 
     // no errors/warnings produced
     t.deepEquals(messages.errors, [], 'no errors');

From 7a62b3d3ecd01e38129a7234f944c3aefce0c66b Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Thu, 16 May 2019 13:00:49 +0200
Subject: [PATCH 27/55] feat(parser): bump pelias/parser version

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index ca0a0d883..c58bfaa2a 100644
--- a/package.json
+++ b/package.json
@@ -56,7 +56,7 @@
     "pelias-logger": "^1.2.0",
     "pelias-microservice-wrapper": "^1.7.0",
     "pelias-model": "^7.0.0",
-    "pelias-parser": "^1.13.0",
+    "pelias-parser": "^1.14.0",
     "pelias-query": "^9.14.0",
     "pelias-sorting": "^1.2.0",
     "predicates": "^2.0.0",

From 6bcd91de596eee79b4445d6d3ba114372595e9a6 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Thu, 16 May 2019 13:53:11 +0200
Subject: [PATCH 28/55] feat(tokenizer): consider query as complete if the
 $subject is not at the end of $clean.text

---
 sanitizer/_tokenizer.js           | 15 ++++++++++++---
 test/unit/sanitizer/_tokenizer.js | 30 ++++++++++++++++++++++++++++--
 2 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/sanitizer/_tokenizer.js b/sanitizer/_tokenizer.js
index 3b9bb9c7e..f4612e11c 100644
--- a/sanitizer/_tokenizer.js
+++ b/sanitizer/_tokenizer.js
@@ -33,10 +33,19 @@ function _sanitize( raw, clean ){
 
       // note: we cannot be sure that the input is complete if a street is
       // detected because the parser will detect partially completed suffixes
-      // which are not safe to match against an ngrams index
-    
+      // which are not safe to match against a phrase index
+      if( _.has(clean.parsed_text, 'housenumber') && _.has(clean.parsed_text, 'street') ){
+        parserConsumedAllTokens = false;
+      }
+
+      // when $subject is not the end of $clean.text
+      // then there must be tokens coming afterwards
+      else if (!clean.text.endsWith(text)) {
+        parserConsumedAllTokens = true;
+      }
+
       // when $subject exactly equals one of the admin fields
-      if (
+      else if (
         text === clean.parsed_text.locality ||
         text === clean.parsed_text.region ||
         text === clean.parsed_text.country) {
diff --git a/test/unit/sanitizer/_tokenizer.js b/test/unit/sanitizer/_tokenizer.js
index 626639911..9edd4a4ce 100644
--- a/test/unit/sanitizer/_tokenizer.js
+++ b/test/unit/sanitizer/_tokenizer.js
@@ -172,8 +172,8 @@ module.exports.tests.sanity_checks = function(test, common) {
 
     // favor clean.parsed_text.subject over clean.text
     t.deepEquals(clean.tokens, [ 'foo' ], 'use clean.parsed_text.subject');
-    t.deepEquals(clean.tokens_complete, [], 'complete');
-    t.deepEquals(clean.tokens_incomplete, [ 'foo' ], 'incomplete');
+    t.deepEquals(clean.tokens_complete, [ 'foo' ], 'complete');
+    t.deepEquals(clean.tokens_incomplete, [ ], 'incomplete');
 
     // no errors/warnings produced
     t.deepEquals(messages.errors, [], 'no errors');
@@ -567,6 +567,32 @@ module.exports.tests.numeric_final_char = function (test, common) {
   });
 };
 
+module.exports.tests.subject_complete = function (test, common) {
+  test('subject complete', function (t) {
+
+    var clean = {
+      text: '혜화로, seoul',
+      parsed_text: {
+        subject: '혜화로',
+        locality: 'seoul',
+        admin: 'seoul'
+      }
+    };
+    var messages = sanitizer.sanitize({}, clean);
+
+    // tokens produced
+    t.deepEquals(clean.tokens, ['혜화로'], 'tokens produced');
+    t.deepEquals(clean.tokens_complete, ['혜화로'], 'complete');
+    t.deepEquals(clean.tokens_incomplete, [], 'incomplete');
+
+    // no errors/warnings produced
+    t.deepEquals(messages.errors, [], 'no errors');
+    t.deepEquals(messages.warnings, [], 'no warnings');
+
+    t.end();
+  });
+};
+
 module.exports.all = function (tape, common) {
   function test(name, testFunction) {
     return tape('sanitizeR _tokenizer: ' + name, testFunction);

From 5923a1ace4d213dd5b31e3b4b894e11d357da997 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Thu, 16 May 2019 15:55:56 +0200
Subject: [PATCH 29/55] feat(parser): bump pelias/parser version

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index c58bfaa2a..9fd0e65bb 100644
--- a/package.json
+++ b/package.json
@@ -56,7 +56,7 @@
     "pelias-logger": "^1.2.0",
     "pelias-microservice-wrapper": "^1.7.0",
     "pelias-model": "^7.0.0",
-    "pelias-parser": "^1.14.0",
+    "pelias-parser": "^1.16.0",
     "pelias-query": "^9.14.0",
     "pelias-sorting": "^1.2.0",
     "predicates": "^2.0.0",

From 12af8cc9a8df0d882b1e69b0016389a95f3d9aba Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Thu, 16 May 2019 16:15:53 +0200
Subject: [PATCH 30/55] feat(autocomplete): experiment adding name.default to
 admin multi_match

---
 query/autocomplete.js                                     | 8 ++++++++
 test/unit/fixture/autocomplete_linguistic_with_admin.js   | 3 ++-
 test/unit/fixture/autocomplete_single_character_street.js | 3 ++-
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/query/autocomplete.js b/query/autocomplete.js
index 8ad2606d6..475c85a95 100644
--- a/query/autocomplete.js
+++ b/query/autocomplete.js
@@ -21,6 +21,9 @@ var views = {
 // add abbrevations for the fields pelias/parser is able to detect.
 var adminFields = placeTypes.concat(['locality_a', 'region_a', 'country_a']);
 
+// add name field to improve venue matching
+adminFields = adminFields.concat(['add_name_to_multimatch']);
+
 //------------------------------
 // autocomplete query
 //------------------------------
@@ -160,6 +163,11 @@ function generateQuery( clean ){
     textParser( clean, vs );
   }
 
+  let isAdminSet = adminFields.some(field => vs.isset('input:' + field));
+  if ( isAdminSet ){ vs.var('input:add_name_to_multimatch', 'enabled'); }
+
+  vs.var('admin:add_name_to_multimatch:field', 'name.default');
+
   return {
     type: 'autocomplete',
     body: query.render(vs)
diff --git a/test/unit/fixture/autocomplete_linguistic_with_admin.js b/test/unit/fixture/autocomplete_linguistic_with_admin.js
index 088b8da65..3d1e77583 100644
--- a/test/unit/fixture/autocomplete_linguistic_with_admin.js
+++ b/test/unit/fixture/autocomplete_linguistic_with_admin.js
@@ -26,7 +26,8 @@ module.exports = {
               'parent.neighbourhood.ngram^1',
               'parent.locality_a.ngram^1',
               'parent.region_a.ngram^1',
-              'parent.country_a.ngram^1'
+              'parent.country_a.ngram^1',
+              'name.default^1'
             ],
             'query': 'three',
             'analyzer': 'peliasAdmin',
diff --git a/test/unit/fixture/autocomplete_single_character_street.js b/test/unit/fixture/autocomplete_single_character_street.js
index a4ea7695a..952da297f 100644
--- a/test/unit/fixture/autocomplete_single_character_street.js
+++ b/test/unit/fixture/autocomplete_single_character_street.js
@@ -24,7 +24,8 @@ module.exports = {
             'parent.neighbourhood.ngram^1',
             'parent.locality_a.ngram^1',
             'parent.region_a.ngram^1',
-            'parent.country_a.ngram^1'
+            'parent.country_a.ngram^1',
+            'name.default^1'
           ],
           'query': 'laird',
           'analyzer': 'peliasAdmin',

From 43d727b0ff94fa7195e43a43714169faf3a07dfc Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Mon, 3 Jun 2019 12:46:01 +0200
Subject: [PATCH 31/55] feat(autocomplete): progress commit

---
 package.json                                  |  2 +-
 query/autocomplete_defaults.js                | 10 ++++++
 sanitizer/_address_layer_filter.js            | 17 ++++++++--
 sanitizer/_text_pelias_parser.js              | 31 +++++++++++++++++--
 sanitizer/_tokenizer.js                       |  2 +-
 .../autocomplete_linguistic_with_admin.js     |  2 ++
 .../autocomplete_single_character_street.js   |  2 ++
 test/unit/sanitizer/_text_pelias_parser.js    |  8 +++++
 8 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/package.json b/package.json
index 9fd0e65bb..b6de70d2f 100644
--- a/package.json
+++ b/package.json
@@ -56,7 +56,7 @@
     "pelias-logger": "^1.2.0",
     "pelias-microservice-wrapper": "^1.7.0",
     "pelias-model": "^7.0.0",
-    "pelias-parser": "^1.16.0",
+    "pelias-parser": "^1.21.0",
     "pelias-query": "^9.14.0",
     "pelias-sorting": "^1.2.0",
     "predicates": "^2.0.0",
diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js
index b5a5ed2bf..64ee3cad1 100644
--- a/query/autocomplete_defaults.js
+++ b/query/autocomplete_defaults.js
@@ -68,6 +68,11 @@ module.exports = _.merge({}, peliasQuery.defaults, {
   'admin:country:boost': 1,
   'admin:country:cutoff_frequency': 0.01,
 
+  'admin:dependency:analyzer': 'peliasAdmin',
+  'admin:dependency:field': 'parent.dependency.ngram',
+  'admin:dependency:boost': 1,
+  'admin:dependency:cutoff_frequency': 0.01,
+
   'admin:region:analyzer': 'peliasAdmin',
   'admin:region:field': 'parent.region.ngram',
   'admin:region:boost': 1,
@@ -78,6 +83,11 @@ module.exports = _.merge({}, peliasQuery.defaults, {
   'admin:region_a:boost': 1,
   'admin:region_a:cutoff_frequency': 0.01,
 
+  'admin:macroregion:analyzer': 'peliasAdmin',
+  'admin:macroregion:field': 'parent.macroregion.ngram',
+  'admin:macroregion:boost': 1,
+  'admin:macroregion:cutoff_frequency': 0.01,
+
   'admin:county:analyzer': 'peliasAdmin',
   'admin:county:field': 'parent.county.ngram',
   'admin:county:boost': 1,
diff --git a/sanitizer/_address_layer_filter.js b/sanitizer/_address_layer_filter.js
index 87c4be5a4..3b6febd2d 100644
--- a/sanitizer/_address_layer_filter.js
+++ b/sanitizer/_address_layer_filter.js
@@ -21,6 +21,8 @@ const check = require('check-types');
  * Update: added additional check that enforces that the input must also contain at least one numeral
  */
 
+ // note: this runs before libpostal (which is a service)
+
 const ADDRESS_FILTER_WARNING = 'performance optimization: excluding \'address\' layer';
 
 function _setup(tm) {
@@ -51,9 +53,15 @@ function _setup(tm) {
       // be subject to change.
       if (check.nonEmptyObject(clean.parsed_text)) {
 
-        // if 'addressit' or 'libpostal' identified input as a street address
         var isStreetAddress = clean.parsed_text.hasOwnProperty('number') && clean.parsed_text.hasOwnProperty('street');
-        if (isStreetAddress) {
+
+        // use $subject where available (pelias parser)
+        if (_.has(clean, 'parsed_text.subject')) {
+          input = clean.parsed_text.subject;
+        }
+
+        // if 'addressit' or 'libpostal' identified input as a street address
+        else if (isStreetAddress) {
           input = clean.parsed_text.number + ' ' + clean.parsed_text.street;
         }
 
@@ -69,6 +77,11 @@ function _setup(tm) {
       // check that at least one numeral was specified
       let hasNumeral = /\d/.test(input);
 
+      // do not consider numeric street names, such as '26 st' in numeric check.
+      if( _.has(clean, 'parsed_text.street') ){
+        hasNumeral = /\d/.test(input.replace(clean.parsed_text.street, ''));
+      }
+
       // if less than two words were specified /or no numeral is present
       // then it is safe to apply the layer filter
       if (totalWords < 2 || !hasNumeral) {
diff --git a/sanitizer/_text_pelias_parser.js b/sanitizer/_text_pelias_parser.js
index 722dd03b7..73d87abc3 100644
--- a/sanitizer/_text_pelias_parser.js
+++ b/sanitizer/_text_pelias_parser.js
@@ -67,9 +67,22 @@ function parse (clean) {
 
   // generate a classification mask, eg:
   // 'Foo Cafe 10 Main St London 10010 Earth'
-  // '         NN SSSSSSS AAAAAA PPPPP      '
+  // '    VVVV NN SSSSSSS AAAAAA PPPPP      '
   let mask = solution.mask(t);
 
+  // special handling of intersection queries
+  // here we do not trust intersection parses which also contain another
+  // classification, such as a house number, postcode or admin field.
+  // this is to avoid errors for queries such as:
+  // eg 'air & space museum, washington, dc'
+  if (parsed_text.street && parsed_text.cross_street) {
+    if (Object.keys(parsed_text).length > 3) {
+      delete parsed_text.street;
+      delete parsed_text.cross_street;
+      mask = mask.replace(/S/g, ' ');
+    }
+  }
+
   // the entire input text as seen by the parser with any postcode classification(s) removed
   let body = t.span.body.split('')
     .map((c, i) => (mask[i] !== 'P') ? c : ' ')
@@ -79,8 +92,13 @@ function parse (clean) {
   // prefix: all unparsed characters that came before any parsed fields
   // postfix: all characters from the first admin field to the end of the string
 
-  // set cursor to the first classified character
-  let cursor = mask.search(/\S/);
+  // set cursor to the first classified character from selected classes
+  let cursor = mask.search(/[NSAP]/);
+
+  // >> solution includes venue classification
+  // set cursor after the venue name
+  if (mask.includes('V')) { cursor = mask.lastIndexOf('V') +1; }
+
   if (cursor === -1) { cursor = body.length; }
   let prefix = _.trim(body.substr(0, cursor), ' ,');
 
@@ -92,6 +110,9 @@ function parse (clean) {
   // solution includes admin classification
   // set cursor to the first classified admin character
   else if( mask.includes('A') ){ cursor = mask.indexOf('A'); }
+  // >> solution includes venue classification
+  // set cursor after the venue name
+  else if (mask.includes('V')) { cursor = mask.lastIndexOf('V') + 1; }
   // else set cursor to end-of-text
   else { cursor = body.length; }
   let postfix = _.trim(body.substr(cursor), ' ,');
@@ -134,6 +155,10 @@ function parse (clean) {
   if (!_.isEmpty(parsed_text.housenumber) && !_.isEmpty(parsed_text.street)) {
     parsed_text.subject = `${parsed_text.housenumber} ${parsed_text.street}`;
   }
+  // an intersection query
+  else if (!_.isEmpty(parsed_text.street) && !_.isEmpty(parsed_text.cross_street)) {
+    parsed_text.subject = `${parsed_text.street} & ${parsed_text.cross_street}`;
+  }
   // a street query
   else if (!_.isEmpty(parsed_text.street)) {
     parsed_text.subject = parsed_text.street;
diff --git a/sanitizer/_tokenizer.js b/sanitizer/_tokenizer.js
index f4612e11c..739e3a9cf 100644
--- a/sanitizer/_tokenizer.js
+++ b/sanitizer/_tokenizer.js
@@ -34,7 +34,7 @@ function _sanitize( raw, clean ){
       // note: we cannot be sure that the input is complete if a street is
       // detected because the parser will detect partially completed suffixes
       // which are not safe to match against a phrase index
-      if( _.has(clean.parsed_text, 'housenumber') && _.has(clean.parsed_text, 'street') ){
+      if( _.has(clean.parsed_text, 'street') ){
         parserConsumedAllTokens = false;
       }
 
diff --git a/test/unit/fixture/autocomplete_linguistic_with_admin.js b/test/unit/fixture/autocomplete_linguistic_with_admin.js
index 3d1e77583..cd652dc0f 100644
--- a/test/unit/fixture/autocomplete_linguistic_with_admin.js
+++ b/test/unit/fixture/autocomplete_linguistic_with_admin.js
@@ -18,6 +18,8 @@ module.exports = {
           'multi_match': {
             'fields': [
               'parent.country.ngram^1',
+              'parent.dependency.ngram^1',
+              'parent.macroregion.ngram^1',
               'parent.region.ngram^1',
               'parent.county.ngram^1',
               'parent.localadmin.ngram^1',
diff --git a/test/unit/fixture/autocomplete_single_character_street.js b/test/unit/fixture/autocomplete_single_character_street.js
index 952da297f..25d08b243 100644
--- a/test/unit/fixture/autocomplete_single_character_street.js
+++ b/test/unit/fixture/autocomplete_single_character_street.js
@@ -16,6 +16,8 @@ module.exports = {
         'multi_match': {
           'fields': [
             'parent.country.ngram^1',
+            'parent.dependency.ngram^1',
+            'parent.macroregion.ngram^1',
             'parent.region.ngram^1',
             'parent.county.ngram^1',
             'parent.localadmin.ngram^1',
diff --git a/test/unit/sanitizer/_text_pelias_parser.js b/test/unit/sanitizer/_text_pelias_parser.js
index 462572ce8..317f43b25 100644
--- a/test/unit/sanitizer/_text_pelias_parser.js
+++ b/test/unit/sanitizer/_text_pelias_parser.js
@@ -144,6 +144,14 @@ module.exports.tests.text_parser = function (test, common) {
     admin: 'italy'
   }]);
 
+  // university
+  cases.push(['Union College, Kentucky', {
+    subject: 'Union College',
+    place: 'Union College',
+    region: 'Kentucky',
+    admin: 'Kentucky'
+  }]);
+
   cases.forEach(testcase => {
     let input = testcase[0];
     let expected = testcase[1];

From 61cceebe56e18b22e5cfc96b538c52066f0b294d Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Tue, 4 Jun 2019 10:08:56 +0200
Subject: [PATCH 32/55] feat(autocomplete): typo

---
 routes/v1.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/routes/v1.js b/routes/v1.js
index 535203a49..aaf52004f 100644
--- a/routes/v1.js
+++ b/routes/v1.js
@@ -74,7 +74,7 @@ const hasRequestErrors = require('../controller/predicates/has_request_errors');
 const isCoarseReverse = require('../controller/predicates/is_coarse_reverse');
 const isAdminOnlyAnalysis = require('../controller/predicates/is_admin_only_analysis');
 const hasResultsAtLayers = require('../controller/predicates/has_results_at_layers');
-const isPeliasItParse = require('../controller/predicates/is_pelias_parse');
+const isPeliasParse = require('../controller/predicates/is_pelias_parse');
 const hasRequestCategories = require('../controller/predicates/has_request_parameter')('categories');
 const isOnlyNonAdminLayers = require('../controller/predicates/is_only_non_admin_layers');
 const isRequestLayersAnyAddressRelated = require('../controller/predicates/is_request_layers_any_address_related');
@@ -233,7 +233,7 @@ function addRoutes(app, peliasConfig) {
   // call search addressit query if addressit was the parser
   const searchAddressitShouldExecute = all(
     not(hasRequestErrors),
-    isPeliasItParse
+    isPeliasParse
   );
 
   // get language adjustments if:

From dc69e0d31458d666f5ef54414979a5285ff48f76 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Tue, 4 Jun 2019 11:23:37 +0200
Subject: [PATCH 33/55] feat(autocomplete): improved matching at the cusp

---
 query/autocomplete.js                         |  3 +-
 query/autocomplete_defaults.js                |  4 +--
 query/view/ngrams_last_token_only_multi.js    | 36 +++++++++++++++++++
 ...autocomplete_linguistic_multiple_tokens.js | 29 +++++++++------
 .../autocomplete_linguistic_with_admin.js     |  4 +--
 .../autocomplete_single_character_street.js   |  4 +--
 ...utocomplete_token_matching_permutations.js | 13 +++++--
 7 files changed, 74 insertions(+), 19 deletions(-)
 create mode 100644 query/view/ngrams_last_token_only_multi.js

diff --git a/query/autocomplete.js b/query/autocomplete.js
index 475c85a95..6be8eb8ac 100644
--- a/query/autocomplete.js
+++ b/query/autocomplete.js
@@ -11,6 +11,7 @@ var views = {
   custom_boosts:              require('./view/boost_sources_and_layers'),
   ngrams_strict:              require('./view/ngrams_strict'),
   ngrams_last_token_only:     require('./view/ngrams_last_token_only'),
+  ngrams_last_token_only_multi: require('./view/ngrams_last_token_only_multi'),
   phrase_first_tokens_only:   require('./view/phrase_first_tokens_only'),
   pop_subquery:               require('./view/pop_subquery'),
   boost_exact_matches:        require('./view/boost_exact_matches'),
@@ -31,7 +32,7 @@ var query = new peliasQuery.layout.FilteredBooleanQuery();
 
 // mandatory matches
 query.score( views.phrase_first_tokens_only, 'must' );
-query.score( views.ngrams_last_token_only, 'must' );
+query.score( views.ngrams_last_token_only_multi( adminFields ), 'must' );
 
 // admin components
 query.score(peliasQuery.view.admin_multi_match(adminFields, 'peliasAdmin'), 'must');
diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js
index 64ee3cad1..fe20d06a1 100644
--- a/query/autocomplete_defaults.js
+++ b/query/autocomplete_defaults.js
@@ -60,7 +60,7 @@ module.exports = _.merge({}, peliasQuery.defaults, {
 
   'admin:country_a:analyzer': 'standard',
   'admin:country_a:field': 'parent.country_a.ngram',
-  'admin:country_a:boost': 1,
+  'admin:country_a:boost': 4,
   'admin:country_a:cutoff_frequency': 0.01,
 
   'admin:country:analyzer': 'peliasAdmin',
@@ -80,7 +80,7 @@ module.exports = _.merge({}, peliasQuery.defaults, {
 
   'admin:region_a:analyzer': 'peliasAdmin',
   'admin:region_a:field': 'parent.region_a.ngram',
-  'admin:region_a:boost': 1,
+  'admin:region_a:boost': 4,
   'admin:region_a:cutoff_frequency': 0.01,
 
   'admin:macroregion:analyzer': 'peliasAdmin',
diff --git a/query/view/ngrams_last_token_only_multi.js b/query/view/ngrams_last_token_only_multi.js
new file mode 100644
index 000000000..369bef183
--- /dev/null
+++ b/query/view/ngrams_last_token_only_multi.js
@@ -0,0 +1,36 @@
+const peliasQuery = require('pelias-query');
+const ngrams_last_token_only = require('./ngrams_last_token_only');
+
+module.exports = function (adminFields){
+  const subview = peliasQuery.view.admin_multi_match( adminFields, 'peliasQueryPartialToken' );
+
+  return function (vs) {
+
+    // get a copy of the *tokens_incomplete* tokens produced from the input:name
+    var tokens = vs.var('input:name:tokens_incomplete').get();
+
+    // no valid tokens to use, fail now, don't render this view.
+    if (!tokens || tokens.length < 1) { return null; }
+
+    var complete_tokens = vs.var('input:name:tokens_complete').get();
+    if (!complete_tokens || complete_tokens.length < 1) { return ngrams_last_token_only(vs); }
+
+    // make a copy Vars so we don't mutate the original
+    var vsCopy = new peliasQuery.Vars( vs.export() );
+
+    adminFields.forEach(field => {
+      // set the admin variables in the copy to the incomplete token(s) only
+      vsCopy.var(`input:${field}`).set(tokens.join(' '));
+    });
+
+    var rendered = subview( vsCopy );
+    if( !rendered ){ return rendered; }
+
+    // return the view rendered using the copy
+    return {
+      'constant_score': {
+        'query': rendered
+      }
+    };
+  };
+};
diff --git a/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js b/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js
index 5e8db15e1..f1cd848de 100644
--- a/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js
+++ b/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js
@@ -16,16 +16,25 @@ module.exports = {
       {
         'constant_score': {
           'query': {
-            'match': {
-              'name.default': {
-                'analyzer': 'peliasQuery',
-                'boost': 100,
-                'query': 'three',
-                'type': 'phrase',
-                'operator': 'and',
-                'cutoff_frequency': 0.01,
-                'slop': 3
-              }
+            'multi_match': {
+              'fields': [
+                'parent.country.ngram^1',
+                'parent.dependency.ngram^1',
+                'parent.macroregion.ngram^1',
+                'parent.region.ngram^1',
+                'parent.county.ngram^1',
+                'parent.localadmin.ngram^1',
+                'parent.locality.ngram^1',
+                'parent.borough.ngram^1',
+                'parent.neighbourhood.ngram^1',
+                'parent.locality_a.ngram^1',
+                'parent.region_a.ngram^4',
+                'parent.country_a.ngram^4',
+                'name.default^1'
+              ],
+              'query': 'three',
+              'analyzer': 'peliasQuery',
+              'type': 'cross_fields'
             }
           }
         }
diff --git a/test/unit/fixture/autocomplete_linguistic_with_admin.js b/test/unit/fixture/autocomplete_linguistic_with_admin.js
index cd652dc0f..6b7a5b39d 100644
--- a/test/unit/fixture/autocomplete_linguistic_with_admin.js
+++ b/test/unit/fixture/autocomplete_linguistic_with_admin.js
@@ -27,8 +27,8 @@ module.exports = {
               'parent.borough.ngram^1',
               'parent.neighbourhood.ngram^1',
               'parent.locality_a.ngram^1',
-              'parent.region_a.ngram^1',
-              'parent.country_a.ngram^1',
+              'parent.region_a.ngram^4',
+              'parent.country_a.ngram^4',
               'name.default^1'
             ],
             'query': 'three',
diff --git a/test/unit/fixture/autocomplete_single_character_street.js b/test/unit/fixture/autocomplete_single_character_street.js
index 25d08b243..fb113cbcf 100644
--- a/test/unit/fixture/autocomplete_single_character_street.js
+++ b/test/unit/fixture/autocomplete_single_character_street.js
@@ -25,8 +25,8 @@ module.exports = {
             'parent.borough.ngram^1',
             'parent.neighbourhood.ngram^1',
             'parent.locality_a.ngram^1',
-            'parent.region_a.ngram^1',
-            'parent.country_a.ngram^1',
+            'parent.region_a.ngram^4',
+            'parent.country_a.ngram^4',
             'name.default^1'
           ],
           'query': 'laird',
diff --git a/test/unit/query/autocomplete_token_matching_permutations.js b/test/unit/query/autocomplete_token_matching_permutations.js
index 069465ed8..597f543a6 100644
--- a/test/unit/query/autocomplete_token_matching_permutations.js
+++ b/test/unit/query/autocomplete_token_matching_permutations.js
@@ -6,6 +6,10 @@ const defaultPeliasConfig = {
   }
 };
 
+// admin fields
+const placeTypes = require('../../../helper/placeTypes');
+var adminFields = placeTypes.concat(['locality_a', 'region_a', 'country_a', 'add_name_to_multimatch']);
+
 var generate = proxyquire('../../../query/autocomplete', {
   'pelias-config': defaultPeliasConfig
 });
@@ -16,6 +20,7 @@ const defaults = new peliasQuery.Vars( require('../../../query/autocomplete_defa
 // additional views
 const views = {
   ngrams_last_token_only:     require('../../../query/view/ngrams_last_token_only'),
+  ngrams_last_token_only_multi: require('../../../query/view/ngrams_last_token_only_multi')(adminFields),
   phrase_first_tokens_only:   require('../../../query/view/phrase_first_tokens_only'),
   pop_subquery:               require('../../../query/view/pop_subquery')
 };
@@ -180,11 +185,13 @@ module.exports.tests.multiple_tokens = function(test, common) {
     };
 
     var vs = vars( clean );
+    vs.var('input:add_name_to_multimatch', 'enabled');
+    vs.var('admin:add_name_to_multimatch:field', 'name.default');
 
     assert( t, generate( clean ), {
       must: [
         views.phrase_first_tokens_only( vs ),
-        views.ngrams_last_token_only( vs )
+        views.ngrams_last_token_only_multi( vs )
       ],
       should: [
         peliasQuery.view.popularity( views.pop_subquery )( vs ),
@@ -225,11 +232,13 @@ module.exports.tests.multiple_tokens = function(test, common) {
     };
 
     var vs = vars( clean );
+    vs.var('input:add_name_to_multimatch', 'enabled');
+    vs.var('admin:add_name_to_multimatch:field', 'name.default');
 
     assert( t, generate( clean ), {
       must: [
         views.phrase_first_tokens_only( vs ),
-        views.ngrams_last_token_only( vs )
+        views.ngrams_last_token_only_multi( vs )
       ],
       should: [
         peliasQuery.view.popularity( views.pop_subquery )( vs ),

From 0cdc5e8eea1025e589de189f4f6cdde0ad859dd4 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Tue, 4 Jun 2019 13:05:13 +0200
Subject: [PATCH 34/55] feat(autocomplete): improved performance and reduced
 noise for admin matching

---
 query/autocomplete.js                      |  5 ++-
 query/view/admin_multi_match_first.js      | 46 ++++++++++++++++++++++
 query/view/admin_multi_match_last.js       | 38 ++++++++++++++++++
 query/view/ngrams_last_token_only_multi.js |  3 ++
 4 files changed, 91 insertions(+), 1 deletion(-)
 create mode 100644 query/view/admin_multi_match_first.js
 create mode 100644 query/view/admin_multi_match_last.js

diff --git a/query/autocomplete.js b/query/autocomplete.js
index 6be8eb8ac..7a51fad25 100644
--- a/query/autocomplete.js
+++ b/query/autocomplete.js
@@ -12,6 +12,8 @@ var views = {
   ngrams_strict:              require('./view/ngrams_strict'),
   ngrams_last_token_only:     require('./view/ngrams_last_token_only'),
   ngrams_last_token_only_multi: require('./view/ngrams_last_token_only_multi'),
+  admin_multi_match_first: require('./view/admin_multi_match_first'),
+  admin_multi_match_last: require('./view/admin_multi_match_last'),
   phrase_first_tokens_only:   require('./view/phrase_first_tokens_only'),
   pop_subquery:               require('./view/pop_subquery'),
   boost_exact_matches:        require('./view/boost_exact_matches'),
@@ -35,7 +37,8 @@ query.score( views.phrase_first_tokens_only, 'must' );
 query.score( views.ngrams_last_token_only_multi( adminFields ), 'must' );
 
 // admin components
-query.score(peliasQuery.view.admin_multi_match(adminFields, 'peliasAdmin'), 'must');
+query.score( views.admin_multi_match_first( adminFields ), 'must');
+query.score( views.admin_multi_match_last( adminFields ), 'must');
 
 // address components
 query.score( peliasQuery.view.address('housenumber') );
diff --git a/query/view/admin_multi_match_first.js b/query/view/admin_multi_match_first.js
new file mode 100644
index 000000000..bcecd1387
--- /dev/null
+++ b/query/view/admin_multi_match_first.js
@@ -0,0 +1,46 @@
+const peliasQuery = require('pelias-query');
+
+module.exports = function (adminFields) {
+  const subview = peliasQuery.view.admin_multi_match(adminFields, 'peliasAdmin');
+
+  return (vs) => {
+
+    // check which of the possible admin_properties are actually set
+    // from the query
+    var valid_admin_properties = adminFields.filter(admin_property => {
+      return admin_property &&
+        vs.isset('input:' + admin_property) &&
+        vs.isset('admin:' + admin_property + ':field');
+    });
+
+    if (valid_admin_properties.length === 0) {
+      return null;
+    }
+
+    // the actual query text is simply taken from the first valid admin field
+    // this assumes all the values would be the same, which is probably not true
+    // TODO: handle the case where not all admin area input values are the same
+    var tokens = vs.var('input:' + valid_admin_properties[0]).get().split(/\s+/g);
+
+    // fewer than two tokens leaves no leading tokens to match, don't render this view.
+    if (!tokens || tokens.length < 2) { return null; }
+
+    // make a copy Vars so we don't mutate the original
+    var vsCopy = new peliasQuery.Vars(vs.export());
+
+    // change field mappings
+    vsCopy.var('admin:add_name_to_multimatch:field', 'phrase.default');
+    adminFields.forEach(field => {
+      if( vsCopy.isset(`admin:${field}:field`) ){
+        vsCopy.var(`admin:${field}:field`, vsCopy.var(`admin:${field}:field`).get().replace('.ngram', ''));
+      }
+    });
+
+    adminFields.forEach(field => {
+      // set the admin variables in the copy to all tokens except the last
+      vsCopy.var(`input:${field}`).set(tokens.slice(0, -1).join(' '));
+    });
+
+    return subview(vsCopy);
+  };
+};
diff --git a/query/view/admin_multi_match_last.js b/query/view/admin_multi_match_last.js
new file mode 100644
index 000000000..ceaab8a98
--- /dev/null
+++ b/query/view/admin_multi_match_last.js
@@ -0,0 +1,38 @@
+const peliasQuery = require('pelias-query');
+
+module.exports = function (adminFields) {
+  const subview = peliasQuery.view.admin_multi_match(adminFields, 'peliasAdmin');
+
+  return (vs) => {
+
+    // check which of the possible admin_properties are actually set
+    // from the query
+    var valid_admin_properties = adminFields.filter(admin_property => {
+      return admin_property &&
+        vs.isset('input:' + admin_property) &&
+        vs.isset('admin:' + admin_property + ':field');
+    });
+
+    if (valid_admin_properties.length === 0) {
+      return null;
+    }
+
+    // the actual query text is simply taken from the first valid admin field
+    // this assumes all the values would be the same, which is probably not true
+    // TODO: handle the case where not all admin area input values are the same
+    var tokens = vs.var('input:' + valid_admin_properties[0]).get().split(/\s+/g);
+
+    // no valid tokens to use, fail now, don't render this view.
+    if (!tokens || tokens.length < 1) { return null; }
+
+    // make a copy Vars so we don't mutate the original
+    var vsCopy = new peliasQuery.Vars(vs.export());
+
+    adminFields.forEach(field => {
+      // set the admin variables in the copy to only the last token
+      vsCopy.var(`input:${field}`).set(tokens[ tokens.length -1 ]);
+    });
+
+    return subview(vsCopy);
+  };
+};
diff --git a/query/view/ngrams_last_token_only_multi.js b/query/view/ngrams_last_token_only_multi.js
index 369bef183..3eaaddea9 100644
--- a/query/view/ngrams_last_token_only_multi.js
+++ b/query/view/ngrams_last_token_only_multi.js
@@ -6,6 +6,9 @@ module.exports = function (adminFields){
 
   return function (vs) {
 
+    // return the simple view for address queries
+    if( vs.isset('input:street') ){ return ngrams_last_token_only(vs); }
+
     // get a copy of the *tokens_incomplete* tokens produced from the input:name
     var tokens = vs.var('input:name:tokens_incomplete').get();
 

From 4b79aa138ae074256406d20f9e30eeb8e0f7cc16 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Wed, 5 Jun 2019 19:26:34 +0200
Subject: [PATCH 35/55] feat(deps): bump parser dep version

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index b6de70d2f..070d62d07 100644
--- a/package.json
+++ b/package.json
@@ -56,7 +56,7 @@
     "pelias-logger": "^1.2.0",
     "pelias-microservice-wrapper": "^1.7.0",
     "pelias-model": "^7.0.0",
-    "pelias-parser": "^1.21.0",
+    "pelias-parser": "^1.24.0",
     "pelias-query": "^9.14.0",
     "pelias-sorting": "^1.2.0",
     "predicates": "^2.0.0",

From 1045c84a5395793dbc78efa33e14cc9069010d08 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Thu, 6 Jun 2019 13:30:14 +0200
Subject: [PATCH 36/55] feat(deps): bump parser dep version

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 070d62d07..3ce4c731d 100644
--- a/package.json
+++ b/package.json
@@ -56,7 +56,7 @@
     "pelias-logger": "^1.2.0",
     "pelias-microservice-wrapper": "^1.7.0",
     "pelias-model": "^7.0.0",
-    "pelias-parser": "^1.24.0",
+    "pelias-parser": "^1.25.0",
     "pelias-query": "^9.14.0",
     "pelias-sorting": "^1.2.0",
     "predicates": "^2.0.0",

From 2743575507cb9bedec4aa3cb27e963cb3339ae9e Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Thu, 6 Jun 2019 15:39:54 +0200
Subject: [PATCH 37/55] feat(deps): bump parser dep version

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 3ce4c731d..8874daf1b 100644
--- a/package.json
+++ b/package.json
@@ -56,7 +56,7 @@
     "pelias-logger": "^1.2.0",
     "pelias-microservice-wrapper": "^1.7.0",
     "pelias-model": "^7.0.0",
-    "pelias-parser": "^1.25.0",
+    "pelias-parser": "^1.27.0",
     "pelias-query": "^9.14.0",
     "pelias-sorting": "^1.2.0",
     "predicates": "^2.0.0",

From f079baf1a60203707e087ece2e8f619ef20df45b Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Thu, 6 Jun 2019 15:52:41 +0200
Subject: [PATCH 38/55] test: disable parserConsumedAllTokens for admin parses

---
 sanitizer/_tokenizer.js | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/sanitizer/_tokenizer.js b/sanitizer/_tokenizer.js
index 739e3a9cf..8e58105f1 100644
--- a/sanitizer/_tokenizer.js
+++ b/sanitizer/_tokenizer.js
@@ -45,12 +45,12 @@ function _sanitize( raw, clean ){
       }
 
       // when $subject exactly equals one of the admin fields
-      else if (
-        text === clean.parsed_text.locality ||
-        text === clean.parsed_text.region ||
-        text === clean.parsed_text.country) {
-        parserConsumedAllTokens = true;
-      }
+      // else if (
+      //   text === clean.parsed_text.locality ||
+      //   text === clean.parsed_text.region ||
+      //   text === clean.parsed_text.country) {
+      //   parserConsumedAllTokens = true;
+      // }
     }
   }
 

From f07cb90fc9b4ffb6cd6b6223861c481676ddf038 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Thu, 6 Jun 2019 19:00:00 +0200
Subject: [PATCH 39/55] feat(deps): bump parser dep version

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 8874daf1b..c840d4572 100644
--- a/package.json
+++ b/package.json
@@ -56,7 +56,7 @@
     "pelias-logger": "^1.2.0",
     "pelias-microservice-wrapper": "^1.7.0",
     "pelias-model": "^7.0.0",
-    "pelias-parser": "^1.27.0",
+    "pelias-parser": "^1.28.0",
     "pelias-query": "^9.14.0",
     "pelias-sorting": "^1.2.0",
     "predicates": "^2.0.0",

From 40b62bc821a3619de174e77e483e84e2a149c35f Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Fri, 7 Jun 2019 15:03:46 +0200
Subject: [PATCH 40/55] feat(query): add should subquery for cross_street
 matching

---
 query/autocomplete.js          | 1 +
 query/autocomplete_defaults.js | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/query/autocomplete.js b/query/autocomplete.js
index 7a51fad25..454a96e62 100644
--- a/query/autocomplete.js
+++ b/query/autocomplete.js
@@ -43,6 +43,7 @@ query.score( views.admin_multi_match_last( adminFields ), 'must');
 // address components
 query.score( peliasQuery.view.address('housenumber') );
 query.score( peliasQuery.view.address('street') );
+query.score( peliasQuery.view.address('cross_street') );
 query.score( peliasQuery.view.address('postcode') );
 
 // scoring boost
diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js
index fe20d06a1..e2d7e3ee3 100644
--- a/query/autocomplete_defaults.js
+++ b/query/autocomplete_defaults.js
@@ -46,6 +46,11 @@ module.exports = _.merge({}, peliasQuery.defaults, {
   'address:street:boost': 5,
   'address:street:cutoff_frequency': 0.01,
 
+  'address:cross_street:analyzer': 'peliasStreet',
+  'address:cross_street:field': 'address_parts.cross_street',
+  'address:cross_street:boost': 5,
+  'address:cross_street:cutoff_frequency': 0.01,
+
   'address:postcode:analyzer': 'peliasZip',
   'address:postcode:field': 'address_parts.zip',
   'address:postcode:boost': 2000,

From 9d69fe183a7e7e67dd620a0ebdda9dab0d8939e8 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Mon, 10 Jun 2019 16:35:11 +0200
Subject: [PATCH 41/55] feat(logging): add summary logging for pelias parser

---
 sanitizer/_text_pelias_parser.js | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/sanitizer/_text_pelias_parser.js b/sanitizer/_text_pelias_parser.js
index 73d87abc3..005ca57ae 100644
--- a/sanitizer/_text_pelias_parser.js
+++ b/sanitizer/_text_pelias_parser.js
@@ -1,3 +1,4 @@
+const logger = require('pelias-logger').get('api');
 const Tokenizer = require('pelias-parser/tokenization/Tokenizer');
 const Solution = require('pelias-parser/solver/Solution');
 const AddressParser = require('pelias-parser/parser/AddressParser');
@@ -38,11 +39,19 @@ function _sanitize (raw, clean) {
 }
 
 function parse (clean) {
+  
   // parse text
+  let start = new Date();
   const t = new Tokenizer(clean.text);
   parser.classify(t);
   parser.solve(t);
 
+  // log summary info
+  logger.info('pelias_parser', {
+    took: (new Date()) - start,
+    solutions: t.solution.length
+  });
+
   // only use the first solution generated
   // @todo: we could expand this in the future to accomodate more solutions
   let solution = new Solution();

From 5974f7a83ae16ec1189b3f55e0de91652995167d Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Wed, 12 Jun 2019 14:22:04 +0200
Subject: [PATCH 42/55] feat(deps): bump parser dep version

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index c840d4572..52958540a 100644
--- a/package.json
+++ b/package.json
@@ -56,7 +56,7 @@
     "pelias-logger": "^1.2.0",
     "pelias-microservice-wrapper": "^1.7.0",
     "pelias-model": "^7.0.0",
-    "pelias-parser": "^1.28.0",
+    "pelias-parser": "^1.34.0",
     "pelias-query": "^9.14.0",
     "pelias-sorting": "^1.2.0",
     "predicates": "^2.0.0",

From 85693a83c65f4079fca7f41a63ccbe6904ddd3e9 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Mon, 17 Jun 2019 16:04:59 +0200
Subject: [PATCH 43/55] feat(pelias_parser): additional parser tests

---
 test/unit/sanitizer/_text_pelias_parser.js | 208 ++++++++++++++++++++-
 1 file changed, 207 insertions(+), 1 deletion(-)

diff --git a/test/unit/sanitizer/_text_pelias_parser.js b/test/unit/sanitizer/_text_pelias_parser.js
index 317f43b25..abdae52bd 100644
--- a/test/unit/sanitizer/_text_pelias_parser.js
+++ b/test/unit/sanitizer/_text_pelias_parser.js
@@ -152,9 +152,211 @@ module.exports.tests.text_parser = function (test, common) {
     admin: 'Kentucky'
   }]);
 
+  // street (USA style)
+  cases.push(['M', { subject: 'M' }, true]);
+  cases.push(['Ma', { subject: 'Ma' }, true]);
+  cases.push(['Mai', { subject: 'Mai' }, true]);
+  cases.push(['Main', { subject: 'Main' }, true]);
+  cases.push(['Main ', { subject: 'Main' }, true]);
+  cases.push(['Main S', { subject: 'Main S' }, true]);
+  cases.push(['Main St', { subject: 'Main St' }, true]);
+  cases.push(['Main St S', { subject: 'Main St' }, true]);
+  // cases.push(['Main St Se', { subject: 'Main St' }, true]); // jitter on SE
+  cases.push(['Main St Sea', { subject: 'Main St' }, true]);
+  cases.push(['Main St Seat', { subject: 'Main St' }, true]);
+  cases.push(['Main St Seatt', { subject: 'Main St' }, true]);
+  cases.push(['Main St Seattl', { subject: 'Main St' }, true]);
+  cases.push(['Main St Seattle', { subject: 'Main St' }, true]);
+
+  // address (USA style)
+  cases.push(['1', { subject: '1' }, true]);
+  cases.push(['10', { subject: '10' }, true]);
+  cases.push(['10 ', { subject: '10' }, true]);
+  cases.push(['10 M', { subject: '10 M' }, true]);
+  cases.push(['10 Ma', { subject: '10 Ma' }, true]);
+  cases.push(['10 Mai', { subject: '10 Mai' }, true]);
+  cases.push(['10 Main', { subject: '10 Main' }, true]);
+  cases.push(['10 Main ', { subject: '10 Main' }, true]);
+  cases.push(['10 Main S', { subject: '10 Main S' }, true]);
+  cases.push(['10 Main St', { subject: '10 Main St' }, true]);
+  cases.push(['10 Main St S', { subject: '10 Main St' }, true]);
+  // cases.push(['10 Main St Se', { subject: '10 Main St' }, true]); // jitter issue
+  cases.push(['10 Main St Sea', { subject: '10 Main St' }, true]);
+  cases.push(['10 Main St Seat', { subject: '10 Main St' }, true]);
+  cases.push(['10 Main St Seatt', { subject: '10 Main St' }, true]);
+  cases.push(['10 Main St Seattl', { subject: '10 Main St' }, true]);
+  cases.push(['10 Main St Seattle', { subject: '10 Main St' }, true]);
+
+  // street (ESP style)
+  cases.push(['C', { subject: 'C' }, true]);
+  cases.push(['Ca', { subject: 'Ca' }, true]);
+  cases.push(['Cal', { subject: 'Cal' }, true]);
+  cases.push(['Call', { subject: 'Call' }, true]);
+  cases.push(['Calle', { subject: 'Calle' }, true]);
+  cases.push(['Calle ', { subject: 'Calle' }, true]);
+  cases.push(['Calle P', { subject: 'Calle P' }, true]);
+  cases.push(['Calle Pr', { subject: 'Calle Pr' }, true]);
+  cases.push(['Calle Pri', { subject: 'Calle Pri' }, true]);
+  cases.push(['Calle Prin', { subject: 'Calle Prin' }, true]);
+  cases.push(['Calle Princ', { subject: 'Calle Princ' }, true]);
+  cases.push(['Calle Princi', { subject: 'Calle Princi' }, true]);
+  cases.push(['Calle Princip', { subject: 'Calle Princip' }, true]);
+  cases.push(['Calle Principa', { subject: 'Calle Principa' }, true]);
+  cases.push(['Calle Principal', { subject: 'Calle Principal' }, true]);
+  cases.push(['Calle Principal ', { subject: 'Calle Principal' }, true]);
+  cases.push(['Calle Principal B', { subject: 'Calle Principal' }, true]);
+  // cases.push(['Calle Principal Ba', { subject: 'Calle Principal' }, true]); // jitter issue
+  cases.push(['Calle Principal Bar', { subject: 'Calle Principal' }, true]);
+  cases.push(['Calle Principal Barc', { subject: 'Calle Principal' }, true]);
+  // cases.push(['Calle Principal Barce', { subject: 'Calle Principal' }, true]); // jitter issue
+  // cases.push(['Calle Principal Barcel', { subject: 'Calle Principal' }, true]); // jitter issue
+  // cases.push(['Calle Principal Barcelo', { subject: 'Calle Principal' }, true]); // jitter issue
+  // cases.push(['Calle Principal Barcelon', { subject: 'Calle Principal' }, true]); // jitter issue
+  cases.push(['Calle Principal Barcelona', { subject: 'Calle Principal' }, true]);
+
+  // address (ESP style)
+  cases.push(['Calle Principal 20', { subject: '20 Calle Principal' }, true]);
+  cases.push(['Calle Principal 20', { subject: '20 Calle Principal' }, true]);
+  cases.push(['Calle Principal 20 ', { subject: '20 Calle Principal' }, true]);
+  cases.push(['Calle Principal 20 B', { subject: '20 Calle Principal' }, true]);
+  cases.push(['Calle Principal 20 Ba', { subject: '20 Calle Principal' }, true]);
+  cases.push(['Calle Principal 20 Bar', { subject: '20 Calle Principal' }, true]);
+  cases.push(['Calle Principal 20 Barc', { subject: '20 Calle Principal' }, true]);
+  cases.push(['Calle Principal 20 Barce', { subject: '20 Calle Principal' }, true]);
+  cases.push(['Calle Principal 20 Barcel', { subject: '20 Calle Principal' }, true]);
+  cases.push(['Calle Principal 20 Barcelo', { subject: '20 Calle Principal' }, true]);
+  cases.push(['Calle Principal 20 Barcelon', { subject: '20 Calle Principal' }, true]);
+  cases.push(['Calle Principal 20 Barcelona', { subject: '20 Calle Principal' }, true]);
+
+  // street (DEU style)
+  cases.push(['H', { subject: 'H' }, true]);
+  cases.push(['Ha', { subject: 'Ha' }, true]);
+  cases.push(['Hau', { subject: 'Hau' }, true]);
+  cases.push(['Haup', { subject: 'Haup' }, true]);
+  cases.push(['Haupt', { subject: 'Haupt' }, true]);
+  cases.push(['Haupts', { subject: 'Haupts' }, true]);
+  cases.push(['Hauptst', { subject: 'Hauptst' }, true]);
+  cases.push(['Hauptstr', { subject: 'Hauptstr' }, true]);
+  cases.push(['Hauptstra', { subject: 'Hauptstra' }, true]);
+  cases.push(['Hauptstraß', { subject: 'Hauptstraß' }, true]);
+  cases.push(['Hauptstraße', { subject: 'Hauptstraße' }, true]);
+  cases.push(['Hauptstraße ', { subject: 'Hauptstraße' }, true]);
+  cases.push(['Hauptstraße B', { subject: 'Hauptstraße' }, true]);
+  cases.push(['Hauptstraße Be', { subject: 'Hauptstraße' }, true]);
+  cases.push(['Hauptstraße Ber', { subject: 'Hauptstraße' }, true]);
+  cases.push(['Hauptstraße Berl', { subject: 'Hauptstraße' }, true]);
+  cases.push(['Hauptstraße Berli', { subject: 'Hauptstraße' }, true]);
+  cases.push(['Hauptstraße Berlin', { subject: 'Hauptstraße' }, true]);
+
+  // address (DEU style)
+  cases.push(['H', { subject: 'H' }, true]);
+  cases.push(['Ha', { subject: 'Ha' }, true]);
+  cases.push(['Hau', { subject: 'Hau' }, true]);
+  cases.push(['Haup', { subject: 'Haup' }, true]);
+  cases.push(['Haupt', { subject: 'Haupt' }, true]);
+  cases.push(['Haupts', { subject: 'Haupts' }, true]);
+  cases.push(['Hauptst', { subject: 'Hauptst' }, true]);
+  cases.push(['Hauptstr', { subject: 'Hauptstr' }, true]);
+  cases.push(['Hauptstra', { subject: 'Hauptstra' }, true]);
+  cases.push(['Hauptstraß', { subject: 'Hauptstraß' }, true]);
+  cases.push(['Hauptstraße', { subject: 'Hauptstraße' }, true]);
+  cases.push(['Hauptstraße ', { subject: 'Hauptstraße' }, true]);
+  cases.push(['Hauptstraße 5', { subject: '5 Hauptstraße' }, true]);
+  cases.push(['Hauptstraße 50', { subject: '50 Hauptstraße' }, true]);
+  cases.push(['Hauptstraße 50 ', { subject: '50 Hauptstraße' }, true]);
+  cases.push(['Hauptstraße 50 B', { subject: '50 Hauptstraße' }, true]);
+  cases.push(['Hauptstraße 50 Be', { subject: '50 Hauptstraße' }, true]);
+  cases.push(['Hauptstraße 50 Ber', { subject: '50 Hauptstraße' }, true]);
+  cases.push(['Hauptstraße 50 Berl', { subject: '50 Hauptstraße' }, true]);
+  cases.push(['Hauptstraße 50 Berli', { subject: '50 Hauptstraße' }, true]);
+  cases.push(['Hauptstraße 50 Berlin', { subject: '50 Hauptstraße' }, true]);
+
+  // venues
+  cases.push(['K', { subject: 'K' }, true]);
+  cases.push(['Ka', { subject: 'Ka' }, true]);
+  cases.push(['Kas', { subject: 'Kas' }, true]);
+  cases.push(['Kasc', { subject: 'Kasc' }, true]);
+  cases.push(['Kasch', { subject: 'Kasch' }, true]);
+  cases.push(['Kaschk', { subject: 'Kaschk' }, true]);
+  cases.push(['Kaschk ', { subject: 'Kaschk' }, true]);
+  // cases.push(['Kaschk B', { subject: 'Kaschk' }, true]); // jitter issue
+  cases.push(['Kaschk Be', { subject: 'Kaschk' }, true]);
+  // cases.push(['Kaschk Ber', { subject: 'Kaschk' }, true]); // jitter issue
+  // cases.push(['Kaschk Berl', { subject: 'Kaschk' }, true]); // jitter issue
+  // cases.push(['Kaschk Berli', { subject: 'Kaschk' }, true]); // jitter issue
+  cases.push(['Kaschk Berlin', { subject: 'Kaschk' }, true]);
+
+  cases.push(['A', { subject: 'A' }, true]);
+  cases.push(['Ai', { subject: 'Ai' }, true]);
+  cases.push(['Air', { subject: 'Air' }, true]);
+  cases.push(['Air ', { subject: 'Air' }, true]);
+  cases.push(['Air &', { subject: 'Air &' }, true]);
+  cases.push(['Air & ', { subject: 'Air &' }, true]);
+  cases.push(['Air & S', { subject: 'Air & S' }, true]);
+  cases.push(['Air & Sp', { subject: 'Air & Sp' }, true]);
+  cases.push(['Air & Spa', { subject: 'Air & Spa' }, true]);
+  cases.push(['Air & Spac', { subject: 'Air & Spac' }, true]);
+  cases.push(['Air & Space', { subject: 'Air & Space' }, true]);
+  cases.push(['Air & Space ', { subject: 'Air & Space' }, true]);
+  // cases.push(['Air & Space M', { subject: 'Air & Space M' }, true]); // jitter issue
+  // cases.push(['Air & Space Mu', { subject: 'Air & Space Mu' }, true]); // jitter issue
+  cases.push(['Air & Space Mus', { subject: 'Air & Space Mus' }, true]);
+  // cases.push(['Air & Space Muse', { subject: 'Air & Space Muse' }, true]); // jitter issue
+  // cases.push(['Air & Space Museu', { subject: 'Air & Space Museu' }, true]); // jitter issue
+  cases.push(['Air & Space Museum', { subject: 'Air & Space Museum' }, true]);
+  cases.push(['Air & Space Museum ', { subject: 'Air & Space Museum' }, true]);
+  cases.push(['Air & Space Museum D', { subject: 'Air & Space Museum' }, true]);
+  cases.push(['Air & Space Museum DC', { subject: 'Air & Space Museum' }, true]);
+
+  // admin areas
+  cases.push(['N', { subject: 'N' }, true]);
+  cases.push(['Ne', { subject: 'Ne' }, true]);
+  cases.push(['New', { subject: 'New' }, true]);
+  cases.push(['New ', { subject: 'New' }, true]);
+  cases.push(['New Y', { subject: 'New Y' }, true]);
+  // cases.push(['New Yo', { subject: 'New Yo' }, true]); // jitter issue
+  // cases.push(['New Yor', { subject: 'New Yor' }, true]); // jitter issue
+  cases.push(['New York', { subject: 'New York' }, true]);
+  cases.push(['New York N', { subject: 'New York' }, true]);
+  cases.push(['New York NY', { subject: 'New York' }, true]);
+  
+  cases.push(['B', { subject: 'B' }, true]);
+  cases.push(['Be', { subject: 'Be' }, true]);
+  cases.push(['Ber', { subject: 'Ber' }, true]);
+  cases.push(['Berl', { subject: 'Berl' }, true]);
+  cases.push(['Berli', { subject: 'Berli' }, true]);
+  cases.push(['Berlin', { subject: 'Berlin' }, true]);
+  cases.push(['Berlin ', { subject: 'Berlin' }, true]);
+  cases.push(['Berlin D', { subject: 'Berlin' }, true]);
+  cases.push(['Berlin De', { subject: 'Berlin' }, true]);
+  cases.push(['Berlin Deu', { subject: 'Berlin' }, true]);
+  cases.push(['Berlin Deut', { subject: 'Berlin' }, true]);
+  cases.push(['Berlin Deuts', { subject: 'Berlin' }, true]);
+  cases.push(['Berlin Deutsc', { subject: 'Berlin' }, true]);
+  cases.push(['Berlin Deutsch', { subject: 'Berlin' }, true]);
+  cases.push(['Berlin Deutschl', { subject: 'Berlin' }, true]);
+  cases.push(['Berlin Deutschla', { subject: 'Berlin' }, true]);
+  cases.push(['Berlin Deutschlan', { subject: 'Berlin' }, true]);
+  cases.push(['Berlin Deutschland', { subject: 'Berlin' }, true]);
+
+  // postcodes
+  cases.push(['2000', { subject: '2000' }, true]);
+  cases.push(['Sydney 2000', { subject: '2000' }, true]);
+  cases.push(['10010', { subject: '10010' }, true]);
+  cases.push(['New York 10010', { subject: '10010' }, true]);
+  cases.push(['10437', { subject: '10437' }, true]);
+  cases.push(['Berlin 10437', { subject: '10437' }, true]);
+  cases.push(['E81DN', { subject: 'E81DN' }, true]);
+  cases.push(['London E81DN', { subject: 'E81DN' }, true]);
+  cases.push(['e81dn', { subject: 'e81dn' }, true]);
+  cases.push(['london e81dn', { subject: 'e81dn' }, true]);
+  cases.push(['e8 1dn', { subject: 'e8 1dn' }, true]);
+  // cases.push(['london e8 1dn', { subject: 'e8 1dn' }, true]); // issue
+
   cases.forEach(testcase => {
     let input = testcase[0];
     let expected = testcase[1];
+    let subjectOnly = (testcase[2] === true);
 
     function assert(label, replacement, replaceAdmin) {
       let text = input.replace(/\s+/, ' ');
@@ -176,7 +378,11 @@ module.exports.tests.text_parser = function (test, common) {
         t.deepEqual(messages, { errors: [], warnings: [] }, 'messages');
         t.equal(clean.text, raw.text.trim(), 'text');
         t.equal(clean.parser, 'pelias', 'parser');
-        t.deepEqual(clean.parsed_text, clone, `${label}: ${text}`);
+        if( subjectOnly ){
+          t.equals(clean.parsed_text.subject, clone.subject, `${label}: ${text}`);
+        } else {
+          t.deepEqual(clean.parsed_text, clone, `${label}: ${text}`);
+        }
         t.end();
       });
     }

From abeb48f013a5937836966db1c3168f4939583371 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Mon, 17 Jun 2019 16:06:23 +0200
Subject: [PATCH 44/55] feat(deps): bump parser dep version

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 52958540a..b62eb01fc 100644
--- a/package.json
+++ b/package.json
@@ -56,7 +56,7 @@
     "pelias-logger": "^1.2.0",
     "pelias-microservice-wrapper": "^1.7.0",
     "pelias-model": "^7.0.0",
-    "pelias-parser": "^1.34.0",
+    "pelias-parser": "^1.36.0",
     "pelias-query": "^9.14.0",
     "pelias-sorting": "^1.2.0",
     "predicates": "^2.0.0",

From 770e820bb57670058e598d1ef97b81f7e492316d Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Wed, 10 Jul 2019 13:20:49 +0200
Subject: [PATCH 45/55] feat(pelias_parser): fix tests

---
 test/unit/query/autocomplete.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/unit/query/autocomplete.js b/test/unit/query/autocomplete.js
index fee51406b..c38a6cab6 100644
--- a/test/unit/query/autocomplete.js
+++ b/test/unit/query/autocomplete.js
@@ -309,7 +309,7 @@ module.exports.tests.query = function(test, common) {
     var expected = require('../fixture/autocomplete_linguistic_bbox_san_francisco');
 
     t.deepEqual(compiled.type, 'autocomplete', 'query type set');
-    t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_focus_null_island');
+    t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_bbox_san_francisco');
     t.end();
   });
 

From 2257ec77952a5a9759c76a13f6f42c39d752ec53 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Thu, 15 Aug 2019 16:34:10 +0200
Subject: [PATCH 46/55] feat(search_addressit): generate cross_street subquery
 where available

---
 query/search_addressit.js | 1 +
 1 file changed, 1 insertion(+)

diff --git a/query/search_addressit.js b/query/search_addressit.js
index 00d3acf12..44151f16f 100644
--- a/query/search_addressit.js
+++ b/query/search_addressit.js
@@ -31,6 +31,7 @@ query.score( peliasQuery.view.population( peliasQuery.view.phrase ) );
 // address components
 query.score( peliasQuery.view.address('housenumber') );
 query.score( peliasQuery.view.address('street') );
+query.score( peliasQuery.view.address('cross_street') );
 query.score( peliasQuery.view.address('postcode') );
 
 // admin components

From d7d5f7b1dfce8bcb7583ab24f61e9fa31257aa8d Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Mon, 16 Sep 2019 12:48:23 +0200
Subject: [PATCH 47/55] feat(pelias_parser): limit input text to 140 characters

---
 sanitizer/_text.js                         |  3 ++-
 sanitizer/_text_pelias_parser.js           | 18 ++++++++++++++----
 test/unit/sanitizer/_text_pelias_parser.js | 17 +++++++++++++++++
 3 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/sanitizer/_text.js b/sanitizer/_text.js
index 477c5e2e2..6003fa37e 100644
--- a/sanitizer/_text.js
+++ b/sanitizer/_text.js
@@ -10,9 +10,10 @@ function _sanitize( raw, clean ){
   // error & warning messages
   const messages = { errors: [], warnings: [] };
 
-  // invalid input 'text'
+  // remove superfluous whitespace and quotes
   let text =  _.trim( _.trim( raw.text ), QUOTES );
 
+  // validate input 'text'
   if( !_.isString(text) || _.isEmpty(text) ){
     messages.errors.push(`invalid param 'text': text length, must be >0`);
   } else {
diff --git a/sanitizer/_text_pelias_parser.js b/sanitizer/_text_pelias_parser.js
index 005ca57ae..ec619f3d2 100644
--- a/sanitizer/_text_pelias_parser.js
+++ b/sanitizer/_text_pelias_parser.js
@@ -4,6 +4,7 @@ const Solution = require('pelias-parser/solver/Solution');
 const AddressParser = require('pelias-parser/parser/AddressParser');
 const parser = new AddressParser();
 const _ = require('lodash');
+const MAX_TEXT_LENGTH = 140;
 
 /**
   this module provides fulltext parsing using the pelias/parser module.
@@ -21,14 +22,23 @@ function _sanitize (raw, clean) {
   // error & warning messages
   var messages = { errors: [], warnings: [] };
 
-  // invalid input 'text'
-  const text = _.trim(raw.text);
-  if (!_.isString(text) || _.isEmpty(text)) {
-    messages.errors.push('invalid param \'text\': text length, must be >0');
+  // remove superfluous whitespace
+  let text = _.trim(raw.text);
+
+  // validate input 'text'
+  if( !_.isString(text) || _.isEmpty(text) ){
+    messages.errors.push(`invalid param 'text': text length, must be >0`);
   }
 
   // valid input 'text'
   else {
+
+    // truncate text to $MAX_TEXT_LENGTH chars
+    if (text.length > MAX_TEXT_LENGTH) {
+      messages.warnings.push(`param 'text' truncated to ${MAX_TEXT_LENGTH} characters`);
+      text = text.substring(0, MAX_TEXT_LENGTH);
+    }
+
     // parse text with pelias/parser
     clean.text = text;
     clean.parser = 'pelias';
diff --git a/test/unit/sanitizer/_text_pelias_parser.js b/test/unit/sanitizer/_text_pelias_parser.js
index abdae52bd..fb998eec7 100644
--- a/test/unit/sanitizer/_text_pelias_parser.js
+++ b/test/unit/sanitizer/_text_pelias_parser.js
@@ -413,6 +413,23 @@ module.exports.tests.text_parser = function (test, common) {
     t.deepEquals(validParameters, expected);
     t.end();
   });
+
+  test('should truncate very long text inputs', (t) => {
+    const raw = {
+      text: `
+Sometimes we make the process more complicated than we need to.
+We will never make a journey of a thousand miles by fretting about 
+how long it will take or how hard it will be.
+We make the journey by taking each day step by step and then repeating 
+it again and again until we reach our destination.` };
+    const clean = {};
+    const messages = sanitizer.sanitize(raw, clean);
+
+    t.equals(clean.text.length, 140);
+    t.deepEquals(messages.errors, [], 'no errors');
+    t.deepEquals(messages.warnings, [`param 'text' truncated to 140 characters`]);
+    t.end();
+  });
 };
 
 module.exports.all = function (tape, common) {

From 670666cb54eac42f4e381a8682c23b091fdb99f1 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Wed, 25 Sep 2019 13:47:54 +0200
Subject: [PATCH 48/55] feat(pelias_parser): replace peliasQueryPartialToken
 analyzer with peliasQuery

---
 query/view/ngrams_last_token_only_multi.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/query/view/ngrams_last_token_only_multi.js b/query/view/ngrams_last_token_only_multi.js
index 3eaaddea9..d0845a62a 100644
--- a/query/view/ngrams_last_token_only_multi.js
+++ b/query/view/ngrams_last_token_only_multi.js
@@ -2,7 +2,7 @@ const peliasQuery = require('pelias-query');
 const ngrams_last_token_only = require('./ngrams_last_token_only');
 
 module.exports = function (adminFields){
-  const subview = peliasQuery.view.admin_multi_match( adminFields, 'peliasQueryPartialToken' );
+  const subview = peliasQuery.view.admin_multi_match( adminFields, 'peliasQuery' );
 
   return function (vs) {
 

From 30760b96afcf4f192f1f21757f7e2897d702f81c Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Wed, 25 Sep 2019 14:16:55 +0200
Subject: [PATCH 49/55] feat(pelias_parser): disable
 "ngrams_last_token_only_multi" view when every "completed" token is numeric

---
 query/view/ngrams_last_token_only_multi.js    |  5 ++
 ...uistic_multiple_tokens_complete_numeric.js | 74 +++++++++++++++++++
 test/unit/query/autocomplete.js               | 17 +++++
 3 files changed, 96 insertions(+)
 create mode 100644 test/unit/fixture/autocomplete_linguistic_multiple_tokens_complete_numeric.js

diff --git a/query/view/ngrams_last_token_only_multi.js b/query/view/ngrams_last_token_only_multi.js
index d0845a62a..17fa6aeec 100644
--- a/query/view/ngrams_last_token_only_multi.js
+++ b/query/view/ngrams_last_token_only_multi.js
@@ -15,9 +15,14 @@ module.exports = function (adminFields){
     // no valid tokens to use, fail now, don't render this view.
     if (!tokens || tokens.length < 1) { return null; }
 
+    // return the simple view for queries with no complete tokens
     var complete_tokens = vs.var('input:name:tokens_complete').get();
     if (!complete_tokens || complete_tokens.length < 1) { return ngrams_last_token_only(vs); }
 
+    // return the simple view when every complete token is numeric
+    var all_complete_tokens_numeric = complete_tokens.every(token => !token.replace(/[0-9]/g, '').length);
+    if (all_complete_tokens_numeric) { return ngrams_last_token_only(vs); }
+
     // make a copy Vars so we don't mutate the original
     var vsCopy = new peliasQuery.Vars( vs.export() );
 
diff --git a/test/unit/fixture/autocomplete_linguistic_multiple_tokens_complete_numeric.js b/test/unit/fixture/autocomplete_linguistic_multiple_tokens_complete_numeric.js
new file mode 100644
index 000000000..c9f53cff9
--- /dev/null
+++ b/test/unit/fixture/autocomplete_linguistic_multiple_tokens_complete_numeric.js
@@ -0,0 +1,74 @@
+module.exports = {
+  'query': {
+    'bool': {
+      'must': [{
+        'match': {
+          'phrase.default': {
+            'analyzer': 'peliasQuery',
+            'type': 'phrase',
+            'boost': 1,
+            'slop': 3,
+            'cutoff_frequency': 0.01,
+            'query': '1 2'
+          }
+        }
+      },
+      {
+        'constant_score': {
+          'query': {
+            'match': {
+              'name.default': {
+                'analyzer': 'peliasQuery',
+                'boost': 100,
+                'query': 'three',
+                'cutoff_frequency': 0.01,
+                'type': 'phrase',
+                'operator': 'and',
+                'slop': 3
+              }
+            }
+          }
+        }
+      }],
+      'should': [
+        {
+          'function_score': {
+            'query': {
+              'match_all': {}
+            },
+            'max_boost': 20,
+            'score_mode': 'first',
+            'boost_mode': 'replace',
+            'functions': [{
+              'field_value_factor': {
+                'modifier': 'log1p',
+                'field': 'popularity',
+                'missing': 1
+              },
+              'weight': 1
+            }]
+          }
+        }, {
+          'function_score': {
+            'query': {
+              'match_all': {}
+            },
+            'max_boost': 20,
+            'score_mode': 'first',
+            'boost_mode': 'replace',
+            'functions': [{
+              'field_value_factor': {
+                'modifier': 'log1p',
+                'field': 'population',
+                'missing': 1
+              },
+              'weight': 3
+            }]
+          }
+        }]
+    }
+  },
+  'sort': ['_score'],
+  'size': 20,
+  'track_scores': true
+};
diff --git a/test/unit/query/autocomplete.js b/test/unit/query/autocomplete.js
index c38a6cab6..4651502f1 100644
--- a/test/unit/query/autocomplete.js
+++ b/test/unit/query/autocomplete.js
@@ -52,6 +52,23 @@ module.exports.tests.query = function(test, common) {
     t.end();
   });
 
+  // This is to prevent a query like '30 west' from considering the 'west' part as an admin component
+  test('valid lingustic autocomplete with 3 tokens - first two are numeric', function (t) {
+    var query = generate({
+      text: '1 1 three',
+      tokens: ['1', '2', 'three'],
+      tokens_complete: ['1', '2'],
+      tokens_incomplete: ['three']
+    });
+
+    var compiled = JSON.parse(JSON.stringify(query));
+    var expected = require('../fixture/autocomplete_linguistic_multiple_tokens_complete_numeric');
+
+    t.deepEqual(compiled.type, 'autocomplete', 'query type set');
+    t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_multiple_tokens_complete_numeric');
+    t.end();
+  });
+
   test('valid lingustic autocomplete with comma delimited admin section', function(t) {
     var query = generate({
       text: 'one two, three',

From 866c479bbd3c3f1d07e2d518d9174a068dbe6ba3 Mon Sep 17 00:00:00 2001
From: Julian Simioni <julian@simioni.org>
Date: Wed, 25 Sep 2019 11:19:07 -0400
Subject: [PATCH 50/55] Add context to pelias parser logs

---
 sanitizer/_text_pelias_parser.js | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sanitizer/_text_pelias_parser.js b/sanitizer/_text_pelias_parser.js
index ec619f3d2..b0a10e24d 100644
--- a/sanitizer/_text_pelias_parser.js
+++ b/sanitizer/_text_pelias_parser.js
@@ -58,8 +58,10 @@ function parse (clean) {
 
   // log summary info
   logger.info('pelias_parser', {
-    took: (new Date()) - start,
-    solutions: t.solution.length
+    response_time: (new Date()) - start,
+    params: clean,
+    solutions: t.solution.length,
+    text_length: _.get(clean, 'text.length', 0)
   });
 
   // only use the first solution generated

From c0749a0cf78a5118e6a62b1774d912d31510e67d Mon Sep 17 00:00:00 2001
From: Julian Simioni <julian@simioni.org>
Date: Wed, 25 Sep 2019 12:43:17 -0400
Subject: [PATCH 51/55] Pin to pelias-parser-1.38.0 for now

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index b62eb01fc..1d90bdf44 100644
--- a/package.json
+++ b/package.json
@@ -56,7 +56,7 @@
     "pelias-logger": "^1.2.0",
     "pelias-microservice-wrapper": "^1.7.0",
     "pelias-model": "^7.0.0",
-    "pelias-parser": "^1.36.0",
+    "pelias-parser": "1.38.0",
     "pelias-query": "^9.14.0",
     "pelias-sorting": "^1.2.0",
     "predicates": "^2.0.0",

From 97f6496ac99a9d909e7e876609b2c7c898b09c96 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Tue, 1 Oct 2019 15:16:45 +0200
Subject: [PATCH 52/55] refactor(pelias_parser): add code comments relating to
 "add_name_to_multimatch", clean up related code

---
 query/autocomplete.js          | 18 +++++++++++++++---
 query/autocomplete_defaults.js |  7 ++++++-
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/query/autocomplete.js b/query/autocomplete.js
index 454a96e62..537c22574 100644
--- a/query/autocomplete.js
+++ b/query/autocomplete.js
@@ -24,7 +24,16 @@ var views = {
 // add abbrevations for the fields pelias/parser is able to detect.
 var adminFields = placeTypes.concat(['locality_a', 'region_a', 'country_a']);
 
-// add name field to improve venue matching
+// add some name field(s) to the admin fields in order to improve venue matching
+// note: this is a bit of a hacky way to add a 'name' field to the list
+// of multimatch fields normally reserved for admin subquerying.
+// in some cases we are not sure if certain tokens refer to admin components
+// or are part of the place name (such as some venue names).
+// the variable name 'add_name_to_multimatch' is arbitrary, it can be any value so
+// long as there is a corresponding 'admin:*:field' variable set which defines
+// the name of the field to use.
+// this functionality is not enabled unless the 'input:add_name_to_multimatch'
+// variable is set to a non-empty value at query-time.
 adminFields = adminFields.concat(['add_name_to_multimatch']);
 
 //------------------------------
@@ -168,11 +177,14 @@ function generateQuery( clean ){
     textParser( clean, vs );
   }
 
+  // set the 'add_name_to_multimatch' variable only in the case where one
+  // or more of the admin variables are set.
+  // the value 'enabled' is not relevant, it just needs to be any non-empty
+  // value so that the associated field is added to the multimatch query.
+  // see code comments above for additional information.
   let isAdminSet = adminFields.some(field => vs.isset('input:' + field));
   if ( isAdminSet ){ vs.var('input:add_name_to_multimatch', 'enabled'); }
 
-  vs.var('admin:add_name_to_multimatch:field', 'name.default');
-
   return {
     type: 'autocomplete',
     body: query.render(vs)
diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js
index e2d7e3ee3..e78e56800 100644
--- a/query/autocomplete_defaults.js
+++ b/query/autocomplete_defaults.js
@@ -123,6 +123,11 @@ module.exports = _.merge({}, peliasQuery.defaults, {
   'admin:borough:boost': 1,
   'admin:borough:cutoff_frequency': 0.01,
 
+  // an additional 'name' field to add to admin multi-match queries.
+  // this is used to improve venue matching in cases where we
+  // are unsure if the tokens represent admin or name components.
+  'admin:add_name_to_multimatch:field': 'name.default',
+
   'popularity:field': 'popularity',
   'popularity:modifier': 'log1p',
   'popularity:max_boost': 20,
@@ -139,4 +144,4 @@ module.exports = _.merge({}, peliasQuery.defaults, {
   'custom:boosting:max_boost': 50,          // maximum boosting which can be applied (max_boost/boost = max_score)
   'custom:boosting:score_mode': 'sum',      // sum all function scores before multiplying the boost
   'custom:boosting:boost_mode': 'multiply'  // this mode is not relevant because there is no query section
-});
+});
\ No newline at end of file

From b2d3b160c41dcfcfba937992c4016cd1a6e420e5 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Tue, 1 Oct 2019 15:23:40 +0200
Subject: [PATCH 53/55] refactor(pelias_parser): remove disused code/comments

---
 query/search_addressit.js   |  5 -----
 query/text_parser_pelias.js | 19 +------------------
 sanitizer/_tokenizer.js     |  8 --------
 3 files changed, 1 insertion(+), 31 deletions(-)

diff --git a/query/search_addressit.js b/query/search_addressit.js
index 44151f16f..7fc4af306 100644
--- a/query/search_addressit.js
+++ b/query/search_addressit.js
@@ -35,11 +35,6 @@ query.score( peliasQuery.view.address('cross_street') );
 query.score( peliasQuery.view.address('postcode') );
 
 // admin components
-// country_a and region_a are left as matches here because the text-analyzer
-// can sometimes detect them, in which case a query more specific than a
-// multi_match is appropriate.
-// query.score( peliasQuery.view.admin('country_a') );
-// query.score( peliasQuery.view.admin('region_a') );
 query.score( peliasQuery.view.admin_multi_match(adminFields, 'peliasAdmin') );
 query.score( views.custom_boosts( config.customBoosts ) );
 
diff --git a/query/text_parser_pelias.js b/query/text_parser_pelias.js
index 141ad2df6..8a43ae0b0 100644
--- a/query/text_parser_pelias.js
+++ b/query/text_parser_pelias.js
@@ -40,24 +40,7 @@ function addParsedVariablesToQueryVariables(clean, vs) {
     vs.var('input:postcode', clean.parsed_text.postcode);
   }
 
-  // ==== add parsed matches [admin components] ====
-
-  // // locality
-  // if (!_.isEmpty(clean.parsed_text.locality)) {
-  //   vs.var('input:locality', clean.parsed_text.locality);
-  // }
-
-  // // region
-  // if (!_.isEmpty(clean.parsed_text.region)) {
-  //   vs.var('input:region', clean.parsed_text.region);
-  // }
-
-  // // country
-  // if (!_.isEmpty(clean.parsed_text.country)) {
-  //   vs.var('input:country', clean.parsed_text.country);
-  // }
-
-  // postfix
+  // ==== add admin components [postfix] ====
   if (!_.isEmpty(clean.parsed_text.admin)) {
     // assign postfix to any admin fields which currently don't have a value assigned.
     
diff --git a/sanitizer/_tokenizer.js b/sanitizer/_tokenizer.js
index 8e58105f1..cc6008571 100644
--- a/sanitizer/_tokenizer.js
+++ b/sanitizer/_tokenizer.js
@@ -43,14 +43,6 @@ function _sanitize( raw, clean ){
       else if (!clean.text.endsWith(text)) {
         parserConsumedAllTokens = true;
       }
-
-      // when $subject exactly equals one of the admin fields
-      // else if (
-      //   text === clean.parsed_text.locality ||
-      //   text === clean.parsed_text.region ||
-      //   text === clean.parsed_text.country) {
-      //   parserConsumedAllTokens = true;
-      // }
     }
   }
 

From 1e1cf245c2f07caf0b780b74db7392be53147589 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Tue, 1 Oct 2019 15:44:25 +0200
Subject: [PATCH 54/55] feat(pelias_parser): completely remove "addressit" and
 references to it

---
 middleware/confidenceScore.js                 |   2 +-
 package.json                                  |   3 +-
 query/search.js                               |   2 +-
 ...h_addressit.js => search_pelias_parser.js} |   9 +-
 query/text_parser_addressit.js                |  99 ----
 routes/v1.js                                  |  10 +-
 sanitizer/_address_layer_filter.js            |   2 +-
 sanitizer/_text_addressit.js                  | 123 -----
 .../fixture/search_with_custom_boosts.json    |   2 +-
 test/unit/middleware/confidenceScore.js       |  12 +-
 ...h_addressit.js => search_pelias_parser.js} |  26 +-
 test/unit/query/search_with_custom_boosts.js  |   2 +-
 test/unit/run.js                              |   3 +-
 test/unit/sanitizer/_address_layer_filter.js  |   4 +-
 test/unit/sanitizer/_text_addressit.js        | 429 ------------------
 15 files changed, 37 insertions(+), 691 deletions(-)
 rename query/{search_addressit.js => search_pelias_parser.js} (93%)
 delete mode 100644 query/text_parser_addressit.js
 delete mode 100644 sanitizer/_text_addressit.js
 rename test/unit/query/{search_addressit.js => search_pelias_parser.js} (87%)
 delete mode 100644 test/unit/sanitizer/_text_addressit.js

diff --git a/middleware/confidenceScore.js b/middleware/confidenceScore.js
index c132868b5..111513eac 100644
--- a/middleware/confidenceScore.js
+++ b/middleware/confidenceScore.js
@@ -29,7 +29,7 @@ function computeScores(req, res, next) {
   // do nothing if no result data set or if query is not of the original variety
   if (check.undefined(req.clean) || check.undefined(res) ||
       check.undefined(res.data) || check.undefined(res.meta) ||
-      res.meta.query_type !== 'search_addressit') {
+      res.meta.query_type !== 'search_pelias_parser') {
     return next();
   }
 
diff --git a/package.json b/package.json
index 1d90bdf44..1cf56c48e 100644
--- a/package.json
+++ b/package.json
@@ -36,8 +36,8 @@
     "node": ">=8.0.0"
   },
   "dependencies": {
+    "@hapi/joi": "^15.0.0",
     "@mapbox/geojson-extent": "^0.3.1",
-    "addressit": "1.7.0",
     "async": "^3.0.1",
     "check-types": "^10.0.0",
     "elasticsearch": "^16.0.0",
@@ -45,7 +45,6 @@
     "geojson": "^0.5.0",
     "geolib": "^3.0.0",
     "iso-639-3": "^1.0.0",
-    "@hapi/joi": "^15.0.0",
     "locale": "^0.1.0",
     "lodash": "^4.17.4",
     "markdown": "^0.5.0",
diff --git a/query/search.js b/query/search.js
index 2cbe93310..04a4bcc8c 100644
--- a/query/search.js
+++ b/query/search.js
@@ -129,7 +129,7 @@ function getQuery(vs) {
     };
   }
 
-  // returning undefined is a signal to a later step that the addressit-parsed
+  // returning undefined is a signal to a later step that a fallback parser
   // query should be queried for
   return undefined;
 
diff --git a/query/search_addressit.js b/query/search_pelias_parser.js
similarity index 93%
rename from query/search_addressit.js
rename to query/search_pelias_parser.js
index 7fc4af306..418e9cc1e 100644
--- a/query/search_addressit.js
+++ b/query/search_pelias_parser.js
@@ -8,10 +8,9 @@ const config = require('pelias-config').generate().api;
 var placeTypes = require('../helper/placeTypes');
 var views = { custom_boosts: require('./view/boost_sources_and_layers') };
 
-// region_a is also an admin field. addressit tries to detect
-// region_a, in which case we use a match query specifically for it.
-// but address it doesn't know about all of them so it helps to search
-// against this with the other admin parts as a fallback
+// region_a is also an admin field which can be identified by
+// the pelias_parser. this functionality was inherited from the
+// previous parser we used prior to the creation of pelias_parser.
 var adminFields = placeTypes.concat(['region_a']);
 
 //------------------------------
@@ -138,7 +137,7 @@ function generateQuery( clean ){
   }
 
   return {
-    type: 'search_addressit',
+    type: 'search_pelias_parser',
     body: query.render(vs)
   };
 }
diff --git a/query/text_parser_addressit.js b/query/text_parser_addressit.js
deleted file mode 100644
index 65c7ea775..000000000
--- a/query/text_parser_addressit.js
+++ /dev/null
@@ -1,99 +0,0 @@
-var logger = require('pelias-logger').get('api');
-var placeTypes = require('../helper/placeTypes');
-
-/*
-This list should only contain admin fields we are comfortable matching in the case
-when we can't identify parts of an address. This shouldn't contain fields like country_a
-or postalcode because we should only try to match those when we're sure that's what they are.
- */
-var adminFields = placeTypes.concat([
-  'region_a'
-]);
-
-/**
-  @todo: refactor me
-**/
-
-// all the address parsing logic
-function addParsedVariablesToQueryVariables( clean, vs ){
-
-  // is it a street address?
-  var isStreetAddress = clean.parsed_text.hasOwnProperty('number') && clean.parsed_text.hasOwnProperty('street');
-  if( isStreetAddress ){
-    vs.var( 'input:name', clean.parsed_text.number + ' ' + clean.parsed_text.street );
-  }
-
-  // if the 'naive parser' was used, input is equal to 'name'
-  // see: 'sanitizer/_text_addressit.js' function 'naive'
-  else if (clean.parsed_text.admin_parts && clean.parsed_text.name ) {
-    vs.var( 'input:name', clean.parsed_text.name );
-  }
-
-  // ?
-  else {
-    logger.warn( 'chaos monkey asks: what happens now?', {
-      params: clean
-    });
-  }
-
-  // ==== add parsed matches [address components] ====
-
-  // house number
-  if( clean.parsed_text.hasOwnProperty('number') ){
-    vs.var( 'input:housenumber', clean.parsed_text.number );
-  }
-
-  // street name
-  if( clean.parsed_text.hasOwnProperty('street') ){
-    vs.var( 'input:street', clean.parsed_text.street );
-  }
-
-  // postal code
-  if( clean.parsed_text.hasOwnProperty('postalcode') ){
-    vs.var( 'input:postcode', clean.parsed_text.postalcode );
-  }
-
-  // ==== add parsed matches [admin components] ====
-
-  // city
-  if( clean.parsed_text.hasOwnProperty('city') ){
-    vs.var( 'input:county', clean.parsed_text.city );
-  }
-
-  // state
-  if( clean.parsed_text.hasOwnProperty('state') ){
-    vs.var( 'input:region_a', clean.parsed_text.state );
-  }
-
-  // country
-  if( clean.parsed_text.hasOwnProperty('country') ){
-    vs.var( 'input:country_a', clean.parsed_text.country );
-  }
-
-  // ==== deal with the 'leftover' components ====
-  // @todo: clean up this code
-
-  // a concept called 'leftovers' which is just 'admin_parts' /or 'regions'.
-  var leftoversString = '';
-  if( clean.parsed_text.hasOwnProperty('admin_parts') ){
-    leftoversString = clean.parsed_text.admin_parts;
-  }
-  else if( clean.parsed_text.hasOwnProperty('regions') ){
-    leftoversString = clean.parsed_text.regions.join(' ');
-  }
-
-  // if we have 'leftovers' then assign them to any fields which
-  // currently don't have a value assigned.
-  if( leftoversString.length ){
-
-    // cycle through fields and set fields which
-    // are still currently unset
-    adminFields.forEach( function( key ){
-      if( !vs.isset( 'input:' + key ) ){
-        vs.var( 'input:' + key, leftoversString );
-      }
-    });
-  }
-}
-
-module.exports = addParsedVariablesToQueryVariables;
diff --git a/routes/v1.js b/routes/v1.js
index aaf52004f..3290ec5fe 100644
--- a/routes/v1.js
+++ b/routes/v1.js
@@ -38,7 +38,7 @@ var controllers = {
 
 var queries = {
   cascading_fallback: require('../query/search'),
-  search_addressit: require('../query/search_addressit'),
+  search_pelias_parser: require('../query/search_pelias_parser'),
   structured_geocoding: require('../query/structured_geocoding'),
   reverse: require('../query/reverse'),
   autocomplete: require('../query/autocomplete'),
@@ -230,8 +230,8 @@ function addRoutes(app, peliasConfig) {
     not(hasResponseData)
   );
 
-  // call search addressit query if addressit was the parser
-  const searchAddressitShouldExecute = all(
+  // call search_pelias_parser query if pelias_parser was the parser
+  const searchPeliasParserShouldExecute = all(
     not(hasRequestErrors),
     isPeliasParse
   );
@@ -288,11 +288,11 @@ function addRoutes(app, peliasConfig) {
       controllers.libpostal(libpostalService, libpostalShouldExecute),
       controllers.placeholder(placeholderService, geometricFiltersApply, placeholderGeodisambiguationShouldExecute),
       controllers.placeholder(placeholderService, geometricFiltersApply, placeholderIdsLookupShouldExecute),
-      // try 3 different query types: address search using ids, cascading fallback, addressit
+      // try 3 different query types: address search using ids, cascading fallback, pelias parser
       controllers.search(peliasConfig.api, esclient, queries.address_using_ids, searchWithIdsShouldExecute),
       controllers.search(peliasConfig.api, esclient, queries.cascading_fallback, fallbackQueryShouldExecute),
       sanitizers.defer_to_pelias_parser(shouldDeferToPeliasParser), //run additional sanitizers needed for pelias parser
-      controllers.search(peliasConfig.api, esclient, queries.search_addressit, searchAddressitShouldExecute),
+      controllers.search(peliasConfig.api, esclient, queries.search_pelias_parser, searchPeliasParserShouldExecute),
       postProc.trimByGranularity(),
       postProc.distances('focus.point.'),
       postProc.confidenceScores(peliasConfig.api),
diff --git a/sanitizer/_address_layer_filter.js b/sanitizer/_address_layer_filter.js
index 3b6febd2d..c4e545798 100644
--- a/sanitizer/_address_layer_filter.js
+++ b/sanitizer/_address_layer_filter.js
@@ -60,7 +60,7 @@ function _setup(tm) {
           input = clean.parsed_text.subject;
         }
 
-        // if 'addressit' or 'libpostal' identified input as a street address
+        // if 'pelias_parser' or 'libpostal' identified input as a street address
         else if (isStreetAddress) {
           input = clean.parsed_text.number + ' ' + clean.parsed_text.street;
         }
diff --git a/sanitizer/_text_addressit.js b/sanitizer/_text_addressit.js
deleted file mode 100644
index 4c674218a..000000000
--- a/sanitizer/_text_addressit.js
+++ /dev/null
@@ -1,123 +0,0 @@
-const addressit = require('addressit');
-const _      = require('lodash');
-const logger = require('pelias-logger').get('api');
-const MAX_TEXT_LENGTH = 140;
-
-/**
-  this module provides extremely basic parsing using two methods.
-
-  note: this code is old and well due for a makover/replacement, we
-  are not happy with either of these methods but they remain in place
-  for purely legacy reasons.
-
-  'naive parser' provides the following fields:
-  'name', 'admin_parts'
-
-  'addressit parser' provides the following fields:
-  'unit', 'number', 'street', 'state', 'country', 'postalcode', 'regions'
-**/
-
-// ref: https://en.wikipedia.org/wiki/Quotation_mark
-const QUOTES = `"'«»‘’‚‛“”„‟‹›⹂「」『』〝〞〟﹁﹂﹃﹄＂＇｢｣`;
-const DELIM = ',';
-const ADDRESSIT_MIN_CHAR_LENGTH = 4;
-
-// validate texts, convert types and apply defaults
-function _sanitize( raw, clean ){
-
-  // error & warning messages
-  var messages = { errors: [], warnings: [] };
-
-  // remove superfluous whitespace & quotes
-  let text = _.trim( _.trim( raw.text ), QUOTES );
-
-  // validate input 'text'
-  if (!_.isString(text) || _.isEmpty(text)) {
-    messages.errors.push(`invalid param 'text': text length, must be >0`);
-  }
-
-  // valid input 'text'
-  else {
-
-    // truncate text to $MAX_TEXT_LENGTH chars
-    if (text.length > MAX_TEXT_LENGTH) {
-      messages.warnings.push(`param 'text' truncated to ${MAX_TEXT_LENGTH} characters`);
-      text = text.substring(0, MAX_TEXT_LENGTH);
-    }
-
-    // parse text with query parser
-    clean.text = text;
-    clean.parser = 'addressit';
-    clean.parsed_text = parse(clean);
-  }
-
-  return messages;
-}
-
-// naive approach - for admin matching during query time
-// split 'flatiron, new york, ny' into 'flatiron' and 'new york, ny'
-var naive = function(tokens) {
-  var parsed_text = {};
-
-  if( tokens.length > 1 ){
-    parsed_text.name = tokens[0];
-
-    // 1. slice away all parts after the first one
-    // 2. trim spaces from each part just in case
-    // 3. join the parts back together with appropriate delimiter and spacing
-    parsed_text.admin_parts = tokens.slice(1).join(`${DELIM} `);
-  }
-
-  return parsed_text;
-};
-
-function parse(clean) {
-
-  // split query on delimiter, trim tokens and remove empty elements
-  var tokens = clean.text.split(DELIM)
-                         .map( part => part.trim() )
-                         .filter( part => part.length > 0 );
-
-  // call the naive parser to try and split tokens
-  var parsed_text = naive(tokens);
-
-  // join tokens back togther with normalized delimiters
-  var joined = tokens.join(`${DELIM} `);
-
-  // query addressit - perform full address parsing
-  // except on queries so short they obviously can't contain an address
-  if( joined.length >= ADDRESSIT_MIN_CHAR_LENGTH ) {
-    var parsed = addressit(joined);
-
-    // copy fields from addressit response to parsed_text
-    for( var attr in parsed ){
-      if( 'text' === attr ){ continue; } // ignore 'text'
-      if( !_.isEmpty( parsed[ attr ] ) && _.isUndefined( parsed_text[ attr ] ) ){
-        parsed_text[ attr ] = parsed[ attr ];
-      }
-    }
-  }
-
-  // if all we found was regions, ignore it as it is not enough information to make smarter decisions
-  if( Object.keys(parsed_text).length === 1 && !_.isUndefined(parsed_text.regions) ){
-    logger.info('Ignoring address parser output, regions only', {
-      parsed: parsed_text,
-      params: clean
-    });
-
-    // return empty parsed_text
-    return {};
-  }
-
-  return parsed_text;
-}
-
-function _expected(){
-  return [{ name: 'text' }];
-}
-
-// export function
-module.exports = () => ({
-  sanitize: _sanitize,
-  expected: _expected
-});
diff --git a/test/unit/fixture/search_with_custom_boosts.json b/test/unit/fixture/search_with_custom_boosts.json
index 6da91c123..a17912d01 100644
--- a/test/unit/fixture/search_with_custom_boosts.json
+++ b/test/unit/fixture/search_with_custom_boosts.json
@@ -1,5 +1,5 @@
 {
-  "type": "search_addressit",
+  "type": "search_pelias_parser",
   "body": {
     "query": {
       "bool": {
diff --git a/test/unit/middleware/confidenceScore.js b/test/unit/middleware/confidenceScore.js
index 36a505c0c..f9386d552 100644
--- a/test/unit/middleware/confidenceScore.js
+++ b/test/unit/middleware/confidenceScore.js
@@ -47,7 +47,7 @@ module.exports.tests.confidenceScore = function(test, common) {
       }],
       meta: {
         scores: [10],
-        query_type: 'search_addressit'
+        query_type: 'search_pelias_parser'
       }
     };
 
@@ -89,7 +89,7 @@ module.exports.tests.confidenceScore = function(test, common) {
       }],
       meta: {
         scores: [10],
-        query_type: 'search_addressit'
+        query_type: 'search_pelias_parser'
       }
     };
 
@@ -125,7 +125,7 @@ module.exports.tests.confidenceScore = function(test, common) {
       }],
       meta: {
         scores: [10],
-        query_type: 'search_addressit'
+        query_type: 'search_pelias_parser'
       }
     };
 
@@ -134,7 +134,7 @@ module.exports.tests.confidenceScore = function(test, common) {
     t.end();
   });
 
-  test('should only work for search_addressit query_type', function(t) {
+  test('should only work for search_pelias_parser query_type', function(t) {
     var req = {
       clean: {
         text: '123 Main St, City, NM',
@@ -191,7 +191,7 @@ module.exports.tests.confidenceScore = function(test, common) {
       }],
       meta: {
         scores: [10],
-        query_type: 'search_addressit'
+        query_type: 'search_pelias_parser'
       }
     };
 
@@ -223,7 +223,7 @@ module.exports.tests.confidenceScore = function(test, common) {
       }],
       meta: {
         scores: [10],
-        query_type: 'search_addressit'
+        query_type: 'search_pelias_parser'
       }
     };
 
diff --git a/test/unit/query/search_addressit.js b/test/unit/query/search_pelias_parser.js
similarity index 87%
rename from test/unit/query/search_addressit.js
rename to test/unit/query/search_pelias_parser.js
index 88b7a0655..814fd2836 100644
--- a/test/unit/query/search_addressit.js
+++ b/test/unit/query/search_pelias_parser.js
@@ -6,7 +6,7 @@ const defaultPeliasConfig = {
   }
 };
 
-var generate = proxyquire('../../../query/search_addressit', {
+var generate = proxyquire('../../../query/search_pelias_parser', {
   'pelias-config': defaultPeliasConfig
 });
 
@@ -34,7 +34,7 @@ module.exports.tests.query = function(test, common) {
     var compiled = JSON.parse( JSON.stringify( query ) );
     var expected = require('../fixture/search_linguistic_focus_bbox_original');
 
-    t.deepEqual(compiled.type, 'search_addressit', 'query type set');
+    t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search_linguistic_focus_bbox_original');
     t.end();
   });
@@ -52,7 +52,7 @@ module.exports.tests.query = function(test, common) {
     var compiled = JSON.parse( JSON.stringify( query ) );
     var expected = require('../fixture/search_linguistic_bbox_original');
 
-    t.deepEqual(compiled.type, 'search_addressit', 'query type set');
+    t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search_linguistic_bbox');
     t.end();
   });
@@ -66,7 +66,7 @@ module.exports.tests.query = function(test, common) {
     var compiled = JSON.parse( JSON.stringify( query ) );
     var expected = require('../fixture/search_linguistic_only_original');
 
-    t.deepEqual(compiled.type, 'search_addressit', 'query type set');
+    t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search_linguistic_only');
     t.end();
   });
@@ -81,7 +81,7 @@ module.exports.tests.query = function(test, common) {
     var compiled = JSON.parse( JSON.stringify( query ) );
     var expected = require('../fixture/search_linguistic_focus_original');
 
-    t.deepEqual(compiled.type, 'search_addressit', 'query type set');
+    t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search_linguistic_focus');
     t.end();
   });
@@ -96,7 +96,7 @@ module.exports.tests.query = function(test, common) {
     var compiled = JSON.parse( JSON.stringify( query ) );
     var expected = require('../fixture/search_linguistic_focus_null_island_original');
 
-    t.deepEqual(compiled.type, 'search_addressit', 'query type set');
+    t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search_linguistic_focus_null_island');
     t.end();
   });
@@ -119,7 +119,7 @@ module.exports.tests.query = function(test, common) {
     var compiled = JSON.parse( JSON.stringify( query ) );
     var expected = require('../fixture/search_full_address_original');
 
-    t.deepEqual(compiled.type, 'search_addressit', 'query type set');
+    t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search_full_address');
     t.end();
   });
@@ -139,7 +139,7 @@ module.exports.tests.query = function(test, common) {
     var compiled = JSON.parse( JSON.stringify( query ) );
     var expected = require('../fixture/search_partial_address_original');
 
-    t.deepEqual(compiled.type, 'search_addressit', 'query type set');
+    t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search_partial_address');
     t.end();
   });
@@ -161,7 +161,7 @@ module.exports.tests.query = function(test, common) {
     var compiled = JSON.parse( JSON.stringify( query ) );
     var expected = require('../fixture/search_regions_address_original');
 
-    t.deepEqual(compiled.type, 'search_addressit', 'query type set');
+    t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search_regions_address');
     t.end();
   });
@@ -176,7 +176,7 @@ module.exports.tests.query = function(test, common) {
     var compiled = JSON.parse( JSON.stringify( query ) );
     var expected = require('../fixture/search_boundary_country_original');
 
-    t.deepEqual(compiled.type, 'search_addressit', 'query type set');
+    t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search: valid boundary.country query');
     t.end();
   });
@@ -190,7 +190,7 @@ module.exports.tests.query = function(test, common) {
     var compiled = JSON.parse( JSON.stringify( query ) );
     var expected = require('../fixture/search_with_source_filtering_original');
 
-    t.deepEqual(compiled.type, 'search_addressit', 'query type set');
+    t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search: valid search query with source filtering');
     t.end();
   });
@@ -204,7 +204,7 @@ module.exports.tests.query = function(test, common) {
     var compiled = JSON.parse( JSON.stringify( query ) );
     var expected = require('../fixture/search_with_category_filtering_original');
 
-    t.deepEqual(compiled.type, 'search_addressit', 'query type set');
+    t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'correct search_with_category_filtering_original query');
     t.end();
   });
@@ -219,7 +219,7 @@ module.exports.tests.query = function(test, common) {
     var compiled = JSON.parse( JSON.stringify( query ) );
     var expected = require('../fixture/search_boundary_gid_original');
 
-    t.deepEqual(compiled.type, 'search_addressit', 'query type set');
+    t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search: valid boundary.gid filter');
     t.end();
   });
diff --git a/test/unit/query/search_with_custom_boosts.js b/test/unit/query/search_with_custom_boosts.js
index 911c5d055..cd2efdf33 100644
--- a/test/unit/query/search_with_custom_boosts.js
+++ b/test/unit/query/search_with_custom_boosts.js
@@ -31,7 +31,7 @@ module.exports.tests.query = function(test, common) {
 
     var expected_query = require('../fixture/search_with_custom_boosts.json');
 
-    const search_query_module = proxyquire('../../../query/search_addressit', {
+    const search_query_module = proxyquire('../../../query/search_pelias_parser', {
       'pelias-config': config_with_boosts
     });
 
diff --git a/test/unit/run.js b/test/unit/run.js
index 1d38d5ac6..82e94cceb 100644
--- a/test/unit/run.js
+++ b/test/unit/run.js
@@ -71,7 +71,7 @@ var tests = [
   require('./query/search'),
   require('./query/search_with_custom_boosts'),
   require('./query/search_defaults'),
-  require('./query/search_addressit'),
+  require('./query/search_pelias_parser'),
   require('./query/structured_geocoding'),
   require('./query/text_parser'),
   require('./query/view/boost_sources_and_layers'),
@@ -97,7 +97,6 @@ var tests = [
   require('./sanitizer/_address_layer_filter'),
   require('./sanitizer/_synthesize_analysis'),
   require('./sanitizer/_text'),
-  require('./sanitizer/_text_addressit'),
   require('./sanitizer/_text_pelias_parser'),
   require('./sanitizer/_tokenizer'),
   require('./sanitizer/_categories'),
diff --git a/test/unit/sanitizer/_address_layer_filter.js b/test/unit/sanitizer/_address_layer_filter.js
index 5543a7293..681096843 100644
--- a/test/unit/sanitizer/_address_layer_filter.js
+++ b/test/unit/sanitizer/_address_layer_filter.js
@@ -120,14 +120,14 @@ module.exports.tests.parsed_text = function (test, common) {
     t.end();
   });
 
-  test('addressit/libpostal - do not apply filter for numeric addresses', (t) => {
+  test('pelias_parser/libpostal - do not apply filter for numeric addresses', (t) => {
     let clean = { text: 'A', parsed_text: { number: '1', street: 'Main St' } };
     t.deepEqual(s.sanitize(null, clean), NO_MESSAGES);
     t.false(clean.layers);
     t.end();
   });
 
-  test('addressit/libpostal - apply filter for non-numeric addresses', (t) => {
+  test('pelias_parser/libpostal - apply filter for non-numeric addresses', (t) => {
     let clean = { text: 'A', parsed_text: { number: 'Foo', street: 'Main St' } };
     t.deepEqual(s.sanitize(null, clean), STD_MESSAGES);
     t.deepEqual(clean.layers, ['A', 'B', 'C']);
diff --git a/test/unit/sanitizer/_text_addressit.js b/test/unit/sanitizer/_text_addressit.js
deleted file mode 100644
index ee071567c..000000000
--- a/test/unit/sanitizer/_text_addressit.js
+++ /dev/null
@@ -1,429 +0,0 @@
-var sanitizer = require('../../../sanitizer/_text_addressit')();
-var type_mapping = require('../../../helper/type_mapping');
-
-module.exports.tests = {};
-
-module.exports.tests.text_parser = function(test, common) {
-  test('short input text has admin layers set ', function(t) {
-    var raw = {
-      text: 'emp'  //start of empire state building
-    };
-    var clean = {
-    };
-
-    var messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEquals(messages.errors, [], 'no errors');
-    t.deepEquals(messages.warnings, [], 'no warnings');
-
-    t.end();
-  });
-
-  var usQueries = [
-    { name: 'soho', admin_parts: 'new york', state: 'NY' },
-    { name: '123 main', admin_parts: 'new york', state: 'NY' }
-  ];
-
-  usQueries.forEach(function (query) {
-    test('naive parsing ' + query, function(t) {
-      var raw = {
-        text: query.name + ', ' + query.admin_parts
-      };
-      var clean = {};
-
-      var expected_clean = {
-        text: raw.text.trim(),
-        parser: 'addressit',
-        parsed_text: {
-          name: query.name,
-          regions: [ query.name ],
-          admin_parts: query.admin_parts,
-          state: query.state
-        }
-      };
-
-      var messages = sanitizer.sanitize(raw, clean);
-
-      t.deepEqual(messages, { errors: [], warnings: [] } );
-      t.deepEqual(clean, expected_clean);
-      t.end();
-
-    });
-
-    test('naive parsing ' + query + ' without spaces', function(t) {
-      var raw = {
-        text: query.name + ',' + query.admin_parts
-      };
-      var clean = {};
-
-      var expected_clean = {
-        text: raw.text.trim(),
-        parser: 'addressit',
-        parsed_text: {
-          name: query.name,
-          regions: [ query.name ],
-          admin_parts: query.admin_parts,
-          state: query.state
-        }
-      };
-
-      var messages = sanitizer.sanitize(raw, clean);
-
-      t.deepEqual(messages, { errors: [], warnings: [] } );
-      t.deepEqual(clean, expected_clean);
-      t.end();
-
-    });
-
-    test('naive parsing ' + query + ' with leading and trailing junk', function(t) {
-      var raw = {
-        text: ' , ' + query.name + ',' + query.admin_parts + ' , '
-      };
-      var clean = {};
-
-      var expected_clean = {
-        text: raw.text.trim(),
-        parser: 'addressit',
-        parsed_text: {
-          name: query.name,
-          regions: [ query.name ],
-          admin_parts: query.admin_parts,
-          state: query.state
-        }
-      };
-
-      var messages = sanitizer.sanitize(raw, clean);
-
-      t.deepEqual(messages, { errors: [], warnings: [] } );
-      t.deepEqual(clean, expected_clean);
-      t.end();
-
-    });
-  });
-
-  var nonUSQueries = [
-    { name: 'chelsea', admin_parts: 'london' },
-  ];
-
-  nonUSQueries.forEach(function (query) {
-    test('naive parsing ' + query, function(t) {
-      var raw = {
-        text: query.name + ', ' + query.admin_parts
-      };
-      var clean = {};
-
-      var expected_clean = {
-        text: query.name + ', ' + query.admin_parts,
-        parser: 'addressit',
-        parsed_text: {
-          name: query.name,
-          regions: [ query.name, query.admin_parts ],
-          admin_parts: query.admin_parts
-        }
-      };
-
-      var messages = sanitizer.sanitize(raw, clean);
-
-      t.deepEqual(messages, { errors: [], warnings: [] } );
-      t.deepEqual(clean, expected_clean);
-      t.end();
-
-    });
-
-    test('naive parsing ' + query + ' without spaces', function(t) {
-      var raw = {
-        text: query.name + ',' + query.admin_parts
-      };
-      var clean = {};
-
-      var expected_clean = {
-        text: query.name + ',' + query.admin_parts,
-        parser: 'addressit',
-        parsed_text: {
-          name: query.name,
-          regions: [ query.name, query.admin_parts ],
-          admin_parts: query.admin_parts
-        }
-      };
-
-      var messages = sanitizer.sanitize(raw, clean);
-
-      t.deepEqual(messages, { errors: [], warnings: [] } );
-      t.deepEqual(clean, expected_clean);
-      t.end();
-
-    });
-
-  });
-
-  test('query with one token', function (t) {
-    var raw = {
-      text: 'yugolsavia'
-    };
-    var clean = {};
-    clean.parsed_text = 'this should be removed';
-
-    var expected_clean = {
-      parser: 'addressit',
-      text: 'yugolsavia',
-      parsed_text: {}
-    };
-
-    var messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEqual(messages, { errors: [], warnings: [] } );
-    t.deepEqual(clean, expected_clean);
-    t.end();
-
-  });
-
-  test('query with two tokens, no numbers', function (t) {
-    var raw = {
-      text: 'small town'
-    };
-    var clean = {};
-    clean.parsed_text = 'this should be removed';
-
-    var expected_clean = {
-      parser: 'addressit',
-      text: 'small town',
-      parsed_text: {}
-    };
-
-    var messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEqual(messages, { errors: [], warnings: [] } );
-    t.deepEqual(clean, expected_clean);
-    t.end();
-
-  });
-
-  test('query with two tokens, number first', function (t) {
-    var raw = {
-      text: '123 main'
-    };
-    var clean = {};
-    clean.parsed_text = 'this should be removed';
-
-    var expected_clean = {
-      parser: 'addressit',
-      text: '123 main',
-      parsed_text: {}
-    };
-
-    var messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEqual(messages, { errors: [], warnings: [] } );
-    t.deepEqual(clean, expected_clean);
-    t.end();
-
-  });
-
-  test('query with two tokens, number second', function (t) {
-    var raw = {
-      text: 'main 123'
-    };
-    var clean = {};
-    clean.parsed_text = 'this should be removed';
-
-    var expected_clean = {
-      parser: 'addressit',
-      text: 'main 123',
-      parsed_text: {}
-    };
-
-    var messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEqual(messages, { errors: [], warnings: [] } );
-    t.deepEqual(clean, expected_clean);
-    t.end();
-
-  });
-
-  test('query with many tokens', function(t) {
-    var raw = {
-      text: 'main particle new york'
-    };
-    var clean = {};
-    clean.parsed_text = 'this should be removed';
-
-    var expected_clean = {
-      text: 'main particle new york',
-      parser: 'addressit',
-      parsed_text: {
-        regions: [ 'main particle' ],
-        state: 'NY'
-      }
-    };
-
-    var messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEqual(messages, { errors: [], warnings: [] } );
-    t.deepEqual(clean, expected_clean);
-    t.end();
-
-  });
-
-  test('valid address, house number', function(t) {
-    var raw = {
-      text: '123 main st new york ny'
-    };
-    var clean = {};
-
-    var expected_clean = {
-      text: '123 main st new york ny',
-      parser: 'addressit',
-      parsed_text: {
-        number: '123',
-        street: 'main st',
-        state: 'NY',
-        regions: [ 'new york' ]
-      }
-    };
-
-    var messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEqual(messages, { errors: [], warnings: [] } );
-    t.deepEqual(clean, expected_clean);
-    t.end();
-
-  });
-
-  test('valid address, zipcode', function(t) {
-    var raw = {
-      text: '123 main st new york ny 10010'
-    };
-    var clean = {};
-
-    var expected_clean = {
-      text: '123 main st new york ny 10010',
-      parser: 'addressit',
-      parsed_text: {
-        number: '123',
-        street: 'main st',
-        state: 'NY',
-        postalcode: '10010',
-        regions: [ 'new york' ]
-      }
-    };
-
-    var messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEqual(messages, { errors: [], warnings: [] } );
-    t.deepEqual(clean, expected_clean);
-    t.end();
-  });
-
-  test('valid address with leading 0s in zipcode', function(t) {
-    var raw = {
-      text: '339 W Main St, Cheshire, 06410'
-    };
-    var clean = {};
-
-    var expected_clean = {
-      text: '339 W Main St, Cheshire, 06410',
-      parser: 'addressit',
-      parsed_text: {
-        name: '339 W Main St',
-        number: '339',
-        street: 'W Main St',
-        postalcode: '06410',
-        regions: [ 'Cheshire' ],
-        admin_parts: 'Cheshire, 06410'
-      }
-    };
-
-    var messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEqual(messages, { errors: [], warnings: [] } );
-    t.deepEqual(clean, expected_clean);
-    t.end();
-  });
-
-  test('valid address without spaces after commas', function(t) {
-    var raw = {
-      text: '339 W Main St,Lancaster,PA'
-    };
-    var clean = {};
-
-    var expected_clean = {
-      text: '339 W Main St,Lancaster,PA',
-      parser: 'addressit',
-      parsed_text: {
-        name: '339 W Main St',
-        number: '339',
-        street: 'W Main St',
-        state: 'PA',
-        regions: [ 'Lancaster' ],
-        admin_parts: 'Lancaster, PA'
-      }
-    };
-
-    var messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEqual(messages, { errors: [], warnings: [] } );
-    t.deepEqual(clean, expected_clean);
-    t.end();
-
-  });
-
-  test('whitespace-only input counts as empty', (t) => {
-    const raw = { text: ' ' };
-    const clean = {};
-
-    const expected_clean = {};
-
-    const messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEquals(clean, expected_clean);
-    t.deepEquals(messages.errors, ['invalid param \'text\': text length, must be >0']);
-    t.deepEquals(messages.warnings, [], 'no warnings');
-    t.end();
-  });
-
-  test('return an array of expected parameters in object form for validation', (t) => {
-    const expected = [{ name: 'text' }];
-    const validParameters = sanitizer.expected();
-    t.deepEquals(validParameters, expected);
-    t.end();
-  });
-
-  test('Australia - state only', (t) => {
-    const raw = { text: 'NSW' };
-    const clean = {};
-    const expected_clean = { text: 'NSW', parser: 'addressit', parsed_text: {} };
-    const messages = sanitizer.sanitize(raw, clean);
-
-    t.deepEquals(clean, expected_clean);
-    t.deepEquals(messages.errors, []);
-    t.deepEquals(messages.warnings, [], 'no warnings');
-    t.end();
-  });
-
-  test('should truncate very long text inputs', (t) => {
-    const raw = {
-      text: `
-Sometimes we make the process more complicated than we need to.
-We will never make a journey of a thousand miles by fretting about 
-how long it will take or how hard it will be.
-We make the journey by taking each day step by step and then repeating 
-it again and again until we reach our destination.` };
-    const clean = {};
-    const messages = sanitizer.sanitize(raw, clean);
-
-    t.equals(clean.text.length, 140);
-    t.deepEquals(messages.errors, [], 'no errors');
-    t.deepEquals(messages.warnings, [`param 'text' truncated to 140 characters`]);
-    t.end();
-  });
-};
-
-module.exports.all = function (tape, common) {
-  function test(name, testFunction) {
-    return tape('sanitizer _text: ' + name, testFunction);
-  }
-
-  for( var testCase in module.exports.tests ){
-    module.exports.tests[testCase](test, common);
-  }
-};

From 88656e0a793663a673a4e2eb3ba545f590cb68d9 Mon Sep 17 00:00:00 2001
From: missinglink <insomnia@rcpt.at>
Date: Tue, 1 Oct 2019 16:00:25 +0200
Subject: [PATCH 55/55] refactor(pelias_parser): remove references to "original
 style queries"

---
 middleware/confidenceScore.js                 |  2 +-
 ... search_pelias_parser_boundary_country.js} |  0
 ...s => search_pelias_parser_boundary_gid.js} |  0
 ...s => search_pelias_parser_full_address.js} |  0
 ...> search_pelias_parser_linguistic_bbox.js} |  0
 ... search_pelias_parser_linguistic_focus.js} |  0
 ...ch_pelias_parser_linguistic_focus_bbox.js} |  0
 ...as_parser_linguistic_focus_null_island.js} |  0
 ...> search_pelias_parser_linguistic_only.js} |  0
 ...> search_pelias_parser_partial_address.js} |  0
 ...> search_pelias_parser_regions_address.js} |  0
 ..._pelias_parser_with_category_filtering.js} |  0
 ...ch_pelias_parser_with_source_filtering.js} |  0
 test/unit/query/search_pelias_parser.js       | 28 +++++++++----------
 14 files changed, 15 insertions(+), 15 deletions(-)
 rename test/unit/fixture/{search_boundary_country_original.js => search_pelias_parser_boundary_country.js} (100%)
 rename test/unit/fixture/{search_boundary_gid_original.js => search_pelias_parser_boundary_gid.js} (100%)
 rename test/unit/fixture/{search_full_address_original.js => search_pelias_parser_full_address.js} (100%)
 rename test/unit/fixture/{search_linguistic_bbox_original.js => search_pelias_parser_linguistic_bbox.js} (100%)
 rename test/unit/fixture/{search_linguistic_focus_original.js => search_pelias_parser_linguistic_focus.js} (100%)
 rename test/unit/fixture/{search_linguistic_focus_bbox_original.js => search_pelias_parser_linguistic_focus_bbox.js} (100%)
 rename test/unit/fixture/{search_linguistic_focus_null_island_original.js => search_pelias_parser_linguistic_focus_null_island.js} (100%)
 rename test/unit/fixture/{search_linguistic_only_original.js => search_pelias_parser_linguistic_only.js} (100%)
 rename test/unit/fixture/{search_partial_address_original.js => search_pelias_parser_partial_address.js} (100%)
 rename test/unit/fixture/{search_regions_address_original.js => search_pelias_parser_regions_address.js} (100%)
 rename test/unit/fixture/{search_with_category_filtering_original.js => search_pelias_parser_with_category_filtering.js} (100%)
 rename test/unit/fixture/{search_with_source_filtering_original.js => search_pelias_parser_with_source_filtering.js} (100%)

diff --git a/middleware/confidenceScore.js b/middleware/confidenceScore.js
index 111513eac..c75b88c58 100644
--- a/middleware/confidenceScore.js
+++ b/middleware/confidenceScore.js
@@ -26,7 +26,7 @@ function setup(peliasConfig) {
 }
 
 function computeScores(req, res, next) {
-  // do nothing if no result data set or if query is not of the original variety
+  // do nothing if no result data is set or if the query type is not 'search_pelias_parser'
   if (check.undefined(req.clean) || check.undefined(res) ||
       check.undefined(res.data) || check.undefined(res.meta) ||
       res.meta.query_type !== 'search_pelias_parser') {
diff --git a/test/unit/fixture/search_boundary_country_original.js b/test/unit/fixture/search_pelias_parser_boundary_country.js
similarity index 100%
rename from test/unit/fixture/search_boundary_country_original.js
rename to test/unit/fixture/search_pelias_parser_boundary_country.js
diff --git a/test/unit/fixture/search_boundary_gid_original.js b/test/unit/fixture/search_pelias_parser_boundary_gid.js
similarity index 100%
rename from test/unit/fixture/search_boundary_gid_original.js
rename to test/unit/fixture/search_pelias_parser_boundary_gid.js
diff --git a/test/unit/fixture/search_full_address_original.js b/test/unit/fixture/search_pelias_parser_full_address.js
similarity index 100%
rename from test/unit/fixture/search_full_address_original.js
rename to test/unit/fixture/search_pelias_parser_full_address.js
diff --git a/test/unit/fixture/search_linguistic_bbox_original.js b/test/unit/fixture/search_pelias_parser_linguistic_bbox.js
similarity index 100%
rename from test/unit/fixture/search_linguistic_bbox_original.js
rename to test/unit/fixture/search_pelias_parser_linguistic_bbox.js
diff --git a/test/unit/fixture/search_linguistic_focus_original.js b/test/unit/fixture/search_pelias_parser_linguistic_focus.js
similarity index 100%
rename from test/unit/fixture/search_linguistic_focus_original.js
rename to test/unit/fixture/search_pelias_parser_linguistic_focus.js
diff --git a/test/unit/fixture/search_linguistic_focus_bbox_original.js b/test/unit/fixture/search_pelias_parser_linguistic_focus_bbox.js
similarity index 100%
rename from test/unit/fixture/search_linguistic_focus_bbox_original.js
rename to test/unit/fixture/search_pelias_parser_linguistic_focus_bbox.js
diff --git a/test/unit/fixture/search_linguistic_focus_null_island_original.js b/test/unit/fixture/search_pelias_parser_linguistic_focus_null_island.js
similarity index 100%
rename from test/unit/fixture/search_linguistic_focus_null_island_original.js
rename to test/unit/fixture/search_pelias_parser_linguistic_focus_null_island.js
diff --git a/test/unit/fixture/search_linguistic_only_original.js b/test/unit/fixture/search_pelias_parser_linguistic_only.js
similarity index 100%
rename from test/unit/fixture/search_linguistic_only_original.js
rename to test/unit/fixture/search_pelias_parser_linguistic_only.js
diff --git a/test/unit/fixture/search_partial_address_original.js b/test/unit/fixture/search_pelias_parser_partial_address.js
similarity index 100%
rename from test/unit/fixture/search_partial_address_original.js
rename to test/unit/fixture/search_pelias_parser_partial_address.js
diff --git a/test/unit/fixture/search_regions_address_original.js b/test/unit/fixture/search_pelias_parser_regions_address.js
similarity index 100%
rename from test/unit/fixture/search_regions_address_original.js
rename to test/unit/fixture/search_pelias_parser_regions_address.js
diff --git a/test/unit/fixture/search_with_category_filtering_original.js b/test/unit/fixture/search_pelias_parser_with_category_filtering.js
similarity index 100%
rename from test/unit/fixture/search_with_category_filtering_original.js
rename to test/unit/fixture/search_pelias_parser_with_category_filtering.js
diff --git a/test/unit/fixture/search_with_source_filtering_original.js b/test/unit/fixture/search_pelias_parser_with_source_filtering.js
similarity index 100%
rename from test/unit/fixture/search_with_source_filtering_original.js
rename to test/unit/fixture/search_pelias_parser_with_source_filtering.js
diff --git a/test/unit/query/search_pelias_parser.js b/test/unit/query/search_pelias_parser.js
index 814fd2836..7c2eba0c9 100644
--- a/test/unit/query/search_pelias_parser.js
+++ b/test/unit/query/search_pelias_parser.js
@@ -32,10 +32,10 @@ module.exports.tests.query = function(test, common) {
     });
 
     var compiled = JSON.parse( JSON.stringify( query ) );
-    var expected = require('../fixture/search_linguistic_focus_bbox_original');
+    var expected = require('../fixture/search_pelias_parser_linguistic_focus_bbox');
 
     t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
-    t.deepEqual(compiled.body, expected, 'search_linguistic_focus_bbox_original');
+    t.deepEqual(compiled.body, expected, 'search_linguistic_focus_bbox');
     t.end();
   });
 
@@ -50,7 +50,7 @@ module.exports.tests.query = function(test, common) {
     });
 
     var compiled = JSON.parse( JSON.stringify( query ) );
-    var expected = require('../fixture/search_linguistic_bbox_original');
+    var expected = require('../fixture/search_pelias_parser_linguistic_bbox');
 
     t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search_linguistic_bbox');
@@ -64,7 +64,7 @@ module.exports.tests.query = function(test, common) {
     });
 
     var compiled = JSON.parse( JSON.stringify( query ) );
-    var expected = require('../fixture/search_linguistic_only_original');
+    var expected = require('../fixture/search_pelias_parser_linguistic_only');
 
     t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search_linguistic_only');
@@ -79,7 +79,7 @@ module.exports.tests.query = function(test, common) {
     });
 
     var compiled = JSON.parse( JSON.stringify( query ) );
-    var expected = require('../fixture/search_linguistic_focus_original');
+    var expected = require('../fixture/search_pelias_parser_linguistic_focus');
 
     t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search_linguistic_focus');
@@ -94,7 +94,7 @@ module.exports.tests.query = function(test, common) {
     });
 
     var compiled = JSON.parse( JSON.stringify( query ) );
-    var expected = require('../fixture/search_linguistic_focus_null_island_original');
+    var expected = require('../fixture/search_pelias_parser_linguistic_focus_null_island');
 
     t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search_linguistic_focus_null_island');
@@ -117,7 +117,7 @@ module.exports.tests.query = function(test, common) {
     });
 
     var compiled = JSON.parse( JSON.stringify( query ) );
-    var expected = require('../fixture/search_full_address_original');
+    var expected = require('../fixture/search_pelias_parser_full_address');
 
     t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search_full_address');
@@ -137,7 +137,7 @@ module.exports.tests.query = function(test, common) {
     });
 
     var compiled = JSON.parse( JSON.stringify( query ) );
-    var expected = require('../fixture/search_partial_address_original');
+    var expected = require('../fixture/search_pelias_parser_partial_address');
 
     t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search_partial_address');
@@ -159,7 +159,7 @@ module.exports.tests.query = function(test, common) {
     });
 
     var compiled = JSON.parse( JSON.stringify( query ) );
-    var expected = require('../fixture/search_regions_address_original');
+    var expected = require('../fixture/search_pelias_parser_regions_address');
 
     t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search_regions_address');
@@ -174,7 +174,7 @@ module.exports.tests.query = function(test, common) {
     });
 
     var compiled = JSON.parse( JSON.stringify( query ) );
-    var expected = require('../fixture/search_boundary_country_original');
+    var expected = require('../fixture/search_pelias_parser_boundary_country');
 
     t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search: valid boundary.country query');
@@ -188,7 +188,7 @@ module.exports.tests.query = function(test, common) {
     });
 
     var compiled = JSON.parse( JSON.stringify( query ) );
-    var expected = require('../fixture/search_with_source_filtering_original');
+    var expected = require('../fixture/search_pelias_parser_with_source_filtering');
 
     t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search: valid search query with source filtering');
@@ -202,10 +202,10 @@ module.exports.tests.query = function(test, common) {
     });
 
     var compiled = JSON.parse( JSON.stringify( query ) );
-    var expected = require('../fixture/search_with_category_filtering_original');
+    var expected = require('../fixture/search_pelias_parser_with_category_filtering');
 
     t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
-    t.deepEqual(compiled.body, expected, 'correct search_with_category_filtering_original query');
+    t.deepEqual(compiled.body, expected, 'correct search_with_category_filtering query');
     t.end();
   });
 
@@ -217,7 +217,7 @@ module.exports.tests.query = function(test, common) {
     });
 
     var compiled = JSON.parse( JSON.stringify( query ) );
-    var expected = require('../fixture/search_boundary_gid_original');
+    var expected = require('../fixture/search_pelias_parser_boundary_gid');
 
     t.deepEqual(compiled.type, 'search_pelias_parser', 'query type set');
     t.deepEqual(compiled.body, expected, 'search: valid boundary.gid filter');