Commit d38ad7d

Merge pull request #127 from pelias/missinglink

autocomplete milestone

missinglink committed Apr 22, 2016
2 parents 04a4b59 + 4a10fb8

Showing 24 changed files with 1,531 additions and 103 deletions.
25 changes: 25 additions & 0 deletions integration/analyzer_peliasAdmin.js
@@ -74,6 +74,31 @@ module.exports.tests.functional = function(test, common){
});
};

module.exports.tests.tokenizer = function(test, common){
test( 'tokenizer', function(t){

var suite = new elastictest.Suite( null, { schema: schema } );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasAdmin' );
suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

// specify 2 parts with a delimiter
assertAnalysis( 'forward slash', 'Trinidad/Tobago', [ 'trinidad', 'tobago' ]);
assertAnalysis( 'forward slash', 'Trinidad /Tobago', [ 'trinidad', 'tobago' ]);
assertAnalysis( 'forward slash', 'Trinidad/ Tobago', [ 'trinidad', 'tobago' ]);
assertAnalysis( 'back slash', 'Trinidad\\Tobago', [ 'trinidad', 'tobago' ]);
assertAnalysis( 'back slash', 'Trinidad \\Tobago', [ 'trinidad', 'tobago' ]);
assertAnalysis( 'back slash', 'Trinidad\\ Tobago', [ 'trinidad', 'tobago' ]);
assertAnalysis( 'comma', 'Trinidad,Tobago', [ 'trinidad', 'tobago' ]);
assertAnalysis( 'comma', 'Trinidad ,Tobago', [ 'trinidad', 'tobago' ]);
assertAnalysis( 'comma', 'Trinidad, Tobago', [ 'trinidad', 'tobago' ]);
assertAnalysis( 'space', 'Trinidad Tobago', [ 'trinidad', 'tobago' ]);
assertAnalysis( 'space', 'Trinidad  Tobago', [ 'trinidad', 'tobago' ]);
assertAnalysis( 'space', ' Trinidad Tobago ', [ 'trinidad', 'tobago' ]);

suite.run( t.end );
});
};
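The twelve assertions above all reduce to the same token pair, which implies the peliasAdmin analyzer treats forward slashes, back slashes and commas as delimiters in addition to whitespace. A minimal sketch of an analysis block with that behaviour follows; the tokenizer name, pattern and filter chain are illustrative assumptions, not necessarily what this commit ships:

// sketch only: a pattern tokenizer that splits on whitespace, commas,
// forward slashes and back slashes, matching the assertions above
var settings = {
  analysis: {
    tokenizer: {
      peliasNameTokenizer: {       // hypothetical name
        type: 'pattern',
        pattern: '[\\s,/\\\\]+'    // regex: one or more of whitespace , / \
      }
    },
    analyzer: {
      peliasAdmin: {
        type: 'custom',
        tokenizer: 'peliasNameTokenizer',
        filter: [ 'lowercase', 'asciifolding', 'trim' ] // chain abbreviated
      }
    }
  }
};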

module.exports.all = function (tape, common) {

function test(name, testFunction) {
integration/analyzer_peliasIndexOneEdgeGram.js
@@ -12,11 +12,11 @@ module.exports.tests.analyze = function(test, common){
test( 'analyze', function(t){

var suite = new elastictest.Suite( null, { schema: schema } );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasOneEdgeGram' );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasIndexOneEdgeGram' );
suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

assertAnalysis( 'lowercase', 'F', ['f']);
assertAnalysis( 'asciifolding', 'é', ['e']);
assertAnalysis( 'asciifolding', 'á', ['a']);
assertAnalysis( 'asciifolding', 'ß', ['s','ss']);
assertAnalysis( 'asciifolding', 'æ', ['a','ae']);
assertAnalysis( 'asciifolding', 'ł', ['l']);
@@ -26,7 +26,12 @@ module.exports.tests.analyze = function(test, common){
assertAnalysis( 'ampersand', 'a & b', ['a','&','b'] );
assertAnalysis( 'ampersand', 'a and & and b', ['a','&','b'] );
assertAnalysis( 'ampersand', 'land', ['l','la','lan','land'] ); // should not replace inside tokens
assertAnalysis( 'peliasOneEdgeGramFilter', '1 a ab abc abcdefghij', ['1','a','ab','abc','abcd','abcde','abcdef','abcdefg','abcdefgh','abcdefghi','abcdefghij'] );

// full_token_address_suffix_expansion
assertAnalysis( 'full_token_address_suffix_expansion', 'rd', ['r','ro','roa','road'] );
assertAnalysis( 'full_token_address_suffix_expansion', 'ctr', ['c','ce','cen','cent','cente','center'] );

assertAnalysis( 'peliasIndexOneEdgeGramFilter', '1 a ab abc abcdefghij', ['1','a','ab','abc','abcd','abcde','abcdef','abcdefg','abcdefgh','abcdefghi','abcdefghij'] );
assertAnalysis( 'removeAllZeroNumericPrefix', '00001', ['1'] );
assertAnalysis( 'unique', '1 1 1', ['1'] );
assertAnalysis( 'notnull', ' / / ', [] );
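The 'rd' and 'ctr' assertions above show whole-token suffix expansion running before the edge-gram filter, so grams are built from the expanded word ('road', 'center') rather than the abbreviation. One plausible shape for such a filter is an Elasticsearch synonym filter; the two mappings below are lifted from the tests, and the schema's full list is presumably longer:

// sketch only: expand whole-token abbreviations before edge-gramming
var filter = {
  full_token_address_suffix_expansion: {
    type: 'synonym',
    synonyms: [
      'rd => road',
      'ctr => center'
    ]
  }
};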
@@ -55,7 +60,7 @@ module.exports.tests.address_suffix_expansions = function(test, common){
test( 'address suffix expansions', function(t){

var suite = new elastictest.Suite( null, { schema: schema } );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasOneEdgeGram' );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasIndexOneEdgeGram' );
suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

assertAnalysis( 'safe expansions', 'aly', [
@@ -83,7 +88,7 @@ module.exports.tests.stop_words = function(test, common){
test( 'stop words', function(t){

var suite = new elastictest.Suite( null, { schema: schema } );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasOneEdgeGram' );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasIndexOneEdgeGram' );
suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

assertAnalysis( 'street suffix', 'AB street', [
@@ -102,7 +107,7 @@ module.exports.tests.functional = function(test, common){
test( 'functional', function(t){

var suite = new elastictest.Suite( null, { schema: schema } );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasOneEdgeGram' );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasIndexOneEdgeGram' );
suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

assertAnalysis( 'country', 'Trinidad and Tobago', [
@@ -124,7 +129,7 @@ module.exports.tests.functional = function(test, common){
module.exports.all = function (tape, common) {

function test(name, testFunction) {
return tape('peliasOneEdgeGram: ' + name, testFunction);
return tape('peliasIndexOneEdgeGram: ' + name, testFunction);
}

for( var testCase in module.exports.tests ){
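The gram assertions in this file ('abc' producing 'a', 'ab', 'abc', and so on) are consistent with an edgeNGram token filter with min_gram 1, the filter type Elasticsearch shipped at the time of this commit. A sketch follows; the filter name matches the test label, but the gram bounds are assumptions:

// sketch only: filter settings consistent with the one-edge-gram tests;
// max_gram is illustrative and may differ in the schema
var filter = {
  peliasIndexOneEdgeGramFilter: {
    type: 'edgeNGram',
    min_gram: 1,
    max_gram: 18
  }
};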
integration/analyzer_peliasIndexTwoEdgeGram.js
@@ -12,16 +12,21 @@ module.exports.tests.analyze = function(test, common){
test( 'analyze', function(t){

var suite = new elastictest.Suite( null, { schema: schema } );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasTwoEdgeGram' );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasIndexTwoEdgeGram' );
suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

assertAnalysis( 'lowercase', 'FA', ['fa']);
assertAnalysis( 'asciifolding', 'éA', ['ea']);
assertAnalysis( 'asciifolding', '', ['le']);
assertAnalysis( 'asciifolding', 'ß', ['ss']);
assertAnalysis( 'asciifolding', 'æ', ['ae']);
assertAnalysis( 'asciifolding', 'łA', ['la']);
assertAnalysis( 'asciifolding', 'ɰA', ['ma']);
assertAnalysis( 'trim', ' fA ', ['fa'] );

// full_token_address_suffix_expansion
assertAnalysis( 'full_token_address_suffix_expansion', 'rd', ['ro','roa','road'] );
assertAnalysis( 'full_token_address_suffix_expansion', 'ctr', ['ce','cen','cent','cente','center'] );

assertAnalysis( 'ampersand', 'aa and bb', ['aa','bb'] );
assertAnalysis( 'ampersand', 'land', ['la','lan','land'] ); // should not replace inside tokens

@@ -31,8 +36,8 @@ module.exports.tests.analyze = function(test, common){
// assertAnalysis( 'ampersand', 'aa & bb', ['aa','&','bb'] );
// assertAnalysis( 'ampersand', 'aa and & and bb', ['aa','&','bb'] );

assertAnalysis( 'peliasTwoEdgeGramFilter', '1 a ab abc abcdefghij', ['ab','abc','abcd','abcde','abcdef','abcdefg','abcdefgh','abcdefghi','abcdefghij'] );
assertAnalysis( 'removeAllZeroNumericPrefix', '0002 00011', ['11'] );
assertAnalysis( 'peliasIndexTwoEdgeGramFilter', '1 a ab abc abcdefghij', ['1', 'ab','abc','abcd','abcde','abcdef','abcdefg','abcdefgh','abcdefghi','abcdefghij'] );
assertAnalysis( 'removeAllZeroNumericPrefix', '0002 00011', ['2', '11'] );
assertAnalysis( 'unique', '11 11 11', ['11'] );
assertAnalysis( 'notnull', ' / / ', [] );

@@ -44,7 +49,13 @@ module.exports.tests.analyze = function(test, common){
assertAnalysis( 'punctuation', punctuation.all.join(''), ['-&'] );

// ensure that single grams are not created
assertAnalysis( '1grams', 'a aa b bb 1 11', ['aa','bb','11'] );
assertAnalysis( '1grams', 'a aa b bb 1 11', ['aa','bb','1','11'] );

// for directionals (north/south/east/west) we allow single grams
assertAnalysis( 'direction_synonym_contraction_keep_original', 'a', [] );
assertAnalysis( 'direction_synonym_contraction_keep_original', 'n', ['no','nor','nort','north','n'] );
// note the single gram created below
assertAnalysis( 'direction_synonym_contraction_keep_original', 'north', ['no','nor','nort','north','n'] );

// ensure that very large grams are created
assertAnalysis( 'largeGrams', 'grolmanstrasse', [
@@ -63,7 +74,7 @@ module.exports.tests.address_suffix_expansions = function(test, common){
test( 'address suffix expansions', function(t){

var suite = new elastictest.Suite( null, { schema: schema } );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasTwoEdgeGram' );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasIndexTwoEdgeGram' );
suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

assertAnalysis( 'safe expansions', 'aly', [
@@ -91,7 +102,7 @@ module.exports.tests.stop_words = function(test, common){
test( 'stop words', function(t){

var suite = new elastictest.Suite( null, { schema: schema } );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasTwoEdgeGram' );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasIndexTwoEdgeGram' );
suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

assertAnalysis( 'street suffix', 'AB street', [
@@ -110,7 +121,7 @@ module.exports.tests.functional = function(test, common){
test( 'functional', function(t){

var suite = new elastictest.Suite( null, { schema: schema } );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasTwoEdgeGram' );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasIndexTwoEdgeGram' );
suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

assertAnalysis( 'country', 'Trinidad and Tobago', [
@@ -129,12 +140,11 @@ module.exports.tests.functional = function(test, common){
});
};


module.exports.tests.functional = function(test, common){
module.exports.tests.address_suffix_expansions = function(test, common){
test( 'address suffix expansion', function(t){

var suite = new elastictest.Suite( null, { schema: schema } );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasTwoEdgeGram' );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasIndexTwoEdgeGram' );
suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

assertAnalysis( 'street', 'FOO rd', [
@@ -149,10 +159,28 @@ module.exports.tests.functional = function(test, common){
});
};

// handle special cases for numerals
module.exports.tests.numerals = function(test, common){
test( 'numerals', function(t){

var suite = new elastictest.Suite( null, { schema: schema } );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasIndexTwoEdgeGram' );
suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

// allow single grams for single digit numbers
assertAnalysis( 'single digit', '1 2', [ '1', '2' ]);

// do not produce single grams for 2+ digit numbers
assertAnalysis( 'multi digits', '12 999', [ '12', '99', '999' ]);

suite.run( t.end );
});
};
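The single-gram exception for one-digit numbers can be checked by hand against a running index with the analyze API. A sketch using the legacy elasticsearch JavaScript client of this era; the host and index name are placeholders:

// sketch only: inspect the tokens the analyzer actually emits
var elasticsearch = require('elasticsearch');
var client = new elasticsearch.Client({ host: 'localhost:9200' });

client.indices.analyze({
  index: 'pelias',                       // placeholder index name
  analyzer: 'peliasIndexTwoEdgeGram',
  text: '12 999'
}, function( err, res ){
  if( err ){ throw err; }
  // expected per the test above: [ '12', '99', '999' ]
  console.log( res.tokens.map( function( t ){ return t.token; } ) );
});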

module.exports.all = function (tape, common) {

function test(name, testFunction) {
return tape('peliasTwoEdgeGram: ' + name, testFunction);
return tape('peliasIndexTwoEdgeGram: ' + name, testFunction);
}

for( var testCase in module.exports.tests ){
25 changes: 25 additions & 0 deletions integration/analyzer_peliasPhrase.js
@@ -95,6 +95,31 @@ module.exports.tests.functional = function(test, common){
});
};

module.exports.tests.tokenizer = function(test, common){
test( 'tokenizer', function(t){

var suite = new elastictest.Suite( null, { schema: schema } );
var assertAnalysis = analyze.bind( null, suite, t, 'peliasPhrase' );
suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up

// specify 2 parts with a delimiter
assertAnalysis( 'forward slash', 'Bedell Street/133rd Avenue', [ 'bedell', 'st', '133rd', 'ave' ]);
assertAnalysis( 'forward slash', 'Bedell Street /133rd Avenue', [ 'bedell', 'st', '133rd', 'ave' ]);
assertAnalysis( 'forward slash', 'Bedell Street/ 133rd Avenue', [ 'bedell', 'st', '133rd', 'ave' ]);
assertAnalysis( 'back slash', 'Bedell Street\\133rd Avenue', [ 'bedell', 'st', '133rd', 'ave' ]);
assertAnalysis( 'back slash', 'Bedell Street \\133rd Avenue', [ 'bedell', 'st', '133rd', 'ave' ]);
assertAnalysis( 'back slash', 'Bedell Street\\ 133rd Avenue', [ 'bedell', 'st', '133rd', 'ave' ]);
assertAnalysis( 'comma', 'Bedell Street,133rd Avenue', [ 'bedell', 'st', '133rd', 'ave' ]);
assertAnalysis( 'comma', 'Bedell Street ,133rd Avenue', [ 'bedell', 'st', '133rd', 'ave' ]);
assertAnalysis( 'comma', 'Bedell Street, 133rd Avenue', [ 'bedell', 'st', '133rd', 'ave' ]);
assertAnalysis( 'space', 'Bedell Street 133rd Avenue', [ 'bedell', 'st', '133rd', 'ave' ]);
assertAnalysis( 'space', 'Bedell Street  133rd Avenue', [ 'bedell', 'st', '133rd', 'ave' ]);
assertAnalysis( 'space', ' Bedell Street 133rd Avenue ', [ 'bedell', 'st', '133rd', 'ave' ]);

suite.run( t.end );
});
};
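Every suite in this commit binds the same analyze helper before asserting token output. The helper itself is not shown in the diff; below is a minimal sketch of what it plausibly looks like, given how it is bound and called here (the real implementation lives in the repo's test support code and may differ):

// sketch only: assert that an analyzer reduces `text` to `expected` tokens
function analyze( suite, t, analyzer, comment, text, expected ){
  suite.assert( function( done ){
    suite.client.indices.analyze({
      index: suite.props.index,   // assumes elastictest exposes the index name
      analyzer: analyzer,
      text: text
    }, function( err, res ){
      if( err ){ console.error( err ); }
      var tokens = ( res && res.tokens || [] ).map( function( token ){
        return token.token;
      });
      t.deepEqual( tokens, expected, comment );
      done();
    });
  });
}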

// @ref: https://www.elastic.co/guide/en/elasticsearch/guide/current/phrase-matching.html
// @ref: https://www.elastic.co/guide/en/elasticsearch/guide/current/slop.html
module.exports.tests.slop = function(test, common){