// load synonyms from disk.
// note: paths are resolved relative to this file rather than process.cwd(), so
// the module loads the same files regardless of the caller's working directory.
var synonymsDir = path.join( __dirname, 'synonyms' );

// builds a map of { <filename minus '.txt'>: [ synonym lines ] }, one entry per
// '.txt' file found in the synonyms directory (other files, eg. parser.js, are ignored).
var synonyms = fs.readdirSync( synonymsDir )
  .filter( f => /\.txt$/.test( f ) )
  .sort() // deterministic key order, independent of filesystem ordering
  .reduce(( acc, cur ) => {
    // path.basename strips only the trailing extension, unlike
    // String.replace which removes the first '.txt' found anywhere in the name.
    acc[ path.basename( cur, '.txt' ) ] = synonymFile(
      path.join( synonymsDir, cur )
    );
    return acc;
  }, {});
)((1)st?|(2)nd?|(3)rd?|([4-9])th?)|(([0-9]*)(1[0-9])th?)|(([0-9]*[02-9])((1)st?|(2)nd?|(3)rd?|([04-9])th?))($| ))", @@ -276,6 +261,15 @@ function generate(){ } }; + // dynamically create filters for all synonym files in the ./synonyms directory. + // each filter is given the same name as the file, minus the extension. + for( var key in synonyms ){ + settings.analysis.filter[key] = { + "type": "synonym", + "synonyms": !!synonyms[key].length ? synonyms[key] : [''] + }; + } + // dynamically create filters which can replace text *inside* a token. // we are not able to re-use the synonym functionality in elasticsearch // because it only matches whole tokens, not strings *within* tokens. @@ -284,30 +278,30 @@ function generate(){ // street suffix filters (replace text inside tokens) // based off synonym list - street_suffix.synonyms.forEach( function( synonym ){ + synonyms.street_suffix_contractions.forEach( function( synonym ){ var split = synonym.split(' '); settings.analysis.filter[ "keyword_street_suffix_" + split[0] ] = { "type": "pattern_replace", "pattern": " " + split[0], "replacement": " " + split[2] - } + }; }); // compass prefix filters (replace text inside tokens) - // based off direction_synonyms list - street_suffix.direction_synonyms.forEach( function( synonym ){ + // based off directionals list + synonyms.directionals.forEach( function( synonym ){ var split = synonym.split(' '); settings.analysis.filter[ "keyword_compass_" + split[0] ] = { "type": "pattern_replace", "pattern": split[0], "replacement": split[2] - } + }; }); // Merge settings from pelias/config - if( 'object' == typeof config && - 'object' == typeof config.elasticsearch && - 'object' == typeof config.elasticsearch.settings ){ + if( 'object' === typeof config && + 'object' === typeof config.elasticsearch && + 'object' === typeof config.elasticsearch.settings ){ return merge({}, settings, config.elasticsearch.settings); } diff --git a/street_suffix.js b/street_suffix.js deleted file mode 
100644 index e279f695..00000000 --- a/street_suffix.js +++ /dev/null @@ -1,304 +0,0 @@ - -module.exports.terms = [ - "alley", "annex", "avenue", - "bay", "bayou", "beach", "beltway", "bend", "bluff", "bluffs", "boulevard", "bottom", "branch", "bridge", "brook", "bypass", - "canyon", "cape", "causeway", "center", "channel", "circle", "cliff", "club", "common", "commons", "connector", "corridor", - "course", "cove", "creek", "crescent", "crest", "crossing", "crossroad", "crossroads", "curve", - "dale", "dam", "drive", - "esplanade", "expressway", "extended", - "falls", "ferry", "field", "fields", "flat", "flats", "ford", "forest", "forge", "fork", "forks", "freeway", - "garden", "gardens", "gateway", "glen", "glenn", "green", "grove", - "harbor", "haven", "heights", "highway", "hill", "hills", "hollow", - "isle", - "junction", - "key", "keys", "knoll", "knolls", - "landing", "lane", "light", "lights", "lock", "locks", - "manor", "meadow", "meadows", "mews", "mill", "mills", "mountain", "motorway", - "neck", - "orchard", - "parade", "parkway", "passage", "pier", "pike", "pine", "pines", "place", "plaza", "promenade", - "ranch", "ridge", "ridges", "river", "road", "route", "row", - "shore", "shores", "skyway", "spring", "springs", "square", "street", - "terrace", "trail", "trafficway", "tunnel", "turnpike", - "valley", "vista", "village", "viaduct", - "way" -]; - -module.exports.synonyms = [ - "alley => aly", - "annex => anx", - "avenue => ave", - "bayou => byu", - "beach => bch", - "bend => bnd", - "bluff => blf", - "bluffs => blfs", - "bottom => btm", - "boulevard => blvd", - "branch => br", - "bridge => brg", - "brook => brk", - "bypass => byp", - "canyon => cyn", - "cape => cp", - "causeway => cswy", - "center => ctr", - "channel => chnnl", - "circle => cir", - "cliff => clf", - "close => cl", - "club => clb", - "common => cmn", - "commons => cmns", - "connector => con", - "corridor => cor", - "course => crse", - "court => ct", - "cove => cv", - "creek => crk", - 
"crescent => cres", - "crest => crst", - "crossing => xing", - "crossroad => xrd", - "crossroads => xrds", - "curve => curv", - "dale => dl", - "dam => dm", - "drive => dr", - "esplanade => esp", - "expressway => expy", - "extended => ext", - "falls => fls", - "ferry => fry", - "field => fld", - "fields => flds", - "flat => flt", - "flats => flts", - "ford => frd", - "forest => frst", - "forge => frg", - "fork => frk", - "forks => frks", - "freeway => fwy", - "garden => gdn", - "gardens => gdns", - "gateway => gtwy", - "glen => gln", - "glenn => gln", - "green => grn", - "grove => grv", - "harbor => hbr", - "haven => hvn", - "heights => hts", - "highway => hwy", - "hill => hl", - "hills => hls", - "hollow => holw", - "isle => is", - "junction => jct", - "key => ky", - "keys => kys", - "knoll => knl", - "knolls => knls", - "landing => lndg", - "lane => ln", - "light => lgt", - "lights => lgts", - "lock => lck", - "locks => lcks", - "manor => mnr", - "meadow => mdw", - "meadows => mdws", - "mill => ml", - "mills => mls", - "mountain => mnt", - "motorway => mtwy", - "neck => nck", - "orchard => orch", - "parkway => pkwy", - "pasage => psge", - "pier => pr", - "pine => pne", - "pines => pnes", - "place => pl", - "plaza => plz", - "ranch => rnch", - "ridge => rdg", - "ridges => rdgs", - "river => riv", - "road => rd", - "route => rte", - "shore => shr", - "shores => shrs", - "skyway => skwy", - "spring => spg", - "springs => spgs", - "square => sq", - "street => st", - "suite => ste", - "terrace => terr", - "trail => tr", - "trafficway => trfy", - "tunnel => tunl", - "turnpike => tpke", - "valley => vly", - "vista => vis", - "village => vlg", - "way => wy" -]; - -// note: more descriptive tokens must come before less descriptive ones -// eg: 'southwest' must come before 'west' else 'southwest foo' -> 'southw foo' -module.exports.direction_synonyms = [ - "southwest => sw", - "southeast => se", - "northwest => nw", - "northeast => ne", - "north => n", - "south => s", - 
"east => e", - "west => w" -]; - -// note: this is a bit of a hack, it can be placed AFTER an 2+ ngram filter in -// order to allow single grams in the index. -module.exports.direction_synonyms_keep_original = [ - "north => north,n", - "south => south,s", - "east => east,e", - "west => west,w" -]; - -/** - a list of 'safe' street suffix expansions. - - this list should NOT include any values where the abbreviation is a prefix of - the expanded form. - - EG. 'st' is a prefix of 'street' so it is not included here. - EG. 'rd' is NOT a prefix of 'road' so it IS included here. - - the term 'safe' refers to whether the token may be expanded without causing - other issues; in general ask yourself "if i expand `byu => bayou` will this cause - issues with other tokens which *begin with byu?" - - EG. 'pr' is disabled as it would cause jitter when autocompleting any place - name beginning with 'pr' such as 'princeton', on the second keypress the - results would likely all contain names which are/begin with "pier". - - EG. 'ct' is disabled as expanding it to 'court' would possibly conflict with - the state abbreviation for 'Connecticut'. 
- - please use judgement when adding new expansions as it may cause the 'jitter' - behaviour as outlined in https://github.com/pelias/schema/pull/83 -**/ -module.exports.partial_token_safe_expansions = [ - "aly => alley", - "anx => annex", - "byu => bayou", - "bch => beach", - "bnd => bend", - "blf => bluff", - "blfs => bluffs", - "btm => bottom", - "blvd => boulevard", - "brg => bridge", - "brk => brook", - "cyn => canyon", - "cp => cape", - "cswy => causeway", - "ctr => center", - "chnnl => channel", - "clf => cliff", - "clb => club", - "cmn => common", - "cmns => commons", - "crse => course", - // "ct => court", - "cv => cove", - "crk => creek", - "crst => crest", - "xing => crossing", - "xrd => crossroad", - "xrds => crossroads", - "dl => dale", - "dm => dam", - "expy => expressway", - "fls => falls", - "fry => ferry", - "fld => field", - "flds => fields", - "flt => flat", - "flts => flats", - "frd => ford", - "frst => forest", - "frg => forge", - "frk => fork", - "frks => forks", - "fwy => freeway", - "gdn => garden", - "gdns => gardens", - "gtwy => gateway", - "gln => glenn", - "grn => green", - "grv => grove", - "hbr => harbor", - "hvn => haven", - "hts => heights", - "hwy => highway", - "hl => hill", - "hls => hills", - "holw => hollow", - "jct => junction", - "ky => key", - "kys => keys", - "knl => knoll", - "knls => knolls", - "lndg => landing", - "ln => lane", - "lgt => light", - "lgts => lights", - "lck => lock", - "lcks => locks", - "mnr => manor", - "mdw => meadow", - "mdws => meadows", - "ml => mill", - "mls => mills", - "mnt => mountain", - "mtwy => motorway", - "nck => neck", - "pkwy => parkway", - "psge => pasage", - // "pr => pier", - "pne => pine", - "pnes => pines", - "plz => plaza", - "rnch => ranch", - "rdg => ridge", - "rdgs => ridges", - "rd => road", - "rte => route", - "shr => shore", - "shrs => shores", - "skwy => skyway", - "spg => spring", - "spgs => springs", - "ste => suite", - "trfy => trafficway", - "tunl => tunnel", - "tpke => 
turnpike", - "vly => valley", - "vlg => village", - "wy => way" -]; - -module.exports.full_token_safe_expansions = []; - -// copy the unsafe expansions -module.exports.partial_token_safe_expansions.forEach( function( expansion ){ - module.exports.full_token_safe_expansions.push( expansion ); -}); - -// add the expansions which are only safe on complete tokens (not partial tokens) -module.exports.full_token_safe_expansions.push( "n => north", "s => south", "e => east", "w => west" ); diff --git a/synonyms/ampersand.txt b/synonyms/ampersand.txt new file mode 100644 index 00000000..fbc6ab9c --- /dev/null +++ b/synonyms/ampersand.txt @@ -0,0 +1 @@ +and => & diff --git a/synonyms/directionals.txt b/synonyms/directionals.txt new file mode 100644 index 00000000..bdb00bbc --- /dev/null +++ b/synonyms/directionals.txt @@ -0,0 +1,11 @@ +# note: more descriptive tokens must come before less descriptive ones +# eg: 'southwest' must come before 'west' else 'southwest foo' -> 'southw foo' + +southwest => sw +southeast => se +northwest => nw +northeast => ne +north => n +south => s +east => e +west => w diff --git a/synonyms/full_token_address_suffix_expansion.txt b/synonyms/full_token_address_suffix_expansion.txt new file mode 100644 index 00000000..fef4937c --- /dev/null +++ b/synonyms/full_token_address_suffix_expansion.txt @@ -0,0 +1,106 @@ +# note: synonyms copied verbatim from partial_token_address_suffix_expansion.txt + +aly => alley +anx => annex +byu => bayou +bch => beach +bnd => bend +blf => bluff +blfs => bluffs +btm => bottom +blvd => boulevard +brg => bridge +brk => brook +cyn => canyon +cp => cape +cswy => causeway +ctr => center +chnnl => channel +clf => cliff +clb => club +cmn => common +cmns => commons +crse => course +# ct => court +cv => cove +crk => creek +crst => crest +xing => crossing +xrd => crossroad +xrds => crossroads +dl => dale +dm => dam +expy => expressway +fls => falls +fry => ferry +fld => field +flds => fields +flt => flat +flts => flats +frd => 
/**
 * Parse a solr-format synonyms file into an array of normalised synonym rules.
 *
 * Each line is trimmed and whitespace-normalised; blank lines and comment
 * lines (those starting with '#') are dropped.
 *
 * see: https://www.elastic.co/guide/en/elasticsearch/reference/2.4/analysis-synonym-tokenfilter.html
 *
 * @param {string} filename - path of the synonyms file to read.
 * @returns {string[]} one entry per rule, eg. [ 'north => n', 'foo,bar' ].
 * @throws {Error} 'file not found' when the path is missing or cannot be stat'ed.
 * @throws {Error} 'invalid file' when the path exists but is not a regular file.
 */
function parser( filename ){

  // path not specified / file does not exist
  var stats;
  try {
    stats = fs.lstatSync( filename );
  } catch( e ){
    throw new Error( 'file not found' );
  }

  // checked outside the try/catch above, otherwise this error would be
  // caught and misreported as 'file not found'.
  if( !stats.isFile() ){
    throw new Error( 'invalid file' );
  }

  // parse solr synonyms format
  return fs.readFileSync( filename, 'utf8' )
    .split('\n')
    .map( line => {
      return line.trim()                  // trim whitespace (including any trailing '\r')
        .replace( /\s\s+/g, ' ' )         // squash double spaces
        .replace( /\s*,\s*/g, ',' )       // trim spaces around commas
        .replace( /^,+|,+$/g, '' )        // trim leading/trailing commas (after the step
                                          // above, so no stray whitespace is left behind)
        .replace( /\s*=>\s*/g, ' => ' );  // exactly one space either side of arrows
    })
    .filter( line => line.length > 0 )    // remove empty lines
    .filter( line => '#' !== line[0] );   // remove comments
}
+# +# please use judgement when adding new expansions as it may cause the 'jitter' +# behaviour as outlined in https://github.com/pelias/schema/pull/83 + +aly => alley +anx => annex +byu => bayou +bch => beach +bnd => bend +blf => bluff +blfs => bluffs +btm => bottom +blvd => boulevard +brg => bridge +brk => brook +cyn => canyon +cp => cape +cswy => causeway +ctr => center +chnnl => channel +clf => cliff +clb => club +cmn => common +cmns => commons +crse => course +# ct => court +cv => cove +crk => creek +crst => crest +xing => crossing +xrd => crossroad +xrds => crossroads +dl => dale +dm => dam +expy => expressway +fls => falls +fry => ferry +fld => field +flds => fields +flt => flat +flts => flats +frd => ford +frst => forest +frg => forge +frk => fork +frks => forks +fwy => freeway +gdn => garden +gdns => gardens +gtwy => gateway +gln => glenn +grn => green +grv => grove +hbr => harbor +hvn => haven +hts => heights +hwy => highway +hl => hill +hls => hills +holw => hollow +jct => junction +ky => key +kys => keys +knl => knoll +knls => knolls +lndg => landing +ln => lane +lgt => light +lgts => lights +lck => lock +lcks => locks +mnr => manor +mdw => meadow +mdws => meadows +ml => mill +mls => mills +mnt => mountain +mtwy => motorway +nck => neck +pkwy => parkway +psge => pasage +# pr => pier +pne => pine +pnes => pines +plz => plaza +rnch => ranch +rdg => ridge +rdgs => ridges +rd => road +rte => route +shr => shore +shrs => shores +skwy => skyway +spg => spring +spgs => springs +ste => suite +trfy => trafficway +tunl => tunnel +tpke => turnpike +vly => valley +vlg => village +wy => way diff --git a/synonyms/street_suffix_contractions.txt b/synonyms/street_suffix_contractions.txt new file mode 100644 index 00000000..75a5e3f2 --- /dev/null +++ b/synonyms/street_suffix_contractions.txt @@ -0,0 +1,120 @@ +alley => aly +annex => anx +avenue => ave +bayou => byu +beach => bch +bend => bnd +bluff => blf +bluffs => blfs +bottom => btm +boulevard => blvd +branch => br 
+bridge => brg +brook => brk +bypass => byp +canyon => cyn +cape => cp +causeway => cswy +center => ctr +channel => chnnl +circle => cir +cliff => clf +close => cl +club => clb +common => cmn +commons => cmns +connector => con +corridor => cor +course => crse +court => ct +cove => cv +creek => crk +crescent => cres +crest => crst +crossing => xing +crossroad => xrd +crossroads => xrds +curve => curv +dale => dl +dam => dm +drive => dr +esplanade => esp +expressway => expy +extended => ext +falls => fls +ferry => fry +field => fld +fields => flds +flat => flt +flats => flts +ford => frd +forest => frst +forge => frg +fork => frk +forks => frks +freeway => fwy +garden => gdn +gardens => gdns +gateway => gtwy +glen => gln +glenn => gln +green => grn +grove => grv +harbor => hbr +haven => hvn +heights => hts +highway => hwy +hill => hl +hills => hls +hollow => holw +isle => is +junction => jct +key => ky +keys => kys +knoll => knl +knolls => knls +landing => lndg +lane => ln +light => lgt +lights => lgts +lock => lck +locks => lcks +manor => mnr +meadow => mdw +meadows => mdws +mill => ml +mills => mls +mountain => mnt +motorway => mtwy +neck => nck +orchard => orch +parkway => pkwy +pasage => psge +pier => pr +pine => pne +pines => pnes +place => pl +plaza => plz +ranch => rnch +ridge => rdg +ridges => rdgs +river => riv +road => rd +route => rte +shore => shr +shores => shrs +skyway => skwy +spring => spg +springs => spgs +square => sq +street => st +suite => ste +terrace => terr +trail => tr +trafficway => trfy +tunnel => tunl +turnpike => tpke +valley => vly +vista => vis +village => vlg +way => wy diff --git a/test/fixtures/expected.json b/test/fixtures/expected.json index 6649b2c4..04df7b5b 100644 --- a/test/fixtures/expected.json +++ b/test/fixtures/expected.json @@ -103,8 +103,8 @@ "icu_folding", "trim", "ampersand", - "street_synonym", - "direction_synonym", + "street_suffix_contractions", + "directionals", "unique", "notnull" ] @@ -308,137 +308,7 @@ 
"pattern": "^(0*)", "replacement": "" }, - "address_stop": { - "type": "stop", - "stopwords": [ - "alley", - "annex", - "avenue", - "bay", - "bayou", - "beach", - "beltway", - "bend", - "bluff", - "bluffs", - "boulevard", - "bottom", - "branch", - "bridge", - "brook", - "bypass", - "canyon", - "cape", - "causeway", - "center", - "channel", - "circle", - "cliff", - "club", - "common", - "commons", - "connector", - "corridor", - "course", - "cove", - "creek", - "crescent", - "crest", - "crossing", - "crossroad", - "crossroads", - "curve", - "dale", - "dam", - "drive", - "esplanade", - "expressway", - "extended", - "falls", - "ferry", - "field", - "fields", - "flat", - "flats", - "ford", - "forest", - "forge", - "fork", - "forks", - "freeway", - "garden", - "gardens", - "gateway", - "glen", - "glenn", - "green", - "grove", - "harbor", - "haven", - "heights", - "highway", - "hill", - "hills", - "hollow", - "isle", - "junction", - "key", - "keys", - "knoll", - "knolls", - "landing", - "lane", - "light", - "lights", - "lock", - "locks", - "manor", - "meadow", - "meadows", - "mews", - "mill", - "mills", - "mountain", - "motorway", - "neck", - "orchard", - "parade", - "parkway", - "passage", - "pier", - "pike", - "pine", - "pines", - "place", - "plaza", - "promenade", - "ranch", - "ridge", - "ridges", - "river", - "road", - "route", - "row", - "shore", - "shores", - "skyway", - "spring", - "springs", - "square", - "street", - "terrace", - "trail", - "trafficway", - "tunnel", - "turnpike", - "valley", - "vista", - "village", - "viaduct", - "way" - ] - }, - "street_synonym": { + "street_suffix_contractions": { "type": "synonym", "synonyms": [ "alley => aly", @@ -769,7 +639,7 @@ "w => west" ] }, - "direction_synonym": { + "directionals": { "type": "synonym", "synonyms": [ "southwest => sw", @@ -782,15 +652,6 @@ "west => w" ] }, - "direction_synonym_contraction_keep_original": { - "type": "synonym", - "synonyms": [ - "north => north,n", - "south => south,s", - "east => 
east,e", - "west => west,w" - ] - }, "remove_ordinals": { "type": "pattern_replace", "pattern": "(?i)((^| )((1)st?|(2)nd?|(3)rd?|([4-9])th?)|(([0-9]*)(1[0-9])th?)|(([0-9]*[02-9])((1)st?|(2)nd?|(3)rd?|([04-9])th?))($| ))", diff --git a/test/settings.js b/test/settings.js index 2675cbfb..0d1f716e 100644 --- a/test/settings.js +++ b/test/settings.js @@ -117,8 +117,8 @@ module.exports.tests.peliasPhraseAnalyzer = function(test, common) { "icu_folding", "trim", "ampersand", - "street_synonym", - "direction_synonym", + "street_suffix_contractions", + "directionals", "unique", "notnull" ]); @@ -297,50 +297,13 @@ module.exports.tests.removeAllZeroNumericPrefixFilter = function(test, common) { }); }; -// this filter can be used to remove certain common words in order to keep -// the index size down and the execution speed quick. -// note: it is not intended to be used with shingles, but useful for ngrams -module.exports.tests.addressStopFilter = function(test, common) { - test('has address_stop filter', function(t) { - var s = settings(); - t.equal(typeof s.analysis.filter.address_stop, 'object', 'there is an address_stop filter'); - var filter = s.analysis.filter.address_stop; - t.equal(filter.type, 'stop'); - t.deepEqual(filter.stopwords, [ - "alley", "annex", "avenue", - "bay", "bayou", "beach", "beltway", "bend", "bluff", "bluffs", "boulevard", "bottom", "branch", "bridge", "brook", "bypass", - "canyon", "cape", "causeway", "center", "channel", "circle", "cliff", "club", "common", "commons", "connector", "corridor", - "course", "cove", "creek", "crescent", "crest", "crossing", "crossroad", "crossroads", "curve", - "dale", "dam", "drive", - "esplanade", "expressway", "extended", - "falls", "ferry", "field", "fields", "flat", "flats", "ford", "forest", "forge", "fork", "forks", "freeway", - "garden", "gardens", "gateway", "glen", "glenn", "green", "grove", - "harbor", "haven", "heights", "highway", "hill", "hills", "hollow", - "isle", - "junction", - "key", "keys", 
"knoll", "knolls", - "landing", "lane", "light", "lights", "lock", "locks", - "manor", "meadow", "meadows", "mews", "mill", "mills", "mountain", "motorway", - "neck", - "orchard", - "parade", "parkway", "passage", "pier", "pike", "pine", "pines", "place", "plaza", "promenade", - "ranch", "ridge", "ridges", "river", "road", "route", "row", - "shore", "shores", "skyway", "spring", "springs", "square", "street", - "terrace", "trail", "trafficway", "tunnel", "turnpike", - "valley", "vista", "village", "viaduct", - "way" - ]); - t.end(); - }); -}; - // this filter stems common street suffixes // eg. road=>rd and street=>st module.exports.tests.streetSynonymFilter = function(test, common) { - test('has street_synonym filter', function(t) { + test('has street_suffix_contractions filter', function(t) { var s = settings(); - t.equal(typeof s.analysis.filter.street_synonym, 'object', 'there is an street_synonym filter'); - var filter = s.analysis.filter.street_synonym; + t.equal(typeof s.analysis.filter.street_suffix_contractions, 'object', 'there is an street_suffix_contractions filter'); + var filter = s.analysis.filter.street_suffix_contractions; t.equal(filter.type, 'synonym'); t.true(Array.isArray(filter.synonyms)); t.equal(filter.synonyms.length, 120); @@ -351,10 +314,10 @@ module.exports.tests.streetSynonymFilter = function(test, common) { // this filter stems common directional terms // eg. 
north=>n and south=>s module.exports.tests.directionSynonymFilter = function(test, common) { - test('has direction_synonym filter', function(t) { + test('has directionals filter', function(t) { var s = settings(); - t.equal(typeof s.analysis.filter.direction_synonym, 'object', 'there is an direction_synonym filter'); - var filter = s.analysis.filter.direction_synonym; + t.equal(typeof s.analysis.filter.directionals, 'object', 'there is an directionals filter'); + var filter = s.analysis.filter.directionals; t.equal(filter.type, 'synonym'); t.true(Array.isArray(filter.synonyms)); t.equal(filter.synonyms.length, 8);