From 45cc52ecef95034c47ba7a3909f28ce1f3902f1c Mon Sep 17 00:00:00 2001 From: Fumihiko Hata Date: Sun, 14 Nov 2021 01:08:13 +0900 Subject: [PATCH 1/3] rename jp (country code) to ja (lang code) --- lunr.jp.js => lunr.ja.js | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename lunr.jp.js => lunr.ja.js (100%) diff --git a/lunr.jp.js b/lunr.ja.js similarity index 100% rename from lunr.jp.js rename to lunr.ja.js From 7153fd2ab4c1a2be25538aa60dd1a00640560b08 Mon Sep 17 00:00:00 2001 From: Fumihiko Hata Date: Sun, 14 Nov 2021 01:09:00 +0900 Subject: [PATCH 2/3] add ja support and trimmer-ja --- lunr.ja.js | 200 +++++++++++++++++++++++++++-------------------------- 1 file changed, 103 insertions(+), 97 deletions(-) diff --git a/lunr.ja.js b/lunr.ja.js index 4dcdc43..bd679d7 100644 --- a/lunr.ja.js +++ b/lunr.ja.js @@ -19,100 +19,106 @@ * export the module via AMD, CommonJS or as a browser global * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js */ -; -(function(root, factory) { - if (typeof define === 'function' && define.amd) { - // AMD. Register as an anonymous module. - define(factory) - } else if (typeof exports === 'object') { - /** - * Node. Does not work with strict CommonJS, but - * only CommonJS-like environments that support module.exports, - * like Node. - */ - module.exports = factory() - } else { - // Browser globals (root is window) - factory()(root.lunr); - } -}(this, function() { - /** - * Just return a value to define the module export. - * This example returns an object, but the module - * can return a function as the exported value. - */ - return function(lunr) { - /* throw error if lunr is not yet included */ - if ('undefined' === typeof lunr) { - throw new Error('Lunr is not present. Please include / require Lunr before this script.'); - } - - /* throw error if lunr stemmer support is not yet included */ - if ('undefined' === typeof lunr.stemmerSupport) { - throw new Error('Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.'); - } - - /* register specific locale function */ - lunr.jp = function() { - this.pipeline.reset(); - this.pipeline.add( - lunr.jp.stopWordFilter, - lunr.jp.stemmer - ); - // change the tokenizer for japanese one - lunr.tokenizer = lunr.jp.tokenizer; - }; - var segmenter = new TinySegmenter(); // インスタンス生成 - - lunr.jp.tokenizer = function (obj) { - if (!arguments.length || obj == null || obj == undefined) return [] - if (Array.isArray(obj)) return obj.map(function (t) { return t.toLowerCase() }) - - var str = obj.toString().replace(/^\s+/, '') - - for (var i = str.length - 1; i >= 0; i--) { - if (/\S/.test(str.charAt(i))) { - str = str.substring(0, i + 1) - break - } - } - - - var segs = segmenter.segment(str); // 単語の配列が返る - return segs.filter(function (token) { - return !!token - }) - .map(function (token) { - return token - }) - } - - /* lunr stemmer function */ - lunr.jp.stemmer = (function() { - - /* TODO japanese stemmer */ - return function(word) { - return word; - } - })(); - - lunr.Pipeline.registerFunction(lunr.jp.stemmer, 'stemmer-jp'); - - /* stop word filter function */ - lunr.jp.stopWordFilter = function(token) { - if (lunr.jp.stopWordFilter.stopWords.indexOf(token) === -1) { - return token; - } - }; - - lunr.jp.stopWordFilter.stopWords = new lunr.SortedSet(); - lunr.jp.stopWordFilter.stopWords.length = 45; - - // The space at the beginning is crucial: It marks the empty string - // as a stop word. lunr.js crashes during search when documents - // processed by the pipeline still contain the empty string. - // stopword for japanese is from http://www.ranks.nl/stopwords/japanese - lunr.jp.stopWordFilter.stopWords.elements = ' これ それ あれ この その あの ここ そこ あそこ こちら どこ だれ なに なん 何 私 貴方 貴方方 我々 私達 あの人 あのかた 彼女 彼 です あります おります います は が の に を で え から まで より も どの と し それで しかし'.split(' '); - lunr.Pipeline.registerFunction(lunr.jp.stopWordFilter, 'stopWordFilter-jp'); - }; -})) \ No newline at end of file + ; + (function(root, factory) { + if (typeof define === 'function' && define.amd) { + // AMD. Register as an anonymous module. + define(factory) + } else if (typeof exports === 'object') { + /** + * Node. Does not work with strict CommonJS, but + * only CommonJS-like environments that support module.exports, + * like Node. + */ + module.exports = factory() + } else { + // Browser globals (root is window) + factory()(root.lunr); + } + }(this, function() { + /** + * Just return a value to define the module export. + * This example returns an object, but the module + * can return a function as the exported value. + */ + return function(lunr) { + /* throw error if lunr is not yet included */ + if ('undefined' === typeof lunr) { + throw new Error('Lunr is not present. Please include / require Lunr before this script.'); + } + + /* throw error if lunr stemmer support is not yet included */ + if ('undefined' === typeof lunr.stemmerSupport) { + throw new Error('Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.'); + } + + /* register specific locale function */ + lunr.ja = function() { + this.pipeline.reset(); + this.pipeline.add( + lunr.ja.trimmer, + lunr.ja.stopWordFilter, + lunr.ja.stemmer + ); + // change the tokenizer for japanese one + lunr.tokenizer = lunr.ja.tokenizer; + }; + var segmenter = new TinySegmenter(); // インスタンス生成 + + lunr.ja.tokenizer = function (obj) { + if (!arguments.length || obj == null || obj == undefined) return [] + if (Array.isArray(obj)) return obj.map(function (t) { return t.toLowerCase() }) + + var str = obj.toString().replace(/^\s+/, '') + + for (var i = str.length - 1; i >= 0; i--) { + if (/\S/.test(str.charAt(i))) { + str = str.substring(0, i + 1) + break + } + } + + + var segs = segmenter.segment(str); // 単語の配列が返る + return segs.filter(function (token) { + return !!token + }) + .map(function (token) { + return token + }) + } + + /* lunr stemmer function */ + lunr.ja.stemmer = (function() { + + /* TODO japanese stemmer */ + return function(word) { + return word; + } + })(); + + lunr.Pipeline.registerFunction(lunr.ja.stemmer, 'stemmer-ja'); + + /* lunr trimmer function */ + lunr.ja.wordCharacters = "一二三四五六七八九十百千万億兆一-龠々〆ヵヶぁ-んァ-ヴーア-ン゙a-zA-Za-zA-Z0-90-9"; + lunr.ja.trimmer = lunr.trimmerSupport.generateTrimmer(lunr.ja.wordCharacters); + lunr.Pipeline.registerFunction(lunr.ja.trimmer, 'trimmer-ja'); + + /* stop word filter function */ + lunr.ja.stopWordFilter = function(token) { + if (lunr.ja.stopWordFilter.stopWords.indexOf(token) === -1) { + return token; + } + }; + + lunr.ja.stopWordFilter.stopWords = new lunr.SortedSet(); + lunr.ja.stopWordFilter.stopWords.length = 45; + + // The space at the beginning is crucial: It marks the empty string + // as a stop word. lunr.js crashes during search when documents + // processed by the pipeline still contain the empty string. + // stopword for japanese is from http://www.ranks.nl/stopwords/japanese + lunr.ja.stopWordFilter.stopWords.elements = ' これ それ あれ この その あの ここ そこ あそこ こちら どこ だれ なに なん 何 私 貴方 貴方方 我々 私達 あの人 あのかた 彼女 彼 です あります おります います は が の に を で え から まで より も どの と し それで しかし'.split(' '); + lunr.Pipeline.registerFunction(lunr.ja.stopWordFilter, 'stopWordFilter-ja'); + }; + })) \ No newline at end of file From c66c5c0e9a6bdfe7c974f422b0f344d45ffc91cb Mon Sep 17 00:00:00 2001 From: Fumihiko Hata Date: Sun, 14 Nov 2021 01:09:53 +0900 Subject: [PATCH 3/3] add jp support only for backward compatibility --- lunr.ja.js | 10 ++++++++++ lunr.jp.js | 5 +++++ 2 files changed, 15 insertions(+) create mode 100644 lunr.jp.js diff --git a/lunr.ja.js b/lunr.ja.js index bd679d7..1c3e512 100644 --- a/lunr.ja.js +++ b/lunr.ja.js @@ -120,5 +120,15 @@ // stopword for japanese is from http://www.ranks.nl/stopwords/japanese lunr.ja.stopWordFilter.stopWords.elements = ' これ それ あれ この その あの ここ そこ あそこ こちら どこ だれ なに なん 何 私 貴方 貴方方 我々 私達 あの人 あのかた 彼女 彼 です あります おります います は が の に を で え から まで より も どの と し それで しかし'.split(' '); lunr.Pipeline.registerFunction(lunr.ja.stopWordFilter, 'stopWordFilter-ja'); + + // alias ja => jp for backward-compatibility. + // jp is the country code, while ja is the language code + // a new lunr.ja.js has been created, but in order to + // keep the backward compatibility, we'll leave the lunr.jp.js + // here for a while, and just make it use the new lunr.ja.js + lunr.jp = lunr.ja; + lunr.Pipeline.registerFunction(lunr.jp.stemmer, 'stemmer-jp'); + lunr.Pipeline.registerFunction(lunr.jp.trimmer, 'trimmer-jp'); + lunr.Pipeline.registerFunction(lunr.jp.stopWordFilter, 'stopWordFilter-jp'); }; })) \ No newline at end of file diff --git a/lunr.jp.js b/lunr.jp.js new file mode 100644 index 0000000..73ebff1 --- /dev/null +++ b/lunr.jp.js @@ -0,0 +1,5 @@ +// jp is the country code, while ja is the language code +// a new lunr.ja.js has been created, but in order to +// keep the backward compatibility, we'll leave the lunr.jp.js +// here for a while, and just make it use the new lunr.ja.js +module.exports = require('./lunr.ja'); \ No newline at end of file