diff --git a/lunr.ja.js b/lunr.ja.js new file mode 100644 index 0000000..1c3e512 --- /dev/null +++ b/lunr.ja.js @@ -0,0 +1,134 @@ +/*! + * Lunr languages, `Japanese` language + * https://github.com/MihaiValentin/lunr-languages + * + * Copyright 2014, Chad Liu + * http://www.mozilla.org/MPL/ + */ +/*! + * based on + * Snowball JavaScript Library v0.3 + * http://code.google.com/p/urim/ + * http://snowball.tartarus.org/ + * + * Copyright 2010, Oleg Mazko + * http://www.mozilla.org/MPL/ + */ + +/** + * export the module via AMD, CommonJS or as a browser global + * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js + */ + ; + (function(root, factory) { + if (typeof define === 'function' && define.amd) { + // AMD. Register as an anonymous module. + define(factory) + } else if (typeof exports === 'object') { + /** + * Node. Does not work with strict CommonJS, but + * only CommonJS-like environments that support module.exports, + * like Node. + */ + module.exports = factory() + } else { + // Browser globals (root is window) + factory()(root.lunr); + } + }(this, function() { + /** + * Just return a value to define the module export. + * This example returns an object, but the module + * can return a function as the exported value. + */ + return function(lunr) { + /* throw error if lunr is not yet included */ + if ('undefined' === typeof lunr) { + throw new Error('Lunr is not present. Please include / require Lunr before this script.'); + } + + /* throw error if lunr stemmer support is not yet included */ + if ('undefined' === typeof lunr.stemmerSupport) { + throw new Error('Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.'); + } + + /* register specific locale function */ + lunr.ja = function() { + this.pipeline.reset(); + this.pipeline.add( + lunr.ja.trimmer, + lunr.ja.stopWordFilter, + lunr.ja.stemmer + ); + // change the tokenizer for japanese one + lunr.tokenizer = lunr.ja.tokenizer; + }; + var segmenter = new TinySegmenter(); // インスタンス生成 + + lunr.ja.tokenizer = function (obj) { + if (!arguments.length || obj == null || obj == undefined) return [] + if (Array.isArray(obj)) return obj.map(function (t) { return t.toLowerCase() }) + + var str = obj.toString().replace(/^\s+/, '') + + for (var i = str.length - 1; i >= 0; i--) { + if (/\S/.test(str.charAt(i))) { + str = str.substring(0, i + 1) + break + } + } + + + var segs = segmenter.segment(str); // 単語の配列が返る + return segs.filter(function (token) { + return !!token + }) + .map(function (token) { + return token + }) + } + + /* lunr stemmer function */ + lunr.ja.stemmer = (function() { + + /* TODO japanese stemmer */ + return function(word) { + return word; + } + })(); + + lunr.Pipeline.registerFunction(lunr.ja.stemmer, 'stemmer-ja'); + + /* lunr trimmer function */ + lunr.ja.wordCharacters = "一二三四五六七八九十百千万億兆一-龠々〆ヵヶぁ-んァ-ヴーア-ン゙a-zA-Za-zA-Z0-90-9"; + lunr.ja.trimmer = lunr.trimmerSupport.generateTrimmer(lunr.ja.wordCharacters); + lunr.Pipeline.registerFunction(lunr.ja.trimmer, 'trimmer-ja'); + + /* stop word filter function */ + lunr.ja.stopWordFilter = function(token) { + if (lunr.ja.stopWordFilter.stopWords.indexOf(token) === -1) { + return token; + } + }; + + lunr.ja.stopWordFilter.stopWords = new lunr.SortedSet(); + lunr.ja.stopWordFilter.stopWords.length = 45; + + // The space at the beginning is crucial: It marks the empty string + // as a stop word. lunr.js crashes during search when documents + // processed by the pipeline still contain the empty string. + // stopword for japanese is from http://www.ranks.nl/stopwords/japanese + lunr.ja.stopWordFilter.stopWords.elements = ' これ それ あれ この その あの ここ そこ あそこ こちら どこ だれ なに なん 何 私 貴方 貴方方 我々 私達 あの人 あのかた 彼女 彼 です あります おります います は が の に を で え から まで より も どの と し それで しかし'.split(' '); + lunr.Pipeline.registerFunction(lunr.ja.stopWordFilter, 'stopWordFilter-ja'); + + // alias ja => jp for backward-compatibility. + // jp is the country code, while ja is the language code + // a new lunr.ja.js has been created, but in order to + // keep the backward compatibility, we'll leave the lunr.jp.js + // here for a while, and just make it use the new lunr.ja.js + lunr.jp = lunr.ja; + lunr.Pipeline.registerFunction(lunr.jp.stemmer, 'stemmer-jp'); + lunr.Pipeline.registerFunction(lunr.jp.trimmer, 'trimmer-jp'); + lunr.Pipeline.registerFunction(lunr.jp.stopWordFilter, 'stopWordFilter-jp'); + }; + })) \ No newline at end of file diff --git a/lunr.jp.js b/lunr.jp.js index 4dcdc43..73ebff1 100644 --- a/lunr.jp.js +++ b/lunr.jp.js @@ -1,118 +1,5 @@ -/*! - * Lunr languages, `Japanese` language - * https://github.com/MihaiValentin/lunr-languages - * - * Copyright 2014, Chad Liu - * http://www.mozilla.org/MPL/ - */ -/*! - * based on - * Snowball JavaScript Library v0.3 - * http://code.google.com/p/urim/ - * http://snowball.tartarus.org/ - * - * Copyright 2010, Oleg Mazko - * http://www.mozilla.org/MPL/ - */ - -/** - * export the module via AMD, CommonJS or as a browser global - * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js - */ -; -(function(root, factory) { - if (typeof define === 'function' && define.amd) { - // AMD. Register as an anonymous module. - define(factory) - } else if (typeof exports === 'object') { - /** - * Node. Does not work with strict CommonJS, but - * only CommonJS-like environments that support module.exports, - * like Node. - */ - module.exports = factory() - } else { - // Browser globals (root is window) - factory()(root.lunr); - } -}(this, function() { - /** - * Just return a value to define the module export. - * This example returns an object, but the module - * can return a function as the exported value. - */ - return function(lunr) { - /* throw error if lunr is not yet included */ - if ('undefined' === typeof lunr) { - throw new Error('Lunr is not present. Please include / require Lunr before this script.'); - } - - /* throw error if lunr stemmer support is not yet included */ - if ('undefined' === typeof lunr.stemmerSupport) { - throw new Error('Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.'); - } - - /* register specific locale function */ - lunr.jp = function() { - this.pipeline.reset(); - this.pipeline.add( - lunr.jp.stopWordFilter, - lunr.jp.stemmer - ); - // change the tokenizer for japanese one - lunr.tokenizer = lunr.jp.tokenizer; - }; - var segmenter = new TinySegmenter(); // インスタンス生成 - - lunr.jp.tokenizer = function (obj) { - if (!arguments.length || obj == null || obj == undefined) return [] - if (Array.isArray(obj)) return obj.map(function (t) { return t.toLowerCase() }) - - var str = obj.toString().replace(/^\s+/, '') - - for (var i = str.length - 1; i >= 0; i--) { - if (/\S/.test(str.charAt(i))) { - str = str.substring(0, i + 1) - break - } - } - - - var segs = segmenter.segment(str); // 単語の配列が返る - return segs.filter(function (token) { - return !!token - }) - .map(function (token) { - return token - }) - } - - /* lunr stemmer function */ - lunr.jp.stemmer = (function() { - - /* TODO japanese stemmer */ - return function(word) { - return word; - } - })(); - - lunr.Pipeline.registerFunction(lunr.jp.stemmer, 'stemmer-jp'); - - /* stop word filter function */ - lunr.jp.stopWordFilter = function(token) { - if (lunr.jp.stopWordFilter.stopWords.indexOf(token) === -1) { - return token; - } - }; - - lunr.jp.stopWordFilter.stopWords = new lunr.SortedSet(); - lunr.jp.stopWordFilter.stopWords.length = 45; - - // The space at the beginning is crucial: It marks the empty string - // as a stop word. lunr.js crashes during search when documents - // processed by the pipeline still contain the empty string. - // stopword for japanese is from http://www.ranks.nl/stopwords/japanese - lunr.jp.stopWordFilter.stopWords.elements = ' これ それ あれ この その あの ここ そこ あそこ こちら どこ だれ なに なん 何 私 貴方 貴方方 我々 私達 あの人 あのかた 彼女 彼 です あります おります います は が の に を で え から まで より も どの と し それで しかし'.split(' '); - lunr.Pipeline.registerFunction(lunr.jp.stopWordFilter, 'stopWordFilter-jp'); - }; -})) \ No newline at end of file +// jp is the country code, while ja is the language code +// a new lunr.ja.js has been created, but in order to +// keep the backward compatibility, we'll leave the lunr.jp.js +// here for a while, and just make it use the new lunr.ja.js +module.exports = require('./lunr.ja'); \ No newline at end of file