From a7c349cffaaa793f2056ff4a1c2fd2d62e9030bc Mon Sep 17 00:00:00 2001 From: Jim Porter Date: Fri, 21 Feb 2020 13:55:20 -0800 Subject: [PATCH] Update to lunr-languages v1.4.0; resolves #1729 This renames the Dutch language code to `nl`, the Japanese code to `ja`, and adds support for Arabic (`ar`) and Vietnamese (`vi`). --- docs/about/release-notes.md | 8 + docs/user-guide/configuration.md | 6 +- .../contrib/search/lunr-language/lunr.ar.js | 381 +++++++++++++++ .../contrib/search/lunr-language/lunr.du.js | 2 + .../contrib/search/lunr-language/lunr.ja.js | 188 ++++++++ .../contrib/search/lunr-language/lunr.jp.js | 139 +----- .../search/lunr-language/lunr.multi.js | 10 +- .../contrib/search/lunr-language/lunr.nl.js | 448 ++++++++++++++++++ .../contrib/search/lunr-language/lunr.vi.js | 84 ++++ 9 files changed, 1127 insertions(+), 139 deletions(-) create mode 100644 mkdocs/contrib/search/lunr-language/lunr.ar.js create mode 100644 mkdocs/contrib/search/lunr-language/lunr.ja.js create mode 100644 mkdocs/contrib/search/lunr-language/lunr.nl.js create mode 100644 mkdocs/contrib/search/lunr-language/lunr.vi.js diff --git a/docs/about/release-notes.md b/docs/about/release-notes.md index 10f54828..a0310741 100644 --- a/docs/about/release-notes.md +++ b/docs/about/release-notes.md @@ -70,6 +70,14 @@ do, adding `--strict`, `--theme`, `--theme-dir`, and `--site-dir`. [directory-urls]: ../user-guide/configuration.md#use_directory_urls +#### Updated lunr-languages support (#1729) + +The `lunr-languages` plugin has been updated to 1.4.0, adding support for +Arabic (`ar`) and Vietnamese (`vi`) languages. In addition, the Dutch and +Japanese language codes have been changed to their standard values: `nl` and +`ja`, respectively. The old language codes (`du` and `jp`) remain as aliases but +may be removed in a future version of MkDocs. + ### Other Changes and Additions to Version 1.1 * Bugfix: Ensure nested dot files in themes are ignored and document behavior (#1981). 
diff --git a/docs/user-guide/configuration.md b/docs/user-guide/configuration.md index 3257cc9f..ffcdd6b1 100644 --- a/docs/user-guide/configuration.md +++ b/docs/user-guide/configuration.md @@ -508,15 +508,16 @@ A list of languages to use when building the search index as identified by their [ISO 639-1] language codes. With [Lunr Languages], the following languages are supported: +* `ar`: Arabic * `da`: Danish -* `du`: Dutch +* `nl`: Dutch * `en`: English * `fi`: Finnish * `fr`: French * `de`: German * `hu`: Hungarian * `it`: Italian -* `jp`: Japanese +* `ja`: Japanese * `no`: Norwegian * `pt`: Portuguese * `ro`: Romanian @@ -525,6 +526,7 @@ supported: * `sv`: Swedish * `th`: Thai * `tr`: Turkish +* `vi`: Vietnamese You may [contribute additional languages]. diff --git a/mkdocs/contrib/search/lunr-language/lunr.ar.js b/mkdocs/contrib/search/lunr-language/lunr.ar.js new file mode 100644 index 00000000..c5450f16 --- /dev/null +++ b/mkdocs/contrib/search/lunr-language/lunr.ar.js @@ -0,0 +1,381 @@ +/*! + * Lunr languages, `Arabic` language + * https://github.com/MihaiValentin/lunr-languages + * + * Copyright 2018, Dalia Al-Shahrabi + * http://www.mozilla.org/MPL/ + */ +/*! + * based on + * Kazem Taghva, Rania Elkhoury, and Jeffrey Coombs (2005) + * Meryeme Hadni, Abdelmonaime Lachkar, and S. Alaoui Ouatik (2012) + * + * Snowball JavaScript Library v0.3 + * http://code.google.com/p/urim/ + * http://snowball.tartarus.org/ + * + * Copyright 2010, Oleg Mazko + * http://www.mozilla.org/MPL/ + */ + +/** + * export the module via AMD, CommonJS or as a browser global + * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js + */ +; +(function(root, factory) { + if (typeof define === 'function' && define.amd) { + // AMD. Register as an anonymous module. + define(factory) + } else if (typeof exports === 'object') { + /** + * Node. Does not work with strict CommonJS, but + * only CommonJS-like environments that support module.exports, + * like Node. 
+ */ + module.exports = factory() + } else { + // Browser globals (root is window) + factory()(root.lunr); + } +}(this, function() { + /** + * Just return a value to define the module export. + * This example returns an object, but the module + * can return a function as the exported value. + */ + return function(lunr) { + /* throw error if lunr is not yet included */ + if ('undefined' === typeof lunr) { + throw new Error('Lunr is not present. Please include / require Lunr before this script.'); + } + + /* throw error if lunr stemmer support is not yet included */ + if ('undefined' === typeof lunr.stemmerSupport) { + throw new Error('Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.'); + } + + /* register specific locale function */ + lunr.ar = function() { + this.pipeline.reset(); + this.pipeline.add( + lunr.ar.trimmer, + lunr.ar.stopWordFilter, + lunr.ar.stemmer + ); + + // for lunr version 2 + // this is necessary so that every searched word is also stemmed before + // in lunr <= 1 this is not needed, as it is done using the normal pipeline + if (this.searchPipeline) { + this.searchPipeline.reset(); + this.searchPipeline.add(lunr.ar.stemmer) + } + }; + + /* lunr trimmer function */ + lunr.ar.wordCharacters = "\u0621-\u065b\u0671\u0640"; + lunr.ar.trimmer = lunr.trimmerSupport.generateTrimmer(lunr.ar.wordCharacters); + + lunr.Pipeline.registerFunction(lunr.ar.trimmer, 'trimmer-ar'); + + /* lunr stemmer function */ + lunr.ar.stemmer = (function() { + var self = this; + var word = ''; + self.result = false; + self.preRemoved = false; + self.sufRemoved = false; + + //prefix data + self.pre = { + pre1: 'ف ك ب و س ل ن ا ي ت', + pre2: 'ال لل', + pre3: 'بال وال فال تال كال ولل', + pre4: 'فبال كبال وبال وكال' + }; + + //suffix data + self.suf = { + suf1: 'ه ك ت ن ا ي', + suf2: 'نك نه ها وك يا اه ون ين تن تم نا وا ان كم كن ني نن ما هم هن تك ته ات يه', + suf3: 'تين كهم نيه نهم ونه وها يهم ونا ونك وني وهم تكم تنا تها تني 
تهم كما كها ناه نكم هنا تان يها', + suf4: 'كموه ناها ونني ونهم تكما تموه تكاه كماه ناكم ناهم نيها وننا' + } + + //arabic language patterns and alternative mapping for patterns + self.patterns = JSON.parse('{"pt43":[{"pt":[{"c":"ا","l":1}]},{"pt":[{"c":"ا,ت,ن,ي","l":0}],"mPt":[{"c":"ف","l":0,"m":1},{"c":"ع","l":1,"m":2},{"c":"ل","l":2,"m":3}]},{"pt":[{"c":"و","l":2}],"mPt":[{"c":"ف","l":0,"m":0},{"c":"ع","l":1,"m":1},{"c":"ل","l":2,"m":3}]},{"pt":[{"c":"ا","l":2}]},{"pt":[{"c":"ي","l":2}],"mPt":[{"c":"ف","l":0,"m":0},{"c":"ع","l":1,"m":1},{"c":"ا","l":2},{"c":"ل","l":3,"m":3}]},{"pt":[{"c":"م","l":0}]}],"pt53":[{"pt":[{"c":"ت","l":0},{"c":"ا","l":2}]},{"pt":[{"c":"ا,ن,ت,ي","l":0},{"c":"ت","l":2}],"mPt":[{"c":"ا","l":0},{"c":"ف","l":1,"m":1},{"c":"ت","l":2},{"c":"ع","l":3,"m":3},{"c":"ا","l":4},{"c":"ل","l":5,"m":4}]},{"pt":[{"c":"ا","l":0},{"c":"ا","l":2}],"mPt":[{"c":"ا","l":0},{"c":"ف","l":1,"m":1},{"c":"ع","l":2,"m":3},{"c":"ل","l":3,"m":4},{"c":"ا","l":4},{"c":"ل","l":5,"m":4}]},{"pt":[{"c":"ا","l":0},{"c":"ا","l":3}],"mPt":[{"c":"ف","l":0,"m":1},{"c":"ع","l":1,"m":2},{"c":"ل","l":2,"m":4}]},{"pt":[{"c":"ا","l":3},{"c":"ن","l":4}]},{"pt":[{"c":"ت","l":0},{"c":"ي","l":3}]},{"pt":[{"c":"م","l":0},{"c":"و","l":3}]},{"pt":[{"c":"ا","l":1},{"c":"و","l":3}]},{"pt":[{"c":"و","l":1},{"c":"ا","l":2}]},{"pt":[{"c":"م","l":0},{"c":"ا","l":3}]},{"pt":[{"c":"م","l":0},{"c":"ي","l":3}]},{"pt":[{"c":"ا","l":2},{"c":"ن","l":3}]},{"pt":[{"c":"م","l":0},{"c":"ن","l":1}],"mPt":[{"c":"ا","l":0},{"c":"ن","l":1},{"c":"ف","l":2,"m":2},{"c":"ع","l":3,"m":3},{"c":"ا","l":4},{"c":"ل","l":5,"m":4}]},{"pt":[{"c":"م","l":0},{"c":"ت","l":2}],"mPt":[{"c":"ا","l":0},{"c":"ف","l":1,"m":1},{"c":"ت","l":2},{"c":"ع","l":3,"m":3},{"c":"ا","l":4},{"c":"ل","l":5,"m":4}]},{"pt":[{"c":"م","l":0},{"c":"ا","l":2}]},{"pt":[{"c":"م","l":1},{"c":"ا","l":3}]},{"pt":[{"c":"ي,ت,ا,ن","l":0},{"c":"ت","l":1}],"mPt":[{"c":"ف","l":0,"m":2},{"c":"ع","l":1,"m":3},{"c":"ا","l":2},{"c":"ل","l":3,"m":4}]},{"pt":[{"c":"ت,
ي,ا,ن","l":0},{"c":"ت","l":2}],"mPt":[{"c":"ا","l":0},{"c":"ف","l":1,"m":1},{"c":"ت","l":2},{"c":"ع","l":3,"m":3},{"c":"ا","l":4},{"c":"ل","l":5,"m":4}]},{"pt":[{"c":"ا","l":2},{"c":"ي","l":3}]},{"pt":[{"c":"ا,ي,ت,ن","l":0},{"c":"ن","l":1}],"mPt":[{"c":"ا","l":0},{"c":"ن","l":1},{"c":"ف","l":2,"m":2},{"c":"ع","l":3,"m":3},{"c":"ا","l":4},{"c":"ل","l":5,"m":4}]},{"pt":[{"c":"ا","l":3},{"c":"ء","l":4}]}],"pt63":[{"pt":[{"c":"ا","l":0},{"c":"ت","l":2},{"c":"ا","l":4}]},{"pt":[{"c":"ا,ت,ن,ي","l":0},{"c":"س","l":1},{"c":"ت","l":2}],"mPt":[{"c":"ا","l":0},{"c":"س","l":1},{"c":"ت","l":2},{"c":"ف","l":3,"m":3},{"c":"ع","l":4,"m":4},{"c":"ا","l":5},{"c":"ل","l":6,"m":5}]},{"pt":[{"c":"ا,ن,ت,ي","l":0},{"c":"و","l":3}]},{"pt":[{"c":"م","l":0},{"c":"س","l":1},{"c":"ت","l":2}],"mPt":[{"c":"ا","l":0},{"c":"س","l":1},{"c":"ت","l":2},{"c":"ف","l":3,"m":3},{"c":"ع","l":4,"m":4},{"c":"ا","l":5},{"c":"ل","l":6,"m":5}]},{"pt":[{"c":"ي","l":1},{"c":"ي","l":3},{"c":"ا","l":4},{"c":"ء","l":5}]},{"pt":[{"c":"ا","l":0},{"c":"ن","l":1},{"c":"ا","l":4}]}],"pt54":[{"pt":[{"c":"ت","l":0}]},{"pt":[{"c":"ا,ي,ت,ن","l":0}],"mPt":[{"c":"ا","l":0},{"c":"ف","l":1,"m":1},{"c":"ع","l":2,"m":2},{"c":"ل","l":3,"m":3},{"c":"ر","l":4,"m":4},{"c":"ا","l":5},{"c":"ر","l":6,"m":4}]},{"pt":[{"c":"م","l":0}],"mPt":[{"c":"ا","l":0},{"c":"ف","l":1,"m":1},{"c":"ع","l":2,"m":2},{"c":"ل","l":3,"m":3},{"c":"ر","l":4,"m":4},{"c":"ا","l":5},{"c":"ر","l":6,"m":4}]},{"pt":[{"c":"ا","l":2}]},{"pt":[{"c":"ا","l":0},{"c":"ن","l":2}]}],"pt64":[{"pt":[{"c":"ا","l":0},{"c":"ا","l":4}]},{"pt":[{"c":"م","l":0},{"c":"ت","l":1}]}],"pt73":[{"pt":[{"c":"ا","l":0},{"c":"س","l":1},{"c":"ت","l":2},{"c":"ا","l":5}]}],"pt75":[{"pt":[{"c":"ا","l":0},{"c":"ا","l":5}]}]}'); + + self.execArray = [ + 'cleanWord', + 'removeDiacritics', + 'cleanAlef', + 'removeStopWords', + 'normalizeHamzaAndAlef', + 'removeStartWaw', + 'removePre432', + 'removeEndTaa', + 'wordCheck' + ]; + + self.stem = function() { + var counter = 0; + self.result = false; + 
self.preRemoved = false; + self.sufRemoved = false; + while (counter < self.execArray.length && self.result != true) { + self.result = self[self.execArray[counter]](); + counter++; + } + } + + self.setCurrent = function(word) { + self.word = word; + } + + self.getCurrent = function() { + return self.word + } + + /*remove elongating character and test that the word does not contain non-arabic characters. + If the word contains special characters, don't stem. */ + self.cleanWord = function() { + var wordCharacters = "\u0621-\u065b\u0671\u0640"; + var testRegex = new RegExp("[^" + wordCharacters + "]"); + self.word = self.word + .replace('\u0640', ''); + if (testRegex.test(word)) { + return true; + } + return false; + } + + self.removeDiacritics = function() { + var diacriticsRegex = new RegExp("[\u064b-\u065b]"); + self.word = self.word.replace(/[\u064b-\u065b]/gi, ''); + return false; + } + + /*Replace all variations of alef (آأإٱى) to a plain alef (ا)*/ + self.cleanAlef = function() { + var alefRegex = new RegExp("[\u0622\u0623\u0625\u0671\u0649]"); + self.word = self.word.replace(alefRegex, "\u0627"); + return false; + } + + /* if the word is a stop word, don't stem*/ + self.removeStopWords = function() { + var stopWords = '، اض امين اه اها اي ا اب اجل اجمع اخ اخذ اصبح اضحى اقبل اقل اكثر الا ام اما امامك امامك امسى اما ان انا انت انتم انتما انتن انت انشا انى او اوشك اولئك اولئكم اولاء اولالك اوه اي ايا اين اينما اي ان اي اف اذ اذا اذا اذما اذن الى اليكم اليكما اليكن اليك اليك الا اما ان انما اي اياك اياكم اياكما اياكن ايانا اياه اياها اياهم اياهما اياهن اياي ايه ان ا ابتدا اثر اجل احد اخرى اخلولق اذا اربعة ارتد استحال اطار اعادة اعلنت اف اكثر اكد الالاء الالى الا الاخيرة الان الاول الاولى التى التي الثاني الثانية الذاتي الذى الذي الذين السابق الف اللائي اللاتي اللتان اللتيا اللتين اللذان اللذين اللواتي الماضي المقبل الوقت الى اليوم اما امام امس ان انبرى انقلب انه انها او اول اي ايار ايام ايضا ب بات باسم بان بخ برس بسبب بس بشكل بضع بطان بعد بعض بك بكم بكما بكن بل 
بلى بما بماذا بمن بن بنا به بها بي بيد بين بس بله بئس تان تانك تبدل تجاه تحول تلقاء تلك تلكم تلكما تم تينك تين ته تي ثلاثة ثم ثم ثمة ثم جعل جلل جميع جير حار حاشا حاليا حاي حتى حرى حسب حم حوالى حول حيث حيثما حين حي حبذا حتى حذار خلا خلال دون دونك ذا ذات ذاك ذانك ذان ذلك ذلكم ذلكما ذلكن ذو ذوا ذواتا ذواتي ذيت ذينك ذين ذه ذي راح رجع رويدك ريث رب زيارة سبحان سرعان سنة سنوات سوف سوى ساء ساءما شبه شخصا شرع شتان صار صباح صفر صه صه ضد ضمن طاق طالما طفق طق ظل عاد عام عاما عامة عدا عدة عدد عدم عسى عشر عشرة علق على عليك عليه عليها عل عن عند عندما عوض عين عدس عما غدا غير ف فان فلان فو فى في فيم فيما فيه فيها قال قام قبل قد قط قلما قوة كانما كاين كاي كاين كاد كان كانت كذا كذلك كرب كل كلا كلاهما كلتا كلم كليكما كليهما كلما كلا كم كما كي كيت كيف كيفما كان كخ لئن لا لات لاسيما لدن لدى لعمر لقاء لك لكم لكما لكن لكنما لكي لكيلا للامم لم لما لما لن لنا له لها لو لوكالة لولا لوما لي لست لست لستم لستما لستن لست لسن لعل لكن ليت ليس ليسا ليستا ليست ليسوا لسنا ما ماانفك مابرح مادام ماذا مازال مافتئ مايو متى مثل مذ مساء مع معاذ مقابل مكانكم مكانكما مكانكن مكانك مليار مليون مما ممن من منذ منها مه مهما من من نحن نحو نعم نفس نفسه نهاية نخ نعما نعم ها هاؤم هاك هاهنا هب هذا هذه هكذا هل هلم هلا هم هما هن هنا هناك هنالك هو هي هيا هيت هيا هؤلاء هاتان هاتين هاته هاتي هج هذا هذان هذين هذه هذي هيهات و وا واحد واضاف واضافت واكد وان واها واوضح وراءك وفي وقال وقالت وقد وقف وكان وكانت ولا ولم ومن وهو وهي ويكان وي وشكان يكون يمكن يوم ايان'.split(' '); + if (stopWords.indexOf(self.word) >= 0) { + return true; + } + } + + /* changes ؤ ئ to ء and removes alef if at the end of the word*/ + self.normalizeHamzaAndAlef = function() { + self.word = self.word.replace('\u0624', '\u0621'); + self.word = self.word.replace('\u0626', '\u0621'); + self.word = self.word.replace(/([\u0627])\1+/gi, '\u0627'); + return false; + } + + /*remove end taa marboota ة*/ + self.removeEndTaa = function() { + if (self.word.length > 2) { + self.word = self.word.replace(/[\u0627]$/, ''); + self.word = self.word.replace('\u0629', ''); + 
return false; + } else return true; + } + + /* if the word starts with double waw وو keep only one of them */ + self.removeStartWaw = function() { + if (self.word.length > 3 && self.word[0] == '\u0648' && self.word[1] == '\u0648') { + self.word = self.word.slice(1); + } + return false; + } + + /* remove prefixes of size 4, 3 and 2 characters */ + self.removePre432 = function() { + var word = self.word; + if (self.word.length >= 7) { + var pre4Regex = new RegExp('^(' + self.pre.pre4.split(' ').join('|') + ')') + self.word = self.word.replace(pre4Regex, ''); + } + if (self.word == word && self.word.length >= 6) { + var pre3Regex = new RegExp('^(' + self.pre.pre3.split(' ').join('|') + ')') + self.word = self.word.replace(pre3Regex, ''); + } + if (self.word == word && self.word.length >= 5) { + var pre2Regex = new RegExp('^(' + self.pre.pre2.split(' ').join('|') + ')') + self.word = self.word.replace(pre2Regex, ''); + } + if (word != self.word) self.preRemoved = true; + return false; + } + + /* check the word against word patterns. If the word matches a pattern, map it to the + alternative pattern if available then stop stemming. 
*/ + self.patternCheck = function(pattern) { + var patternMatch = false; + for (var i = 0; i < pattern.length; i++) { + var currentPatternCheck = true; + for (var j = 0; j < pattern[i].pt.length; j++) { + var chars = pattern[i].pt[j].c.split(','); + var charMatch = false; + chars.forEach(function(el) { + if (self.word[pattern[i].pt[j].l] == el) { + charMatch = true; + } + }) + if (!charMatch) { + currentPatternCheck = false; + break; + } + } + if (currentPatternCheck == true) { + if (pattern[i].mPt) { + var newWord = []; + for (var k = 0; k < pattern[i].mPt.length; k++) { + if (pattern[i].mPt[k].m != null) { + newWord[pattern[i].mPt[k].l] = self.word[pattern[i].mPt[k].m] + } else { + newWord[pattern[i].mPt[k].l] = pattern[i].mPt[k].c + } + } + self.word = newWord.join(''); + } + self.result = true; + break; + } + } + } + + /* remove prefixes of size 1 char*/ + self.removePre1 = function() { + var word = self.word; + if (self.preRemoved == false) + if (self.word.length > 3) { + var pre1Regex = new RegExp('^(' + self.pre.pre1.split(' ').join('|') + ')') + self.word = self.word.replace(pre1Regex, ''); + } + if (word != self.word) self.preRemoved = true; + return false; + } + + /*remove suffixes of size 1 char */ + self.removeSuf1 = function() { + var word = self.word; + if (self.sufRemoved == false) + if (self.word.length > 3) { + var suf1Regex = new RegExp('(' + self.suf.suf1.split(' ').join('|') + ')$') + self.word = self.word.replace(suf1Regex, ''); + } + if (word != self.word) self.sufRemoved = true; + return false; + } + + /*remove suffixes of size 4, 3 and 2 chars*/ + self.removeSuf432 = function() { + var word = self.word; + if (self.word.length >= 6) { + var suf4Regex = new RegExp('(' + self.suf.suf4.split(' ').join('|') + ')$') + self.word = self.word.replace(suf4Regex, ''); + } + if (self.word == word && self.word.length >= 5) { + var suf3Regex = new RegExp('(' + self.suf.suf3.split(' ').join('|') + ')$') + self.word = self.word.replace(suf3Regex, ''); + } + 
if (self.word == word && self.word.length >= 4) { + var suf2Regex = new RegExp('(' + self.suf.suf2.split(' ').join('|') + ')$') + self.word = self.word.replace(suf2Regex, ''); + } + if (word != self.word) self.sufRemoved = true; + return false; + } + + /*check the word length and decide what is the next step accordingly*/ + self.wordCheck = function() { + var word = self.word; + var word7Exec = [self.removeSuf432, self.removeSuf1, self.removePre1] + var counter = 0; + var patternChecked = false; + while (self.word.length >= 7 && !self.result && counter < word7Exec.length) { + if (self.word.length == 7 && !patternChecked) { + self.checkPattern73(); + patternChecked = true; + } else { + word7Exec[counter](); + counter++; + patternChecked = false; + } + } + + var word6Exec = [self.checkPattern63, self.removeSuf432, self.removeSuf1, self.removePre1, self.checkPattern64]; + counter = 0; + while (self.word.length == 6 && !self.result && counter < word6Exec.length) { + word6Exec[counter](); + counter++; + } + + var word5Exec = [self.checkPattern53, self.removeSuf432, self.removeSuf1, self.removePre1, self.checkPattern54]; + counter = 0; + while (self.word.length == 5 && !self.result && counter < word5Exec.length) { + word5Exec[counter](); + counter++; + } + + var word4Exec = [self.checkPattern43, self.removeSuf1, self.removePre1, self.removeSuf432]; + counter = 0; + while (self.word.length == 4 && !self.result && counter < word4Exec.length) { + word4Exec[counter](); + counter++; + } + return true; + } + + self.checkPattern43 = function() { + self.patternCheck(self.patterns.pt43) + } + self.checkPattern53 = function() { + self.patternCheck(self.patterns.pt53) + } + self.checkPattern54 = function() { + self.patternCheck(self.patterns.pt54) + } + self.checkPattern63 = function() { + self.patternCheck(self.patterns.pt63) + } + self.checkPattern64 = function() { + self.patternCheck(self.patterns.pt64) + } + self.checkPattern73 = function() { + 
self.patternCheck(self.patterns.pt73) + } + + /* and return a function that stems a word for the current locale */ + return function(token) { + // for lunr version 2 + if (typeof token.update === "function") { + return token.update(function(word) { + self.setCurrent(word); + self.stem(); + return self.getCurrent(); + }) + } else { // for lunr version <= 1 + self.setCurrent(token); + self.stem(); + return self.getCurrent(); + } + + } + })(); + + lunr.Pipeline.registerFunction(lunr.ar.stemmer, 'stemmer-ar'); + + lunr.ar.stopWordFilter = lunr.generateStopWordFilter('، اض امين اه اها اي ا اب اجل اجمع اخ اخذ اصبح اضحى اقبل اقل اكثر الا ام اما امامك امامك امسى اما ان انا انت انتم انتما انتن انت انشا انى او اوشك اولئك اولئكم اولاء اولالك اوه اي ايا اين اينما اي ان اي اف اذ اذا اذا اذما اذن الى اليكم اليكما اليكن اليك اليك الا اما ان انما اي اياك اياكم اياكما اياكن ايانا اياه اياها اياهم اياهما اياهن اياي ايه ان ا ابتدا اثر اجل احد اخرى اخلولق اذا اربعة ارتد استحال اطار اعادة اعلنت اف اكثر اكد الالاء الالى الا الاخيرة الان الاول الاولى التى التي الثاني الثانية الذاتي الذى الذي الذين السابق الف اللائي اللاتي اللتان اللتيا اللتين اللذان اللذين اللواتي الماضي المقبل الوقت الى اليوم اما امام امس ان انبرى انقلب انه انها او اول اي ايار ايام ايضا ب بات باسم بان بخ برس بسبب بس بشكل بضع بطان بعد بعض بك بكم بكما بكن بل بلى بما بماذا بمن بن بنا به بها بي بيد بين بس بله بئس تان تانك تبدل تجاه تحول تلقاء تلك تلكم تلكما تم تينك تين ته تي ثلاثة ثم ثم ثمة ثم جعل جلل جميع جير حار حاشا حاليا حاي حتى حرى حسب حم حوالى حول حيث حيثما حين حي حبذا حتى حذار خلا خلال دون دونك ذا ذات ذاك ذانك ذان ذلك ذلكم ذلكما ذلكن ذو ذوا ذواتا ذواتي ذيت ذينك ذين ذه ذي راح رجع رويدك ريث رب زيارة سبحان سرعان سنة سنوات سوف سوى ساء ساءما شبه شخصا شرع شتان صار صباح صفر صه صه ضد ضمن طاق طالما طفق طق ظل عاد عام عاما عامة عدا عدة عدد عدم عسى عشر عشرة علق على عليك عليه عليها عل عن عند عندما عوض عين عدس عما غدا غير ف فان فلان فو فى في فيم فيما فيه فيها قال قام قبل قد قط قلما قوة كانما كاين كاي كاين كاد كان كانت كذا كذلك كرب 
كل كلا كلاهما كلتا كلم كليكما كليهما كلما كلا كم كما كي كيت كيف كيفما كان كخ لئن لا لات لاسيما لدن لدى لعمر لقاء لك لكم لكما لكن لكنما لكي لكيلا للامم لم لما لما لن لنا له لها لو لوكالة لولا لوما لي لست لست لستم لستما لستن لست لسن لعل لكن ليت ليس ليسا ليستا ليست ليسوا لسنا ما ماانفك مابرح مادام ماذا مازال مافتئ مايو متى مثل مذ مساء مع معاذ مقابل مكانكم مكانكما مكانكن مكانك مليار مليون مما ممن من منذ منها مه مهما من من نحن نحو نعم نفس نفسه نهاية نخ نعما نعم ها هاؤم هاك هاهنا هب هذا هذه هكذا هل هلم هلا هم هما هن هنا هناك هنالك هو هي هيا هيت هيا هؤلاء هاتان هاتين هاته هاتي هج هذا هذان هذين هذه هذي هيهات وا واحد واضاف واضافت واكد وان واها واوضح وراءك وفي وقال وقالت وقد وقف وكان وكانت ولا ولم ومن وهو وهي ويكان وي وشكان يكون يمكن يوم ايان'.split(' ')); + + lunr.Pipeline.registerFunction(lunr.ar.stopWordFilter, 'stopWordFilter-ar'); + }; +})) \ No newline at end of file diff --git a/mkdocs/contrib/search/lunr-language/lunr.du.js b/mkdocs/contrib/search/lunr-language/lunr.du.js index a7ddf975..18edeb8e 100644 --- a/mkdocs/contrib/search/lunr-language/lunr.du.js +++ b/mkdocs/contrib/search/lunr-language/lunr.du.js @@ -52,6 +52,8 @@ throw new Error('Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.'); } + console.warn("[Lunr Languages] Please use the \"nl\" instead of the \"du\". The \"nl\" code is the standard code for Dutch language, and \"du\" will be removed in the next major versions."); + /* register specific locale function */ lunr.du = function() { this.pipeline.reset(); diff --git a/mkdocs/contrib/search/lunr-language/lunr.ja.js b/mkdocs/contrib/search/lunr-language/lunr.ja.js new file mode 100644 index 00000000..44eb411e --- /dev/null +++ b/mkdocs/contrib/search/lunr-language/lunr.ja.js @@ -0,0 +1,188 @@ +/*! + * Lunr languages, `Japanese` language + * https://github.com/MihaiValentin/lunr-languages + * + * Copyright 2014, Chad Liu + * http://www.mozilla.org/MPL/ + */ +/*! 
+ * based on + * Snowball JavaScript Library v0.3 + * http://code.google.com/p/urim/ + * http://snowball.tartarus.org/ + * + * Copyright 2010, Oleg Mazko + * http://www.mozilla.org/MPL/ + */ + +/** + * export the module via AMD, CommonJS or as a browser global + * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js + */ +; +(function(root, factory) { + if (typeof define === 'function' && define.amd) { + // AMD. Register as an anonymous module. + define(factory) + } else if (typeof exports === 'object') { + /** + * Node. Does not work with strict CommonJS, but + * only CommonJS-like environments that support module.exports, + * like Node. + */ + module.exports = factory() + } else { + // Browser globals (root is window) + factory()(root.lunr); + } +}(this, function() { + /** + * Just return a value to define the module export. + * This example returns an object, but the module + * can return a function as the exported value. + */ + return function(lunr) { + /* throw error if lunr is not yet included */ + if ('undefined' === typeof lunr) { + throw new Error('Lunr is not present. Please include / require Lunr before this script.'); + } + + /* throw error if lunr stemmer support is not yet included */ + if ('undefined' === typeof lunr.stemmerSupport) { + throw new Error('Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.'); + } + + /* + Japanese tokenization is trickier, since it does not + take into account spaces. 
+ Since the tokenization function is represented differently + internally for each of the Lunr versions, this had to be done + in order to try to pick the best way of doing this based + on the Lunr version + */ + var isLunr2 = lunr.version[0] == "2"; + + /* register specific locale function */ + lunr.ja = function() { + this.pipeline.reset(); + this.pipeline.add( + lunr.ja.trimmer, + lunr.ja.stopWordFilter, + lunr.ja.stemmer + ); + + // change the tokenizer to the japanese one + if (isLunr2) { // for lunr version 2.0.0 + this.tokenizer = lunr.ja.tokenizer; + } else { + if (lunr.tokenizer) { // for lunr version 0.6.0 + lunr.tokenizer = lunr.ja.tokenizer; + } + if (this.tokenizerFn) { // for lunr version 0.7.0 -> 1.0.0 + this.tokenizerFn = lunr.ja.tokenizer; + } + } + }; + var segmenter = new lunr.TinySegmenter(); // create the segmenter instance + + lunr.ja.tokenizer = function(obj) { + var i; + var str; + var len; + var segs; + var tokens; + var char; + var sliceLength; + var sliceStart; + var sliceEnd; + var segStart; + + if (!arguments.length || obj == null || obj == undefined) + return []; + + if (Array.isArray(obj)) { + return obj.map( + function(t) { + return isLunr2 ? 
new lunr.Token(t.toLowerCase()) : t.toLowerCase(); + } + ); + } + + str = obj.toString().toLowerCase().replace(/^\s+/, ''); + for (i = str.length - 1; i >= 0; i--) { + if (/\S/.test(str.charAt(i))) { + str = str.substring(0, i + 1); + break; + } + } + + tokens = []; + len = str.length; + for (sliceEnd = 0, sliceStart = 0; sliceEnd <= len; sliceEnd++) { + char = str.charAt(sliceEnd); + sliceLength = sliceEnd - sliceStart; + + if ((char.match(/\s/) || sliceEnd == len)) { + if (sliceLength > 0) { + segs = segmenter.segment(str.slice(sliceStart, sliceEnd)).filter( + function(token) { + return !!token; + } + ); + + segStart = sliceStart; + for (i = 0; i < segs.length; i++) { + if (isLunr2) { + tokens.push( + new lunr.Token( + segs[i], { + position: [segStart, segs[i].length], + index: tokens.length + } + ) + ); + } else { + tokens.push(segs[i]); + } + segStart += segs[i].length; + } + } + + sliceStart = sliceEnd + 1; + } + } + + return tokens; + } + + /* lunr stemmer function */ + lunr.ja.stemmer = (function() { + + /* TODO japanese stemmer */ + return function(word) { + return word; + } + })(); + lunr.Pipeline.registerFunction(lunr.ja.stemmer, 'stemmer-ja'); + + /* lunr trimmer function */ + lunr.ja.wordCharacters = "一二三四五六七八九十百千万億兆一-龠々〆ヵヶぁ-んァ-ヴーア-ン゙a-zA-Za-zA-Z0-90-9"; + lunr.ja.trimmer = lunr.trimmerSupport.generateTrimmer(lunr.ja.wordCharacters); + lunr.Pipeline.registerFunction(lunr.ja.trimmer, 'trimmer-ja'); + + /* lunr stop word filter. see http://www.ranks.nl/stopwords/japanese */ + lunr.ja.stopWordFilter = lunr.generateStopWordFilter( + 'これ それ あれ この その あの ここ そこ あそこ こちら どこ だれ なに なん 何 私 貴方 貴方方 我々 私達 あの人 あのかた 彼女 彼 です あります おります います は が の に を で え から まで より も どの と し それで しかし'.split(' ')); + lunr.Pipeline.registerFunction(lunr.ja.stopWordFilter, 'stopWordFilter-ja'); + + // alias ja => jp for backward-compatibility. 
+ // jp is the country code, while ja is the language code + // a new lunr.ja.js has been created, but in order to + // keep the backward compatibility, we'll leave the lunr.jp.js + // here for a while, and just make it use the new lunr.ja.js + lunr.jp = lunr.ja; + lunr.Pipeline.registerFunction(lunr.jp.stemmer, 'stemmer-jp'); + lunr.Pipeline.registerFunction(lunr.jp.trimmer, 'trimmer-jp'); + lunr.Pipeline.registerFunction(lunr.jp.stopWordFilter, 'stopWordFilter-jp'); + }; +})) \ No newline at end of file diff --git a/mkdocs/contrib/search/lunr-language/lunr.jp.js b/mkdocs/contrib/search/lunr-language/lunr.jp.js index aef6e31d..73ebff1d 100644 --- a/mkdocs/contrib/search/lunr-language/lunr.jp.js +++ b/mkdocs/contrib/search/lunr-language/lunr.jp.js @@ -1,134 +1,5 @@ -/*! - * Lunr languages, `Japanese` language - * https://github.com/MihaiValentin/lunr-languages - * - * Copyright 2014, Chad Liu - * http://www.mozilla.org/MPL/ - */ -/*! - * based on - * Snowball JavaScript Library v0.3 - * http://code.google.com/p/urim/ - * http://snowball.tartarus.org/ - * - * Copyright 2010, Oleg Mazko - * http://www.mozilla.org/MPL/ - */ - -/** - * export the module via AMD, CommonJS or as a browser global - * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js - */ -; -(function(root, factory) { - if (typeof define === 'function' && define.amd) { - // AMD. Register as an anonymous module. - define(factory) - } else if (typeof exports === 'object') { - /** - * Node. Does not work with strict CommonJS, but - * only CommonJS-like environments that support module.exports, - * like Node. - */ - module.exports = factory() - } else { - // Browser globals (root is window) - factory()(root.lunr); - } -}(this, function() { - /** - * Just return a value to define the module export. - * This example returns an object, but the module - * can return a function as the exported value. 
- */ - return function(lunr) { - /* throw error if lunr is not yet included */ - if ('undefined' === typeof lunr) { - throw new Error('Lunr is not present. Please include / require Lunr before this script.'); - } - - /* throw error if lunr stemmer support is not yet included */ - if ('undefined' === typeof lunr.stemmerSupport) { - throw new Error('Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.'); - } - - /* - Japanese tokenization is trickier, since it does not - take into account spaces. - Since the tokenization function is represented different - internally for each of the Lunr versions, this had to be done - in order to try to try to pick the best way of doing this based - on the Lunr version - */ - var isLunr2 = lunr.version[0] == "2"; - - /* register specific locale function */ - lunr.jp = function() { - this.pipeline.reset(); - this.pipeline.add( - lunr.jp.stopWordFilter, - lunr.jp.stemmer - ); - - // change the tokenizer for japanese one - if (isLunr2) { // for lunr version 2.0.0 - this.tokenizer = lunr.jp.tokenizer; - } else { - if (lunr.tokenizer) { // for lunr version 0.6.0 - lunr.tokenizer = lunr.jp.tokenizer; - } - if (this.tokenizerFn) { // for lunr version 0.7.0 -> 1.0.0 - this.tokenizerFn = lunr.jp.tokenizer; - } - } - }; - var segmenter = new lunr.TinySegmenter(); // インスタンス生成 - - lunr.jp.tokenizer = function (obj) { - if (!arguments.length || obj == null || obj == undefined) return [] - if (Array.isArray(obj)) return obj.map(function (t) { return isLunr2 ? new lunr.Token(t.toLowerCase()) : t.toLowerCase() }) - - var str = obj.toString().toLowerCase().replace(/^\s+/, '') - - for (var i = str.length - 1; i >= 0; i--) { - if (/\S/.test(str.charAt(i))) { - str = str.substring(0, i + 1) - break - } - } - - var segs = segmenter.segment(str); // 単語の配列が返る - return segs.filter(function (token) { - return !!token - }) - .map(function (token) { - return isLunr2 ? 
new lunr.Token(token) : token - }) - } - - /* lunr stemmer function */ - lunr.jp.stemmer = (function() { - - /* TODO japanese stemmer */ - return function(word) { - return word; - } - })(); - - lunr.Pipeline.registerFunction(lunr.jp.stemmer, 'stemmer-jp'); - lunr.jp.wordCharacters = "一二三四五六七八九十百千万億兆一-龠々〆ヵヶぁ-んァ-ヴーア-ン゙a-zA-Za-zA-Z0-90-9"; - - /* stop word filter function */ - lunr.jp.stopWordFilter = function(token) { - if (lunr.jp.stopWordFilter.stopWords.indexOf(isLunr2 ? token.toString() : token) === -1) { - return token; - } - }; - - // stopword for japanese is from http://www.ranks.nl/stopwords/japanese - lunr.jp.stopWordFilter = lunr.generateStopWordFilter( - 'これ それ あれ この その あの ここ そこ あそこ こちら どこ だれ なに なん 何 私 貴方 貴方方 我々 私達 あの人 あのかた 彼女 彼 です あります おります います は が の に を で え から まで より も どの と し それで しかし'.split(' ')); - - lunr.Pipeline.registerFunction(lunr.jp.stopWordFilter, 'stopWordFilter-jp'); - }; -})) \ No newline at end of file +// jp is the country code, while ja is the language code +// a new lunr.ja.js has been created, but in order to +// keep the backward compatibility, we'll leave the lunr.jp.js +// here for a while, and just make it use the new lunr.ja.js +module.exports = require('./lunr.ja'); \ No newline at end of file diff --git a/mkdocs/contrib/search/lunr-language/lunr.multi.js b/mkdocs/contrib/search/lunr-language/lunr.multi.js index 076792c7..4e921e02 100644 --- a/mkdocs/contrib/search/lunr-language/lunr.multi.js +++ b/mkdocs/contrib/search/lunr-language/lunr.multi.js @@ -48,9 +48,13 @@ searchPipeline.push(lunr.stemmer); } else { wordCharacters += lunr[languages[i]].wordCharacters; - pipeline.unshift(lunr[languages[i]].stopWordFilter); - pipeline.push(lunr[languages[i]].stemmer); - searchPipeline.push(lunr[languages[i]].stemmer); + if (lunr[languages[i]].stopWordFilter) { + pipeline.unshift(lunr[languages[i]].stopWordFilter); + } + if (lunr[languages[i]].stemmer) { + pipeline.push(lunr[languages[i]].stemmer); + 
searchPipeline.push(lunr[languages[i]].stemmer); + } } }; var multiTrimmer = lunr.trimmerSupport.generateTrimmer(wordCharacters); diff --git a/mkdocs/contrib/search/lunr-language/lunr.nl.js b/mkdocs/contrib/search/lunr-language/lunr.nl.js new file mode 100644 index 00000000..55b43ff0 --- /dev/null +++ b/mkdocs/contrib/search/lunr-language/lunr.nl.js @@ -0,0 +1,448 @@ +/*! + * Lunr languages, `Dutch` language + * https://github.com/MihaiValentin/lunr-languages + * + * Copyright 2014, Mihai Valentin + * http://www.mozilla.org/MPL/ + */ +/*! + * based on + * Snowball JavaScript Library v0.3 + * http://code.google.com/p/urim/ + * http://snowball.tartarus.org/ + * + * Copyright 2010, Oleg Mazko + * http://www.mozilla.org/MPL/ + */ + +/** + * export the module via AMD, CommonJS or as a browser global + * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js + */ +; +(function(root, factory) { + if (typeof define === 'function' && define.amd) { + // AMD. Register as an anonymous module. + define(factory) + } else if (typeof exports === 'object') { + /** + * Node. Does not work with strict CommonJS, but + * only CommonJS-like environments that support module.exports, + * like Node. + */ + module.exports = factory() + } else { + // Browser globals (root is window) + factory()(root.lunr); + } +}(this, function() { + /** + * Just return a value to define the module export. + * This example returns an object, but the module + * can return a function as the exported value. + */ + return function(lunr) { + /* throw error if lunr is not yet included */ + if ('undefined' === typeof lunr) { + throw new Error('Lunr is not present. Please include / require Lunr before this script.'); + } + + /* throw error if lunr stemmer support is not yet included */ + if ('undefined' === typeof lunr.stemmerSupport) { + throw new Error('Lunr stemmer support is not present. 
Please include / require Lunr stemmer support before this script.'); + } + + /* register specific locale function */ + lunr.nl = function() { + this.pipeline.reset(); + this.pipeline.add( + lunr.nl.trimmer, + lunr.nl.stopWordFilter, + lunr.nl.stemmer + ); + + // for lunr version 2 + // this is necessary so that every searched word is also stemmed before + // in lunr <= 1 this is not needed, as it is done using the normal pipeline + if (this.searchPipeline) { + this.searchPipeline.reset(); + this.searchPipeline.add(lunr.nl.stemmer) + } + }; + + /* lunr trimmer function */ + lunr.nl.wordCharacters = "A-Za-z\xAA\xBA\xC0-\xD6\xD8-\xF6\xF8-\u02B8\u02E0-\u02E4\u1D00-\u1D25\u1D2C-\u1D5C\u1D62-\u1D65\u1D6B-\u1D77\u1D79-\u1DBE\u1E00-\u1EFF\u2071\u207F\u2090-\u209C\u212A\u212B\u2132\u214E\u2160-\u2188\u2C60-\u2C7F\uA722-\uA787\uA78B-\uA7AD\uA7B0-\uA7B7\uA7F7-\uA7FF\uAB30-\uAB5A\uAB5C-\uAB64\uFB00-\uFB06\uFF21-\uFF3A\uFF41-\uFF5A"; + lunr.nl.trimmer = lunr.trimmerSupport.generateTrimmer(lunr.nl.wordCharacters); + + lunr.Pipeline.registerFunction(lunr.nl.trimmer, 'trimmer-nl'); + + /* lunr stemmer function */ + lunr.nl.stemmer = (function() { + /* create the wrapped stemmer object */ + var Among = lunr.stemmerSupport.Among, + SnowballProgram = lunr.stemmerSupport.SnowballProgram, + st = new function DutchStemmer() { + var a_0 = [new Among("", -1, 6), new Among("\u00E1", 0, 1), + new Among("\u00E4", 0, 1), new Among("\u00E9", 0, 2), + new Among("\u00EB", 0, 2), new Among("\u00ED", 0, 3), + new Among("\u00EF", 0, 3), new Among("\u00F3", 0, 4), + new Among("\u00F6", 0, 4), new Among("\u00FA", 0, 5), + new Among("\u00FC", 0, 5) + ], + a_1 = [new Among("", -1, 3), + new Among("I", 0, 2), new Among("Y", 0, 1) + ], + a_2 = [ + new Among("dd", -1, -1), new Among("kk", -1, -1), + new Among("tt", -1, -1) + ], + a_3 = [new Among("ene", -1, 2), + new Among("se", -1, 3), new Among("en", -1, 2), + new Among("heden", 2, 1), new Among("s", -1, 3) + ], + a_4 = [ + new Among("end", -1, 1), new 
Among("ig", -1, 2), + new Among("ing", -1, 1), new Among("lijk", -1, 3), + new Among("baar", -1, 4), new Among("bar", -1, 5) + ], + a_5 = [ + new Among("aa", -1, -1), new Among("ee", -1, -1), + new Among("oo", -1, -1), new Among("uu", -1, -1) + ], + g_v = [17, 65, + 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 + ], + g_v_I = [1, 0, 0, + 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 + ], + g_v_j = [ + 17, 67, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 + ], + I_p2, I_p1, B_e_found, sbp = new SnowballProgram(); + this.setCurrent = function(word) { + sbp.setCurrent(word); + }; + this.getCurrent = function() { + return sbp.getCurrent(); + }; + + function r_prelude() { + var among_var, v_1 = sbp.cursor, + v_2, v_3; + while (true) { + sbp.bra = sbp.cursor; + among_var = sbp.find_among(a_0, 11); + if (among_var) { + sbp.ket = sbp.cursor; + switch (among_var) { + case 1: + sbp.slice_from("a"); + continue; + case 2: + sbp.slice_from("e"); + continue; + case 3: + sbp.slice_from("i"); + continue; + case 4: + sbp.slice_from("o"); + continue; + case 5: + sbp.slice_from("u"); + continue; + case 6: + if (sbp.cursor >= sbp.limit) + break; + sbp.cursor++; + continue; + } + } + break; + } + sbp.cursor = v_1; + sbp.bra = v_1; + if (sbp.eq_s(1, "y")) { + sbp.ket = sbp.cursor; + sbp.slice_from("Y"); + } else + sbp.cursor = v_1; + while (true) { + v_2 = sbp.cursor; + if (sbp.in_grouping(g_v, 97, 232)) { + v_3 = sbp.cursor; + sbp.bra = v_3; + if (sbp.eq_s(1, "i")) { + sbp.ket = sbp.cursor; + if (sbp.in_grouping(g_v, 97, 232)) { + sbp.slice_from("I"); + sbp.cursor = v_2; + } + } else { + sbp.cursor = v_3; + if (sbp.eq_s(1, "y")) { + sbp.ket = sbp.cursor; + sbp.slice_from("Y"); + sbp.cursor = v_2; + } else if (habr1(v_2)) + break; + } + } else if (habr1(v_2)) + break; + } + } + + function habr1(v_1) { + sbp.cursor = v_1; + if (v_1 >= sbp.limit) + return true; + sbp.cursor++; + return false; + } + + function r_mark_regions() { + I_p1 = sbp.limit; + I_p2 = I_p1; + if (!habr2()) 
{ + I_p1 = sbp.cursor; + if (I_p1 < 3) + I_p1 = 3; + if (!habr2()) + I_p2 = sbp.cursor; + } + } + + function habr2() { + while (!sbp.in_grouping(g_v, 97, 232)) { + if (sbp.cursor >= sbp.limit) + return true; + sbp.cursor++; + } + while (!sbp.out_grouping(g_v, 97, 232)) { + if (sbp.cursor >= sbp.limit) + return true; + sbp.cursor++; + } + return false; + } + + function r_postlude() { + var among_var; + while (true) { + sbp.bra = sbp.cursor; + among_var = sbp.find_among(a_1, 3); + if (among_var) { + sbp.ket = sbp.cursor; + switch (among_var) { + case 1: + sbp.slice_from("y"); + break; + case 2: + sbp.slice_from("i"); + break; + case 3: + if (sbp.cursor >= sbp.limit) + return; + sbp.cursor++; + break; + } + } + } + } + + function r_R1() { + return I_p1 <= sbp.cursor; + } + + function r_R2() { + return I_p2 <= sbp.cursor; + } + + function r_undouble() { + var v_1 = sbp.limit - sbp.cursor; + if (sbp.find_among_b(a_2, 3)) { + sbp.cursor = sbp.limit - v_1; + sbp.ket = sbp.cursor; + if (sbp.cursor > sbp.limit_backward) { + sbp.cursor--; + sbp.bra = sbp.cursor; + sbp.slice_del(); + } + } + } + + function r_e_ending() { + var v_1; + B_e_found = false; + sbp.ket = sbp.cursor; + if (sbp.eq_s_b(1, "e")) { + sbp.bra = sbp.cursor; + if (r_R1()) { + v_1 = sbp.limit - sbp.cursor; + if (sbp.out_grouping_b(g_v, 97, 232)) { + sbp.cursor = sbp.limit - v_1; + sbp.slice_del(); + B_e_found = true; + r_undouble(); + } + } + } + } + + function r_en_ending() { + var v_1; + if (r_R1()) { + v_1 = sbp.limit - sbp.cursor; + if (sbp.out_grouping_b(g_v, 97, 232)) { + sbp.cursor = sbp.limit - v_1; + if (!sbp.eq_s_b(3, "gem")) { + sbp.cursor = sbp.limit - v_1; + sbp.slice_del(); + r_undouble(); + } + } + } + } + + function r_standard_suffix() { + var among_var, v_1 = sbp.limit - sbp.cursor, + v_2, v_3, v_4, v_5, v_6; + sbp.ket = sbp.cursor; + among_var = sbp.find_among_b(a_3, 5); + if (among_var) { + sbp.bra = sbp.cursor; + switch (among_var) { + case 1: + if (r_R1()) + sbp.slice_from("heid"); + 
break; + case 2: + r_en_ending(); + break; + case 3: + if (r_R1() && sbp.out_grouping_b(g_v_j, 97, 232)) + sbp.slice_del(); + break; + } + } + sbp.cursor = sbp.limit - v_1; + r_e_ending(); + sbp.cursor = sbp.limit - v_1; + sbp.ket = sbp.cursor; + if (sbp.eq_s_b(4, "heid")) { + sbp.bra = sbp.cursor; + if (r_R2()) { + v_2 = sbp.limit - sbp.cursor; + if (!sbp.eq_s_b(1, "c")) { + sbp.cursor = sbp.limit - v_2; + sbp.slice_del(); + sbp.ket = sbp.cursor; + if (sbp.eq_s_b(2, "en")) { + sbp.bra = sbp.cursor; + r_en_ending(); + } + } + } + } + sbp.cursor = sbp.limit - v_1; + sbp.ket = sbp.cursor; + among_var = sbp.find_among_b(a_4, 6); + if (among_var) { + sbp.bra = sbp.cursor; + switch (among_var) { + case 1: + if (r_R2()) { + sbp.slice_del(); + v_3 = sbp.limit - sbp.cursor; + sbp.ket = sbp.cursor; + if (sbp.eq_s_b(2, "ig")) { + sbp.bra = sbp.cursor; + if (r_R2()) { + v_4 = sbp.limit - sbp.cursor; + if (!sbp.eq_s_b(1, "e")) { + sbp.cursor = sbp.limit - v_4; + sbp.slice_del(); + break; + } + } + } + sbp.cursor = sbp.limit - v_3; + r_undouble(); + } + break; + case 2: + if (r_R2()) { + v_5 = sbp.limit - sbp.cursor; + if (!sbp.eq_s_b(1, "e")) { + sbp.cursor = sbp.limit - v_5; + sbp.slice_del(); + } + } + break; + case 3: + if (r_R2()) { + sbp.slice_del(); + r_e_ending(); + } + break; + case 4: + if (r_R2()) + sbp.slice_del(); + break; + case 5: + if (r_R2() && B_e_found) + sbp.slice_del(); + break; + } + } + sbp.cursor = sbp.limit - v_1; + if (sbp.out_grouping_b(g_v_I, 73, 232)) { + v_6 = sbp.limit - sbp.cursor; + if (sbp.find_among_b(a_5, 4) && sbp.out_grouping_b(g_v, 97, 232)) { + sbp.cursor = sbp.limit - v_6; + sbp.ket = sbp.cursor; + if (sbp.cursor > sbp.limit_backward) { + sbp.cursor--; + sbp.bra = sbp.cursor; + sbp.slice_del(); + } + } + } + } + this.stem = function() { + var v_1 = sbp.cursor; + r_prelude(); + sbp.cursor = v_1; + r_mark_regions(); + sbp.limit_backward = v_1; + sbp.cursor = sbp.limit; + r_standard_suffix(); + sbp.cursor = sbp.limit_backward; + 
r_postlude(); + return true; + } + }; + + /* and return a function that stems a word for the current locale */ + return function(token) { + // for lunr version 2 + if (typeof token.update === "function") { + return token.update(function(word) { + st.setCurrent(word); + st.stem(); + return st.getCurrent(); + }) + } else { // for lunr version <= 1 + st.setCurrent(token); + st.stem(); + return st.getCurrent(); + } + } + })(); + + lunr.Pipeline.registerFunction(lunr.nl.stemmer, 'stemmer-nl'); + + lunr.nl.stopWordFilter = lunr.generateStopWordFilter(' aan al alles als altijd andere ben bij daar dan dat de der deze die dit doch doen door dus een eens en er ge geen geweest haar had heb hebben heeft hem het hier hij hoe hun iemand iets ik in is ja je kan kon kunnen maar me meer men met mij mijn moet na naar niet niets nog nu of om omdat onder ons ook op over reeds te tegen toch toen tot u uit uw van veel voor want waren was wat werd wezen wie wil worden wordt zal ze zelf zich zij zijn zo zonder zou'.split(' ')); + + lunr.Pipeline.registerFunction(lunr.nl.stopWordFilter, 'stopWordFilter-nl'); + }; +})) \ No newline at end of file diff --git a/mkdocs/contrib/search/lunr-language/lunr.vi.js b/mkdocs/contrib/search/lunr-language/lunr.vi.js new file mode 100644 index 00000000..e7a65b23 --- /dev/null +++ b/mkdocs/contrib/search/lunr-language/lunr.vi.js @@ -0,0 +1,84 @@ +/*! + * Lunr languages, `Vietnamese` language + * https://github.com/MihaiValentin/lunr-languages + * + * Copyright 2017, Keerati Thiwanruk + * http://www.mozilla.org/MPL/ + */ +/*! + * based on + * Snowball JavaScript Library v0.3 + * http://code.google.com/p/urim/ + * http://snowball.tartarus.org/ + * + * Copyright 2010, Oleg Mazko + * http://www.mozilla.org/MPL/ + */ + +/** + * export the module via AMD, CommonJS or as a browser global + * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js + */ +; +(function(root, factory) { + if (typeof define === 'function' && define.amd) { + // AMD. 
Register as an anonymous module. + define(factory) + } else if (typeof exports === 'object') { + /** + * Node. Does not work with strict CommonJS, but + * only CommonJS-like environments that support module.exports, + * like Node. + */ + module.exports = factory() + } else { + // Browser globals (root is window) + factory()(root.lunr); + } +}(this, function() { + /** + * Just return a value to define the module export. + * This example returns an object, but the module + * can return a function as the exported value. + */ + return function(lunr) { + /* throw error if lunr is not yet included */ + if ('undefined' === typeof lunr) { + throw new Error('Lunr is not present. Please include / require Lunr before this script.'); + } + + /* throw error if lunr stemmer support is not yet included */ + if ('undefined' === typeof lunr.stemmerSupport) { + throw new Error('Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.'); + } + + /* register specific locale function */ + lunr.vi = function() { + this.pipeline.reset(); + this.pipeline.add( + lunr.vi.stopWordFilter, + lunr.vi.trimmer + ); + }; + + /* lunr trimmer function */ + lunr.vi.wordCharacters = "[" + + "A-Za-z" + + "\u0300\u0350" + // dấu huyền + "\u0301\u0351" + // dấu sắc + "\u0309" + // dấu hỏi + "\u0323" + // dấu nặng + "\u0303\u0343" + // dấu ngã + "\u00C2\u00E2" + // Â + "\u00CA\u00EA" + // Ê + "\u00D4\u00F4" + // Ô + "\u0102-\u0103" + // Ă + "\u0110-\u0111" + // Đ + "\u01A0-\u01A1" + // Ơ + "\u01AF-\u01B0" + // Ư + "]"; + lunr.vi.trimmer = lunr.trimmerSupport.generateTrimmer(lunr.vi.wordCharacters); + lunr.Pipeline.registerFunction(lunr.vi.trimmer, 'trimmer-vi'); + lunr.vi.stopWordFilter = lunr.generateStopWordFilter('là cái nhưng mà'.split(' ')); + }; +})) \ No newline at end of file