Adding or refreshing mcc in ro

author: filip <filip.komar@gmail.com> 2017-07-14 20:22:49 +0200
committer: filip <filip.komar@gmail.com> 2017-07-14 20:22:49 +0200
commit: efd81010cbe40cb41085cc2ac4878a0495bd2b0b (patch)
tree: 4efb30ff6a7abbbbe1c108f680a0da6f6f514402 /mcc/6/ro/content/search/stemmers/en_stemmer.js
parent: 012045294a44e9f00bc480141491d33e7176d3ba (diff)
download: doc-efd81010cbe40cb41085cc2ac4878a0495bd2b0b.tar
doc-efd81010cbe40cb41085cc2ac4878a0495bd2b0b.tar.gz
doc-efd81010cbe40cb41085cc2ac4878a0495bd2b0b.tar.bz2
doc-efd81010cbe40cb41085cc2ac4878a0495bd2b0b.tar.xz
doc-efd81010cbe40cb41085cc2ac4878a0495bd2b0b.zip
1 files changed, 187 insertions, 0 deletions
diff --git a/mcc/6/ro/content/search/stemmers/en_stemmer.js b/mcc/6/ro/content/search/stemmers/en_stemmer.js
new file mode 100644
index 00000000..f58012f2
--- /dev/null
+++ b/mcc/6/ro/content/search/stemmers/en_stemmer.js
@@ -0,0 +1,187 @@
+// Porter stemmer in Javascript. Few comments, but it's easy to follow against the rules in the original
+// paper, in
+//
+//  Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
+//  no. 3, pp 130-137,
+//
+// see also http://www.tartarus.org/~martin/PorterStemmer
+
+// Release 1 be 'andargor', Jul 2004
+// Release 2 (substantially revised) by Christopher McKenzie, Aug 2009
+
+
+var stemmer = (function(){
+	var step2list = {
+			"ational" : "ate",
+			"tional" : "tion",
+			"enci" : "ence",
+			"anci" : "ance",
+			"izer" : "ize",
+			"bli" : "ble",
+			"alli" : "al",
+			"entli" : "ent",
+			"eli" : "e",
+			"ousli" : "ous",
+			"ization" : "ize",
+			"ation" : "ate",
+			"ator" : "ate",
+			"alism" : "al",
+			"iveness" : "ive",
+			"fulness" : "ful",
+			"ousness" : "ous",
+			"aliti" : "al",
+			"iviti" : "ive",
+			"biliti" : "ble",
+			"logi" : "log"
+		},
+
+		step3list = {
+			"icate" : "ic",
+			"ative" : "",
+			"alize" : "al",
+			"iciti" : "ic",
+			"ical" : "ic",
+			"ful" : "",
+			"ness" : ""
+		},
+
+		c = "[^aeiou]",          // consonant
+		v = "[aeiouy]",          // vowel
+		C = c + "[^aeiouy]*",    // consonant sequence
+		V = v + "[aeiou]*",      // vowel sequence
+
+		mgr0 = "^(" + C + ")?" + V + C,               // [C]VC... is m>0
+		meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$",  // [C]VC[V] is m=1
+		mgr1 = "^(" + C + ")?" + V + C + V + C,       // [C]VCVC... is m>1
+		s_v = "^(" + C + ")?" + v;                   // vowel in stem
+
+	return function (w) {
+		var 	stem,
+			suffix,
+			firstch,
+			re,
+			re2,
+			re3,
+			re4,
+			origword = w;
+
+		if (w.length < 3) { return w; }
+
+		firstch = w.substr(0,1);
+		if (firstch == "y") {
+			w = firstch.toUpperCase() + w.substr(1);
+		}
+
+		// Step 1a
+		re = /^(.+?)(ss|i)es$/;
+		re2 = /^(.+?)([^s])s$/;
+
+		if (re.test(w)) { w = w.replace(re,"$1$2"); }
+		else if (re2.test(w)) {	w = w.replace(re2,"$1$2"); }
+
+		// Step 1b
+		re = /^(.+?)eed$/;
+		re2 = /^(.+?)(ed|ing)$/;
+		if (re.test(w)) {
+			var fp = re.exec(w);
+			re = new RegExp(mgr0);
+			if (re.test(fp[1])) {
+				re = /.$/;
+				w = w.replace(re,"");
+			}
+		} else if (re2.test(w)) {
+			var fp = re2.exec(w);
+			stem = fp[1];
+			re2 = new RegExp(s_v);
+			if (re2.test(stem)) {
+				w = stem;
+				re2 = /(at|bl|iz)$/;
+				re3 = new RegExp("([^aeiouylsz])\\1$");
+				re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
+				if (re2.test(w)) {	w = w + "e"; }
+				else if (re3.test(w)) { re = /.$/; w = w.replace(re,""); }
+				else if (re4.test(w)) { w = w + "e"; }
+			}
+		}
+
+		// Step 1c
+		re = /^(.+?)y$/;
+		if (re.test(w)) {
+			var fp = re.exec(w);
+			stem = fp[1];
+			re = new RegExp(s_v);
+			if (re.test(stem)) { w = stem + "i"; }
+		}
+
+		// Step 2
+		re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
+		if (re.test(w)) {
+			var fp = re.exec(w);
+			stem = fp[1];
+			suffix = fp[2];
+			re = new RegExp(mgr0);
+			if (re.test(stem)) {
+				w = stem + step2list[suffix];
+			}
+		}
+
+		// Step 3
+		re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
+		if (re.test(w)) {
+			var fp = re.exec(w);
+			stem = fp[1];
+			suffix = fp[2];
+			re = new RegExp(mgr0);
+			if (re.test(stem)) {
+				w = stem + step3list[suffix];
+			}
+		}
+
+		// Step 4
+		re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
+		re2 = /^(.+?)(s|t)(ion)$/;
+		if (re.test(w)) {
+			var fp = re.exec(w);
+			stem = fp[1];
+			re = new RegExp(mgr1);
+			if (re.test(stem)) {
+				w = stem;
+			}
+		} else if (re2.test(w)) {
+			var fp = re2.exec(w);
+			stem = fp[1] + fp[2];
+			re2 = new RegExp(mgr1);
+			if (re2.test(stem)) {
+				w = stem;
+			}
+		}
+
+		// Step 5
+		re = /^(.+?)e$/;
+		if (re.test(w)) {
+			var fp = re.exec(w);
+			stem = fp[1];
+			re = new RegExp(mgr1);
+			re2 = new RegExp(meq1);
+			re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
+			if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) {
+				w = stem;
+			}
+		}
+
+		re = /ll$/;
+		re2 = new RegExp(mgr1);
+		if (re.test(w) && re2.test(w)) {
+			re = /.$/;
+			w = w.replace(re,"");
+		}
+
+		// and turn initial Y back to y
+
+		if (firstch == "y") {
+			w = firstch.toLowerCase() + w.substr(1);
+		}
+
+		return w;
+	}
+})();
+\ No newline at end of file
author	filip <filip.komar@gmail.com>	2017-07-14 20:22:49 +0200
committer	filip <filip.komar@gmail.com>	2017-07-14 20:22:49 +0200
commit	efd81010cbe40cb41085cc2ac4878a0495bd2b0b (patch)
tree	4efb30ff6a7abbbbe1c108f680a0da6f6f514402 /mcc/6/ro/content/search/stemmers/en_stemmer.js
parent	012045294a44e9f00bc480141491d33e7176d3ba (diff)
download	doc-efd81010cbe40cb41085cc2ac4878a0495bd2b0b.tar doc-efd81010cbe40cb41085cc2ac4878a0495bd2b0b.tar.gz doc-efd81010cbe40cb41085cc2ac4878a0495bd2b0b.tar.bz2 doc-efd81010cbe40cb41085cc2ac4878a0495bd2b0b.tar.xz doc-efd81010cbe40cb41085cc2ac4878a0495bd2b0b.zip