1 /** Stemming algorithms
2  */
3 module nxt.stemming;
4 
5 import std.algorithm.comparison: among;
6 import std.algorithm.searching : endsWith, canFind;
7 import std.range: empty;
8 import std.traits: isSomeString;
9 import std.typecons : Tuple, tuple;
10 
11 import nxt.iso_639_1 : Language;
12 import nxt.lingua : isEnglishVowel, isSwedishVowel, isSwedishConsonant, isEnglishConsonant;
13 import nxt.skip_ex : skipOverBack;
14 
15 public class Stemmer(S)
16 if (isSomeString!S)
17 {
18 	/**
19 	 * In stem(p,i,j), p is a char pointer, and the string to be stemmed
20 	 * is from p[i] to p[j] inclusive. Typically i is zero and j is the
21 	 * offset to the last character of a string, (p[j+1] == '\0'). The
22 	 * stemmer adjusts the characters p[i] ... p[j] and returns the new
23 	 * end-point of the string, k. Stemming never increases word length, so
24 	 * i <= k <= j. To turn the stemmer into a module, declare 'stem' as
25 	 * extern, and delete the remainder of this file.
26 	 */
27 	public S stem(S p)
28 	{
29 		_b = p;
30 		_k = p.length - 1;
31 		_k0 = 0;
32 
33 		/** strings of length 1 or 2 don't go through the stemming process,
34 		 * although no mention is made of this in the published
35 		 * algorithm. Remove the line to match the published algorithm.
36 		 */
37 		if (_k <= _k0 + 1)
38 			return _b;
39 
40 		step1ab();
41 		step1c();
42 		step2();
43 		step3();
44 		step4();
45 		step5();
46 		return _b[_k0 .. _k + 1];
47 
48 	}
49 
50 private:
51 	S _b;			// buffer for the word
52 	ptrdiff_t _k = 0;
53 	ptrdiff_t _k0 = 0;
54 	ptrdiff_t _j = 0;	   // offset within the string
55 
56 	/**
57 	 * cons returns true, if b[i] is a consonant
58 	 */
59 	bool isConsonant(ptrdiff_t i)
60 	{
61 		if (_b[i].isEnglishVowel)
62 			return false;
63 		if (_b[i] == 'y')
64 		{
65 			if (i == _k0)
66 			{
67 				return true;
68 			}
69 			else
70 			{
71 				return !isConsonant(i - 1);
72 			}
73 		}
74 		return true;
75 	}
76 
77 	/** Return the number of consonant sequences between k0 and j.
78 	 * if c is a consonant sequence and v a vowel sequence, and <..>
79 	 * indicates arbitrary presence,
80 	 *
81 	 * <c><v>	   gives 0
82 	 * <c>vc<v>	 gives 1
83 	 * <c>vcvc<v>   gives 2
84 	 * <c>vcvcvc<v> gives 3
85 	 *
86 	 */
87 	size_t m()
88 	{
89 		ptrdiff_t n = 0;
90 		ptrdiff_t i = _k0;
91 
92 		while (true)
93 		{
94 			if (i > _j)
95 			{
96 				return n;
97 			}
98 			if (!isConsonant(i))
99 			{
100 				break;
101 			}
102 			i++;
103 		}
104 		i++;
105 		while (true)
106 		{
107 			while (true)
108 			{
109 				if (i > _j)
110 				{
111 					return n;
112 				}
113 				if (isConsonant(i))
114 				{
115 					break;
116 				}
117 				i++;
118 			}
119 			i++;
120 			n++;
121 			while (true)
122 			{
123 				if (i > _j)
124 				{
125 					return n;
126 				}
127 				if (!isConsonant(i))
128 				{
129 					break;
130 				}
131 				i++;
132 			}
133 			i++;
134 		}
135 	}
136 
137 	/** Returns true if k0...j contains a vowel. */
138 	bool hasVowelInStem()
139 	{
140 		for (ptrdiff_t i = _k0; i < _j + 1; i++)
141 		{
142 			if (!isConsonant(i))
143 				return true;
144 		}
145 		return false;
146 	}
147 
148 	/** Returns true if j, j-1 contains a double consonant
149 	 */
150 	bool doublec(ptrdiff_t j)
151 	{
152 		if (j < (_k0 + 1))
153 			return false;
154 		if (_b[j] != _b[j-1])
155 			return false;
156 		return isConsonant(j);
157 	}
158 
159 	/** Returns true if i-2,i-1,i has the form consonant - vowel - consonant
160 	 * and also if the second c is not w,x or y. this is used when trying to
161 	 * restore an e at the end of a short  e.g.
162 	 *
163 	 *	cav(e), lov(e), hop(e), crim(e), but
164 	 *	snow, box, tray.
165 	 *
166 	 */
167 	bool cvc(ptrdiff_t i)
168 	{
169 		if (i < (_k0 + 2) || !isConsonant(i) || isConsonant(i-1) || !isConsonant(i-2))
170 			return false;
171 		if (_b[i] == 'w' || _b[i] == 'x' || _b[i] == 'y')
172 			return false;
173 		return true;
174 	}
175 
176 	/** Return true if k0,...k endsWith with the string s.
177 	 */
178 	bool endsWith(S)(S s)
179 	if (isSomeString!S)
180 	{
181 		const len = s.length;
182 
183 		if (s[len - 1] != _b[_k])
184 			return false;
185 		if (len > (_k - _k0 + 1))
186 			return false;
187 
188 		const a = _k - len + 1;
189 		const b = _k + 1;
190 
191 		if (_b[a..b] != s)
192 		{
193 			return false;
194 		}
195 		_j = _k - len;
196 
197 		return true;
198 	}
199 
200 	/** Sets (j+1),...k to the characters in the string s, readjusting k. */
201 	void setto(S)(S s)
202 	if (isSomeString!S)
203 	{
204 		_b = _b[0.._j+1] ~ s ~ _b[_j + s.length + 1 .. _b.length];
205 		_k = _j + s.length;
206 	}
207 
208 	/** Used further down. */
209 	void r(S)(S s)
210 	if (isSomeString!S)
211 	{
212 		if (m() > 0)
213 			setto(s);
214 	}
215 
216 	/** Gets rid of plurals and -ed or -ing. e.g. */
217 	void step1ab()
218 	{
219 		if (_b[_k] == 's')
220 		{
221 			if (endsWith("sses"))
222 			{
223 				_k = _k - 2;
224 			}
225 			else if (endsWith("ies"))
226 			{
227 				setto("i");
228 			}
229 			else if (_b[_k - 1] != 's')
230 			{
231 				_k--;
232 			}
233 		}
234 		if (endsWith("eed"))
235 		{
236 			if (m() > 0)
237 				_k--;
238 		}
239 		else if ((endsWith("ed") || endsWith("ing")) && hasVowelInStem())
240 		{
241 			_k = _j;
242 			if (endsWith("at"))
243 			{
244 				setto("ate");
245 			}
246 			else if (endsWith("bl"))
247 			{
248 				setto("ble");
249 			}
250 			else if (endsWith("iz"))
251 			{
252 				setto("ize");
253 			}
254 			else if (doublec(_k))
255 			{
256 				_k--;
257 				if (_b[_k] == 'l' || _b[_k] == 's' || _b[_k] == 'z')
258 					_k++;
259 			}
260 			else if (m() == 1 && cvc(_k))
261 			{
262 				setto("e");
263 			}
264 		}
265 	}
266 
267 	/**
268 	 * step1c() turns terminal y to i when there is another vowel in the stem.
269 	 */
270 	void step1c()
271 	{
272 		if (endsWith("y") &&
273 			!endsWith("day") &&
274 			hasVowelInStem())
275 		{
276 			_b = _b[0.._k] ~ 'i' ~ _b[_k+1 .. _b.length];
277 		}
278 	}
279 
280 	/**
281 	 * step2() maps double suffices to single ones.
282 	 * so -ization (= -ize plus -ation) maps to -ize etc. note that the
283 	 * string before the suffix must give m() > 0.*
284 	 */
285 	void step2()
286 	{
287 		if (_b[_k - 1] == 'a')
288 		{
289 			if (endsWith("ational"))
290 				r("ate");
291 			else if (endsWith("tional"))
292 				r("tion");
293 		}
294 		else if (_b[_k - 1] == 'c')
295 		{
296 			if (endsWith("enci"))
297 				r("ence");
298 			else if (endsWith("anci"))
299 				r("ance");
300 		}
301 		else if (_b[_k - 1] == 'e')
302 		{
303 			if (endsWith("izer"))
304 				r("ize");
305 		}
306 		else if (_b[_k - 1] == 'l')
307 		{
308 			if (endsWith("bli"))
309 				r("ble");
310 			/* --DEPARTURE--
311 			 * To match the published algorithm, replace this phrase with
312 			 * if (endsWith("abli"))
313 			 *	   r("able");
314 			 */
315 			else if (endsWith("alli"))
316 				r("al");
317 			else if (endsWith("entli"))
318 				r("ent");
319 			else if (endsWith("eli"))
320 				r("e");
321 			else if (endsWith("ousli"))
322 				r("ous");
323 		}
324 		else if (_b[_k - 1] == 'o')
325 		{
326 			if (endsWith("ization"))
327 				r("ize");
328 			else if (endsWith("ation") || endsWith("ator"))
329 				r("ate");
330 		}
331 		else if (_b[_k - 1] == 's')
332 		{
333 			if (endsWith("alism"))
334 				r("al");
335 			else if (endsWith("iveness"))
336 				r("ive");
337 			else if (endsWith("fulness"))
338 				r("ful");
339 			else if (endsWith("ousness"))
340 				r("ous");
341 		}
342 		else if (_b[_k - 1] == 't')
343 		{
344 			if (endsWith("aliti"))
345 				r("al");
346 			else if (endsWith("iviti"))
347 				r("ive");
348 			else if (endsWith("biliti"))
349 				r("ble");
350 		}
351 		else if (_b[_k - 1] == 'g')
352 		{
353 			/**
354 			 * --DEPARTURE--
355 			 * To match the published algorithm, delete this phrase
356 			 */
357 			if (endsWith("logi"))
358 				r("log");
359 		}
360 	}
361 
362 	/**
363 	 * step3() dels with -ic-, -full, -ness etc. similar strategy to step2.
364 	 */
365 	void step3()
366 	{
367 		if (_b[_k] == 'e')
368 		{
369 			if	  (endsWith("icate")) r("ic");
370 			else if (endsWith("ative")) r("");
371 			else if (endsWith("alize")) r("al");
372 		}
373 		else if (_b[_k] == 'i')
374 		{
375 			if (endsWith("iciti")) r("ic");
376 		}
377 		else if (_b[_k] == 'l')
378 		{
379 			if	  (endsWith("ical")) r("ic");
380 			else if (endsWith("ful")) r("");
381 		}
382 		else if (_b[_k] == 's')
383 		{
384 			if (endsWith("ness")) r("");
385 		}
386 	}
387 
388 	/**
389 	 * step4() takes off -ant, -ence etc., in context <c>vcvc<v>.
390 	 */
391 	void step4()
392 	{
393 		/* fixes bug 1 */
394 		if (_k == 0)
395 			return;
396 		switch (_b[_k - 1])
397 		{
398 			case 'a':
399 				if (endsWith("al"))
400 					break;
401 				return;
402 			case 'c':
403 				if (endsWith("ance") || endsWith("ence"))
404 					break;
405 				return;
406 			case 'e':
407 				if (endsWith("er"))
408 					break;
409 				return;
410 			case 'i':
411 				if (endsWith("ic"))
412 					break;
413 				return;
414 			case 'l':
415 				if (endsWith("able") || endsWith("ible"))
416 					break;
417 				return;
418 			case 'n':
419 				if (endsWith("ant") || endsWith("ement") || endsWith("ment") || endsWith("ent"))
420 					break;
421 				return;
422 			case 'o':
423 				if (endsWith("ion") && _j >= 0 && (_b[_j] == 's' || _b[_j] == 't'))
424 				{
425 					/* _j >= 0 fixes bug 2 */
426 					break;
427 				}
428 				if (endsWith("ou"))
429 					break;
430 				return;
431 			case 's':
432 				if (endsWith("ism"))
433 					break;
434 				return;
435 			case 't':
436 				if (endsWith("ate") || endsWith("iti"))
437 					break;
438 				return;
439 			case 'u':
440 				if (endsWith("ous"))
441 					break;
442 				return;
443 			case 'v':
444 				if (endsWith("ive"))
445 					break;
446 				return;
447 			case 'z':
448 				if (endsWith("ize"))
449 					break;
450 				return;
451 			default:
452 				return;
453 		}
454 
455 		if (m() > 1)
456 			_k = _j;
457 
458 	}
459 
460 	/**
461 	 * step5() removes a final -e if m() > 1, and changes -ll to -l if m() > 1.
462 	 */
463 	void step5()
464 	{
465 		_j = _k;
466 		if (_b[_k] == 'e' &&
467 			_b[0 .. _k] != `false`)
468 		{
469 			auto a = m();
470 			if (a > 1 || (a == 1 && !cvc(_k - 1)))
471 				_k--;
472 		}
473 		if (_b[_k] == 'l' && doublec(_k) && m() > 1)
474 			_k--;
475 	}
476 }
477 
478 unittest {
479 	scope stemmer = new Stemmer!string();
480 
481 	assert(stemmer.stem("") == "");
482 	assert(stemmer.stem("x") == "x");
483 	assert(stemmer.stem("xyz") == "xyz");
484 	assert(stemmer.stem("win") == "win");
485 	/+ TODO: assert(stemmer.stem("winner") == "win"); +/
486 	assert(stemmer.stem("winning") == "win");
487 	assert(stemmer.stem("farted") == "fart");
488 	assert(stemmer.stem("caresses") == "caress");
489 	assert(stemmer.stem("ponies") == "poni");
490 	assert(stemmer.stem("ties") == "ti");
491 	assert(stemmer.stem("caress") == "caress");
492 	assert(stemmer.stem("cats") == "cat");
493 	assert(stemmer.stem("feed") == "feed");
494 	assert(stemmer.stem("matting") == "mat");
495 	assert(stemmer.stem("mating") == "mate");
496 	assert(stemmer.stem("meeting") == "meet");
497 	assert(stemmer.stem("milling") == "mill");
498 	assert(stemmer.stem("messing") == "mess");
499 	assert(stemmer.stem("meetings") == "meet");
500 	assert(stemmer.stem("neutralize") == "neutral");
501 	assert(stemmer.stem("relational") == "relat");
502 	assert(stemmer.stem("relational") == "relat");
503 	assert(stemmer.stem("intricate") == "intric");
504 
505 	assert(stemmer.stem("connection") == "connect");
506 	assert(stemmer.stem("connective") == "connect");
507 	assert(stemmer.stem("connecting") == "connect");
508 
509 	assert(stemmer.stem("agreed") == "agre");
510 	assert(stemmer.stem("disabled") == "disabl");
511 	assert(stemmer.stem("gentle") == "gentl");
512 	assert(stemmer.stem("gently") == "gentli");
513 	assert(stemmer.stem("served") == "serv");
514 	assert(stemmer.stem("competes") == "compet");
515 
516 	assert(stemmer.stem("fullnessful") == "fullness");
517 	assert(stemmer.stem(stemmer.stem("fullnessful")) == "full");
518 
519 	assert(stemmer.stem("bee") == "bee");
520 
521 	assert(stemmer.stem("dogs") == "dog");
522 	assert(stemmer.stem("churches") == "church");
523 	assert(stemmer.stem("hardrock") == "hardrock");
524 
525 	/+ TODO: assert(stemmer.stem("false") == "false"); +/
526 }
527 
528 import nxt.debugio;
529 
530 /** Stem Swedish Word $(D s).
531  */
532 auto ref stemSwedish(S)(S s)
533 if (isSomeString!S)
534 {
535 	enum ar = `ar`;
536 	enum or = `or`;
537 	enum er = `er`;
538 	enum ya = `ya`;
539 
540 	enum en = `en`;
541 	enum ern = `ern`;
542 	enum an = `an`;
543 	enum na = `na`;
544 	enum et = `et`;
545 	enum aste = `aste`;
546 	enum are = `are`;
547 	enum ast = `ast`;
548 	enum iserad = `iserad`;
549 	enum de = `de`;
550 	enum ing = `ing`;
551 	enum igt = `igt`;
552 	enum llt = `llt`;
553 
554 	switch (s)
555 	{
556 		case `samtida`: return `samtid`;
557 		default: break;
558 	}
559 
560 	if (s.endsWith(`n`))
561 	{
562 		if (s.endsWith(en))
563 		{
564 			const t = s[0 .. $ - en.length];
565 			if (s.among!(`även`))
566 			{
567 				return s;
568 			}
569 			else if (t.among!(`sann`))
570 			{
571 				return t;
572 			}
573 			else if (t.endsWith(`mm`, `nn`))
574 			{
575 				return t[0 .. $ - 1];
576 			}
577 			return t;
578 		}
579 		if (s.endsWith(ern))
580 		{
581 			return s[0 .. $ - 1];
582 		}
583 		if (s.endsWith(an))
584 		{
585 			const t = s[0 .. $ - an.length];
586 			if (t.length >= 3 &&
587 				t.endsWith(`tt`, `mp`, `ck`, `st`))
588 			{
589 				return s[0 ..$ - 1];
590 			}
591 			else if (t.length >= 2 &&
592 					 t.endsWith(`n`, `p`))
593 			{
594 				return s[0 ..$ - 1];
595 			}
596 			else if (t.length < 3)
597 			{
598 				return s;
599 			}
600 			return t;
601 		}
602 	}
603 
604 	if (s.endsWith(igt))
605 	{
606 		return s[0 .. $ - 1];
607 	}
608 
609 	if (s.endsWith(ya))
610 	{
611 		return s[0 .. $ - 1];
612 	}
613 
614 	if (s.endsWith(na))
615 	{
616 		if (s.among!(`sina`, `dina`, `mina`))
617 		{
618 			return s[0 .. $ - 1];
619 		}
620 		auto t = s[0 .. $ - na.length];
621 		if (t.endsWith(`r`))
622 		{
623 			if (t.endsWith(ar, or, er))
624 			{
625 				const u = t[0 .. $ - ar.length];
626 				if (u.canFind!(a => a.isSwedishVowel))
627 				{
628 					return u;
629 				}
630 				else
631 				{
632 					return t[0 .. $ - 1];
633 				}
634 			}
635 		}
636 	}
637 
638 	if (s.endsWith(et))
639 	{
640 		const t = s[0 .. $ - et.length];
641 		if (t.length >= 3 &&
642 			t[$ - 3].isSwedishConsonant &&
643 			t[$ - 2].isSwedishConsonant &&
644 			t[$ - 1].isSwedishConsonant)
645 		{
646 			return s[0 .. $ - 1];
647 		}
648 		else if (t.endsWith(`ck`))
649 		{
650 			return s[0 .. $ - 1];
651 		}
652 
653 		return t;
654 	}
655 
656 	if (s.endsWith(ar, or, er))
657 	{
658 		const t = s[0 .. $ - ar.length];
659 		if (t.canFind!(a => a.isSwedishVowel))
660 		{
661 			if (t.endsWith(`mm`, `nn`))
662 			{
663 				return t[0 .. $ - 1];
664 			}
665 			else
666 			{
667 				return t;
668 			}
669 		}
670 		else
671 		{
672 			return s[0 .. $ - 1];
673 		}
674 	}
675 
676 	if (s.endsWith(aste))
677 	{
678 		const t = s[0 .. $ - aste.length];
679 		if (t.among!(`sann`))
680 		{
681 			return t;
682 		}
683 		if (t.endsWith(`mm`, `nn`))
684 		{
685 			return t[0 .. $ - 1];
686 		}
687 		if (t.canFind!(a => a.isSwedishVowel))
688 		{
689 			return t;
690 		}
691 	}
692 
693 	if (s.endsWith(are, ast))
694 	{
695 		const t = s[0 .. $ - are.length];
696 		if (t.among!(`sann`))
697 		{
698 			return t;
699 		}
700 		if (t.endsWith(`mm`, `nn`))
701 		{
702 			return t[0 .. $ - 1];
703 		}
704 		if (t.canFind!(a => a.isSwedishVowel))
705 		{
706 			return t;
707 		}
708 	}
709 
710 	if (s.endsWith(iserad))
711 	{
712 		const t = s[0 .. $ - iserad.length];
713 		if (!t.endsWith(`n`))
714 		{
715 			return t;
716 		}
717 	}
718 
719 	if (s.endsWith(de))
720 	{
721 		enum ande = `ande`;
722 		if (s.endsWith(ande))
723 		{
724 			const t = s[0 .. $ - ande.length];
725 			if (t.empty)
726 			{
727 				return s;
728 			}
729 			else if (t[$ - 1].isSwedishConsonant)
730 			{
731 				return s[0 .. $ - 3];
732 			}
733 			return t;
734 		}
735 		if (s.among!(`hade`))
736 		{
737 			return s;
738 		}
739 		const t = s[0 .. $ - de.length];
740 		return t;
741 	}
742 
743 	if (s.endsWith(ing))
744 	{
745 		enum ning = `ning`;
746 		if (s.endsWith(ning))
747 		{
748 			const t = s[0 .. $ - ning.length];
749 			if (!t.endsWith(`n`) &&
750 				t != `tid`)
751 			{
752 				return t;
753 			}
754 		}
755 		return s[0 .. $ - ing.length];
756 	}
757 
758 	if (s.endsWith(llt))
759 	{
760 		return s[0 .. $ - 1];
761 	}
762 
763 	return s;
764 }
765 
766 unittest {
767 	// import nxt.assert_ex;
768 
769 	assert("rumpan".stemSwedish == "rumpa");
770 	assert("sopan".stemSwedish == "sopa");
771 	assert("kistan".stemSwedish == "kista");
772 
773 	assert("karl".stemSwedish == "karl");
774 
775 	assert("grenen".stemSwedish == "gren");
776 	assert("busen".stemSwedish == "bus");
777 	assert("husen".stemSwedish == "hus");
778 	assert("räven".stemSwedish == "räv");
779 	assert("dunken".stemSwedish == "dunk");
780 	assert("männen".stemSwedish == "män");
781 	assert("manen".stemSwedish == "man");
782 	assert("mannen".stemSwedish == "man");
783 
784 	assert("skalet".stemSwedish == "skal");
785 	assert("karet".stemSwedish == "kar");
786 	assert("taket".stemSwedish == "tak");
787 	assert("stinget".stemSwedish == "sting");
788 
789 	assert("äpplet".stemSwedish == "äpple");
790 
791 	assert("jakt".stemSwedish == "jakt");
792 
793 	assert("sot".stemSwedish == "sot");
794 	assert("sotare".stemSwedish == "sot");
795 
796 	assert("klok".stemSwedish == "klok");
797 	assert("klokare".stemSwedish == "klok");
798 	assert("klokast".stemSwedish == "klok");
799 
800 	assert("stark".stemSwedish == "stark");
801 	assert("starkare".stemSwedish == "stark");
802 	assert("starkast".stemSwedish == "stark");
803 
804 	assert("kort".stemSwedish == "kort");
805 	assert("kortare".stemSwedish == "kort");
806 	assert("kortast".stemSwedish == "kort");
807 
808 	assert("rolig".stemSwedish == "rolig");
809 	assert("roligare".stemSwedish == "rolig");
810 	assert("roligast".stemSwedish == "rolig");
811 
812 	assert("dum".stemSwedish == "dum");
813 	assert("dummare".stemSwedish == "dum");
814 	assert("dummast".stemSwedish == "dum");
815 	assert("dummaste".stemSwedish == "dum");
816 	assert("senaste".stemSwedish == "sen");
817 
818 	assert("sanning".stemSwedish == "sann");
819 	assert("sann".stemSwedish == "sann");
820 	assert("sannare".stemSwedish == "sann");
821 	assert("sannare".stemSwedish == "sann");
822 
823 	assert("stare".stemSwedish == "stare");
824 	assert("kvast".stemSwedish == "kvast");
825 
826 	assert("täcket".stemSwedish == "täcke");
827 	assert("räcket".stemSwedish == "räcke");
828 
829 	assert("van".stemSwedish == "van");
830 	assert("dan".stemSwedish == "dan");
831 	assert("man".stemSwedish == "man");
832 	assert("ovan".stemSwedish == "ovan");
833 	assert("stan".stemSwedish == "stan");
834 	assert("klan".stemSwedish == "klan");
835 
836 	assert("klockan".stemSwedish == "klocka");
837 	assert("klockande".stemSwedish == "klocka");
838 	assert("sockan".stemSwedish == "socka");
839 	assert("rockan".stemSwedish == "rocka");
840 	assert("rock".stemSwedish == "rock");
841 
842 	assert("agenter".stemSwedish == "agent");
843 	assert("agenterna".stemSwedish == "agent");
844 	assert("regenter".stemSwedish == "regent");
845 	assert("regenterna".stemSwedish == "regent");
846 
847 	assert("brodern".stemSwedish == "broder");
848 	assert("kärnan".stemSwedish == "kärna");
849 
850 	assert("skorna".stemSwedish == "sko");
851 
852 	assert("inträffade".stemSwedish == "inträffa");
853 	assert("roa".stemSwedish == "roa");
854 	assert("roade".stemSwedish == "roa");
855 	assert("hade".stemSwedish == "hade");
856 	assert("hades".stemSwedish == "hades");
857 
858 	assert("fullt".stemSwedish == "full");
859 
860 	assert("kanaliserad".stemSwedish == "kanal");
861 	assert("alkoholiserad".stemSwedish == "alkohol");
862 
863 	assert("roande".stemSwedish == "ro");
864 
865 	/* assertEqual("ror".stemSwedish, "ro"); */
866 	/* assertEqual("öbor".stemSwedish, "öbo"); */
867 
868 	assert("ande".stemSwedish == "ande");
869 
870 	assert("störande".stemSwedish == "störa");
871 	assert("nekande".stemSwedish == "neka");
872 	assert("jagande".stemSwedish == "jaga");
873 	assert("stimulerande".stemSwedish == "stimulera");
874 
875 	assert("karlar".stemSwedish == "karl");
876 	assert("lagar".stemSwedish == "lag");
877 
878 	assert("sina".stemSwedish == "sin");
879 	assert("dina".stemSwedish == "din");
880 	assert("mina".stemSwedish == "min");
881 
882 	assert("även".stemSwedish == "även");
883 
884 	assert("samtida".stemSwedish == "samtid");
885 
886 	assert("trattar".stemSwedish == "tratt");
887 
888 	assert("katter".stemSwedish == "katt");
889 	assert("dagar".stemSwedish == "dag");
890 	assert("öar".stemSwedish == "ö");
891 	assert("åar".stemSwedish == "å");
892 	assert("ängar".stemSwedish == "äng");
893 
894 	assert("spelar".stemSwedish == "spel");
895 	assert("drar".stemSwedish == "dra");
896 
897 	assert("kullar".stemSwedish == "kull");
898 	assert("kullarna".stemSwedish == "kull");
899 
900 	assert("mamma".stemSwedish == "mamma");
901 
902 	assert("bestyr".stemSwedish == "bestyr");
903 
904 	assert("krya".stemSwedish == "kry");
905 	assert("nya".stemSwedish == "ny");
906 
907 	assert("lemmar".stemSwedish == "lem");
908 
909 	/* assertEqual("ämnar".stemSwedish, "ämna"); */
910 	/* assert("rämnar".stemSwedish == "rämna"); */
911 	/* assert("lämnar".stemSwedish == "lämna"); */
912 }
913 
914 auto ref stemNorvegian(S)(S s)
915 if (isSomeString!S)
916 {
917 	s.skipOverBack(`ede`);
918 	return s;
919 }
920 
921 /** Stem $(D s) in Language $(D lang).
922 	If lang is unknown try each known language until failure.
923  */
924 Tuple!(S, Language_ISO_639_1) stemIn(S)(S s, Language_ISO_639_1 lang = Language_ISO_639_1.init)
925 if (isSomeString!S)
926 {
927 	typeof(return) t;
928 	switch (lang) with (Language_ISO_639_1)
929 	{
930 		case unknown:
931 			t = s.stemIn(en); if (t[0].length != s.length) return t;
932 			t = s.stemIn(sv); if (t[0].length != s.length) return t;
933 			t = s.stemIn(no); if (t[0].length != s.length) return t;
934 			break;
935 		case sv: t = tuple(s.stemSwedish, sv); break;
936 		case no: t = tuple(s.stemNorvegian, no); break;
937 		case en:
938 		default:
939 			auto stemmer = new Stemmer!string();
940 			t = tuple(stemmer.stem(s), lang);
941 	}
942 	return t;
943 }
944 
945 /** Destructively Stem $(D s) in Language $(D lang). */
946 Tuple!(bool, Language_ISO_639_1) stemize(S)(ref S s, Language_ISO_639_1 lang = Language_ISO_639_1.init)
947 if (isSomeString!S)
948 {
949 	const n = s.length;
950 	auto t = s.stemIn(lang);
951 	s = t[0];
952 	return tuple(n != s.length, t[1]);
953 }
954 
955 /** Return Stem of $(D s) using Porter's algorithm
956 	See_Also: https://en.wikipedia.org/wiki/I_m_still_remembering
957 	See_Also: https://en.wikipedia.org/wiki/Martin_Porter
958 	See_Also: https://www.youtube.com/watch?v=2s7f8mBwnko&list=PL6397E4B26D00A269&index=4.
959 */
960 S alternativePorterStemEnglish(S)(S s)
961 if (isSomeString!S)
962 {
963 	/* Step 1a */
964 	if	  (s.endsWith(`sses`)) { s = s[0 .. $-2]; }
965 	else if (s.endsWith(`ies`))  { s = s[0 .. $-2]; }
966 	else if (s.endsWith(`ss`))   { }
967 	else if (s.endsWith(`s`))	{ s = s[0 .. $-1]; }
968 
969 	/* Step 2 */
970 	if	  (s.endsWith(`ational`)) { s = s[0 .. $-7] ~ `ate`; }
971 	else if (s.endsWith(`izer`))	{ s = s[0 .. $-1]; }
972 	else if (s.endsWith(`ator`))	{ s = s[0 .. $-2] ~ `e`; }
973 
974 	/* Step 3 */
975 	else if (s.endsWith(`al`)) { s = s[0 .. $-2] ~ `e`; }
976 	else if (s.endsWith(`able`)) { s = s[0 .. $-4]; }
977 	else if (s.endsWith(`ate`)) { s = s[0 .. $-3] ~ `e`; }
978 
979 	return s;
980 }
981 
982 unittest {
983 	assert(`caresses`.alternativePorterStemEnglish == `caress`);
984 	assert(`ponies`.alternativePorterStemEnglish == `poni`);
985 	assert(`caress`.alternativePorterStemEnglish == `caress`);
986 	assert(`cats`.alternativePorterStemEnglish == `cat`);
987 
988 	assert(`relational`.alternativePorterStemEnglish == `relate`);
989 	assert(`digitizer`.alternativePorterStemEnglish == `digitize`);
990 	assert(`operator`.alternativePorterStemEnglish == `operate`);
991 
992 	assert(`revival`.alternativePorterStemEnglish == `revive`);
993 	assert(`adjustable`.alternativePorterStemEnglish == `adjust`);
994 	assert(`activate`.alternativePorterStemEnglish == `active`);
995 }