nxt.stemming source code

1 /** Stemming algorithms
2  */
3 module nxt.stemming;
4 
5 import std.algorithm.comparison: among;
6 import std.algorithm.searching : endsWith, canFind;
7 import std.range: empty;
8 import std.traits: isSomeString;
9 import std.typecons : Tuple, tuple;
10 
11 import nxt.languages : Lang;
12 import nxt.lingua : isEnglishVowel, isSwedishVowel, isSwedishConsonant, isEnglishConsonant;
13 import nxt.skip_ex : skipOverBack;
14 
/** Porter stemmer for English words held in a string of type `S`.
 *
 * An instance keeps the word being processed and the working offsets in
 * private fields, and every call to `stem` overwrites them.
 */
public class Stemmer(S)
if (isSomeString!S)
{
    /** Stem the word `p` and return the stem.
     *
     * Words of one or two characters (and the empty string) are returned
     * unchanged. For longer words the six Porter steps are applied in order
     * and the slice `[_k0 .. _k + 1]` of the working buffer is returned.
     *
     * Params:
     *   p = the word to stem
     * Returns: the stem; it is never longer than `p`.
     */
    public S stem(S p)
    {
        _b = p;
        _k = p.length - 1;      // -1 for the empty string (ptrdiff_t)
        _k0 = 0;

        /** strings of length 1 or 2 don't go through the stemming process,
         * although no mention is made of this in the published
         * algorithm. Remove the line to match the published algorithm.
         */
        if (_k <= _k0 + 1)
            return _b;

        step1ab();
        step1c();
        step2();
        step3();
        step4();
        step5();
        return _b[_k0 .. _k + 1];
    }

private:
    S _b;                   // buffer for the word
    ptrdiff_t _k = 0;       // offset of the last character of the current word
    ptrdiff_t _k0 = 0;      // offset of the first character (always 0 here)
    ptrdiff_t _j = 0;       // offset of the end of the stem, set by endsWith()

    /**
     * Returns true if `_b[i]` is a consonant.
     *
     * Anything `isEnglishVowel` rejects is a consonant, except 'y': a 'y' is
     * a consonant at the start of the word or when it follows a vowel.
     */
    bool isConsonant(ptrdiff_t i)
    {
        if (_b[i].isEnglishVowel)
            return false;
        if (_b[i] == 'y')
        {
            if (i == _k0)
            {
                return true;
            }
            else
            {
                return !isConsonant(i - 1);
            }
        }
        return true;
    }

    /** Return the number of consonant sequences between k0 and j.
     * if c is a consonant sequence and v a vowel sequence, and <..>
     * indicates arbitrary presence,
     *
     * <c><v>       gives 0
     * <c>vc<v>     gives 1
     * <c>vcvc<v>   gives 2
     * <c>vcvcvc<v> gives 3
     *
     */
    size_t m()
    {
        ptrdiff_t n = 0;
        ptrdiff_t i = _k0;

        // skip the optional leading consonant sequence
        while (true)
        {
            if (i > _j)
            {
                return n;
            }
            if (!isConsonant(i))
            {
                break;
            }
            i++;
        }
        i++;
        while (true)
        {
            // skip a vowel sequence
            while (true)
            {
                if (i > _j)
                {
                    return n;
                }
                if (isConsonant(i))
                {
                    break;
                }
                i++;
            }
            i++;
            n++;    // one more vowel-consonant transition seen
            // skip the consonant sequence that follows
            while (true)
            {
                if (i > _j)
                {
                    return n;
                }
                if (!isConsonant(i))
                {
                    break;
                }
                i++;
            }
            i++;
        }
    }

    /** Returns true if k0...j contains a vowel. */
    bool hasVowelInStem()
    {
        for (ptrdiff_t i = _k0; i < _j + 1; i++)
        {
            if (!isConsonant(i))
                return true;
        }
        return false;
    }

    /** Returns true if j, j-1 contains a double consonant
     */
    bool doublec(ptrdiff_t j)
    {
        if (j < (_k0 + 1))
            return false;
        if (_b[j] != _b[j-1])
            return false;
        return isConsonant(j);
    }

    /** Returns true if i-2,i-1,i has the form consonant - vowel - consonant
     * and also if the second c is not w,x or y. this is used when trying to
     * restore an e at the end of a short word, e.g.
     *
     *    cav(e), lov(e), hop(e), crim(e), but
     *    snow, box, tray.
     *
     */
    bool cvc(ptrdiff_t i)
    {
        if (i < (_k0 + 2) || !isConsonant(i) || isConsonant(i-1) || !isConsonant(i-2))
            return false;
        if (_b[i] == 'w' || _b[i] == 'x' || _b[i] == 'y')
            return false;
        return true;
    }

    /** Returns true if k0,...k ends with the string s.
     *
     * On success `_j` is set to the offset just before the matched suffix;
     * on failure `_j` is left unchanged. `s` must not be empty.
     */
    bool endsWith(T)(T s)
    if (isSomeString!T)
    {
        const len = s.length;

        // cheap rejection on the last character before comparing slices
        if (s[len - 1] != _b[_k])
            return false;
        if (len > (_k - _k0 + 1))
            return false;

        const a = _k - len + 1;
        const b = _k + 1;

        if (_b[a..b] != s)
        {
            return false;
        }
        _j = _k - len;

        return true;
    }

    /** Sets (j+1),...k to the characters in the string s, readjusting k. */
    void setto(T)(T s)
    if (isSomeString!T)
    {
        _b = _b[0.._j+1] ~ s ~ _b[_j + s.length + 1 .. _b.length];
        _k = _j + s.length;
    }

    /** Replaces the suffix matched by the last endsWith() with s, but only
     * when the remaining stem has m() > 0.
     */
    void r(T)(T s)
    if (isSomeString!T)
    {
        if (m() > 0)
            setto(s);
    }

    /** Gets rid of plurals and -ed or -ing, e.g. caresses -> caress,
     * ponies -> poni, cats -> cat, matting -> mat, mating -> mate.
     */
    void step1ab()
    {
        if (_b[_k] == 's')
        {
            if (endsWith("sses"))
            {
                _k = _k - 2;
            }
            else if (endsWith("ies"))
            {
                setto("i");
            }
            else if (_b[_k - 1] != 's')
            {
                _k--;
            }
        }
        if (endsWith("eed"))
        {
            if (m() > 0)
                _k--;
        }
        else if ((endsWith("ed") || endsWith("ing")) && hasVowelInStem())
        {
            _k = _j;
            if (endsWith("at"))
            {
                setto("ate");
            }
            else if (endsWith("bl"))
            {
                setto("ble");
            }
            else if (endsWith("iz"))
            {
                setto("ize");
            }
            else if (doublec(_k))
            {
                _k--;
                // keep the double consonant for -ll, -ss and -zz
                if (_b[_k] == 'l' || _b[_k] == 's' || _b[_k] == 'z')
                    _k++;
            }
            else if (m() == 1 && cvc(_k))
            {
                setto("e");
            }
        }
    }

    /**
     * step1c() turns terminal y to i when there is another vowel in the stem.
     * Words ending in "day" are left alone.
     */
    void step1c()
    {
        if (endsWith("y") &&
            !endsWith("day") &&
            hasVowelInStem())
        {
            _b = _b[0.._k] ~ 'i' ~ _b[_k+1 .. _b.length];
        }
    }

    /**
     * step2() maps double suffices to single ones.
     * so -ization (= -ize plus -ation) maps to -ize etc. note that the
     * string before the suffix must give m() > 0.
     */
    void step2()
    {
        // step1ab() can shrink the word to a single character (e.g. "aed"
        // becomes "a"); _b[_k - 1] would then index before the buffer.
        if (_k <= _k0)
            return;

        if (_b[_k - 1] == 'a')
        {
            if (endsWith("ational"))
                r("ate");
            else if (endsWith("tional"))
                r("tion");
        }
        else if (_b[_k - 1] == 'c')
        {
            if (endsWith("enci"))
                r("ence");
            else if (endsWith("anci"))
                r("ance");
        }
        else if (_b[_k - 1] == 'e')
        {
            if (endsWith("izer"))
                r("ize");
        }
        else if (_b[_k - 1] == 'l')
        {
            if (endsWith("bli"))
                r("ble");
            /* --DEPARTURE--
             * To match the published algorithm, replace this phrase with
             * if (endsWith("abli"))
             *	   r("able");
             */
            else if (endsWith("alli"))
                r("al");
            else if (endsWith("entli"))
                r("ent");
            else if (endsWith("eli"))
                r("e");
            else if (endsWith("ousli"))
                r("ous");
        }
        else if (_b[_k - 1] == 'o')
        {
            if (endsWith("ization"))
                r("ize");
            else if (endsWith("ation") || endsWith("ator"))
                r("ate");
        }
        else if (_b[_k - 1] == 's')
        {
            if (endsWith("alism"))
                r("al");
            else if (endsWith("iveness"))
                r("ive");
            else if (endsWith("fulness"))
                r("ful");
            else if (endsWith("ousness"))
                r("ous");
        }
        else if (_b[_k - 1] == 't')
        {
            if (endsWith("aliti"))
                r("al");
            else if (endsWith("iviti"))
                r("ive");
            else if (endsWith("biliti"))
                r("ble");
        }
        else if (_b[_k - 1] == 'g')
        {
            /**
             * --DEPARTURE--
             * To match the published algorithm, delete this phrase
             */
            if (endsWith("logi"))
                r("log");
        }
    }

    /**
     * step3() deals with -ic-, -full, -ness etc. similar strategy to step2.
     */
    void step3()
    {
        if (_b[_k] == 'e')
        {
            if      (endsWith("icate")) r("ic");
            else if (endsWith("ative")) r("");
            else if (endsWith("alize")) r("al");
        }
        else if (_b[_k] == 'i')
        {
            if (endsWith("iciti")) r("ic");
        }
        else if (_b[_k] == 'l')
        {
            if      (endsWith("ical")) r("ic");
            else if (endsWith("ful")) r("");
        }
        else if (_b[_k] == 's')
        {
            if (endsWith("ness")) r("");
        }
    }

    /**
     * step4() takes off -ant, -ence etc., in context <c>vcvc<v>.
     */
    void step4()
    {
        /* fixes bug 1 */
        if (_k == 0)
            return;
        switch (_b[_k - 1])
        {
            case 'a':
                if (endsWith("al"))
                    break;
                return;
            case 'c':
                if (endsWith("ance") || endsWith("ence"))
                    break;
                return;
            case 'e':
                if (endsWith("er"))
                    break;
                return;
            case 'i':
                if (endsWith("ic"))
                    break;
                return;
            case 'l':
                if (endsWith("able") || endsWith("ible"))
                    break;
                return;
            case 'n':
                if (endsWith("ant") || endsWith("ement") || endsWith("ment") || endsWith("ent"))
                    break;
                return;
            case 'o':
                if (endsWith("ion") && _j >= 0 && (_b[_j] == 's' || _b[_j] == 't'))
                {
                    /* _j >= 0 fixes bug 2 */
                    break;
                }
                if (endsWith("ou"))
                    break;
                return;
            case 's':
                if (endsWith("ism"))
                    break;
                return;
            case 't':
                if (endsWith("ate") || endsWith("iti"))
                    break;
                return;
            case 'u':
                if (endsWith("ous"))
                    break;
                return;
            case 'v':
                if (endsWith("ive"))
                    break;
                return;
            case 'z':
                if (endsWith("ize"))
                    break;
                return;
            default:
                return;
        }

        // a suffix matched: drop it when the remaining stem is long enough
        if (m() > 1)
            _k = _j;
    }

    /**
     * step5() removes a final -e if m() > 1, and changes -ll to -l if m() > 1.
     * The word "false" is exempt from the final -e removal.
     */
    void step5()
    {
        _j = _k;
        // Compare the whole current word, including its final 'e'. Slicing
        // to _k (exclusive) would drop the 'e' and never equal "false".
        if (_b[_k] == 'e' &&
            _b[_k0 .. _k + 1] != `false`)
        {
            auto a = m();
            if (a > 1 || (a == 1 && !cvc(_k - 1)))
                _k--;
        }
        if (_b[_k] == 'l' && doublec(_k) && m() > 1)
            _k--;
    }
}
477 
/// Table-driven checks of the English `Stemmer`.
unittest
{
    scope stemmer = new Stemmer!string();

    // each entry is [input word, expected stem]
    string[2][] cases = [
        ["", ""],
        ["x", "x"],
        ["xyz", "xyz"],
        ["win", "win"],
        // TODO ["winner", "win"],
        ["winning", "win"],
        ["farted", "fart"],
        ["caresses", "caress"],
        ["ponies", "poni"],
        ["ties", "ti"],
        ["caress", "caress"],
        ["cats", "cat"],
        ["feed", "feed"],
        ["matting", "mat"],
        ["mating", "mate"],
        ["meeting", "meet"],
        ["milling", "mill"],
        ["messing", "mess"],
        ["meetings", "meet"],
        ["neutralize", "neutral"],
        ["relational", "relat"],
        ["relational", "relat"],
        ["intricate", "intric"],

        ["connection", "connect"],
        ["connective", "connect"],
        ["connecting", "connect"],

        ["agreed", "agre"],
        ["disabled", "disabl"],
        ["gentle", "gentl"],
        ["gently", "gentli"],
        ["served", "serv"],
        ["competes", "compet"],

        ["fullnessful", "fullness"],
    ];

    foreach (pair; cases)
    {
        assert(stemmer.stem(pair[0]) == pair[1]);
    }

    // stemming the stem once more strips the next suffix
    assert(stemmer.stem(stemmer.stem("fullnessful")) == "full");

    string[2][] moreCases = [
        ["bee", "bee"],

        ["dogs", "dog"],
        ["churches", "church"],
        ["hardrock", "hardrock"],

        // TODO ["false", "false"],
    ];

    foreach (pair; moreCases)
    {
        assert(stemmer.stem(pair[0]) == pair[1]);
    }
}
528 
529 import nxt.dbgio;
530 
/** Stem the Swedish word $(D s).
 *
 * A fixed sequence of suffix rules is tried in order and the first rule that
 * applies decides the result; when no rule applies $(D s) is returned as is.
 * The order of the rules is significant.
 */
auto ref stemSwedish(S)(S s)
if (isSomeString!S)
{
    // suffixes handled below
    enum ar = `ar`;
    enum or = `or`;
    enum er = `er`;
    enum ya = `ya`;

    enum en = `en`;
    enum ern = `ern`;
    enum an = `an`;
    enum na = `na`;
    enum et = `et`;
    enum aste = `aste`;
    enum are = `are`;
    enum ast = `ast`;
    enum iserad = `iserad`;
    enum de = `de`;
    enum ande = `ande`;
    enum ing = `ing`;
    enum ning = `ning`;
    enum igt = `igt`;
    enum llt = `llt`;

    // whole-word exception
    if (s == `samtida`)
    {
        return `samtid`;
    }

    // -en, -ern, -an
    if (s.endsWith(`n`))
    {
        if (s.endsWith(en))
        {
            const base = s[0 .. $ - en.length];
            if (s == `även`)
            {
                return s;
            }
            if (base == `sann`)
            {
                return base;
            }
            if (base.endsWith(`mm`, `nn`))
            {
                return base[0 .. $ - 1];    // undo the doubled consonant
            }
            return base;
        }

        if (s.endsWith(ern))
        {
            return s[0 .. $ - 1];
        }

        if (s.endsWith(an))
        {
            const base = s[0 .. $ - an.length];
            const keepsA =
                (base.length >= 3 && base.endsWith(`tt`, `mp`, `ck`, `st`)) ||
                (base.length >= 2 && base.endsWith(`n`, `p`));
            if (keepsA)
            {
                return s[0 ..$ - 1];
            }
            if (base.length < 3)
            {
                return s;       // too short to strip anything
            }
            return base;
        }
    }

    // -igt and -ya lose their last letter
    if (s.endsWith(igt))
    {
        return s[0 .. $ - 1];
    }

    if (s.endsWith(ya))
    {
        return s[0 .. $ - 1];
    }

    // -na, including -arna / -orna / -erna
    if (s.endsWith(na))
    {
        if (s.among!(`sina`, `dina`, `mina`))
        {
            return s[0 .. $ - 1];
        }
        auto withoutNa = s[0 .. $ - na.length];
        // any of these three suffixes implies a trailing `r`
        if (withoutNa.endsWith(ar, or, er))
        {
            const root = withoutNa[0 .. $ - ar.length];
            if (root.canFind!(ch => ch.isSwedishVowel))
            {
                return root;
            }
            return withoutNa[0 .. $ - 1];
        }
    }

    // -et
    if (s.endsWith(et))
    {
        const base = s[0 .. $ - et.length];
        const endsInThreeConsonants =
            base.length >= 3 &&
            base[$ - 3].isSwedishConsonant &&
            base[$ - 2].isSwedishConsonant &&
            base[$ - 1].isSwedishConsonant;
        if (endsInThreeConsonants || base.endsWith(`ck`))
        {
            return s[0 .. $ - 1];
        }
        return base;
    }

    // -ar, -or, -er
    if (s.endsWith(ar, or, er))
    {
        const base = s[0 .. $ - ar.length];
        if (!base.canFind!(ch => ch.isSwedishVowel))
        {
            return s[0 .. $ - 1];
        }
        if (base.endsWith(`mm`, `nn`))
        {
            return base[0 .. $ - 1];
        }
        return base;
    }

    // -aste; falls through when the remainder has no vowel
    if (s.endsWith(aste))
    {
        const base = s[0 .. $ - aste.length];
        if (base == `sann`)
        {
            return base;
        }
        if (base.endsWith(`mm`, `nn`))
        {
            return base[0 .. $ - 1];
        }
        if (base.canFind!(ch => ch.isSwedishVowel))
        {
            return base;
        }
    }

    // -are, -ast; falls through when the remainder has no vowel
    if (s.endsWith(are, ast))
    {
        const base = s[0 .. $ - are.length];
        if (base == `sann`)
        {
            return base;
        }
        if (base.endsWith(`mm`, `nn`))
        {
            return base[0 .. $ - 1];
        }
        if (base.canFind!(ch => ch.isSwedishVowel))
        {
            return base;
        }
    }

    // -iserad, unless what remains ends in `n`
    if (s.endsWith(iserad))
    {
        const base = s[0 .. $ - iserad.length];
        if (!base.endsWith(`n`))
        {
            return base;
        }
    }

    // -de, with -ande handled first
    if (s.endsWith(de))
    {
        if (s.endsWith(ande))
        {
            const beforeAnde = s[0 .. $ - ande.length];
            if (beforeAnde.length == 0)
            {
                return s;       // the word is just `ande`
            }
            if (beforeAnde[$ - 1].isSwedishConsonant)
            {
                return s[0 .. $ - 3];   // keep the `a`
            }
            return beforeAnde;
        }
        if (s == `hade`)
        {
            return s;
        }
        const beforeDe = s[0 .. $ - de.length];
        return beforeDe;
    }

    // -ing, with -ning handled first
    if (s.endsWith(ing))
    {
        if (s.endsWith(ning))
        {
            const beforeNing = s[0 .. $ - ning.length];
            if (!beforeNing.endsWith(`n`) &&
                beforeNing != `tid`)
            {
                return beforeNing;
            }
        }
        return s[0 .. $ - ing.length];
    }

    // -llt
    if (s.endsWith(llt))
    {
        return s[0 .. $ - 1];
    }

    return s;
}
766 
/// Table-driven checks of `stemSwedish`.
unittest
{
    // import nxt.assert_ex;

    // each entry is [input word, expected stem]
    string[2][] cases = [
        ["rumpan", "rumpa"],
        ["sopan", "sopa"],
        ["kistan", "kista"],

        ["karl", "karl"],

        ["grenen", "gren"],
        ["busen", "bus"],
        ["husen", "hus"],
        ["räven", "räv"],
        ["dunken", "dunk"],
        ["männen", "män"],
        ["manen", "man"],
        ["mannen", "man"],

        ["skalet", "skal"],
        ["karet", "kar"],
        ["taket", "tak"],
        ["stinget", "sting"],

        ["äpplet", "äpple"],

        ["jakt", "jakt"],

        ["sot", "sot"],
        ["sotare", "sot"],

        ["klok", "klok"],
        ["klokare", "klok"],
        ["klokast", "klok"],

        ["stark", "stark"],
        ["starkare", "stark"],
        ["starkast", "stark"],

        ["kort", "kort"],
        ["kortare", "kort"],
        ["kortast", "kort"],

        ["rolig", "rolig"],
        ["roligare", "rolig"],
        ["roligast", "rolig"],

        ["dum", "dum"],
        ["dummare", "dum"],
        ["dummast", "dum"],
        ["dummaste", "dum"],
        ["senaste", "sen"],

        ["sanning", "sann"],
        ["sann", "sann"],
        ["sannare", "sann"],
        ["sannare", "sann"],

        ["stare", "stare"],
        ["kvast", "kvast"],

        ["täcket", "täcke"],
        ["räcket", "räcke"],

        ["van", "van"],
        ["dan", "dan"],
        ["man", "man"],
        ["ovan", "ovan"],
        ["stan", "stan"],
        ["klan", "klan"],

        ["klockan", "klocka"],
        ["klockande", "klocka"],
        ["sockan", "socka"],
        ["rockan", "rocka"],
        ["rock", "rock"],

        ["agenter", "agent"],
        ["agenterna", "agent"],
        ["regenter", "regent"],
        ["regenterna", "regent"],

        ["brodern", "broder"],
        ["kärnan", "kärna"],

        ["skorna", "sko"],

        ["inträffade", "inträffa"],
        ["roa", "roa"],
        ["roade", "roa"],
        ["hade", "hade"],
        ["hades", "hades"],

        ["fullt", "full"],

        ["kanaliserad", "kanal"],
        ["alkoholiserad", "alkohol"],

        ["roande", "ro"],

        /* ["ror", "ro"], */
        /* ["öbor", "öbo"], */

        ["ande", "ande"],

        ["störande", "störa"],
        ["nekande", "neka"],
        ["jagande", "jaga"],
        ["stimulerande", "stimulera"],

        ["karlar", "karl"],
        ["lagar", "lag"],

        ["sina", "sin"],
        ["dina", "din"],
        ["mina", "min"],

        ["även", "även"],

        ["samtida", "samtid"],

        ["trattar", "tratt"],

        ["katter", "katt"],
        ["dagar", "dag"],
        ["öar", "ö"],
        ["åar", "å"],
        ["ängar", "äng"],

        ["spelar", "spel"],
        ["drar", "dra"],

        ["kullar", "kull"],
        ["kullarna", "kull"],

        ["mamma", "mamma"],

        ["bestyr", "bestyr"],

        ["krya", "kry"],
        ["nya", "ny"],

        ["lemmar", "lem"],

        /* ["ämnar", "ämna"], */
        /* ["rämnar", "rämna"], */
        /* ["lämnar", "lämna"], */
    ];

    foreach (pair; cases)
    {
        assert(pair[0].stemSwedish == pair[1]);
    }
}
915 
/** Stem the Norwegian word $(D s).
 *
 * The only rule is a call to `skipOverBack` with the suffix `ede`; the
 * (possibly modified) $(D s) is then returned.
 * NOTE(review): `skipOverBack` comes from `nxt.skip_ex`; presumably it drops
 * the suffix from $(D s) in place when present - confirm against that module.
 */
auto ref stemNorvegian(S)(S s)
if (isSomeString!S)
{
    enum suffix = `ede`;
    s.skipOverBack(suffix);
    return s;
}
922 
/** Stem $(D s) in language $(D lang).
 *
 * For $(D Lang.unknown) the English, Swedish and Norwegian stemmers are tried
 * in that order, and the first result whose length differs from that of
 * $(D s) is returned together with the language that produced it. If no
 * attempt changes the length, the result of the last attempt (Norwegian) is
 * returned.
 *
 * $(D Lang.sv) uses $(D stemSwedish), $(D Lang.no) uses $(D stemNorvegian);
 * $(D Lang.en) and every other language use the Porter $(D Stemmer).
 *
 * Params:
 *   s = the word to stem
 *   lang = the language of $(D s)
 * Returns: the stem paired with the language it was stemmed in.
 */
Tuple!(S, Lang) stemIn(S)(S s, Lang lang = Lang.init)
if (isSomeString!S)
{
    typeof(return) t;
    switch (lang) with (Lang)
    {
        case unknown:
            // a changed length is taken as the sign that a stemmer applied
            t = s.stemIn(en); if (t[0].length != s.length) return t;
            t = s.stemIn(sv); if (t[0].length != s.length) return t;
            t = s.stemIn(no); if (t[0].length != s.length) return t;
            break;
        case sv: t = tuple(s.stemSwedish, sv); break;
        case no: t = tuple(s.stemNorvegian, no); break;
        case en:
        default:
            // instantiate the stemmer for the caller's string type rather
            // than hard-coding `string`
            auto stemmer = new Stemmer!S();
            t = tuple(stemmer.stem(s), lang);
    }
    return t;
}
946 
/** Stem $(D s) in place in language $(D lang).
 *
 * $(D s) is overwritten with the stem returned by $(D stemIn).
 *
 * Returns: a tuple of a flag that is true when the length of $(D s) changed,
 * and the language reported by $(D stemIn).
 */
Tuple!(bool, Lang) stemize(S)(ref S s, Lang lang = Lang.init)
if (isSomeString!S)
{
    const lengthBefore = s.length;
    auto stemmed = s.stemIn(lang);
    s = stemmed[0];
    bool lengthChanged = (lengthBefore != s.length);
    return tuple(lengthChanged, stemmed[1]);
}
956 
/** Return the stem of $(D s) using a reduced set of Porter's rules.
 *
 * Plural endings are handled first (step 1a). After that a single chain of
 * step 2 and step 3 suffix rules is consulted, and at most one of them is
 * applied.
 *
 * See_Also: https://en.wikipedia.org/wiki/I_m_still_remembering
 * See_Also: https://en.wikipedia.org/wiki/Martin_Porter
 * See_Also: https://www.youtube.com/watch?v=2s7f8mBwnko&list=PL6397E4B26D00A269&index=4.
 */
S alternativePorterStemEnglish(S)(S s)
if (isSomeString!S)
{
    /* Step 1a: -sses and -ies lose two letters, a lone -s loses one,
     * and -ss is kept. */
    if (s.endsWith(`sses`) || s.endsWith(`ies`))
    {
        s = s[0 .. $ - 2];
    }
    else if (s.endsWith(`s`) && !s.endsWith(`ss`))
    {
        s = s[0 .. $ - 1];
    }

    /* Steps 2 and 3: the first matching rule wins. */
    if (s.endsWith(`ational`))
    {
        s = s[0 .. $ - `ational`.length] ~ `ate`;
    }
    else if (s.endsWith(`izer`))
    {
        s = s[0 .. $ - 1];
    }
    else if (s.endsWith(`ator`) || s.endsWith(`al`))
    {
        // both endings trade their last two letters for an `e`
        s = s[0 .. $ - 2] ~ `e`;
    }
    else if (s.endsWith(`able`))
    {
        s = s[0 .. $ - `able`.length];
    }
    else if (s.endsWith(`ate`))
    {
        s = s[0 .. $ - `ate`.length] ~ `e`;
    }

    return s;
}
983 
/// Table-driven checks of `alternativePorterStemEnglish`.
unittest
{
    // each entry is [input word, expected stem]
    string[2][] cases = [
        // step 1a
        [`caresses`, `caress`],
        [`ponies`, `poni`],
        [`caress`, `caress`],
        [`cats`, `cat`],

        // step 2
        [`relational`, `relate`],
        [`digitizer`, `digitize`],
        [`operator`, `operate`],

        // step 3
        [`revival`, `revive`],
        [`adjustable`, `adjust`],
        [`activate`, `active`],
    ];

    foreach (pair; cases)
    {
        assert(pair[0].alternativePorterStemEnglish == pair[1]);
    }
}