1 module nxt.syllables;
2 
3 import nxt.languages : Lang;
4 import std.traits: isSomeString;
5 import std.uni: byGrapheme;
6 
/** Count the number of syllables in the single word $(D s), interpreted in
    language $(D lang).

    Only English heuristics are implemented; $(D lang) is accepted for
    interface compatibility but is currently ignored.

    The algorithm (a port of the Python reference linked below):

    1. If the number of letters is <= 3, return 1. This is known to be wrong
       for some words, e.g. `ira` and `usi`.

    2. If the word ends with `es` or `ed`, but not with `ted`, `tes`, `ses`,
       `ied` or `ies`, discard that ending, unless the word has only one vowel
       or one set of consecutive vowels (like `speed`, `fled`).

    3. Discard a trailing `e`, except where the ending is `le` and the word is
       not in the `le` exception list (`whole`, `mobile`, ...).

    4. Count consecutive vowels (pairs or triplets) as one.

    5. Count the remaining vowels in the word.

    6. Add one if the word begins with `mc`.

    7. Add one if the word ends with `y` that is not preceded by a vowel
       (so `happy` gets one, `mickey` does not).

    8. Add one for every `y` that is surrounded by non-vowels and is not the
       last letter (ex. `python`).

    9. If the word begins with `tri` or `bi` followed by a vowel, add one (so
       that the `ia` in `triangle` is not mistreated by step 4).

    10. If the word ends with `ian`, add one, except for `tian` and `cian`
        (`indian` and `politician` must be handled differently and must not
        be mistreated by step 4).

    11. If the word begins with `co` followed by a vowel, look its prefix up in
        the two-syllable dictionary and then in the one-syllable dictionary,
        and add one unless it is only found in the latter (`coach` and `coapt`
        must not be treated equally by step 4).

    12. If the word begins with `pre` followed by a vowel, add one unless it is
        in the one-syllable dictionary (similar to step 11, but with a very
        weak dictionary for the moment).

    13. Add one for the contractions `doesn't`, `isn't`, `shouldn't`,
        `couldn't` and `wouldn't` (matched with an ASCII apostrophe).

    14. Handle exceptional words (ex. `serious`, `fortunately`).

    This heuristic is not perfect. Further exceptions should be added, such as
    `evacuate`, `ambulances`, `shuttled` and `anyone`. It also cannot handle
    some compound words: `face` and `book` are each counted correctly as 1,
    but because the `e` in `facebook` is not detected as a silent `e`,
    `facebook` yields 3.

    Params:
        s = a single word; it is lower-cased before analysis
        lang = language of $(D s); currently ignored (English rules are used)

    Returns: the estimated number of syllables, 0 for an empty $(D s).

    See_Also: http://eayd.in/?p=232
    See_Also: http://forum.dlang.org/thread/ovzcetxbrdblpmyizdjr@forum.dlang.org#post-ovzcetxbrdblpmyizdjr:40forum.dlang.org
 */
uint countSyllables(S)(S s, Lang lang = Lang.en)
if (isSomeString!S)
{
    import std.algorithm.comparison : min;
    import std.algorithm.searching : canFind, endsWith, startsWith;
    import std.conv : to;
    import std.string : toLower;

    /* words that need an extra syllable */
    static immutable dstring[] exceptionAdd = ["serious"d, "crucial"d];
    /* words that need one syllable less */
    static immutable dstring[] exceptionDel = ["fortunately"d, "unfortunately"d];
    /* "co" + vowel prefixes pronounced as one syllable */
    static immutable dstring[] coOne = ["cool"d, "coach"d, "coat"d, "coal"d, "count"d,
                                        "coin"d, "coarse"d, "coup"d, "coif"d, "cook"d,
                                        "coign"d, "coiffe"d, "coof"d, "court"d];
    /* "co" + vowel prefixes pronounced as two syllables */
    static immutable dstring[] coTwo = ["coapt"d, "coed"d, "coinci"d];
    /* "pre" + vowel prefixes pronounced as one syllable */
    static immutable dstring[] preOne = ["preach"d];
    /* words ending in "le" whose trailing "e" is silent */
    static immutable dstring[] leExcept = ["whole"d, "mobile"d, "pole"d, "male"d,
                                           "female"d, "hale"d, "pale"d, "tale"d,
                                           "sale"d, "aisle"d, "whale"d, "while"d];
    /* contractions whose "n't" forms a syllable of its own */
    static immutable dstring[] negative = ["doesn't"d, "isn't"d, "shouldn't"d,
                                           "couldn't"d, "wouldn't"d];

    // Work on code points so that indexing and length are per letter for
    // every supported string type (string, wstring, dstring).
    const dstring word = s.toLower.to!dstring;

    if (word.length == 0)
        return 0;               // nothing to pronounce
    if (word.length <= 3)       // step 1
        return 1;
    // From here on `word.length >= 4`, so `word[2]`, `word[3]` and
    // `word[$ - 2]` are always valid.

    static bool isVowel(dchar c)
    {
        switch (c)
        {
        case 'a', 'e', 'i', 'o', 'u':
            return true;
        default:
            return false;
        }
    }

    // Number of non-overlapping groups of `n` consecutive vowels, scanning
    // left to right (same semantics as Python's `re.findall`).
    size_t countVowelGroups(size_t n)
    {
        size_t groups = 0;
        size_t run = 0;
        foreach (const c; word)
        {
            if (!isVowel(c))
                run = 0;
            else if (++run == n)
            {
                ++groups;
                run = 0;
            }
        }
        return groups;
    }

    // True if the first 4, 5 or 6 letters of `word` form an entry of `dict`.
    bool prefixIn(const dstring[] dict)
    {
        foreach (size_t n; 4 .. 7)
            if (dict.canFind(word[0 .. min(n, word.length)]))
                return true;
        return false;
    }

    const vowels = countVowelGroups(1);  // step 5
    const pairs = countVowelGroups(2);
    const triples = countVowelGroups(3);

    // Number of vowels that are directly followed by a non-vowel.
    size_t vowelThenOther = 0;
    foreach (i; 1 .. word.length)
        if (isVowel(word[i - 1]) && !isVowel(word[i]))
            ++vowelThenOther;

    size_t added = 0;           // added syllable number
    size_t discarded = 0;       // discarded syllable number

    // step 2
    if (word.endsWith("es"d, "ed"d) &&
        (pairs > 1 || vowelThenOther > 1) &&
        !word.endsWith("ted"d, "tes"d, "ses"d, "ied"d, "ies"d))
        ++discarded;

    // step 3
    if (word[$ - 1] == 'e' &&
        !(word.endsWith("le"d) && !leExcept.canFind(word)))
        ++discarded;

    // step 4
    discarded += pairs + triples;

    // step 6
    if (word.startsWith("mc"d))
        ++added;

    // step 7
    if (word[$ - 1] == 'y' && !isVowel(word[$ - 2]))
        ++added;

    // step 8
    foreach (i; 1 .. word.length - 1)
        if (word[i] == 'y' && !isVowel(word[i - 1]) && !isVowel(word[i + 1]))
            ++added;

    // step 9
    if (word.startsWith("tri"d) && isVowel(word[3]))
        ++added;
    if (word.startsWith("bi"d) && isVowel(word[2]))
        ++added;

    // step 10
    if (word.endsWith("ian"d) && !word.endsWith("cian"d, "tian"d))
        ++added;

    // step 11: only a match that is exclusive to the one-syllable dictionary
    // suppresses the extra syllable.
    if (word.startsWith("co"d) && isVowel(word[2]) &&
        (prefixIn(coTwo) || !prefixIn(coOne)))
        ++added;

    // step 12
    if (word.startsWith("pre"d) && isVowel(word[3]) &&
        !preOne.canFind(word[0 .. min(size_t(6), word.length)]))
        ++added;

    // step 13
    if (negative.canFind(word))
        ++added;

    // step 14
    if (exceptionDel.canFind(word))
        ++discarded;
    if (exceptionAdd.canFind(word))
        ++added;

    // Sum in a signed type and clamp, so that an unexpected surplus of
    // discarded syllables cannot wrap around the unsigned result.
    const long total = cast(long) (vowels + added) - cast(long) discarded;
    return total > 0 ? cast(uint) total : 0;
}

///
unittest
{
    assert(countSyllables("") == 0);
    assert(countSyllables("cat") == 1);
    assert(countSyllables("Hello") == 2);
    assert(countSyllables("speed") == 1);
    assert(countSyllables("wanted") == 2);
    assert(countSyllables("jumped") == 1);
    assert(countSyllables("whole") == 1);
    assert(countSyllables("table") == 2);
    assert(countSyllables("mcdonald") == 3);
    assert(countSyllables("happy") == 2);
    assert(countSyllables("mickey") == 2);
    assert(countSyllables("python"w) == 2);
    assert(countSyllables("triangle"d) == 3);
    assert(countSyllables("indian") == 3);
    assert(countSyllables("politician") == 4);
    assert(countSyllables("coach") == 1);
    assert(countSyllables("coapt") == 2);
    assert(countSyllables("preach") == 1);
    assert(countSyllables("doesn't") == 2);
    assert(countSyllables("serious") == 3);
    assert(countSyllables("fortunately") == 4);
}
80 
81 /* what about the word ira? */
/* #1) if letters <= 3 : return 1 */
83 /*     if len(word) <= 3 : */
84 /* syls = 1 */
85 /* return syls */
86 
87 /* #2) if doesn't end with "ted" or "tes" or "ses" or "ied" or "ies", discard "es" and "ed" at the end. */
88 /*     # if it has only 1 vowel or 1 set of consecutive vowels, discard. (like "speed", "fled" etc.) */
89 
90 /*     if word[-2:] == "es" or word[-2:] == "ed" : */
91 /*         doubleAndtripple_1 = len(re.findall(r'[eaoui][eaoui]',word)) */
92 /*         if doubleAndtripple_1 > 1 or len(re.findall(r'[eaoui][^eaoui]',word)) > 1 : */
93 /*             if word[-3:] == "ted" or word[-3:] == "tes" or word[-3:] == "ses" or word[-3:] == "ied" or word[-3:] == "ies" : */
94 /*                 pass */
95 /*             else : */
96 /*                 disc+=1 */
97 
98 /*     #3) discard trailing "e", except where ending is "le" */
99 
100 /*     le_except = ['whole','mobile','pole','male','female','hale','pale','tale','sale','aisle','whale','while'] */
101 
102 /*     if word[-1:] == "e" : */
103 /*         if word[-2:] == "le" and word not in le_except : */
104 /*             pass */
105 
106 /*         else : */
107 /*             disc+=1 */
108 
109 /*     #4) check if consecutive vowels exists, triplets or pairs, count them as one. */
110 
111 /*     doubleAndtripple = len(re.findall(r'[eaoui][eaoui]',word)) */
112 /*     tripple = len(re.findall(r'[eaoui][eaoui][eaoui]',word)) */
113 /*     disc+=doubleAndtripple + tripple */
114 
115 /*     #5) count remaining vowels in word. */
116 /*     numVowels = len(re.findall(r'[eaoui]',word)) */
117 
118 /*     #6) add one if starts with "mc" */
119 /*     if word[:2] == "mc" : */
120 /*         syls+=1 */
121 
/*     #7) add one if ends with "y" but is not surrounded by vowel */
123 /*     if word[-1:] == "y" and word[-2] not in "aeoui" : */
124 /*         syls +=1 */
125 
/*     #8) add one if "y" is surrounded by non-vowels and is not the last letter. */
127 
128 /*     for i,j in enumerate(word) : */
129 /*         if j == "y" : */
130 /*             if (i != 0) and (i != len(word)-1) : */
131 /*                 if word[i-1] not in "aeoui" and word[i+1] not in "aeoui" : */
132 /*                     syls+=1 */
133 
134 /*     #9) if starts with "tri-" or "bi-" and is followed by a vowel, add one. */
135 
136 /*     if word[:3] == "tri" and word[3] in "aeoui" : */
137 /*         syls+=1 */
138 
139 /*     if word[:2] == "bi" and word[2] in "aeoui" : */
140 /*         syls+=1 */
141 
142 /*     #10) if ends with "-ian", should be counted as two syllables, except for "-tian" and "-cian" */
143 
144 /*     if word[-3:] == "ian" : */
145 /*     #and (word[-4:] != "cian" or word[-4:] != "tian") : */
146 /*         if word[-4:] == "cian" or word[-4:] == "tian" : */
147 /*             pass */
148 /*         else : */
149 /*             syls+=1 */
150 
151 /*     #11) if starts with "co-" and is followed by a vowel, check if exists in the double syllable dictionary, if not, check if in single dictionary and act accordingly. */
152 
153 /*     if word[:2] == "co" and word[2] in 'eaoui' : */
154 
155 /*         if word[:4] in co_two or word[:5] in co_two or word[:6] in co_two : */
156 /*             syls+=1 */
157 /*         elif word[:4] in co_one or word[:5] in co_one or word[:6] in co_one : */
158 /*             pass */
159 /*         else : */
160 /*             syls+=1 */
161 
162 /*     #12) if starts with "pre-" and is followed by a vowel, check if exists in the double syllable dictionary, if not, check if in single dictionary and act accordingly. */
163 
164 /*     if word[:3] == "pre" and word[3] in 'eaoui' : */
165 /*         if word[:6] in pre_one : */
166 /*             pass */
167 /*         else : */
168 /*             syls+=1 */
169 
170 /*     #13) check for "-n't" and cross match with dictionary to add syllable. */
171 
172 /*     negative = ["doesn't", "isn't", "shouldn't", "couldn't","wouldn't"] */
173 
174 /*     if word[-3:] == "n't" : */
175 /*         if word in negative : */
176 /*             syls+=1 */
177 /*         else : */
178 /*             pass */
179 
180 /*     #14) Handling the exceptional words. */
181 
182 /*     if word in exception_del : */
183 /*         disc+=1 */
184 
185 /*     if word in exception_add : */
186 /*         syls+=1 */
187 
188 /*     # calculate the output */
189 /*     return numVowels - disc + syls */