module nxt.syllables;

import nxt.languages : Lang;
import std.traits: isSomeString;
import std.uni: byGrapheme;

/// Words that need one extra syllable on top of what the heuristic yields.
private immutable dstring[] exceptionAdd = ["serious"d, "crucial"d];

/// Words that need one syllable less than what the heuristic yields.
private immutable dstring[] exceptionDel = ["fortunately"d, "unfortunately"d];

/// "co" + vowel prefixes in which the vowel pair is a single syllable.
private immutable dstring[] coOne = [
    "cool"d, "coach"d, "coat"d, "coal"d, "count"d, "coin"d, "coarse"d,
    "coup"d, "coif"d, "cook"d, "coign"d, "coiffe"d, "coof"d, "court"d,
];

/// "co" + vowel prefixes in which the vowel pair spans two syllables.
private immutable dstring[] coTwo = ["coapt"d, "coed"d, "coinci"d];

/// "pre" + vowel prefixes in which the vowel pair is a single syllable.
private immutable dstring[] preOne = ["preach"d];

/// Words ending in "le" whose trailing "e" is silent.
private immutable dstring[] leExcept = [
    "whole"d, "mobile"d, "pole"d, "male"d, "female"d, "hale"d,
    "pale"d, "tale"d, "sale"d, "aisle"d, "whale"d, "while"d,
];

/// Contractions in which "n't" forms a syllable of its own.
private immutable dstring[] negative = [
    "doesn't"d, "isn't"d, "shouldn't"d, "couldn't"d, "wouldn't"d,
];

/// Returns: `true` iff `c` is one of the lower-case letters a, e, i, o, u.
private bool isEnglishVowel(dchar c) @safe pure nothrow @nogc
{
    return c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u';
}

/** Count non-overlapping occurrences of a fixed-length vowel/non-vowel pattern
    in `word`, scanning left to right.

    `wantVowel[k]` states whether the k:th character of a match must be a vowel
    (`true`) or must not be one (`false`). After a match the scan resumes right
    behind it; otherwise it advances by one character.
 */
private uint countVowelPattern(const(dchar)[] word, const(bool)[] wantVowel) @safe pure nothrow @nogc
{
    if (wantVowel.length == 0)
        return 0;
    uint matches = 0;
    size_t i = 0;
    while (i + wantVowel.length <= word.length)
    {
        bool matched = true;
        foreach (k, want; wantVowel)
        {
            if (isEnglishVowel(word[i + k]) != want)
            {
                matched = false;
                break;
            }
        }
        if (matched)
        {
            ++matches;
            i += wantVowel.length;
        }
        else
            ++i;
    }
    return matches;
}

/// Returns: the first `n` characters of `word`, or all of `word` if it is shorter.
private dstring headOf(dstring word, size_t n) @safe pure nothrow @nogc
{
    return word[0 .. (n < word.length ? n : word.length)];
}

/** Count number of syllables in $(D s) interpreted in language $(D lang).

    The input is lower-cased and treated as a single word. The rules below are
    heuristics for English; $(D lang) is currently not consulted.

    The algorithm:

    1. If the number of letters is <= 3, return 1. Incorrect for "Ira" and
       "usi". (An empty string yields 0.)

    2. If the word ends with "es" or "ed", but not with "ted", "tes", "ses",
       "ied" or "ies", discard that ending, provided the word has more than one
       vowel pair or more than one vowel followed by a non-vowel. (Words such
       as "speed" and "fled" are thereby left alone.)

    3. Discard a trailing "e", except where the ending is "le" and the word is
       not in the list of "le" exceptions.

    4. Check if consecutive vowels exist, triplets or pairs, and count them as
       one.

    5. Count the vowels in the word.

    6. Add one if the word begins with "mc".

    7. Add one if the word ends with "y" that is not preceded by a vowel.
       ("mickey" is therefore not incremented.)

    8. Add one for each "y" that is surrounded by non-vowels and is neither the
       first nor the last letter of the word. (ex. "python")

    9. If the word begins with "tri" or "bi" and is followed by a vowel, add
       one. (So that "ia" in "triangle" is not mistreated by step 4.)

    10. If the word ends with "ian" it should be counted as two syllables,
        except for "tian" and "cian". (ex. "indian" and "politician" should be
        handled differently and not be mistreated by step 4.)

    11. If the word begins with "co" and is followed by a vowel, check if its
        prefix exists in the double-syllable list; if not, check the
        single-syllable list and act accordingly; a prefix in neither list is
        treated as double. (ex. "coach" and "coapt" should not be treated
        equally by step 4.)

    12. If the word begins with "pre" and is followed by a vowel, add one
        unless its prefix is in the single-syllable list. (Similar to step 11,
        but with a very weak list for the moment.)

    13. Check for "n't" and cross match with a list to add a syllable.
        (ex. "doesn't", "couldn't")

    14. Handle the exceptional words. (ex. "serious", "fortunately")

    This isn't perfect, so there are some steps to add or modify, but it works
    reasonably well. Some exceptions should be added, such as "evacuate",
    "ambulances", "shuttled", "anyone" etc. It also cannot handle some compound
    words like "facebook": counting only "face" correctly gives 1, and "book"
    also comes out correct, but because the "e" is not detected as a silent
    "e", "facebook" yields 3 syllables.

    Params:
        s = word to count syllables in
        lang = language of $(D s); currently ignored, English rules are applied

    Returns: estimated number of syllables in $(D s); 0 if $(D s) is empty.

    See_Also: http://eayd.in/?p=232
    See_Also: http://forum.dlang.org/thread/ovzcetxbrdblpmyizdjr@forum.dlang.org#post-ovzcetxbrdblpmyizdjr:40forum.dlang.org
*/
uint countSyllables(S)(S s, Lang lang = Lang.en)
if (isSomeString!S)
{
    import std.algorithm.searching : canFind, endsWith, startsWith;
    import std.conv : to;
    import std.string : toLower;

    // Work on UTF-32 so that lengths and indexes are per code point regardless
    // of the character width of `S`.
    dstring word = s.to!dstring.toLower;

    if (word.length == 0)
        return 0;

    // #1) short words are taken to be monosyllabic
    if (word.length <= 3)
        return 1;

    // From here on `word.length >= 4`, so the fixed indexes below are in range.

    uint added = 0;     // syllables added by special rules
    uint discarded = 0; // vowels that must not be counted as syllables

    const uint vowelPairs = countVowelPattern(word, [true, true]);
    const uint vowelTriples = countVowelPattern(word, [true, true, true]);

    // #2) silent "es"/"ed" endings
    if (word.endsWith("es"d) || word.endsWith("ed"d))
    {
        if (vowelPairs > 1 || countVowelPattern(word, [true, false]) > 1)
        {
            const bool keepsEnding = word.endsWith("ted"d)
                || word.endsWith("tes"d)
                || word.endsWith("ses"d)
                || word.endsWith("ied"d)
                || word.endsWith("ies"d);
            if (!keepsEnding)
                ++discarded;
        }
    }

    // #3) silent trailing "e"; a trailing "le" is voiced unless listed
    if (word[$ - 1] == 'e')
    {
        if (!word.endsWith("le"d) || leExcept.canFind(word))
            ++discarded;
    }

    // #4) consecutive vowels, pairs or triplets, count as one
    discarded += vowelPairs + vowelTriples;

    // #5) vowels in the word
    const uint numVowels = countVowelPattern(word, [true]);

    // #6) leading "mc"
    if (word.startsWith("mc"d))
        ++added;

    // #7) trailing "y" not preceded by a vowel
    if (word[$ - 1] == 'y' && !isEnglishVowel(word[$ - 2]))
        ++added;

    // #8) inner "y" surrounded by non-vowels
    foreach (i; 1 .. word.length - 1)
    {
        if (word[i] == 'y'
            && !isEnglishVowel(word[i - 1])
            && !isEnglishVowel(word[i + 1]))
            ++added;
    }

    // #9) "tri"/"bi" followed by a vowel
    if (word.startsWith("tri"d) && isEnglishVowel(word[3]))
        ++added;
    if (word.startsWith("bi"d) && isEnglishVowel(word[2]))
        ++added;

    // #10) "-ian" is two syllables, except "-cian" and "-tian"
    if (word.endsWith("ian"d)
        && !word.endsWith("cian"d)
        && !word.endsWith("tian"d))
        ++added;

    // #11) "co" followed by a vowel
    if (word.startsWith("co"d) && isEnglishVowel(word[2]))
    {
        const dstring head4 = headOf(word, 4);
        const dstring head5 = headOf(word, 5);
        const dstring head6 = headOf(word, 6);
        const bool isDouble = coTwo.canFind(head4)
            || coTwo.canFind(head5)
            || coTwo.canFind(head6);
        const bool isSingle = coOne.canFind(head4)
            || coOne.canFind(head5)
            || coOne.canFind(head6);
        // unknown prefixes are treated like the double-syllable ones
        if (isDouble || !isSingle)
            ++added;
    }

    // #12) "pre" followed by a vowel
    if (word.startsWith("pre"d) && isEnglishVowel(word[3]))
    {
        if (!preOne.canFind(headOf(word, 6)))
            ++added;
    }

    // #13) syllabic "n't"
    if (word.endsWith("n't"d) && negative.canFind(word))
        ++added;

    // #14) exceptional words
    if (exceptionDel.canFind(word))
        ++discarded;
    if (exceptionAdd.canFind(word))
        ++added;

    // Signed arithmetic: on degenerate input the discards may exceed the
    // vowel count, which must not wrap around in the unsigned result.
    const long total = cast(long) numVowels - cast(long) discarded + cast(long) added;
    return total > 0 ? cast(uint) total : 0;
}

///
unittest
{
    assert(countSyllables("") == 0);
    assert(countSyllables("the") == 1);
    assert(countSyllables("fled") == 1);
    assert(countSyllables("coach") == 1);
    assert(countSyllables("hello") == 2);
    assert(countSyllables("python") == 2);
    assert(countSyllables("mickey") == 2);
    assert(countSyllables("doesn't") == 2);
    assert(countSyllables("indian") == 3);
    assert(countSyllables("serious") == 3);
    assert(countSyllables("triangle") == 3);

    // case-insensitive and independent of the character width
    assert(countSyllables("Python"w) == 2);
    assert(countSyllables("HELLO"d) == 2);
}