#!/usr/bin/env rdmd-dev

module nxt.syllables;

import nxt.languages : Lang;
import std.traits: isSomeString;
import std.uni: byGrapheme;

/** Count number of syllables in $(D s) interpreted in language $(D lang).

    Only the English heuristics below are implemented; $(D lang) is accepted
    for interface compatibility and is currently ignored.

    The implementation is a port of the Python reference published at the
    first See_Also link. The word is lower-cased and processed per code point.
    Only the plain letters a, e, i, o and u count as vowels.

    The algorithm:

    $(OL
    $(LI If the number of letters is <= 3, return 1 (0 for the empty
         string). Incorrect for e.g. "Ira".)
    $(LI If the word ends with "es" or "ed" but not with "ted", "tes", "ses",
         "ied" or "ies", discard that ending, unless the word has only one
         vowel or one set of consecutive vowels (like "speed", "fled").)
    $(LI Discard a trailing "e", except where the ending is "le" and the word
         is not in the "le" exception list.)
    $(LI Count consecutive vowels (pairs or triplets) as one.)
    $(LI Count the remaining vowels in the word.)
    $(LI Add one if the word begins with "mc".)
    $(LI Add one if the word ends with "y" that is not preceded by a vowel
         (e.g. "happy").)
    $(LI Add one for every "y" that is surrounded by non-vowels and is not
         the last letter (e.g. "python").)
    $(LI If the word begins with "tri" or "bi" followed by a vowel, add one
         (so that "ia" in "triangle" isn't mistreated by step 4).)
    $(LI If the word ends with "ian", add one, except for "tian" and "cian"
         (so that "indian" and "politician" are handled differently by
         step 4).)
    $(LI If the word begins with "co" followed by a vowel, add one unless it
         starts with an entry of the single-syllable "co" list and with no
         entry of the double-syllable "co" list (e.g. "coach" versus
         "coapt").)
    $(LI If the word begins with "pre" followed by a vowel, add one unless it
         starts with an entry of the single-syllable "pre" list (similar to
         step 11, but with a very weak dictionary for the moment).)
    $(LI Add one for the listed "n't" contractions (e.g. "doesn't",
         "couldn't").)
    $(LI Handle the exceptional words (e.g. "serious", "fortunately").)
    )

    This isn't perfect, so there are some steps to add or modify. Some
    exceptions should be added, such as "evacuate", "ambulances", "shuttled"
    and "anyone". It also can't handle some compound words like "facebook":
    counting only "face" correctly gives 1 and "book" also gives 1, but because
    the "e" isn't detected as a silent "e", "facebook" yields 3 syllables.

    Params:
        s = the word to analyse
        lang = language of $(D s); currently ignored (English rules are used)

    Returns: the estimated number of syllables in $(D s); 0 for an empty
    string.

    See_Also: http://eayd.in/?p=232
    See_Also: http://forum.dlang.org/thread/ovzcetxbrdblpmyizdjr@forum.dlang.org#post-ovzcetxbrdblpmyizdjr:40forum.dlang.org
 */
uint countSyllables(S)(S s, Lang lang = Lang.en)
if (isSomeString!S)
{
    import std.algorithm.searching : canFind, endsWith, startsWith;
    import std.conv : to;
    import std.string : toLower;

    /* words that need an extra syllable */
    static immutable dstring[] exceptionAdd = ["serious"d, "crucial"d];
    /* words that need one syllable less */
    static immutable dstring[] exceptionDel = ["fortunately"d, "unfortunately"d];
    /* "co" + vowel prefixes pronounced as one syllable (all 4 to 6 letters) */
    static immutable dstring[] coOne = ["cool"d, "coach"d, "coat"d, "coal"d,
                                        "count"d, "coin"d, "coarse"d, "coup"d,
                                        "coif"d, "cook"d, "coign"d, "coiffe"d,
                                        "coof"d, "court"d];
    /* "co" + vowel prefixes pronounced as two syllables (all 4 to 6 letters) */
    static immutable dstring[] coTwo = ["coapt"d, "coed"d, "coinci"d];
    /* "pre" + vowel prefixes pronounced as one syllable */
    static immutable dstring[] preOne = ["preach"d];
    /* words ending in "le" whose trailing "e" is silent */
    static immutable dstring[] leExcept = ["whole"d, "mobile"d, "pole"d, "male"d,
                                           "female"d, "hale"d, "pale"d, "tale"d,
                                           "sale"d, "aisle"d, "whale"d, "while"d];
    /* "-ed"/"-es" endings that keep their syllable */
    static immutable dstring[] keptEndings = ["ted"d, "tes"d, "ses"d, "ied"d, "ies"d];
    /* contractions whose "n't" is a syllable of its own */
    static immutable dstring[] negatives = ["doesn't"d, "isn't"d, "shouldn't"d,
                                            "couldn't"d, "wouldn't"d];

    // Transcode to UTF-32 so that indexing and slicing work per code point.
    const dstring word = s.toLower.to!dstring;

    // An empty string has no syllables; the reference would answer 1 here.
    if (word.length == 0)
        return 0;

    // 1) very short words are treated as monosyllabic
    if (word.length <= 3)
        return 1;

    // From here on word.length >= 4, so word[2], word[3] and word[$ - 2] exist.

    int added = 0;              // syllables to add to the vowel count
    int discarded = 0;          // syllables to remove from the vowel count

    // 2) silent "-es" / "-ed"
    if (word.endsWith("es"d) || word.endsWith("ed"d))
    {
        const vowelPairs = countPattern(word, "VV");
        const vowelThenOther = countPattern(word, "VC");
        if ((vowelPairs > 1 || vowelThenOther > 1) &&
            !keptEndings.canFind(word[$ - 3 .. $]))
            ++discarded;
    }

    // 3) silent trailing "e", unless it belongs to a syllabic "-le"
    if (word[$ - 1] == 'e')
    {
        const syllabicLe = word.endsWith("le"d) && !leExcept.canFind(word);
        if (!syllabicLe)
            ++discarded;
    }

    // 4) consecutive vowels (pairs or triplets) count as one
    discarded += countPattern(word, "VV") + countPattern(word, "VVV");

    // 5) count all vowels
    const vowelCount = countPattern(word, "V");

    // 6) leading "mc"
    if (word.startsWith("mc"d))
        ++added;

    // 7) trailing "y" preceded by a non-vowel
    if (word[$ - 1] == 'y' && !isPlainVowel(word[$ - 2]))
        ++added;

    // 8) interior "y" between two non-vowels
    foreach (const i; 1 .. word.length - 1)
    {
        if (word[i] == 'y' &&
            !isPlainVowel(word[i - 1]) &&
            !isPlainVowel(word[i + 1]))
            ++added;
    }

    // 9) "tri-" / "bi-" followed by a vowel
    if (word.startsWith("tri"d) && isPlainVowel(word[3]))
        ++added;
    if (word.startsWith("bi"d) && isPlainVowel(word[2]))
        ++added;

    // 10) "-ian" is two syllables, except "-cian" and "-tian"
    if (word.endsWith("ian"d) &&
        !word.endsWith("cian"d) &&
        !word.endsWith("tian"d))
        ++added;

    // 11) "co-" followed by a vowel: one syllable only for the coOne prefixes
    if (word.startsWith("co"d) && isPlainVowel(word[2]))
    {
        if (startsWithAnyOf(word, coTwo) || !startsWithAnyOf(word, coOne))
            ++added;
    }

    // 12) "pre-" followed by a vowel: one syllable only for the preOne prefixes
    if (word.startsWith("pre"d) && isPlainVowel(word[3]) &&
        !startsWithAnyOf(word, preOne))
        ++added;

    // 13) contractions with a syllabic "n't"
    if (negatives.canFind(word))
        ++added;

    // 14) exceptional words
    if (exceptionDel.canFind(word))
        ++discarded;
    if (exceptionAdd.canFind(word))
        ++added;

    // Clamp so that an over-discarded word cannot wrap around in `uint`.
    const total = vowelCount - discarded + added;
    return total > 0 ? cast(uint) total : 0;
}

///
@safe pure unittest
{
    assert(countSyllables("") == 0);
    assert(countSyllables("cat") == 1);
    assert(countSyllables("hello") == 2);
    assert(countSyllables("speed") == 1);
    assert(countSyllables("python") == 2);
    assert(countSyllables("triangle") == 3);
    assert(countSyllables("indian") == 3);
    assert(countSyllables("politician") == 4);
    assert(countSyllables("coach") == 1);
    assert(countSyllables("coapt") == 2);
    assert(countSyllables("serious") == 3);
    assert(countSyllables("doesn't") == 2);
    assert(countSyllables("facebook") == 3); // known limitation
}

/** Returns: `true` iff `c` is one of the plain lower-case vowels a, e, i, o, u. */
private bool isPlainVowel(dchar c) @safe pure nothrow @nogc
{
    switch (c)
    {
    case 'a', 'e', 'i', 'o', 'u':
        return true;
    default:
        return false;
    }
}

/** Count the non-overlapping, left-to-right matches of `pattern` in `word`.

    Each `'V'` in `pattern` matches one vowel (see `isPlainVowel`) and any other
    pattern character matches one non-vowel. After a match the scan resumes
    behind the matched characters, as Python's `re.findall` does.
 */
private int countPattern(const(dchar)[] word, string pattern) @safe pure nothrow @nogc
{
    int hits = 0;
    if (pattern.length == 0)
        return hits;
    size_t i = 0;
    while (i + pattern.length <= word.length)
    {
        bool matched = true;
        foreach (const k, const char kind; pattern)
        {
            if (isPlainVowel(word[i + k]) != (kind == 'V'))
            {
                matched = false;
                break;
            }
        }
        if (matched)
        {
            ++hits;
            i += pattern.length;
        }
        else
            ++i;
    }
    return hits;
}

/** Returns: `true` iff `word` begins with at least one entry of `prefixes`. */
private bool startsWithAnyOf(const(dchar)[] word, const dstring[] prefixes) @safe pure nothrow @nogc
{
    foreach (const prefix; prefixes)
    {
        if (word.length >= prefix.length && word[0 .. prefix.length] == prefix)
            return true;
    }
    return false;
}