/** String predicates and helpers for capitalization checks and
    quote-aware word splitting.
 */
module string_ex;

import std.traits : isSomeString;
import dbg;

/** Check if `s` begins with an uppercase letter immediately followed by a
    lowercase letter.

    Only the first two characters are inspected; anything after them is
    ignored.

    Params:
        s = string to inspect

    Returns: `true` if the first character is uppercase and the second is
    lowercase; `false` otherwise, including when `s` has fewer than two
    characters.
 */
bool isCapitalizedEasy(S)(S s)
if (isSomeString!S)
{
    import std.range.primitives : empty, front, popFront;
    import std.uni : isLower, isUpper;

    if (s.empty || !s.front.isUpper)
        return false;
    s.popFront;

    return !s.empty && s.front.isLower;
}

@safe pure unittest
{
    assert(!`A`.isCapitalizedEasy);
    assert(!`a`.isCapitalizedEasy);
    assert(!`alpha`.isCapitalizedEasy);
    assert(!`ALPHA`.isCapitalizedEasy);
    assert(!`aThing`.isCapitalizedEasy);
    assert(`Alpha`.isCapitalizedEasy);
}

/** Check if `s` is a capitalized word.

    A word counts as capitalized when its first character is an uppercase
    letter or an ASCII digit and every remaining character is an ASCII digit
    or a lowercase letter (for example `Alpha`, `22nd` or `11104`).

    Params:
        s = word to inspect

    Returns: `true` if `s` is capitalized as described above. A
    single-character `s` is accepted only when that character is a digit; an
    empty `s` yields `false`.

    TODO make nothrow by not using front
 */
bool isCapitalized(S)(S s)
if (isSomeString!S)
{
    import std.algorithm.searching : all;
    import std.ascii : isDigit;
    import std.range.primitives : empty, front, popFront;
    import std.uni : isLower, isUpper;

    if (s.empty)
        return false;

    const firstDigit = s.front.isDigit;
    const firstUpper = s.front.isUpper;
    if (!(firstDigit || firstUpper))
        return false;

    s.popFront;

    // A lone uppercase letter is rejected, a lone digit is accepted.
    if (s.empty)
        return firstDigit;

    return s.all!(x => x.isDigit || x.isLower);
}

@safe pure unittest
{
    assert(!`alpha`.isCapitalized);
    assert(!`ALPHA`.isCapitalized);
    assert(!`aThing`.isCapitalized);
    assert(`Alpha`.isCapitalized);
}

/** Return `true` if `s` has proper name-style capitalization, such as in
    `Alpha Centauri`.

    `s` is split into words at ASCII whitespace and at `-`. The first word
    must satisfy `isCapitalized`. Every later word must either satisfy
    `isCapitalized`, consist solely of uppercase letters (as in `Henry II`),
    or be one of the prepositions `of` and `upon`.

    Params:
        s = name to inspect

    Returns: `true` if every word of `s` passes the rules above; `false`
    otherwise, including when `s` is empty.
 */
bool isNameCapitalized(S)(S s)
if (isSomeString!S)
{
    import std.algorithm.comparison : among;
    import std.algorithm.iteration : splitter;
    import std.algorithm.searching : all;
    import std.ascii : isWhite;
    import std.range : enumerate;
    import std.uni : isUpper;

    // Splitting an empty string yields no words, which would make `all`
    // vacuously true; an empty string is not a capitalized name.
    if (s.length == 0)
        return false;

    return s.splitter!(c => c.isWhite || c == '-')
            .enumerate
            .all!(word => (word.index >= 1 &&
                           (word.value.all!(c => c.isUpper) || // Henry II
                            word.value.among!(`of`, `upon`) != 0)) ||
                          word.value.isCapitalized); // TODO add enumerate and all middle word to be a preposition
}

@safe pure unittest
{
    assert(!``.isNameCapitalized);
    assert(!`alpha`.isNameCapitalized);
    assert(!`alpha centauri`.isNameCapitalized);
    assert(!`ALPHA`.isNameCapitalized);
    assert(!`ALPHA CENTAURI`.isNameCapitalized);
    assert(!`aThing`.isNameCapitalized);
    assert(`Alpha`.isNameCapitalized);
    assert(`Alpha Centauri`.isNameCapitalized);
    assert(`11104 Airion`.isNameCapitalized);
    assert(`New York City`.isNameCapitalized);
    assert(`1-Hexanol`.isNameCapitalized);
    assert(`11-Hexanol`.isNameCapitalized);
    assert(`22nd Army`.isNameCapitalized);
    assert(!`22nd army`.isNameCapitalized);
    assert(`2nd World War`.isNameCapitalized);
    assert(`Second World War`.isNameCapitalized);
    assert(`Värmland`.isNameCapitalized);
    assert(!`The big sky`.isNameCapitalized);
    assert(`Suur-London`.isNameCapitalized);
    assert(`Kingdom of Sweden`.isNameCapitalized);
    assert(`Stratford upon Avon`.isNameCapitalized);
    assert(`Henry II`.isNameCapitalized);
}

/** Split `s` into space-separated words while keeping quoted phrases intact.

    A quoted phrase is kept together with the non-space characters directly
    attached before its opening quote (prefix) and after its closing quote
    (suffix), so `noun:"big city"@en` is returned as one word. Runs of spaces
    never produce empty words.

    If an opening quote has no matching closing quote, the quote is treated
    as an ordinary character of the word it occurs in.

    Params:
        s = text to split
        quoteBeginChar = delimiter that opens a quoted phrase
        quoteEndChar = delimiter that closes a quoted phrase

    Returns: the words of `s` in order of appearance, as slices of `s`.

    Note: the scan steps one code unit past each delimiter, so delimiters
    are effectively expected to be a single ASCII character.
 */
S[] quotedWords(S)(S s,
                   string quoteBeginChar = `"`,
                   string quoteEndChar = `"`)
if (isSomeString!S)
{
    import std.algorithm : filter, splitter;
    import std.array : array;
    import std.range : empty;
    import std.string : indexOf, lastIndexOf;

    typeof(return) words;
    while (!s.empty)
    {
        const quoteBeginI = s.indexOf(quoteBeginChar);
        if (quoteBeginI < 0)
        {
            // No quotes left: the remainder is plain space-separated text.
            words ~= s.splitter(' ')
                      .filter!(a => !a.empty)
                      .array;
            break;
        }

        // The quoted word starts right after the last space preceding the
        // opening quote, or at the very beginning of `s` when there is no
        // such space (`lastIndexOf` then returns -1, giving index 0).
        const wordBeginI = s[0 .. quoteBeginI].lastIndexOf(' ') + 1;

        // Everything before the quoted word is ordinary text.
        words ~= s[0 .. wordBeginI].splitter(' ')
                                   .filter!(a => !a.empty)
                                   .array;

        // Index of the closing quote; equals `quoteBeginI` when there is
        // none because `indexOf` then returns -1.
        const quoteEndI = s[quoteBeginI + 1 .. $].indexOf(quoteEndChar) + quoteBeginI + 1;

        // The suffix extends up to the next space, or to the end of `s`
        // when no space follows.
        const tail = s[quoteEndI + 1 .. $];
        const spaceI = tail.indexOf(' ');
        const wordEndI = quoteEndI + 1 + (spaceI >= 0 ? spaceI : cast(ptrdiff_t) tail.length);

        words ~= s[wordBeginI .. wordEndI];
        s = s[wordEndI .. $];
    }
    return words;
}

@safe pure unittest
{
    import std.algorithm.comparison : equal;
    const t = `verb:is noun:"New York" a noun:"big city"@en `;
    const x = t.quotedWords;
    const xs = [`verb:is`, `noun:"New York"`, `a`, `noun:"big city"@en`];
    assert(equal(x, xs));
    assert(equal(` verb:is name:"New York"@en article:a noun:"big city"@en `.quotedWords,
                 [`verb:is`, `name:"New York"@en`, `article:a`, `noun:"big city"@en`]));
    // prefix at the very start of the input stays attached to the quote
    assert(equal(`noun:"New York"`.quotedWords, [`noun:"New York"`]));
    // suffix at the very end of the input stays attached to the quote
    assert(equal(`a noun:"big city"@en`.quotedWords, [`a`, `noun:"big city"@en`]));
    // input without quotes is split at spaces only
    assert(equal(`  a  b `.quotedWords, [`a`, `b`]));
}

/** Check if `s` contains more than one word.

    Words are considered to be separated by an underscore or a space.

    Params:
        s = text to inspect

    Returns: `true` if `s` contains at least one `_` or space character.
 */
auto isMultiWord(const(char)[] s)
{
    import std.algorithm.searching : canFind;
    // With several needles `canFind` returns the 1-based position of the
    // needle that matched, or 0 when none did.
    return s.canFind(`_`, ` `) >= 1;
}

@safe pure unittest
{
    assert(isMultiWord("hey there"));
    assert(isMultiWord("hey_there"));
    assert(!isMultiWord("hey"));
}