1 module string_ex;
2 
3 import std.traits : isSomeString;
4 import dbg;
5 
/** Return `true` if `s` begins with an uppercase letter that is immediately
    followed by a lowercase letter.

    Only the first two elements of `s` are inspected. Empty and
    single-element strings yield `false`.
 */
bool isCapitalizedEasy(S)(S s)
    if (isSomeString!S)
{
    import std.range.primitives : empty, front, popFront;
    import std.uni : isUpper, isLower;

    // The leading element must exist and be uppercase.
    if (s.empty || !s.front.isUpper)
        return false;

    // The element right after it must exist and be lowercase.
    s.popFront;
    return !s.empty && s.front.isLower;
}
20 
@safe pure unittest
{
    // Too short: a second, lowercase letter is required.
    assert(!isCapitalizedEasy(`A`));
    assert(!isCapitalizedEasy(`a`));

    // Wrong case on the first or the second letter.
    assert(!isCapitalizedEasy(`alpha`));
    assert(!isCapitalizedEasy(`ALPHA`));
    assert(!isCapitalizedEasy(`aThing`));

    // Uppercase followed by lowercase.
    assert(isCapitalizedEasy(`Alpha`));
}
30 
/** Check if $(D s) is written as a capitalized word.

    The first element must be an ASCII digit or an uppercase letter, and every
    following element must be an ASCII digit or a lowercase letter. A
    single-element string is accepted only when that element is a digit.
    Empty strings yield `false`.

    TODO make nothrow by not using front
 */
bool isCapitalized(S)(S s)
    if (isSomeString!S)
{
    import std.algorithm.searching : all;
    import std.ascii : isDigit;
    import std.range.primitives : empty, front, popFront;
    import std.uni : isUpper, isLower;

    if (s.empty)
        return false;

    const head = s.front;
    const startsWithDigit = head.isDigit;
    if (!startsWithDigit && !head.isUpper)
        return false;

    s.popFront;

    // A lone element only qualifies when it is a digit.
    if (s.empty)
        return startsWithDigit;

    return s.all!(c => c.isDigit || c.isLower);
}
59 
@safe pure unittest
{
    // Rejected: lowercase start, uppercase tail, or uppercase in the middle.
    assert(!isCapitalized(`alpha`));
    assert(!isCapitalized(`ALPHA`));
    assert(!isCapitalized(`aThing`));

    // Accepted: uppercase start followed by lowercase only.
    assert(isCapitalized(`Alpha`));
}
67 
/** Return `true` if `s` has proper name-style capitalization, such as in `Alpha Centauri`.

    `s` is split at ASCII whitespace and at `-`. Each resulting word must
    satisfy `isCapitalized`; words after the first are additionally accepted
    when they consist only of uppercase letters (as in `Henry II`) or are one
    of `of` and `upon`.
 */
bool isNameCapitalized(S)(S s)
    if (isSomeString!S)
{
    import std.algorithm.comparison : among;
    import std.algorithm.iteration : splitter;
    import std.algorithm.searching : all;
    import std.ascii : isWhite;
    import std.range : enumerate;
    import std.uni : isUpper;

    auto words = s.splitter!(c => c.isWhite || c.among!('-') != 0);

    return words.enumerate.all!((word) {
        // Relaxed rules apply to every word except the first one.
        if (word.index >= 1 &&
            (word.value.all!(c => c.isUpper) || // Henry II
             word.value.among!(`of`, `upon`)))
            return true;
        return word.value.isCapitalized; // TODO add enumerate and all middle word to be a preposition
    });
}
86 
@safe pure unittest
{
    // Single words.
    assert(!isNameCapitalized(`alpha`));
    assert(!isNameCapitalized(`ALPHA`));
    assert(!isNameCapitalized(`aThing`));
    assert(isNameCapitalized(`Alpha`));
    assert(isNameCapitalized(`Värmland`));

    // Multiple words separated by spaces.
    assert(!isNameCapitalized(`alpha centauri`));
    assert(!isNameCapitalized(`ALPHA CENTAURI`));
    assert(isNameCapitalized(`Alpha Centauri`));
    assert(isNameCapitalized(`New York City`));
    assert(isNameCapitalized(`Second World War`));
    assert(!isNameCapitalized(`The big sky`));

    // Words that start with digits.
    assert(isNameCapitalized(`11104 Airion`));
    assert(isNameCapitalized(`22nd Army`));
    assert(!isNameCapitalized(`22nd army`));
    assert(isNameCapitalized(`2nd World War`));

    // Hyphen-separated words.
    assert(isNameCapitalized(`1-Hexanol`));
    assert(isNameCapitalized(`11-Hexanol`));
    assert(isNameCapitalized(`Suur-London`));

    // Connecting prepositions and all-uppercase trailing words.
    assert(isNameCapitalized(`Kingdom of Sweden`));
    assert(isNameCapitalized(`Stratford upon Avon`));
    assert(isNameCapitalized(`Henry II`));
}
111 
/** Split `s` into space-separated words, keeping quoted sections intact.

    A quoted section, together with any non-space characters directly attached
    before it (a prefix such as `noun:`) and after it (a suffix such as `@en`),
    is returned as a single word, including the spaces inside the quotes.
    Outside quoted sections, runs of spaces only separate words and never
    produce empty words.

    Params:
        s = the string to split
        quoteBeginChar = delimiter that opens a quoted section
        quoteEndChar = delimiter that closes a quoted section

    Returns: the words of `s`, in order of appearance, as slices of `s`.

    Note: the scan advances one element past each delimiter, so
    single-character delimiters are assumed. If no closing delimiter is found,
    the word ends at the first space after the opening delimiter.
 */
S[] quotedWords(S)(S s,
                   string quoteBeginChar = `"`,
                   string quoteEndChar = `"`)
    if (isSomeString!S)
{
    import std.array : array;
    import std.algorithm : filter, splitter;
    import std.string : indexOf, lastIndexOf;
    import std.range : empty;

    typeof(return) words;
    while (!s.empty)
    {
        const quoteBeginI = s.indexOf(quoteBeginChar);
        if (quoteBeginI < 0)
        {
            // No quoted section left: the remainder is plain words.
            words ~= s.splitter(' ')
                      .filter!(a => !a.empty)
                      .array;
            break;
        }

        // The quoted word starts right after the last space before the quote.
        // `lastIndexOf` yields -1 when there is no such space, which places
        // the start at index 0 so that a prefix at the very beginning of `s`
        // stays attached to its quoted section.
        const tokenBeginI = s[0 .. quoteBeginI].lastIndexOf(' ') + 1;

        // Plain words in front of the quoted word.
        words ~= s[0 .. tokenBeginI].splitter(' ')
                                    .filter!(a => !a.empty)
                                    .array;

        // Index of the closing delimiter; equals `quoteBeginI` when none is found.
        const quoteEndI = s[quoteBeginI + 1 .. $].indexOf(quoteEndChar) + quoteBeginI + 1;

        // The quoted word runs up to the next space, or to the end of `s`
        // when no space follows, so that a trailing suffix stays attached.
        const suffixLen = s[quoteEndI + 1 .. $].indexOf(' ');
        const tokenEndI = suffixLen >= 0
            ? quoteEndI + 1 + suffixLen
            : cast(ptrdiff_t) s.length;

        words ~= s[tokenBeginI .. tokenEndI];
        s = s[tokenEndI .. $];
    }
    return words;
}
158 
@safe pure unittest
{
    import std.stdio;
    import std.algorithm.comparison : equal;

    // Spaces inside quotes are kept; runs of spaces outside only separate words.
    const input = `verb:is   noun:"New York" a noun:"big  city"@en `;
    const expected = [`verb:is`, `noun:"New York"`, `a`, `noun:"big  city"@en`];
    const actual = input.quotedWords;
    assert(equal(actual, expected));
    // TODO assert(equal(` verb:is   name:"New York"@en article:a noun:"big  city"@en `.quotedWords,
    //              [`verb:is`, `name:"New York"@en`, `article:a`, `noun:"big  city"@en`]));
}
170 
/** Check if `s` contains more than one word.

    A string counts as multi-word when it contains an underscore or a space.
 */
auto isMultiWord(const(char)[] s)
{
    import std.algorithm.searching : canFind;

    // With several needles `canFind` yields the 1-based position of the
    // needle that matched, or 0 when none of them occurs in `s`.
    const matchedNeedle = s.canFind(`_`, ` `);
    return matchedNeedle != 0;
}
177 
@safe pure unittest
{
    // A space separates two words.
    assert("hey there".isMultiWord);
}