1 module nxt.conv_ex;
2 
3 import nxt.array_traits : isCharArray;
4 import nxt.traits_ex : isSourceOfSomeChar;
5 
/* True when member number `index` of enum `E` (in declaration order) is the
 * first member carrying its value, i.e. it is not an alias such as `z2 = z`
 * of an earlier member. Evaluated entirely at compile time; used to avoid
 * emitting duplicate `case` labels when switching over enum values.
 */
private enum bool isFirstMemberWithItsValue(E, size_t index) = () {
    bool first = true;
    static foreach (priorIndex, priorName; __traits(allMembers, E))
    {
        static if (priorIndex < index)
        {
            if (__traits(getMember, E, priorName) ==
                __traits(getMember, E, __traits(allMembers, E)[index]))
                first = false;
        }
    }
    return first;
}();

/** Variant of `std.conv.to` that returns `defaultValue` instead of throwing,
 * which makes it `nothrow`.
 *
 * When `S` is an enum and `T` is `string`, the member name is looked up with
 * a `switch` generated at compile time rather than through `std.conv.to`.
 * If several members share one value (e.g. `z2 = z`), the name of the first
 * declared member is returned. Values matching no member yield
 * `defaultValue`.
 *
 * For all other type combinations the conversion is delegated to
 * `std.conv.to` and any `Exception` it throws is turned into `defaultValue`.
 *
 * Params:
 *   value = the value to convert
 *   defaultValue = the result to use when `value` cannot be converted;
 *                  must be implicitly convertible to `T`
 *
 * Returns: `value` converted to `T`, or `defaultValue` on failure.
 *
 * See_Also: https://forum.dlang.org/post/bnbbheofzaxlabvnvrrc@forum.dlang.org
 * See_Also: http://forum.dlang.org/post/tsszfamjalzviqjhpdcr@forum.dlang.org
 * See_Also: https://forum.dlang.org/post/kdjbkqbnspzshdqtsntg@forum.dlang.org
 */
T toDefaulted(T, S, U)(const scope S value,
                       /*lazy*/ U defaultValue) nothrow
if (!is(T == enum) &&
    is(typeof(() { T r = defaultValue; }))) // TODO: use std.traits.isAssignable!(T, U) ?
{
    static if (is(S == enum) &&
               is(T == string))
    {                           // @nogc:
        switch (value)              // instead of slower `std.conv.to`:
        {
            static foreach (index, member; __traits(allMembers, S)) // instead of slower `EnumMembers`
            {
                // Only the first member of each distinct value gets a `case`;
                // aliases would otherwise produce duplicate case labels.
                static if (isFirstMemberWithItsValue!(S, index))
                {
                case __traits(getMember, S, member):
                    return member;
                }
            }
        default:
            return defaultValue;
        }
    }
    else                        // non-@nogc:
    {
        import std.conv : to;
        try
            return value.to!T;
        catch (Exception) // assume `ConvException`. TODO: can we capture `ConvException` instead make it inferred `nothrow`
            return defaultValue;
    }
}
/// ditto
T toDefaulted(T)(scope const(char)[] value,
                 const T defaultValue) @safe pure nothrow @nogc
if (is(T == enum))
{
    // String `switch` whose cases are the member names of `T`, generated at
    // compile time via `__traits(allMembers)` (cheaper than `EnumMembers`
    // and than going through `std.conv.to`).
    switch (value)
    {
        static foreach (name; __traits(allMembers, T))
        {
        case name:
            // NOTE: a string mixin (`T.` ~ name) would be slower to compile.
            return __traits(getMember, T, name);
        }
    default:
        // `value` names no member of `T`.
        return defaultValue;
    }
}
56 
57 ///
@safe pure nothrow /*TODO: @nogc*/ unittest
{
    // Malformed integer text: the supplied default is returned.
    assert(toDefaulted!int("42_1", 43) == 43);
    // Successful conversion: the default is ignored.
    assert(toDefaulted!string(42, "_43") == "42");
}
63 
64 ///
@safe pure nothrow @nogc unittest
{
    enum E { unknown, x, y, z, z2 = z, }

    // Member names map to their values.
    assert(toDefaulted!E("x", E.init) == E.x);
    assert(toDefaulted!E("z", E.init) == E.z);
    // An alias member resolves to the shared value.
    assert(toDefaulted!E("z2", E.init) == E.z);
    // Unknown names yield the default.
    assert(toDefaulted!E("_", E.init) == E.unknown);
}
73 
74 ///
@safe pure nothrow @nogc unittest
{
    enum E { unknown, x, }

    // Enum values convert to their member names without allocating.
    assert(toDefaulted!string(E.x, "init") == "x");
    assert(toDefaulted!string(E.init, "init") == "unknown");
}
81 
/** More tolerant variant of `std.conv.to`.
 *
 * Tries a sequence of increasingly lenient interpretations of `value` and
 * returns the first one that converts to `U`:
 *
 * $(OL
 *   $(LI `value` verbatim)
 *   $(LI `value` lower-cased (if `tryToLower`))
 *   $(LI the lower-cased (if enabled) text with a trailing `s` removed, then
 *        the original text with a trailing `s` removed
 *        (if `tryStrippingPluralS`))
 *   $(LI for enum `U` only: the member whose name has the smallest
 *        Levenshtein distance to the (lower-cased, if enabled) text, provided
 *        that distance does not exceed `levenshteinMaxDistance`
 *        (if `tryLevenshtein`))
 * )
 *
 * Each step is attempted independently of whether an earlier optional step
 * was enabled, so plural stripping also works with `tryToLower` or
 * `tryLevenshtein` disabled.
 *
 * Params:
 *   value = text to convert
 *   tryStrippingPluralS = retry with a trailing `s` removed
 *   tryToLower = retry with the text lower-cased
 *   tryLevenshtein = for enum `U`, fall back to the closest member name
 *   levenshteinMaxDistance = largest edit distance accepted by the
 *                            Levenshtein fallback
 *
 * Returns: the converted value, or `U.init` when every attempt fails.
 */
auto tolerantTo(U, S)(S value,
                      bool tryStrippingPluralS = true,
                      bool tryToLower = true,
                      bool tryLevenshtein = true,
                      size_t levenshteinMaxDistance = 3)
if (isCharArray!S)
{
    import std.conv : to;

    // 1. Verbatim.
    try
        return value.to!U;
    catch (Exception)
    {
    }

    // Text used by the fuzzier steps below; replaced by the lower-cased
    // spelling when that is enabled and could be computed.
    auto candidate = value[];

    // 2. Lower-cased.
    if (tryToLower)
    {
        import std.uni : toLower;
        try
        {
            candidate = value.toLower;
            return candidate.to!U;
        }
        catch (Exception)
        {
        }
    }

    // 3. Trailing plural `s` removed.
    if (tryStrippingPluralS)
    {
        if (candidate.length != 0 &&
            candidate[$ - 1] == 's')
        {
            try
                return candidate[0 .. $ - 1].to!U;
            catch (Exception)
            {
            }
        }
        // When lower-casing is enabled `candidate` differs from `value`, so
        // also try the original spelling.
        if (tryToLower &&
            value.length != 0 &&
            value[$ - 1] == 's')
        {
            try
                return value[0 .. $ - 1].to!U;
            catch (Exception)
            {
            }
        }
    }

    // 4. Closest enum member name within the permitted edit distance.
    static if (is(U == enum))
    {
        if (tryLevenshtein)
        {
            import std.algorithm.comparison : levenshteinDistance;
            import std.traits : EnumMembers;

            size_t bestDistance = size_t.max;
            U bestMember = U.init;
            foreach (member; EnumMembers!U) // unrolled at compile time
            {
                const distance = levenshteinDistance(candidate, member.to!string);
                if (distance < bestDistance) // strict: first closest member wins ties
                {
                    bestDistance = distance;
                    bestMember = member;
                }
            }
            if (bestDistance <= levenshteinMaxDistance)
                return bestMember;
        }
    }

    return U.init;
}
140 
@safe /*pure*/ unittest  // TODO: make pure when Issue 14962 is fixed
{
    enum E { _, alpha, beta, gamma }

    // exact, plural and truncated spellings of `alpha`
    assert(tolerantTo!E("alpha") == E.alpha);
    assert(tolerantTo!E("alphas") == E.alpha);
    assert(tolerantTo!E("alph") == E.alpha);
    assert(tolerantTo!E("alp") == E.alpha);

    // exact and truncated spellings of `gamma`
    assert(tolerantTo!E("gamma") == E.gamma);
    assert(tolerantTo!E("gamm") == E.gamma);
    assert(tolerantTo!E("gam") == E.gamma);

    // a member named by a single underscore
    assert(tolerantTo!E("_") == E._);
}
156 
/* Build (but do not throw) a `ConvException` describing a parse failure.
 *
 * `msg` is appended to the fixed prefix "Can't parse string: "; `fn` and `ln`
 * are stored as the exception's file and line and default to the call site.
 */
private auto parseError(lazy string msg,
                        string fn = __FILE__,
                        size_t ln = __LINE__) @safe pure
{
    import std.conv : ConvException;

    string fullMessage = "Can't parse string: " ~ msg;
    return new ConvException(fullMessage, fn, ln);
}
164 
/* Require that the range `source` starts with the character `c` and consume it.
 *
 * Params:
 *   source = (alias) the input range to inspect and advance
 *   c = the character expected at the front of `source`
 *   fn = file name reported in the exception (defaults to the call site)
 *   ln = line number reported in the exception (defaults to the call site)
 *
 * Throws: the exception built by `parseError` when `source` is empty or its
 *         front differs from `c`.
 */
private void parseCheck(alias source)(dchar c,
                                      string fn = __FILE__,
                                      size_t ln = __LINE__)
{
    import std.conv : text;
    import std.range.primitives : empty, front, popFront;

    if (source.empty)
        throw parseError(text("unexpected end of input when expecting ", "\"", c, "\""), fn, ln);
    if (source.front != c)
        throw parseError(text("\"", c, "\" is missing"), fn, ln);
    source.popFront();
}
176 
/** Parse the escape sequence at the front of `s`.
 *
 * On entry the front of `s` must be the character that follows the
 * backslash (e.g. `n`, `x`, `u`, `U`). On success the whole sequence has been
 * consumed from `s`.
 *
 * Supported: the single-character escapes `"`, `'`, `0`, `?`, `\`, `a`, `b`,
 * `f`, `n`, `r`, `t`, `v`, and the hexadecimal forms `x` (2 digits),
 * `u` (4 digits) and `U` (8 digits).
 *
 * Copied this from std.conv.
 *
 * Params:
 *   s = source range positioned just after the backslash; advanced past the
 *       escape sequence
 *
 * Returns: the decoded character.
 *
 * Throws: the exception built by `parseError` when the input ends
 *         prematurely, a hex digit is missing, the escape character is
 *         unknown, or the decoded value is not a valid Unicode code point
 *         (a surrogate or a value above U+10FFFF).
 *
 * TODO: Reuse std.conv.parseEscape when moved there.
*/
private dchar parseEscape(Source)(ref Source s)
if (isSourceOfSomeChar!Source)
{
    import std.range.primitives : empty, front, popFront;

    if (s.empty)
        throw parseError("Unterminated escape sequence");

    // Reads `digitCount` hex digits that follow the current front of `s_`
    // and combines them most-significant digit first. Each step first pops
    // the current front (the escape introducer or the previous digit), so on
    // return the front of `s_` is the last digit read.
    dchar parseHexDigits()(size_t digitCount, ref Source s_ = s)  // workaround
    {
        import std.ascii : isAlpha, isHexDigit;

        dchar value = 0;
        foreach (digitIndex; 0 .. digitCount)
        {
            s_.popFront();
            if (s_.empty)
                throw parseError("Unterminated escape sequence");
            dchar c = s_.front;
            if (!isHexDigit(c))
                throw parseError("Hex digit is missing");
            // Letters: fold to upper case, then map 'A'..'F' to 10..15.
            dchar digit = isAlpha(c) ? ((c & ~0x20) - ('A' - 10)) : c - '0';
            value = value << 4;
            value |= digit;
        }
        return value;
    }

    dchar result;

    switch (s.front)
    {
    case '"':   result = '\"';  break;
    case '\'':  result = '\'';  break;
    case '0':   result = '\0';  break;
    case '?':   result = '\?';  break;
    case '\\':  result = '\\';  break;
    case 'a':   result = '\a';  break;
    case 'b':   result = '\b';  break;
    case 'f':   result = '\f';  break;
    case 'n':   result = '\n';  break;
    case 'r':   result = '\r';  break;
    case 't':   result = '\t';  break;
    case 'v':   result = '\v';  break;
    case 'x':   result = parseHexDigits(2); break;
    case 'u':   result = parseHexDigits(4); break;
    case 'U':   result = parseHexDigits(8); break;
    default:
        import std.conv : to;
        throw parseError("Unknown escape character at front of " ~ to!string(s));
    }

    // `\u`/`\U` can spell surrogates or values above U+10FFFF; reject them
    // instead of handing an invalid `dchar` to the caller.
    import std.utf : isValidDchar;
    if (!isValidDchar(result))
        throw parseError("Invalid Unicode code point in escape sequence");

    // The front is still the last character of the sequence (the escape
    // character itself or the final hex digit), so `s` is non-empty here.
    s.popFront();

    return result;
}
254 
/** Parse/Decode Escape Sequences in `s` into Unicode Characters $(D dchar).
 *
 * Every backslash in `s` starts an escape sequence that is decoded with
 * `parseEscape`; all other characters are passed through unchanged. Decoding
 * is performed one element at a time as the result is iterated, so a
 * malformed escape sequence makes construction or `popFront` throw.
 *
 * Params:
 *   s = source range of characters, possibly containing escape sequences
 *
 * Returns: $(D InputRange) of $(D dchar)
 *
 * TODO: Add to Phobos
 */
auto decodeEscapes(Source)(Source s)
if (isSourceOfSomeChar!Source)
{
    static struct Result
    {
        import std.range.primitives : isInfinite;

        this(Source s_)
        {
            _remainingSource = s_;
            popFront(); // prime `_decodedFront` (or `_empty`)
        }

        // empty
        static if (isInfinite!Source)
            enum bool empty = false;
        else
            @property bool empty() const { return _empty; }

        // Always `dchar`: `parseEscape` yields `dchar` even when the source
        // elements are narrower (`char`/`wchar`).
        @property dchar front() const { return _decodedFront; }

        void popFront()
        {
            import std.range.primitives : empty, front, popFront;

            // `_empty` only exists for finite sources.
            static if (!isInfinite!Source)
            {
                if (_remainingSource.empty)
                {
                    _empty = true;
                    return;
                }
            }

            if (_remainingSource.front == '\\') // TODO: nothrow
            {
                _remainingSource.popFront(); // skip the backslash
                _decodedFront = _remainingSource.parseEscape;
            }
            else
            {
                _decodedFront = _remainingSource.front;
                _remainingSource.popFront();
            }
        }

    private:
        Source _remainingSource; // input not yet decoded
        dchar _decodedFront;     // current element of this range
        static if (!isInfinite!Source)
            bool _empty;         // set once the source is exhausted
    }

    return Result(s);
}
311 
312 ///
@safe pure /*TODO: nothrow*/ unittest
{
    import std.algorithm : equal;

    // a lone escape sequence
    assert(equal(decodeEscapes(`\u00F6`), "ö"));
    // an escape sequence surrounded by plain text
    assert(equal(decodeEscapes(`s\u00F6der`), "söder"));
    // consecutive escape sequences
    assert(equal(decodeEscapes(`_\u00F6\u00F6_`), "_öö_"));
    // an escape sequence at the very end of the input
    assert(equal(decodeEscapes(`http://dbpedia.org/resource/Malm\u00F6`),
                 `http://dbpedia.org/resource/Malmö`));
}
321 
322 // import std.range.primitives : isInputRange, ElementType;
323 // /** Range Implementation of std.utf.toUTF8.
324 //     Add to Phobos std.utf
325 // */
326 // string toUTF8(S)(S s)
327 // if (isInputRange!S &&
328 //         is(ElementType!S == dchar))
329 // {
330 //     import std.range.primitives : isRandomAccessRange;
331 //     import std.utf : toUTF8;
332 //     import std.conv : to;
333 //     static if (isRandomAccessRange!S)
334 //         return std.utf.toUTF8(s); // reuse array overload
335 //     else
336 //         return s.to!(typeof(return));
337 // }
338 
339 // /** Range Implementation of std.utf.toUTF16.
340 //     Add to Phobos std.utf
341 // */
342 // wstring toUTF16(S)(S s)
343 // if (isInputRange!S &&
344 //         is(ElementType!S == dchar))
345 // {
346 //     import std.range.primitives : isRandomAccessRange;
347 //     import std.utf : toUTF16;
348 //     import std.conv : to;
349 //     static if (isRandomAccessRange!S)
350 //         return std.utf.toUTF16(s); // reuse array overload
351 //     else
352 //         return s.to!(typeof(return));
353 // }