1 module nxt.conv_ex;
2 
3 import nxt.array_traits : isCharArray;
4 import nxt.traits_ex : isSourceOfSomeChar;
5 
/** Convert `value` to `T`, returning `defaultValue` instead of throwing.
 *
 * A `nothrow` counterpart of `std.conv.to`: every `Exception` raised by the
 * conversion is caught and `defaultValue` is returned in its place.
 *
 * When `S` is an enum and `T` is `string`, the enumerator name is looked up
 * with a `switch` over the members of `S` instead of going through `std.conv`;
 * a value matching no enumerator yields `defaultValue`.
 *
 * Params:
 *   value = source value to convert
 *   defaultValue = fallback result; must be usable to initialize a `T`
 *
 * Returns: the converted `value`, or `defaultValue` when it cannot be converted.
 *
 * See_Also: https://forum.dlang.org/post/bnbbheofzaxlabvnvrrc@forum.dlang.org
 * See_Also: http://forum.dlang.org/post/tsszfamjalzviqjhpdcr@forum.dlang.org
 * See_Also: https://forum.dlang.org/post/kdjbkqbnspzshdqtsntg@forum.dlang.org
 */
T toDefaulted(T, S, U)(const scope S value,
                       /*lazy*/ U defaultValue) nothrow
if (!is(T == enum) &&
    is(typeof(() { T r = defaultValue; }))) // TODO: use std.traits.isAssignable!(T, U) ?
{
    // enum -> string is served by a member switch, everything else by `std.conv.to`
    enum bool enumToName = is(S == enum) && is(T == string);

    static if (!enumToName)
    {
        // non-@nogc:
        import std.conv : to;
        try
        {
            return value.to!T;
        }
        catch (Exception) // assume `ConvException`. TODO: can we capture `ConvException` instead make it inferred `nothrow`
        {
            return defaultValue;
        }
    }
    else
    {
        // @nogc:
        switch (value)          // instead of slower `std.conv.to`:
        {
            static foreach (name; __traits(allMembers, S)) // instead of slower `EnumMembers`
            {
            case __traits(getMember, S, name):
                return name;
            }
        default:
            break;              // no enumerator carries this value
        }
        return defaultValue;
    }
}
/** Look up the enumerator of `T` whose name equals `value`.
 *
 * Enum-target counterpart of the generic `toDefaulted`: the name is matched
 * with a string `switch` over the members of `T`, so nothing is thrown or
 * allocated.
 *
 * Params:
 *   value = enumerator name to look up
 *   defaultValue = result when `value` names no member of `T`
 *
 * Returns: the member of `T` named `value`, or `defaultValue`.
 */
T toDefaulted(T)(scope const(char)[] value,
                 const T defaultValue) @safe pure nothrow @nogc
if (is(T == enum))
{
    switch (value)              // instead of slower `std.conv.to`:
    {
        static foreach (name; __traits(allMembers, T)) // instead of slower `EnumMembers`
        {
        case name:
            // NOTE this is slower: mixin(`return T.` ~ name ~ `;`);
            return __traits(getMember, T, name);
        }
    default:
        break;                  // unknown name
    }
    return defaultValue;
}
56 
///
@safe pure nothrow /*TODO: @nogc*/ unittest
{
    // "42_1" is not a valid `int`, so the default comes back
    const int parsed = "42_1".toDefaulted!int(43);
    assert(parsed == 43);

    // 42 converts to `string` without error, so the default is ignored
    const string rendered = 42.toDefaulted!string("_43");
    assert(rendered == "42");
}
63 
///
@safe pure nothrow @nogc unittest
{
    enum E { unknown, x, y, z, z2 = z, }
    const E fallback = E.init;

    // names of members resolve to those members
    assert("x".toDefaulted!(E)(fallback) == E.x);
    assert("z".toDefaulted!(E)(fallback) == E.z);

    // an alias resolves to the value it aliases
    assert("z2".toDefaulted!(E)(fallback) == E.z);

    // anything else gives the default
    assert("_".toDefaulted!(E)(fallback) == E.unknown);
}
73 
///
@safe pure nothrow @nogc unittest
{
    enum E { unknown, x, }
    enum string fallback = "init";

    // enumerators are rendered as their names, the default stays unused
    assert(E.x.toDefaulted!string(fallback) == "x");
    assert(E.init.toDefaulted!string(fallback) == "unknown");
}
81 
/** More tolerant variant of `std.conv.to`.
 *
 * The following strategies are tried in order and the first one that succeeds
 * determines the result:
 *
 * $(OL
 *   $(LI `value.to!U` as is)
 *   $(LI the lower-cased `value`, when `tryToLower` is set)
 *   $(LI `value` without a trailing `s`, when `tryStrippingPluralS` is set
 *        (both as is and, when `tryToLower` is set, lower-cased))
 *   $(LI for enum `U` when `tryLevenshtein` is set: the member whose name has
 *        the smallest Levenshtein distance to `value` (lower-cased when
 *        `tryToLower` is set), provided that distance does not exceed
 *        `levenshteinMaxDistance`; on ties the member declared first wins)
 * )
 *
 * Params:
 *   value = text to convert
 *   tryStrippingPluralS = retry without a trailing `s`
 *   tryToLower = retry with the lower-cased text
 *   tryLevenshtein = for enum `U`, fall back to the nearest member name
 *   levenshteinMaxDistance = largest edit distance accepted by the
 *                            Levenshtein fallback
 *
 * Returns: the converted value, or `U.init` when every strategy fails.
 */
auto tolerantTo(U, S)(S value,
                      bool tryStrippingPluralS = true,
                      bool tryToLower = true,
                      bool tryLevenshtein = true,
                      size_t levenshteinMaxDistance = 3)
if (isCharArray!S)
{
    import std.conv : to;
    import std.uni : toLower;

    // 1. exact spelling
    try
    {
        return value.to!U;
    }
    catch (Exception) {}        // fall through to the tolerant strategies

    // Lower-cased spelling, computed once and shared by the steps below.
    typeof(value.toLower) lowered;
    bool hasLowered = false;
    if (tryToLower)
    {
        try
        {
            lowered = value.toLower;
            hasLowered = true;
        }
        catch (Exception) {}    // lower-casing failed: go on without it
    }

    // 2. lower-cased spelling
    if (hasLowered)
    {
        try
        {
            return lowered.to!U;
        }
        catch (Exception) {}
    }

    // 3. singular spelling, i.e. without the trailing `s`
    if (tryStrippingPluralS)
    {
        if (value.length != 0 && value[$ - 1] == 's')
        {
            try
            {
                return value[0 .. $ - 1].to!U;
            }
            catch (Exception) {}
        }
        if (hasLowered && lowered.length != 0 && lowered[$ - 1] == 's')
        {
            try
            {
                return lowered[0 .. $ - 1].to!U;
            }
            catch (Exception) {}
        }
    }

    // 4. nearest member name, only meaningful for enums
    static if (is(U == enum))
    {
        if (tryLevenshtein)
        {
            import std.algorithm.comparison : levenshteinDistance;
            import std.traits : EnumMembers;

            size_t bestDistance = size_t.max;
            U best = U.init;
            foreach (member; EnumMembers!U) // unrolled at compile time
            {
                const name = member.to!string;
                const distance = hasLowered
                    ? lowered.levenshteinDistance(name)
                    : value.levenshteinDistance(name);
                if (distance < bestDistance) // strict: the first minimum wins
                {
                    bestDistance = distance;
                    best = member;
                }
            }
            if (bestDistance <= levenshteinMaxDistance)
                return best;
        }
    }

    return U.init;
}
137 
@safe /*pure*/ unittest  // TODO: make pure when Issue 14962 is fixed
{
    enum E { _, alpha, beta, gamma }

    // exact, plural and truncated spellings of `alpha`
    foreach (spelling; ["alpha", "alphas", "alph", "alp"])
        assert(spelling.tolerantTo!E == E.alpha);

    // exact and truncated spellings of `gamma`
    foreach (spelling; ["gamma", "gamm", "gam"])
        assert(spelling.tolerantTo!E == E.gamma);

    assert("_".tolerantTo!E == E._);
}
153 
/** Build (without throwing) the `ConvException` reported for a parse failure.
 *
 * Params:
 *   msg = detail appended to the fixed "Can't parse string: " prefix;
 *         evaluated lazily, i.e. only when the exception is constructed
 *   fn = file name stored in the exception, defaults to the caller's file
 *   ln = line number stored in the exception, defaults to the caller's line
 *
 * Returns: a new `ConvException`; throwing it is left to the caller.
 */
private auto parseError(lazy string msg,
                        string fn = __FILE__,
                        size_t ln = __LINE__) @safe pure
{
    import std.conv : ConvException;

    string fullMessage = "Can't parse string: ";
    fullMessage ~= msg;
    return new ConvException(fullMessage, fn, ln);
}
161 
/** Require that the range `source` starts with `c` and consume that element.
 *
 * Params:
 *   source = (alias) range that is inspected and advanced in place
 *   c = character expected at the front of `source`
 *   fn = file name reported in the exception, defaults to the caller's file
 *   ln = line number reported in the exception, defaults to the caller's line
 *
 * Throws: the `ConvException` built by `parseError` when `source` is empty or
 * its front differs from `c`.
 */
private void parseCheck(alias source)(dchar c,
                                      string fn = __FILE__,
                                      size_t ln = __LINE__)
{
    // Imported up front: the module itself does not import `text`, and the
    // range primitives are needed for sources that are plain arrays.
    import std.conv : text;
    import std.range.primitives : empty, front, popFront;

    if (source.empty)
        throw parseError(text("unexpected end of input when expecting", "\"", c, "\""), fn, ln);
    if (source.front != c)
        throw parseError(text("\"", c, "\" is missing"), fn, ln);
    source.popFront();
}
173 
/** Decode the escape sequence at the front of `s`.
 *
 * `s` must be positioned just after the introducing backslash, i.e. on the
 * character that selects the escape. On success the whole sequence has been
 * consumed from `s`.
 *
 * Supported are the single-character escapes
 * `\"`, `\'`, `\0`, `\?`, `\\`, `\a`, `\b`, `\f`, `\n`, `\r`, `\t`, `\v`
 * and the hexadecimal forms `\xHH`, `\uHHHH` and `\UHHHHHHHH`.
 *
 * Copied this from std.conv.
 *
 * TODO: Reuse std.conv.parseEscape when moved there.
 *
 * Returns: the decoded character.
 * Throws: the `ConvException` built by `parseError` when the sequence is
 * unterminated, a hex digit is missing or the escape character is unknown.
 */
private dchar parseEscape(Source)(ref Source s)
if (isSourceOfSomeChar!Source)
{
    import std.range.primitives : empty, front, popFront;

    if (s.empty)
        throw parseError("Unterminated escape sequence");

    // Drops the current front and returns the value (0 .. 15) of the hex digit
    // that follows it; that digit is left in place as the new front.
    dchar nextHexDigit()(ref Source src = s)  // workaround
    {
        import std.ascii : isAlpha, isHexDigit;
        import std.range.primitives : popFront;
        if (src.empty)
            throw parseError("Unterminated escape sequence");
        src.popFront();
        if (src.empty)
            throw parseError("Unterminated escape sequence");
        dchar digit = src.front;
        if (!isHexDigit(digit))
            throw parseError("Hex digit is missing");
        // letters: clear the case bit, then map 'A' .. 'F' to 10 .. 15
        return isAlpha(digit) ? ((digit & ~0x20) - ('A' - 10)) : digit - '0';
    }

    dchar decoded;
    uint hexDigitCount = 0;     // non-zero only for the \x, \u and \U forms

    switch (s.front)
    {
    case '"':   decoded = '\"';  break;
    case '\'':  decoded = '\'';  break;
    case '0':   decoded = '\0';  break;
    case '?':   decoded = '\?';  break;
    case '\\':  decoded = '\\';  break;
    case 'a':   decoded = '\a';  break;
    case 'b':   decoded = '\b';  break;
    case 'f':   decoded = '\f';  break;
    case 'n':   decoded = '\n';  break;
    case 'r':   decoded = '\r';  break;
    case 't':   decoded = '\t';  break;
    case 'v':   decoded = '\v';  break;
    case 'x':   hexDigitCount = 2;  break;
    case 'u':   hexDigitCount = 4;  break;
    case 'U':   hexDigitCount = 8;  break;
    default:
        import std.conv : to;
        throw parseError("Unknown escape character at front of " ~ to!string(s));
    }

    if (hexDigitCount != 0)
    {
        // most significant digit first: shift in four bits per digit
        decoded = 0;
        foreach (_; 0 .. hexDigitCount)
            decoded = (decoded << 4) | nextHexDigit();
    }

    if (s.empty)
        throw parseError("Unterminated escape sequence");

    // drop the last character of the sequence, which is still at the front
    s.popFront();

    return decoded;
}
251 
/** Parse/Decode Escape Sequences in $(D s) into Unicode Characters $(D dchar).
 *
 * Elements of `s` are passed through unchanged, except that a backslash
 * together with the escape sequence following it is replaced by the character
 * `parseEscape` decodes from it. Decoding is lazy, one element ahead: the
 * first element is decoded on construction, each further one on `popFront`.
 *
 * Returns: $(D InputRange) of $(D dchar)
 * Throws: the `ConvException` raised by `parseEscape` for a malformed escape
 * sequence, either from this call (first element) or from a later `popFront`.
 *
 * TODO: Move to Phobos
 */
auto decodeEscapes(Source)(Source s)
if (isSourceOfSomeChar!Source)
{
    import std.range.primitives : ElementType;
    alias E = ElementType!Source;
    static struct Result
    {
        import std.range.primitives : isInfinite;

        this(Source s_)
        {
            _remainingSource = s_;
            popFront();         // decode the first element right away
        }

        // empty
        static if (isInfinite!Source)
            enum bool empty = false;
        else
            bool empty() const @property { return _empty; }

        @property E front() const { return _decodedFront; }

        void popFront()
        {
            import std.range.primitives : empty, front, popFront;
            if (!_remainingSource.empty)
            {
                if (_remainingSource.front == '\\') // TODO: nothrow
                {
                    // skip the backslash, then decode what follows it
                    _remainingSource.popFront();
                    _decodedFront = _remainingSource.parseEscape;
                }
                else
                {
                    _decodedFront = _remainingSource.front;
                    _remainingSource.popFront();
                }
            }
            else
            {
                // `_empty` is declared for finite sources only; without this
                // guard the function does not compile for infinite sources
                static if (!isInfinite!Source)
                    _empty = true;
            }
        }

    private:
        Source _remainingSource; // input not yet decoded
        E _decodedFront;         // element currently exposed by `front`
        static if (!isInfinite!Source)
            bool _empty;         // set once `popFront` finds the source exhausted
    }

    return Result(s);
}
308 
///
@safe pure /* nothrow */ unittest
{
    import std.algorithm.comparison : equal;

    // a lone escape
    auto lone = `\u00F6`.decodeEscapes;
    assert(lone.equal("ö"));

    // an escape surrounded by plain text
    auto embedded = `s\u00F6der`.decodeEscapes;
    assert(embedded.equal("söder"));

    // two escapes in a row
    auto doubled = `_\u00F6\u00F6_`.decodeEscapes;
    assert(doubled.equal("_öö_"));

    // an escape at the very end of the input
    auto trailing = `http://dbpedia.org/resource/Malm\u00F6`.decodeEscapes;
    assert(trailing.equal(`http://dbpedia.org/resource/Malmö`));
}
318 
319 // import std.range.primitives : isInputRange, ElementType;
320 // /** Range Implementation of std.utf.toUTF8.
321 //     Move to Phobos std.utf
322 // */
323 // string toUTF8(S)(S s)
324 // if (isInputRange!S &&
325 //         is(ElementType!S == dchar))
326 // {
327 //     import std.range.primitives : isRandomAccessRange;
328 //     import std.utf : toUTF8;
329 //     import std.conv : to;
330 //     static if (isRandomAccessRange!S)
331 //         return std.utf.toUTF8(s); // reuse array overload
332 //     else
333 //         return s.to!(typeof(return));
334 // }
335 
336 // /** Range Implementation of std.utf.toUTF16.
337 //     Move to Phobos std.utf
338 // */
339 // wstring toUTF16(S)(S s)
340 // if (isInputRange!S &&
341 //         is(ElementType!S == dchar))
342 // {
343 //     import std.range.primitives : isRandomAccessRange;
344 //     import std.utf : toUTF16;
345 //     import std.conv : to;
346 //     static if (isRandomAccessRange!S)
347 //         return std.utf.toUTF16(s); // reuse array overload
348 //     else
349 //         return s.to!(typeof(return));
350 // }