1 module nxt.conv_ex;
2 
3 import nxt.array_traits : isCharArray;
4 import nxt.traits_ex : isSourceOfSomeChar;
5 
/** Non-throwing counterpart of `std.conv.to`.
 *
 * Converts `value` to `T` via `std.conv.to`; if that conversion throws an
 * `Exception`, `defaultValue` is returned instead, which is what allows this
 * function to be `nothrow`.
 *
 * Params:
 *      value = the value to convert
 *      defaultValue = fallback returned when the conversion throws
 *
 * Returns: `value` converted to `T`, or `defaultValue` on conversion failure.
 *
 * See_Also: https://forum.dlang.org/post/bnbbheofzaxlabvnvrrc@forum.dlang.org
 * See_Also: http://forum.dlang.org/post/tsszfamjalzviqjhpdcr@forum.dlang.org
 * See_Also: https://forum.dlang.org/post/kdjbkqbnspzshdqtsntg@forum.dlang.org
 */
T toDefaulted(T, S, U)(const scope S value,
                       /*lazy*/ U defaultValue) nothrow
if (!is(T == enum) &&
    is(typeof(() { T r = defaultValue; }))) // TODO use std.traits.isAssignable!(T, U) ?
{
    import std.conv : to;

    try
    {
        return to!T(value);
    }
    catch (Exception conversionFailure)
    {
        // Expected to be a `ConvException`; the broader `Exception` is
        // caught so that the function can be declared `nothrow`.
        // TODO can we capture `ConvException` instead and have `nothrow` inferred?
        return defaultValue;
    }
}
/// ditto
T toDefaulted(T)(scope const(char)[] value,
                 const T defaultValue) @safe pure nothrow @nogc
if (is(T == enum))
{
    // Start from the fallback and overwrite it only when `value` is exactly
    // the name of a member of `T`.
    T result = defaultValue;

    // A string `switch` generated at compile time is used instead of the
    // slower `std.conv.to`.
    Lmatch: switch (value)
    {
        // `__traits(allMembers)` is used instead of the slower `EnumMembers`.
        static foreach (name; __traits(allMembers, T))
        {
        case name:
            // NOTE a string mixin (`T.` ~ name) would be slower to compile
            result = __traits(getMember, T, name);
            break Lmatch;   // `break` inside `static foreach` must be labelled
        }
    default:
        break;
    }

    return result;
}
43 
///
@safe pure nothrow /*TODO @nogc*/ unittest
{
    // input that cannot be parsed as `int` yields the fallback
    const fromInvalidText = "42_1".toDefaulted!int(43);
    assert(fromInvalidText == 43);

    // a convertible value is converted and the fallback is ignored
    const fromNumber = 42.toDefaulted!string("_43");
    assert(fromNumber == "42");
}
50 
///
@safe pure nothrow @nogc unittest
{
    enum E { unknown, x, y, z, z2 = z, }
    enum fallback = E.init;

    // member names map to their values
    assert(toDefaulted!E("x", fallback) == E.x);
    assert(toDefaulted!E("z", fallback) == E.z);

    // an alias member resolves to the value it aliases
    assert(toDefaulted!E("z2", fallback) == E.z);

    // anything that is not a member name yields the fallback
    assert(toDefaulted!E("_", fallback) == E.unknown);
}
60 
/** More tolerant variant of `std.conv.to`.
 *
 * The following strategies are tried in order and the first one that
 * succeeds determines the result:
 *
 * $(OL
 *   $(LI strict conversion `value.to!U`)
 *   $(LI strict conversion of the lower-cased `value` (if `tryToLower`))
 *   $(LI strict conversion of `value` with one trailing `s` removed, also
 *        lower-cased if `tryToLower` (if `tryStrippingPluralS`))
 *   $(LI when `U` is an enum and `tryLevenshtein` is set: the member whose
 *        name has the smallest Levenshtein distance to `value` (lower-cased
 *        if `tryToLower`), provided that distance does not exceed
 *        `levenshteinMaxDistance`)
 * )
 *
 * Params:
 *      value = text to convert
 *      tryStrippingPluralS = retry without a trailing `s`
 *      tryToLower = retry with the lower-cased text
 *      tryLevenshtein = fall back to the nearest enum member name
 *      levenshteinMaxDistance = largest edit distance accepted by the
 *                               Levenshtein fallback
 *
 * Returns: the converted value, or `U.init` when every strategy fails.
 */
auto tolerantTo(U, S)(S value,
                      bool tryStrippingPluralS = true,
                      bool tryToLower = true,
                      bool tryLevenshtein = true,
                      size_t levenshteinMaxDistance = 3)
if (isCharArray!S)
{
    import std.conv : to;

    // 1. Strict conversion.
    try
    {
        return value.to!U;
    }
    catch (Exception e)
    {
        // fall through to the more tolerant strategies
    }

    // 2. Strict conversion of the lower-cased text.
    if (tryToLower)
    {
        import std.uni : toLower;
        try
        {
            return value.toLower.to!U;
        }
        catch (Exception e)
        {
        }
    }

    // 3. Strict conversion with a trailing plural `s` removed. This is tried
    // independently of `tryToLower`; previously it was only reachable from
    // the `catch` of the lower-casing attempt, which never threw.
    if (tryStrippingPluralS)
    {
        import nxt.array_algorithm : endsWith;
        if (value.endsWith(`s`))
        {
            auto singular = value[0 .. $ - 1];
            try
            {
                return singular.to!U;
            }
            catch (Exception e)
            {
            }
            if (tryToLower)
            {
                import std.uni : toLower;
                try
                {
                    return singular.toLower.to!U;
                }
                catch (Exception e)
                {
                }
            }
        }
    }

    // 4. Nearest enum member name, bounded by `levenshteinMaxDistance`.
    static if (is(U == enum))
    {
        import std.traits : EnumMembers;
        static if (EnumMembers!U.length != 0)
        {
            if (tryLevenshtein)
            {
                import std.algorithm.comparison : levenshteinDistance;
                import std.algorithm.iteration : map;
                import std.algorithm.searching : minPos;
                import std.typecons : tuple;
                import std.uni : toLower;

                // Returns `tuple(distance, member)` for the member of `U`
                // whose name is nearest to `key`; the first such member wins
                // on ties.
                static auto nearestMember(K)(K key)
                {
                    auto members = [EnumMembers!U]; // TODO make const
                    return members.map!(member => tuple(key.levenshteinDistance(member.to!string), member))
                                  .minPos!"a[0] < b[0]".front;
                }

                auto nearest = tryToLower ? nearestMember(value.toLower) : nearestMember(value);
                if (nearest[0] <= levenshteinMaxDistance)
                    return nearest[1];
            }
        }
    }

    return U.init;
}

@safe /*pure*/ unittest  // TODO make pure when Issue 14962 is fixed
{
    enum E { _, alpha, beta, gamma }

    assert("alpha".tolerantTo!E == E.alpha);
    assert("alphas".tolerantTo!E == E.alpha);
    assert("alph".tolerantTo!E == E.alpha);
    assert("alp".tolerantTo!E == E.alpha);

    assert("gamma".tolerantTo!E == E.gamma);
    assert("gamm".tolerantTo!E == E.gamma);
    assert("gam".tolerantTo!E == E.gamma);

    assert("_".tolerantTo!E == E._);

    // lower-casing alone
    assert("ALPHA".tolerantTo!E(false, true, false) == E.alpha);

    // plural stripping works without lower-casing and without Levenshtein
    assert("alphas".tolerantTo!E(true, false, false) == E.alpha);

    // plural stripping combined with lower-casing
    assert("Betas".tolerantTo!E(true, true, false) == E.beta);

    // all tolerant strategies disabled
    assert("alphas".tolerantTo!E(false, false, false) == E._);

    // the Levenshtein fallback honours `levenshteinMaxDistance`
    assert("alphabetical".tolerantTo!E == E._);
    assert("alphabetical".tolerantTo!E(true, true, true, 7) == E.alpha);
}
145 
/** Build (but do not throw) a `ConvException` describing a parse failure.
 *
 * Params:
 *      msg = detail appended to the fixed "Can't parse string: " prefix
 *      fn = file name recorded in the exception
 *      ln = line number recorded in the exception
 *
 * Returns: a newly allocated `ConvException`.
 */
private auto parseError(lazy string msg,
                        string fn = __FILE__,
                        size_t ln = __LINE__) @safe pure
{
    import std.conv : ConvException;

    enum prefix = "Can't parse string: ";
    auto failure = new ConvException(prefix ~ msg, fn, ln);
    return failure;
}
153 
/** Require `c` at the front of `source` and consume it.
 *
 * Params:
 *      source = (alias) the range to inspect and advance
 *      c = the character that must be at the front of `source`
 *      fn = file name recorded in a thrown exception
 *      ln = line number recorded in a thrown exception
 *
 * Throws: the exception built by `parseError` when `source` is empty or its
 * front differs from `c`.
 */
private void parseCheck(alias source)(dchar c,
                                      string fn = __FILE__,
                                      size_t ln = __LINE__)
{
    // These names are not imported at module level, so they must be brought
    // into scope here, before their first use.
    import std.conv : text;
    import std.range.primitives : empty, front, popFront;

    if (source.empty)
        throw parseError(text("unexpected end of input when expecting \"", c, "\""), fn, ln);
    if (source.front != c)
        throw parseError(text("\"", c, "\" is missing"), fn, ln);
    source.popFront();
}
165 
/** Decode the escape sequence at the front of `s`.
 *
 * On entry the front of `s` must be the character that selects the escape
 * (for example `n`, `x` or `u`). On successful return the whole sequence,
 * including any hexadecimal digits, has been consumed from `s`.
 *
 * Copied from std.conv.
 *
 * TODO Reuse std.conv.parseEscape when moved there.
 *
 * Returns: the decoded character.
 *
 * Throws: the exception built by `parseError` when the sequence is
 * unterminated, names an unknown escape character, or lacks a hex digit.
 */
private dchar parseEscape(Source)(ref Source s)
if (isSourceOfSomeChar!Source)
{
    import std.range.primitives : empty, front, popFront;

    enum unterminated = "Unterminated escape sequence";

    if (s.empty)
        throw parseError(unterminated);

    // Advances `src` by one element and returns the numeric value of the hex
    // digit found there. The digit itself is left at the front; it is
    // consumed by the next call or by the final `popFront` below.
    dchar nextHexDigit()(ref Source src = s)  // workaround
    {
        import std.ascii : isAlpha, isHexDigit;

        if (src.empty)
            throw parseError(unterminated);
        src.popFront();
        if (src.empty)
            throw parseError(unterminated);
        dchar digit = src.front;
        if (!isHexDigit(digit))
            throw parseError("Hex digit is missing");
        // clearing bit 0x20 folds lower-case letters onto upper-case ones
        return isAlpha(digit) ? ((digit & ~0x20) - ('A' - 10)) : digit - '0';
    }

    dchar decoded;

    switch (s.front)
    {
    // single-character escapes
    case '"':
        decoded = '\"';
        break;
    case '\'':
        decoded = '\'';
        break;
    case '0':
        decoded = '\0';
        break;
    case '?':
        decoded = '\?';
        break;
    case '\\':
        decoded = '\\';
        break;
    case 'a':
        decoded = '\a';
        break;
    case 'b':
        decoded = '\b';
        break;
    case 'f':
        decoded = '\f';
        break;
    case 'n':
        decoded = '\n';
        break;
    case 'r':
        decoded = '\r';
        break;
    case 't':
        decoded = '\t';
        break;
    case 'v':
        decoded = '\v';
        break;

    // two hex digits
    case 'x':
        decoded  = nextHexDigit() << 4;
        decoded |= nextHexDigit();
        break;

    // four hex digits
    case 'u':
        decoded  = nextHexDigit() << 12;
        decoded |= nextHexDigit() << 8;
        decoded |= nextHexDigit() << 4;
        decoded |= nextHexDigit();
        break;

    // eight hex digits
    case 'U':
        decoded  = nextHexDigit() << 28;
        decoded |= nextHexDigit() << 24;
        decoded |= nextHexDigit() << 20;
        decoded |= nextHexDigit() << 16;
        decoded |= nextHexDigit() << 12;
        decoded |= nextHexDigit() << 8;
        decoded |= nextHexDigit() << 4;
        decoded |= nextHexDigit();
        break;

    default:
        import std.conv : to;
        throw parseError("Unknown escape character at front of " ~ to!string(s));
    }

    if (s.empty)
        throw parseError(unterminated);

    // consume the last character that belongs to the escape sequence
    s.popFront();

    return decoded;
}
243 
/** Parse/Decode Escape Sequences in $(D s) into Unicode Characters $(D dchar).
 *
 * Elements of `s` are passed through unchanged, except that a backslash
 * together with the escape sequence following it is replaced by the character
 * that `parseEscape` decodes from it. Decoding is done one element ahead:
 * the first element is decoded on construction.
 *
 * Params:
 *      s = source of characters, possibly containing escape sequences
 *
 * Returns: $(D InputRange) of the decoded elements.
 *
 * Throws: whatever `parseEscape` throws on a malformed escape sequence,
 * either on construction or from `popFront`.
 *
 * TODO Add to Phobos
 */
auto decodeEscapes(Source)(Source s)
if (isSourceOfSomeChar!Source)
{
    import std.range.primitives : ElementType;
    alias E = ElementType!Source;
    static struct Result
    {
        import std.range.primitives : isInfinite;

        this(Source s_)
        {
            _remainingSource = s_;
            popFront;           // decode the first element eagerly
        }

        // empty
        static if (isInfinite!Source)
        {
            enum bool empty = false;
        }
        else
        {
            @property bool empty() const { return _empty; }
        }

        @property E front() const { return _decodedFront; }

        void popFront()
        {
            import std.range.primitives : empty, front, popFront;
            if (!_remainingSource.empty)
            {
                if (_remainingSource.front == '\\') // TODO nothrow
                {
                    // skip the backslash and decode what follows it
                    _remainingSource.popFront();
                    _decodedFront = _remainingSource.parseEscape;
                }
                else
                {
                    _decodedFront = _remainingSource.front;
                    _remainingSource.popFront();
                }
            }
            else
            {
                // `_empty` only exists for finite sources; guarding the
                // assignment keeps this struct compilable for infinite ones.
                static if (!isInfinite!Source)
                {
                    _empty = true;
                }
            }
        }

    private:
        Source _remainingSource;    // not yet decoded part of the input
        E _decodedFront;            // element currently exposed by `front`
        static if (!isInfinite!Source)
        {
            bool _empty;            // set once the source has been exhausted
        }
    }

    return Result(s);
}
306 
///
@safe pure /*TODO nothrow*/ unittest
{
    import std.algorithm : equal;

    // each entry pairs an escaped input with its expected decoded form
    foreach (testCase; [[`\u00F6`, "ö"],
                        [`s\u00F6der`, "söder"],
                        [`_\u00F6\u00F6_`, "_öö_"],
                        [`http://dbpedia.org/resource/Malm\u00F6`, `http://dbpedia.org/resource/Malmö`]])
    {
        const escaped = testCase[0];
        const expected = testCase[1];
        assert(escaped.decodeEscapes.equal(expected));
    }
}
316 
317 // import std.range.primitives : isInputRange, ElementType;
318 // /** Range Implementation of std.utf.toUTF8.
319 //     Add to Phobos std.utf
320 // */
321 // string toUTF8(S)(S s)
322 // if (isInputRange!S &&
323 //         is(ElementType!S == dchar))
324 // {
325 //     import std.range.primitives : isRandomAccessRange;
326 //     import std.utf : toUTF8;
327 //     import std.conv : to;
328 //     static if (isRandomAccessRange!S)
329 //         return std.utf.toUTF8(s); // reuse array overload
330 //     else
331 //         return s.to!(typeof(return));
332 // }
333 
334 // /** Range Implementation of std.utf.toUTF16.
335 //     Add to Phobos std.utf
336 // */
337 // wstring toUTF16(S)(S s)
338 // if (isInputRange!S &&
339 //         is(ElementType!S == dchar))
340 // {
341 //     import std.range.primitives : isRandomAccessRange;
342 //     import std.utf : toUTF16;
343 //     import std.conv : to;
344 //     static if (isRandomAccessRange!S)
345 //         return std.utf.toUTF16(s); // reuse array overload
346 //     else
347 //         return s.to!(typeof(return));
348 // }