1 module conv_ex;
2 
3 import std.traits: isSomeChar, isSomeString, CommonType;
4 import traits_ex : haveCommonType, isSourceOfSomeChar;
5 import std.range : isInputRange, ElementType;
6 
/** Convert $(D value) to $(D T), falling back to $(D defaultValue) when the
    conversion fails.

    Thin wrapper around $(D std.conv.to): any $(D Exception) raised by the
    conversion is swallowed and the fallback is returned instead.

    Params:
        value = the value to convert
        defaultValue = result used when $(D value) cannot be converted to $(D T)

    Returns: the converted value, or $(D defaultValue), as the common type of
    $(D T) and $(D U).
 */
CommonType!(T, U) to(T, U, S)(S value, U defaultValue)
    if (haveCommonType!(T, U))
{
    // renamed so that it cannot be confused with this overload
    import std.conv : convert = to;
    try
    {
        return convert!T(value);
    }
    catch (Exception) // presumably a ConvException. TODO catch only ConvException if nothrow can still be inferred
    {
        return defaultValue;
    }
}
22 
@safe pure nothrow /*@nogc*/ unittest
{
    // unparsable input: the fallback comes back, widened to the common type
    const fallback = "42_1".to!int(42.1);
    assert(fallback == 42.1);

    // successful conversion: the fallback is ignored
    const converted = 42.to!string("_42");
    assert(converted == "42");
}
28 
/** More tolerant variant of $(D std.conv.to).

    Tries a sequence of increasingly forgiving interpretations of $(D t) and
    returns the first one that converts to $(D U):
    $(OL
      $(LI $(D t) exactly as given)
      $(LI $(D t) lower-cased (if $(D tryToLower)))
      $(LI $(D t), then its lower-cased form, with one trailing $(D s)
           removed (if $(D tryStrippingPluralS)))
      $(LI for enum targets only: the member whose name has the smallest
           Levenshtein distance to the (lower-cased) input, provided that
           distance does not exceed $(D levenshteinMaxDistance)
           (if $(D tryLevenshtein)))
    )

    Params:
        t = the string to convert
        tryStrippingPluralS = also try the input without a trailing $(D s)
        tryToLower = also try the lower-cased input
        tryLevenshtein = for enum $(D U), fall back to the closest member name
        levenshteinMaxDistance = largest edit distance accepted by the
                                 Levenshtein fallback

    Returns: the converted value, or $(D U.init) when every attempt fails.
*/
auto tolerantTo(U, S)(S t,
                      bool tryStrippingPluralS = true,
                      bool tryToLower = true,
                      bool tryLevenshtein = true,
                      size_t levenshteinMaxDistance = 3) if (isSomeString!S)
{
    import std.conv : to;
    import std.traits : Unqual;

    // 1. The spelling exactly as given.
    try
    {
        return t.to!U;
    }
    catch (Exception)
    {
    }

    // 2. The lower-cased spelling. `lowered` stays equal to `t` when
    //    lower-casing is disabled, fails, or changes nothing.
    Unqual!S lowered = t;
    if (tryToLower)
    {
        import std.uni : toLower;
        try
        {
            lowered = t.toLower;
            if (lowered != t)
                return lowered.to!U;
        }
        catch (Exception)
        {
        }
    }

    // 3. Both spellings with one trailing `s` removed. Each attempt is tried
    //    in sequence rather than from inside a `catch`, so a failure of an
    //    earlier step can never hide this one.
    if (tryStrippingPluralS)
    {
        import std.algorithm.searching : endsWith;
        try
        {
            if (t.endsWith(`s`))
                return t[0 .. $ - 1].to!U;
        }
        catch (Exception)
        {
        }
        try
        {
            if (lowered != t && lowered.endsWith(`s`))
                return lowered[0 .. $ - 1].to!U;
        }
        catch (Exception)
        {
        }
    }

    // 4. Closest enum member name, bounded by `levenshteinMaxDistance`.
    static if (is(U == enum))
    {
        if (tryLevenshtein)
        {
            import std.algorithm.comparison : levenshteinDistance;
            import std.traits : EnumMembers;

            bool found = false;
            size_t bestDistance;
            U bestMember = U.init;
            foreach (member; EnumMembers!U) // unrolled at compile time
            {
                const distance = lowered.levenshteinDistance(member.to!string);
                // strict `<` keeps the first member among equally close ones
                if (distance <= levenshteinMaxDistance &&
                    (!found || distance < bestDistance))
                {
                    found = true;
                    bestDistance = distance;
                    bestMember = member;
                }
            }
            if (found)
                return bestMember;
        }
    }

    return U.init;
}
97 
@safe /*pure*/ unittest  // TODO make pure when Issue 14962 is fixed
{
    enum E { _, alpha, beta, gamma }

    // exact, plural and truncated spellings all resolve to `alpha`
    foreach (spelling; ["alpha", "alphas", "alph", "alp"])
        assert(spelling.tolerantTo!E == E.alpha);

    // exact and truncated spellings of `gamma`
    foreach (spelling; ["gamma", "gamm", "gam"])
        assert(spelling.tolerantTo!E == E.gamma);

    // a one-character member name converts exactly
    assert("_".tolerantTo!E == E._);
}
113 
/** Build (but do not throw) a $(D ConvException) describing a parse failure.

    Params:
        msg = detail text appended to the fixed "Can't parse string: " prefix
        fn = file name stored in the exception
        ln = line number stored in the exception
 */
@safe pure
private auto parseError(lazy string msg, string fn = __FILE__, size_t ln = __LINE__)
{
    import std.conv : ConvException;
    immutable string description = "Can't parse string: " ~ msg;
    return new ConvException(description, fn, ln);
}
120 
/** Require that the range aliased by $(D source) starts with $(D c), and
    consume that character.

    Params:
        source = (alias) the range to inspect and advance
        c = the character expected at the front of $(D source)
        fn = file name reported in the exception (defaults to the call site)
        ln = line number reported in the exception (defaults to the call site)

    Throws: the exception built by $(D parseError) when $(D source) is empty
    or its front differs from $(D c).
 */
private void parseCheck(alias source)(dchar c, string fn = __FILE__, size_t ln = __LINE__)
{
    // `text`, `empty` and `front` are not brought into scope by any import
    // visible at module level, so import them here.
    import std.conv : text;
    import std.range : empty, front, popFront;

    if (source.empty)
        throw parseError(text("unexpected end of input when expecting ", "\"", c, "\""), fn, ln);
    if (source.front != c)
        throw parseError(text("\"", c, "\" is missing"), fn, ln);
    source.popFront();
}
130 
/**
   Decode the escape sequence at the front of $(D s) and advance $(D s) past it.

   The character at the front of $(D s) is the one that selects the escape;
   this function does not look for a leading backslash itself.

   Supported forms: the single-character escapes
   $(D " ' 0 ? \ a b f n r t v), and the hexadecimal forms $(D x) (2 digits),
   $(D u) (4 digits) and $(D U) (8 digits).

   Copied from std.conv.
   TODO Reuse std.conv.parseEscape when moved there.

   Throws: the exception built by $(D parseError) on premature end of input,
   a non-hex character inside a hexadecimal escape, or an unknown escape.
*/
private dchar parseEscape(Source)(ref Source s)
    if (isSourceOfSomeChar!Source)
{
    import std.range : empty, front, popFront;

    if (s.empty)
        throw parseError("Unterminated escape sequence");

    // Steps past the current character and decodes the hex digit after it.
    // The digit itself is left at the front of the range.
    dchar nextHexDigit()(ref Source src = s)  // workaround
    {
        import std.ascii : isAlpha, isHexDigit;
        if (src.empty)
            throw parseError("Unterminated escape sequence");
        src.popFront();
        if (src.empty)
            throw parseError("Unterminated escape sequence");
        dchar digit = src.front;
        if (!isHexDigit(digit))
            throw parseError("Hex digit is missing");
        // `& ~0x20` folds lower-case letters onto upper-case ones
        return isAlpha(digit) ? ((digit & ~0x20) - ('A' - 10)) : digit - '0';
    }

    dchar result;
    size_t hexDigitCount = 0; // stays 0 for the single-character escapes

    switch (s.front)
    {
    // escapes that stand for themselves
    case '"':   result = '\"';  break;
    case '\'':  result = '\'';  break;
    case '?':   result = '\?';  break;
    case '\\':  result = '\\';  break;
    // control characters
    case '0':   result = '\0';  break;
    case 'a':   result = '\a';  break;
    case 'b':   result = '\b';  break;
    case 'f':   result = '\f';  break;
    case 'n':   result = '\n';  break;
    case 'r':   result = '\r';  break;
    case 't':   result = '\t';  break;
    case 'v':   result = '\v';  break;
    // hexadecimal escapes: only the number of digits differs
    case 'x':   hexDigitCount = 2;  break;
    case 'u':   hexDigitCount = 4;  break;
    case 'U':   hexDigitCount = 8;  break;
    default:
        import std.conv : to;
        throw parseError("Unknown escape character at front of " ~ to!string(s));
    }

    if (hexDigitCount != 0)
    {
        // accumulate the digits most-significant first
        result = 0;
        foreach (_; 0 .. hexDigitCount)
        {
            result <<= 4;
            result |= nextHexDigit();
        }
    }

    if (s.empty)
        throw parseError("Unterminated escape sequence");

    // drop the last character that belonged to the escape
    s.popFront();

    return result;
}
205 
/** Parse/Decode Escape Sequences in $(D s) into Unicode Characters.

    The result is lazy: each call to $(D popFront) consumes either one plain
    character or one backslash escape (decoded by $(D parseEscape)) from the
    source.

    Params:
        s = source range of characters, possibly containing backslash escapes

    Returns: an input range with element type $(D ElementType!Source)
    ($(D dchar) for the string inputs exercised by the unittests in this
    module). It is infinite when $(D Source) is infinite.

    Throws: whatever $(D parseEscape) throws for a malformed escape, both on
    construction (which decodes the first element) and on $(D popFront).

    TODO Add to Phobos
 */
auto decodeEscapes(Source)(Source s)
    if (isSourceOfSomeChar!Source)
{
    alias E = ElementType!Source;
    static struct Result
    {
        import std.range : isInfinite;

        this(Source s_)
        {
            _remainingSource = s_;
            popFront; // prime `_decodedFront` with the first decoded element
        }

        // empty
        static if (isInfinite!Source)
            enum bool empty = false;
        else
            @property bool empty() const { return _empty; }

        @property E front() const { return _decodedFront; }

        void popFront()
        {
            // local import so that UFCS on `_remainingSource` finds the range
            // primitives rather than this struct's own members
            import std.range : empty, front, popFront;

            // `_empty` is only declared for finite sources, so the
            // end-of-input bookkeeping must be compiled out for infinite ones.
            static if (!isInfinite!Source)
            {
                if (_remainingSource.empty)
                {
                    _empty = true;
                    return;
                }
            }

            if (_remainingSource.front == '\\') // TODO nothrow
            {
                // skip the backslash; parseEscape consumes the rest
                _remainingSource.popFront;
                _decodedFront = _remainingSource.parseEscape;
            }
            else
            {
                _decodedFront = _remainingSource.front;
                _remainingSource.popFront;
            }
        }

    private:
        Source _remainingSource; // input that has not been decoded yet
        E _decodedFront;         // current element of this range
        static if (!isInfinite!Source)
            bool _empty;         // set once popFront finds the source exhausted
    }

    return Result(s);
}
261 
/** Decode the escape sequences in $(D s) with $(D decodeEscapes) and convert
    the result to a $(D string) via $(D std.conv.to).
 */
string decodeEscapesToUTF8(S)(S s)
    if (isSourceOfSomeChar!S)
{
    import std.conv : to;
    auto decoded = decodeEscapes(s);
    return to!string(decoded);
}
268 
/** Decode the escape sequences in $(D s) with $(D decodeEscapes) and convert
    the result to a $(D wstring) via $(D std.conv.to).
 */
wstring decodeEscapesToUTF16(S)(S s)
    if (isSourceOfSomeChar!S)
{
    import std.conv : to;
    auto decoded = decodeEscapes(s);
    return to!wstring(decoded);
}
275 
///
@safe pure /*TODO nothrow*/ unittest
{
    import std.algorithm : equal;
    import std.array : array;

    // the lazy range compares element-wise with the expected text
    assert(equal(decodeEscapes(`s\u00F6der`), "söder"));
    assert(equal(decodeEscapes(`\u00F6`), "ö"));
    assert(equal(decodeEscapes(`_\u00F6\u00F6_`), "_öö_"));
    assert(equal(decodeEscapes(`http://dbpedia.org/resource/Malm\u00F6`),
                 `http://dbpedia.org/resource/Malmö`));

    // collecting the range eagerly yields an array of dchar
    auto collected = array(decodeEscapes(`_\u00F6\u00F6_`));
    static assert(is(typeof(collected) == dchar[]));
    assert(collected == "_öö_");
}
289 
///
@safe pure /*TODO nothrow*/ unittest
{
    // the result is a plain `string` holding the decoded characters
    auto utf8 = decodeEscapesToUTF8(`_\u00F6\u00F6_`);
    static assert(is(typeof(utf8) == string));
    assert(utf8 == "_öö_");
}
298 
///
@safe pure /*TODO nothrow*/ unittest
{
    // the result is a `wstring` holding the decoded characters
    auto utf16 = decodeEscapesToUTF16(`_\u00F6\u00F6_`);
    static assert(is(typeof(utf16) == wstring));
    assert(utf16 == "_öö_");
}
307 
/** Return $(D s) with its backslash escape sequences decoded.

    A string that contains no backslash is returned as is, without going
    through $(D decodeEscapes).
 */
auto unescaped(S)(S s)
    if (isSomeString!S)
{
    import std.algorithm.searching : canFind;
    import std.conv : to;

    // nothing to decode: hand the input back untouched
    if (!s.canFind('\\'))
        return s;

    return s.decodeEscapes.to!S;
}
317 
/** In-place counterpart of $(D unescaped): replace $(D s) with its unescaped
    form and return the new value.
 */
auto unescape(S)(ref S s)
    if (isSomeString!S)
{
    s = unescaped(s);
    return s;
}
323 
unittest
{
    import std.meta : AliasSeq;

    // exercise both the copying and the in-place variant for each string type
    foreach (S; AliasSeq!(string, wstring))
    {
        S escaped = `_\u00F6\u00F6_`;

        // copying variant keeps the string type of its argument
        auto copy = unescaped(escaped);
        static assert(is(typeof(copy) == S));
        assert(copy == "_öö_");

        // in-place variant rewrites its argument
        unescape(escaped);
        assert(escaped == "_öö_");
    }
}
339 
340 // /** Range Implementation of std.utf.toUTF8.
341 //     Add to Phobos std.utf
342 // */
343 // string toUTF8(S)(S s)
344 //     if (isInputRange!S &&
345 //         is(ElementType!S == dchar))
346 // {
347 //     import std.range : isRandomAccessRange;
348 //     import std.utf : toUTF8;
349 //     import std.conv : to;
350 //     static if (isRandomAccessRange!S)
351 //         return std.utf.toUTF8(s); // reuse array overload
352 //     else
353 //         return s.to!(typeof(return));
354 // }
355 
356 // /** Range Implementation of std.utf.toUTF16.
357 //     Add to Phobos std.utf
358 // */
359 // wstring toUTF16(S)(S s)
360 //     if (isInputRange!S &&
361 //         is(ElementType!S == dchar))
362 // {
363 //     import std.range : isRandomAccessRange;
364 //     import std.utf : toUTF16;
365 //     import std.conv : to;
366 //     static if (isRandomAccessRange!S)
367 //         return std.utf.toUTF16(s); // reuse array overload
368 //     else
369 //         return s.to!(typeof(return));
370 // }