1 module nxt.conv_ex;
2 
3 import nxt.array_traits : isCharArray;
4 import nxt.traits_ex : isSourceOfSomeChar;
5 
/** Non-throwing variant of `std.conv.to` that falls back to `defaultValue`.
 *
 * When `S` is an enum and `T` is `string`, the member name is found through a
 * `switch` generated at compile time instead of calling `std.conv.to`. Every
 * other combination is forwarded to `std.conv.to`, and any `Exception` it
 * throws is replaced by `defaultValue`.
 *
 * NOTE: the enum-to-string path emits one `case` per enum member, so an enum
 * with aliased members (two names sharing one value) produces duplicate
 * `case` labels.
 *
 * Params:
 *	value = the value to convert
 *	defaultValue = result to use when the conversion is not possible
 *
 * Returns: `value` converted to `T`, or `defaultValue` on failure.
 *
 * See_Also: https://forum.dlang.org/post/bnbbheofzaxlabvnvrrc@forum.dlang.org
 * See_Also: http://forum.dlang.org/post/tsszfamjalzviqjhpdcr@forum.dlang.org
 * See_Also: https://forum.dlang.org/post/kdjbkqbnspzshdqtsntg@forum.dlang.org
 */
T toDefaulted(T, S, U)(const scope S value,
					   /*lazy*/ U defaultValue) nothrow
if (!is(T == enum) &&
	is(typeof(() { T r = defaultValue; }))) /+ TODO: use std.traits.isAssignable!(T, U) ? +/
{
	// `true` when the conversion is "enum member => its name"
	enum bool enumToName = is(S == enum) && is(T == string);

	static if (!enumToName)
	{
		// general path (not `@nogc`): delegate to Phobos
		import std.conv : to;
		try
			return to!T(value);
		catch (Exception) // assume `ConvException`. TODO: catch only `ConvException` and still have `nothrow` inferred
			return defaultValue;
	}
	else
	{
		// `@nogc` path: one `case` per member name
		switch (value)
		{
			static foreach (name; __traits(allMembers, S)) // cheaper than `EnumMembers`
			{
			case __traits(getMember, S, name):
				return name;
			}
		default:
			return defaultValue;
		}
	}
}
/** Look up the member of the enum `T` whose name equals `value`.
 *
 * The lookup is a string `switch` generated at compile time with one `case`
 * per member name, so no exception is thrown and nothing is allocated.
 *
 * Params:
 *	value = candidate member name
 *	defaultValue = result to use when `value` names no member of `T`
 *
 * Returns: the member called `value`, or `defaultValue` if there is none.
 */
T toDefaulted(T)(scope const(char)[] value,
				 const T defaultValue) pure nothrow @safe @nogc
if (is(T == enum))
{
	switch (value)
	{
		static foreach (name; __traits(allMembers, T)) // cheaper than `EnumMembers`
		{
		case name:
			// NOTE: `mixin("return T." ~ name ~ ";")` is slower
			return __traits(getMember, T, name);
		}
	default:
		break;
	}
	// no member is called `value`
	return defaultValue;
}
54 
///
@safe pure nothrow /*TODO: @nogc*/ unittest {
	// "42_1" is not a valid `int`, so the default is returned
	assert(toDefaulted!int("42_1", 43) == 43);
	// an `int` can be converted, so the default is ignored
	assert(toDefaulted!string(42, "_43") == "42");
}
60 
///
pure nothrow @safe @nogc unittest {
	enum E { unknown, x, y, z, z2 = z, }
	enum fallback = E.init;

	// names of existing members
	assert(toDefaulted!E("x", fallback) == E.x);
	assert(toDefaulted!E("z", fallback) == E.z);
	// `z2` is an alias of `z`
	assert(toDefaulted!E("z2", fallback) == E.z);
	// not a member name
	assert(toDefaulted!E("_", fallback) == E.unknown);
}
69 
///
pure nothrow @safe @nogc unittest {
	enum E { unknown, x, }
	enum string fallback = "init";

	// every member maps to its own name; the fallback is unused here
	assert(toDefaulted!string(E.x, fallback) == "x");
	assert(toDefaulted!string(E.init, fallback) == "unknown");
}
76 
/** More tolerant variant of `std.conv.to`.
 *
 * The following attempts are made in order and the first one that succeeds
 * determines the result:
 *
 * 1. `value.to!U`.
 * 2. If `tryStrippingPluralS` and `value` ends with `s`: `to!U` of `value`
 *    without that trailing `s`.
 * 3. If `tryToLower` and lower-casing changes `value`: all the other steps
 *    applied to the lower-cased text.
 * 4. If `U` is an enum and `tryLevenshtein`: the member whose name has the
 *    smallest Levenshtein distance to `value`, provided that distance is at
 *    most `levenshteinMaxDistance`. The first member wins a tie.
 * 5. `U.init`.
 *
 * Params:
 *	value = text to convert
 *	tryStrippingPluralS = enable step 2
 *	tryToLower = enable step 3
 *	tryLevenshtein = enable step 4
 *	levenshteinMaxDistance = largest edit distance accepted in step 4; pass
 *		`size_t.max` to always accept the nearest member
 *
 * Returns: the converted value, or `U.init` when every attempt failed.
 */
auto tolerantTo(U, S)(S value,
					  bool tryStrippingPluralS = true,
					  bool tryToLower = true,
					  bool tryLevenshtein = true,
					  size_t levenshteinMaxDistance = 3)
if (isCharArray!S) {
	import std.conv : to;

	// 1. exact conversion
	try
		return value.to!U;
	catch (Exception) {}

	// 2. drop a trailing plural `s`
	if (tryStrippingPluralS &&
		value.length != 0 &&
		value[$ - 1] == 's') {
		try
			return value[0 .. $ - 1].to!U;
		catch (Exception) {}
	}

	// 3. retry everything on the lower-cased text
	if (tryToLower) {
		import std.uni : toLower;
		try {
			auto lowered = value.toLower;
			// when nothing changed the steps below already cover it
			if (lowered != value)
				return lowered.tolerantTo!U(tryStrippingPluralS,
											false, // already lower-cased
											tryLevenshtein,
											levenshteinMaxDistance);
		}
		catch (Exception) {}
	}

	// 4. closest enum member name within the allowed edit distance
	static if (is(U == enum)) {
		if (tryLevenshtein) {
			import std.algorithm.comparison : levenshteinDistance;
			import std.traits : EnumMembers;

			bool found = false;
			size_t bestDistance;
			U best;
			foreach (member; EnumMembers!U) {
				const distance = value.levenshteinDistance(member.to!string);
				if (distance <= levenshteinMaxDistance &&
					(!found || distance < bestDistance)) {
					found = true;
					bestDistance = distance;
					best = member;
				}
			}
			if (found)
				return best;
		}
	}

	// 5. nothing matched
	return U.init;
}
126 
@safe /*pure*/ unittest  /+ TODO: make pure when Issue 14962 is fixed +/
{
	enum E { _, alpha, beta, gamma }

	// exact name, plural form and truncated forms of `alpha`
	foreach (input; ["alpha", "alphas", "alph", "alp"])
		assert(tolerantTo!E(input) == E.alpha);

	// exact name and truncated forms of `gamma`
	foreach (input; ["gamma", "gamm", "gam"])
		assert(tolerantTo!E(input) == E.gamma);

	// exact name of the first member
	assert(tolerantTo!E("_") == E._);
}
142 
/** Create (but do not throw) a `ConvException` describing a parse failure.
 *
 * Params:
 *	msg = detail appended to the fixed prefix "Can't parse string: "
 *	fn = file name stored in the exception, by default that of the caller
 *	ln = line number stored in the exception, by default that of the caller
 *
 * Returns: the newly allocated exception.
 */
private auto parseError(lazy string msg,
						string fn = __FILE__,
						size_t ln = __LINE__) @safe pure
{
	import std.conv : ConvException;
	string description = "Can't parse string: " ~ msg;
	return new ConvException(description, fn, ln);
}
150 
/** Require the front of `source` to be `c` and consume it.
 *
 * Params:
 *	source = range (passed by alias) that is inspected and advanced
 *	c = the character that must be at the front of `source`
 *	fn = file name stored in a thrown exception, by default that of the caller
 *	ln = line number stored in a thrown exception, by default that of the caller
 *
 * Throws: the exception built by `parseError` when `source` is empty or its
 * front differs from `c`.
 */
private void parseCheck(alias source)(dchar c,
									  string fn = __FILE__,
									  size_t ln = __LINE__) {
	// bring every helper used below into scope before its first use
	import std.conv : text;
	import std.range.primitives : empty, front, popFront;

	if (source.empty)
		throw parseError(text("unexpected end of input when expecting", "\"", c, "\""), fn, ln);
	if (source.front != c)
		throw parseError(text("\"", c, "\" is missing"), fn, ln);
	source.popFront();
}
161 
/** Parse the escape sequence at the front of `s` and return the character it denotes.
 *
 * The front of `s` must be the character that follows the backslash, i.e. the
 * caller has already consumed the backslash itself. On success `s` has been
 * advanced past the whole escape sequence.
 *
 * Supported are the single-character escapes `"`, `'`, `0`, `?`, `\`, `a`,
 * `b`, `f`, `n`, `r`, `t`, `v` and the hexadecimal forms `x` (two digits),
 * `u` (four digits) and `U` (eight digits).
 *
 * Copied this from std.conv.
 *
 * Params:
 *	s = source positioned on the escape character; advanced in place
 *
 * Returns: the decoded character. No check is made here that a hexadecimal
 * escape yields a valid Unicode code point.
 *
 * Throws: the exception built by `parseError` when the input ends too early,
 * a hexadecimal digit is missing or the escape character is unknown.
 *
 * TODO: Reuse std.conv.parseEscape when moved there.
*/
private dchar parseEscape(Source)(ref Source s)
if (isSourceOfSomeChar!Source) {
	import std.range.primitives : empty, front, popFront;

	// nothing follows the backslash
	if (s.empty)
		throw parseError("Unterminated escape sequence");

	// Advance `s_` by one element and return the numeric value (0 .. 15) of
	// the hexadecimal digit that is then at its front. The digit itself is
	// left in place; it is consumed by the next call or by the final
	// `popFront` at the end of `parseEscape`.
	dchar getHexDigit()(ref Source s_ = s)  // workaround
	{
		import std.ascii : isAlpha, isHexDigit;
		if (s_.empty)
			throw parseError("Unterminated escape sequence");
		import std.range.primitives : popFront;
		// skip the escape letter or the previously read digit
		s_.popFront();
		if (s_.empty)
			throw parseError("Unterminated escape sequence");
		dchar c = s_.front;
		if (!isHexDigit(c))
			throw parseError("Hex digit is missing");
		// letters: clear bit 0x20 to fold lower case onto upper case, then map 'A' => 10;
		// digits: map '0' => 0
		return isAlpha(c) ? ((c & ~0x20) - ('A' - 10)) : c - '0';
	}

	dchar result;

	switch (s.front) {
	// single-character escapes: the front stays in place and is popped after the switch
	case '"':   result = '\"';  break;
	case '\'':  result = '\'';  break;
	case '0':   result = '\0';  break;
	case '?':   result = '\?';  break;
	case '\\':  result = '\\';  break;
	case 'a':   result = '\a';  break;
	case 'b':   result = '\b';  break;
	case 'f':   result = '\f';  break;
	case 'n':   result = '\n';  break;
	case 'r':   result = '\r';  break;
	case 't':   result = '\t';  break;
	case 'v':   result = '\v';  break;
	// hexadecimal escapes: digits are read most significant first, 4 bits each
	case 'x':
		result  = getHexDigit() << 4;
		result |= getHexDigit();
		break;
	case 'u':
		result  = getHexDigit() << 12;
		result |= getHexDigit() << 8;
		result |= getHexDigit() << 4;
		result |= getHexDigit();
		break;
	case 'U':
		result  = getHexDigit() << 28;
		result |= getHexDigit() << 24;
		result |= getHexDigit() << 20;
		result |= getHexDigit() << 16;
		result |= getHexDigit() << 12;
		result |= getHexDigit() << 8;
		result |= getHexDigit() << 4;
		result |= getHexDigit();
		break;
	default:
		import std.conv : to;
		throw parseError("Unknown escape character at front of " ~ to!string(s));
	}
	if (s.empty)
		throw parseError("Unterminated escape sequence");

	import std.range.primitives : popFront;
	// consume the last character of the escape (the escape letter or the final hex digit)
	s.popFront();

	return result;
}
237 
/** Parse/Decode escape sequences in $(D s) into the characters they denote.
 *
 * Every backslash in `s` starts an escape sequence that is decoded with
 * `parseEscape`; all other elements are passed through unchanged. Decoding is
 * done element by element: constructing the result decodes the first element
 * and each `popFront` decodes the next one, so a malformed escape sequence
 * makes the constructor or `popFront` throw.
 *
 * Params:
 *	s = source of characters, finite or infinite
 *
 * Returns: an input range with elements of type `ElementType!Source`.
 *
 * TODO: Move to Phobos
 */
auto decodeEscapes(Source)(Source s)
if (isSourceOfSomeChar!Source) {
	import std.range.primitives : ElementType;
	alias E = ElementType!Source;
	static struct Result
	{
		import std.range.primitives : isInfinite;

		this(Source s_) {
			_remainingSource = s_;
			popFront();			// decode the first element into `_decodedFront`
		}

		// empty
		static if (isInfinite!Source)
			enum bool empty = false;
		else
			bool empty() const @property { return _empty; }

		@property E front() const { return _decodedFront; }

		void popFront() {
			import std.range.primitives : empty, front, popFront;
			static if (isInfinite!Source)
				decodeNext();	// an infinite source has no end and no `_empty` flag
			else
			{
				if (_remainingSource.empty)
					_empty = true;
				else
					decodeNext();
			}
		}

	private:
		/// Decode the next element of the non-empty `_remainingSource` into `_decodedFront`.
		void decodeNext() {
			import std.range.primitives : empty, front, popFront;
			if (_remainingSource.front == '\\') /+ TODO: nothrow +/
			{
				_remainingSource.popFront(); // skip the backslash
				_decodedFront = _remainingSource.parseEscape;
			}
			else
			{
				_decodedFront = _remainingSource.front;
				_remainingSource.popFront();
			}
		}

		Source _remainingSource;
		E _decodedFront;
		static if (!isInfinite!Source)
			bool _empty;
	}

	return Result(s);
}
290 
///
@safe pure /* nothrow */ unittest {
	import std.algorithm.comparison : equal;

	// a lone escape sequence
	assert(equal(decodeEscapes(`\u00F6`), "ö"));
	// an escape sequence surrounded by plain text
	assert(equal(decodeEscapes(`s\u00F6der`), "söder"));
	// two consecutive escape sequences
	assert(equal(decodeEscapes(`_\u00F6\u00F6_`), "_öö_"));
	// an escape sequence at the very end
	assert(equal(decodeEscapes(`http://dbpedia.org/resource/Malm\u00F6`),
				 `http://dbpedia.org/resource/Malmö`));
}
299 
300 // import std.range.primitives : isInputRange, ElementType;
301 // /** Range Implementation of std.utf.toUTF8.
302 //	 Move to Phobos std.utf
303 // */
304 // string toUTF8(S)(S s)
305 // if (isInputRange!S &&
306 //		 is(ElementType!S == dchar))
307 // {
308 //	 import std.range.primitives : isRandomAccessRange;
309 //	 import std.utf : toUTF8;
310 //	 import std.conv : to;
311 //	 static if (isRandomAccessRange!S)
312 //		 return std.utf.toUTF8(s); // reuse array overload
313 //	 else
314 //		 return s.to!(typeof(return));
315 // }
316 
317 // /** Range Implementation of std.utf.toUTF16.
318 //	 Move to Phobos std.utf
319 // */
320 // wstring toUTF16(S)(S s)
321 // if (isInputRange!S &&
322 //		 is(ElementType!S == dchar))
323 // {
324 //	 import std.range.primitives : isRandomAccessRange;
325 //	 import std.utf : toUTF16;
326 //	 import std.conv : to;
327 //	 static if (isRandomAccessRange!S)
328 //		 return std.utf.toUTF16(s); // reuse array overload
329 //	 else
330 //		 return s.to!(typeof(return));
331 // }