1 module nxt.splitter_ex;
2 
3 import std.traits : isExpressions;
4 
/** Split `r` into the maximal runs of elements that do not satisfy
 * `separatorPred`, without performing any UTF decoding.
 *
 * Non-decoding, ASCII-separator-only counterpart of Phobos' `splitter`. Runs of
 * consecutive separators are skipped as a whole, so no empty parts are
 * produced.
 *
 * Params:
 *   separatorPred = predicate returning `true` for elements that separate
 *                   parts; it must only accept ASCII characters, which is
 *                   verified by an `assert` for every skipped separator
 *   r = sliceable range of `char`-like elements to split
 *
 * Returns: a range (`empty`/`front`/`popFront`) whose elements are slices of `r`.
 */
auto splitterASCII(alias separatorPred, Range)(return Range r) @trusted
if (is(typeof(Range.init[0 .. 0])) && // can be sliced
	is(typeof(Range.init[0]) : char) &&
	is(typeof(separatorPred(char.init)) : bool)) /+ TODO: check that first parameter of `separatorPred` is a `char` +/
{
	static struct Result
	{
		private Range _input; // unconsumed tail of `r`, starting at the current front
		private size_t _offset = 0; // length of the current front within `_input`

		this(Range input)
		{
			_input = input;
			tryFindNextFront(); // position on the first part, if any
		}

		/// Is `true` when no parts remain.
		bool empty() const @property
		{
			return _input.length == 0;
		}

		/// Current part, as a slice of the remaining input.
		@property Range front() return @trusted
		in(!empty, "Attempting to fetch the front of an empty splitter.")
		{
			return _input.ptr[0 .. _offset];
		}

		/** Advance past any separators found at `_offset` and drop everything
		 * before the first non-separator from `_input`. */
		void skipSeparators() @trusted
		{
			for (; _offset < _input.length; _offset += 1)
			{
				if (!separatorPred(_input.ptr[_offset]))
					break;
				/* predicate `separatorPred` must only filter out ASCII, or
				 * incorrect UTF-8 decoding will follow */
				assert(isASCII(_input.ptr[_offset]));
			}
			_input = _input[_offset .. $]; // remaining input now starts at a non-separator (or is empty)
			_offset = 0;
		}

		/** Skip any separators, then measure the next part (if any). */
		void tryFindNextFront() @trusted
		{
			skipSeparators();
			// extend `_offset` up to the next separator or the end of input
			for (; _offset < _input.length; _offset += 1)
				if (separatorPred(_input.ptr[_offset]))
					break;
		}

		/// Drop the current part and move on to the next one, if any.
		void popFront() nothrow
		in(!empty, "Attempting to pop the front of an empty splitter.")
		{
			tryFindNextFront();
		}

		/// Is `true` iff `x` is a 7-bit ASCII code unit.
		pragma(inline, true)
		static private bool isASCII(char x) pure nothrow @safe @nogc
		{
			return x < 128;
		}
	}

	return Result(r);
}
66 
/// Splitting with a predicate: runs of separators never yield empty parts.
pure nothrow @safe @nogc unittest {
	import std.algorithm.comparison : among, equal;
	import nxt.array_help : s;

	alias isSpace = (char c) => c == ' ';
	alias isDelimiter = (char c) => c.among!(' ', '-', '_') != 0;

	// inputs consisting solely of separators (or nothing) produce no parts
	assert(``.splitterASCII!isSpace.empty);
	assert(` `.splitterASCII!isSpace.empty);
	assert(`   `.splitterASCII!isSpace.empty);

	// a single part, with and without surrounding separators
	assert(` - `.splitterASCII!isSpace.equal([`-`].s[]));
	assert(`a`.splitterASCII!isSpace.equal([`a`].s[]));
	assert(` a `.splitterASCII!isSpace.equal([`a`].s[]));

	// several parts
	assert(` a b `.splitterASCII!isSpace.equal([`a`, `b`].s[]));

	// characters not matched by the predicate stay inside the part
	assert(` a_b `.splitterASCII!isSpace.equal([`a_b`].s[]));

	// several distinct separator characters
	assert(` - aa   bb--c-_d--_e`.splitterASCII!isDelimiter
								 .equal([`aa`, `bb`, `c`, `d`, `e`].s[]));
}
100 
/// DIP-1000 return ref escape analysis
pure nothrow @safe unittest {
	import nxt.dip_traits : hasPreviewDIP1000;
	// The check below is currently compiled out via `version (none)`.
	version (none) // TODO: enable
	static if (hasPreviewDIP1000) {
		// See_Also: https://forum.dlang.org/post/pzddsrwhfvcopfaamvak@forum.dlang.org
		// Asserts that returning the `front` of a splitter over a slice of the
		// stack-local array `x` is rejected by the compiler.
		static assert(!__traits(compiles, {
			char[] f() {
				char[2] x;
				return x[].splitterASCII!(_ => _ == ' ').front;
			}
		}
						));
	}
}
116 
/** Non-decoding ASCII-separator-only variant of Phobos' `splitter` that splits
 * on any of the compile-time values `separators`.
 *
 * Forwards to `splitterASCII` with a predicate that is `true` for exactly the
 * elements equal to one of `separators`.
 *
 * TODO: generalize to separators being either chars or strings.
 */
template splitterASCIIAmong(separators...)
if (separators.length != 0 &&
	isExpressions!separators)
{
	import std.meta : allSatisfy;
	import nxt.char_traits : isASCII;

	auto splitterASCIIAmong(Range)(return Range r)
	if (is(typeof(Range.init[0 .. 0])) && // can be sliced
		is(typeof(Range.init[0]) : char) &&
		allSatisfy!(isASCII, separators))
	{
		static if (separators.length <= 3)
		{
			// few separators: a plain short-circuiting chain of equality tests,
			// tried in the order the separators were given
			static bool pred(char c)
			{
				static foreach (separator; separators)
					if (c == separator)
						return true;
				return false;
			}
		}
		else
		{
			// many separators: delegate the membership test to `among`
			import std.algorithm.comparison : among;
			alias pred = (char c) => (c.among!(separators) != 0);
		}
		return splitterASCII!(pred)(r);
	}
}
159 
/// Splitting on one or more fixed ASCII separators.
pure nothrow @safe @nogc unittest {
	import std.algorithm.comparison : equal;
	import nxt.array_help : s;

	alias bySpace = splitterASCIIAmong!(' ');

	// inputs consisting solely of separators (or nothing) produce no parts
	assert(bySpace(``).empty);
	assert(bySpace(` `).empty);
	assert(bySpace(`   `).empty);

	// a single part, with and without surrounding separators
	assert(equal(bySpace(` - `), [`-`].s[]));
	assert(equal(bySpace(`a`), [`a`].s[]));
	assert(equal(bySpace(` a `), [`a`].s[]));

	// several parts
	assert(equal(bySpace(` a b `), [`a`, `b`].s[]));

	// characters that are not separators stay inside the part
	assert(equal(bySpace(` a_b `), [`a_b`].s[]));

	// two, three and four distinct separators
	assert(equal(splitterASCIIAmong!(' ', '-')(` - aa   bb--c-d--e`),
				 [`aa`, `bb`, `c`, `d`, `e`].s[]));

	assert(equal(splitterASCIIAmong!(' ', '-', '_')(` - aa   bb--c-_d--_e`),
				 [`aa`, `bb`, `c`, `d`, `e`].s[]));

	assert(equal(splitterASCIIAmong!(' ', '-', '_', '/')(` - aa ///  bb--c-_d--_e`),
				 [`aa`, `bb`, `c`, `d`, `e`].s[]));
}