1 module nxt.splitter_ex;
2 
3 import std.traits : isExpressions;
4 
/** Non-decoding ASCII-separator-only variant of Phobos' `splitter`.
 *
 * Lazily splits `r` into the maximal runs of elements for which
 * `separatorPred` is `false`. Consecutive separators are collapsed and leading
 * and trailing separators are skipped, so no empty elements are produced.
 * Every element is a slice of `r`; nothing is copied or decoded.
 *
 * Params:
 *   separatorPred = predicate callable with a `char` whose result converts to
 *                   `bool`; `true` marks a separator. It must only match ASCII
 *                   code units (checked by an `assert`), otherwise incorrect
 *                   UTF-8 decoding of the elements will follow.
 *   r = sliceable range with elements convertible to `char`. The
 *       implementation also uses `.ptr` and `.length`, so `Range` needs to be
 *       array-like.
 *
 * Returns: A forward range with elements of type `Range`.
 */
auto splitterASCII(alias separatorPred, Range)(return Range r) @trusted
if (is(typeof(Range.init[0 .. 0])) && // can be sliced
    is(typeof(Range.init[0]) : char) &&
    is(typeof(separatorPred(char.init)) : bool)) // TODO: check that the parameter of `separatorPred` is `char`
{
    static struct Result
    {
        private Range _input; // remaining input, starting at the current front
        private size_t _offset = 0; // length of current front: offset of next separator, or `_input.length` if none

        this(Range input)
        {
            // dbg("input:", input);
            _input = input;
            tryFindNextFront();  // find first hit if any
        }

        /// `true` when no elements remain.
        bool empty() const @property
            => _input.length == 0; // dbg("input:", _input, " ", " offset:", _offset);

        /// Current element, a slice of the remaining input.
        @property Range front() return @trusted
        in(!empty, "Attempting to fetch the front of an empty splitter.")
            => _input.ptr[0 .. _offset]; // dbg("input:", _input, " ", " offset:", _offset);

        /** Copy of the current iteration state.
         *
         * Advancing the copy does not advance the original; this is what makes
         * `Result` a forward range.
         */
        @property Result save()
            => this;

        /** Drop the first `_offset` elements (the current front, if any) and
         * all separators directly following them.
         */
        void skipSeparators() @trusted
        {
            while (_offset < _input.length &&
                   separatorPred(_input.ptr[_offset]))
            {
                /* predicate `separatorPred` must only filter out ASCII, or
                 * incorrect UTF-8 decoding will follow */
                assert(isASCII(_input.ptr[_offset]));
                _offset += 1;
            }
            _input = _input[_offset .. $]; // skip leading separators
            _offset = 0;
        }

        /** Skip any separators, then try to find the end of the next front. */
        void tryFindNextFront() @trusted
        {
            skipSeparators(); // skip leading separators
            // advance `_offset` to the next separator or to the end of the input
            while (_offset < _input.length &&
                   !separatorPred(_input.ptr[_offset]))
                _offset += 1;
            // dbg("input:", _input, " ", " offset:", _offset);
        }

        /// Advance to the next element, if any.
        void popFront() nothrow
        in(!empty, "Attempting to pop the front of an empty splitter.")
            => tryFindNextFront();

        pragma(inline, true)
        static private bool isASCII(char x) @safe pure nothrow @nogc
            => x < 128;
    }

    return Result(r);
}
66 
///
@safe pure nothrow @nogc unittest
{
    import std.algorithm.comparison : among, equal;
    import nxt.array_help : s;

    // predicate shared by all single-separator cases
    alias isSpace = ch => ch == ' ';

    // input consisting of nothing but separators yields no elements
    assert(splitterASCII!isSpace(``).empty);
    assert(splitterASCII!isSpace(` `).empty);
    assert(splitterASCII!isSpace(`   `).empty);

    // leading and trailing separators are dropped
    assert(equal(splitterASCII!isSpace(` - `), [`-`].s[]));
    assert(equal(splitterASCII!isSpace(`a`), [`a`].s[]));
    assert(equal(splitterASCII!isSpace(` a `), [`a`].s[]));
    assert(equal(splitterASCII!isSpace(` a b `), [`a`, `b`].s[]));

    // characters not matched by the predicate stay part of the element
    assert(equal(splitterASCII!isSpace(` a_b `), [`a_b`].s[]));

    // several separator characters, also occurring in runs
    assert(equal(splitterASCII!(ch => ch.among!(' ', '-', '_') != 0)(` - aa   bb--c-_d--_e`),
                 [`aa`, `bb`, `c`, `d`, `e`].s[]));
}
101 
/// DIP-1000 return ref escape analysis: `front` must not let a slice of a local escape.
@safe pure nothrow unittest
{
    import nxt.dip_traits : hasPreviewDIP1000;

    // The check is compiled only when `hasPreviewDIP1000` is true; presumably
    // that reflects `-preview=dip1000` being enabled - TODO confirm in `nxt.dip_traits`.
    static if (hasPreviewDIP1000)
    {
        // See_Also: https://forum.dlang.org/post/pzddsrwhfvcopfaamvak@forum.dlang.org
        // `f` would return, via `front`, a slice of its own stack-allocated
        // array `x`. Assert that such code is rejected at compile time.
        static assert(!__traits(compiles, {
                    char[] f()
                    {
                        char[2] x;
                        return x[].splitterASCII!(_ => _ == ' ').front;
                    }
                }));
    }
}
119 
/** Non-decoding ASCII-separator-only variant of Phobos' `splitter` that splits
 * at any of the compile-time values `separators`.
 *
 * Builds a predicate that is `true` for a `char` equal to one of `separators`
 * and forwards to `splitterASCII` with it.
 *
 * Params:
 *   separators = one or more compile-time expressions, each required to
 *                satisfy `isASCII`.
 *   r = sliceable range with elements convertible to `char`.
 *
 * Returns: The result of `splitterASCII` called with that predicate on `r`.
 *
 * TODO: generalize to separators being either chars or strings.
 */
template splitterASCIIAmong(separators...)
if (separators.length != 0 &&
    isExpressions!separators)
{
    import std.meta : allSatisfy;
    import nxt.char_traits : isASCII;

    auto splitterASCIIAmong(Range)(return Range r)
    if (is(typeof(Range.init[0 .. 0])) && // can be sliced
        is(typeof(Range.init[0]) : char) &&
        allSatisfy!(isASCII, separators))
    {
        static if (separators.length > 3)
        {
            // general case: defer the membership test to Phobos' `among`
            import std.algorithm.comparison : among;
            alias isSeparator = (char ch) => (ch.among!(separators) != 0);
        }
        else static if (separators.length == 3)
        {
            // hand-written comparison chain for three separators
            alias isSeparator = (char ch) => (ch == separators[0] ||
                                              ch == separators[1] ||
                                              ch == separators[2]);
        }
        else static if (separators.length == 2)
        {
            // hand-written comparison chain for two separators
            alias isSeparator = (char ch) => (ch == separators[0] ||
                                              ch == separators[1]);
        }
        else
        {
            // exactly one separator; zero is ruled out by the template constraint
            alias isSeparator = (char ch) => (ch == separators[0]);
        }
        return splitterASCII!(isSeparator)(r);
    }
}
162 
///
@safe pure nothrow @nogc unittest
{
    import nxt.array_help : s;
    import std.algorithm.comparison : equal;

    // input consisting of nothing but separators yields no elements
    assert(splitterASCIIAmong!(' ')(``).empty);
    assert(splitterASCIIAmong!(' ')(` `).empty);
    assert(splitterASCIIAmong!(' ')(`   `).empty);

    // single separator: leading and trailing separators are dropped
    assert(equal(splitterASCIIAmong!(' ')(` - `), [`-`].s[]));
    assert(equal(splitterASCIIAmong!(' ')(`a`), [`a`].s[]));
    assert(equal(splitterASCIIAmong!(' ')(` a `), [`a`].s[]));
    assert(equal(splitterASCIIAmong!(' ')(` a b `), [`a`, `b`].s[]));
    assert(equal(splitterASCIIAmong!(' ')(` a_b `), [`a_b`].s[]));

    // two, three and four separators
    assert(equal(splitterASCIIAmong!(' ', '-')(` - aa   bb--c-d--e`),
                 [`aa`, `bb`, `c`, `d`, `e`].s[]));
    assert(equal(splitterASCIIAmong!(' ', '-', '_')(` - aa   bb--c-_d--_e`),
                 [`aa`, `bb`, `c`, `d`, `e`].s[]));
    assert(equal(splitterASCIIAmong!(' ', '-', '_', '/')(` - aa ///  bb--c-_d--_e`),
                 [`aa`, `bb`, `c`, `d`, `e`].s[]));
}