1 module nxt.splitter_ex;
2 
3 import std.traits : isExpressions;
4 
/** Non-decoding ASCII-separator-only variant of Phobos' `splitter`.
 *
 * Splits `r` into the maximal runs of elements for which `separatorPred` is
 * `false`. Leading, trailing and repeated separators are skipped, so no empty
 * parts are produced. Elements are inspected one at a time, without any UTF-8
 * decoding.
 *
 * Params:
 *   separatorPred = predicate callable with a `char` whose result converts to
 *                   `bool`; it must only match ASCII code units (checked by an
 *                   `assert` while skipping separators)
 *   r = sliceable input whose elements implicitly convert to `char`
 *
 * Returns: an input range whose elements are slices of `r`.
 *
 * NOTE(review): the function-level `@trusted` also covers the calls to
 * `separatorPred`, so a `@system` predicate is not rejected when called from
 * `@safe` code - confirm that this is intended.
 */
auto splitterASCII(alias separatorPred, Range)(return Range r) @trusted
if (is(typeof(Range.init[0 .. 0])) && // can be sliced
    is(typeof(Range.init[0]) : char) &&
    is(typeof(separatorPred(char.init)) : bool)) // callable with a `char`, result converts to `bool`
{
    static struct Result
    {
        private Range _input; // unconsumed remainder; starts at the current front
        private size_t _offset = 0; // length of the front: offset of the next separator, or `_input.length` if none

        /// Store `input` and position the range at its first part, if any.
        this(Range input)
        {
            _input = input;
            tryFindNextFront();  // find first hit if any
        }

        /// `true` once nothing but (already skipped) separators remained.
        bool empty() const
        {
            return _input.length == 0;
        }

        /// Current part, as a slice of the remaining input.
        @property Range front() return @trusted
        {
            assert(!empty, "Attempting to fetch the front of an empty splitter.");
            return _input.ptr[0 .. _offset];
        }

        /** Drop the first `_offset` elements together with any separators that
         * directly follow them, then reset `_offset` to zero.
         *
         * When called from `popFront`, `_offset` is the length of the current
         * front, so this is also what discards the front itself.
         */
        void skipSeparators() @trusted
        {
            while (_offset < _input.length &&
                   separatorPred(_input.ptr[_offset]))
            {
                /* predicate `separatorPred` must only filter out ASCII, or
                 * incorrect UTF-8 decoding will follow */
                assert(isASCII(_input.ptr[_offset]));
                _offset += 1;
            }
            _input = _input[_offset .. $]; // drop consumed elements and separators
            _offset = 0;
        }

        /** Skip any separators, then advance `_offset` to the end of the next
         * part (the next separator or the end of the input).
         */
        void tryFindNextFront() @trusted
        {
            skipSeparators(); // skip leading separators
            while (_offset < _input.length &&
                   !separatorPred(_input.ptr[_offset]))
            {
                _offset += 1;
            }
        }

        /** Advance to the next part.
         *
         * Deliberately not marked `nothrow`: the attribute is inferred, so it
         * is still `nothrow` for non-throwing predicates, while predicates
         * that may throw are accepted as well (the constructor runs the very
         * same scan with inferred attributes).
         */
        void popFront()
        {
            assert(!empty, "Attempting to pop the front of an empty splitter.");
            tryFindNextFront();
        }

        /// `true` if `x` is a 7-bit ASCII code unit.
        static private bool isASCII(char x) @safe pure nothrow @nogc
        {
            pragma(inline, true);
            return x < 128;
        }
    }

    return Result(r);
}
78 
///
@safe pure nothrow @nogc unittest
{
    import std.algorithm.comparison : among, equal;
    import nxt.array_help : s;

    // predicate matching a single space, shared by most cases below
    alias isSpace = (char c) => c == ' ';

    // empty or separator-only input yields no parts
    assert(``.splitterASCII!isSpace.empty);
    assert(` `.splitterASCII!isSpace.empty);
    assert(`   `.splitterASCII!isSpace.empty);

    // a single part, with and without surrounding separators
    assert(` - `.splitterASCII!isSpace.equal([`-`].s[]));
    assert(`a`.splitterASCII!isSpace.equal([`a`].s[]));
    assert(` a `.splitterASCII!isSpace.equal([`a`].s[]));

    // several parts; characters not matched by the predicate stay in the part
    assert(` a b `.splitterASCII!isSpace.equal([`a`, `b`].s[]));
    assert(` a_b `.splitterASCII!isSpace.equal([`a_b`].s[]));

    // several different separators, also in mixed runs
    alias isSpaceDashOrUnderscore = (char c) => c.among!(' ', '-', '_') != 0;
    assert(` - aa   bb--c-_d--_e`.splitterASCII!isSpaceDashOrUnderscore
                                 .equal([`aa`, `bb`, `c`, `d`, `e`].s[]));
}
113 
/// DIP-1000 return ref escape analysis
@safe pure nothrow unittest
{
    import nxt.dip_traits : isDIP1000;

    static if (isDIP1000)
    {
        // See_Also: https://forum.dlang.org/post/pzddsrwhfvcopfaamvak@forum.dlang.org
        // Probe: does code that returns `front` of a splitter over a local
        // stack buffer compile?
        enum bool frontOfLocalEscapes = __traits(compiles, {
                char[] leak()
                {
                    char[2] buf;
                    return buf[].splitterASCII!(c => c == ' ').front;
                }
            });
        // It must not: the returned slice would outlive `buf`.
        static assert(!frontOfLocalEscapes);
    }
}
131 
/** Non-decoding ASCII-separator-only variant of Phobos' `splitter` that splits
 * at any of the compile-time values `separators`.
 *
 * Forwards to `splitterASCII` with a predicate that is `true` for elements
 * equal to one of `separators`.
 *
 * TODO generalize to separators being either chars or strings.
 */
template splitterASCIIAmong(separators...)
if (separators.length != 0 &&
    isExpressions!separators)
{
    import std.meta : allSatisfy;
    import nxt.char_traits : isASCII;

    auto splitterASCIIAmong(Range)(return Range r)
    if (is(typeof(Range.init[0 .. 0])) && // can be sliced
        is(typeof(Range.init[0]) : char) &&
        allSatisfy!(isASCII, separators))
    {
        // One to three separators are compared directly; larger sets are
        // looked up through `among`.
        static if (separators.length == 1)
        {
            return splitterASCII!((char ch) => (ch == separators[0]))(r);
        }
        else static if (separators.length == 2)
        {
            return splitterASCII!((char ch) => (ch == separators[0] ||
                                                ch == separators[1]))(r);
        }
        else static if (separators.length == 3)
        {
            return splitterASCII!((char ch) => (ch == separators[0] ||
                                                ch == separators[1] ||
                                                ch == separators[2]))(r);
        }
        else
        {
            import std.algorithm.comparison : among;
            return splitterASCII!((char ch) => (ch.among!(separators) != 0))(r);
        }
    }
}
174 
///
@safe pure nothrow @nogc unittest
{
    import std.algorithm.comparison : equal;
    import nxt.array_help : s;

    // splitter for the single separator `' '`, instantiated once
    alias bySpace = splitterASCIIAmong!(' ');

    // empty or separator-only input yields no parts
    assert(bySpace(``).empty);
    assert(bySpace(` `).empty);
    assert(bySpace(`   `).empty);

    // a single part, with and without surrounding separators
    assert(bySpace(` - `).equal([`-`].s[]));
    assert(bySpace(`a`).equal([`a`].s[]));
    assert(bySpace(` a `).equal([`a`].s[]));

    // several parts; non-separator characters stay inside the part
    assert(bySpace(` a b `).equal([`a`, `b`].s[]));
    assert(bySpace(` a_b `).equal([`a_b`].s[]));

    // two, three and four separators
    assert(splitterASCIIAmong!(' ', '-')(` - aa   bb--c-d--e`)
           .equal([`aa`, `bb`, `c`, `d`, `e`].s[]));

    assert(splitterASCIIAmong!(' ', '-', '_')(` - aa   bb--c-_d--_e`)
           .equal([`aa`, `bb`, `c`, `d`, `e`].s[]));

    assert(splitterASCIIAmong!(' ', '-', '_', '/')(` - aa ///  bb--c-_d--_e`)
           .equal([`aa`, `bb`, `c`, `d`, `e`].s[]));
}