1 module nxt.splitter_ex;
2 
3 import std.traits : isExpressions;
4 
/** Non-decoding ASCII-separator-only variant of Phobos' `splitter`.
 *
 * Lazily splits `r` at every run of elements for which `separatorPred` is
 * `true`. Leading, trailing and consecutive separators are skipped, so the
 * parts yielded are never empty.
 *
 * Params:
 *      separatorPred = predicate that is `true` for separator elements; it
 *                      must only match ASCII characters (checked by `assert`)
 *      r = input to split; the implementation reads `r.ptr` and `r.length`
 *
 * Returns: a forward range of slices of `r`, one per run of non-separators.
 */
auto splitterASCII(alias separatorPred, Range)(return Range r) @trusted
if (is(typeof(Range.init[0 .. 0])) && // can be sliced
    is(typeof(Range.init[0]) : char) &&
    is(typeof(separatorPred(char.init)) : bool)) // TODO also check the parameter type of `separatorPred`
{
    static struct Result
    {
        private Range _input; // unconsumed input; starts at the current front
        private size_t _offset = 0; // length of the current front: offset of the next separator hit, or `_input.length` if miss

        this(Range input)
        {
            _input = input;
            tryFindNextFront();  // find first hit if any
        }

        /// Returns: `true` when no parts remain.
        @property bool empty() const
        {
            return _input.length == 0;
        }

        /// Returns: the current part as a slice of the input.
        @property Range front() return @trusted
        {
            assert(!empty, "Attempting to fetch the front of an empty splitter.");
            // unchecked slice; `_offset` never exceeds `_input.length` (see the loops below)
            return _input.ptr[0 .. _offset];
        }

        /** Returns: a copy that iterates independently of this range.
         *
         * The whole state is the pair (`_input`, `_offset`), so copying the
         * struct is sufficient.
         */
        @property Result save()
        {
            return this;
        }

        /** Skip any separators.
         *
         * Scanning starts at `_offset`, so when called after a front has been
         * found this drops that front together with the separators following it.
         *
         * NOTE(review): `@trusted` here also covers the call to the
         * user-supplied `separatorPred` - confirm that this is intended.
         */
        void skipSeparators() @trusted
        {
            while (_offset < _input.length &&
                   separatorPred(_input.ptr[_offset]))
            {
                /* predicate `separatorPred` must only filter out ASCII, or
                 * incorrect UTF-8 decoding will follow */
                assert(isASCII(_input.ptr[_offset]));
                _offset += 1;
            }
            _input = _input[_offset .. $]; // skip leading separators
            _offset = 0;
        }

        /** Skip any separators, then try finding the end of the next front. */
        void tryFindNextFront() @trusted
        {
            skipSeparators(); // skip leading separators
            // advance `_offset` to the next separator or to the end of the input
            while (_offset < _input.length &&
                   !separatorPred(_input.ptr[_offset]))
            {
                _offset += 1;
            }
        }

        /// Drop the current part and advance to the next one, if any.
        void popFront() nothrow
        {
            assert(!empty, "Attempting to pop the front of an empty splitter.");
            tryFindNextFront();
        }

        /// Returns: `true` if `x` is in the 7-bit ASCII range.
        static private bool isASCII(char x) @safe pure nothrow @nogc
        {
            pragma(inline, true);
            return x < 128;
        }
    }

    return Result(r);
}
78 
///
@safe pure nothrow @nogc unittest
{
    // predicates shared by the checks below
    alias isSpace = (char c) => c == ' ';
    alias isSpaceDashOrUnderscore = (char c) => c.among!(' ', '-', '_') != 0;

    // empty or separator-only input yields no parts
    assert(``.splitterASCII!isSpace.empty);
    assert(` `.splitterASCII!isSpace.empty);
    assert(`   `.splitterASCII!isSpace.empty);

    // a single part, with or without surrounding separators
    assert(` - `.splitterASCII!isSpace.equal([`-`].s[]));
    assert(`a`.splitterASCII!isSpace.equal([`a`].s[]));
    assert(` a `.splitterASCII!isSpace.equal([`a`].s[]));

    // two parts
    assert(` a b `.splitterASCII!isSpace.equal([`a`, `b`].s[]));

    // a character that is not a separator stays inside the part
    assert(` a_b `.splitterASCII!isSpace.equal([`a_b`].s[]));

    // several distinct separators, including repeated and mixed runs
    auto expected = [`aa`, `bb`, `c`, `d`, `e`].s;
    assert(` - aa   bb--c-_d--_e`.splitterASCII!isSpaceDashOrUnderscore
                                 .equal(expected[]));
}
109 
/// DIP-1000 return ref escape analysis
@safe pure nothrow unittest
{
    static if (isDIP1000)
    {
        // See_Also: https://forum.dlang.org/post/pzddsrwhfvcopfaamvak@forum.dlang.org
        // Returning a part that slices a stack-allocated buffer must be rejected.
        enum bool frontEscapes = __traits(compiles, {
                char[] leak()
                {
                    char[2] buffer;
                    return buffer[].splitterASCII!(_ => _ == ' ').front;
                }
            });
        static assert(!frontEscapes);
    }
}
125 
/** Non-decoding ASCII-separator-only variant of Phobos' `splitter` that
 * splits at any of the compile-time values `separators`.
 *
 * Forwards to `splitterASCII` with a predicate that is `true` when an element
 * equals one of `separators`.
 *
 * TODO generalize to separators being either chars or strings.
 */
template splitterASCIIAmong(separators...)
if (separators.length != 0 &&
    isExpressions!separators)
{
    import std.meta : allSatisfy;
    import nxt.char_traits : isASCII;

    auto splitterASCIIAmong(Range)(return Range r)
    if (is(typeof(Range.init[0 .. 0])) && // can be sliced
        is(typeof(Range.init[0]) : char) &&
        allSatisfy!(isASCII, separators))
    {
        // One predicate for any number of separators: the loop over the
        // compile-time sequence is unrolled into a chain of comparisons.
        alias isSeparator = (char candidate) {
            foreach (separator; separators)
            {
                if (candidate == separator)
                    return true;
            }
            return false;
        };
        return splitterASCII!(isSeparator)(r);
    }
}
168 
///
@safe pure nothrow @nogc unittest
{
    // empty or separator-only input yields no parts
    assert(``.splitterASCIIAmong!(' ').empty);
    assert(` `.splitterASCIIAmong!(' ').empty);
    assert(`   `.splitterASCIIAmong!(' ').empty);

    // a single part, with or without surrounding separators
    assert(` - `.splitterASCIIAmong!(' ').equal([`-`].s[]));
    assert(`a`.splitterASCIIAmong!(' ').equal([`a`].s[]));
    assert(` a `.splitterASCIIAmong!(' ').equal([`a`].s[]));

    // two parts
    assert(` a b `.splitterASCIIAmong!(' ').equal([`a`, `b`].s[]));

    // a character that is not a separator stays inside the part
    assert(` a_b `.splitterASCIIAmong!(' ').equal([`a_b`].s[]));

    // two, three and four separators all give the same five parts
    auto expected = [`aa`, `bb`, `c`, `d`, `e`].s;

    assert(` - aa   bb--c-d--e`.splitterASCIIAmong!(' ', '-')
                               .equal(expected[]));

    assert(` - aa   bb--c-_d--_e`.splitterASCIIAmong!(' ', '-', '_')
                                 .equal(expected[]));

    assert(` - aa ///  bb--c-_d--_e`.splitterASCIIAmong!(' ', '-', '_', '/')
                                    .equal(expected[]));
}
205 
206 version(unittest)
207 {
208     import std.algorithm.comparison : equal;
209     import std.algorithm.comparison : among;
210     import nxt.array_help : s;
211     import nxt.dip_traits : isDIP1000;
212 }