1 module nxt.find_split_ex;
2 
3 import std.traits : isExpressions;
4 
/** Split `haystack` around the first element that equals any of the
 * compile-time separators `needles`.
 *
 * Like `findSplit` but with multiple separator `needles` known at compile-time
 * to prevent `NarrowString` decoding.
 *
 * Params:
 *   needles = one or more compile-time expressions, each required to satisfy
 *             `isASCII`
 *   haystack = sliceable sequence whose elements implicitly convert to `char`
 *
 * Returns: a `Result` that converts to `true` on a hit and exposes the three
 * parts as `pre`, `separator` and `post` (or as indexes 0, 1 and 2). On a miss
 * `pre` is the whole `haystack` while `separator` and `post` are empty.
 *
 * TODO Do sentinel-based search when `haystack` is mutable and larger than a
 * certain value.
 *
 * TODO Add to Phobos.
 *
 * TODO Resort to `memchr` for some cases `if (!__ctfe)`.
 * See_Also: https://forum.dlang.org/post/efpbmtyisamwwqgpxnbq@forum.dlang.org
 *
 * See_Also: https://forum.dlang.org/post/ycotlbfsqoupogaplkvf@forum.dlang.org
 */
template findSplitAmong(needles...)
if (needles.length != 0 &&
    isExpressions!needles)
{
    import std.meta : allSatisfy;
    import nxt.char_traits : isASCII;

    auto findSplitAmong(Haystack)(const scope return Haystack haystack) @trusted // TODO qualify with `inout` to reduce template bloat
    if (is(typeof(Haystack.init[0 .. 0])) && // can be sliced
        is(typeof(Haystack.init[0]) : char) &&
        allSatisfy!(isASCII, needles))
    {
        // similar return result to `std.algorithm.searching.findSplit`
        static struct Result
        {
            /* Only requires 3 words opposite to Phobos' `findSplit`,
             * `findSplitBefore` and `findSplitAfter`:
             */

            private Haystack _haystack; // original copy of haystack
            private size_t _offset; // hit offset if any, or `_haystack.length` if miss

            /// `true` if a separator was found.
            bool opCast(T : bool)() const
            {
                return !empty;
            }

            @property:

            /// Part before the separator, or all of the haystack on a miss.
            inout(Haystack) pre() inout
            {
                return _haystack[0 .. _offset];
            }

            /// One-element slice holding the matched separator, or an empty slice on a miss.
            inout(Haystack) separator() inout
            {
                if (empty) { return _haystack[$ .. $]; }
                return _haystack[_offset .. _offset + 1];
            }

            /// Part after the separator, or an empty slice on a miss.
            inout(Haystack) post() inout
            {
                if (empty) { return _haystack[$ .. $]; }
                return _haystack[_offset + 1 .. $];
            }

            /// Positional access: 0 is `pre`, 1 is `separator`, 2 is `post`.
            inout(Haystack) opIndex()(size_t idx) inout
            {
                switch (idx)
                {
                case 0: return pre;
                case 1: return separator;
                case 2: return post;
                default: assert(0, "Index out of bounds");
                }
            }

            /// `true` on a miss, which is encoded as `_offset == _haystack.length`.
            private @property bool empty() const
            {
                return _haystack.length == _offset;
            }
        }

        // The `memchr` branch is currently switched off. It must nevertheless
        // stay syntactically valid because the compiler parses both branches
        // of a `static if`.
        enum use_memchr = false;
        static if (use_memchr &&
                   needles.length == 1)
        {
            // See_Also: https://forum.dlang.org/post/piowvfbimztbqjvieddj@forum.dlang.org
            import core.stdc.string : memchr;
            // extern (C) @system nothrow @nogc pure void* rawmemchr(return const void* s, int c);

            const void* hit = memchr(haystack.ptr, needles[0], haystack.length);
            return Result(haystack, hit ? hit - cast(const(void)*)haystack.ptr : haystack.length);
        }
        else
        {
            // Plain linear scan that stops at the first matching element.
            foreach (immutable offset; 0 .. haystack.length)
            {
                static if (needles.length == 1)
                {
                    immutable hit = haystack[offset] == needles[0];
                }
                else
                {
                    import std.algorithm.comparison : among;
                    immutable hit = haystack[offset].among!(needles) != 0;
                }
                if (hit)
                {
                    return Result(haystack, offset);
                }
            }
            return Result(haystack, haystack.length); // miss
        }
    }
}
114 
/** Single-separator convenience front end for `findSplitAmong`.
 *
 * Accepts exactly one compile-time `needles` expression, which must satisfy
 * `isASCII`, and forwards `haystack` to `findSplitAmong` instantiated with it.
 *
 * Returns: whatever `findSplitAmong!(needles)` returns for `haystack`.
 */
template findSplit(needles...)
if (isExpressions!needles &&
    needles.length == 1)
{
    import std.meta : allSatisfy;
    import nxt.char_traits : isASCII;

    auto findSplit(Haystack)(const scope return Haystack haystack) @trusted // TODO qualify with `inout` to reduce template bloat
    if (is(typeof(Haystack.init[0]) : char) &&
        is(typeof(Haystack.init[0 .. 0])) && // sliceable
        allSatisfy!(isASCII, needles)) // exactly one needle, so this checks `needles[0]`
    {
        return haystack.findSplitAmong!(needles);
    }
}
130 
/// Single separator located in the middle of the haystack.
@safe pure nothrow @nogc unittest
{
    const split = "a*b".findSplit!('*');
    assert(split);

    // named accessors
    assert(split.pre == "a");
    assert(split.separator == "*");
    assert(split.post == "b");

    // positional access yields the same three parts
    assert(split[0] == "a");
    assert(split[1] == "*");
    assert(split[2] == "b");
}
146 
/// Two candidate separators, of which only `'+'` occurs in the haystack.
@safe pure nothrow @nogc unittest
{
    auto r = "a+b*c".findSplitAmong!('+', '-');

    // The result stores one slice (two words) and one offset (one word).
    // Expressed via `size_t.sizeof` so the check also holds on non-64-bit targets.
    static assert(r.sizeof == 3 * size_t.sizeof);
    static assert(is(typeof(r.pre) == string));
    static assert(is(typeof(r.separator) == string));
    static assert(is(typeof(r.post) == string));

    assert(r);

    assert(r[0] == "a");
    assert(r.pre == "a");

    assert(r[1] == "+");
    assert(r.separator == "+");

    assert(r[2] == "b*c");
    assert(r.post == "b*c");
}
168 
/// The hit comes from the second needle listed; the first one never occurs.
@safe pure nothrow @nogc unittest
{
    const split = "a+b*c".findSplitAmong!('-', '*');

    assert(split);
    assert(split.post == "c");
    assert(split.separator == "*");
    assert(split.pre == "a+b");
}
178 
/// Separator is the last element, so `post` is empty.
@safe pure nothrow @nogc unittest
{
    const split = "a*".findSplitAmong!('*');
    assert(split);

    // named accessors
    assert(split.pre == "a");
    assert(split.separator == "*");
    assert(split.post == "");

    // positional access
    assert(split[0] == "a");
    assert(split[1] == "*");
    assert(split[2] == "");
}
195 
/// Separator is the first element, so `pre` is empty.
@safe pure nothrow @nogc unittest
{
    const split = "*b".findSplitAmong!('*');
    assert(split);

    // named accessors
    assert(split.pre == "");
    assert(split.separator == "*");
    assert(split.post == "b");

    // positional access
    assert(split[0] == "");
    assert(split[1] == "*");
    assert(split[2] == "b");
}
212 
/// Haystack consists of the separator alone: `pre` and `post` are both empty.
@safe pure nothrow @nogc unittest
{
    const split = "*".findSplitAmong!('*');
    assert(split);

    // named accessors
    assert(split.pre == "");
    assert(split.separator == "*");
    assert(split.post == "");

    // positional access
    assert(split[0] == "");
    assert(split[1] == "*");
    assert(split[2] == "");
}
229 
/// Miss: the needle, given as a `static immutable` symbol, does not occur in the haystack.
@safe pure nothrow @nogc unittest
{
    static immutable separator_char = '/';

    immutable r = "a+b*c".findSplitAmong!(separator_char);

    // The result stores one slice (two words) and one offset (one word).
    // Expressed via `size_t.sizeof` so the check also holds on non-64-bit targets.
    static assert(r.sizeof == 3 * size_t.sizeof);
    static assert(is(typeof(r.pre) == immutable string));
    static assert(is(typeof(r.separator) == immutable string));
    static assert(is(typeof(r.post) == immutable string));

    assert(!r);

    // On a miss `pre` holds the whole haystack and the other two parts are empty.
    assert(r.pre == "a+b*c");
    assert(r[0] == "a+b*c");
    assert(r.separator == []);
    assert(r[1] == []);
    assert(r.post == []);
    assert(r[2] == []);
}