module nxt.splitter_ex;

import std.traits : isExpressions;

/** Non-decoding variant of Phobos' `splitter` for ASCII separators only.
 *
 * Lazily yields the slices of `r` that lie between runs of elements accepted
 * by `separatorPred`. Leading, trailing and repeated separators never produce
 * empty parts.
 *
 * Params:
 *   separatorPred = predicate called with a single `char`; a `true` result
 *                   marks that element as a separator. Every element it
 *                   accepts is asserted to be ASCII.
 *   r = sliceable input whose elements implicitly convert to `char`.
 *
 * Returns: a range whose `front` is a slice of `r`.
 */
auto splitterASCII(alias separatorPred, Range)(return Range r) @trusted
if (is(typeof(Range.init[0 .. 0])) && // can be sliced
    is(typeof(Range.init[0]) : char) &&
    is(typeof(separatorPred(char.init)) : bool)) // predicate result must convert to `bool`
{
    static struct Result
    {
        private Range _rest;        // unconsumed input; the current front starts at index 0
        private size_t _cursor = 0; // end of the current front inside `_rest`, or `_rest.length` if no separator follows

        /// Store `input` and position the range on its first part, if any.
        this(Range input)
        {
            _rest = input;
            tryFindNextFront();
        }

        /// True once no unconsumed input remains.
        bool empty() const
        {
            return !_rest.length;
        }

        /// Current part, as a slice of the remaining input.
        @property Range front() return @trusted
        {
            assert(!empty, "Attempting to fetch the front of an empty splitter.");
            return _rest.ptr[0 .. _cursor];
        }

        /** Advance past any separators found at the cursor, then drop
         * everything before the new cursor position from the remaining input.
         */
        void skipSeparators() @trusted
        {
            for (; _cursor < _rest.length; ++_cursor)
            {
                if (!separatorPred(_rest.ptr[_cursor]))
                    break;
                /* predicate `separatorPred` must only filter out ASCII, or
                 * incorrect UTF-8 decoding will follow */
                assert(isASCII(_rest.ptr[_cursor]));
            }
            _rest = _rest[_cursor .. $];
            _cursor = 0;
        }

        /** Skip any separators, then try to find the end of the next front. */
        void tryFindNextFront() @trusted
        {
            skipSeparators();
            // move the cursor to the first separator after the new front, or to the end
            for (; _cursor < _rest.length; ++_cursor)
            {
                if (separatorPred(_rest.ptr[_cursor]))
                    break;
            }
        }

        /// Discard the current part and move on to the next one, if any.
        void popFront() nothrow
        {
            assert(!empty, "Attempting to pop the front of an empty splitter.");
            tryFindNextFront();
        }

        /// True if `x` is below 128.
        static private bool isASCII(char x) @safe pure nothrow @nogc
        {
            pragma(inline, true)
            return x < 128;
        }
    }

    return Result(r);
}

///
@safe pure nothrow @nogc unittest
{
    assert(``.splitterASCII!(c => c == ' ').empty);
    assert(` `.splitterASCII!(c => c == ' ').empty);
    assert(` `.splitterASCII!(c => c == ' ').empty);

    assert(` - `.splitterASCII!(c => c == ' ').equal([`-`].s[]));
    assert(`a`.splitterASCII!(c => c == ' ').equal([`a`].s[]));
    assert(` a `.splitterASCII!(c => c == ' ').equal([`a`].s[]));
    assert(` a b `.splitterASCII!(c => c == ' ').equal([`a`, `b`].s[]));
    assert(` a_b `.splitterASCII!(c => c == ' ').equal([`a_b`].s[]));

    assert(` - aa bb--c-_d--_e`.splitterASCII!(c => c.among!(' ', '-', '_') != 0)
                               .equal([`aa`, `bb`, `c`, `d`, `e`].s[]));
}

/// DIP-1000 return ref escape analysis
@safe pure nothrow unittest
{
    static if (isDIP1000)
    {
        // See_Also: https://forum.dlang.org/post/pzddsrwhfvcopfaamvak@forum.dlang.org
        static assert(!__traits(compiles, {
                    char[] f()
                    {
                        char[2] x;
                        return x[].splitterASCII!(_ => _ == ' ').front;
                    }
                }));
    }
}

/** Non-decoding ASCII-separator-only variant of Phobos' `splitter` that
 * splits on any of the compile-time `separators`.
 *
 * The work is forwarded to `splitterASCII` with a predicate that compares
 * each element against `separators`.
 *
 * TODO generalize to separators being either chars or strings.
 */
template splitterASCIIAmong(separators...)
if (separators.length != 0 &&
    isExpressions!separators)
{
    import std.meta : allSatisfy;
    import nxt.char_traits : isASCII;

    auto splitterASCIIAmong(Range)(return Range r)
    if (is(typeof(Range.init[0 .. 0])) && // can be sliced
        is(typeof(Range.init[0]) : char) &&
        allSatisfy!(isASCII, separators))
    {
        // `separators` is never empty here, so the final branch handles exactly one separator
        static if (separators.length >= 4)
        {
            import std.algorithm.comparison : among;
            alias pred = (char ch) => (ch.among!(separators) != 0);
        }
        else static if (separators.length == 3)
        {
            // reuse common instantiation of `splitterASCII` for predicate `pred`:
            alias pred = (char ch) => (ch == separators[0] ||
                                       ch == separators[1] ||
                                       ch == separators[2]);
        }
        else static if (separators.length == 2)
        {
            // reuse common instantiation of `splitterASCII` for predicate `pred`:
            alias pred = (char ch) => (ch == separators[0] ||
                                       ch == separators[1]);
        }
        else
        {
            // reuse common instantiation of `splitterASCII` for predicate `pred`:
            alias pred = (char ch) => (ch == separators[0]);
        }
        return splitterASCII!(pred)(r);
    }
}

///
@safe pure nothrow @nogc unittest
{
    assert(``.splitterASCIIAmong!(' ').empty);
    assert(` `.splitterASCIIAmong!(' ').empty);
    assert(` `.splitterASCIIAmong!(' ').empty);

    assert(` - `.splitterASCIIAmong!(' ').equal([`-`].s[]));
    assert(`a`.splitterASCIIAmong!(' ').equal([`a`].s[]));
    assert(` a `.splitterASCIIAmong!(' ').equal([`a`].s[]));
    assert(` a b `.splitterASCIIAmong!(' ').equal([`a`, `b`].s[]));
    assert(` a_b `.splitterASCIIAmong!(' ').equal([`a_b`].s[]));

    assert(` - aa bb--c-d--e`.splitterASCIIAmong!(' ', '-')
                             .equal([`aa`, `bb`, `c`, `d`, `e`].s[]));

    assert(` - aa bb--c-_d--_e`.splitterASCIIAmong!(' ', '-', '_')
                               .equal([`aa`, `bb`, `c`, `d`, `e`].s[]));

    assert(` - aa /// bb--c-_d--_e`.splitterASCIIAmong!(' ', '-', '_', '/')
                                   .equal([`aa`, `bb`, `c`, `d`, `e`].s[]));
}

version(unittest)
{
    import std.algorithm.comparison : among, equal;
    import nxt.array_help : s;
    import nxt.dip_traits : isDIP1000;
}