module nxt.splitter_ex;

import std.traits : isExpressions;

/** Non-decoding ASCII-separator-only variant of Phobos' `splitter`.
 *
 * Lazily iterates over the maximal runs of elements of `r` for which
 * `separatorPred` is `false`. Leading, trailing and repeated separators are
 * skipped, so an empty part is never produced.
 *
 * Params:
 *   separatorPred = predicate callable with a `char` whose result converts to
 *                   `bool`; `true` marks a separator. It must only match
 *                   ASCII characters (checked with an `assert`).
 *   r = input that can be sliced, indexed to something convertible to `char`
 *       and that exposes `.ptr` (the implementation indexes through `.ptr`).
 *
 * Returns: a forward range whose elements are sub-slices of `r`.
 *
 * NOTE(review): the members that call `separatorPred` are `@trusted`, so the
 * predicate itself is not checked for memory safety.
 */
auto splitterASCII(alias separatorPred, Range)(return Range r) @trusted
if (is(typeof(Range.init[0 .. 0])) && // can be sliced
    is(typeof(Range.init.ptr)) &&     // `.ptr` is used for unchecked indexing
    is(typeof(Range.init[0]) : char) &&
    // TODO also constrain the predicate's parameter type (original note:
    // "check that first parameter is bool")
    is(typeof(separatorPred(char.init)) : bool))
{
    static struct Result
    {
        private Range _input;       // remaining input; the current front starts at index 0
        private size_t _offset = 0; // end of current front: index of next separator, or `_input.length` if none

        this(Range input)
        {
            _input = input;
            tryFindNextFront(); // position on the first part, if any
        }

        /// Returns: `true` when no part remains.
        @property bool empty() const
        {
            return _input.length == 0;
        }

        /// Returns: the current part, i.e. `_input[0 .. _offset]`.
        @property Range front() return @trusted
        {
            assert(!empty, "Attempting to fetch the front of an empty splitter.");
            return _input.ptr[0 .. _offset];
        }

        /// Returns: a copy of this range that can be iterated independently.
        @property auto save()
        {
            return this;
        }

        /** Skip any separators starting at `_offset` and drop everything
         * before the first non-separator from `_input`.
         */
        void skipSeparators() @trusted
        {
            while (_offset < _input.length &&
                   separatorPred(_input.ptr[_offset]))
            {
                /* predicate `separatorPred` must only filter out ASCII, or
                 * incorrect UTF-8 decoding will follow */
                assert(isASCII(_input.ptr[_offset]));
                _offset += 1;
            }
            _input = _input[_offset .. $]; // skip leading separators
            _offset = 0;
        }

        /** Skip any separators, then try to find the end of the next front.
         *
         * Afterwards `_input` is either empty or starts with a non-separator
         * and `_offset` is the length of that leading non-separator run.
         */
        void tryFindNextFront() @trusted
        {
            skipSeparators(); // skip leading separators
            while (_offset < _input.length &&
                   !separatorPred(_input.ptr[_offset]))
            {
                _offset += 1;
            }
        }

        /// Advance to the next part, if any.
        void popFront() nothrow
        {
            assert(!empty, "Attempting to pop the front of an empty splitter.");
            tryFindNextFront();
        }

        /// Returns: `true` if `x` is in the ASCII range (below 128).
        static private bool isASCII(char x) @safe pure nothrow @nogc
        {
            pragma(inline, true);
            return x < 128;
        }
    }

    return Result(r);
}

///
@safe pure nothrow @nogc unittest
{
    import std.algorithm.comparison : among, equal;
    import nxt.array_help : s;

    assert(``.splitterASCII!(_ => _ == ' ')
             .empty);

    assert(` `.splitterASCII!(_ => _ == ' ')
              .empty);

    assert(` `.splitterASCII!(_ => _ == ' ')
              .empty);

    assert(` - `.splitterASCII!(_ => _ == ' ')
                .equal([`-`].s[]));

    assert(`a`.splitterASCII!(_ => _ == ' ')
              .equal([`a`].s[]));

    assert(` a `.splitterASCII!(_ => _ == ' ')
                .equal([`a`].s[]));

    assert(` a b `.splitterASCII!(_ => _ == ' ')
                  .equal([`a`, `b`].s[]));

    assert(` a_b `.splitterASCII!(_ => _ == ' ')
                  .equal([`a_b`].s[]));

    assert(` - aa bb--c-_d--_e`.splitterASCII!(_ => _.among!(' ', '-', '_') != 0)
                               .equal([`aa`, `bb`, `c`, `d`, `e`].s[]));
}

/// forward range: `save` yields an independently iterable copy
@safe pure nothrow @nogc unittest
{
    import std.range.primitives : isForwardRange;

    auto parts = `a b c`.splitterASCII!(_ => _ == ' ');
    static assert(isForwardRange!(typeof(parts)));

    auto saved = parts.save;
    parts.popFront();
    assert(parts.front == `b`);
    assert(saved.front == `a`);
}

/// DIP-1000 return ref escape analysis
@safe pure nothrow unittest
{
    import nxt.dip_traits : isDIP1000;

    static if (isDIP1000)
    {
        // See_Also: https://forum.dlang.org/post/pzddsrwhfvcopfaamvak@forum.dlang.org
        static assert(!__traits(compiles, {
                    char[] f()
                    {
                        char[2] x;
                        return x[].splitterASCII!(_ => _ == ' ').front;
                    }
                }));
    }
}

/** Non-decoding ASCII-separator-only variant of Phobos' `splitter` that
 * splits on any of the compile-time ASCII characters `separators`.
 *
 * Forwards to `splitterASCII` with a predicate that compares each element
 * against `separators`.
 *
 * TODO generalize to separators being either chars or strings.
 */
template splitterASCIIAmong(separators...)
if (separators.length != 0 &&
    isExpressions!separators)
{
    import std.meta : allSatisfy;
    import nxt.char_traits : isASCII;

    auto splitterASCIIAmong(Range)(return Range r)
    if (is(typeof(Range.init[0 .. 0])) && // can be sliced
        is(typeof(Range.init.ptr)) &&     // required by `splitterASCII`
        is(typeof(Range.init[0]) : char) &&
        allSatisfy!(isASCII, separators))
    {
        static if (separators.length == 1)
        {
            // reuse common instantiation of `splitterASCII` for predicate `pred`:
            alias pred = (char _) => (_ == separators[0]);
        }
        else static if (separators.length == 2)
        {
            // reuse common instantiation of `splitterASCII` for predicate `pred`:
            alias pred = (char _) => (_ == separators[0] ||
                                      _ == separators[1]);
        }
        else static if (separators.length == 3)
        {
            // reuse common instantiation of `splitterASCII` for predicate `pred`:
            alias pred = (char _) => (_ == separators[0] ||
                                      _ == separators[1] ||
                                      _ == separators[2]);
        }
        else
        {
            // four or more separators: defer to Phobos' `among`
            import std.algorithm.comparison : among;
            alias pred = (char _) => (_.among!(separators) != 0);
        }
        return splitterASCII!(pred)(r);
    }
}

///
@safe pure nothrow @nogc unittest
{
    import std.algorithm.comparison : equal;
    import nxt.array_help : s;

    assert(``.splitterASCIIAmong!(' ')
             .empty);

    assert(` `.splitterASCIIAmong!(' ')
              .empty);

    assert(` `.splitterASCIIAmong!(' ')
              .empty);

    assert(` - `.splitterASCIIAmong!(' ')
                .equal([`-`].s[]));

    assert(`a`.splitterASCIIAmong!(' ')
              .equal([`a`].s[]));

    assert(` a `.splitterASCIIAmong!(' ')
                .equal([`a`].s[]));

    assert(` a b `.splitterASCIIAmong!(' ')
                  .equal([`a`, `b`].s[]));

    assert(` a_b `.splitterASCIIAmong!(' ')
                  .equal([`a_b`].s[]));

    assert(` - aa bb--c-d--e`.splitterASCIIAmong!(' ', '-')
                             .equal([`aa`, `bb`, `c`, `d`, `e`].s[]));

    assert(` - aa bb--c-_d--_e`.splitterASCIIAmong!(' ', '-', '_')
                               .equal([`aa`, `bb`, `c`, `d`, `e`].s[]));

    assert(` - aa /// bb--c-_d--_e`.splitterASCIIAmong!(' ', '-', '_', '/')
                                   .equal([`aa`, `bb`, `c`, `d`, `e`].s[]));
}