module nxt.splitter_ex;

import std.traits : isExpressions;

/** Non-decoding ASCII-separator-only variant of Phobos' `splitter`.
 *
 * Splits `r` into the maximal runs of elements for which `separatorPred` is
 * false. Leading, trailing and repeated separators never produce empty
 * elements. No UTF decoding is performed: elements are inspected one `char`
 * at a time, so `separatorPred` must only match ASCII characters (checked by
 * an `assert` for every matched separator).
 *
 * Params:
 *   separatorPred = unary predicate returning `true` for separator characters
 *   r = sliceable range of `char`s to split
 *
 * Returns: a forward range whose elements are slices of `r`.
 *
 * NOTE(review): `separatorPred` is invoked from `@trusted` members, so a
 * `@system` predicate is not rejected when used from `@safe` code - confirm
 * that this is intended.
 */
auto splitterASCII(alias separatorPred, Range)(return Range r) @trusted
if (is(typeof(Range.init[0 .. 0])) && // can be sliced
    is(typeof(Range.init[0]) : char) &&
    is(typeof(separatorPred(char.init)) : bool)) // predicate result must convert to `bool`; TODO: also constrain its parameter
{
    static struct Result
    {
        private Range _input;       // not yet consumed input, beginning at the current front
        private size_t _offset = 0; // length of the current front: offset of the next separator in `_input`, or `_input.length` if there is none

        this(Range input)
        {
            _input = input;
            tryFindNextFront(); // find first hit if any
        }

        /// True when no elements remain.
        bool empty() const @property
            => _input.length == 0;

        /// Current element, a slice of the original input.
        @property Range front() return @trusted
        in(!empty, "Attempting to fetch the front of an empty splitter.")
            => _input.ptr[0 .. _offset];

        /** Copy of this range that can be iterated independently of the original.
         *
         * Makes `Result` a forward range, like the result of Phobos' `splitter`.
         */
        @property Result save()
            => this;

        /** Drop the current front together with all separators directly following it. */
        void skipSeparators() @trusted
        {
            while (_offset < _input.length &&
                   separatorPred(_input.ptr[_offset]))
            {
                /* predicate `separatorPred` must only filter out ASCII, or
                 * incorrect UTF-8 decoding will follow */
                assert(isASCII(_input.ptr[_offset]));
                _offset += 1;
            }
            _input = _input[_offset .. $]; // drop consumed front and separators
            _offset = 0;
        }

        /** Skip any separators and try finding the next front. */
        void tryFindNextFront() @trusted
        {
            skipSeparators(); // skip leading separators
            // extend the front up to the next separator or the end of the input
            while (_offset < _input.length &&
                   !separatorPred(_input.ptr[_offset]))
                _offset += 1;
        }

        /// Advance to the next element.
        void popFront() nothrow
        in(!empty, "Attempting to pop the front of an empty splitter.")
            => tryFindNextFront();

        pragma(inline, true)
        static private bool isASCII(char x) @safe pure nothrow @nogc
            => x < 128;
    }

    return Result(r);
}

///
@safe pure nothrow @nogc unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.comparison : among;
    import nxt.array_help : s;

    assert(``.splitterASCII!(_ => _ == ' ')
             .empty);

    assert(` `.splitterASCII!(_ => _ == ' ')
              .empty);

    // several consecutive separators only
    assert(`   `.splitterASCII!(_ => _ == ' ')
                .empty);

    assert(` - `.splitterASCII!(_ => _ == ' ')
                .equal([`-`].s[]));

    assert(`a`.splitterASCII!(_ => _ == ' ')
              .equal([`a`].s[]));

    assert(` a `.splitterASCII!(_ => _ == ' ')
                .equal([`a`].s[]));

    assert(` a b `.splitterASCII!(_ => _ == ' ')
                  .equal([`a`, `b`].s[]));

    assert(` a_b `.splitterASCII!(_ => _ == ' ')
                  .equal([`a_b`].s[]));

    assert(` - aa bb--c-_d--_e`.splitterASCII!(_ => _.among!(' ', '-', '_') != 0)
                               .equal([`aa`, `bb`, `c`, `d`, `e`].s[]));
}

/// forward range: a saved copy iterates independently
@safe pure nothrow @nogc unittest
{
    import std.range.primitives : isForwardRange;

    auto r = ` a b `.splitterASCII!(_ => _ == ' ');
    static assert(isForwardRange!(typeof(r)));

    auto saved = r.save;
    r.popFront();
    assert(saved.front == `a`);
    assert(r.front == `b`);
}

/// DIP-1000 return ref escape analysis
@safe pure nothrow unittest
{
    import nxt.dip_traits : hasPreviewDIP1000;

    static if (hasPreviewDIP1000)
    {
        // See_Also: https://forum.dlang.org/post/pzddsrwhfvcopfaamvak@forum.dlang.org
        static assert(!__traits(compiles, {
            char[] f()
            {
                char[2] x;
                return x[].splitterASCII!(_ => _ == ' ').front;
            }
        }));
    }
}

/** Non-decoding ASCII-separator-only variant of Phobos' `splitter` that
 * splits at any of the compile-time characters `separators`.
 *
 * Forwards to `splitterASCII` with a predicate that compares each `char`
 * against `separators`.
 *
 * TODO: generalize to separators being either chars or strings.
 */
template splitterASCIIAmong(separators...)
if (separators.length != 0 &&
    isExpressions!separators)
{
    import std.meta : allSatisfy;
    import nxt.char_traits : isASCII;

    auto splitterASCIIAmong(Range)(return Range r)
    if (is(typeof(Range.init[0 .. 0])) && // can be sliced
        is(typeof(Range.init[0]) : char) &&
        allSatisfy!(isASCII, separators)) // presumably rejects non-ASCII separators; `isASCII` is defined in `nxt.char_traits`
    {
        static if (separators.length == 1)
        {
            // reuse common instantiation of `splitterASCII` for predicate `pred`:
            alias pred = (char _) => (_ == separators[0]);
        }
        else static if (separators.length == 2)
        {
            // reuse common instantiation of `splitterASCII` for predicate `pred`:
            alias pred = (char _) => (_ == separators[0] ||
                                      _ == separators[1]);
        }
        else static if (separators.length == 3)
        {
            // reuse common instantiation of `splitterASCII` for predicate `pred`:
            alias pred = (char _) => (_ == separators[0] ||
                                      _ == separators[1] ||
                                      _ == separators[2]);
        }
        else
        {
            // four or more separators: delegate the comparison to `among`
            import std.algorithm.comparison : among;
            alias pred = (char _) => (_.among!(separators) != 0);
        }
        return splitterASCII!(pred)(r);
    }
}

///
@safe pure nothrow @nogc unittest
{
    import std.algorithm.comparison : equal;
    import nxt.array_help : s;

    assert(``.splitterASCIIAmong!(' ')
             .empty);

    assert(` `.splitterASCIIAmong!(' ')
              .empty);

    // several consecutive separators only
    assert(`   `.splitterASCIIAmong!(' ')
                .empty);

    assert(` - `.splitterASCIIAmong!(' ')
                .equal([`-`].s[]));

    assert(`a`.splitterASCIIAmong!(' ')
              .equal([`a`].s[]));

    assert(` a `.splitterASCIIAmong!(' ')
                .equal([`a`].s[]));

    assert(` a b `.splitterASCIIAmong!(' ')
                  .equal([`a`, `b`].s[]));

    assert(` a_b `.splitterASCIIAmong!(' ')
                  .equal([`a_b`].s[]));

    assert(` - aa bb--c-d--e`.splitterASCIIAmong!(' ', '-')
                             .equal([`aa`, `bb`, `c`, `d`, `e`].s[]));

    assert(` - aa bb--c-_d--_e`.splitterASCIIAmong!(' ', '-', '_')
                               .equal([`aa`, `bb`, `c`, `d`, `e`].s[]));

    assert(` - aa /// bb--c-_d--_e`.splitterASCIIAmong!(' ', '-', '_', '/')
                                   .equal([`aa`, `bb`, `c`, `d`, `e`].s[]));
}