module nxt.slicing;

/** Slice `input` at all positions where $(D isTerminator) is $(D false) before
    the current element and $(D true) at the current element.

    Every element of the returned range is a slice of `input` that starts
    either at the beginning of `input` or at an element satisfying
    `isTerminator`, and extends up to (but not including) the next element
    satisfying `isTerminator`. The terminator is kept as the first element of
    its slice; consecutive terminators each start a slice of their own.

    For inputs whose code unit type is `char` or `wchar` the predicate is
    applied to decoded code points (`dchar`); for all other inputs it is
    applied to the elements themselves.

    Params:
        isTerminator = unary predicate (anything accepted by `unaryFun`)
        input = sliceable input with `length`, indexing and `$`

    Returns: a forward range of slices of `input`.

    TODO: Can this be replaced by chunkBy
    See_Also: http://dlang.org/library/std/algorithm/splitter.html.
    See_Also: http://forum.dlang.org/post/cwqeywykubsuynkidlux@forum.dlang.org
*/
auto preSlicer(alias isTerminator, R)(R input)
/* if (((isRandomAccessRange!R && */
/*       hasSlicing!R) || */
/*      isSomeString!R) && */
/*     is(typeof(unaryFun!isTerminator(input.front)))) */
{
    import std.functional : unaryFun;
    return PreSlicer!(unaryFun!isTerminator, R)(input);
}

/** Range returned by `preSlicer`.

    Invariant while non-empty: `_input[0 .. _end]` is the current front.
    Exhaustion is signalled by `_end == size_t.max`.
*/
private struct PreSlicer(alias isTerminator, R)
{
    /// Wrap `input` and position the range at its first slice (if any).
    this(R input)
    {
        _input = input;
        import std.range.primitives : empty;
        if (_input.empty)
        {
            _end = size_t.max;  // sentinel: nothing to iterate
        }
        else
        {
            skipTerminatorsAndSetEnd();
        }
    }

    import std.range.primitives : isInfinite;

    static if (isInfinite!R)
    {
        enum bool empty = false;  // propagate infiniteness
    }
    else
    {
        /// Returns: `true` when all slices have been consumed.
        @property bool empty()
        {
            return _end == size_t.max;
        }
    }

    /// Returns: the current slice. Must not be called on an empty range.
    @property auto front()
    {
        return _input[0 .. _end];
    }

    /// Drop the current slice and locate the end of the next one.
    void popFront()
    {
        _input = _input[_end .. $];
        import std.range.primitives : empty;
        if (_input.empty)
        {
            _end = size_t.max;  // sentinel: exhausted
            return;
        }
        skipTerminatorsAndSetEnd();
    }

    /// Returns: an independent copy of this range.
    @property PreSlicer save()
    {
        auto ret = this;
        import std.range.primitives : save;
        ret._input = _input.save;
        return ret;
    }

    /** Recompute `_end` for the current (non-empty) `_input`.

        The element at offset 0 always belongs to the current slice, whether
        or not it is a terminator; the slice ends right before the next
        terminator or at the end of `_input`.
    */
    private void skipTerminatorsAndSetEnd()
    {
        // `_end` is now invalid in relation to `_input`
        alias ElementEncodingType = typeof(_input[0]);

        // Match the code unit type exactly (ignoring qualifiers). The weaker
        // implicit-conversion test `is(ElementEncodingType : char)` is also
        // true for `ubyte`, `byte`, `bool`, ... which would wrongly send
        // plain integer ranges into the UTF-decoding branch below, where
        // `decodeFront` rejects them at compile time.
        static if (is(immutable ElementEncodingType == immutable char) ||
                   is(immutable ElementEncodingType == immutable wchar))
        {
            size_t offset = 0;
            while (offset != _input.length)
            {
                auto slice = _input[offset .. $];
                import std.utf : decodeFront;
                size_t numCodeUnits;
                // decode one code point; `numCodeUnits` receives its width
                const dchar dch = decodeFront(slice, numCodeUnits);
                if (offset != 0 && // ignore terminator at offset 0
                    isTerminator(dch))
                {
                    break;
                }
                offset += numCodeUnits; // skip over
            }
            _end = offset;
        }
        else
        {
            size_t offset = 0;
            if (isTerminator(_input[0]))
            {
                offset += 1;        // skip over it
            }
            import std.algorithm.searching : countUntil;
            const count = _input[offset .. $].countUntil!isTerminator();
            if (count == -1)        // end reached
            {
                _end = _input.length;
            }
            else
            {
                _end = offset + count;
            }
        }
    }

    private R _input;           // remaining, not yet consumed input
    private size_t _end = 0;    // _input[0 .. _end] is current front
}

/// Alternative name for `preSlicer`.
alias preSplitter = preSlicer;

unittest
{
    import std.uni : isUpper, isWhite;
    alias sepPred = ch => (ch == '-' || ch.isWhite);
    assert(equal("doThis or doThat do-stuff".preSlicer!(_ => (_.isUpper ||
                                                              sepPred(_)))
                                            .map!(word => (word.length >= 1 &&
                                                           sepPred(word[0]) ?
                                                           word[1 .. $] :
                                                           word)),
                 ["do", "This", "or", "do", "That", "do", "stuff"]));

    assert(equal("isAKindOf".preSlicer!isUpper, ["is", "A", "Kind", "Of"]));

    assert(equal("doThis".preSlicer!isUpper, ["do", "This"]));

    assert(equal("doThisIf".preSlicer!isUpper, ["do", "This", "If"]));

    assert(equal("utcOffset".preSlicer!isUpper, ["utc", "Offset"]));
    assert(equal("isUri".preSlicer!isUpper, ["is", "Uri"]));
    // TODO assert(equal("baseSIUnit".preSlicer!isUpper, ["base", "SI", "Unit"]));

    assert(equal("SomeGreatVariableName".preSlicer!isUpper, ["Some", "Great", "Variable", "Name"]));
    assert(equal("someGGGreatVariableName".preSlicer!isUpper, ["some", "G", "G", "Great", "Variable", "Name"]));

    string[] e;
    assert(equal("".preSlicer!isUpper, e));
    assert(equal("a".preSlicer!isUpper, ["a"]));
    assert(equal("A".preSlicer!isUpper, ["A"]));
    assert(equal("ö".preSlicer!isUpper, ["ö"]));
    assert(equal("åa".preSlicer!isUpper, ["åa"]));
    assert(equal("aå".preSlicer!isUpper, ["aå"]));
    assert(equal("åäö".preSlicer!isUpper, ["åäö"]));
    assert(equal("aB".preSlicer!isUpper, ["a", "B"]));
    assert(equal("äB".preSlicer!isUpper, ["ä", "B"]));
    assert(equal("aäB".preSlicer!isUpper, ["aä", "B"]));
    assert(equal("äaB".preSlicer!isUpper, ["äa", "B"]));
    assert(equal("äaÖ".preSlicer!isUpper, ["äa", "Ö"]));

    assert(equal([1, -1, 1, -1].preSlicer!(a => a > 0), [[1, -1], [1, -1]]));

    // byte-sized integers are not code units and must not be UTF-decoded
    ubyte[] bytes = [1, 0, 1, 0, 0];
    ubyte[][] expectedBytes = [[1, 0], [1, 0, 0]];
    assert(equal(bytes.preSlicer!(a => a > 0), expectedBytes));

    /* TODO Add bidir support */
    /* import std.range : retro; */
    /* assert(equal([-1, 1, -1, 1].retro.preSlicer!(a => a > 0), [[1, -1], [1, -1]])); */
}

/* Disabled draft of a mixed-case sub-word splitter with the same state
   machine as `PreSlicer`'s string branch.
   NOTE(review): `isTerminator` is not declared inside this function; it has
   to be supplied before this can be enabled.
*/
version(none)                   // TODO enable
auto wordByMixedCaseSubWord(Range)(Range r)
{
    static struct Result
    {
        this(Range input)
        {
            _input = input;
            import std.range.primitives : empty;
            if (_input.empty)
            {
                _end = size_t.max;
            }
            else
            {
                skipTerminatorsAndSetEnd();
            }
        }

        @property bool empty()
        {
            return _end == size_t.max;
        }

        @property auto front()
        {
            return _input[0 .. _end];
        }

        void popFront()
        {
            _input = _input[_end .. $];
            import std.range.primitives : empty;
            if (_input.empty)
            {
                _end = size_t.max;
                return;
            }
            skipTerminatorsAndSetEnd();
        }

        private void skipTerminatorsAndSetEnd()
        {
            // `_end` is now invalid in relation to `_input`
            size_t offset = 0;
            while (offset != _input.length)
            {
                auto slice = _input[offset .. $];
                import std.utf : decodeFront;
                size_t numCodeUnits;
                const dchar dch = decodeFront(slice, numCodeUnits);
                if (offset != 0 && // ignore terminator at offset 0
                    isTerminator(dch))
                {
                    break;
                }
                offset += numCodeUnits; // skip over
            }
            _end = offset;
        }

        private Range _input;
        private size_t _end = 0;    // _input[0 .. _end] is current front
    }
    return Result(r);
}

version(none)                   // TODO enable
@safe pure unittest
{
    assert(equal("äaÖ".wordByMixedCaseSubWord, ["äa", "Ö"]));
}

version(unittest)
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
}