1 module nxt.slicing;
2 
/** Lazily cut $(D input) into consecutive slices, beginning a new slice in
    front of every element for which $(D isTerminator) is $(D true).

    The terminating element is kept as the first element of the slice it
    starts; an element at the very front of the (remaining) input never
    produces an empty leading slice. For $(D char) and $(D wchar) based inputs
    the predicate is applied to decoded code points rather than to code units.

    Params:
        isTerminator = unary predicate (anything accepted by $(D unaryFun))
                       deciding whether an element starts a new slice
        input = sliceable range or string to cut

    Returns: a range whose elements are slices of $(D input).

    TODO: Can this be replaced by chunkBy?
    See_Also: http://dlang.org/library/std/algorithm/splitter.html
    See_Also: http://forum.dlang.org/post/cwqeywykubsuynkidlux@forum.dlang.org
*/
auto preSlicer(alias isTerminator, R)(R input)
// Template constraint, currently disabled:
// if (((isRandomAccessRange!R &&
//       hasSlicing!R) ||
//      isSomeString!R) &&
//     is(typeof(unaryFun!isTerminator(input.front))))
{
    import std.functional : unaryFun;

    // Normalise the predicate once, then name the concrete range type.
    alias predicate = unaryFun!isTerminator;
    alias Slicer = PreSlicer!(predicate, R);
    return Slicer(input);
}
19 
/** Range returned by $(D preSlicer): iterates over consecutive slices of an
    input of type $(D R), where a new slice begins in front of every element
    satisfying $(D isTerminator).

    Invariant: while the range is non-empty, `_input[0 .. _end]` is the current
    front. `_end == size_t.max` marks the exhausted (empty) state, which is
    also the state of a default-initialized instance.
*/
private struct PreSlicer(alias isTerminator, R)
{
    /// Wraps `input` and locates the end of the first slice.
    this(R input)
    {
        _input = input;
        // Imported locally so that `_input.empty` resolves to the range
        // primitive for built-in arrays and strings.
        import std.range.primitives : empty;
        if (_input.empty)
        {
            _end = size_t.max;
        }
        else
        {
            skipTerminatorsAndSetEnd();
        }
    }

    import std.range.primitives : isInfinite;

    static if (isInfinite!R)
    {
        enum bool empty = false;  // propagate infiniteness
    }
    else
    {
        /// `true` once every slice has been consumed.
        @property bool empty() const
        {
            return _end == size_t.max;
        }
    }

    /// The current slice. Must not be called on an empty range.
    @property auto front()
    {
        return _input[0 .. _end];
    }

    /// Drops the current slice and locates the end of the next one, if any.
    void popFront()
    {
        _input = _input[_end .. $];
        import std.range.primitives : empty;
        if (_input.empty)
        {
            _end = size_t.max;  // nothing left: enter the empty state
            return;
        }
        skipTerminatorsAndSetEnd();
    }

    /// Returns an independent copy of the iteration state.
    @property PreSlicer save()
    {
        auto ret = this;
        import std.range.primitives : save;
        ret._input = _input.save;
        return ret;
    }

    /** Recomputes `_end` for the current `_input`, which must be non-empty.

        A terminator at offset 0 belongs to the slice being built and is
        skipped; the slice then extends up to (not including) the next
        terminator, or to the end of the input.
    */
    private void skipTerminatorsAndSetEnd()
    {
        // `_end` is now invalid in relation to `_input`
        import std.traits : Unqual;
        alias ElementEncodingType = typeof(_input[0]);

        // Only genuine UTF-8/UTF-16 code units need decoding. Compare the
        // unqualified type exactly: an implicit-conversion test
        // (`is(E : char)`) would also accept same-sized integral types such
        // as `ubyte`, which cannot be decoded.
        static if (is(Unqual!ElementEncodingType == char) ||
                   is(Unqual!ElementEncodingType == wchar))
        {
            size_t offset = 0;
            while (offset != _input.length)
            {
                auto slice = _input[offset .. $];
                import std.utf : decodeFront;
                size_t numCodeUnits;
                // The predicate sees whole code points; `numCodeUnits` tells
                // how many code units the code point occupied.
                const dchar dch = decodeFront(slice, numCodeUnits);
                if (offset != 0 && // ignore terminator at offset 0
                    isTerminator(dch))
                {
                    break;
                }
                offset += numCodeUnits; // skip over
            }
            _end = offset;
        }
        else
        {
            size_t offset = 0;
            if (isTerminator(_input[0]))
            {
                offset += 1;        // skip over it
            }
            import std.algorithm : countUntil;
            const count = _input[offset .. $].countUntil!isTerminator();
            if (count == -1)        // end reached
            {
                _end = _input.length;
            }
            else
            {
                _end = offset + count;
            }
        }
    }

    private R _input;
    // `_input[0 .. _end]` is the current front; `size_t.max` means empty, so
    // that `PreSlicer.init` is an empty range rather than one empty slice.
    private size_t _end = size_t.max;
}
/// Alternative name for $(D preSlicer).
alias preSplitter = preSlicer;
122 
/// Exercises $(D preSlicer) on mixed-case identifiers, non-ASCII text and integer arrays.
unittest
{
    import std.uni : isUpper, isWhite;

    // Sentence splitting: a word starts at an upper-case letter or at a
    // separator; the separator itself is stripped from the resulting word.
    alias isSeparator = ch => (ch == '-' || ch.isWhite);
    alias startsWord = ch => (ch.isUpper || isSeparator(ch));
    alias withoutLeadingSeparator = word => ((word.length >= 1 && isSeparator(word[0]))
                                             ? word[1 .. $]
                                             : word);

    assert(equal("doThis or doThat do-stuff".preSlicer!startsWord
                                            .map!withoutLeadingSeparator,
                 ["do", "This", "or", "do", "That", "do", "stuff"]));

    // Mixed-case splitting, table driven.
    static struct Case
    {
        string text;        // input handed to `preSlicer!isUpper`
        string[] parts;     // expected slices
    }

    auto mixedCaseCases = [
        Case("isAKindOf", ["is", "A", "Kind", "Of"]),
        Case("doThis", ["do", "This"]),
        Case("doThisIf", ["do", "This", "If"]),
        Case("utcOffset", ["utc", "Offset"]),
        Case("isUri", ["is", "Uri"]),
        // TODO Case("baseSIUnit", ["base", "SI", "Unit"]),
        Case("SomeGreatVariableName", ["Some", "Great", "Variable", "Name"]),
        Case("someGGGreatVariableName", ["some", "G", "G", "Great", "Variable", "Name"]),

        // empty and single-character inputs
        Case("", null),
        Case("a", ["a"]),
        Case("A", ["A"]),
        Case("ö", ["ö"]),

        // multi-byte code points must never be cut in half
        Case("åa", ["åa"]),
        Case("aå", ["aå"]),
        Case("åäö", ["åäö"]),
        Case("aB", ["a", "B"]),
        Case("äB", ["ä", "B"]),
        Case("aäB", ["aä", "B"]),
        Case("äaB", ["äa", "B"]),
        Case("äaÖ", ["äa", "Ö"]),
    ];

    foreach (c; mixedCaseCases)
    {
        assert(equal(c.text.preSlicer!isUpper, c.parts));
    }

    // Non-character elements take the plain (non-decoding) path.
    assert(equal([1, -1, 1, -1].preSlicer!(a => a > 0), [[1, -1], [1, -1]]));

    /* TODO Add bidir support */
    /* import std.range : retro; */
    /* assert(equal([-1, 1, -1, 1].retro.preSlicer!(a => a > 0), [[1, -1], [1, -1]])); */
}
169 
/** Lazily splits the text `r` into sub-words, beginning a new sub-word in
    front of every upper-case code point (as decided by `std.uni.isUpper`).

    The upper-case code point is kept as the first character of the sub-word
    it starts. An upper-case code point at the very front of the (remaining)
    text does not produce an empty leading sub-word.

    Params:
        r = sliceable text with a `length`, decodable by `std.utf.decodeFront`

    Returns: an input range whose elements are slices of `r`.
*/
auto wordByMixedCaseSubWord(Range)(Range r)
{
    static struct Result
    {
        /// Wraps `input` and locates the end of the first sub-word.
        this(Range input)
        {
            _input = input;
            // Imported locally so that `_input.empty` resolves to the range
            // primitive for built-in strings.
            import std.range.primitives : empty;
            if (_input.empty)
            {
                _end = size_t.max;
            }
            else
            {
                skipTerminatorsAndSetEnd();
            }
        }

        /// `true` once every sub-word has been consumed.
        @property bool empty() const
        {
            return _end == size_t.max;
        }

        /// The current sub-word. Must not be called on an empty range.
        @property auto front()
        {
            return _input[0 .. _end];
        }

        /// Drops the current sub-word and locates the end of the next one, if any.
        void popFront()
        {
            _input = _input[_end .. $];
            import std.range.primitives : empty;
            if (_input.empty)
            {
                _end = size_t.max;  // nothing left: enter the empty state
                return;
            }
            skipTerminatorsAndSetEnd();
        }

        /** Recomputes `_end` for the current `_input`, which must be non-empty.

            Walks the text one code point at a time and stops in front of the
            first upper-case code point that is not at offset 0.
        */
        private void skipTerminatorsAndSetEnd()
        {
            // `_end` is now invalid in relation to `_input`
            import std.uni : isUpper;
            import std.utf : decodeFront;

            size_t offset = 0;
            while (offset != _input.length)
            {
                auto slice = _input[offset .. $];
                size_t numCodeUnits;
                const dchar dch = decodeFront(slice, numCodeUnits);
                if (offset != 0 && // an upper-case letter at offset 0 starts this sub-word
                    isUpper(dch))
                {
                    break;
                }
                offset += numCodeUnits; // skip over
            }
            _end = offset;
        }

        private Range _input;
        // `_input[0 .. _end]` is the current front; `size_t.max` means empty,
        // which is also the state of `Result.init`.
        private size_t _end = size_t.max;
    }
    return Result(r);
}
236 
version(none)                   // TODO enable
@safe pure unittest
{
    // A new sub-word is expected to begin in front of the upper-case `Ö`.
    auto subWords = "äaÖ".wordByMixedCaseSubWord;
    assert(equal(subWords, ["äa", "Ö"]));
}
242 
243 version(unittest)
244 {
245      import std.algorithm.comparison : equal;
246      import std.algorithm.iteration : map;
247 }