1 module nxt.chunking;
2 
3 version(none)
4 {
5 import std.range.primitives : ElementType, empty, front, back, popFront, isForwardRange, isInputRange;
6 import std.functional : unaryFun;
7 
8 // Used by implementation of chunkBy for non-forward input ranges.
9 private struct ChunkByChunkImpl(alias pred, Range)
10 if (isInputRange!Range && !isForwardRange!Range)
11 {
12     alias fun = binaryFun!pred;
13 
14     private Range r;
15     private ElementType!Range prev;
16 
17     this(Range range, ElementType!Range _prev)
18     {
19         r = range;
20         prev = _prev;
21     }
22 
23     @property bool empty()
24     {
25         return r.empty || !fun(prev, r.front);
26     }
27 
28     @property ElementType!Range front() { return r.front; }
29     void popFront() { r.popFront(); }
30 }
31 
32 private template ChunkByImplIsUnary(alias pred, Range)
33 {
34     static if (is(typeof(binaryFun!pred(ElementType!Range.init,
35                                         ElementType!Range.init)) : bool))
36         enum ChunkByImplIsUnary = false;
37     else static if (is(typeof(
38             unaryFun!pred(ElementType!Range.init) ==
39             unaryFun!pred(ElementType!Range.init))))
40         enum ChunkByImplIsUnary = true;
41     else
42         static assert(0, "chunkBy expects either a binary predicate or "~
43                          "a unary predicate on range elements of type: "~
44                          ElementType!Range.stringof);
45 }
46 
47 // Implementation of chunkBy for non-forward input ranges.
48 private struct ChunkByImpl(alias pred, Range)
49 if (isInputRange!Range && !isForwardRange!Range)
50 {
51     enum bool isUnary = ChunkByImplIsUnary!(pred, Range);
52 
53     static if (isUnary)
54         alias eq = binaryFun!((a, b) => unaryFun!pred(a) == unaryFun!pred(b));
55     else
56         alias eq = binaryFun!pred;
57 
58     private Range r;
59     private ElementType!Range _prev;
60 
61     this(Range _r)
62     {
63         r = _r;
64         if (!empty)
65         {
66             // Check reflexivity if predicate is claimed to be an equivalence
67             // relation.
68             assert(eq(r.front, r.front),
69                    "predicate is not reflexive");
70 
71             // _prev's type may be a nested struct, so must be initialized
72             // directly in the constructor (cannot call savePred()).
73             _prev = r.front;
74         }
75         else
76         {
77             // We won't use _prev, but must be initialized.
78             _prev = typeof(_prev).init;
79         }
80     }
81     @property bool empty() { return r.empty; }
82 
83     @property auto front()
84     {
85         static if (isUnary)
86         {
87             import std.typecons : tuple;
88             return tuple(unaryFun!pred(_prev),
89                          ChunkByChunkImpl!(eq, Range)(r, _prev));
90         }
91         else
92         {
93             return ChunkByChunkImpl!(eq, Range)(r, _prev);
94         }
95     }
96 
97     void popFront()
98     {
99         while (!r.empty)
100         {
101             if (!eq(_prev, r.front))
102             {
103                 _prev = r.front;
104                 break;
105             }
106             r.popFront();
107         }
108     }
109 }
110 
111 // Single-pass implementation of chunkBy for forward ranges.
112 private struct ChunkByImpl(alias pred, Range)
113 if (isForwardRange!Range)
114 {
115     import std.typecons : RefCounted;
116 
117     enum bool isUnary = ChunkByImplIsUnary!(pred, Range);
118 
119     static if (isUnary)
120         alias eq = binaryFun!((a, b) => unaryFun!pred(a) == unaryFun!pred(b));
121     else
122         alias eq = binaryFun!pred;
123 
124     // Outer range
125     static struct Impl
126     {
127         size_t groupNum;
128         Range  current;
129         Range  next;
130     }
131 
132     // Inner range
133     static struct Group
134     {
135         private size_t groupNum;
136         private Range  start;
137         private Range  current;
138 
139         private RefCounted!Impl mothership;
140 
141         this(RefCounted!Impl origin)
142         {
143             groupNum = origin.groupNum;
144 
145             start = origin.current.save;
146             current = origin.current.save;
147             assert(!start.empty);
148 
149             mothership = origin;
150 
151             // Note: this requires reflexivity.
152             assert(eq(start.front, current.front),
153                    "predicate is not reflexive");
154         }
155 
156         @property bool empty() { return groupNum == size_t.max; }
157         @property auto ref front() { return current.front; }
158 
159         void popFront()
160         {
161             current.popFront();
162 
163             // Note: this requires transitivity.
164             if (current.empty || !eq(start.front, current.front))
165             {
166                 if (groupNum == mothership.groupNum)
167                 {
168                     // If parent range hasn't moved on yet, help it along by
169                     // saving location of start of next Group.
170                     mothership.next = current.save;
171                 }
172 
173                 groupNum = size_t.max;
174             }
175         }
176 
177         @property auto save()
178         {
179             auto copy = this;
180             copy.current = current.save;
181             return copy;
182         }
183     }
184     static assert(isForwardRange!Group);
185 
186     private RefCounted!Impl impl;
187 
188     this(Range r)
189     {
190         impl = RefCounted!Impl(0, r, r.save);
191     }
192 
193     @property bool empty() { return impl.current.empty; }
194 
195     @property auto front()
196     {
197         static if (isUnary)
198         {
199             import std.typecons : tuple;
200             return tuple(unaryFun!pred(impl.current.front), Group(impl));
201         }
202         else
203         {
204             return Group(impl);
205         }
206     }
207 
208     void popFront()
209     {
210         // Scan for next group. If we're lucky, one of our Groups would have
211         // already set .next to the start of the next group, in which case the
212         // loop is skipped.
213         while (!impl.next.empty && eq(impl.current.front, impl.next.front))
214         {
215             impl.next.popFront();
216         }
217 
218         impl.current = impl.next.save;
219 
220         // Indicate to any remaining Groups that we have moved on.
221         impl.groupNum++;
222     }
223 
224     @property auto save()
225     {
226         // Note: the new copy of the range will be detached from any existing
227         // satellite Groups, and will not benefit from the .next acceleration.
228         return typeof(this)(impl.current.save);
229     }
230 
231     static assert(isForwardRange!(typeof(this)));
232 }
233 
234 @system unittest
235 {
236     import std.algorithm.comparison : equal;
237 
238     size_t popCount = 0;
239     class RefFwdRange
240     {
241         int[]  impl;
242 
243         @safe nothrow:
244 
245         this(int[] data) { impl = data; }
246         @property bool empty() { return impl.empty; }
247         @property auto ref front() { return impl.front; }
248         void popFront()
249         {
250             impl.popFront();
251             popCount++;
252         }
253         @property auto save() { return new RefFwdRange(impl); }
254     }
255     static assert(isForwardRange!RefFwdRange);
256 
257     auto testdata = new RefFwdRange([1, 3, 5, 2, 4, 7, 6, 8, 9]);
258     auto groups = testdata.chunkBy!((a,b) => (a % 2) == (b % 2));
259     auto outerSave1 = groups.save;
260 
261     // Sanity test
262     assert(groups.equal!equal([[1, 3, 5], [2, 4], [7], [6, 8], [9]]));
263     assert(groups.empty);
264 
265     // Performance test for single-traversal use case: popFront should not have
266     // been called more times than there are elements if we traversed the
267     // segmented range exactly once.
268     assert(popCount == 9);
269 
270     // Outer range .save test
271     groups = outerSave1.save;
272     assert(!groups.empty);
273 
274     // Inner range .save test
275     auto grp1 = groups.front.save;
276     auto grp1b = grp1.save;
277     assert(grp1b.equal([1, 3, 5]));
278     assert(grp1.save.equal([1, 3, 5]));
279 
280     // Inner range should remain consistent after outer range has moved on.
281     groups.popFront();
282     assert(grp1.save.equal([1, 3, 5]));
283 
284     // Inner range should not be affected by subsequent inner ranges.
285     assert(groups.front.equal([2, 4]));
286     assert(grp1.save.equal([1, 3, 5]));
287 }
288 
289 /**
290  * Chunks an input range into subranges of equivalent adjacent elements.
291  * In other languages this is often called `partitionBy`, `groupBy`
292  * or `sliceWhen`.
293  *
294  * Equivalence is defined by the predicate $(D pred), which can be either
295  * binary, which is passed to $(REF binaryFun, std,functional), or unary, which is
296  * passed to $(REF unaryFun, std,functional). In the binary form, two _range elements
297  * $(D a) and $(D b) are considered equivalent if $(D pred(a,b)) is true. In
298  * unary form, two elements are considered equivalent if $(D pred(a) == pred(b))
299  * is true.
300  *
301  * This predicate must be an equivalence relation, that is, it must be
302  * reflexive ($(D pred(x,x)) is always true), symmetric
303  * ($(D pred(x,y) == pred(y,x))), and transitive ($(D pred(x,y) && pred(y,z))
304  * implies $(D pred(x,z))). If this is not the case, the range returned by
305  * chunkBy may assert at runtime or behave erratically.
306  *
307  * Params:
308  *  pred = Predicate for determining equivalence.
309  *  r = An $(REF_ALTTEXT input range, isInputRange, std,range,primitives) to be chunked.
310  *
311  * Returns: With a binary predicate, a range of ranges is returned in which
312  * all elements in a given subrange are equivalent under the given predicate.
313  * With a unary predicate, a range of tuples is returned, with the tuple
314  * consisting of the result of the unary predicate for each subrange, and the
315  * subrange itself.
316  *
317  * Notes:
318  *
319  * Equivalent elements separated by an intervening non-equivalent element will
320  * appear in separate subranges; this function only considers adjacent
321  * equivalence. Elements in the subranges will always appear in the same order
322  * they appear in the original range.
323  *
324  * See_Also:
325  * $(LREF group), which collapses adjacent equivalent elements into a single
326  * element.
327  */
328 auto chunkBy(alias pred, Range)(Range r)
329 if (isInputRange!Range)
330 {
331     return ChunkByImpl!(pred, Range)(r);
332 }
333 
334 /// Showing usage with binary predicate:
335 /*FIXME: @safe*/ @system unittest
336 {
337     import std.algorithm.comparison : equal;
338 
339     // Grouping by particular attribute of each element:
340     auto data = [
341         [1, 1],
342         [1, 2],
343         [2, 2],
344         [2, 3]
345     ];
346 
347     auto r1 = data.chunkBy!((a,b) => a[0] == b[0]);
348     assert(r1.equal!equal([
349         [[1, 1], [1, 2]],
350         [[2, 2], [2, 3]]
351     ]));
352 
353     auto r2 = data.chunkBy!((a,b) => a[1] == b[1]);
354     assert(r2.equal!equal([
355         [[1, 1]],
356         [[1, 2], [2, 2]],
357         [[2, 3]]
358     ]));
359 }
360 
361 version(none) // this example requires support for non-equivalence relations
362 @safe unittest
363 {
364     // Grouping by maximum adjacent difference:
365     import std.math : abs;
366     auto r3 = [1, 3, 2, 5, 4, 9, 10].chunkBy!((a, b) => abs(a-b) < 3);
367     assert(r3.equal!equal([
368         [1, 3, 2],
369         [5, 4],
370         [9, 10]
371     ]));
372 
373 }
374 
375 /// Showing usage with unary predicate:
376 /* FIXME: pure @safe nothrow*/ @system unittest
377 {
378     import std.algorithm.comparison : equal;
379     import std.range.primitives;
380     import std.typecons : tuple;
381 
382     // Grouping by particular attribute of each element:
383     auto range =
384     [
385         [1, 1],
386         [1, 1],
387         [1, 2],
388         [2, 2],
389         [2, 3],
390         [2, 3],
391         [3, 3]
392     ];
393 
394     auto byX = chunkBy!(a => a[0])(range);
395     auto expected1 =
396     [
397         tuple(1, [[1, 1], [1, 1], [1, 2]]),
398         tuple(2, [[2, 2], [2, 3], [2, 3]]),
399         tuple(3, [[3, 3]])
400     ];
401     foreach (e; byX)
402     {
403         assert(!expected1.empty);
404         assert(e[0] == expected1.front[0]);
405         assert(e[1].equal(expected1.front[1]));
406         expected1.popFront();
407     }
408 
409     auto byY = chunkBy!(a => a[1])(range);
410     auto expected2 =
411     [
412         tuple(1, [[1, 1], [1, 1]]),
413         tuple(2, [[1, 2], [2, 2]]),
414         tuple(3, [[2, 3], [2, 3], [3, 3]])
415     ];
416     foreach (e; byY)
417     {
418         assert(!expected2.empty);
419         assert(e[0] == expected2.front[0]);
420         assert(e[1].equal(expected2.front[1]));
421         expected2.popFront();
422     }
423 }
424 
425 /*FIXME: pure @safe nothrow*/ @system unittest
426 {
427     import std.algorithm.comparison : equal;
428     import std.typecons : tuple;
429 
430     struct Item { int x, y; }
431 
432     // Force R to have only an input range API with reference semantics, so
433     // that we're not unknowingly making use of array semantics outside of the
434     // range API.
435     class RefInputRange(R)
436     {
437         R data;
438         this(R _data) pure @safe nothrow { data = _data; }
439         @property bool empty() pure @safe nothrow { return data.empty; }
440         @property auto front() pure @safe nothrow { return data.front; }
441         void popFront() pure @safe nothrow { data.popFront(); }
442     }
443     auto refInputRange(R)(R range) { return new RefInputRange!R(range); }
444 
445     {
446         auto arr = [ Item(1,2), Item(1,3), Item(2,3) ];
447         static assert(isForwardRange!(typeof(arr)));
448 
449         auto byX = chunkBy!(a => a.x)(arr);
450         static assert(isForwardRange!(typeof(byX)));
451 
452         auto byX_subrange1 = byX.front[1].save;
453         auto byX_subrange2 = byX.front[1].save;
454         static assert(isForwardRange!(typeof(byX_subrange1)));
455         static assert(isForwardRange!(typeof(byX_subrange2)));
456 
457         byX.popFront();
458         assert(byX_subrange1.equal([ Item(1,2), Item(1,3) ]));
459         byX_subrange1.popFront();
460         assert(byX_subrange1.equal([ Item(1,3) ]));
461         assert(byX_subrange2.equal([ Item(1,2), Item(1,3) ]));
462 
463         auto byY = chunkBy!(a => a.y)(arr);
464         static assert(isForwardRange!(typeof(byY)));
465 
466         auto byY2 = byY.save;
467         static assert(is(typeof(byY) == typeof(byY2)));
468         byY.popFront();
469         assert(byY.front[0] == 3);
470         assert(byY.front[1].equal([ Item(1,3), Item(2,3) ]));
471         assert(byY2.front[0] == 2);
472         assert(byY2.front[1].equal([ Item(1,2) ]));
473     }
474 
475     // Test non-forward input ranges.
476     {
477         auto range = refInputRange([ Item(1,1), Item(1,2), Item(2,2) ]);
478         auto byX = chunkBy!(a => a.x)(range);
479         assert(byX.front[0] == 1);
480         assert(byX.front[1].equal([ Item(1,1), Item(1,2) ]));
481         byX.popFront();
482         assert(byX.front[0] == 2);
483         assert(byX.front[1].equal([ Item(2,2) ]));
484         byX.popFront();
485         assert(byX.empty);
486         assert(range.empty);
487 
488         range = refInputRange([ Item(1,1), Item(1,2), Item(2,2) ]);
489         auto byY = chunkBy!(a => a.y)(range);
490         assert(byY.front[0] == 1);
491         assert(byY.front[1].equal([ Item(1,1) ]));
492         byY.popFront();
493         assert(byY.front[0] == 2);
494         assert(byY.front[1].equal([ Item(1,2), Item(2,2) ]));
495         byY.popFront();
496         assert(byY.empty);
497         assert(range.empty);
498     }
499 }
500 
501 // Issue 13595
502 version(none) // This requires support for non-equivalence relations
503 @system unittest
504 {
505     import std.algorithm.comparison : equal;
506     auto r = [1, 2, 3, 4, 5, 6, 7, 8, 9].chunkBy!((x, y) => ((x*y) % 3) == 0);
507     assert(r.equal!equal([
508         [1],
509         [2, 3, 4],
510         [5, 6, 7],
511         [8, 9]
512     ]));
513 }
514 
515 // Issue 13805
516 @system unittest
517 {
518     [""].map!((s) => s).chunkBy!((x, y) => true);
519 }
520 }