1 module nxt.chunking;
2 
3 version(none)
4 {
5 import std.range.primitives : ElementType, empty, front, back, popFront, isForwardRange, isInputRange;
6 import std.functional : unaryFun;
7 
8 // Used by implementation of chunkBy for non-forward input ranges.
9 private struct ChunkByChunkImpl(alias pred, Range)
10 if (isInputRange!Range && !isForwardRange!Range)
11 {
12     alias fun = binaryFun!pred;
13 
14     private Range r;
15     private ElementType!Range prev;
16 
17     this(Range range, ElementType!Range _prev)
18     {
19         r = range;
20         prev = _prev;
21     }
22 
23     @property bool empty() => r.empty || !fun(prev, r.front);
24     @property ElementType!Range front() => r.front;
25     void popFront() => r.popFront();
26 }
27 
28 private template ChunkByImplIsUnary(alias pred, Range)
29 {
30     static if (is(typeof(binaryFun!pred(ElementType!Range.init,
31                                         ElementType!Range.init)) : bool))
32         enum ChunkByImplIsUnary = false;
33     else static if (is(typeof(
34             unaryFun!pred(ElementType!Range.init) ==
35             unaryFun!pred(ElementType!Range.init))))
36         enum ChunkByImplIsUnary = true;
37     else
38         static assert(0, "chunkBy expects either a binary predicate or "~
39                          "a unary predicate on range elements of type: "~
40                          ElementType!Range.stringof);
41 }
42 
43 // Implementation of chunkBy for non-forward input ranges.
44 private struct ChunkByImpl(alias pred, Range)
45 if (isInputRange!Range && !isForwardRange!Range)
46 {
47     enum bool isUnary = ChunkByImplIsUnary!(pred, Range);
48 
49     static if (isUnary)
50         alias eq = binaryFun!((a, b) => unaryFun!pred(a) == unaryFun!pred(b));
51     else
52         alias eq = binaryFun!pred;
53 
54     private Range r;
55     private ElementType!Range _prev;
56 
57     this(Range _r)
58     {
59         r = _r;
60         if (!empty)
61         {
62             // Check reflexivity if predicate is claimed to be an equivalence
63             // relation.
64             assert(eq(r.front, r.front),
65                    "predicate is not reflexive");
66 
67             // _prev's type may be a nested struct, so must be initialized
68             // directly in the constructor (cannot call savePred()).
69             _prev = r.front;
70         }
71         else
72         {
73             // We won't use _prev, but must be initialized.
74             _prev = typeof(_prev).init;
75         }
76     }
77     @property bool empty() => r.empty;
78 
79     @property auto front()
80     {
81         static if (isUnary)
82         {
83             import std.typecons : tuple;
84             return tuple(unaryFun!pred(_prev),
85                          ChunkByChunkImpl!(eq, Range)(r, _prev));
86         }
87         else
88             return ChunkByChunkImpl!(eq, Range)(r, _prev);
89     }
90 
91     void popFront()
92     {
93         while (!r.empty)
94         {
95             if (!eq(_prev, r.front))
96             {
97                 _prev = r.front;
98                 break;
99             }
100             r.popFront();
101         }
102     }
103 }
104 
105 // Single-pass implementation of chunkBy for forward ranges.
106 private struct ChunkByImpl(alias pred, Range)
107 if (isForwardRange!Range)
108 {
109     import std.typecons : RefCounted;
110 
111     enum bool isUnary = ChunkByImplIsUnary!(pred, Range);
112 
113     static if (isUnary)
114         alias eq = binaryFun!((a, b) => unaryFun!pred(a) == unaryFun!pred(b));
115     else
116         alias eq = binaryFun!pred;
117 
118     // Outer range
119     static struct Impl
120     {
121         size_t groupNum;
122         Range  current;
123         Range  next;
124     }
125 
126     // Inner range
127     static struct Group
128     {
129         private size_t groupNum;
130         private Range  start;
131         private Range  current;
132 
133         private RefCounted!Impl mothership;
134 
135         this(RefCounted!Impl origin)
136         {
137             groupNum = origin.groupNum;
138 
139             start = origin.current.save;
140             current = origin.current.save;
141             assert(!start.empty);
142 
143             mothership = origin;
144 
145             // Note: this requires reflexivity.
146             assert(eq(start.front, current.front),
147                    "predicate is not reflexive");
148         }
149 
150         @property bool empty() => groupNum == size_t.max;
151         @property auto ref front() => current.front;
152 
153         void popFront()
154         {
155             current.popFront();
156 
157             // Note: this requires transitivity.
158             if (current.empty || !eq(start.front, current.front))
159             {
160                 if (groupNum == mothership.groupNum)
161                 {
162                     // If parent range hasn't moved on yet, help it along by
163                     // saving location of start of next Group.
164                     mothership.next = current.save;
165                 }
166 
167                 groupNum = size_t.max;
168             }
169         }
170 
171         @property auto save()
172         {
173             auto copy = this;
174             copy.current = current.save;
175             return copy;
176         }
177     }
178     static assert(isForwardRange!Group);
179 
180     private RefCounted!Impl impl;
181 
182     this(Range r)
183     {
184         impl = RefCounted!Impl(0, r, r.save);
185     }
186 
187     @property bool empty() => impl.current.empty;
188 
189     @property auto front()
190     {
191         static if (isUnary)
192         {
193             import std.typecons : tuple;
194             return tuple(unaryFun!pred(impl.current.front), Group(impl));
195         }
196         else
197             return Group(impl);
198     }
199 
200     void popFront()
201     {
202         // Scan for next group. If we're lucky, one of our Groups would have
203         // already set .next to the start of the next group, in which case the
204         // loop is skipped.
205         while (!impl.next.empty &&
206 			   eq(impl.current.front, impl.next.front))
207             impl.next.popFront();
208         impl.current = impl.next.save;
209         // Indicate to any remaining Groups that we have moved on.
210         impl.groupNum++;
211     }
212 
213 	// Note: the new copy of the range will be detached from any existing
214 	// satellite Groups, and will not benefit from the .next acceleration.
215     @property auto save() => typeof(this)(impl.current.save);
216 
217     static assert(isForwardRange!(typeof(this)));
218 }
219 
220 @system unittest
221 {
222     import std.algorithm.comparison : equal;
223 
224     size_t popCount = 0;
225     class RefFwdRange
226     {
227         int[]  impl;
228         @safe nothrow:
229         this(int[] data) { impl = data; }
230         @property bool empty() => impl.empty;
231         @property auto ref front() => impl.front;
232         void popFront()
233         {
234             impl.popFront();
235             popCount++;
236         }
237         @property auto save() => new RefFwdRange(impl);
238     }
239     static assert(isForwardRange!RefFwdRange);
240 
241     auto testdata = new RefFwdRange([1, 3, 5, 2, 4, 7, 6, 8, 9]);
242     auto groups = testdata.chunkBy!((a,b) => (a % 2) == (b % 2));
243     auto outerSave1 = groups.save;
244 
245     // Sanity test
246     assert(groups.equal!equal([[1, 3, 5], [2, 4], [7], [6, 8], [9]]));
247     assert(groups.empty);
248 
249     // Performance test for single-traversal use case: popFront should not have
250     // been called more times than there are elements if we traversed the
251     // segmented range exactly once.
252     assert(popCount == 9);
253 
254     // Outer range .save test
255     groups = outerSave1.save;
256     assert(!groups.empty);
257 
258     // Inner range .save test
259     auto grp1 = groups.front.save;
260     auto grp1b = grp1.save;
261     assert(grp1b.equal([1, 3, 5]));
262     assert(grp1.save.equal([1, 3, 5]));
263 
264     // Inner range should remain consistent after outer range has moved on.
265     groups.popFront();
266     assert(grp1.save.equal([1, 3, 5]));
267 
268     // Inner range should not be affected by subsequent inner ranges.
269     assert(groups.front.equal([2, 4]));
270     assert(grp1.save.equal([1, 3, 5]));
271 }
272 
273 /**
274  * Chunks an input range into subranges of equivalent adjacent elements.
275  * In other languages this is often called `partitionBy`, `groupBy`
276  * or `sliceWhen`.
277  *
278  * Equivalence is defined by the predicate $(D pred), which can be either
279  * binary, which is passed to $(REF binaryFun, std,functional), or unary, which is
280  * passed to $(REF unaryFun, std,functional). In the binary form, two _range elements
281  * $(D a) and $(D b) are considered equivalent if $(D pred(a,b)) is true. In
282  * unary form, two elements are considered equivalent if $(D pred(a) == pred(b))
283  * is true.
284  *
285  * This predicate must be an equivalence relation, that is, it must be
286  * reflexive ($(D pred(x,x)) is always true), symmetric
287  * ($(D pred(x,y) == pred(y,x))), and transitive ($(D pred(x,y) && pred(y,z))
288  * implies $(D pred(x,z))). If this is not the case, the range returned by
289  * chunkBy may assert at runtime or behave erratically.
290  *
291  * Params:
292  *  pred = Predicate for determining equivalence.
293  *  r = An $(REF_ALTTEXT input range, isInputRange, std,range,primitives) to be chunked.
294  *
295  * Returns: With a binary predicate, a range of ranges is returned in which
296  * all elements in a given subrange are equivalent under the given predicate.
297  * With a unary predicate, a range of tuples is returned, with the tuple
298  * consisting of the result of the unary predicate for each subrange, and the
299  * subrange itself.
300  *
301  * Notes:
302  *
303  * Equivalent elements separated by an intervening non-equivalent element will
304  * appear in separate subranges; this function only considers adjacent
305  * equivalence. Elements in the subranges will always appear in the same order
306  * they appear in the original range.
307  *
308  * See_Also:
309  * $(LREF group), which collapses adjacent equivalent elements into a single
310  * element.
311  */
312 auto chunkBy(alias pred, Range)(Range r)
313 if (isInputRange!Range)
314 	=> ChunkByImpl!(pred, Range)(r);
315 
316 /// Showing usage with binary predicate:
317 /*FIXME: @safe*/ @system unittest
318 {
319     import std.algorithm.comparison : equal;
320 
321     // Grouping by particular attribute of each element:
322     auto data = [
323         [1, 1],
324         [1, 2],
325         [2, 2],
326         [2, 3]
327     ];
328 
329     auto r1 = data.chunkBy!((a,b) => a[0] == b[0]);
330     assert(r1.equal!equal([
331         [[1, 1], [1, 2]],
332         [[2, 2], [2, 3]]
333     ]));
334 
335     auto r2 = data.chunkBy!((a,b) => a[1] == b[1]);
336     assert(r2.equal!equal([
337         [[1, 1]],
338         [[1, 2], [2, 2]],
339         [[2, 3]]
340     ]));
341 }
342 
343 version(none) // this example requires support for non-equivalence relations
344 @safe unittest
345 {
346     // Grouping by maximum adjacent difference:
347     import std.math : abs;
348     auto r3 = [1, 3, 2, 5, 4, 9, 10].chunkBy!((a, b) => abs(a-b) < 3);
349     assert(r3.equal!equal([
350         [1, 3, 2],
351         [5, 4],
352         [9, 10]
353     ]));
354 
355 }
356 
357 /// Showing usage with unary predicate:
358 /* FIXME: pure @safe nothrow*/ @system unittest
359 {
360     import std.algorithm.comparison : equal;
361     import std.range.primitives;
362     import std.typecons : tuple;
363 
364     // Grouping by particular attribute of each element:
365     auto range =
366     [
367         [1, 1],
368         [1, 1],
369         [1, 2],
370         [2, 2],
371         [2, 3],
372         [2, 3],
373         [3, 3]
374     ];
375 
376     auto byX = chunkBy!(a => a[0])(range);
377     auto expected1 =
378     [
379         tuple(1, [[1, 1], [1, 1], [1, 2]]),
380         tuple(2, [[2, 2], [2, 3], [2, 3]]),
381         tuple(3, [[3, 3]])
382     ];
383     foreach (e; byX)
384     {
385         assert(!expected1.empty);
386         assert(e[0] == expected1.front[0]);
387         assert(e[1].equal(expected1.front[1]));
388         expected1.popFront();
389     }
390 
391     auto byY = chunkBy!(a => a[1])(range);
392     auto expected2 =
393     [
394         tuple(1, [[1, 1], [1, 1]]),
395         tuple(2, [[1, 2], [2, 2]]),
396         tuple(3, [[2, 3], [2, 3], [3, 3]])
397     ];
398     foreach (e; byY)
399     {
400         assert(!expected2.empty);
401         assert(e[0] == expected2.front[0]);
402         assert(e[1].equal(expected2.front[1]));
403         expected2.popFront();
404     }
405 }
406 
407 /*FIXME: pure @safe nothrow*/ @system unittest
408 {
409     import std.algorithm.comparison : equal;
410     import std.typecons : tuple;
411 
412     struct Item { int x, y; }
413 
414     // Force R to have only an input range API with reference semantics, so
415     // that we're not unknowingly making use of array semantics outside of the
416     // range API.
417     class RefInputRange(R)
418     {
419         R data;
420         this(R _data) pure @safe nothrow { data = _data; }
421         @property bool empty() pure @safe nothrow => data.empty;
422         @property auto front() pure @safe nothrow => data.front;
423         void popFront() pure @safe nothrow => data.popFront();
424     }
425     auto refInputRange(R)(R range) => new RefInputRange!R(range);
426 
427     {
428         auto arr = [ Item(1,2), Item(1,3), Item(2,3) ];
429         static assert(isForwardRange!(typeof(arr)));
430 
431         auto byX = chunkBy!(a => a.x)(arr);
432         static assert(isForwardRange!(typeof(byX)));
433 
434         auto byX_subrange1 = byX.front[1].save;
435         auto byX_subrange2 = byX.front[1].save;
436         static assert(isForwardRange!(typeof(byX_subrange1)));
437         static assert(isForwardRange!(typeof(byX_subrange2)));
438 
439         byX.popFront();
440         assert(byX_subrange1.equal([ Item(1,2), Item(1,3) ]));
441         byX_subrange1.popFront();
442         assert(byX_subrange1.equal([ Item(1,3) ]));
443         assert(byX_subrange2.equal([ Item(1,2), Item(1,3) ]));
444 
445         auto byY = chunkBy!(a => a.y)(arr);
446         static assert(isForwardRange!(typeof(byY)));
447 
448         auto byY2 = byY.save;
449         static assert(is(typeof(byY) == typeof(byY2)));
450         byY.popFront();
451         assert(byY.front[0] == 3);
452         assert(byY.front[1].equal([ Item(1,3), Item(2,3) ]));
453         assert(byY2.front[0] == 2);
454         assert(byY2.front[1].equal([ Item(1,2) ]));
455     }
456 
457     // Test non-forward input ranges.
458     {
459         auto range = refInputRange([ Item(1,1), Item(1,2), Item(2,2) ]);
460         auto byX = chunkBy!(a => a.x)(range);
461         assert(byX.front[0] == 1);
462         assert(byX.front[1].equal([ Item(1,1), Item(1,2) ]));
463         byX.popFront();
464         assert(byX.front[0] == 2);
465         assert(byX.front[1].equal([ Item(2,2) ]));
466         byX.popFront();
467         assert(byX.empty);
468         assert(range.empty);
469 
470         range = refInputRange([ Item(1,1), Item(1,2), Item(2,2) ]);
471         auto byY = chunkBy!(a => a.y)(range);
472         assert(byY.front[0] == 1);
473         assert(byY.front[1].equal([ Item(1,1) ]));
474         byY.popFront();
475         assert(byY.front[0] == 2);
476         assert(byY.front[1].equal([ Item(1,2), Item(2,2) ]));
477         byY.popFront();
478         assert(byY.empty);
479         assert(range.empty);
480     }
481 }
482 
483 // Issue 13595
484 version(none) // This requires support for non-equivalence relations
485 @system unittest
486 {
487     import std.algorithm.comparison : equal;
488     auto r = [1, 2, 3, 4, 5, 6, 7, 8, 9].chunkBy!((x, y) => ((x*y) % 3) == 0);
489     assert(r.equal!equal([
490         [1],
491         [2, 3, 4],
492         [5, 6, 7],
493         [8, 9]
494     ]));
495 }
496 
497 // Issue 13805
498 @system unittest
499 {
500     [""].map!((s) => s).chunkBy!((x, y) => true);
501 }
502 }