1 module nxt.chunking; 2 3 version (none) { 4 import std.range.primitives : ElementType, empty, front, back, popFront, isForwardRange, isInputRange; 5 import std.functional : unaryFun; 6 7 // Used by implementation of chunkBy for non-forward input ranges. 8 private struct ChunkByChunkImpl(alias pred, Range) 9 if (isInputRange!Range && !isForwardRange!Range) { 10 alias fun = binaryFun!pred; 11 12 private Range r; 13 private ElementType!Range prev; 14 15 this(Range range, ElementType!Range _prev) { 16 r = range; 17 prev = _prev; 18 } 19 20 @property bool empty() => r.empty || !fun(prev, r.front); 21 @property ElementType!Range front() => r.front; 22 void popFront() => r.popFront(); 23 } 24 25 private template ChunkByImplIsUnary(alias pred, Range) { 26 static if (is(typeof(binaryFun!pred(ElementType!Range.init, 27 ElementType!Range.init)) : bool)) 28 enum ChunkByImplIsUnary = false; 29 else static if (is(typeof( 30 unaryFun!pred(ElementType!Range.init) == 31 unaryFun!pred(ElementType!Range.init)))) 32 enum ChunkByImplIsUnary = true; 33 else 34 static assert(0, "chunkBy expects either a binary predicate or "~ 35 "a unary predicate on range elements of type: "~ 36 ElementType!Range.stringof); 37 } 38 39 // Implementation of chunkBy for non-forward input ranges. 40 private struct ChunkByImpl(alias pred, Range) 41 if (isInputRange!Range && !isForwardRange!Range) { 42 enum bool isUnary = ChunkByImplIsUnary!(pred, Range); 43 44 static if (isUnary) 45 alias eq = binaryFun!((a, b) => unaryFun!pred(a) == unaryFun!pred(b)); 46 else 47 alias eq = binaryFun!pred; 48 49 private Range r; 50 private ElementType!Range _prev; 51 52 this(Range _r) { 53 r = _r; 54 if (!empty) { 55 // Check reflexivity if predicate is claimed to be an equivalence 56 // relation. 57 assert(eq(r.front, r.front), 58 "predicate is not reflexive"); 59 60 // _prev's type may be a nested struct, so must be initialized 61 // directly in the constructor (cannot call savePred()). 62 _prev = r.front; 63 } 64 else 65 { 66 // We won't use _prev, but must be initialized. 67 _prev = typeof(_prev).init; 68 } 69 } 70 @property bool empty() => r.empty; 71 72 @property auto front() { 73 static if (isUnary) { 74 import std.typecons : tuple; 75 return tuple(unaryFun!pred(_prev), 76 ChunkByChunkImpl!(eq, Range)(r, _prev)); 77 } 78 else 79 return ChunkByChunkImpl!(eq, Range)(r, _prev); 80 } 81 82 void popFront() { 83 while (!r.empty) { 84 if (!eq(_prev, r.front)) { 85 _prev = r.front; 86 break; 87 } 88 r.popFront(); 89 } 90 } 91 } 92 93 // Single-pass implementation of chunkBy for forward ranges. 94 private struct ChunkByImpl(alias pred, Range) 95 if (isForwardRange!Range) { 96 import std.typecons : RefCounted; 97 98 enum bool isUnary = ChunkByImplIsUnary!(pred, Range); 99 100 static if (isUnary) 101 alias eq = binaryFun!((a, b) => unaryFun!pred(a) == unaryFun!pred(b)); 102 else 103 alias eq = binaryFun!pred; 104 105 // Outer range 106 static struct Impl 107 { 108 size_t groupNum; 109 Range current; 110 Range next; 111 } 112 113 // Inner range 114 static struct Group 115 { 116 private size_t groupNum; 117 private Range start; 118 private Range current; 119 120 private RefCounted!Impl mothership; 121 122 this(RefCounted!Impl origin) { 123 groupNum = origin.groupNum; 124 125 start = origin.current.save; 126 current = origin.current.save; 127 assert(!start.empty); 128 129 mothership = origin; 130 131 // Note: this requires reflexivity. 132 assert(eq(start.front, current.front), 133 "predicate is not reflexive"); 134 } 135 136 @property bool empty() => groupNum == size_t.max; 137 @property auto ref front() => current.front; 138 139 void popFront() { 140 current.popFront(); 141 142 // Note: this requires transitivity. 143 if (current.empty || !eq(start.front, current.front)) { 144 if (groupNum == mothership.groupNum) { 145 // If parent range hasn't moved on yet, help it along by 146 // saving location of start of next Group. 147 mothership.next = current.save; 148 } 149 150 groupNum = size_t.max; 151 } 152 } 153 154 @property auto save() { 155 auto copy = this; 156 copy.current = current.save; 157 return copy; 158 } 159 } 160 static assert(isForwardRange!Group); 161 162 private RefCounted!Impl impl; 163 164 this(Range r) { 165 impl = RefCounted!Impl(0, r, r.save); 166 } 167 168 @property bool empty() => impl.current.empty; 169 170 @property auto front() { 171 static if (isUnary) { 172 import std.typecons : tuple; 173 return tuple(unaryFun!pred(impl.current.front), Group(impl)); 174 } 175 else 176 return Group(impl); 177 } 178 179 void popFront() { 180 // Scan for next group. If we're lucky, one of our Groups would have 181 // already set .next to the start of the next group, in which case the 182 // loop is skipped. 183 while (!impl.next.empty && 184 eq(impl.current.front, impl.next.front)) 185 impl.next.popFront(); 186 impl.current = impl.next.save; 187 // Indicate to any remaining Groups that we have moved on. 188 impl.groupNum++; 189 } 190 191 // Note: the new copy of the range will be detached from any existing 192 // satellite Groups, and will not benefit from the .next acceleration. 193 @property auto save() => typeof(this)(impl.current.save); 194 195 static assert(isForwardRange!(typeof(this))); 196 } 197 198 @system unittest { 199 import std.algorithm.comparison : equal; 200 201 size_t popCount = 0; 202 class RefFwdRange 203 { 204 int[] impl; 205 @safe nothrow: 206 this(int[] data) { impl = data; } 207 @property bool empty() => impl.empty; 208 @property auto ref front() => impl.front; 209 void popFront() { 210 impl.popFront(); 211 popCount++; 212 } 213 @property auto save() => new RefFwdRange(impl); 214 } 215 static assert(isForwardRange!RefFwdRange); 216 217 auto testdata = new RefFwdRange([1, 3, 5, 2, 4, 7, 6, 8, 9]); 218 auto groups = testdata.chunkBy!((a,b) => (a % 2) == (b % 2)); 219 auto outerSave1 = groups.save; 220 221 // Sanity test 222 assert(groups.equal!equal([[1, 3, 5], [2, 4], [7], [6, 8], [9]])); 223 assert(groups.empty); 224 225 // Performance test for single-traversal use case: popFront should not have 226 // been called more times than there are elements if we traversed the 227 // segmented range exactly once. 228 assert(popCount == 9); 229 230 // Outer range .save test 231 groups = outerSave1.save; 232 assert(!groups.empty); 233 234 // Inner range .save test 235 auto grp1 = groups.front.save; 236 auto grp1b = grp1.save; 237 assert(grp1b.equal([1, 3, 5])); 238 assert(grp1.save.equal([1, 3, 5])); 239 240 // Inner range should remain consistent after outer range has moved on. 241 groups.popFront(); 242 assert(grp1.save.equal([1, 3, 5])); 243 244 // Inner range should not be affected by subsequent inner ranges. 245 assert(groups.front.equal([2, 4])); 246 assert(grp1.save.equal([1, 3, 5])); 247 } 248 249 /** 250 * Chunks an input range into subranges of equivalent adjacent elements. 251 * In other languages this is often called `partitionBy`, `groupBy` 252 * or `sliceWhen`. 253 * 254 * Equivalence is defined by the predicate $(D pred), which can be either 255 * binary, which is passed to $(REF binaryFun, std,functional), or unary, which is 256 * passed to $(REF unaryFun, std,functional). In the binary form, two _range elements 257 * $(D a) and $(D b) are considered equivalent if $(D pred(a,b)) is true. In 258 * unary form, two elements are considered equivalent if $(D pred(a) == pred(b)) 259 * is true. 260 * 261 * This predicate must be an equivalence relation, that is, it must be 262 * reflexive ($(D pred(x,x)) is always true), symmetric 263 * ($(D pred(x,y) == pred(y,x))), and transitive ($(D pred(x,y) && pred(y,z)) 264 * implies $(D pred(x,z))). If this is not the case, the range returned by 265 * chunkBy may assert at runtime or behave erratically. 266 * 267 * Params: 268 * pred = Predicate for determining equivalence. 269 * r = An $(REF_ALTTEXT input range, isInputRange, std,range,primitives) to be chunked. 270 * 271 * Returns: With a binary predicate, a range of ranges is returned in which 272 * all elements in a given subrange are equivalent under the given predicate. 273 * With a unary predicate, a range of tuples is returned, with the tuple 274 * consisting of the result of the unary predicate for each subrange, and the 275 * subrange itself. 276 * 277 * Notes: 278 * 279 * Equivalent elements separated by an intervening non-equivalent element will 280 * appear in separate subranges; this function only considers adjacent 281 * equivalence. Elements in the subranges will always appear in the same order 282 * they appear in the original range. 283 * 284 * See_Also: 285 * $(LREF group), which collapses adjacent equivalent elements into a single 286 * element. 287 */ 288 auto chunkBy(alias pred, Range)(Range r) 289 if (isInputRange!Range) 290 => ChunkByImpl!(pred, Range)(r); 291 292 /// Showing usage with binary predicate: 293 /*FIXME: @safe*/ @system unittest { 294 import std.algorithm.comparison : equal; 295 296 // Grouping by particular attribute of each element: 297 auto data = [ 298 [1, 1], 299 [1, 2], 300 [2, 2], 301 [2, 3] 302 ]; 303 304 auto r1 = data.chunkBy!((a,b) => a[0] == b[0]); 305 assert(r1.equal!equal([ 306 [[1, 1], [1, 2]], 307 [[2, 2], [2, 3]] 308 ])); 309 310 auto r2 = data.chunkBy!((a,b) => a[1] == b[1]); 311 assert(r2.equal!equal([ 312 [[1, 1]], 313 [[1, 2], [2, 2]], 314 [[2, 3]] 315 ])); 316 } 317 318 version (none) // this example requires support for non-equivalence relations 319 @safe unittest { 320 // Grouping by maximum adjacent difference: 321 import std.math : abs; 322 auto r3 = [1, 3, 2, 5, 4, 9, 10].chunkBy!((a, b) => abs(a-b) < 3); 323 assert(r3.equal!equal([ 324 [1, 3, 2], 325 [5, 4], 326 [9, 10] 327 ])); 328 329 } 330 331 /// Showing usage with unary predicate: 332 /* FIXME: pure @safe nothrow*/ @system unittest { 333 import std.algorithm.comparison : equal; 334 import std.range.primitives; 335 import std.typecons : tuple; 336 337 // Grouping by particular attribute of each element: 338 auto range = 339 [ 340 [1, 1], 341 [1, 1], 342 [1, 2], 343 [2, 2], 344 [2, 3], 345 [2, 3], 346 [3, 3] 347 ]; 348 349 auto byX = chunkBy!(a => a[0])(range); 350 auto expected1 = 351 [ 352 tuple(1, [[1, 1], [1, 1], [1, 2]]), 353 tuple(2, [[2, 2], [2, 3], [2, 3]]), 354 tuple(3, [[3, 3]]) 355 ]; 356 foreach (e; byX) { 357 assert(!expected1.empty); 358 assert(e[0] == expected1.front[0]); 359 assert(e[1].equal(expected1.front[1])); 360 expected1.popFront(); 361 } 362 363 auto byY = chunkBy!(a => a[1])(range); 364 auto expected2 = 365 [ 366 tuple(1, [[1, 1], [1, 1]]), 367 tuple(2, [[1, 2], [2, 2]]), 368 tuple(3, [[2, 3], [2, 3], [3, 3]]) 369 ]; 370 foreach (e; byY) { 371 assert(!expected2.empty); 372 assert(e[0] == expected2.front[0]); 373 assert(e[1].equal(expected2.front[1])); 374 expected2.popFront(); 375 } 376 } 377 378 /*FIXME: pure @safe nothrow*/ @system unittest { 379 import std.algorithm.comparison : equal; 380 import std.typecons : tuple; 381 382 struct Item { int x, y; } 383 384 // Force R to have only an input range API with reference semantics, so 385 // that we're not unknowingly making use of array semantics outside of the 386 // range API. 387 class RefInputRange(R) { 388 R data; 389 this(R _data) pure @safe nothrow { data = _data; } 390 @property bool empty() pure @safe nothrow => data.empty; 391 @property auto front() pure @safe nothrow => data.front; 392 void popFront() pure @safe nothrow => data.popFront(); 393 } 394 auto refInputRange(R)(R range) => new RefInputRange!R(range); 395 396 { 397 auto arr = [ Item(1,2), Item(1,3), Item(2,3) ]; 398 static assert(isForwardRange!(typeof(arr))); 399 400 auto byX = chunkBy!(a => a.x)(arr); 401 static assert(isForwardRange!(typeof(byX))); 402 403 auto byX_subrange1 = byX.front[1].save; 404 auto byX_subrange2 = byX.front[1].save; 405 static assert(isForwardRange!(typeof(byX_subrange1))); 406 static assert(isForwardRange!(typeof(byX_subrange2))); 407 408 byX.popFront(); 409 assert(byX_subrange1.equal([ Item(1,2), Item(1,3) ])); 410 byX_subrange1.popFront(); 411 assert(byX_subrange1.equal([ Item(1,3) ])); 412 assert(byX_subrange2.equal([ Item(1,2), Item(1,3) ])); 413 414 auto byY = chunkBy!(a => a.y)(arr); 415 static assert(isForwardRange!(typeof(byY))); 416 417 auto byY2 = byY.save; 418 static assert(is(typeof(byY) == typeof(byY2))); 419 byY.popFront(); 420 assert(byY.front[0] == 3); 421 assert(byY.front[1].equal([ Item(1,3), Item(2,3) ])); 422 assert(byY2.front[0] == 2); 423 assert(byY2.front[1].equal([ Item(1,2) ])); 424 } 425 426 // Test non-forward input ranges. 427 { 428 auto range = refInputRange([ Item(1,1), Item(1,2), Item(2,2) ]); 429 auto byX = chunkBy!(a => a.x)(range); 430 assert(byX.front[0] == 1); 431 assert(byX.front[1].equal([ Item(1,1), Item(1,2) ])); 432 byX.popFront(); 433 assert(byX.front[0] == 2); 434 assert(byX.front[1].equal([ Item(2,2) ])); 435 byX.popFront(); 436 assert(byX.empty); 437 assert(range.empty); 438 439 range = refInputRange([ Item(1,1), Item(1,2), Item(2,2) ]); 440 auto byY = chunkBy!(a => a.y)(range); 441 assert(byY.front[0] == 1); 442 assert(byY.front[1].equal([ Item(1,1) ])); 443 byY.popFront(); 444 assert(byY.front[0] == 2); 445 assert(byY.front[1].equal([ Item(1,2), Item(2,2) ])); 446 byY.popFront(); 447 assert(byY.empty); 448 assert(range.empty); 449 } 450 } 451 452 // Issue 13595 453 version (none) // This requires support for non-equivalence relations 454 @system unittest { 455 import std.algorithm.comparison : equal; 456 auto r = [1, 2, 3, 4, 5, 6, 7, 8, 9].chunkBy!((x, y) => ((x*y) % 3) == 0); 457 assert(r.equal!equal([ 458 [1], 459 [2, 3, 4], 460 [5, 6, 7], 461 [8, 9] 462 ])); 463 } 464 465 // Issue 13805 466 @system unittest { 467 [""].map!((s) => s).chunkBy!((x, y) => true); 468 } 469 }