1 module nxt.chunking; 2 3 version(none) 4 { 5 import std.range.primitives : ElementType, empty, front, back, popFront, isForwardRange, isInputRange; 6 import std.functional : unaryFun; 7 8 // Used by implementation of chunkBy for non-forward input ranges. 9 private struct ChunkByChunkImpl(alias pred, Range) 10 if (isInputRange!Range && !isForwardRange!Range) 11 { 12 alias fun = binaryFun!pred; 13 14 private Range r; 15 private ElementType!Range prev; 16 17 this(Range range, ElementType!Range _prev) 18 { 19 r = range; 20 prev = _prev; 21 } 22 23 @property bool empty() 24 { 25 return r.empty || !fun(prev, r.front); 26 } 27 28 @property ElementType!Range front() { return r.front; } 29 void popFront() { r.popFront(); } 30 } 31 32 private template ChunkByImplIsUnary(alias pred, Range) 33 { 34 static if (is(typeof(binaryFun!pred(ElementType!Range.init, 35 ElementType!Range.init)) : bool)) 36 enum ChunkByImplIsUnary = false; 37 else static if (is(typeof( 38 unaryFun!pred(ElementType!Range.init) == 39 unaryFun!pred(ElementType!Range.init)))) 40 enum ChunkByImplIsUnary = true; 41 else 42 static assert(0, "chunkBy expects either a binary predicate or "~ 43 "a unary predicate on range elements of type: "~ 44 ElementType!Range.stringof); 45 } 46 47 // Implementation of chunkBy for non-forward input ranges. 48 private struct ChunkByImpl(alias pred, Range) 49 if (isInputRange!Range && !isForwardRange!Range) 50 { 51 enum bool isUnary = ChunkByImplIsUnary!(pred, Range); 52 53 static if (isUnary) 54 alias eq = binaryFun!((a, b) => unaryFun!pred(a) == unaryFun!pred(b)); 55 else 56 alias eq = binaryFun!pred; 57 58 private Range r; 59 private ElementType!Range _prev; 60 61 this(Range _r) 62 { 63 r = _r; 64 if (!empty) 65 { 66 // Check reflexivity if predicate is claimed to be an equivalence 67 // relation. 68 assert(eq(r.front, r.front), 69 "predicate is not reflexive"); 70 71 // _prev's type may be a nested struct, so must be initialized 72 // directly in the constructor (cannot call savePred()). 73 _prev = r.front; 74 } 75 else 76 { 77 // We won't use _prev, but must be initialized. 78 _prev = typeof(_prev).init; 79 } 80 } 81 @property bool empty() { return r.empty; } 82 83 @property auto front() 84 { 85 static if (isUnary) 86 { 87 import std.typecons : tuple; 88 return tuple(unaryFun!pred(_prev), 89 ChunkByChunkImpl!(eq, Range)(r, _prev)); 90 } 91 else 92 { 93 return ChunkByChunkImpl!(eq, Range)(r, _prev); 94 } 95 } 96 97 void popFront() 98 { 99 while (!r.empty) 100 { 101 if (!eq(_prev, r.front)) 102 { 103 _prev = r.front; 104 break; 105 } 106 r.popFront(); 107 } 108 } 109 } 110 111 // Single-pass implementation of chunkBy for forward ranges. 112 private struct ChunkByImpl(alias pred, Range) 113 if (isForwardRange!Range) 114 { 115 import std.typecons : RefCounted; 116 117 enum bool isUnary = ChunkByImplIsUnary!(pred, Range); 118 119 static if (isUnary) 120 alias eq = binaryFun!((a, b) => unaryFun!pred(a) == unaryFun!pred(b)); 121 else 122 alias eq = binaryFun!pred; 123 124 // Outer range 125 static struct Impl 126 { 127 size_t groupNum; 128 Range current; 129 Range next; 130 } 131 132 // Inner range 133 static struct Group 134 { 135 private size_t groupNum; 136 private Range start; 137 private Range current; 138 139 private RefCounted!Impl mothership; 140 141 this(RefCounted!Impl origin) 142 { 143 groupNum = origin.groupNum; 144 145 start = origin.current.save; 146 current = origin.current.save; 147 assert(!start.empty); 148 149 mothership = origin; 150 151 // Note: this requires reflexivity. 152 assert(eq(start.front, current.front), 153 "predicate is not reflexive"); 154 } 155 156 @property bool empty() { return groupNum == size_t.max; } 157 @property auto ref front() { return current.front; } 158 159 void popFront() 160 { 161 current.popFront(); 162 163 // Note: this requires transitivity. 164 if (current.empty || !eq(start.front, current.front)) 165 { 166 if (groupNum == mothership.groupNum) 167 { 168 // If parent range hasn't moved on yet, help it along by 169 // saving location of start of next Group. 170 mothership.next = current.save; 171 } 172 173 groupNum = size_t.max; 174 } 175 } 176 177 @property auto save() 178 { 179 auto copy = this; 180 copy.current = current.save; 181 return copy; 182 } 183 } 184 static assert(isForwardRange!Group); 185 186 private RefCounted!Impl impl; 187 188 this(Range r) 189 { 190 impl = RefCounted!Impl(0, r, r.save); 191 } 192 193 @property bool empty() { return impl.current.empty; } 194 195 @property auto front() 196 { 197 static if (isUnary) 198 { 199 import std.typecons : tuple; 200 return tuple(unaryFun!pred(impl.current.front), Group(impl)); 201 } 202 else 203 { 204 return Group(impl); 205 } 206 } 207 208 void popFront() 209 { 210 // Scan for next group. If we're lucky, one of our Groups would have 211 // already set .next to the start of the next group, in which case the 212 // loop is skipped. 213 while (!impl.next.empty && eq(impl.current.front, impl.next.front)) 214 { 215 impl.next.popFront(); 216 } 217 218 impl.current = impl.next.save; 219 220 // Indicate to any remaining Groups that we have moved on. 221 impl.groupNum++; 222 } 223 224 @property auto save() 225 { 226 // Note: the new copy of the range will be detached from any existing 227 // satellite Groups, and will not benefit from the .next acceleration. 228 return typeof(this)(impl.current.save); 229 } 230 231 static assert(isForwardRange!(typeof(this))); 232 } 233 234 @system unittest 235 { 236 import std.algorithm.comparison : equal; 237 238 size_t popCount = 0; 239 class RefFwdRange 240 { 241 int[] impl; 242 243 @safe nothrow: 244 245 this(int[] data) { impl = data; } 246 @property bool empty() { return impl.empty; } 247 @property auto ref front() { return impl.front; } 248 void popFront() 249 { 250 impl.popFront(); 251 popCount++; 252 } 253 @property auto save() { return new RefFwdRange(impl); } 254 } 255 static assert(isForwardRange!RefFwdRange); 256 257 auto testdata = new RefFwdRange([1, 3, 5, 2, 4, 7, 6, 8, 9]); 258 auto groups = testdata.chunkBy!((a,b) => (a % 2) == (b % 2)); 259 auto outerSave1 = groups.save; 260 261 // Sanity test 262 assert(groups.equal!equal([[1, 3, 5], [2, 4], [7], [6, 8], [9]])); 263 assert(groups.empty); 264 265 // Performance test for single-traversal use case: popFront should not have 266 // been called more times than there are elements if we traversed the 267 // segmented range exactly once. 268 assert(popCount == 9); 269 270 // Outer range .save test 271 groups = outerSave1.save; 272 assert(!groups.empty); 273 274 // Inner range .save test 275 auto grp1 = groups.front.save; 276 auto grp1b = grp1.save; 277 assert(grp1b.equal([1, 3, 5])); 278 assert(grp1.save.equal([1, 3, 5])); 279 280 // Inner range should remain consistent after outer range has moved on. 281 groups.popFront(); 282 assert(grp1.save.equal([1, 3, 5])); 283 284 // Inner range should not be affected by subsequent inner ranges. 285 assert(groups.front.equal([2, 4])); 286 assert(grp1.save.equal([1, 3, 5])); 287 } 288 289 /** 290 * Chunks an input range into subranges of equivalent adjacent elements. 291 * In other languages this is often called `partitionBy`, `groupBy` 292 * or `sliceWhen`. 293 * 294 * Equivalence is defined by the predicate $(D pred), which can be either 295 * binary, which is passed to $(REF binaryFun, std,functional), or unary, which is 296 * passed to $(REF unaryFun, std,functional). In the binary form, two _range elements 297 * $(D a) and $(D b) are considered equivalent if $(D pred(a,b)) is true. In 298 * unary form, two elements are considered equivalent if $(D pred(a) == pred(b)) 299 * is true. 300 * 301 * This predicate must be an equivalence relation, that is, it must be 302 * reflexive ($(D pred(x,x)) is always true), symmetric 303 * ($(D pred(x,y) == pred(y,x))), and transitive ($(D pred(x,y) && pred(y,z)) 304 * implies $(D pred(x,z))). If this is not the case, the range returned by 305 * chunkBy may assert at runtime or behave erratically. 306 * 307 * Params: 308 * pred = Predicate for determining equivalence. 309 * r = An $(REF_ALTTEXT input range, isInputRange, std,range,primitives) to be chunked. 310 * 311 * Returns: With a binary predicate, a range of ranges is returned in which 312 * all elements in a given subrange are equivalent under the given predicate. 313 * With a unary predicate, a range of tuples is returned, with the tuple 314 * consisting of the result of the unary predicate for each subrange, and the 315 * subrange itself. 316 * 317 * Notes: 318 * 319 * Equivalent elements separated by an intervening non-equivalent element will 320 * appear in separate subranges; this function only considers adjacent 321 * equivalence. Elements in the subranges will always appear in the same order 322 * they appear in the original range. 323 * 324 * See_Also: 325 * $(LREF group), which collapses adjacent equivalent elements into a single 326 * element. 327 */ 328 auto chunkBy(alias pred, Range)(Range r) 329 if (isInputRange!Range) 330 { 331 return ChunkByImpl!(pred, Range)(r); 332 } 333 334 /// Showing usage with binary predicate: 335 /*FIXME: @safe*/ @system unittest 336 { 337 import std.algorithm.comparison : equal; 338 339 // Grouping by particular attribute of each element: 340 auto data = [ 341 [1, 1], 342 [1, 2], 343 [2, 2], 344 [2, 3] 345 ]; 346 347 auto r1 = data.chunkBy!((a,b) => a[0] == b[0]); 348 assert(r1.equal!equal([ 349 [[1, 1], [1, 2]], 350 [[2, 2], [2, 3]] 351 ])); 352 353 auto r2 = data.chunkBy!((a,b) => a[1] == b[1]); 354 assert(r2.equal!equal([ 355 [[1, 1]], 356 [[1, 2], [2, 2]], 357 [[2, 3]] 358 ])); 359 } 360 361 version(none) // this example requires support for non-equivalence relations 362 @safe unittest 363 { 364 // Grouping by maximum adjacent difference: 365 import std.math : abs; 366 auto r3 = [1, 3, 2, 5, 4, 9, 10].chunkBy!((a, b) => abs(a-b) < 3); 367 assert(r3.equal!equal([ 368 [1, 3, 2], 369 [5, 4], 370 [9, 10] 371 ])); 372 373 } 374 375 /// Showing usage with unary predicate: 376 /* FIXME: pure @safe nothrow*/ @system unittest 377 { 378 import std.algorithm.comparison : equal; 379 import std.range.primitives; 380 import std.typecons : tuple; 381 382 // Grouping by particular attribute of each element: 383 auto range = 384 [ 385 [1, 1], 386 [1, 1], 387 [1, 2], 388 [2, 2], 389 [2, 3], 390 [2, 3], 391 [3, 3] 392 ]; 393 394 auto byX = chunkBy!(a => a[0])(range); 395 auto expected1 = 396 [ 397 tuple(1, [[1, 1], [1, 1], [1, 2]]), 398 tuple(2, [[2, 2], [2, 3], [2, 3]]), 399 tuple(3, [[3, 3]]) 400 ]; 401 foreach (e; byX) 402 { 403 assert(!expected1.empty); 404 assert(e[0] == expected1.front[0]); 405 assert(e[1].equal(expected1.front[1])); 406 expected1.popFront(); 407 } 408 409 auto byY = chunkBy!(a => a[1])(range); 410 auto expected2 = 411 [ 412 tuple(1, [[1, 1], [1, 1]]), 413 tuple(2, [[1, 2], [2, 2]]), 414 tuple(3, [[2, 3], [2, 3], [3, 3]]) 415 ]; 416 foreach (e; byY) 417 { 418 assert(!expected2.empty); 419 assert(e[0] == expected2.front[0]); 420 assert(e[1].equal(expected2.front[1])); 421 expected2.popFront(); 422 } 423 } 424 425 /*FIXME: pure @safe nothrow*/ @system unittest 426 { 427 import std.algorithm.comparison : equal; 428 import std.typecons : tuple; 429 430 struct Item { int x, y; } 431 432 // Force R to have only an input range API with reference semantics, so 433 // that we're not unknowingly making use of array semantics outside of the 434 // range API. 435 class RefInputRange(R) 436 { 437 R data; 438 this(R _data) pure @safe nothrow { data = _data; } 439 @property bool empty() pure @safe nothrow { return data.empty; } 440 @property auto front() pure @safe nothrow { return data.front; } 441 void popFront() pure @safe nothrow { data.popFront(); } 442 } 443 auto refInputRange(R)(R range) { return new RefInputRange!R(range); } 444 445 { 446 auto arr = [ Item(1,2), Item(1,3), Item(2,3) ]; 447 static assert(isForwardRange!(typeof(arr))); 448 449 auto byX = chunkBy!(a => a.x)(arr); 450 static assert(isForwardRange!(typeof(byX))); 451 452 auto byX_subrange1 = byX.front[1].save; 453 auto byX_subrange2 = byX.front[1].save; 454 static assert(isForwardRange!(typeof(byX_subrange1))); 455 static assert(isForwardRange!(typeof(byX_subrange2))); 456 457 byX.popFront(); 458 assert(byX_subrange1.equal([ Item(1,2), Item(1,3) ])); 459 byX_subrange1.popFront(); 460 assert(byX_subrange1.equal([ Item(1,3) ])); 461 assert(byX_subrange2.equal([ Item(1,2), Item(1,3) ])); 462 463 auto byY = chunkBy!(a => a.y)(arr); 464 static assert(isForwardRange!(typeof(byY))); 465 466 auto byY2 = byY.save; 467 static assert(is(typeof(byY) == typeof(byY2))); 468 byY.popFront(); 469 assert(byY.front[0] == 3); 470 assert(byY.front[1].equal([ Item(1,3), Item(2,3) ])); 471 assert(byY2.front[0] == 2); 472 assert(byY2.front[1].equal([ Item(1,2) ])); 473 } 474 475 // Test non-forward input ranges. 476 { 477 auto range = refInputRange([ Item(1,1), Item(1,2), Item(2,2) ]); 478 auto byX = chunkBy!(a => a.x)(range); 479 assert(byX.front[0] == 1); 480 assert(byX.front[1].equal([ Item(1,1), Item(1,2) ])); 481 byX.popFront(); 482 assert(byX.front[0] == 2); 483 assert(byX.front[1].equal([ Item(2,2) ])); 484 byX.popFront(); 485 assert(byX.empty); 486 assert(range.empty); 487 488 range = refInputRange([ Item(1,1), Item(1,2), Item(2,2) ]); 489 auto byY = chunkBy!(a => a.y)(range); 490 assert(byY.front[0] == 1); 491 assert(byY.front[1].equal([ Item(1,1) ])); 492 byY.popFront(); 493 assert(byY.front[0] == 2); 494 assert(byY.front[1].equal([ Item(1,2), Item(2,2) ])); 495 byY.popFront(); 496 assert(byY.empty); 497 assert(range.empty); 498 } 499 } 500 501 // Issue 13595 502 version(none) // This requires support for non-equivalence relations 503 @system unittest 504 { 505 import std.algorithm.comparison : equal; 506 auto r = [1, 2, 3, 4, 5, 6, 7, 8, 9].chunkBy!((x, y) => ((x*y) % 3) == 0); 507 assert(r.equal!equal([ 508 [1], 509 [2, 3, 4], 510 [5, 6, 7], 511 [8, 9] 512 ])); 513 } 514 515 // Issue 13805 516 @system unittest 517 { 518 [""].map!((s) => s).chunkBy!((x, y) => true); 519 } 520 }