1 module nxt.chunking; 2 3 version(none) 4 { 5 import std.range.primitives : ElementType, empty, front, back, popFront, isForwardRange, isInputRange; 6 import std.functional : unaryFun; 7 8 // Used by implementation of chunkBy for non-forward input ranges. 9 private struct ChunkByChunkImpl(alias pred, Range) 10 if (isInputRange!Range && !isForwardRange!Range) 11 { 12 alias fun = binaryFun!pred; 13 14 private Range r; 15 private ElementType!Range prev; 16 17 this(Range range, ElementType!Range _prev) 18 { 19 r = range; 20 prev = _prev; 21 } 22 23 @property bool empty() => r.empty || !fun(prev, r.front); 24 @property ElementType!Range front() => r.front; 25 void popFront() => r.popFront(); 26 } 27 28 private template ChunkByImplIsUnary(alias pred, Range) 29 { 30 static if (is(typeof(binaryFun!pred(ElementType!Range.init, 31 ElementType!Range.init)) : bool)) 32 enum ChunkByImplIsUnary = false; 33 else static if (is(typeof( 34 unaryFun!pred(ElementType!Range.init) == 35 unaryFun!pred(ElementType!Range.init)))) 36 enum ChunkByImplIsUnary = true; 37 else 38 static assert(0, "chunkBy expects either a binary predicate or "~ 39 "a unary predicate on range elements of type: "~ 40 ElementType!Range.stringof); 41 } 42 43 // Implementation of chunkBy for non-forward input ranges. 44 private struct ChunkByImpl(alias pred, Range) 45 if (isInputRange!Range && !isForwardRange!Range) 46 { 47 enum bool isUnary = ChunkByImplIsUnary!(pred, Range); 48 49 static if (isUnary) 50 alias eq = binaryFun!((a, b) => unaryFun!pred(a) == unaryFun!pred(b)); 51 else 52 alias eq = binaryFun!pred; 53 54 private Range r; 55 private ElementType!Range _prev; 56 57 this(Range _r) 58 { 59 r = _r; 60 if (!empty) 61 { 62 // Check reflexivity if predicate is claimed to be an equivalence 63 // relation. 64 assert(eq(r.front, r.front), 65 "predicate is not reflexive"); 66 67 // _prev's type may be a nested struct, so must be initialized 68 // directly in the constructor (cannot call savePred()). 69 _prev = r.front; 70 } 71 else 72 { 73 // We won't use _prev, but must be initialized. 74 _prev = typeof(_prev).init; 75 } 76 } 77 @property bool empty() => r.empty; 78 79 @property auto front() 80 { 81 static if (isUnary) 82 { 83 import std.typecons : tuple; 84 return tuple(unaryFun!pred(_prev), 85 ChunkByChunkImpl!(eq, Range)(r, _prev)); 86 } 87 else 88 return ChunkByChunkImpl!(eq, Range)(r, _prev); 89 } 90 91 void popFront() 92 { 93 while (!r.empty) 94 { 95 if (!eq(_prev, r.front)) 96 { 97 _prev = r.front; 98 break; 99 } 100 r.popFront(); 101 } 102 } 103 } 104 105 // Single-pass implementation of chunkBy for forward ranges. 106 private struct ChunkByImpl(alias pred, Range) 107 if (isForwardRange!Range) 108 { 109 import std.typecons : RefCounted; 110 111 enum bool isUnary = ChunkByImplIsUnary!(pred, Range); 112 113 static if (isUnary) 114 alias eq = binaryFun!((a, b) => unaryFun!pred(a) == unaryFun!pred(b)); 115 else 116 alias eq = binaryFun!pred; 117 118 // Outer range 119 static struct Impl 120 { 121 size_t groupNum; 122 Range current; 123 Range next; 124 } 125 126 // Inner range 127 static struct Group 128 { 129 private size_t groupNum; 130 private Range start; 131 private Range current; 132 133 private RefCounted!Impl mothership; 134 135 this(RefCounted!Impl origin) 136 { 137 groupNum = origin.groupNum; 138 139 start = origin.current.save; 140 current = origin.current.save; 141 assert(!start.empty); 142 143 mothership = origin; 144 145 // Note: this requires reflexivity. 146 assert(eq(start.front, current.front), 147 "predicate is not reflexive"); 148 } 149 150 @property bool empty() => groupNum == size_t.max; 151 @property auto ref front() => current.front; 152 153 void popFront() 154 { 155 current.popFront(); 156 157 // Note: this requires transitivity. 158 if (current.empty || !eq(start.front, current.front)) 159 { 160 if (groupNum == mothership.groupNum) 161 { 162 // If parent range hasn't moved on yet, help it along by 163 // saving location of start of next Group. 164 mothership.next = current.save; 165 } 166 167 groupNum = size_t.max; 168 } 169 } 170 171 @property auto save() 172 { 173 auto copy = this; 174 copy.current = current.save; 175 return copy; 176 } 177 } 178 static assert(isForwardRange!Group); 179 180 private RefCounted!Impl impl; 181 182 this(Range r) 183 { 184 impl = RefCounted!Impl(0, r, r.save); 185 } 186 187 @property bool empty() => impl.current.empty; 188 189 @property auto front() 190 { 191 static if (isUnary) 192 { 193 import std.typecons : tuple; 194 return tuple(unaryFun!pred(impl.current.front), Group(impl)); 195 } 196 else 197 return Group(impl); 198 } 199 200 void popFront() 201 { 202 // Scan for next group. If we're lucky, one of our Groups would have 203 // already set .next to the start of the next group, in which case the 204 // loop is skipped. 205 while (!impl.next.empty && 206 eq(impl.current.front, impl.next.front)) 207 impl.next.popFront(); 208 impl.current = impl.next.save; 209 // Indicate to any remaining Groups that we have moved on. 210 impl.groupNum++; 211 } 212 213 // Note: the new copy of the range will be detached from any existing 214 // satellite Groups, and will not benefit from the .next acceleration. 215 @property auto save() => typeof(this)(impl.current.save); 216 217 static assert(isForwardRange!(typeof(this))); 218 } 219 220 @system unittest 221 { 222 import std.algorithm.comparison : equal; 223 224 size_t popCount = 0; 225 class RefFwdRange 226 { 227 int[] impl; 228 @safe nothrow: 229 this(int[] data) { impl = data; } 230 @property bool empty() => impl.empty; 231 @property auto ref front() => impl.front; 232 void popFront() 233 { 234 impl.popFront(); 235 popCount++; 236 } 237 @property auto save() => new RefFwdRange(impl); 238 } 239 static assert(isForwardRange!RefFwdRange); 240 241 auto testdata = new RefFwdRange([1, 3, 5, 2, 4, 7, 6, 8, 9]); 242 auto groups = testdata.chunkBy!((a,b) => (a % 2) == (b % 2)); 243 auto outerSave1 = groups.save; 244 245 // Sanity test 246 assert(groups.equal!equal([[1, 3, 5], [2, 4], [7], [6, 8], [9]])); 247 assert(groups.empty); 248 249 // Performance test for single-traversal use case: popFront should not have 250 // been called more times than there are elements if we traversed the 251 // segmented range exactly once. 252 assert(popCount == 9); 253 254 // Outer range .save test 255 groups = outerSave1.save; 256 assert(!groups.empty); 257 258 // Inner range .save test 259 auto grp1 = groups.front.save; 260 auto grp1b = grp1.save; 261 assert(grp1b.equal([1, 3, 5])); 262 assert(grp1.save.equal([1, 3, 5])); 263 264 // Inner range should remain consistent after outer range has moved on. 265 groups.popFront(); 266 assert(grp1.save.equal([1, 3, 5])); 267 268 // Inner range should not be affected by subsequent inner ranges. 269 assert(groups.front.equal([2, 4])); 270 assert(grp1.save.equal([1, 3, 5])); 271 } 272 273 /** 274 * Chunks an input range into subranges of equivalent adjacent elements. 275 * In other languages this is often called `partitionBy`, `groupBy` 276 * or `sliceWhen`. 277 * 278 * Equivalence is defined by the predicate $(D pred), which can be either 279 * binary, which is passed to $(REF binaryFun, std,functional), or unary, which is 280 * passed to $(REF unaryFun, std,functional). In the binary form, two _range elements 281 * $(D a) and $(D b) are considered equivalent if $(D pred(a,b)) is true. In 282 * unary form, two elements are considered equivalent if $(D pred(a) == pred(b)) 283 * is true. 284 * 285 * This predicate must be an equivalence relation, that is, it must be 286 * reflexive ($(D pred(x,x)) is always true), symmetric 287 * ($(D pred(x,y) == pred(y,x))), and transitive ($(D pred(x,y) && pred(y,z)) 288 * implies $(D pred(x,z))). If this is not the case, the range returned by 289 * chunkBy may assert at runtime or behave erratically. 290 * 291 * Params: 292 * pred = Predicate for determining equivalence. 293 * r = An $(REF_ALTTEXT input range, isInputRange, std,range,primitives) to be chunked. 294 * 295 * Returns: With a binary predicate, a range of ranges is returned in which 296 * all elements in a given subrange are equivalent under the given predicate. 297 * With a unary predicate, a range of tuples is returned, with the tuple 298 * consisting of the result of the unary predicate for each subrange, and the 299 * subrange itself. 300 * 301 * Notes: 302 * 303 * Equivalent elements separated by an intervening non-equivalent element will 304 * appear in separate subranges; this function only considers adjacent 305 * equivalence. Elements in the subranges will always appear in the same order 306 * they appear in the original range. 307 * 308 * See_Also: 309 * $(LREF group), which collapses adjacent equivalent elements into a single 310 * element. 311 */ 312 auto chunkBy(alias pred, Range)(Range r) 313 if (isInputRange!Range) 314 => ChunkByImpl!(pred, Range)(r); 315 316 /// Showing usage with binary predicate: 317 /*FIXME: @safe*/ @system unittest 318 { 319 import std.algorithm.comparison : equal; 320 321 // Grouping by particular attribute of each element: 322 auto data = [ 323 [1, 1], 324 [1, 2], 325 [2, 2], 326 [2, 3] 327 ]; 328 329 auto r1 = data.chunkBy!((a,b) => a[0] == b[0]); 330 assert(r1.equal!equal([ 331 [[1, 1], [1, 2]], 332 [[2, 2], [2, 3]] 333 ])); 334 335 auto r2 = data.chunkBy!((a,b) => a[1] == b[1]); 336 assert(r2.equal!equal([ 337 [[1, 1]], 338 [[1, 2], [2, 2]], 339 [[2, 3]] 340 ])); 341 } 342 343 version(none) // this example requires support for non-equivalence relations 344 @safe unittest 345 { 346 // Grouping by maximum adjacent difference: 347 import std.math : abs; 348 auto r3 = [1, 3, 2, 5, 4, 9, 10].chunkBy!((a, b) => abs(a-b) < 3); 349 assert(r3.equal!equal([ 350 [1, 3, 2], 351 [5, 4], 352 [9, 10] 353 ])); 354 355 } 356 357 /// Showing usage with unary predicate: 358 /* FIXME: pure @safe nothrow*/ @system unittest 359 { 360 import std.algorithm.comparison : equal; 361 import std.range.primitives; 362 import std.typecons : tuple; 363 364 // Grouping by particular attribute of each element: 365 auto range = 366 [ 367 [1, 1], 368 [1, 1], 369 [1, 2], 370 [2, 2], 371 [2, 3], 372 [2, 3], 373 [3, 3] 374 ]; 375 376 auto byX = chunkBy!(a => a[0])(range); 377 auto expected1 = 378 [ 379 tuple(1, [[1, 1], [1, 1], [1, 2]]), 380 tuple(2, [[2, 2], [2, 3], [2, 3]]), 381 tuple(3, [[3, 3]]) 382 ]; 383 foreach (e; byX) 384 { 385 assert(!expected1.empty); 386 assert(e[0] == expected1.front[0]); 387 assert(e[1].equal(expected1.front[1])); 388 expected1.popFront(); 389 } 390 391 auto byY = chunkBy!(a => a[1])(range); 392 auto expected2 = 393 [ 394 tuple(1, [[1, 1], [1, 1]]), 395 tuple(2, [[1, 2], [2, 2]]), 396 tuple(3, [[2, 3], [2, 3], [3, 3]]) 397 ]; 398 foreach (e; byY) 399 { 400 assert(!expected2.empty); 401 assert(e[0] == expected2.front[0]); 402 assert(e[1].equal(expected2.front[1])); 403 expected2.popFront(); 404 } 405 } 406 407 /*FIXME: pure @safe nothrow*/ @system unittest 408 { 409 import std.algorithm.comparison : equal; 410 import std.typecons : tuple; 411 412 struct Item { int x, y; } 413 414 // Force R to have only an input range API with reference semantics, so 415 // that we're not unknowingly making use of array semantics outside of the 416 // range API. 417 class RefInputRange(R) 418 { 419 R data; 420 this(R _data) pure @safe nothrow { data = _data; } 421 @property bool empty() pure @safe nothrow => data.empty; 422 @property auto front() pure @safe nothrow => data.front; 423 void popFront() pure @safe nothrow => data.popFront(); 424 } 425 auto refInputRange(R)(R range) => new RefInputRange!R(range); 426 427 { 428 auto arr = [ Item(1,2), Item(1,3), Item(2,3) ]; 429 static assert(isForwardRange!(typeof(arr))); 430 431 auto byX = chunkBy!(a => a.x)(arr); 432 static assert(isForwardRange!(typeof(byX))); 433 434 auto byX_subrange1 = byX.front[1].save; 435 auto byX_subrange2 = byX.front[1].save; 436 static assert(isForwardRange!(typeof(byX_subrange1))); 437 static assert(isForwardRange!(typeof(byX_subrange2))); 438 439 byX.popFront(); 440 assert(byX_subrange1.equal([ Item(1,2), Item(1,3) ])); 441 byX_subrange1.popFront(); 442 assert(byX_subrange1.equal([ Item(1,3) ])); 443 assert(byX_subrange2.equal([ Item(1,2), Item(1,3) ])); 444 445 auto byY = chunkBy!(a => a.y)(arr); 446 static assert(isForwardRange!(typeof(byY))); 447 448 auto byY2 = byY.save; 449 static assert(is(typeof(byY) == typeof(byY2))); 450 byY.popFront(); 451 assert(byY.front[0] == 3); 452 assert(byY.front[1].equal([ Item(1,3), Item(2,3) ])); 453 assert(byY2.front[0] == 2); 454 assert(byY2.front[1].equal([ Item(1,2) ])); 455 } 456 457 // Test non-forward input ranges. 458 { 459 auto range = refInputRange([ Item(1,1), Item(1,2), Item(2,2) ]); 460 auto byX = chunkBy!(a => a.x)(range); 461 assert(byX.front[0] == 1); 462 assert(byX.front[1].equal([ Item(1,1), Item(1,2) ])); 463 byX.popFront(); 464 assert(byX.front[0] == 2); 465 assert(byX.front[1].equal([ Item(2,2) ])); 466 byX.popFront(); 467 assert(byX.empty); 468 assert(range.empty); 469 470 range = refInputRange([ Item(1,1), Item(1,2), Item(2,2) ]); 471 auto byY = chunkBy!(a => a.y)(range); 472 assert(byY.front[0] == 1); 473 assert(byY.front[1].equal([ Item(1,1) ])); 474 byY.popFront(); 475 assert(byY.front[0] == 2); 476 assert(byY.front[1].equal([ Item(1,2), Item(2,2) ])); 477 byY.popFront(); 478 assert(byY.empty); 479 assert(range.empty); 480 } 481 } 482 483 // Issue 13595 484 version(none) // This requires support for non-equivalence relations 485 @system unittest 486 { 487 import std.algorithm.comparison : equal; 488 auto r = [1, 2, 3, 4, 5, 6, 7, 8, 9].chunkBy!((x, y) => ((x*y) % 3) == 0); 489 assert(r.equal!equal([ 490 [1], 491 [2, 3, 4], 492 [5, 6, 7], 493 [8, 9] 494 ])); 495 } 496 497 // Issue 13805 498 @system unittest 499 { 500 [""].map!((s) => s).chunkBy!((x, y) => true); 501 } 502 }