/** A GC with segregated allocations using D's `static foreach`.
 *
 * Copyright: Copyright Per Nordlöw 2019 - .
 * License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
 * Authors: Per Nordlöw
 */
module segregated_gc;

/** Map `nbytes` of read/write, private anonymous memory from the OS.
 *
 * Returns: pointer to the new mapping, or `null` on failure
 * (`MAP_FAILED` is normalized to `null` for callers).
 *
 * NOTE(review): POSIX-only (`mmap`/`MAP_ANON`); there is no Windows
 * fallback in this module — confirm target platforms before porting.
 */
private static void *os_mem_map(size_t nbytes) nothrow @nogc
{
    void *p;

    import core.sys.posix.sys.mman;
    p = mmap(null, nbytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
    return (p == MAP_FAILED) ? null : p; // normalize MAP_FAILED to null
}

/** Unmap a region previously returned by `os_mem_map`.
 *
 * Returns: 0 on success, non-zero on failure (the `munmap` convention).
 */
private static int os_mem_unmap(void *base, size_t nbytes) nothrow @nogc
{
    import core.sys.posix.sys.mman;
    return munmap(base, nbytes);
}

// import gc.os : os_mem_map, os_mem_unmap;

import core.gc.config;
import core.gc.gcinterface;

import paged_dynamic_array : Array = PagedDynamicArray;
import simple_static_bitarray : StaticBitArray;

import core.stdc.stdio: printf;
import cstdlib = core.stdc.stdlib : calloc, free, malloc, realloc;
static import core.memory;

/* debug = PRINTF; */

extern (C) void onOutOfMemoryError(void* pretend_sideffect = null) @trusted pure nothrow @nogc; /* dmd @@@BUG11461@@@ */

private
{
    extern (C)
    {
        // to allow compilation of this module without access to the rt package,
        // make these functions available from rt.lifetime
        void rt_finalizeFromGC(void* p, size_t size, uint attr) nothrow;
        int rt_hasFinalizerInSegment(void* p, size_t size, uint attr, in void[] segment) nothrow;

        // Declared as an extern instead of importing core.exception
        // to avoid inlining - see issue 13725.
        void onInvalidMemoryOperationError() @nogc nothrow;
        void onOutOfMemoryErrorNoGC() @nogc nothrow;
    }
}

enum WORDSIZE = size_t.sizeof; ///< Size of word type (size_t).
// NOTE(review): hard-coded rather than queried at runtime; incorrect on
// systems with non-4KiB pages (e.g. 16KiB) — confirm per target.
enum PAGESIZE = 4096; ///< Page size in bytes. Linux $(shell getconf PAGESIZE).

/// Small slot sizes classes (in bytes).
static immutable smallSizeClasses = [8,
                                     16,
                                     24, // TODO: move
                                     32,
                                     40, // TODO: move
                                     48, // TODO: move
                                     56, // TODO: move
                                     64,
                                     72,
                                     80,
                                     88,
                                     96, // TODO: move
                                     104, // TODO: move
                                     112, // TODO: move
                                     120, // TODO: move
                                     128, // TODO: 128 +64,
                                     256, // TODO: 256 + 128,
                                     512, // TODO: 512 + 256,
                                     1024, // TODO: 1024 + 512,
                                     2048, // TODO: 2048 + 1024,
    ];

/// Medium slot sizes classes (in bytes).
version(none)
static immutable mediumSizeClasses = [1 << 12, // 4096
                                      1 << 13, // 8192
                                      1 << 14, // 16384
                                      1 << 15, // 32768
                                      1 << 16, // 65536
    ];

/** Round `sz` up to the nearest power of two (the size-class ceiling).
 *
 * Fix: the previous body was just `nextPow2(sz - 1)`, which returned 0 for
 * both `sz == 0` (after `sz - 1` wrapped to `size_t.max`) and `sz == 1` —
 * the latter was flagged by a commented-out TODO assert below. 0 and 1 are
 * their own power-of-two ceilings, so they are returned unchanged. Callers
 * (`SmallPools.qalloc`) clamp any result below `smallSizeClasses[0]` to 8,
 * so this change is backward compatible.
 */
size_t ceilPow2(size_t sz) @safe pure nothrow @nogc
{
    return (sz <= 1) ? sz : nextPow2(sz - 1);
}

@safe pure nothrow @nogc unittest
{
    assert(ceilPow2(0) == 0);
    assert(ceilPow2(1) == 1); // was a TODO before the fix above
    assert(ceilPow2(2) == 2);
    assert(ceilPow2(3) == 4);
    assert(ceilPow2(4) == 4);
    assert(ceilPow2(5) == 8);
    assert(ceilPow2(6) == 8);
    assert(ceilPow2(7) == 8);
    assert(ceilPow2(8) == 8);
    assert(ceilPow2(9) == 16);
}

/// Small slot foreach slot contains `wordCount` machine words.
struct SmallSlot(uint wordCount)
if (wordCount >= 1)
{
    size_t[wordCount] words; // raw storage; interpretation is up to the size class
}

@safe pure nothrow @nogc unittest
{
    SmallSlot!(1) x;
    SmallSlot!(1) y;
}

/// Small page storing slots of size `sizeClass`.
/** Fixed-size page holding `slotCount` slots of `sizeClass` bytes each,
 * padded so the whole struct is exactly one OS page (`PAGESIZE`).
 */
struct SmallPage(uint sizeClass)
if (sizeClass >= smallSizeClasses[0])
{
    enum wordCount = sizeClass/WORDSIZE;  // machine words per slot
    static assert(sizeClass % WORDSIZE == 0, sizeClass);
    enum slotCount = PAGESIZE/sizeClass;  // slots that fit in one page
    alias Slot = SmallSlot!(wordCount);

    Slot[slotCount] slots;
    // pad the tail so the page is exactly PAGESIZE bytes
    byte[PAGESIZE-slots.sizeof] __padding;
    static assert(this.sizeof == PAGESIZE); // TODO: adjust if pages of different byte sizes are preferred
}
enum minimumSmallPageWordCount = PAGESIZE/WORDSIZE; // TODO: may be computed

/** Bookkeeping for one `SmallPage`: the page's address plus per-slot
 * usage and mark bits.
 */
struct SmallPageTable(uint sizeClass)
{
    alias Page = SmallPage!(sizeClass);
    Page* pagePtr;
    enum slotCount = PAGESIZE/sizeClass;

    // bit `i` indicates if slot `i` in `*pagePtr` currently contains an initialized value
    StaticBitArray!(slotCount) slotUsages; // TODO: benchmark with a byte-array instead for comparison

    // bit `i` indicates if slot `i` in `*pagePtr` has been marked
    StaticBitArray!(slotCount) slotMarks;
}

/// Small pool of pages.
struct SmallPool(uint sizeClass, bool pointerFlag)
if (sizeClass >= smallSizeClasses[0])
{
    alias Page = SmallPage!(sizeClass);

    this(size_t pageTableCapacity)
    {
        pageTables.capacity = pageTableCapacity; // pre-reserve table storage
    }

    /** Bump-allocate the next free slot, mapping a fresh page when the
     * current one is exhausted. Returns a pointer to the slot.
     *
     * Fix: the previous version reset `slotIndex = 1` on every new page, so
     * `pageIndex = slotIndex / Page.slotCount` collapsed back to 0 after the
     * second page was mapped and allocations were handed out from page 0
     * again, double-allocating live slots. `slotIndex` is now globally
     * monotonic across pages (matching its field comment below) and the
     * in-page position is derived via `slotIndex % Page.slotCount`.
     * Also fixed: a failed `os_mem_map` (null) was dereferenced; a literal
     * `int 0` was passed for a `%lu` varargs specifier.
     */
    void* allocateNext() @trusted // pure nothrow @nogc
    {
        version(LDC) pragma(inline, true);

        // TODO: scan `slotUsages` at slotIndex using core.bitop.bsf to find
        // first free page if any. Use modification of `indexOfFirstOne` that
        // takes startIndex being `slotIndex` If no hit set `slotIndex` to
        // `Page.slotCount`
        // TODO: instead of this find next set bit at `slotIndex` in
        // `slotUsages` unless whole current `slotUsages`-word is all zero.

        immutable pageIndex = slotIndex / Page.slotCount;
        immutable slotOffset = slotIndex % Page.slotCount; // position within the page
        immutable needNewPage = (slotOffset == 0);         // page boundary (or first call)

        if (needNewPage)
        {
            Page* pagePtr = cast(Page*)os_mem_map(PAGESIZE);
            if (pagePtr is null)        // mmap failed; do not dereference
                onOutOfMemoryErrorNoGC();
            debug(PRINTF) printf("### %s(): pagePtr:%p\n", __FUNCTION__.ptr, pagePtr);
            pageTables.insertBack(SmallPageTable!sizeClass(pagePtr));

            pageTables.ptr[pageIndex].slotUsages[0] = true; // mark slot

            debug(PRINTF) printf("### %s(): slotIndex:%lu\n", __FUNCTION__.ptr, cast(size_t)0);

            auto slotPtr = pagePtr.slots.ptr; // first slot of the new page
            ++slotIndex;                      // keep the global index monotonic
            return slotPtr;
        }
        else
        {
            debug(PRINTF) printf("### %s(): slotIndex:%lu\n", __FUNCTION__.ptr, slotIndex);
            pageTables.ptr[pageIndex].slotUsages[slotOffset] = true; // mark slot
            ++slotIndex;
            return &pageTables.ptr[pageIndex].pagePtr.slots.ptr[slotOffset];
        }
    }

    Array!(SmallPageTable!sizeClass) pageTables;
    size_t slotIndex = 0; // index to first free slot in pool across multiple pages
}

// TODO: @safe pure nothrow @nogc
unittest
{
    static foreach (sizeClass; smallSizeClasses)
    {
        {
            SmallPool!(sizeClass, false) x;
        }
    }
}

/// All small pools, one scanned and one unscanned pool per small size class.
struct SmallPools
{
    this(size_t pageTableCapacity)
    {
        static foreach (sizeClass; smallSizeClasses)
        {
            // Quote from https://olshansky.me/gc/runtime/dlang/2017/06/14/inside-d-gc.html
            // "Fine grained locking from the start, I see no problem with per pool locking."
            mixin(`this.unscannedPool` ~ sizeClass.stringof ~ ` = SmallPool!(sizeClass, false)(pageTableCapacity);`);
            mixin(`this.scannedPool` ~ sizeClass.stringof ~ ` = SmallPool!(sizeClass, true)(pageTableCapacity);`);
        }
    }

    /** Allocate `size` bytes (rounded up to a size class) with attributes
     * `bits`; dispatches to the matching per-class pool. Sizes above the
     * largest class currently abort via `onOutOfMemoryError`.
     */
    BlkInfo qalloc(size_t size, uint bits) nothrow
    {
        debug(PRINTF) printf("### %s(size:%lu, bits:%u)\n", __FUNCTION__.ptr, size, bits);

        BlkInfo blkinfo = void;
        blkinfo.attr = bits;

        // TODO: optimize this:
        blkinfo.size = ceilPow2(size);
        if (blkinfo.size < smallSizeClasses[0])
            blkinfo.size = smallSizeClasses[0];

        // NOTE(review): only power-of-two classes are reachable here because
        // of ceilPow2 above; the 24/40/48/… classes never match — the TODOs
        // on `smallSizeClasses` acknowledge this.
    top:
        switch (blkinfo.size)
        {
            static foreach (sizeClass; smallSizeClasses)
            {
            case sizeClass:
                if (bits & BlkAttr.NO_SCAN) // no scanning needed
                    mixin(`blkinfo.base = unscannedPool` ~ sizeClass.stringof ~ `.allocateNext();`);
                else
                    mixin(`blkinfo.base = scannedPool` ~ sizeClass.stringof ~ `.allocateNext();`);
                break top;
            }
        default:
            blkinfo.base = null;
            printf("### %s(size:%lu, bits:%u) Cannot handle blkinfo.size:%lu\n", __FUNCTION__.ptr, size, bits, blkinfo.size);
            // TODO: find closest match of blkinfo.size <= smallSizeClasses
            onOutOfMemoryError();
            assert(0, "Handle other blkinfo.size");
        }

        return blkinfo;
    }
private:
    static foreach (sizeClass; smallSizeClasses)
    {
        // Quote from https://olshansky.me/gc/runtime/dlang/2017/06/14/inside-d-gc.html
        // "Fine grained locking from the start, I see no problem with per pool locking."
        mixin(`SmallPool!(sizeClass, false) unscannedPool` ~ sizeClass.stringof ~ `;`);
        mixin(`SmallPool!(sizeClass, true) scannedPool` ~ sizeClass.stringof ~ `;`);
    }
}
// pragma(msg, "SmallPools.sizeof: ", SmallPools.sizeof);

// Default capacity (in table entries) reserved per pool's page table.
// NOTE(review): previous comment said "eight pages"; the value is actually
// 256*4096 = 1_048_576 entries.
enum pageTableCapacityDefault = 256*PAGESIZE;

/// Per-thread GC state: roots, ranges and the segregated small pools.
struct Gcx
{
    this(size_t pageTableCapacity) // capacity reserved for each pool's page table
    {
        this.smallPools = SmallPools(pageTableCapacity);
    }
    Array!Root roots;
    Array!Range ranges;
    SmallPools smallPools;
    uint disabled; // turn off collections if >0
}

Gcx tlGcx; // thread-local allocator instance
static this()
{
    tlGcx = Gcx(pageTableCapacityDefault);
}

// size class specific overloads only for thread-local GC
extern (C)
{
    static foreach (sizeClass; smallSizeClasses)
    {
        /* TODO: use template `mixin` containing, in turn, a `mixin` for generating
         * the symbol names `gc_tlmalloc_32`, `unscannedPool32` and
         * `scannedPool32` for sizeClass `32`.
         *
         * TODO: Since https://github.com/dlang/dmd/pull/8813 we can now use:
         * `mixin("gc_tlmalloc_", sizeClass);` for symbol generation
         */
        mixin(`
        void* gc_tlmalloc_` ~ sizeClass.stringof ~ `(uint ba = 0) @trusted nothrow
        {
            if (ba & BlkAttr.NO_SCAN) // no scanning needed
                return tlGcx.smallPools.unscannedPool` ~ sizeClass.stringof ~ `.allocateNext();
            else
                return tlGcx.smallPools.scannedPool` ~ sizeClass.stringof ~ `.allocateNext();
        }
        `);
    }
}

/** `core.gc.gcinterface.GC` implementation backed by the segregated pools.
 * Collection, attribute tracking and block queries are still stubs.
 */
class SegregatedGC : GC
{
    import core.internal.spinlock;
    static gcLock = shared(AlignedSpinLock)(SpinLock.Contention.lengthy);
    static bool _inFinalizer;

    // global allocator (`__gshared`)
    __gshared Gcx gGcx;

    // lock GC, throw InvalidMemoryOperationError on recursive locking during finalization
    static void lockNR() @nogc nothrow
    {
        if (_inFinalizer)
            onInvalidMemoryOperationError();
        gcLock.lock();
    }

    void initialize()
    {
        printf("### %s()\n", __FUNCTION__.ptr);

        import core.stdc.string;
        auto p = cstdlib.malloc(__traits(classInstanceSize, SegregatedGC));
        if (!p)
            onOutOfMemoryError();

        // Manually instantiate the class in C-heap memory (the GC cannot
        // allocate itself).
        auto init = typeid(SegregatedGC).initializer();
        assert(init.length == __traits(classInstanceSize, SegregatedGC));
        auto instance = cast(SegregatedGC)memcpy(p, init.ptr, init.length);
        instance.__ctor();

        // NOTE(review): `instance` is not stored or registered anywhere here,
        // and `gGcx` is `__gshared` static so the `instance.` qualifier is
        // cosmetic — confirm how the runtime is meant to adopt this instance.
        instance.gGcx = Gcx(pageTableCapacityDefault);
    }

    void finalize()
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
    }

    this()
    {
    }

    /// Re-enable collections (decrements the disable counter).
    void enable()
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        static void go(Gcx* tlGcx) nothrow
        {
            tlGcx.disabled--;
        }
        runLocked!(go)(&tlGcx);
    }

    /// Disable collections (increments the disable counter).
    void disable()
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        static void go(Gcx* tlGcx) nothrow
        {
            tlGcx.disabled++;
        }
        runLocked!(go)(&tlGcx);
    }

    /// Run `func(args)` while holding the GC spin lock.
    auto runLocked(alias func, Args...)(auto ref Args args)
    {
        debug(PROFILE_API) immutable tm = (config.profile > 1 ? currTime.ticks : 0);
        lockNR();
        scope (failure) gcLock.unlock();
        debug(PROFILE_API) immutable tm2 = (config.profile > 1 ? currTime.ticks : 0);

        static if (is(typeof(func(args)) == void))
            func(args);
        else
            auto res = func(args);

        debug(PROFILE_API) if (config.profile > 1) { lockTime += tm2 - tm; }
        gcLock.unlock();

        static if (!is(typeof(func(args)) == void))
            return res;
    }

    void collect() nothrow
    {
        printf("### %s: \n", __FUNCTION__.ptr); // TODO: not implemented
    }

    void collectNoStack() nothrow
    {
        printf("### %s: \n", __FUNCTION__.ptr); // TODO: not implemented
    }

    void minimize() nothrow
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr); // TODO: not implemented
    }

    uint getAttr(void* p) nothrow
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        return 0; // TODO: attributes are not tracked yet
    }

    uint setAttr(void* p, uint mask) nothrow
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        return 0; // TODO: attributes are not tracked yet
    }

    uint clrAttr(void* p, uint mask) nothrow
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        return 0; // TODO: attributes are not tracked yet
    }

    void* malloc(size_t size, uint bits, const TypeInfo ti) nothrow
    {
        debug(PRINTF) printf("### %s(size:%lu, bits:%u)\n", __FUNCTION__.ptr, size, bits);
        lockNR();
        scope (failure) gcLock.unlock(); // TODO: why needed?
        void* p = gGcx.smallPools.qalloc(size, bits).base;
        gcLock.unlock();
        if (size && p is null)
            onOutOfMemoryError();
        return p;
    }

    BlkInfo qalloc(size_t size, uint bits, const TypeInfo ti) nothrow
    {
        debug(PRINTF) printf("### %s(size:%lu, bits:%u)\n", __FUNCTION__.ptr, size, bits);
        lockNR();
        scope (failure) gcLock.unlock(); // TODO: why needed?
        BlkInfo blkinfo = gGcx.smallPools.qalloc(size, bits);
        gcLock.unlock();
        if (size && blkinfo.base is null)
            onOutOfMemoryError();
        return blkinfo;
    }

    void* calloc(size_t size, uint bits, const TypeInfo ti) nothrow
    {
        debug(PRINTF) printf("### %s(size:%lu, bits:%u)\n", __FUNCTION__.ptr, size, bits);
        lockNR();
        scope (failure) gcLock.unlock();
        void* p = gGcx.smallPools.qalloc(size, bits).base;
        gcLock.unlock();
        if (size && p is null)
            onOutOfMemoryError();
        import core.stdc.string : memset;
        memset(p, 0, size); // zero
        // why is this slower than memset? (cast(size_t*)p)[0 .. size/size_t.sizeof] = 0;
        return p;
    }

    void* realloc(void* p, size_t size, uint bits, const TypeInfo ti) nothrow
    {
        debug(PRINTF) printf("### %s(p:%p, size:%lu, bits:%u)\n", __FUNCTION__.ptr, p, size, bits);
        // NOTE(review): delegates to the C heap while `malloc` above uses the
        // pools — mixing the two on the same pointer is invalid; verify
        // callers never realloc a pool allocation.
        p = cstdlib.realloc(p, size);
        if (size && p is null)
            onOutOfMemoryError();
        return p;
    }

    /**
     * Attempt to in-place enlarge the memory block pointed to by p by at least
     * minsize bytes, up to a maximum of maxsize additional bytes.
     * This does not attempt to move the memory block (like realloc() does).
     *
     * Returns:
     *  0 if could not extend p,
     *  total size of entire memory block if successful.
     */
    size_t extend(void* p, size_t minsize, size_t maxsize, const TypeInfo ti) nothrow
    {
        debug(PRINTF) printf("### %s(p:%p, minsize:%lu, maxsize:%lu)\n", __FUNCTION__.ptr, p, minsize, maxsize);
        return 0; // extension never succeeds in this implementation
    }

    size_t reserve(size_t size) nothrow
    {
        debug(PRINTF) printf("### %s(size:%lu)\n", __FUNCTION__.ptr, size);
        return 0; // TODO: not implemented
    }

    void free(void* p) nothrow @nogc
    {
        debug(PRINTF) printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
        // NOTE(review): frees via the C heap; pool-allocated blocks are never
        // reclaimed here — confirm intended ownership before calling.
        cstdlib.free(p);
    }

    /**
     * Determine the base address of the block containing p. If p is not a gc
     * allocated pointer, return null.
     */
    void* addrOf(void* p) nothrow @nogc
    {
        debug(PRINTF) printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
        return null; // TODO: not implemented
    }

    /**
     * Determine the allocated size of pointer p. If p is an interior pointer
     * or not a gc allocated pointer, return 0.
     */
    size_t sizeOf(void* p) nothrow @nogc
    {
        debug(PRINTF) printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
        return 0; // TODO: not implemented
    }

    /**
     * Determine the base address of the block containing p. If p is not a gc
     * allocated pointer, return null.
     */
    BlkInfo query(void* p) nothrow
    {
        debug(PRINTF) printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
        return BlkInfo.init; // TODO: not implemented
    }

    /// Register `p` as a root to be scanned during collection.
    void addRoot(void* p) nothrow @nogc
    {
        printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
        tlGcx.roots.insertBack(Root(p));
    }

    /**
     * remove p from list of roots
     */
    void removeRoot(void* p) nothrow @nogc
    {
        debug(PRINTF) printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
        foreach (ref root; tlGcx.roots)
        {
            if (root is p)
            {
                // swap-remove: replace with last element, then pop
                root = tlGcx.roots.back;
                tlGcx.roots.popBack();
                return;
            }
        }
        assert(false); // removing an unregistered root is a caller bug
    }

    @property RootIterator rootIter() return @nogc
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        return &rootsApply;
    }

    private int rootsApply(scope int delegate(ref Root) nothrow dg)
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        foreach (ref r; tlGcx.roots)
            if (auto result = dg(r))
                return result;
        return 0;
    }

    /**
     * Add range to scan for roots.
     */
    void addRange(void* p, size_t sz, const TypeInfo ti = null) nothrow @nogc
    {
        int x;
        printf("### %s(p:%p, sz:%lu ti:%p, stack:%p)\n", __FUNCTION__.ptr, p, sz, ti, &x);
        if (p is null)
            return;
        tlGcx.ranges.insertBack(Range(p, p + sz, cast() ti));
    }

    /**
     * Remove range `p`.
     */
    void removeRange(void* p) nothrow @nogc
    {
        debug(PRINTF) printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
        if (p is null)
            return;
        foreach (ref range; tlGcx.ranges)
        {
            if (range.pbot is p)
            {
                // swap-remove: replace with last element, then pop
                range = tlGcx.ranges.back;
                tlGcx.ranges.popBack();
                return;
            }
        }
        assert(false); // removing an unregistered range is a caller bug
    }

    @property RangeIterator rangeIter() return @nogc
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        return &rangesApply;
    }

    private int rangesApply(scope int delegate(ref Range) nothrow dg)
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        foreach (ref range; tlGcx.ranges)
            if (auto result = dg(range))
                return result;
        return 0;
    }

    /**
     * Run finalizers.
     */
    void runFinalizers(const scope void[] segment) nothrow
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr); // TODO: not implemented
    }

    /// Report whether a finalizer is currently running.
    /// Fixed: previously returned `typeof(return).init` (always false),
    /// ignoring the `_inFinalizer` flag that `lockNR` consults.
    bool inFinalizer() nothrow
    {
        return _inFinalizer;
    }

    /**
     * Retrieve statistics about garbage collection.
     * Useful for debugging and tuning.
     */
    core.memory.GC.Stats stats() nothrow
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        // TODO: fill in
        return typeof(return)();
    }

    /**
     * Retrieve profile statistics about garbage collection.
     * Useful for debugging and tuning.
     */
    core.memory.GC.ProfileStats profileStats() nothrow
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        // TODO: fill in
        return typeof(return)();
    }

    /**
     * Returns the number of bytes allocated for the current thread
     * since program start. It is the same as
     * GC.stats().allocatedInCurrentThread, but faster.
     */
    ulong allocatedInCurrentThread() nothrow
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        // TODO: fill in
        return typeof(return).init;
    }
}

private enum PowType
{
    floor,
    ceil
}

/** Round `val` down (`floor`) or up (`ceil`) to a power of two.
 * Returns 0 when `val` is 0 or a ceil would overflow `T`.
 */
private T powIntegralImpl(PowType type, T)(T val)
{
    version(D_Coverage) {} else pragma(inline, true);
    import core.bitop : bsr;
    if (val == 0 ||
        (type == PowType.ceil &&
         (val > T.max / 2 ||
          val == T.min)))
        return 0;
    else
        // bsr gives the index of the highest set bit; +1 (ceil) rounds up
        return (T(1) << bsr(val) + type);
}

/// Smallest power of two >= `val` (0 on overflow or for `val == 0`).
private T nextPow2(T)(const T val)
if (is(T == size_t) ||
    is(T == uint))
{
    return powIntegralImpl!(PowType.ceil)(val);
}