/** A GC with segregated allocations using D's `static foreach`.
 *
 * Copyright: Copyright Per Nordlöw 2019 - .
 * License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
 * Authors: Per Nordlöw
 */
module segregated_gc;

private static void* os_mem_map(size_t nbytes) nothrow @nogc
{
    import core.sys.posix.sys.mman;
    void* p = mmap(null, nbytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
    return (p == MAP_FAILED) ? null : p;
}

private static int os_mem_unmap(void* base, size_t nbytes) nothrow @nogc
{
    import core.sys.posix.sys.mman;
    return munmap(base, nbytes);
}

// import gc.os : os_mem_map, os_mem_unmap;

import core.gc.config;
import core.gc.gcinterface;

import paged_dynamic_array : Array = PagedDynamicArray;
import simple_static_bitarray : StaticBitArray;

import core.stdc.stdio : printf;
import cstdlib = core.stdc.stdlib : calloc, free, malloc, realloc;
static import core.memory;

/* debug = PRINTF; */

extern (C) void onOutOfMemoryError(void* pretend_sideffect = null) @trusted pure nothrow @nogc; /* dmd @@@BUG11461@@@ */

private
{
    extern (C)
    {
        // to allow compilation of this module without access to the rt package,
        // make these functions available from rt.lifetime
        void rt_finalizeFromGC(void* p, size_t size, uint attr) nothrow;
        int rt_hasFinalizerInSegment(void* p, size_t size, uint attr, in void[] segment) nothrow;

        // Declared as an extern instead of importing core.exception
        // to avoid inlining - see issue 13725.
        void onInvalidMemoryOperationError() @nogc nothrow;
        void onOutOfMemoryErrorNoGC() @nogc nothrow;
    }
}

enum WORDSIZE = size_t.sizeof;  ///< Size of word type (`size_t`) in bytes.
enum PAGESIZE = 4096;           ///< Page size in bytes (`getconf PAGESIZE` on Linux).

/// Small slot size classes (in bytes).
static immutable smallSizeClasses = [8,
                                     16,
                                     24,   // TODO move
                                     32,
                                     40,   // TODO move
                                     48,   // TODO move
                                     56,   // TODO move
                                     64,
                                     72,
                                     80,
                                     88,
                                     96,   // TODO move
                                     104,  // TODO move
                                     112,  // TODO move
                                     120,  // TODO move
                                     128,  // TODO 128 + 64,
                                     256,  // TODO 256 + 128,
                                     512,  // TODO 512 + 256,
                                     1024, // TODO 1024 + 512,
                                     2048, // TODO 2048 + 1024,
    ];

/// Medium slot size classes (in bytes).
version(none)
static immutable mediumSizeClasses = [1 << 12, // 4096
                                      1 << 13, // 8192
                                      1 << 14, // 16384
                                      1 << 15, // 32768
                                      1 << 16, // 65536
    ];

/// Round `sz` up to the closest size class.
size_t ceilPow2(size_t sz) @safe pure nothrow @nogc
{
    return nextPow2(sz - 1);
}

@safe pure nothrow @nogc unittest
{
    // TODO assert(ceilPow2(1) == 1);
    assert(ceilPow2(2) == 2);
    assert(ceilPow2(3) == 4);
    assert(ceilPow2(4) == 4);
    assert(ceilPow2(5) == 8);
    assert(ceilPow2(6) == 8);
    assert(ceilPow2(7) == 8);
    assert(ceilPow2(8) == 8);
    assert(ceilPow2(9) == 16);
}

/// Small slot containing `wordCount` machine words.
struct SmallSlot(uint wordCount)
if (wordCount >= 1)
{
    size_t[wordCount] words;    // slot payload
}

@safe pure nothrow @nogc unittest
{
    SmallSlot!(1) x;
    SmallSlot!(1) y;
}
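// A minimal compile-time sketch: each `SmallSlot` is exactly `wordCount`
// machine words with no padding, which the page-size arithmetic in
// `SmallPage` below relies on.
@safe pure nothrow @nogc unittest
{
    static assert(SmallSlot!(1).sizeof == 1*WORDSIZE);
    static assert(SmallSlot!(4).sizeof == 4*WORDSIZE);
}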
/// Small page storing slots of size `sizeClass`.
struct SmallPage(uint sizeClass)
if (sizeClass >= smallSizeClasses[0])
{
    enum wordCount = sizeClass/WORDSIZE;
    static assert(sizeClass % WORDSIZE == 0, sizeClass);
    enum slotCount = PAGESIZE/sizeClass;
    alias Slot = SmallSlot!(wordCount);

    Slot[slotCount] slots;
    byte[PAGESIZE-slots.sizeof] __padding;
    static assert(typeof(this).sizeof == PAGESIZE); // TODO adjust if pages of different byte sizes are preferred
}
enum minimumSmallPageWordCount = PAGESIZE/WORDSIZE; // TODO may be computed

struct SmallPageTable(uint sizeClass)
{
    alias Page = SmallPage!(sizeClass);
    Page* pagePtr;
    enum slotCount = PAGESIZE/sizeClass;

    // bit `i` indicates if slot `i` in `*pagePtr` currently contains an initialized value
    StaticBitArray!(slotCount) slotUsages; // TODO benchmark with a byte-array instead for comparison

    // bit `i` indicates if slot `i` in `*pagePtr` has been marked
    StaticBitArray!(slotCount) slotMarks;
}

/// Small pool of pages.
struct SmallPool(uint sizeClass, bool pointerFlag)
if (sizeClass >= smallSizeClasses[0])
{
    alias Page = SmallPage!(sizeClass);

    this(size_t pageTableCapacity)
    {
        pageTables.capacity = pageTableCapacity;
    }

    void* allocateNext() @trusted // pure nothrow @nogc
    {
        version(LDC) pragma(inline, true);

        // TODO scan `slotUsages` at `slotIndex` using `core.bitop.bsf` to find
        // the first free slot, if any. Use a modification of `indexOfFirstOne`
        // that takes `slotIndex` as start index. If there is no hit, set
        // `slotIndex` to `Page.slotCount`.
        // TODO instead of this, find the next set bit at `slotIndex` in
        // `slotUsages` unless the whole current `slotUsages` word is all zero.

        immutable pageIndex = slotIndex / Page.slotCount;
        immutable needNewPage = (slotIndex % Page.slotCount == 0);

        if (needNewPage)
        {
            Page* pagePtr = cast(Page*)os_mem_map(PAGESIZE);
            debug(PRINTF) printf("### %s(): pagePtr:%p\n", __FUNCTION__.ptr, pagePtr);
            pageTables.insertBack(SmallPageTable!sizeClass(pagePtr));

            pageTables.ptr[pageIndex].slotUsages[0] = true; // mark slot

            debug(PRINTF) printf("### %s(): slotIndex:%lu\n", __FUNCTION__.ptr, size_t(0));

            auto slotPtr = pagePtr.slots.ptr; // first slot
            slotIndex = 1;
            return slotPtr;
        }
        else
        {
            debug(PRINTF) printf("### %s(): slotIndex:%lu\n", __FUNCTION__.ptr, slotIndex);
            immutable subIndex = slotIndex % Page.slotCount; // slot index within the page
            pageTables.ptr[pageIndex].slotUsages[subIndex] = true; // mark slot
            slotIndex += 1;
            return &pageTables.ptr[pageIndex].pagePtr.slots.ptr[subIndex];
        }
    }

    Array!(SmallPageTable!sizeClass) pageTables;
    size_t slotIndex = 0;       // index of first free slot in pool, across multiple pages
}

// TODO @safe pure nothrow @nogc
unittest
{
    static foreach (sizeClass; smallSizeClasses)
    {
        {
            SmallPool!(sizeClass, false) x;
        }
    }
}
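// A usage sketch of `allocateNext` (assuming the POSIX `mmap` in `os_mem_map`
// and the page-table reservation both succeed): consecutive allocations from
// a fresh pool return adjacent slots within the same mapped page.
unittest
{
    auto pool = SmallPool!(smallSizeClasses[0], false)(pageTableCapacityDefault);
    void* a = pool.allocateNext();
    void* b = pool.allocateNext();
    assert(a !is null && b !is null);
    assert(cast(size_t)b - cast(size_t)a == smallSizeClasses[0]); // adjacent slots
}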
/// All small pools.
struct SmallPools
{
    this(size_t pageTableCapacity)
    {
        static foreach (sizeClass; smallSizeClasses)
        {
            // Quote from https://olshansky.me/gc/runtime/dlang/2017/06/14/inside-d-gc.html
            // "Fine grained locking from the start, I see no problem with per pool locking."
            mixin(`this.unscannedPool` ~ sizeClass.stringof ~ ` = SmallPool!(sizeClass, false)(pageTableCapacity);`);
            mixin(`this.scannedPool` ~ sizeClass.stringof ~ ` = SmallPool!(sizeClass, true)(pageTableCapacity);`);
        }
    }
    BlkInfo qalloc(size_t size, uint bits) nothrow
    {
        debug(PRINTF) printf("### %s(size:%lu, bits:%u)\n", __FUNCTION__.ptr, size, bits);

        BlkInfo blkinfo = void;
        blkinfo.attr = bits;

        // TODO optimize this:
        blkinfo.size = ceilPow2(size);
        if (blkinfo.size < smallSizeClasses[0])
        {
            blkinfo.size = smallSizeClasses[0];
        }

    top:
        switch (blkinfo.size)
        {
            static foreach (sizeClass; smallSizeClasses)
            {
            case sizeClass:
                if (bits & BlkAttr.NO_SCAN) // no scanning needed
                {
                    mixin(`blkinfo.base = unscannedPool` ~ sizeClass.stringof ~ `.allocateNext();`);
                }
                else
                {
                    mixin(`blkinfo.base = scannedPool` ~ sizeClass.stringof ~ `.allocateNext();`);
                }
                break top;
            }
        default:
            blkinfo.base = null;
            printf("### %s(size:%lu, bits:%u) Cannot handle blkinfo.size:%lu\n", __FUNCTION__.ptr, size, bits, blkinfo.size);
            // TODO find closest match of blkinfo.size <= smallSizeClasses
            onOutOfMemoryError();
            assert(0, "Handle other blkinfo.size");
        }

        return blkinfo;
    }
private:
    static foreach (sizeClass; smallSizeClasses)
    {
        // Quote from https://olshansky.me/gc/runtime/dlang/2017/06/14/inside-d-gc.html
        // "Fine grained locking from the start, I see no problem with per pool locking."
        mixin(`SmallPool!(sizeClass, false) unscannedPool` ~ sizeClass.stringof ~ `;`);
        mixin(`SmallPool!(sizeClass, true) scannedPool` ~ sizeClass.stringof ~ `;`);
    }
}
// pragma(msg, "SmallPools.sizeof: ", SmallPools.sizeof);

enum pageTableCapacityDefault = 256*PAGESIZE; // one megabyte

struct Gcx
{
    this(size_t pageTableCapacity) // one megabyte per table by default
    {
        this.smallPools = SmallPools(pageTableCapacity);
    }
    Array!Root roots;
    Array!Range ranges;
    SmallPools smallPools;
    uint disabled;              // turn off collections if > 0
}

Gcx tlGcx;                      // thread-local allocator instance
static this()
{
    tlGcx = Gcx(pageTableCapacityDefault);
}

// size-class-specific overloads, only for the thread-local GC
extern (C)
{
    static foreach (sizeClass; smallSizeClasses)
    {
        /* TODO use a template `mixin` containing, in turn, a `mixin` for
         * generating the symbol names `gc_tlmalloc_32`, `unscannedPool32` and
         * `scannedPool32` for sizeClass `32`.
         *
         * TODO Since https://github.com/dlang/dmd/pull/8813 we can now use:
         * `mixin("gc_tlmalloc_", sizeClass);` for symbol generation
         */
        mixin(`
        void* gc_tlmalloc_` ~ sizeClass.stringof ~ `(uint ba = 0) @trusted nothrow
        {
            if (ba & BlkAttr.NO_SCAN) // no scanning needed
                return tlGcx.smallPools.unscannedPool` ~ sizeClass.stringof ~ `.allocateNext();
            else
                return tlGcx.smallPools.scannedPool` ~ sizeClass.stringof ~ `.allocateNext();
        }
        `);
    }
}
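// A hedged usage sketch of the generated thread-local entry points: when the
// size class is known at compile time these bypass the size-class dispatch in
// `SmallPools.qalloc`. Runs against the `tlGcx` instance set up in the module
// constructor above, and assumes the underlying `os_mem_map` calls succeed.
unittest
{
    void* p = gc_tlmalloc_32(BlkAttr.NO_SCAN); // 32-byte slot from the unscanned pool
    void* q = gc_tlmalloc_32();                // 32-byte slot from the scanned pool
    assert(p !is null && q !is null);
    assert(p !is q);                           // distinct pools, distinct pages
}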
class SegregatedGC : GC
{
    import core.internal.spinlock;
    static gcLock = shared(AlignedSpinLock)(SpinLock.Contention.lengthy);
    static bool _inFinalizer;

    // global allocator (`__gshared`)
    __gshared Gcx gGcx;

    // lock GC, throw InvalidMemoryOperationError on recursive locking during finalization
    static void lockNR() @nogc nothrow
    {
        if (_inFinalizer)
            onInvalidMemoryOperationError();
        gcLock.lock();
    }

    void initialize()
    {
        printf("### %s()\n", __FUNCTION__.ptr);

        import core.stdc.string : memcpy;
        auto p = cstdlib.malloc(__traits(classInstanceSize, SegregatedGC));
        if (!p)
            onOutOfMemoryError();

        auto init = typeid(SegregatedGC).initializer();
        assert(init.length == __traits(classInstanceSize, SegregatedGC));
        auto instance = cast(SegregatedGC)memcpy(p, init.ptr, init.length);
        instance.__ctor();

        instance.gGcx = Gcx(pageTableCapacityDefault);
    }

    void finalize()
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
    }

    this()
    {
    }

    void enable()
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        static void go(Gcx* tlGcx) nothrow
        {
            tlGcx.disabled--;
        }
        runLocked!(go)(&tlGcx);
    }

    void disable()
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        static void go(Gcx* tlGcx) nothrow
        {
            tlGcx.disabled++;
        }
        runLocked!(go)(&tlGcx);
    }

    auto runLocked(alias func, Args...)(auto ref Args args)
    {
        debug(PROFILE_API) immutable tm = (config.profile > 1 ? currTime.ticks : 0);
        lockNR();
        scope (failure) gcLock.unlock();
        debug(PROFILE_API) immutable tm2 = (config.profile > 1 ? currTime.ticks : 0);

        static if (is(typeof(func(args)) == void))
            func(args);
        else
            auto res = func(args);

        debug(PROFILE_API) if (config.profile > 1) { lockTime += tm2 - tm; }
        gcLock.unlock();

        static if (!is(typeof(func(args)) == void))
            return res;
    }

    void collect() nothrow
    {
        printf("### %s: \n", __FUNCTION__.ptr);
    }

    void collectNoStack() nothrow
    {
        printf("### %s: \n", __FUNCTION__.ptr);
    }

    void minimize() nothrow
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
    }

    uint getAttr(void* p) nothrow
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        return 0;
    }

    uint setAttr(void* p, uint mask) nothrow
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        return 0;
    }

    uint clrAttr(void* p, uint mask) nothrow
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        return 0;
    }
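    // Note: the allocation entry points below (`malloc`, `qalloc`, `calloc`)
    // all funnel into `gGcx.smallPools.qalloc` under `gcLock`; only the small
    // size classes are handled, so larger requests currently fail via
    // `onOutOfMemoryError` (see the `default` case in `SmallPools.qalloc`).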
    void* malloc(size_t size, uint bits, const TypeInfo ti) nothrow
    {
        debug(PRINTF) printf("### %s(size:%lu, bits:%u)\n", __FUNCTION__.ptr, size, bits);
        lockNR();
        scope (failure) gcLock.unlock(); // TODO why needed?
        void* p = gGcx.smallPools.qalloc(size, bits).base;
        gcLock.unlock();
        if (size && p is null)
            onOutOfMemoryError();
        return p;
    }

    BlkInfo qalloc(size_t size, uint bits, const TypeInfo ti) nothrow
    {
        debug(PRINTF) printf("### %s(size:%lu, bits:%u)\n", __FUNCTION__.ptr, size, bits);
        lockNR();
        scope (failure) gcLock.unlock(); // TODO why needed?
        BlkInfo blkinfo = gGcx.smallPools.qalloc(size, bits);
        gcLock.unlock();
        if (size && blkinfo.base is null)
            onOutOfMemoryError();
        return blkinfo;
    }

    void* calloc(size_t size, uint bits, const TypeInfo ti) nothrow
    {
        debug(PRINTF) printf("### %s(size:%lu, bits:%u)\n", __FUNCTION__.ptr, size, bits);
        lockNR();
        scope (failure) gcLock.unlock();
        void* p = gGcx.smallPools.qalloc(size, bits).base;
        gcLock.unlock();
        if (size && p is null)
            onOutOfMemoryError();
        import core.stdc.string : memset;
        memset(p, 0, size); // zero
        // why is this slower than memset? (cast(size_t*)p)[0 .. size/size_t.sizeof] = 0;
        return p;
    }

    void* realloc(void* p, size_t size, uint bits, const TypeInfo ti) nothrow
    {
        debug(PRINTF) printf("### %s(p:%p, size:%lu, bits:%u)\n", __FUNCTION__.ptr, p, size, bits);
        p = cstdlib.realloc(p, size);
        if (size && p is null)
            onOutOfMemoryError();
        return p;
    }

    /**
     * Attempt to in-place enlarge the memory block pointed to by p by at least
     * minsize bytes, up to a maximum of maxsize additional bytes.
     * This does not attempt to move the memory block (like realloc() does).
     *
     * Returns:
     *  0 if could not extend p,
     *  total size of entire memory block if successful.
     */
    size_t extend(void* p, size_t minsize, size_t maxsize, const TypeInfo ti) nothrow
    {
        debug(PRINTF) printf("### %s(p:%p, minsize:%lu, maxsize:%lu)\n", __FUNCTION__.ptr, p, minsize, maxsize);
        return 0;
    }

    size_t reserve(size_t size) nothrow
    {
        debug(PRINTF) printf("### %s(size:%lu)\n", __FUNCTION__.ptr, size);
        return 0;
    }

    void free(void* p) nothrow @nogc
    {
        debug(PRINTF) printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
        cstdlib.free(p);
    }

    /**
     * Determine the base address of the block containing p.  If p is not a GC
     * allocated pointer, return null.
     */
    void* addrOf(void* p) nothrow @nogc
    {
        debug(PRINTF) printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
        return null;
    }

    /**
     * Determine the allocated size of pointer p.  If p is an interior pointer
     * or not a GC allocated pointer, return 0.
     */
    size_t sizeOf(void* p) nothrow @nogc
    {
        debug(PRINTF) printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
        return 0;
    }

    /**
     * Determine the base address, size and attribute bits of the block
     * containing p.  If p is not a GC allocated pointer, return `BlkInfo.init`.
     */
    BlkInfo query(void* p) nothrow
    {
        debug(PRINTF) printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
        return BlkInfo.init;
    }
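    // Note: root and range bookkeeping below operates on the thread-local
    // `tlGcx`, not the global `gGcx`. Removal swaps the matching entry with
    // the back element and pops it, so the order of `roots`/`ranges` is not
    // preserved.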
    void addRoot(void* p) nothrow @nogc
    {
        printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
        tlGcx.roots.insertBack(Root(p));
    }

    /**
     * Remove p from the list of roots.
     */
    void removeRoot(void* p) nothrow @nogc
    {
        debug(PRINTF) printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
        foreach (ref root; tlGcx.roots)
        {
            if (root is p)
            {
                root = tlGcx.roots.back;
                tlGcx.roots.popBack();
                return;
            }
        }
        assert(false);
    }

    @property RootIterator rootIter() return @nogc
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        return &rootsApply;
    }

    private int rootsApply(scope int delegate(ref Root) nothrow dg)
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        foreach (ref r; tlGcx.roots)
        {
            if (auto result = dg(r))
                return result;
        }
        return 0;
    }

    /**
     * Add a range to scan for roots.
     */
    void addRange(void* p, size_t sz, const TypeInfo ti = null) nothrow @nogc
    {
        int x;
        printf("### %s(p:%p, sz:%lu ti:%p, stack:%p)\n", __FUNCTION__.ptr, p, sz, ti, &x);
        if (p is null) return;
        tlGcx.ranges.insertBack(Range(p, p + sz, cast() ti));
    }

    /**
     * Remove the range starting at `p`.
     */
    void removeRange(void* p) nothrow @nogc
    {
        debug(PRINTF) printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
        if (p is null) return;
        foreach (ref range; tlGcx.ranges)
        {
            if (range.pbot is p)
            {
                range = tlGcx.ranges.back;
                tlGcx.ranges.popBack();
                return;
            }
        }
        assert(false);
    }

    @property RangeIterator rangeIter() return @nogc
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        return &rangesApply;
    }

    private int rangesApply(scope int delegate(ref Range) nothrow dg)
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        foreach (ref range; tlGcx.ranges)
        {
            if (auto result = dg(range))
                return result;
        }
        return 0;
    }

    /**
     * Run finalizers.
     */
    void runFinalizers(const scope void[] segment) nothrow
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
    }

    // TODO what is this?
    bool inFinalizer() nothrow
    {
        return false;
    }

    /**
     * Retrieve statistics about garbage collection.
     * Useful for debugging and tuning.
     */
    core.memory.GC.Stats stats() nothrow
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        // TODO fill in
        return typeof(return)();
    }

    /**
     * Retrieve profile statistics about garbage collection.
     * Useful for debugging and tuning.
     */
    core.memory.GC.ProfileStats profileStats() nothrow
    {
        debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
        // TODO fill in
        return typeof(return)();
    }
}

private enum PowType
{
    floor,
    ceil
}

private T powIntegralImpl(PowType type, T)(T val)
{
    version(D_Coverage) {} else pragma(inline, true);
    import core.bitop : bsr;
    if (val == 0 || (type == PowType.ceil && (val > T.max / 2 || val == T.min)))
    {
        return 0;
    }
    else
    {
        return T(1) << (bsr(val) + type); // parenthesized for clarity; `+` binds tighter than `<<`
    }
}

private T nextPow2(T)(const T val)
if (is(T == size_t) ||
    is(T == uint))
{
    return powIntegralImpl!(PowType.ceil)(val);
}
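// A small sketch of the helper semantics: `nextPow2` returns the power of two
// strictly above `val` (0 on zero input or overflow), so `ceilPow2(sz)` ==
// `nextPow2(sz - 1)` rounds `sz` up to the nearest power of two.
@safe pure nothrow @nogc unittest
{
    assert(nextPow2(size_t(1)) == 2);
    assert(nextPow2(size_t(3)) == 4);
    assert(nextPow2(size_t(4)) == 8);  // strictly above, even for powers of two
    assert(nextPow2(size_t(0)) == 0);  // zero input
    assert(nextPow2(size_t.max) == 0); // would overflow
    assert(ceilPow2(17) == 32);
}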