1 /** A GC with segregated allocations using D's `static foreach`.
2  *
3  * Copyright: Copyright Per Nordlöw 2019 - .
4  * License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
5  * Authors:   Per Nordlöw
6  */
7 module segregated_gc;
8 
private static void* os_mem_map(size_t nbytes) nothrow @nogc
{
    import core.sys.posix.sys.mman;
    void* p = mmap(null, nbytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
    return (p == MAP_FAILED) ? null : p;
}
16 
private static int os_mem_unmap(void* base, size_t nbytes) nothrow @nogc
18 {
19     import core.sys.posix.sys.mman;
20     return munmap(base, nbytes);
21 }
22 
23 // import gc.os : os_mem_map, os_mem_unmap;
24 
25 import core.gc.config;
26 import core.gc.gcinterface;
27 
28 import paged_dynamic_array : Array = PagedDynamicArray;
29 import simple_static_bitarray : StaticBitArray;
30 
31 import core.stdc.stdio: printf;
32 import cstdlib = core.stdc.stdlib : calloc, free, malloc, realloc;
33 static import core.memory;
34 
35 /* debug = PRINTF; */
36 
37 extern (C) void onOutOfMemoryError(void* pretend_sideffect = null) @trusted pure nothrow @nogc; /* dmd @@@BUG11461@@@ */
38 
39 private
40 {
41     extern (C)
42     {
43         // to allow compilation of this module without access to the rt package,
44         //  make these functions available from rt.lifetime
45         void rt_finalizeFromGC(void* p, size_t size, uint attr) nothrow;
46         int rt_hasFinalizerInSegment(void* p, size_t size, uint attr, in void[] segment) nothrow;
47 
48         // Declared as an extern instead of importing core.exception
49         // to avoid inlining - see issue 13725.
50         void onInvalidMemoryOperationError() @nogc nothrow;
51         void onOutOfMemoryErrorNoGC() @nogc nothrow;
52     }
53 }
54 
enum WORDSIZE = size_t.sizeof;  ///< Size in bytes of a machine word (`size_t`).
enum PAGESIZE = 4096;           ///< Page size in bytes (`getconf PAGESIZE` on Linux).
57 
/// Small slot size classes (in bytes).
59 static immutable smallSizeClasses = [8,
60                                      16,
61                                      24, // TODO move
62                                      32,
63                                      40, // TODO move
64                                      48, // TODO move
65                                      56, // TODO move
66                                      64,
67                                      72,
68                                      80,
69                                      88,
70                                      96, // TODO move
71                                      104, // TODO move
72                                      112, // TODO move
73                                      120, // TODO move
74                                      128, // TODO 128 +64,
75                                      256, // TODO 256 + 128,
76                                      512, // TODO 512 + 256,
77                                      1024, // TODO 1024 + 512,
78                                      2048, // TODO 2048 + 1024,
79     ];
80 
/// Medium slot size classes (in bytes).
82 version(none)
83 static immutable mediumSizeClasses = [1 << 12, // 4096
84                                       1 << 13, // 8192
85                                       1 << 14, // 16384
86                                       1 << 15, // 32768
87                                       1 << 16, // 65536
88     ];
89 
/// Round `sz` up to the nearest power-of-two size class.
91 size_t ceilPow2(size_t sz) @safe pure nothrow @nogc
92 {
93     return nextPow2(sz - 1);
94 }
95 
96 @safe pure nothrow @nogc unittest
97 {
98     // TODO assert(ceilPow2(1) == 1);
99     assert(ceilPow2(2) == 2);
100     assert(ceilPow2(3) == 4);
101     assert(ceilPow2(4) == 4);
102     assert(ceilPow2(5) == 8);
103     assert(ceilPow2(6) == 8);
104     assert(ceilPow2(7) == 8);
105     assert(ceilPow2(8) == 8);
106     assert(ceilPow2(9) == 16);
107 }
108 
/// Small slot containing `wordCount` machine words.
110 struct SmallSlot(uint wordCount)
111 if (wordCount >= 1)
112 {
113     size_t[wordCount] words;    // words
114 }
115 
116 @safe pure nothrow @nogc unittest
117 {
118     SmallSlot!(1) x;
119     SmallSlot!(1) y;
120 }
121 
122 /// Small page storing slots of size `sizeClass`.
123 struct SmallPage(uint sizeClass)
124 if (sizeClass >= smallSizeClasses[0])
125 {
126     enum wordCount = sizeClass/WORDSIZE;
127     static assert(sizeClass % WORDSIZE == 0, sizeClass);
128     enum slotCount = PAGESIZE/sizeClass;
129     alias Slot = SmallSlot!(wordCount);
130 
131     Slot[slotCount] slots;
132     byte[PAGESIZE-slots.sizeof] __padding;
133     static assert(this.sizeof == PAGESIZE); // TODO adjust if pages of different byte sizes are preferred
134 }
135 enum minimumSmallPageWordCount = PAGESIZE/WORDSIZE; // TODO may be computed
136 
137 struct SmallPageTable(uint sizeClass)
138 {
139     alias Page = SmallPage!(sizeClass);
140     Page* pagePtr;
141     enum slotCount = PAGESIZE/sizeClass;
142 
143     // bit `i` indicates if slot `i` in `*pagePtr` currently contains an initialized value
144     StaticBitArray!(slotCount) slotUsages; // TODO benchmark with a byte-array instead for comparison
145 
146     // bit `i` indicates if slot `i` in `*pagePtr` has been marked
147     StaticBitArray!(slotCount) slotMarks;
148 }
149 
150 /// Small pool of pages.
151 struct SmallPool(uint sizeClass, bool pointerFlag)
152 if (sizeClass >= smallSizeClasses[0])
153 {
154     alias Page = SmallPage!(sizeClass);
155 
156     this(size_t pageTableCapacity)
157     {
158         pageTables.capacity = pageTableCapacity;
159     }
160 
161     void* allocateNext() @trusted // pure nothrow @nogc
162     {
163         version(LDC) pragma(inline, true);
164 
        // TODO scan `slotUsages` starting at `slotIndex` using core.bitop.bsf
        // to find the first free slot, if any. Use a variant of
        // `indexOfFirstOne` that takes `slotIndex` as its start index; if
        // there is no hit, set `slotIndex` to `Page.slotCount`.
        // TODO alternatively, find the next set bit at `slotIndex` in
        // `slotUsages` unless the whole current `slotUsages` word is zero.
171 
        immutable pageIndex = slotIndex / Page.slotCount;
        immutable slotOffset = slotIndex % Page.slotCount; // slot index within its page
        immutable needNewPage = (slotOffset == 0);

        if (needNewPage)
        {
            Page* pagePtr = cast(Page*)os_mem_map(PAGESIZE);
            debug(PRINTF) printf("### %s(): pagePtr:%p\n", __FUNCTION__.ptr, pagePtr);
            pageTables.insertBack(SmallPageTable!sizeClass(pagePtr));

            pageTables.ptr[pageIndex].slotUsages[0] = true; // mark first slot as used

            debug(PRINTF) printf("### %s(): slotIndex:%lu\n", __FUNCTION__.ptr, slotIndex);

            auto slotPtr = pagePtr.slots.ptr; // first slot of the new page
            slotIndex += 1;
            return slotPtr;
        }
        else
        {
            debug(PRINTF) printf("### %s(): slotIndex:%lu\n", __FUNCTION__.ptr, slotIndex);
            pageTables.ptr[pageIndex].slotUsages[slotOffset] = true; // mark slot as used
            slotIndex += 1;
            return &pageTables.ptr[pageIndex].pagePtr.slots.ptr[slotOffset];
        }
195     }
196 
197     Array!(SmallPageTable!sizeClass) pageTables;
    size_t slotIndex = 0;       // index of the first free slot in the pool, counted across all pages
199 }
200 
201 // TODO @safe pure nothrow @nogc
202 unittest
203 {
204     static foreach (sizeClass; smallSizeClasses)
205     {
206         {
207             SmallPool!(sizeClass, false) x;
208         }
209     }
210 }
211 
/// All small pools: one scanned and one unscanned pool per size class.
213 struct SmallPools
214 {
215     this(size_t pageTableCapacity)
216     {
217         static foreach (sizeClass; smallSizeClasses)
218         {
219             // Quote from https://olshansky.me/gc/runtime/dlang/2017/06/14/inside-d-gc.html
220             // "Fine grained locking from the start, I see no problem with per pool locking."
221             mixin(`this.unscannedPool` ~ sizeClass.stringof ~ ` = SmallPool!(sizeClass, false)(pageTableCapacity);`);
222             mixin(`this.scannedPool`    ~ sizeClass.stringof ~ ` = SmallPool!(sizeClass, true)(pageTableCapacity);`);
223         }
224     }
225     BlkInfo qalloc(size_t size, uint bits) nothrow
226     {
227         debug(PRINTF) printf("### %s(size:%lu, bits:%u)\n", __FUNCTION__.ptr, size, bits);
228 
229         BlkInfo blkinfo = void;
230         blkinfo.attr = bits;
231 
232         // TODO optimize this:
233         blkinfo.size = ceilPow2(size);
234         if (blkinfo.size < smallSizeClasses[0])
235         {
236             blkinfo.size = smallSizeClasses[0];
237         }
238 
239     top:
240         switch (blkinfo.size)
241         {
242             static foreach (sizeClass; smallSizeClasses)
243             {
244             case sizeClass:
245                 if (bits & BlkAttr.NO_SCAN) // no scanning needed
246                 {
247                     mixin(`blkinfo.base = unscannedPool` ~ sizeClass.stringof ~ `.allocateNext();`);
248                 }
249                 else
250                 {
251                     mixin(`blkinfo.base = scannedPool` ~ sizeClass.stringof ~ `.allocateNext();`);
252                 }
253                 break top;
254             }
255         default:
256             blkinfo.base = null;
257             printf("### %s(size:%lu, bits:%u) Cannot handle blkinfo.size:%lu\n", __FUNCTION__.ptr, size, bits, blkinfo.size);
258             // TODO find closest match of blkinfo.size <= smallSizeClasses
259             onOutOfMemoryError();
260             assert(0, "Handle other blkinfo.size");
261         }
262 
263         return blkinfo;
264     }
265 private:
266     static foreach (sizeClass; smallSizeClasses)
267     {
268         // Quote from https://olshansky.me/gc/runtime/dlang/2017/06/14/inside-d-gc.html
269         // "Fine grained locking from the start, I see no problem with per pool locking."
270         mixin(`SmallPool!(sizeClass, false) unscannedPool` ~ sizeClass.stringof ~ `;`);
271         mixin(`SmallPool!(sizeClass, true) scannedPool` ~ sizeClass.stringof ~ `;`);
272     }
273 }
274 // pragma(msg, "SmallPools.sizeof: ", SmallPools.sizeof);
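
/// Usage sketch: `qalloc` rounds the requested size up to the nearest small
/// size class and serves it from the scanned or unscanned pool of that class.
/// The capacity `1` is again a hypothetical per-pool page-table capacity.
unittest
{
    auto pools = SmallPools(1);
    const blk = pools.qalloc(20, BlkAttr.NO_SCAN);
    assert(blk.size == 32);            // 20 rounded up by `ceilPow2`
    assert(blk.base !is null);
    assert(blk.attr & BlkAttr.NO_SCAN);
}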
275 
enum pageTableCapacityDefault = 256*PAGESIZE; // one megabyte
277 
278 struct Gcx
279 {
    this(size_t pageTableCapacity) // one megabyte per table by default
281     {
282         this.smallPools = SmallPools(pageTableCapacity);
283     }
284     Array!Root roots;
285     Array!Range ranges;
286     SmallPools smallPools;
287     uint disabled; // turn off collections if >0
288 }
289 
290 Gcx tlGcx;                      // thread-local allocator instance
291 static this()
292 {
293     tlGcx = Gcx(pageTableCapacityDefault);
294 }
295 
// size-class-specific allocation functions, for the thread-local GC only
297 extern (C)
298 {
299     static foreach (sizeClass; smallSizeClasses)
300     {
301         /* TODO use template `mixin` containing, in turn, a `mixin` for generating
302          * the symbol names `gc_tlmalloc_32`, `unscannedPool32` and
303          * `scannedPool32` for sizeClass `32`.
304          *
305          * TODO Since https://github.com/dlang/dmd/pull/8813 we can now use:
306          * `mixin("gc_tlmalloc_", sizeClass);` for symbol generation
307          */
308         mixin(`
309         void* gc_tlmalloc_` ~ sizeClass.stringof ~ `(uint ba = 0) @trusted nothrow
310         {
311             if (ba & BlkAttr.NO_SCAN) // no scanning needed
312                 return tlGcx.smallPools.unscannedPool` ~ sizeClass.stringof ~ `.allocateNext();
313             else
314                 return tlGcx.smallPools.scannedPool` ~ sizeClass.stringof ~ `.allocateNext();
315         }
316 `);
317     }
318 }
319 
320 class SegregatedGC : GC
321 {
322     import core.internal.spinlock;
323     static gcLock = shared(AlignedSpinLock)(SpinLock.Contention.lengthy);
324     static bool _inFinalizer;
325 
326     // global allocator (`__gshared`)
327     __gshared Gcx gGcx;
328 
329     // lock GC, throw InvalidMemoryOperationError on recursive locking during finalization
330     static void lockNR() @nogc nothrow
331     {
332         if (_inFinalizer)
333             onInvalidMemoryOperationError();
334         gcLock.lock();
335     }
336 
337     void initialize()
338     {
339         printf("### %s()\n", __FUNCTION__.ptr);
340 
        import core.stdc.string : memcpy;
342         auto p = cstdlib.malloc(__traits(classInstanceSize, SegregatedGC));
343         if (!p)
344             onOutOfMemoryError();
345 
346         auto init = typeid(SegregatedGC).initializer();
347         assert(init.length == __traits(classInstanceSize, SegregatedGC));
348         auto instance = cast(SegregatedGC)memcpy(p, init.ptr, init.length);
349         instance.__ctor();
350 
351         instance.gGcx = Gcx(pageTableCapacityDefault);
352     }
353 
354     void finalize()
355     {
356         debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
357     }
358 
359     this()
360     {
361     }
362 
363     void enable()
364     {
365         debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
366         static void go(Gcx* tlGcx) nothrow
367         {
368             tlGcx.disabled--;
369         }
370         runLocked!(go)(&tlGcx);
371     }
372 
373     void disable()
374     {
375         debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
376         static void go(Gcx* tlGcx) nothrow
377         {
378             tlGcx.disabled++;
379         }
380         runLocked!(go)(&tlGcx);
381     }
382 
383     auto runLocked(alias func, Args...)(auto ref Args args)
384     {
385         debug(PROFILE_API) immutable tm = (config.profile > 1 ? currTime.ticks : 0);
386         lockNR();
387         scope (failure) gcLock.unlock();
388         debug(PROFILE_API) immutable tm2 = (config.profile > 1 ? currTime.ticks : 0);
389 
390         static if (is(typeof(func(args)) == void))
391             func(args);
392         else
393             auto res = func(args);
394 
395         debug(PROFILE_API) if (config.profile > 1) { lockTime += tm2 - tm; }
396         gcLock.unlock();
397 
398         static if (!is(typeof(func(args)) == void))
399             return res;
400     }
401 
402     void collect() nothrow
403     {
404         printf("### %s: \n", __FUNCTION__.ptr);
405     }
406 
407     void collectNoStack() nothrow
408     {
409         printf("### %s: \n", __FUNCTION__.ptr);
410     }
411 
412     void minimize() nothrow
413     {
414         debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
415     }
416 
417     uint getAttr(void* p) nothrow
418     {
419         debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
420         return 0;
421     }
422 
423     uint setAttr(void* p, uint mask) nothrow
424     {
425         debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
426         return 0;
427     }
428 
429     uint clrAttr(void* p, uint mask) nothrow
430     {
431         debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
432         return 0;
433     }
434 
435     void* malloc(size_t size, uint bits, const TypeInfo ti) nothrow
436     {
437         debug(PRINTF) printf("### %s(size:%lu, bits:%u)\n", __FUNCTION__.ptr, size, bits);
438         lockNR();
439         scope (failure) gcLock.unlock(); // TODO why needed?
440         void* p = gGcx.smallPools.qalloc(size, bits).base;
441         gcLock.unlock();
442         if (size && p is null)
443             onOutOfMemoryError();
444         return p;
445     }
446 
447     BlkInfo qalloc(size_t size, uint bits, const TypeInfo ti) nothrow
448     {
449         debug(PRINTF) printf("### %s(size:%lu, bits:%u)\n", __FUNCTION__.ptr, size, bits);
450         lockNR();
451         scope (failure) gcLock.unlock(); // TODO why needed?
452         BlkInfo blkinfo = gGcx.smallPools.qalloc(size, bits);
453         gcLock.unlock();
454         if (size && blkinfo.base is null)
455             onOutOfMemoryError();
456         return blkinfo;
457     }
458 
459     void* calloc(size_t size, uint bits, const TypeInfo ti) nothrow
460     {
461         debug(PRINTF) printf("### %s(size:%lu, bits:%u)\n", __FUNCTION__.ptr, size, bits);
462         lockNR();
463         scope (failure) gcLock.unlock();
464         void* p = gGcx.smallPools.qalloc(size, bits).base;
465         gcLock.unlock();
466         if (size && p is null)
467             onOutOfMemoryError();
        import core.stdc.string : memset;
469         memset(p, 0, size);     // zero
470         // why is this slower than memset? (cast(size_t*)p)[0 .. size/size_t.sizeof] = 0;
471         return p;
472     }
473 
474     void* realloc(void* p, size_t size, uint bits, const TypeInfo ti) nothrow
475     {
476         debug(PRINTF) printf("### %s(p:%p, size:%lu, bits:%u)\n", __FUNCTION__.ptr, p, size, bits);
477         p = cstdlib.realloc(p, size);
478         if (size && p is null)
479             onOutOfMemoryError();
480         return p;
481     }
482 
483     /**
484      * Attempt to in-place enlarge the memory block pointed to by p by at least
485      * minsize bytes, up to a maximum of maxsize additional bytes.
486      * This does not attempt to move the memory block (like realloc() does).
487      *
488      * Returns:
489      *  0 if could not extend p,
490      *  total size of entire memory block if successful.
491      */
492     size_t extend(void* p, size_t minsize, size_t maxsize, const TypeInfo ti) nothrow
493     {
494         debug(PRINTF) printf("### %s(p:%p, minsize:%lu, maxsize:%lu)\n", __FUNCTION__.ptr, p, minsize, maxsize);
495         return 0;
496     }
497 
498     size_t reserve(size_t size) nothrow
499     {
500         debug(PRINTF) printf("### %s(size:%lu)\n", __FUNCTION__.ptr, size);
501         return 0;
502     }
503 
504     void free(void* p) nothrow @nogc
505     {
506         debug(PRINTF) printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
507         cstdlib.free(p);
508     }
509 
510     /**
511      * Determine the base address of the block containing p.  If p is not a gc
512      * allocated pointer, return null.
513      */
514     void* addrOf(void* p) nothrow @nogc
515     {
516         debug(PRINTF) printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
517         return null;
518     }
519 
520     /**
521      * Determine the allocated size of pointer p.  If p is an interior pointer
522      * or not a gc allocated pointer, return 0.
523      */
524     size_t sizeOf(void* p) nothrow @nogc
525     {
526         debug(PRINTF) printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
527         return 0;
528     }
529 
530     /**
531      * Determine the base address of the block containing p.  If p is not a gc
532      * allocated pointer, return null.
533      */
534     BlkInfo query(void* p) nothrow
535     {
536         debug(PRINTF) printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
537         return BlkInfo.init;
538     }
539 
540     void addRoot(void* p) nothrow @nogc
541     {
542         printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
543         tlGcx.roots.insertBack(Root(p));
544     }
545 
546     /**
547      * remove p from list of roots
548      */
549     void removeRoot(void* p) nothrow @nogc
550     {
551         debug(PRINTF) printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
552         foreach (ref root; tlGcx.roots)
553         {
554             if (root is p)
555             {
556                 root = tlGcx.roots.back;
557                 tlGcx.roots.popBack();
558                 return;
559             }
560         }
561         assert(false);
562     }
563 
564     @property RootIterator rootIter() return @nogc
565     {
566         debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
567         return &rootsApply;
568     }
569 
570     private int rootsApply(scope int delegate(ref Root) nothrow dg)
571     {
572         debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
573         foreach (ref r; tlGcx.roots)
574         {
575             if (auto result = dg(r))
576                 return result;
577         }
578         return 0;
579     }
580 
581     /**
582      * Add range to scan for roots.
583      */
584     void addRange(void* p, size_t sz, const TypeInfo ti = null) nothrow @nogc
585     {
586         int x;
587         printf("### %s(p:%p, sz:%lu ti:%p, stack:%p)\n", __FUNCTION__.ptr, p, sz, ti, &x);
588         if (p is null) return;
589         tlGcx.ranges.insertBack(Range(p, p + sz, cast() ti));
590     }
591 
592     /**
593      * Remove range `p`.
594      */
595     void removeRange(void* p) nothrow @nogc
596     {
597         debug(PRINTF) printf("### %s(p:%p)\n", __FUNCTION__.ptr, p);
598         if (p is null) return;
599         foreach (ref range; tlGcx.ranges)
600         {
601             if (range.pbot is p)
602             {
603                 range = tlGcx.ranges.back;
604                 tlGcx.ranges.popBack();
605                 return;
606             }
607         }
608         assert(false);
609     }
610 
611     @property RangeIterator rangeIter() return @nogc
612     {
613         debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
614         return &rangesApply;
615     }
616 
617     private int rangesApply(scope int delegate(ref Range) nothrow dg)
618     {
619         debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
620         foreach (ref range; tlGcx.ranges)
621         {
622             if (auto result = dg(range))
623                 return result;
624         }
625         return 0;
626     }
627 
628     /**
629      * Run finalizers.
630      */
631     void runFinalizers(const scope void[] segment) nothrow
632     {
633         debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
634     }
635 
    // whether a collection is currently running object finalizers (always false in this stub implementation)
637     bool inFinalizer() nothrow
638     {
639         return false;
640     }
641 
642     /**
643      * Retrieve statistics about garbage collection.
644      * Useful for debugging and tuning.
645      */
646     core.memory.GC.Stats stats() nothrow
647     {
648         debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
649         // TODO fill in
650         return typeof(return)();
651     }
652 
653     /**
654      * Retrieve profile statistics about garbage collection.
655      * Useful for debugging and tuning.
656      */
657     core.memory.GC.ProfileStats profileStats() nothrow
658     {
659         debug(PRINTF) printf("### %s: \n", __FUNCTION__.ptr);
660         // TODO fill in
661         return typeof(return)();
662     }
663 
664 }
665 
666 private enum PowType
667 {
668     floor,
669     ceil
670 }
671 
672 private T powIntegralImpl(PowType type, T)(T val)
673 {
674     version(D_Coverage) {} else pragma(inline, true);
675     import core.bitop : bsr;
676     if (val == 0 || (type == PowType.ceil && (val > T.max / 2 || val == T.min)))
677     {
678         return 0;
679     }
680     else
681     {
682         return (T(1) << bsr(val) + type);
683     }
684 }
685 
686 private T nextPow2(T)(const T val)
687 if (is(T == size_t) ||
688     is(T == uint))
689 {
690     return powIntegralImpl!(PowType.ceil)(val);
691 }