1 module nxt.sso_open_hashset;
2 
3 import nxt.container.flat_hashmap;
4 
5 import std.traits : isInstanceOf;
6 import nxt.nullable_traits : isNullable, isAddress;
7 import nxt.my_mallocator : Mallocator;
8 
9 /** Small-set-optimized `FlatHashSet`.
10  *
11  * TODO: search for `nullify`, `isNull`, `nullValue` and support deleted keys (`isDull`)
12  *
13  * TODO: use opPostMove to update `gc_addRange` and `gc_removeRange` when
14  * implemented. See: https://github.com/dlang/DIPs/pull/109
15  *
16  * See_Also: https://forum.dlang.org/post/pb87rn$2icb$1@digitalmars.com
17  */
18 struct SSOFlatHashSet(K,
19                       alias hasher = hashOf,
20                       alias Allocator = Mallocator.instance)
21     if (isNullable!K)
22 {
23     import nxt.qcmeman : gc_addRange, gc_removeRange;
24 
25     import core.lifetime : emplace, move, moveEmplace;
26     import std.traits : isDynamicArray;
27     import core.internal.traits : hasElaborateDestructor;
28     import nxt.nullable_traits : defaultNullKeyConstantOf, isNull, nullify;
29     import nxt.bit_traits : isAllZeroBits;
30 
31     alias InsertionStatus = Large.InsertionStatus;
32 
33     @safe:
34 
35     static if (!large.hasAddressLikeKey &&
36                (__traits(hasMember, K, `nullValue`) && // if key has a null value
37                 __traits(compiles, { enum _ = isAllZeroBits!(K, K.nullValue); }) && // prevent strange error given when `K` is `knet.data.Data`
38                 !isAllZeroBits!(K, K.nullValue)))
39     {
40         pragma(msg, "TODO: warning key type ", K, " has non-zero-bit init value, default construction should be disabled or Small._store should be set to init value");
41     }
42     // TODO: @disable this();
43 
44     static typeof(this) withCapacity()(size_t minimumCapacity) @trusted /* template-lazy */
45     {
46         typeof(return) result;                   // TODO: `result = void` for nullify case
47         if (minimumCapacity > Small.maxCapacity) // will be large
48         {
49             result.large = Large.withCapacity(minimumCapacity);
50         }
51         else                    // small
52         {
53             static if (Large.hasAddressLikeKey ||
54                        (__traits(hasMember, K, `nullValue`) && // if key has a null value
55                         __traits(compiles, { enum _ = isAllZeroBits!(K, K.nullValue); }) && // prevent strange error given when `K` is `knet.data.Data`
56                         isAllZeroBits!(K, K.nullValue))) // check that it's zero bits only
57             {
58                 // nothing needed
59             }
60             else                // needs explicit null
61             {
62                 static foreach (immutable index; 0 .. small.maxCapacity)
63                 {
64                     result.small._store[index].nullify();
65                 }
66             }
67 
68             result.small._capacityDummy = 2; // tag as small
69         }
70         return result;
71     }
72 
73     ~this() nothrow @trusted @nogc
74     {
75         if (isLarge)
76         {
77             static if (hasElaborateDestructor!Large)
78             {
79                 .destroy(large);
80             }
81         }
82         else
83         {
84             static if (hasElaborateDestructor!K)
85             {
86                 static assert(0, "Destroy non-null elements");
87             }
88         }
89     }
90 
91     @disable this(this);
92 
93     @property size_t capacity() const pure nothrow @trusted @nogc
94     {
95         version(D_Coverage) {} else pragma(inline, true);
96         return small._capacityDummy;
97     }
98 
99     @property size_t length() const pure nothrow @trusted @nogc
100     {
101         version(LDC) pragma(inline, true);
102         if (isLarge)
103         {
104             return large.length;
105         }
106         else
107         {
108             import std.algorithm.searching : count;
109             return small._store[].count!(bin => Large.isOccupiedBin(bin));
110         }
111     }
112 
113     InsertionStatus insert(K key) @trusted
114     {
115         if (isLarge)
116         {
117             return large.insert(key);
118         }
119         else
120         {
121             assert(!key.isNull);
122 
123             // try inserting into small
124             foreach (immutable index; 0 .. small.maxCapacity) // TODO: benchmark with `static foreach`
125             {
126                 if (small._store[index].isNull) // free slot
127                 {
128                     move(key, small._store[index]);
129                     return InsertionStatus.added;
130                 }
131             }
132 
133             // not hit
134             expandWithExtraCapacity(1);
135             assert(isLarge);
136             return large.insert(key);
137         }
138     }
139 
140     bool remove()(const scope K key) @trusted /* template-lazy */
141     {
142         assert(!key.isNull);
143         if (isLarge)
144         {
145             const hit = large.remove(key);
146             if (large.length <= small.maxCapacity)
147             {
148                 shrinkLargeToSmall();
149             }
150             return hit;
151         }
152         else
153         {
154             foreach (immutable index; 0 .. small.maxCapacity) // TODO: benchmark with `static foreach`
155             {
156                 if (small._store[index] is key)
157                 {
158                     small._store[index].nullify(); // don't need holes for small array
159                     return true;
160                 }
161             }
162             return false;
163         }
164     }
165 
166     private void shrinkLargeToSmall()() @trusted /* template-lazy */
167     {
168         Large largeCopy = void;
169         moveEmplace(large, largeCopy); // TODO: no need to reset `large`
170 
171         size_t count = 0;
172         foreach (ref e; largeCopy.rawStore)
173         {
174             if (e.isNull) { continue; }
175             static if (Large.hasAddressLikeKey)
176             {
177                 if (Large.isHoleKeyConstant(e)) { continue; }
178             }
179             moveEmplace(e, small._store[count]);
180             count += 1;
181         }
182 
183         foreach (immutable index; count .. small.maxCapacity)
184         {
185             static if (hasElaborateDestructor!K)
186             {
187                 emplace(&small._store[index]); // reset
188             }
189             small._store[index].nullify();
190         }
191     }
192 
193     /** Check if `element` is stored.
194         Returns: `true` if element is present, `false` otherwise.
195     */
196     bool contains(const scope K key) const @trusted /* template-lazy */, `auto ref` here makes things slow
197     {
198         if (isLarge)
199         {
200             return large.contains(key);
201         }
202         else
203         {
204             assert(!key.isNull);
205             static if (Large.hasAddressLikeKey)
206             {
207                 assert(!Large.isHoleKeyConstant(key));
208             }
209             // TODO: is static foreach faster here?
210             import std.algorithm.searching : canFind;
211             alias pred = (a, b) => a is b;            // TODO: add to template
212             return small._store[].canFind!(pred)(key);
213         }
214     }
215 
216     private void expandWithExtraCapacity(size_t extraCapacity) @trusted
217     {
218         Small.Bins binsCopy = small._store; // TODO: moveEmplace
219         // TODO: merge these lines?
220         emplace!Large(&large);
221         large.reserveExtra(Small.maxCapacity + extraCapacity);
222         large.insertN(binsCopy);
223     }
224 
225     /// Get bin count.
226     @property size_t binCount() const @trusted
227     {
228         pragma(inline, true)
229         if (isLarge)
230         {
231             return large.binCount;
232         }
233         else
234         {
235             return small.maxCapacity;
236         }
237     }
238 
239     @property private scope inout(K)[] bins() inout @trusted return
240     {
241         version(D_Coverage) {} else pragma(inline, true);
242         if (isLarge)
243         {
244             return large.rawStore;
245         }
246         else
247         {
248             return small._store[];
249         }
250     }
251 
252 private:
253     bool isLarge() const pure nothrow @trusted @nogc
254     {
255         version(D_Coverage) {} else pragma(inline, true);
256         return small._capacityDummy > Small.maxCapacity;
257     }
258 
259     /// Returns: `true` if `this` currently uses small (packed) array storage.
260     bool isSmall() const pure nothrow @trusted @nogc { return !isLarge; }
261 
262     union
263     {
264         alias Large = FlatHashSet!(K, hasher);
265         Large large;
266         static struct Small
267         {
268             /* discriminator between large and small storage; must be placed at
269              * exactly here (maps to position of `large._store.length`) and
270              * always contain `maxCapacity` when this is small */
271             size_t _capacityDummy;
272             enum maxCapacity = (large.sizeof - _capacityDummy.sizeof)/K.sizeof;
273             static assert(maxCapacity, "Cannot fit a single element in a Small");
274             alias Bins = K[maxCapacity];
275             Bins _store;
276         }
277         Small small;
278     };
279 }
280 
281 /** L-value element reference (and in turn range iterator).
282  */
283 static private struct LvalueElementRef(Table)
284 {
285     import std.traits : isMutable;
286     debug static assert(isMutable!Table, "Table type should always be mutable");
287 
288     private Table* _table;      // scoped access
289     private size_t _binIndex;   // index to bin inside `table`
290     private size_t _hitCounter; // counter over number of elements popped (needed for length)
291 
292     this(Table* table) @trusted
293     {
294         version(D_Coverage) {} else pragma(inline, true);
295         this._table = table;
296         // static if (Table.isBorrowChecked)
297         // {
298         //     debug
299         //     {
300         //         _table.incBorrowCount();
301         //     }
302         // }
303     }
304 
305     ~this() nothrow @trusted @nogc
306     {
307         version(D_Coverage) {} else pragma(inline, true);
308         // static if (Table.isBorrowChecked)
309         // {
310         //     debug
311         //     {
312         //         _table.decBorrowCount();
313         //     }
314         // }
315     }
316 
317     this(this) @trusted
318     {
319         version(D_Coverage) {} else pragma(inline, true);
320         // static if (Table.isBorrowChecked)
321         // {
322         //     debug
323         //     {
324         //         assert(_table._borrowCount != 0);
325         //         _table.incBorrowCount();
326         //     }
327         // }
328     }
329 
330     /// Check if empty.
331     bool empty() const @property @safe pure nothrow @nogc
332     {
333         version(D_Coverage) {} else pragma(inline, true);
334         return _binIndex == _table.binCount;
335     }
336 
337     /// Get number of element left to pop.
338     @property size_t length() const @safe pure nothrow @nogc
339     {
340         version(D_Coverage) {} else pragma(inline, true);
341         return _table.length - _hitCounter;
342     }
343 
344     @property typeof(this) save() // ForwardRange
345     {
346         version(D_Coverage) {} else pragma(inline, true);
347         return this;
348     }
349 
350     void popFront()
351     {
352         version(LDC) pragma(inline, true);
353         assert(!empty);
354         _binIndex += 1;
355         findNextNonEmptyBin();
356         _hitCounter += 1;
357     }
358 
359     private void findNextNonEmptyBin()
360     {
361         version(D_Coverage) {} else pragma(inline, true);
362         while (_binIndex != (*_table).binCount &&
363                !Table.Large.isOccupiedBin(_table.bins[_binIndex]))
364         {
365             _binIndex += 1;
366         }
367     }
368 }
369 
370 /// Range over elements of l-value instance of this.
371 static private struct ByLvalueElement(Table)
372 {
373 pragma(inline, true):
374     import std.traits : isMutable;
375     static if (isAddress!(Table.Large.ElementType)) // for reference types
376     {
377         /// Get reference to front element.
378         @property scope Table.Large.ElementType front()() return @trusted
379         {
380             // cast to head-const for class key
381             return (cast(typeof(return))_table.bins[_binIndex]);
382         }
383     }
384     else
385     {
386         /// Get reference to front element.
387         @property scope auto ref front() return @trusted
388         {
389             return *(cast(const(Table.ElementType)*)&_table._store[_binIndex]); // propagate constnes
390         }
391     }
392     import core.internal.traits : Unqual;
393     // unqual to reduce number of instantations of `LvalueElementRef`
394     public LvalueElementRef!(Unqual!Table) _elementRef;
395     alias _elementRef this;
396 }
397 
398 /** Returns: range that iterates through the elements of `c` in undefined order.
399  */
400 auto byElement(Table)(auto ref return Table c) @trusted
401     if (isInstanceOf!(SSOFlatHashSet, Table))
402 {
403     import core.internal.traits : Unqual;
404     alias M = Unqual!Table;
405     alias C = const(Table);        // be const for now
406     static if (__traits(isRef, c)) // `c` is an l-value and must be borrowed
407     {
408         auto result = ByLvalueElement!C((LvalueElementRef!(M)(cast(M*)&c)));
409     }
410     else                        // `c` was is an r-value and can be moved
411     {
412         static assert(0, "R-value parameter not supported");
413     }
414     result.findNextNonEmptyBin();
415     return result;
416 }
417 alias range = byElement;        // EMSI-container naming
418 
419 /// start small and expand to large
420 @safe pure nothrow unittest
421 {
422     import std.algorithm.comparison : equal;
423     import nxt.digestx.fnv : FNV;
424     import nxt.array_help : s;
425 
426     // construct small
427     alias X = SSOFlatHashSet!(K, FNV!(64, true));
428     static assert(X.sizeof == 24);
429     auto x = X.withCapacity(X.small.maxCapacity);
430     assert(x.isSmall);
431     assert(x.capacity == X.small.maxCapacity);
432     assert(x.length == 0);
433 
434     auto k42 = new K(42);
435     auto k43 = new K(43);
436     auto k44 = new K(44);
437 
438     // insert first into small
439 
440     assert(!x.contains(k42));
441 
442     assert(x.insert(k42) == x.InsertionStatus.added);
443     assert(x.contains(k42));
444 
445     assert(x.remove(k42));
446     assert(!x.contains(k42));
447 
448     assert(x.insert(k42) == x.InsertionStatus.added);
449     assert(x.contains(k42));
450 
451     assert(x.byElement.equal!((a, b) => a is b)([k42].s[]));
452     assert(x.isSmall);
453     assert(x.length == 1);
454 
455     // insert second into small
456 
457     assert(!x.contains(k43));
458 
459     assert(x.insert(k43) == x.InsertionStatus.added);
460     assert(x.contains(k42));
461     assert(x.contains(k43));
462 
463     assert(x.remove(k43));
464     assert(x.remove(k42));
465     assert(!x.contains(k43));
466     assert(!x.contains(k42));
467 
468     assert(x.insert(k43) == x.InsertionStatus.added);
469     assert(x.insert(k42) == x.InsertionStatus.added);
470 
471     assert(x.byElement.equal!((a, b) => a is b)([k43, k42].s[]));
472     assert(x.isSmall);
473     assert(x.length == 2);
474 
475     // expanding insert third into large
476 
477     assert(!x.contains(k44));
478     assert(x.insert(k44) == x.InsertionStatus.added);
479     // unordered store so equal doesn't work anymore
480     assert(x.contains(k42));
481     assert(x.contains(k43));
482     assert(x.contains(k44));
483     foreach (ref e; x.byElement)
484     {
485         static assert(is(typeof(e) == K));
486         assert(x.contains(e));
487     }
488     assert(x.isLarge);
489     assert(x.length == 3);
490 
491     // shrinking remove third into small
492     assert(x.remove(k44));
493     assert(!x.contains(k44));
494     assert(x.isSmall);
495     assert(x.length == 2);
496     assert(x.byElement.equal!((a, b) => a is b)([k43, k42].s[]));
497     // auto xr = x.byElement;
498     // assert(xr.front.value == 43);
499     // assert(xr.front is k43);
500     // xr.popFront();
501     // assert(xr.front.value == 42);
502     // assert(xr.front is k42);
503     // xr.popFront();
504     // assert(xr.empty);
505 
506     const cx = X.withCapacity(X.small.maxCapacity);
507     foreach (ref e; cx.byElement)
508     {
509         static assert(is(typeof(e) == K)); // head-const because of `is`-equality
510     }
511 }
512 
513 /// start large
514 @safe pure nothrow unittest
515 {
516     import nxt.digestx.fnv : FNV;
517     alias X = SSOFlatHashSet!(K, FNV!(64, true));
518     import nxt.container.traits : mustAddGCRange;
519     static assert(mustAddGCRange!K);
520     static assert(mustAddGCRange!X);
521     auto x = X.withCapacity(3);
522     assert(x.isLarge);
523     assert(x.capacity == 4);    // nextPow2(3)
524     assert(x.length == 0);
525     assert(x.insert(new K(42)) == x.InsertionStatus.added);
526     assert(x.length == 1);
527     assert(x.insert(new K(43)) == x.InsertionStatus.added);
528     assert(x.length == 2);
529 }
530 
531 version(unittest)
532 {
533     class K
534     {
535         this(uint value) @safe pure nothrow @nogc
536         {
537             this.value = value;
538         }
539         uint value;
540     }
541 }