1 module nxt.sso_string;
2 
/** Small-size-optimized (SSO) variant of `string`.
 *
 * Storage is placed inline (inside the `SSOString` value itself, typically on
 * the stack) if the number of `char`s is at most `smallCapacity`, otherwise as
 * a normal (large) `string`. The large `string` will be allocated on the
 * GC-heap if the `SSOString` is constructed from a non-`string`
 * (non-`immutable` `char[]` or static array) parameter.
 *
 * Because `SSOString` doesn't have a destructor it can safely allocate using a
 * GC-backed region allocator without relying on a GC finalizer.
 *
 * In order to mimic `string/array/slice`-behaviour, opCast returns `false` for
 * `SSOString()` and `true` for `SSOString("")`. This requires `SSOString()` to
 * default to a large string in which large pointer is set to `null`.
 *
 * Bit layout of the first byte (little-endian): bit 0 is the small/large
 * discriminator (set => small), bits `[1 .. 1+metaBits]` hold metadata and the
 * remaining upper bits hold the length.
 *
 * Big-endian platform support hasn't been verified.
 *
 * See_Also: https://forum.dlang.org/post/pb87rn$2icb$1@digitalmars.com
 * See_Also: https://issues.dlang.org/show_bug.cgi?id=18792
 * See_Also: https://forum.dlang.org/post/agzznbzkacfhyqvoezht@forum.dlang.org
 *
 * TODO Use extra bits in `Short.length` for these special text encodings:
 * - 5-bit lowercase English letter into 128/5 = 25 chars
 * - 5-bit uppercase English letter into 120/5 = 24 chars
 * - 6-bit mixedcase English letter into 120/6 = 20 chars
 *
 * TODO Add to Phobos' std.typecons or std.array or std.string
 */
struct SSOString
{
@safe:
    /** Pass the `char`s of `this` to `sink` in a single call. */
    @property void toString(scope void delegate(const(char)[]) @safe sink) const
    {
        sink(opSlice());
    }

pure:

    /** Construct from `source`, which potentially needs GC-allocation (iff
     * `source.length > smallCapacity` and `source` is not a `string`).
     *
     * Params:
     *   source = static or dynamic array of `char`s to copy or reference.
     *
     * A source of at most `smallCapacity` `char`s is copied into the inline
     * small storage. A longer `immutable` slice is referenced as is, whereas
     * any other longer source is duplicated onto the GC-heap.
     */
    this(Chars)(const scope auto ref Chars source) @trusted nothrow
    if (is(Chars : const(char)[])) // `isCharArray`
    {
        static if (__traits(isStaticArray, Chars))
        {
            static if (source.length <= smallCapacity) // inferred @nogc
            {
                small.data[0 .. source.length] = source;
                small.length = cast(typeof(small.length))(encodeSmallLength(source.length));
            }
            else
            {
                /* The elements of a static array are stored in the array
                 * value itself, which is owned by the caller. They must
                 * therefore be copied to the GC-heap regardless of whether
                 * they are `immutable` or not. Writing through `raw.ptr`
                 * before it has been assigned would dereference `null`.
                 */
                raw.ptr = source[].idup.ptr; // GC-allocate
                raw.length = encodeLargeLength(source.length);
            }
        }
        else                    // `Chars` is a (dynamic) array slice
        {
            if (source.length <= smallCapacity)
            {
                (cast(char*)small.data.ptr)[0 .. source.length] = source;
                small.length = cast(typeof(small.length))(encodeSmallLength(source.length));
            }
            else
            {
                static if (is(typeof(source[0]) == immutable(char)))
                {
                    raw.ptr = source.ptr; // already immutable so no duplication needed
                }
                else
                {
                    raw.ptr = source.idup.ptr; // GC-allocate
                }
                raw.length = encodeLargeLength(source.length);
            }
        }
    }

    /** Construct from `source` of `dchar`.
     *
     * Each element is UTF-8 encoded via `std.utf.encode`. `source` is iterated
     * twice: first to count the number of `char`s needed and then to write
     * them into either the small or a newly GC-allocated large storage.
     */
    this(Source)(scope Source source) @trusted
    if (is(typeof({ foreach (const dchar elem; Source.init) {} })) && // TODO `isConstRefIterable`
        is(typeof(Source.init.front) == dchar))
    {
        import std.utf : encode;

        // pre-calculate number of `char`s needed
        size_t charCount = 0;
        foreach (const e; source)
        {
            char[4] chars;      // TODO `= void`
            charCount += encode(chars, e);
        }

        if (charCount <= smallCapacity) // fits in small
        {
            size_t offset = 0;
            foreach (const e; source)
            {
                char[4] chars;
                const count = encode(chars, e);
                (cast(char[])(small.data))[offset .. offset + count] = chars[0 .. count];
                offset += count;
            }
            assert(offset <= smallCapacity);
            small.length = cast(typeof(small.length))(encodeSmallLength(offset));
        }
        else                    // needs large
        {
            large = new immutable(char)[charCount];
            size_t offset = 0;
            foreach (const e; source)
            {
                char[4] chars;
                const count = encode(chars, e);
                (cast(char[])(large))[offset .. offset + count] = chars[0 .. count]; // NOTE modifies immutable data
                offset += count;
            }
            // `large.length` was needed unencoded for the slicing above, encode it last
            raw.length = encodeLargeLength(charCount);
        }
    }

nothrow:

    /** Return `this` converted to a `string`, without any GC-allocation because
     * `this` is `immutable`.
     */
    @property string toString() immutable @trusted pure nothrow @nogc // never allocates
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice();
    }

    /** Return `this` converted to a `string`, which potentially needs
     * GC-allocation (iff `this` is small).
     *
     * A large string is returned as is, whereas a small string is duplicated
     * because its `char`s are stored inside `this`.
     *
     * Implementation is kept in sync with `opSlice`.
     */
    @property string toString() const return @trusted pure nothrow // may GC-allocate
    {
        if (isLarge)
        {
            // GC-allocated slice has immutable members so ok to cast
            return cast(typeof(return))raw.ptr[0 .. decodeRawLength(raw.length)]; // no allocation
        }
        else
        {
            return small.data.ptr[0 .. decodeRawLength(small.length)].idup; // need duplicate to make `immutable`
        }
    }

    @nogc:

    /** Get hash of `this`, with extra fast computation for the small case.
     *
     * The small case hashes the two raw words directly and therefore includes
     * the metadata bits stored in the length byte.
     */
    @property hash_t toHash() const scope @trusted
    {
        version(D_Coverage) {} else version(LDC) pragma(inline, true);
        if (isLarge)
        {
            import core.internal.hash : hashOf;
            return hashOf(opSliceLarge()); // use default
        }
        else                    // fast path for small string
        {
            import nxt.hash_functions : lemireHash64;
            return (lemireHash64(words[0] >> 1) ^ // shift away LS-bit being a constant for a small string
                    lemireHash64(words[1]));
        }
    }

    /** Get length in number of `char`s. */
    @property size_t length() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        if (isLarge)
        {
            return decodeRawLength(large.length); // skip tag bits
        }
        else
        {
            return decodeRawLength(small.length); // skip tag bits
        }
    }
    /// ditto
    alias opDollar = length;

    /** Check if `this` is empty. */
    @property bool empty() const scope @safe pure nothrow @nogc
    {
        return length == 0;
    }

    /** Check if `this` is `null`, that is if the raw length word including
     * all its tag bits is zero, as is the case for `SSOString.init`.
     */
    @property bool isNull() const scope @trusted pure nothrow @nogc
    {
        return raw.length == 0;
    }

    /** Return a slice to either the whole large or whole small `string`.
     *
     * Implementation is kept in sync with `toString`.
     */
    inout(char)[] opSlice() inout return scope @trusted @nogc
    {
        version(D_Coverage) {} else pragma(inline, true);   // TODO: maybe remove
        if (isLarge)
        {
            return opSliceLarge();
        }
        else
        {
            return opSliceSmall();
        }
    }

    /** Return a slice at `[i .. j]` to either the internally stored large or small `string`.
     *
     * Bounds are checked by the slicing of the whole slice.
     */
    inout(char)[] opSlice(size_t i, size_t j) inout return @safe
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice()[i .. j];
    }

    /// Return a slice to the whole large `string`. Requires `isLarge`.
    private inout(char)[] opSliceLarge() inout return scope @system @nogc
    {
        version(D_Coverage) {} else pragma(inline, true);
        version(unittest) assert(isLarge);
        return cast(typeof(return))raw.ptr[0 .. decodeRawLength(raw.length)]; // no allocation
        // alternative:  return large.ptr[0 .. large.length/2];
    }

    /// Return a slice to the whole small `string` stored inside `this`. Requires `!isLarge`.
    private inout(char)[] opSliceSmall() inout return scope @trusted @nogc
    {
        version(D_Coverage) {} else pragma(inline, true);
        version(unittest) assert(!isLarge);
        return cast(typeof(return))small.data.ptr[0 .. decodeRawLength(small.length)]; // scoped
    }

    /** Return the `index`ed `char` of `this`.
     */
    ref inout(char) opIndex(size_t index) inout return @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice()[index]; // does range check
    }

    /// Get pointer to the internally stored `char`s.
    @property private immutable(char)* ptr() const return @trusted
    {
        if (isLarge)
        {
            return large.ptr;   // pointer of the large slice
        }
        else
        {
            return small.data.ptr; // pointer into `this`
        }
    }

    /** Check if `this` is equal to `rhs`, comparing `char`s only. */
    bool opEquals()(const scope auto ref typeof(this) rhs) const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice() == rhs.opSlice();
    }

    /** Check if `this` is equal to `rhs`. */
    bool opEquals()(const scope const(char)[] rhs) const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice() == rhs;
    }

    /** Compare `this` with `that`.
     *
     * Returns: -1 if `this` is less than, 1 if greater than and 0 if equal to
     * `that`, using the array comparison of their slices.
     *
     * See_Also: https://forum.dlang.org/post/muhfypwftdivluqdbmdf@forum.dlang.org
     */
    @property int opCmp()(const scope typeof(this) that) const scope // template-lazy
    {
        version(D_Coverage) {} else pragma(inline, true);
        scope const a = this.opSlice();
        scope const b = that.opSlice();
        return a < b ? -1 : (a > b);
        // import core.internal.array.comparison : __cmp; // instead of `std.algorithm.comparison : cmp`;
        // return __cmp(this[], that[]);
    }

    /** Convert to `bool`, being `false` for a null large string and `true`
     * for any small string (whose length byte always has its tag bit set).
     */
    bool opCast(T : bool)() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        if (isLarge)
        {
            return large !is null;
        }
        else
        {
            return small.length != 0;
        }
    }

    /** Support trait `isNullable`. */
    static immutable nullValue = typeof(this).init;

    /** Support trait `isHoleable`. */
    static immutable holeValue = typeof(this).asHole();

    /** Check if this is a hole, meaning a removed/erased value. */
    bool isHole() const scope @safe nothrow @nogc
    {
        return words[0] == size_t.max;
    }

    /** Turn this into a hole, meaning a removed/erased value. */
    void holeify() @system @nogc scope
    {
        words[0] = size_t.max;
        words[1] = size_t.max;
    }

    /** Returns: a holed `SSOString`, meaning a removed/erased value. */
    private static typeof(this) asHole() @system
    {
        typeof(return) result = void; // both words are set by `holeify`
        result.holeify();
        return result;
    }

    /** Check if `this` is a small ASCII string. */
    bool isSmallASCII() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        static assert(largeLengthTagBitOffset == 0); // mask below tests bit 0 of the least significant byte
        // should be fast on 64-bit platforms:
        return ((words[0] & 0x_80_80_80_80__80_80_80_01UL) == 1 && // bit 0 of lsbyte is set => small
                (words[1] & 0x_80_80_80_80__80_80_80_80UL) == 0);  // no byte has its top bit set => ASCII
    }

private:

    /** Returns: `true` iff this is a large string, otherwise `false.` */
    @property bool isLarge() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return !(large.length & (1 << largeLengthTagBitOffset)); // first bit discriminates small from large
    }

    alias Large = immutable(char)[];

    public enum smallCapacity = Large.sizeof - Small.length.sizeof;
    static assert(smallCapacity > 0, "No room for small source for immutable(char) being " ~ immutable(char).stringof);

    enum largeLengthTagBitOffset = 0; ///< bit position for large tag in length.
    enum smallLengthBitCount = 4;
    static assert(smallCapacity == 2^^smallLengthBitCount-1);

    enum metaBits = 3;               ///< Number of bits used for metadata.
    enum metaMask = (2^^metaBits-1); ///< Mask for metadata shifted to bottom.
    enum tagsBitCount = 1 + metaBits; ///< Number of bits used for small discriminator plus extra meta data.
    static assert(smallLengthBitCount + tagsBitCount == 8);

    /// Get metadata stored in bits `[1 .. 1+metaBits]` of the length byte.
    @property ubyte metadata() const @safe pure nothrow @nogc
    {
        // same shift as used by the setter
        return (small.length >> (largeLengthTagBitOffset + 1)) & metaMask; // get bits [1 .. 1+metaBits]
    }

    /// Set metadata to `data`, preserving length and small/large discriminator.
    @property void metadata(ubyte data) @trusted pure nothrow @nogc
    {
        assert(data < (1 << metaBits));
        if (isLarge)
        {
            raw.length = encodeLargeLength(length) | ((data & metaMask) << (largeLengthTagBitOffset + 1));
        }
        else
        {
            small.length = cast(ubyte)encodeSmallLength(length) | ((data & metaMask) << (largeLengthTagBitOffset + 1));
        }
    }

    /// Decode raw length `rawLength` by shifting away tag bits.
    static size_t decodeRawLength(size_t rawLength) @safe pure nothrow @nogc
    {
        return rawLength >> tagsBitCount;
    }

    /// Encode `Large` length from `length`, leaving all tag bits cleared.
    static size_t encodeLargeLength(size_t length) @safe pure nothrow @nogc
    {
        return (length << tagsBitCount);
    }

    /// Encode `Small` length from `length`, setting the small discriminator bit.
    static size_t encodeSmallLength(size_t length) @safe pure nothrow @nogc
    {
        assert(length <= smallCapacity);
        return (length << tagsBitCount) | (1 << largeLengthTagBitOffset);
    }

    version(LittleEndian) // see: http://forum.dlang.org/posting/zifyahfohbwavwkwbgmw
    {
        struct Small
        {
            /* TODO only first 4 bits are needed to represent a length between
             * 0-15, use other 4 bits.
             */
            ubyte length = 0;
            immutable(char)[smallCapacity] data = [0,0,0,0,0,
                                                   0,0,0,0,0,
                                                   0,0,0,0,0]; // explicit init needed for `__traits(isZeroInit)` to be true.
        }
    }
    else
    {
        struct Small
        {
            immutable(char)[smallCapacity] data = [0,0,0,0,0,
                                                   0,0,0,0,0,
                                                   0,0,0,0,0]; // explicit init needed for `__traits(isZeroInit)` to be true.
            /* TODO only first 4 bits are needed to represent a length between
             * 0-15, use other 4 bits.
             */
            ubyte length;
        }
        static assert(0, "TODO add BigEndian support and test");
    }

    struct Raw                  // same memory layout as `immutable(char)[]`
    {
        size_t length = 0;      // can be bit-fiddled without GC allocation
        immutable(char)* ptr = null;
    }

    union
    {
        Raw raw;                // large string as separate length and pointer
        Large large;            // large string as a slice
        Small small;            // small string stored inline
        size_t[2] words;        // whole storage as two machine words
    }
}
version(unittest) static assert(SSOString.sizeof == string.sizeof);
455 
/** Returns: `x` lowercased.
 *
 * A small ASCII string is converted byte-wise without any allocation. A large
 * string is converted via `std.uni.asLowerCase`. Any other small string is
 * converted via `std.uni.toLowerInPlace` on a copy of `x`.
 */
SSOString toLower()(const SSOString x) @trusted // template-lazy
{
    if (x.isSmallASCII)         // small ASCII fast-path
    {
        typeof(return) result = void;
        result.small.length = x.small.length;
        foreach (const index; 0 .. x.smallCapacity) // all bytes because `result` is void-initialized
        {
            import std.ascii : toLower;
            (cast(char[])(result.small.data))[index] = toLower(x.small.data[index]);
        }
        return result;
    }
    else if (x.isLarge)
    {
        import std.uni : asLowerCase;
        import std.conv : to;
        return typeof(return)(x.opSlice().asLowerCase.to!string); // TODO make .to!string nothrow
    }
    else                   // small non-ASCII path usually without GC-allocation
    {
        typeof(return) result = x; // copy
        import std.uni : toLowerInPlace;
        auto slice = cast(char[])(result.opSlice()); // need ref to slice
        toLowerInPlace(slice);
        /* Only when `slice` still refers to exactly the storage of `result`
         * does `result` hold the converted `char`s. If `slice` was shortened
         * or reallocated (even to the same length) the converted `char`s are
         * the ones in `slice`.
         */
        if (slice is result.opSlice()) // converted in-place with unchanged length
        {
            return result;
        }
        else
        {
            version(none)
            {
                import nxt.dbgio;
                dbg(`toLowerInPlace reallocated from "`,
                    result.opSlice(), `" of length `, result.opSlice().length,
                    ` to "`
                    , slice, `" of length `, slice.length);
            }
            return typeof(return)(slice); // length change or reallocation occurred
        }
    }
}
501 
/** Returns: `x` uppercased.
 *
 * A small ASCII string is converted byte-wise without any allocation. A large
 * string is converted via `std.uni.asUpperCase`. Any other small string is
 * converted via `std.uni.toUpperInPlace` on a copy of `x`.
 */
SSOString toUpper()(const SSOString x) @trusted // template-lazy
{
    if (x.isSmallASCII)         // small ASCII fast-path
    {
        typeof(return) result = void;
        result.small.length = x.small.length;
        foreach (const index; 0 .. x.smallCapacity) // all bytes because `result` is void-initialized
        {
            import std.ascii : toUpper;
            (cast(char[])(result.small.data))[index] = toUpper(x.small.data[index]);
        }
        return result;
    }
    else if (x.isLarge)
    {
        import std.uni : asUpperCase;
        import std.conv : to;
        return typeof(return)(x.opSlice().asUpperCase.to!string); // TODO make .to!string nothrow
    }
    else                   // small non-ASCII path usually without GC-allocation
    {
        typeof(return) result = x; // copy
        import std.uni : toUpperInPlace;
        auto slice = cast(char[])(result.opSlice()); // need ref to slice
        toUpperInPlace(slice);
        /* Only when `slice` still refers to exactly the storage of `result`
         * does `result` hold the converted `char`s. If `slice` was shortened
         * or reallocated (even to the same length, which presumably is what
         * happens for German double-s) the converted `char`s are the ones in
         * `slice`.
         */
        if (slice is result.opSlice()) // converted in-place with unchanged length
        {
            return result;
        }
        else
        {
            version(none)
            {
                import nxt.dbgio;
                dbg(`toUpperInPlace reallocated from "`,
                    result.opSlice(), `" of length `, result.opSlice().length,
                    ` to "`
                    , slice, `" of length `, slice.length);
            }
            return typeof(return)(slice); // length change or reallocation occurred
        }
    }
}
547 
/// construct from non-immutable source is allowed in non-`@nogc`-scope
@safe pure nothrow unittest
{
    // empty source fits in the small storage
    scope const char[] x0;
    const s0 = SSOString(x0);           // no .idup
    assert(s0.length == 0);
    assert(!s0.isLarge);

    // source longer than `smallCapacity` needs the large storage
    scope const char[] x16 = new char[16];
    const s16 = SSOString(x16);         // will call .idup
    assert(s16.length == 16);
    assert(s16.isLarge);
    assert(s16 == x16);
}
559 
/// construct from non-immutable source is not allowed in `@nogc`-scope
@safe pure nothrow @nogc unittest
{
    // `s` is only referenced by the disabled check below
    scope const char[] s;
    // NOTE(review): the function literal inside `__traits(compiles, ...)`
    // presumably gets its own attribute inference instead of inheriting `@nogc`
    // from this unittest, which would explain the failure -- TODO confirm
    // TODO why does this fail? static assert(!__traits(compiles, { const _ = SSOString(s); }));
}
566 
/// constructing from a static array of at most `smallCapacity` `char`s in `@nogc`-scope never gives a null value
@trusted pure nothrow @nogc unittest
{
    enum maxCount = SSOString.smallCapacity;
    static foreach (const count; 0 .. maxCount + 1)
    {
        {
            immutable(char)[count] chars;
            const str = SSOString(chars);
            assert(!str.isNull);
        }
    }
}
578 
/// constructing from a static array of more than `smallCapacity` `char`s never gives a null value
@trusted pure nothrow unittest
{
    enum minCount = SSOString.smallCapacity + 1;
    static foreach (const count; minCount .. 32)
    {
        {
            immutable(char)[count] chars;
            const str = SSOString(chars);
            assert(!str.isNull);
        }
    }
}
590 
/// constructing from a dynamic array of `char`s of any length never gives a null value
@trusted pure nothrow unittest
{
    foreach (const count; 0 .. 32)
    {
        scope chars = new immutable(char)[count];
        const str = SSOString(chars);
        assert(!str.isNull);
    }
}
600 
/// behaviour of the `==` and `is` operators on small strings
@trusted pure nothrow @nogc unittest
{
    const SSOString first = "42";
    const SSOString second = "42";
    const SSOString other = "43";

    // none of them is null and each equals its source literal
    assert(!first.isNull);
    assert(!second.isNull);
    assert(!other.isNull);
    assert(first == "42");
    assert(second == "42");
    assert(other == "43");

    // equal contents compare equal, both as values and as slices
    assert(first == second);
    assert(first == second[]);
    assert(first[] == second);
    assert(first[] == second[]);

    // a slice is identical to itself only
    assert(first[] is first[]);
    assert(second[] is second[]);
    assert(first[] !is second[]);
    assert(first.ptr !is second.ptr);

    // different contents are neither equal nor identical
    assert(first != other);
    assert(first[] != other[]);
    assert(first !is other);
    assert(first[] !is other[]);
}
629 
/// null, small and large construction together with length, slicing and indexing
@safe pure nothrow @nogc unittest
{
    static assert(SSOString.smallCapacity == 15);

    import nxt.gc_traits : mustAddGCRange;
    static assert(mustAddGCRange!SSOString); // `Large large.ptr` must be scanned

    static assert(__traits(isZeroInit, SSOString));
    // TODO assert(SSOString.init == SSOString.nullValue);

    // default-initialized value is a null large string
    auto s0 = SSOString.init;
    assert(s0.isNull);
    assert(s0.length == 0);
    assert(s0.isLarge);
    assert(s0[] == []);

    // mutable static array of exactly `smallCapacity` `char`s
    char[SSOString.smallCapacity] charsSmallCapacity = "123456789_12345"; // fits in small string
    const sSmallCapacity = SSOString(charsSmallCapacity);
    assert(!sSmallCapacity.isLarge);
    assert(sSmallCapacity.length == SSOString.smallCapacity);
    assert(sSmallCapacity == charsSmallCapacity);

    // empty string is not null but still compares equal to the null string
    const s0_ = SSOString("");
    assert(!s0_.isNull);         // `isNull` tells empty and null apart
    assert(s0 == s0_);           // whereas `==` cannot distinguish them

    const s7 = SSOString("0123456");
    assert(!s7.isNull);

    const s7_ = SSOString("0123456_"[0 .. $ - 1]);
    assert(s7.ptr !is s7_.ptr); // string data shall not overlap
    assert(s7 == s7_);

    const _s7 = SSOString("_0123456"[1 .. $]); // source from other string literal
    assert(s7.ptr !is _s7.ptr); // string data shall not overlap
    assert(s7 == _s7);

    assert(!s7.isLarge);
    assert(s7.length == 7);
    assert(s7[] == "0123456");
    assert(s7[] == "_0123456"[1 .. $]);
    assert(s7[] == "0123456_"[0 .. $ - 1]);
    assert(s7[0 .. 4] == "0123");

    // longest string that is still small
    const s15 = SSOString("0123456789abcde");
    assert(!s15.isNull);
    static assert(is(typeof(s15[]) == const(char)[]));
    assert(!s15.isLarge);
    assert(s15.length == 15);
    assert(s15[] == "0123456789abcde");
    assert(s15[0 .. 4] == "0123");
    assert(s15[10 .. 15] == "abcde");
    assert(s15[10 .. $] == "abcde");

    // shortest string that is large
    const s16 = SSOString("0123456789abcdef");
    assert(!s16.isNull);
    static assert(is(typeof(s16[]) == const(char)[]));
    assert(s16.isLarge);

    const s16_ = SSOString("0123456789abcdef_"[0 .. s16.length]);
    assert(s16.length == s16_.length);
    assert(s16[] == s16_[]);
    assert(s16.ptr !is s16_.ptr); // string data shall not overlap
    assert(s16 == s16_);              // but contents is equal

    const _s16 = SSOString("_0123456789abcdef"[1 .. $]);
    assert(s16.length == _s16.length);
    assert(s16[] == _s16[]);    // contents is equal
    assert(s16 == _s16);        // contents is equal

    assert(s16.length == 16);
    assert(s16[] == "0123456789abcdef");
    assert(s16[0] == '0');
    assert(s16[10] == 'a');
    assert(s16[15] == 'f');
    assert(s16[0 .. 4] == "0123");
    assert(s16[10 .. 16] == "abcdef");
    assert(s16[10 .. $] == "abcdef");
}
710 
/// metadata round-trip on a null string leaves its length at zero
@safe pure nothrow @nogc unittest
{
    auto str = SSOString.init;
    assert(str.isNull);
    foreach (const meta; 0 .. 8)
    {
        str.metadata = meta;
        assert(str.length == 0);
        assert(str.metadata == meta);
    }
}
723 
/// metadata round-trip on a small string preserves length, smallness and non-nullness
@safe pure nothrow @nogc unittest
{
    auto str = SSOString("0123456");
    assert(!str.isLarge);
    assert(!str.isNull);
    foreach (const meta; 0 .. 8)
    {
        str.metadata = meta;
        assert(str.metadata == meta);
        assert(!str.isNull);
        assert(!str.isLarge);
        assert(str.length == 7);
    }
}
739 
/// metadata round-trip on a small string of `smallCapacity` `char`s
@safe pure nothrow @nogc unittest
{
    auto str = SSOString("0123456789abcde");
    assert(!str.isLarge);
    assert(!str.isNull);
    assert(str.length == SSOString.smallCapacity);
    foreach (const meta; 0 .. 8)
    {
        str.metadata = meta;
        assert(str.metadata == meta);
        assert(!str.isNull);
        assert(!str.isLarge);
        assert(str.length == 15);
    }
}
756 
/// metadata round-trip on a large string of `smallCapacity + 1` `char`s
@safe pure nothrow @nogc unittest
{
    auto str = SSOString("0123456789abcdef");
    assert(str.isLarge);
    assert(!str.isNull);
    assert(!str.empty);
    assert(str.length == SSOString.smallCapacity + 1);
    foreach (const meta; 0 .. 8)
    {
        str.metadata = meta;
        assert(str.metadata == meta);
        assert(!str.isNull);
        assert(str.isLarge);
        assert(str.length == 16);
    }
}
774 
/// null and empty strings are equal (`==`) but not identical (`is`)
@safe pure nothrow @nogc unittest
{
    const nullString = SSOString();
    const emptyString = SSOString("");
    assert(nullString == emptyString);
    assert(nullString !is emptyString);
}
781 
/// hashes of null and empty strings are zero whereas those of non-empty strings are not
@safe pure nothrow @nogc unittest
{
    const nullHash = SSOString().toHash;
    const emptyHash = SSOString("").toHash;
    const smallHash = SSOString("a").toHash;
    const largeHash = SSOString("0123456789abcdef").toHash;

    assert(nullHash == 0);
    assert(emptyHash == 0);
    assert(smallHash != 0);
    assert(largeHash != 0);
}
790 
/// construction from a mutable static array one `char` longer than `smallCapacity`
@safe pure nothrow unittest
{
    enum minLargeLength = SSOString.smallCapacity + 1;
    char[minLargeLength] chars;
    const _ = SSOString(chars);
}
797 
// construction from a range of `dchar` gives same contents and expected storage kind
@safe pure unittest
{
    static void check(const scope char[] source,
                      const bool expectLarge) @safe pure
    {
        import std.utf : byDchar;
        const scope str = SSOString(source.byDchar);
        assert(str.isLarge == expectLarge);
        assert(str == source);
    }
    // sources that fit in the small storage
    check("", false);
    check("_", false);
    check("123456789_12345", false);
    // sources that need the large storage
    check("123456789_123456", true);
    check("123456789_123456789_123456789_", true);
}
815 
/// only the value returned by `asHole` is a hole
@trusted pure nothrow @nogc unittest
{
    const hole = SSOString.asHole;
    assert(hole.isHole);

    const nullString = SSOString.init;
    const emptyString = SSOString("");
    const nonEmptyString = SSOString("a");
    assert(!nullString.isHole);
    assert(!emptyString.isHole);
    assert(!nonEmptyString.isHole);
}
824 
/// DIP-1000 return ref escape analysis
@safe pure nothrow unittest
{
    // only checked when `isDIP1000` is true
    static if (isDIP1000)
    {
        // pointer to the `char`s of a local `SSOString` must not be returned
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { SSOString x; return x.ptr; } }));
        // slice of a local `SSOString` must not be returned
        static assert(!__traits(compiles, { string f1() @safe pure nothrow { SSOString x; return x[]; } }));
        // result of `toString` of a local `SSOString` must not be returned
        static assert(!__traits(compiles, { string f2() @safe pure nothrow { SSOString x; return x.toString; } }));
        // reference to an element of a local `SSOString` must not be returned
        static assert(!__traits(compiles, { ref immutable(char) g() @safe pure nothrow @nogc { SSOString x; return x[0]; } }));
    }
}
836 
/// detection of small ASCII strings via `isSmallASCII`
@safe pure nothrow @nogc unittest
{
    // small and containing only ASCII
    assert(SSOString("_").isSmallASCII);
    assert(SSOString("a").isSmallASCII);
    assert(SSOString("b").isSmallASCII);
    assert(SSOString("z").isSmallASCII);
    assert(SSOString("abcd").isSmallASCII);
    assert(SSOString("123456789_12345").isSmallASCII);

    // ASCII but too long to be small
    assert(!SSOString("123456789_123456").isSmallASCII);

    // small but containing non-ASCII
    assert(!SSOString("ö").isSmallASCII);
    assert(!SSOString("Ö").isSmallASCII);
    assert(!SSOString("åäö").isSmallASCII);
    assert(!SSOString("ö-värld").isSmallASCII);
    assert(!SSOString("123456789_123ö").isSmallASCII);
}
856 
/// case-conversion of small ASCII, small non-ASCII and large strings
@safe pure unittest
{
    // small ASCII
    assert(SSOString("a").toUpper[] == "A");
    assert(SSOString("A").toLower[] == "a");
    assert(SSOString("abcdefghijklmno").toUpper[] == "ABCDEFGHIJKLMNO"); // small
    assert(SSOString("ABCDEFGHIJKLMNO").toLower[] == "abcdefghijklmno"); // small

    // small non-ASCII
    assert(SSOString("åäö").toUpper[] == "ÅÄÖ");
    assert(SSOString("ÅÄÖ").toLower[] == "åäö");

    // large
    assert(SSOString("abcdefghijklmnop").toUpper[] == "ABCDEFGHIJKLMNOP"); // large
    assert(SSOString("ABCDEFGHIJKLMNOP").toLower[] == "abcdefghijklmnop"); // large

    // in-place conversion of a static array through a slice of it
    import std.uni : toLowerInPlace;
    char[6] chars = "ÅÄÖ";
    auto slice = chars[];
    toLowerInPlace(slice);
    assert(slice == "åäö");
    assert(chars == "åäö");
}
876 
/// ordering of `SSOString`s agrees with ordering of plain strings
@safe pure unittest
{
    const SSOString lesser = SSOString("a");
    immutable SSOString greater = SSOString("b");

    assert(lesser == SSOString("a"));

    // comparison works across different qualifiers and on slices
    assert(lesser < greater);
    assert(greater > lesser);
    assert(lesser[] < greater[]);

    // reference ordering of plain strings
    assert("a" < "b");
    assert("a" < "å");
    assert("Å" < "å");

    // same ordering for non-ASCII contents
    assert(SSOString("a") < SSOString("å"));
    assert(SSOString("ÅÄÖ") < SSOString("åäö"));
}
895 
/// conversion to `bool` mimics that of `string`
@safe pure unittest
{
    // empty and non-empty strings convert to `true`
    assert(SSOString("abc"));
    assert(SSOString(""));

    // only the null string converts to `false`
    assert(!SSOString());
}
904 
/// to string conversion
@safe pure unittest
{
    // mutable small will GC-allocate
    {
        SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);   // slice refers to internal storage
        assert(s.ptr !is &s.toString()[0]); // string is a duplicate
    }

    // const small will GC-allocate
    {
        const SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);   // slice refers to internal storage
        assert(s.ptr !is &s.toString()[0]); // string is a duplicate
    }

    // immutable small will not allocate
    {
        immutable SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr is &s.toString()[0]); // string refers to internal storage
        // TODO check return via -dip1000
    }

    /* Forbid return of possibly locally scoped `Small` small stack object
     * regardless of head-mutability.
     */
    static if (isDIP1000)
    {
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { const SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { immutable SSOString x; return x.ptr; } }));

        /** TODO Enable the following line when DIP-1000 works for opSlice()
         *
         * See_Also: https://issues.dlang.org/show_bug.cgi?id=18792
         */
        // static assert(!__traits(compiles, { string f1() @safe pure nothrow { immutable SSOString x; return x[]; } }));
    }

    // large will never allocate regardless of head-mutability
    {
        SSOString s = SSOString("123456789_123456");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr is &s.toString()[0]); // shouldn't this change?
    }
}
953 
954 version(unittest)
955 {
956     import nxt.dip_traits : isDIP1000;
957 }