1 module nxt.sso_string;
2 
3 /** Small-size-optimized (SSO) variant of `string`.
4  *
 * Storage is placed inline (typically on the stack) if the number of `char`s
 * is at most `smallCapacity`, otherwise as a normal (large) `string`. The large `string`
7  * will be allocated on the GC-heap if the `SSOString` is constructed from a
8  * non-`string` (non-`immutable` `char[]`) parameter.
9  *
10  * Because `SSOString` doesn't have a destructor it can safely allocate using a
11  * GC-backed region allocator without relying on a GC finalizer.
12  *
13  * In order to mimic `string/array/slice`-behaviour, opCast returns `false` for
14  * `SSOString()` and `true` for `SSOString("")`. This requires `SSOString()` to
15  * default to a large string in which large pointer is set to `null`.
16  *
17  * Big-endian platform support hasn't been verified.
18  *
19  * See_Also: https://forum.dlang.org/post/pb87rn$2icb$1@digitalmars.com
20  * See_Also: https://issues.dlang.org/show_bug.cgi?id=18792
21  * See_Also: https://forum.dlang.org/post/agzznbzkacfhyqvoezht@forum.dlang.org
22  *
 * TODO: Use extra bits in `Small.length` for these special text encodings:
 * - 5-bit lowercase English letter into 120/5 = 24 chars
 * - 5-bit uppercase English letter into 120/5 = 24 chars
 * - 6-bit mixedcase English letter into 120/6 = 20 chars
27  *
28  * TODO: Add to Phobos' std.typecons or std.array or std.string
29  */
struct SSOString
{
@safe:
    /** Pass all the `char`s of `this` to `sink` in a single call. */
    @property void toString(scope void delegate(const(char)[]) @safe sink) const
    {
        sink(opSlice());
    }

pure:

    /** Construct from `source`, which potentially needs GC-allocation.
     *
     * A `source` of at most `smallCapacity` `char`s is copied into the small
     * (inline) storage. A longer `source` becomes a large string: a slice of
     * `immutable(char)` is referenced as is, whereas a slice of non-`immutable`
     * `char`s and every static array is duplicated on the GC-heap.
     */
    this(Chars)(const scope auto ref Chars source) @trusted nothrow
    if (is(Chars : const(char)[])) // `isCharArray`
    {
        static if (__traits(isStaticArray, Chars))
        {
            static if (source.length <= smallCapacity) // inferred @nogc
            {
                small.data[0 .. source.length] = source;
                small.length = cast(typeof(small.length))(encodeSmallLength(source.length));
            }
            else
            {
                /* Always duplicate: `raw.ptr` is still `null` at this point so
                 * elements cannot be copied through it, and a static array is
                 * value storage whose address must not be retained by `this`,
                 * even when its elements are `immutable`.
                 */
                raw.ptr = source.idup.ptr; // GC-allocate
                raw.length = encodeLargeLength(source.length);
            }
        }
        else                    // `Chars` is a (dynamic) array slice
        {
            if (source.length <= smallCapacity)
            {
                (cast(char*)small.data.ptr)[0 .. source.length] = source;
                small.length = cast(typeof(small.length))(encodeSmallLength(source.length));
            }
            else
            {
                static if (is(typeof(source[0]) == immutable(char)))
                    raw.ptr = source.ptr; // already immutable so no duplication needed
                else
                    raw.ptr = source.idup.ptr; // GC-allocate
                raw.length = encodeLargeLength(source.length);
            }
        }
    }

    /** Construct from `source` being a range of `dchar`.
     *
     * `source` is iterated twice: first to count the number of UTF-8 code
     * units needed and then to encode them into either the small storage or a
     * GC-allocated large string.
     */
    this(Source)(scope Source source) @trusted
    if (is(typeof({ foreach (const dchar elem; Source.init) {} })) && // TODO: `isConstRefIterable`
        is(typeof(Source.init.front) == dchar))
    {
        import std.utf : encode;

        // pre-calculate number of `char`s needed
        size_t charCount = 0;
        foreach (const e; source)
        {
            char[4] chars;      // TODO: `= void`
            charCount += encode(chars, e);
        }

        if (charCount <= smallCapacity) // fits in small
        {
            size_t offset = 0;
            foreach (const e; source)
            {
                char[4] chars;
                const count = encode(chars, e);
                (cast(char[])(small.data))[offset .. offset + count] = chars[0 .. count];
                offset += count;
            }
            assert(offset <= smallCapacity);
            small.length = cast(typeof(small.length))(encodeSmallLength(offset));
        }
        else                    // needs large
        {
            // fill a mutable buffer first so that no `immutable` data is ever written to
            auto buffer = new char[charCount];
            size_t offset = 0;
            foreach (const e; source)
            {
                char[4] chars;
                const count = encode(chars, e);
                buffer[offset .. offset + count] = chars[0 .. count];
                offset += count;
            }
            // `buffer` is not referenced anywhere else so it can be treated as `immutable`
            raw.ptr = cast(immutable(char)*)buffer.ptr;
            raw.length = encodeLargeLength(charCount);
        }
    }

nothrow:

    /** Return `this` converted to a `string`, without any GC-allocation because
     * `this` is `immutable`.
     */
    @property string toString() immutable @trusted return pure nothrow @nogc // never allocates
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice();
    }

    /** Return `this` converted to a `string`, which potentially needs
     * GC-allocation (iff `this` is small, that is `length <= smallCapacity`).
     *
     * A large string is returned as is because its `char`s are already
     * `immutable`.
     *
     * Implementation is kept in sync with `opSlice`.
     */
    @property string toString() const return @trusted pure nothrow // may GC-allocate
    {
        if (isLarge)
            // GC-allocated slice has immutable members so ok to cast
            return cast(typeof(return))raw.ptr[0 .. decodeRawLength(raw.length)]; // no allocation
        else
            return small.data.ptr[0 .. decodeRawLength(small.length)].idup; // need duplicate to make `immutable`
    }

    @nogc:

    /** Get hash of `this`, with extra fast computation for the small case.
     */
    @property hash_t toHash() const scope @trusted
    {
        version(D_Coverage) {} else version(LDC) pragma(inline, true);
        import core.internal.hash : hashOf;
        import nxt.hash_functions : lemireHash64;
        if (isLarge)
            return hashOf(opSliceLarge()); // use default
        else                    // fast path for small string
            return (lemireHash64(words[0] >> 1) ^ // shift away LS-bit being a constant for a small string
                    lemireHash64(words[1]));
    }

    /** Get length. */
    @property size_t length() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        if (isLarge)
            return decodeRawLength(large.length); // skip tag bits
        else
            return decodeRawLength(small.length); // skip tag bits
    }
    /// ditto
    alias opDollar = length;

    /** Check if `this` is empty. */
    @property bool empty() const scope @safe pure nothrow @nogc
    {
        return length == 0;
    }

    /** Check if `this` is `null`, that is if its raw (encoded) length word is
     * zero.
     */
    @property bool isNull() const scope @trusted pure nothrow @nogc
    {
        return raw.length == 0;
    }

    /** Return a slice to either the whole large or whole small `string`.
     *
     * Implementation is kept in sync with `toString`.
     */
    inout(char)[] opSlice() inout return @trusted @nogc
    {
        version(D_Coverage) {} else pragma(inline, true);   // TODO: maybe remove
        if (isLarge)
            return opSliceLarge();
        else
            return opSliceSmall();
    }

    /** Return a slice at `[i .. j]` to either the internally stored large or small `string`.
     *
     * Implementation is kept in sync with `toString`.
     */
    inout(char)[] opSlice(size_t i, size_t j) inout return @safe
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice()[i .. j];
    }

    /// Return a slice to the whole large `string`. Requires `isLarge`.
    private inout(char)[] opSliceLarge() inout return scope @system @nogc
    {
        version(D_Coverage) {} else pragma(inline, true);
        version(unittest) assert(isLarge);
        return cast(typeof(return))raw.ptr[0 .. decodeRawLength(raw.length)]; // no allocation
        // alternative:  return large.ptr[0 .. large.length/2];
    }

    /// Return a slice to the whole small `string`. Requires `!isLarge`.
    private inout(char)[] opSliceSmall() inout return @trusted @nogc
    {
        version(D_Coverage) {} else pragma(inline, true);
        version(unittest) assert(!isLarge);
        return cast(typeof(return))small.data.ptr[0 .. decodeRawLength(small.length)]; // scoped
    }

    /** Return the `index`ed `char` of `this`.
     */
    ref inout(char) opIndex(size_t index) inout return @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice()[index]; // does range check
    }

    /// Get pointer to the internally stored `char`s.
    @property private immutable(char)* ptr() const return @trusted
    {
        if (isLarge)
            return large.ptr;   // pointer of the large string
        else
            return small.data.ptr; // pointer into `this`
    }

    /** Check if `this` is equal to `rhs`. */
    bool opEquals()(const scope auto ref typeof(this) rhs) const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice() == rhs.opSlice();
    }

    /** Check if `this` is equal to `rhs`. */
    bool opEquals()(const scope const(char)[] rhs) const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice() == rhs;
    }

    /** Compare `this` with `that`.
     *
     * Returns: -1, 0 or 1 when the `char`s of `this` compare less than, equal
     * to or greater than the `char`s of `that`.
     *
     * See_Also: https://forum.dlang.org/post/muhfypwftdivluqdbmdf@forum.dlang.org
     */
    @property int opCmp()(const scope typeof(this) that) const scope // template-lazy
    {
        version(D_Coverage) {} else pragma(inline, true);
        scope const a = this.opSlice();
        scope const b = that.opSlice();
        return a < b ? -1 : (a > b);
        // import core.internal.array.comparison : __cmp; // instead of `std.algorithm.comparison : cmp`;
        // return __cmp(this[], that[]);
    }

    /** Convert to `bool`: `false` for `SSOString()` and `true` for every small
     * string, including `SSOString("")`.
     */
    bool opCast(T : bool)() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        if (isLarge)
            return large !is null;
        else
            return small.length != 0;
    }

    /** Support trait `isNullable`. */
    static immutable nullValue = typeof(this).init;

    /** Support trait `isHoleable`. */
    static immutable holeValue = typeof(this).asHole();

    /** Check if this is a hole, meaning a removed/erased value. */
    bool isHole() const scope @safe nothrow @nogc
    {
        return words[0] == size_t.max;
    }

    /** Turn this into a hole, meaning a removed/erased value. */
    void holeify() @system @nogc scope
    {
        words[0] = size_t.max;
        words[1] = size_t.max;
    }

    /** Returns: a holed `SSOString`, meaning a removed/erased value. */
    private static typeof(this) asHole() @system
    {
        typeof(return) result = void;
        result.holeify();
        return result;
    }

    /** Check if `this` is a small ASCII string. */
    bool isSmallASCII() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        static assert(largeLengthTagBitOffset == 0);// bit 0 of lsbyte not set => large
        // should be fast on 64-bit platforms:
        // the lsbyte of `words[0]` is the length byte so only its tag bit is tested
        return ((words[0] & 0x_80_80_80_80__80_80_80_01UL) == 1 && // bit 0 of lsbyte is set => small
                (words[1] & 0x_80_80_80_80__80_80_80_80UL) == 0);
    }

private:

    /** Returns: `true` iff this is a large string, otherwise `false`. */
    @property bool isLarge() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return !(large.length & (1 << largeLengthTagBitOffset)); // first bit discriminates small from large
    }

    alias Large = immutable(char)[];

    public enum smallCapacity = Large.sizeof - Small.length.sizeof;
    static assert(smallCapacity > 0, "No room for small source for immutable(char) being " ~ immutable(char).stringof);

    enum largeLengthTagBitOffset = 0; ///< bit position for large tag in length.
    enum smallLengthBitCount = 4;
    static assert(smallCapacity == 2^^smallLengthBitCount-1);

    enum metaBits = 3;               ///< Number of bits used for metadata.
    enum metaMask = (2^^metaBits-1); ///< Mask for metadata shifted to bottom.
    enum tagsBitCount = 1 + metaBits; ///< Number of bits used for small discriminator plus extra meta data.
    static assert(smallLengthBitCount + tagsBitCount == 8);

    /// Get metadata, being the `metaBits` bits stored directly above the tag bit.
    @property ubyte metadata() const @safe pure nothrow @nogc
    {
        // same shift as in the setter: skip the tag bit to get bits [1 .. 1+metaBits]
        return (small.length >> (largeLengthTagBitOffset + 1)) & metaMask;
    }

    /// Set metadata to `data`, which must be less than `2^^metaBits`. Length is preserved.
    @property void metadata(ubyte data) @trusted pure nothrow @nogc
    {
        assert(data < (1 << metaBits));
        if (isLarge)
            raw.length = encodeLargeLength(length) | ((data & metaMask) << (largeLengthTagBitOffset + 1));
        else
            small.length = cast(ubyte)encodeSmallLength(length) | ((data & metaMask) << (largeLengthTagBitOffset + 1));
    }

    /// Decode raw length `rawLength` by shifting away tag bits.
    static size_t decodeRawLength(size_t rawLength) @safe pure nothrow @nogc
    {
        return rawLength >> tagsBitCount;
    }

    /// Encode `Large` length from `length`. Tag bit and metadata bits are left cleared.
    static size_t encodeLargeLength(size_t length) @safe pure nothrow @nogc
    {
        return (length << tagsBitCount);
    }

    /// Encode `Small` length from `length`. Tag bit is set and metadata bits are left cleared.
    static size_t encodeSmallLength(size_t length) @safe pure nothrow @nogc
    {
        assert(length <= smallCapacity);
        return (length << tagsBitCount) | (1 << largeLengthTagBitOffset);
    }

    version(LittleEndian) // see: http://forum.dlang.org/posting/zifyahfohbwavwkwbgmw
    {
        struct Small
        {
            /* TODO: only first 4 bits are needed to represent a length between
             * 0-15, use other 4 bits.
             */
            ubyte length = 0;
            immutable(char)[smallCapacity] data = [0,0,0,0,0,
                                                   0,0,0,0,0,
                                                   0,0,0,0,0]; // explicit init needed for `__traits(isZeroInit)` to be true.
        }
    }
    else
    {
        struct Small
        {
            immutable(char)[smallCapacity] data = [0,0,0,0,0,
                                                   0,0,0,0,0,
                                                   0,0,0,0,0]; // explicit init needed for `__traits(isZeroInit)` to be true.
            /* TODO: only first 4 bits are needed to represent a length between
             * 0-15, use other 4 bits.
             */
            ubyte length;
        }
        static assert(0, "TODO: add BigEndian support and test");
    }

    struct Raw                  // same memory layout as `immutable(char)[]`
    {
        size_t length = 0;      // can be bit-fiddled without GC allocation
        immutable(char)* ptr = null;
    }

    union
    {
        Raw raw;
        Large large;
        Small small;
        size_t[2] words;
    }
}
418 version(unittest) static assert(SSOString.sizeof == string.sizeof);
419 
/** Returns: `x` lowercased.
 *
 * A small ASCII string is converted byte-wise without any allocation. A large
 * string is converted via `std.uni.asLowerCase`. A small non-ASCII string is
 * converted in place in a copy of `x`; only when that conversion could not be
 * completed in place with unchanged length is a new `SSOString` constructed
 * from the converted `char`s.
 */
SSOString toLower()(const SSOString x) @trusted // template-lazy
{
    if (x.isSmallASCII)         // small ASCII fast-path
    {
        typeof(return) result = void;
        result.small.length = x.small.length;
        import std.ascii : toLower;
        // all `smallCapacity` bytes are written so no part of `result` stays uninitialized
        foreach (const index; 0 .. x.smallCapacity)
            (cast(char[])(result.small.data))[index] = toLower(x.small.data[index]);
        return result;
    }
    else if (x.isLarge)
    {
        import std.uni : asLowerCase;
        import std.conv : to;
        return typeof(return)(x.opSlice().asLowerCase.to!string); // TODO: make .to!string nothrow
    }
    else                   // small non-ASCII path usually without GC-allocation
    {
        typeof(return) result = x; // copy
        import std.uni : toLowerInPlace;
        auto slice = cast(char[])(result.opSlice()); // need ref to slice
        toLowerInPlace(slice);
        if (slice is result.opSlice()) // converted in place with unchanged length
            return result;
        /* `slice` now either is a shorter part of `result` or refers to a new
         * array. In the latter case `result` may hold a partial conversion
         * only, even if the lengths happen to be equal, so always construct
         * from `slice`.
         */
        return typeof(return)(slice);
    }
}
459 
/** Returns: `x` uppercased.
 *
 * A small ASCII string is converted byte-wise without any allocation. A large
 * string is converted via `std.uni.asUpperCase`. A small non-ASCII string is
 * converted in place in a copy of `x`; only when that conversion could not be
 * completed in place with unchanged length is a new `SSOString` constructed
 * from the converted `char`s.
 */
SSOString toUpper()(const SSOString x) @trusted // template-lazy
{
    if (x.isSmallASCII)         // small ASCII fast-path
    {
        typeof(return) result = void;
        result.small.length = x.small.length;
        import std.ascii : toUpper;
        // all `smallCapacity` bytes are written so no part of `result` stays uninitialized
        foreach (const index; 0 .. x.smallCapacity)
            (cast(char[])(result.small.data))[index] = toUpper(x.small.data[index]);
        return result;
    }
    else if (x.isLarge)
    {
        import std.uni : asUpperCase;
        import std.conv : to;
        return typeof(return)(x.opSlice().asUpperCase.to!string); // TODO: make .to!string nothrow
    }
    else                   // small non-ASCII path usually without GC-allocation
    {
        typeof(return) result = x; // copy
        import std.uni : toUpperInPlace;
        auto slice = cast(char[])(result.opSlice()); // need ref to slice
        toUpperInPlace(slice);
        if (slice is result.opSlice()) // converted in place with unchanged length
            return result;
        /* `slice` now either is a shorter part of `result` or refers to a new
         * array. In the latter case `result` may hold a partial conversion
         * only, even if the lengths happen to be equal, so always construct
         * from `slice`.
         */
        return typeof(return)(slice);
    }
}
499 
/// constructing from a non-immutable source is permitted outside of `@nogc`-scope
@safe pure nothrow unittest
{
    alias S = SSOString;

    // an empty source is stored small
    scope const char[] noChars;
    const fromNoChars = S(noChars);           // no .idup

    // a source of 16 `char`s is stored large
    scope const char[] sixteenChars = new char[16];
    const fromSixteenChars = S(sixteenChars);         // will call .idup
}
511 
/// constructing from a non-immutable source is not permitted in `@nogc`-scope
@safe pure nothrow @nogc unittest
{
    scope const char[] chars;
    // TODO: why does this fail? static assert(!__traits(compiles, { const _ = SSOString(chars); }));
}
518 
/// a string `@nogc`-constructed from a small static array of `char`s is never null
@trusted pure nothrow @nogc unittest
{
    static foreach (const count; 0 .. SSOString.smallCapacity + 1)
    {
        {
            immutable(char)[count] chars;
            const s = SSOString(chars);
            assert(!s.isNull);
        }
    }
}
530 
/// a string constructed from a large static array of `char`s is never null
@trusted pure nothrow unittest
{
    static foreach (const count; SSOString.smallCapacity + 1 .. 32)
    {
        {
            immutable(char)[count] chars;
            const s = SSOString(chars);
            assert(!s.isNull);
        }
    }
}
542 
/// a string constructed from a dynamic array of `char`s is never null
@trusted pure nothrow unittest
{
    foreach (const count; 0 .. 32)
    {
        scope chars = new immutable(char)[count];
        const s = SSOString(chars);
        assert(!s.isNull);
    }
}
552 
/// behaviour of operators `==` and `is`
@trusted pure nothrow @nogc unittest
{
    const SSOString first = "42";
    const SSOString second = "42";
    const SSOString other = "43";

    // no instance is null and each one equals the literal it was made from
    assert(!first.isNull);
    assert(first == "42");
    assert(!second.isNull);
    assert(second == "42");
    assert(!other.isNull);
    assert(other == "43");

    // equal contents compare equal, both as instances and as slices
    assert(first == second);
    assert(first == second[]);
    assert(first[] == second);
    assert(first[] == second[]);

    // identity holds for slices of one and the same instance only
    assert(first[] is first[]);
    assert(second[] is second[]);
    assert(first[] !is second[]);
    assert(first.ptr !is second.ptr);

    // different contents are neither equal nor identical
    assert(first != other);
    assert(first[] != other[]);
    assert(first !is other);
    assert(first[] !is other[]);
}
581 
/// construction, length, slicing and indexing of null, small and large strings
@safe pure nothrow @nogc unittest
{
    static assert(SSOString.smallCapacity == 15);

    import nxt.gc_traits : mustAddGCRange;
    static assert(mustAddGCRange!SSOString); // `Large large.ptr` must be scanned

    static assert(__traits(isZeroInit, SSOString));
    // TODO: assert(SSOString.init == SSOString.nullValue);

    // default-initialized instance is a null, empty and large string
    auto s0 = SSOString.init;
    assert(s0.isNull);
    assert(s0.length == 0);
    assert(s0.isLarge);
    assert(s0[] == []);

    // mutable static array of exactly `smallCapacity` `char`s
    char[SSOString.smallCapacity] charsSmallCapacity = "123456789_12345"; // fits in small string
    const sSmallCapacity = SSOString(charsSmallCapacity);
    assert(!sSmallCapacity.isLarge);
    assert(sSmallCapacity.length == SSOString.smallCapacity);
    assert(sSmallCapacity == charsSmallCapacity);

    // empty but non-null string
    const s0_ = SSOString("");
    assert(!s0_.isNull);         // not null, in contrast to `s0`
    assert(s0 == s0_);           // but equal to `s0` because contents are equal

    const s7 = SSOString("0123456");
    assert(!s7.isNull);

    const s7_ = SSOString("0123456_"[0 .. $ - 1]);
    assert(s7.ptr !is s7_.ptr); // string data shall not overlap
    assert(s7 == s7_);

    const _s7 = SSOString("_0123456"[1 .. $]); // source from other string literal
    assert(s7.ptr !is _s7.ptr); // string data shall not overlap
    assert(s7 == _s7);

    assert(!s7.isLarge);
    assert(s7.length == 7);
    assert(s7[] == "0123456");
    assert(s7[] == "_0123456"[1 .. $]);
    assert(s7[] == "0123456_"[0 .. $ - 1]);
    assert(s7[0 .. 4] == "0123");

    // longest small string
    const s15 = SSOString("0123456789abcde");
    assert(!s15.isNull);
    static assert(is(typeof(s15[]) == const(char)[]));
    assert(!s15.isLarge);
    assert(s15.length == 15);
    assert(s15[] == "0123456789abcde");
    assert(s15[0 .. 4] == "0123");
    assert(s15[10 .. 15] == "abcde");
    assert(s15[10 .. $] == "abcde");

    // shortest large string
    const s16 = SSOString("0123456789abcdef");
    assert(!s16.isNull);
    static assert(is(typeof(s16[]) == const(char)[]));
    assert(s16.isLarge);

    const s16_ = SSOString("0123456789abcdef_"[0 .. s16.length]);
    assert(s16.length == s16_.length);
    assert(s16[] == s16_[]);
    assert(s16.ptr !is s16_.ptr); // string data shall not overlap
    assert(s16 == s16_);              // but contents is equal

    const _s16 = SSOString("_0123456789abcdef"[1 .. $]);
    assert(s16.length == _s16.length);
    assert(s16[] == _s16[]);    // contents is equal
    assert(s16 == _s16);        // contents is equal

    assert(s16.length == 16);
    assert(s16[] == "0123456789abcdef");
    assert(s16[0] == '0');
    assert(s16[10] == 'a');
    assert(s16[15] == 'f');
    assert(s16[0 .. 4] == "0123");
    assert(s16[10 .. 16] == "abcdef");
    assert(s16[10 .. $] == "abcdef");
}
662 
/// metadata of a null string
@safe pure nothrow @nogc unittest
{
    auto str = SSOString.init;
    assert(str.isNull);
    foreach (const meta; 0 .. 8)
    {
        str.metadata = meta;
        // the value is read back unchanged and the length stays zero
        assert(str.metadata == meta);
        assert(str.length == 0);
    }
}
675 
/// metadata of a small string
@safe pure nothrow @nogc unittest
{
    auto str = SSOString("0123456");
    assert(!str.isNull);
    assert(!str.isLarge);
    foreach (const meta; 0 .. 8)
    {
        str.metadata = meta;
        assert(str.metadata == meta);
        // length, size class and nullness are unaffected
        assert(str.length == 7);
        assert(!str.isLarge);
        assert(!str.isNull);
    }
}
691 
/// metadata of a small string of maximum length
@safe pure nothrow @nogc unittest
{
    auto str = SSOString("0123456789abcde");
    assert(str.length == SSOString.smallCapacity);
    assert(!str.isNull);
    assert(!str.isLarge);
    foreach (const meta; 0 .. 8)
    {
        str.metadata = meta;
        assert(str.metadata == meta);
        // length, size class and nullness are unaffected
        assert(str.length == 15);
        assert(!str.isLarge);
        assert(!str.isNull);
    }
}
708 
/// metadata of a large string of minimum length
@safe pure nothrow @nogc unittest
{
    auto str = SSOString("0123456789abcdef");
    assert(str.length == SSOString.smallCapacity + 1);
    assert(!str.isNull);
    assert(str.isLarge);
    assert(!str.empty);
    foreach (const meta; 0 .. 8)
    {
        str.metadata = meta;
        assert(str.metadata == meta);
        // length, size class and nullness are unaffected
        assert(str.length == 16);
        assert(str.isLarge);
        assert(!str.isNull);
    }
}
726 
/// the null string and the empty string are equal but not identical
@safe pure nothrow @nogc unittest
{
    const nullString = SSOString();
    const emptyString = SSOString("");
    assert(nullString == emptyString);
    assert(nullString !is emptyString);
}
733 
/// hashes of null, empty and non-empty strings
@safe pure nothrow @nogc unittest
{
    // null and empty both hash to zero
    const nullHash = SSOString().toHash;
    const emptyHash = SSOString("").toHash;
    assert(nullHash == 0);
    assert(emptyHash == 0);

    // non-empty strings, small as well as large, do not
    const smallHash = SSOString("a").toHash;
    const largeHash = SSOString("0123456789abcdef").toHash;
    assert(smallHash != 0);
    assert(largeHash != 0);
}
742 
/// constructing from a static array having more than `smallCapacity` elements
@safe pure nothrow unittest
{
    char[SSOString.smallCapacity + 1] tooManyForSmall;
    const _ = SSOString(tooManyForSmall);
}
749 
// construction from a range of `dchar`
@safe pure unittest
{
    // construct from `text` decoded as `dchar`s and check contents and size class
    static void verify(const scope char[] text,
                       const bool expectLarge) @safe pure
    {
        import std.utf : byDchar;
        auto decoded = text.byDchar;
        const scope str = SSOString(decoded);
        assert(str == text);
        assert(str.isLarge == expectLarge);
    }

    // up to 15 `char`s are stored small
    verify("", false);
    verify("_", false);
    verify("123456789_12345", false);

    // 16 `char`s or more are stored large
    verify("123456789_123456", true);
    verify("123456789_123456789_123456789_", true);
}
767 
/// only the value returned by `asHole` is a hole
@trusted pure nothrow @nogc unittest
{
    const nullString = SSOString.init;
    const emptyString = SSOString("");
    const oneChar = SSOString("a");
    assert(!nullString.isHole);
    assert(!emptyString.isHole);
    assert(!oneChar.isHole);

    const hole = SSOString.asHole;
    assert(hole.isHole);
}
776 
/// DIP-1000 return ref escape analysis
@safe pure nothrow unittest
{
    // the checks are compiled in only when `isDIP1000` is true
    static if (isDIP1000)
    {
        // each function literal returns a reference into a local `SSOString` and must be rejected
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { string f1() @safe pure nothrow { SSOString x; return x[]; } }));
        static assert(!__traits(compiles, { string f2() @safe pure nothrow { SSOString x; return x.toString; } }));
        static assert(!__traits(compiles, { ref immutable(char) g() @safe pure nothrow @nogc { SSOString x; return x[0]; } }));
    }
}
788 
/// detection of small ASCII strings
@safe pure nothrow @nogc unittest
{
    // single ASCII `char`s
    assert( SSOString("_").isSmallASCII);
    assert( SSOString("a").isSmallASCII);
    assert( SSOString("b").isSmallASCII);
    assert( SSOString("z").isSmallASCII);

    // several ASCII `char`s, up to the maximum small length
    assert( SSOString("abcd").isSmallASCII);
    assert( SSOString("123456789_12345").isSmallASCII);

    // ASCII only but too long to be small
    assert(!SSOString("123456789_123456").isSmallASCII); // too large

    // small but containing non-ASCII `char`s
    assert(!SSOString("ö").isSmallASCII);
    assert(!SSOString("Ö").isSmallASCII);
    assert(!SSOString("åäö").isSmallASCII);
    assert(!SSOString("ö-värld").isSmallASCII);
    assert(!SSOString("123456789_123ö").isSmallASCII);
}
808 
/// case-conversion of small ASCII, small non-ASCII and large strings
@safe pure unittest
{
    // to lowercase
    assert(SSOString("A").toLower[] == "a");
    assert(SSOString("ABCDEFGHIJKLMNO").toLower[] == "abcdefghijklmno"); // small
    assert(SSOString("ÅÄÖ").toLower[] == "åäö");
    assert(SSOString("ABCDEFGHIJKLMNOP").toLower[] == "abcdefghijklmnop"); // large

    // to uppercase
    assert(SSOString("a").toUpper[] == "A");
    assert(SSOString("abcdefghijklmno").toUpper[] == "ABCDEFGHIJKLMNO"); // small
    assert(SSOString("åäö").toUpper[] == "ÅÄÖ");
    assert(SSOString("abcdefghijklmnop").toUpper[] == "ABCDEFGHIJKLMNOP"); // large

    // `std.uni.toLowerInPlace` converts these `char`s within the original storage
    import std.uni : toLowerInPlace;
    char[6] chars = "ÅÄÖ";
    auto charsRef = chars[];
    toLowerInPlace(charsRef);
    assert(chars == "åäö");
    assert(charsRef == "åäö");
}
828 
/// ordering follows that of the underlying `char` slices
@safe pure unittest
{
    const SSOString lesser = SSOString("a");
    immutable SSOString greater = SSOString("b");

    assert(lesser == SSOString("a"));

    // instances are ordered like their slices
    assert(lesser < greater);
    assert(greater > lesser);
    assert(lesser[] < greater[]);

    // reference ordering of plain string literals
    assert("a" < "b");
    assert("a" < "å");
    assert("Å" < "å");

    // same ordering for `SSOString`s
    assert(SSOString("a") < SSOString("å"));
    assert(SSOString("ÅÄÖ") < SSOString("åäö"));
}
847 
/// conversion to `bool`
@safe pure unittest
{
    // mimics behaviour of casting of `string` to `bool`
    const nullString = SSOString();
    const emptyString = SSOString("");
    const nonEmptyString = SSOString("abc");

    assert(!nullString);       // only the null string is false
    assert(emptyString);
    assert(nonEmptyString);
}
856 
/// to string conversion
@safe pure unittest
{
    // mutable small will GC-allocate
    {
        SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);   // slice refers to the internal storage
        assert(s.ptr !is &s.toString()[0]); // string is a duplicate
    }

    // const small will GC-allocate
    {
        const SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);   // slice refers to the internal storage
        assert(s.ptr !is &s.toString()[0]); // string is a duplicate
    }

    // immutable small will not allocate
    {
        immutable SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr is &s.toString()[0]); // string refers to the internal storage
        // TODO: check return via -dip1000
    }

    /* Forbid return of possibly locally scoped `Small` small stack object
     * regardless of head-mutability.
     */
    static if (isDIP1000)
    {
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { const SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { immutable SSOString x; return x.ptr; } }));

        /** TODO: Enable the following line when DIP-1000 works for opSlice()
         *
         * See_Also: https://issues.dlang.org/show_bug.cgi?id=18792
         */
        // static assert(!__traits(compiles, { string f1() @safe pure nothrow { immutable SSOString x; return x[]; } }));
    }

    // large will never allocate regardless of head-mutability
    {
        SSOString s = SSOString("123456789_123456");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr is &s.toString()[0]); // shouldn't this change?
    }
}
905 
906 version(unittest)
907 {
908     import nxt.dip_traits : isDIP1000;
909 }