1 module nxt.sso_string;
2 
3 /** Small-size-optimized (SSO) variant of `string`.
4  *
 * Storage is placed inline in the `SSOString` value itself (e.g. on the stack)
 * if the number of `char`s is at most `smallCapacity`, otherwise as a normal
 * (large) `string`. The large `string` will be allocated on the GC-heap if the
 * `SSOString` is constructed from a non-`string` (non-`immutable` `char[]`)
 * parameter.
9  *
10  * Because `SSOString` doesn't have a destructor it can safely allocate using a
11  * GC-backed region allocator without relying on a GC finalizer.
12  *
13  * In order to mimic `string/array/slice`-behaviour, opCast returns `false` for
14  * `SSOString()` and `true` for `SSOString("")`. This requires `SSOString()` to
15  * default to a large string in which large pointer is set to `null`.
16  *
17  * Big-endian platform support hasn't been verified.
18  *
19  * See_Also: https://forum.dlang.org/post/pb87rn$2icb$1@digitalmars.com
20  * See_Also: https://issues.dlang.org/show_bug.cgi?id=18792
21  * See_Also: https://forum.dlang.org/post/agzznbzkacfhyqvoezht@forum.dlang.org
22  *
 * TODO: Use extra bits in `Small.length` for these special text encodings
 * (the small data is `smallCapacity` = 15 bytes = 120 bits):
 * - 5-bit lowercase English letter into 120/5 = 24 chars
 * - 5-bit uppercase English letter into 120/5 = 24 chars
 * - 6-bit mixedcase English letter into 120/6 = 20 chars
27  *
28  * TODO: Move to Phobos' std.typecons or std.array or std.string
29  */
struct SSOString
{
@safe:
    /** Feed the characters of `this` to `sink`.
     *
     * Params:
     *   sink = callable that is invoked once with the slice returned by `opSlice()`.
     */
    @property void toString(Sink)(ref scope Sink sink) const scope
    {
        sink(opSlice());
    }

pure:

    /** Construct from `source`.
     *
     * Sources of at most `smallCapacity` `char`s are copied into the small
     * (inline) storage. Longer sources become large strings: a slice of
     * `immutable(char)` (a `string`) is aliased without copying, whereas a
     * slice of mutable or `const` `char`s and any static array is duplicated
     * on the GC-heap.
     */
    this(Chars)(const scope auto ref Chars source) @trusted nothrow
    if (is(Chars : const(char)[])) // `isCharArray`
    {
        static if (__traits(isStaticArray, Chars))
        {
            static if (source.length <= smallCapacity) // inferred @nogc
            {
                small.data[0 .. source.length] = source;
                small.length = cast(typeof(small.length))(encodeSmallLength(source.length));
            }
            else
            {
                /* Always GC-duplicate, also when the elements are `immutable`:
                 * `raw.ptr` is still `null` here so there is no buffer to copy
                 * the elements into, and a static array is stored inline in
                 * its owner (this very parameter when `source` is an rvalue)
                 * so aliasing its storage could leave a dangling pointer.
                 */
                raw.ptr = source[].idup.ptr; // GC-allocate
                raw.length = encodeLargeLength(source.length);
            }
        }
        else                    // `Chars` is a (dynamic) array slice
        {
            if (source.length <= smallCapacity)
            {
                (cast(char*)small.data.ptr)[0 .. source.length] = source;
                small.length = cast(typeof(small.length))(encodeSmallLength(source.length));
            }
            else
            {
                static if (is(typeof(source[0]) == immutable(char)))
                    raw.ptr = source.ptr; // already immutable so no duplication needed
                else
                    raw.ptr = source.idup.ptr; // GC-allocate
                raw.length = encodeLargeLength(source.length);
            }
        }
    }

    /** Construct from `source` of `dchar`.
     *
     * `source` is iterated twice: first to count the number of UTF-8 code
     * units needed and then to encode the elements into either the small
     * storage or a newly GC-allocated large string.
     */
    this(Source)(scope Source source) @trusted
    if (is(typeof({ foreach (const dchar elem; Source.init) {} })) && // TODO: `isConstRefIterable`
        is(typeof(Source.init.front) == dchar))
    {
        import std.utf : encode;

        // pre-calculate number of `char`s needed
        size_t charCount = 0;
        foreach (const e; source)
        {
            char[4] chars;      // TODO: `= void`
            charCount += encode(chars, e);
        }

        if (charCount <= smallCapacity) // fits in small
        {
            size_t offset = 0;
            foreach (const e; source)
            {
                char[4] chars;
                const count = encode(chars, e);
                (cast(char[])(small.data))[offset .. offset + count] = chars[0 .. count];
                offset += count;
            }
            assert(offset <= smallCapacity);
            small.length = cast(typeof(small.length))(encodeSmallLength(offset));
        }
        else                    // needs large
        {
            large = new immutable(char)[charCount];
            size_t offset = 0;
            foreach (const e; source)
            {
                char[4] chars;
                const count = encode(chars, e);
                (cast(char[])(large))[offset .. offset + count] = chars[0 .. count]; // NOTE modifies immutable data
                offset += count;
            }
            // overwrite the plain slice length stored by the assignment above with the encoded one
            raw.length = encodeLargeLength(charCount);
        }
    }

nothrow:

    /** Return `this` converted to a `string`, without any GC-allocation because
     * `this` is `immutable`.
     */
    @property string toString() immutable @trusted return pure nothrow @nogc // never allocates
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice();
    }

    /** Return `this` converted to a `string`, which potentially needs
     * GC-allocation (iff `this` is small, that is `length <= smallCapacity`).
     *
     * A large string is returned as is because its characters are already
     * `immutable`; a small string is duplicated.
     *
     * Implementation is kept in sync with `opSlice`.
     */
    @property string toString() const return @trusted pure nothrow // may GC-allocate
    {
        if (isLarge)
            // GC-allocated slice has immutable members so ok to cast
            return cast(typeof(return))raw.ptr[0 .. decodeRawLength(raw.length)]; // no allocation
        else
            return small.data.ptr[0 .. decodeRawLength(small.length)].idup; // need duplicate to make `immutable`
    }

@nogc:

    /** Get hash of `this`, with extra fast computation for the small case.
     */
    @property hash_t toHash() const scope @trusted
    {
        version(D_Coverage) {} else version(LDC) pragma(inline, true);
        import core.internal.hash : hashOf;
        import nxt.hash_functions : lemireHash64;
        if (isLarge)
            return hashOf(opSliceLarge()); // use default
        else                    // fast path for small string
            return (lemireHash64(words[0] >> 1) ^ // shift away LS-bit being a constant for a small string
                    lemireHash64(words[1]));
    }

    /** Get length in number of `char`s. */
    @property size_t length() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        if (isLarge)
            return decodeRawLength(large.length); // skip tag bits
        else
            return decodeRawLength(small.length); // skip tag bits
    }
    /// ditto
    alias opDollar = length;

    /** Check if `this` is empty, that is has `length` zero. */
    bool empty() const @property scope @safe pure nothrow @nogc
    {
        return length == 0;
    }

    /** Check if `this` is `null`, that is if its raw length word, including
     * all tag bits, is zero.
     */
    @property bool isNull() const scope @trusted pure nothrow @nogc
    {
        return raw.length == 0;
    }

    /** Return a slice to either the whole large or whole small `string`.
     *
     * Implementation is kept in sync with `toString`.
     */
    inout(char)[] opSlice() inout scope return @trusted @nogc
    {
        version(D_Coverage) {} else pragma(inline, true);   // TODO: maybe remove
        if (isLarge)
            return opSliceLarge();
        else
            return opSliceSmall();
    }

    /** Return a slice at `[i .. j]` to either the internally stored large or small `string`.
     *
     * The bounds are checked by the slicing of the result of `opSlice()`.
     */
    inout(char)[] opSlice(size_t i, size_t j) inout return @safe
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice()[i .. j];
    }

    /// Return a slice to the whole large string. Requires `isLarge`.
    private inout(char)[] opSliceLarge() inout return scope @system @nogc
    in { version(unittest) assert(isLarge); }
    do
    {
        version(D_Coverage) {} else pragma(inline, true);
        return cast(typeof(return))raw.ptr[0 .. decodeRawLength(raw.length)]; // no allocation
        // alternative:  return large.ptr[0 .. large.length/2];
    }

    /// Return a slice to the whole small string stored inside `this`. Requires `!isLarge`.
    private inout(char)[] opSliceSmall() inout return @trusted @nogc
    in { version(unittest) assert(!isLarge); }
    do
    {
        version(D_Coverage) {} else pragma(inline, true);
        return cast(typeof(return))small.data.ptr[0 .. decodeRawLength(small.length)]; // scoped
    }

    /** Return the `index`ed `char` of `this`.
     */
    ref inout(char) opIndex(size_t index) inout return @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice()[index]; // does range check
    }

    /// Get pointer to the internally stored `char`s.
    @property private immutable(char)* ptr() const return @trusted
    {
        if (isLarge)
            return large.ptr;   // GC-heap pointer
        else
            return small.data.ptr; // pointer into `this`
    }

    /** Check if `this` is equal to `rhs`, by comparing the characters. */
    bool opEquals()(const scope auto ref typeof(this) rhs) const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice() == rhs.opSlice();
    }

    /** Check if the characters of `this` are equal to `rhs`. */
    bool opEquals()(const scope const(char)[] rhs) const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice() == rhs;
    }

    /** Compare `this` with `that`.
     *
     * Returns: -1 if `this` is ordered before `that`, 1 if it is ordered
     * after and 0 otherwise, using array comparison of the two slices.
     *
     * See_Also: https://forum.dlang.org/post/muhfypwftdivluqdbmdf@forum.dlang.org
     */
    @property int opCmp()(const scope typeof(this) that) const scope /* template-lazy */
    {
        version(D_Coverage) {} else pragma(inline, true);
        scope const a = this.opSlice();
        scope const b = that.opSlice();
        return a < b ? -1 : (a > b);
        // import core.internal.array.comparison : __cmp; // instead of `std.algorithm.comparison : cmp`;
        // return __cmp(this[], that[]);
    }

    /** Convert to `bool`: a large string is `true` iff its slice is
     * non-`null`, a small string iff its length byte is non-zero.
     */
    bool opCast(T : bool)() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        if (isLarge)
            return large !is null;
        else
            return small.length != 0;
    }

    /** Support trait `isNullable`. */
    static immutable nullValue = typeof(this).init;

    /** Support trait `isHoleable`. */
    static immutable holeValue = typeof(this).asHole();

    /** Check if this is a hole, meaning a removed/erased value.
     *
     * Only the first word is inspected.
     */
    bool isHole() const scope @safe nothrow @nogc
    {
        return words[0] == size_t.max;
    }

    /** Turn this into a hole, meaning a removed/erased value, by setting all
     * bits of both words.
     */
    void holeify() @system @nogc scope
    {
        words[0] = size_t.max;
        words[1] = size_t.max;
    }

    /** Returns: a holed `SSOString`, meaning a removed/erased value. */
    private static typeof(this) asHole() @system
    {
        typeof(return) result = void; // both words are written by `holeify`
        result.holeify();
        return result;
    }

    /** Check if `this` is a small ASCII string. */
    bool isSmallASCII() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        static assert(largeLengthTagBitOffset == 0); // masks below assume the tag is bit 0 of the least significant byte
        // should be fast on 64-bit platforms:
        return ((words[0] & 0x_80_80_80_80__80_80_80_01UL) == 1 && // bit 0 of lsbyte is set => small
                (words[1] & 0x_80_80_80_80__80_80_80_80UL) == 0);
    }

private:

    /** Returns: `true` iff this is a large string, otherwise `false.` */
    @property bool isLarge() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return !(large.length & (1 << largeLengthTagBitOffset)); // first bit discriminates small from large
    }

    alias Large = immutable(char)[];

    /// Maximum number of `char`s that fit in the small storage.
    public enum smallCapacity = Large.sizeof - Small.length.sizeof;
    static assert(smallCapacity > 0, "No room for small source for immutable(char) being " ~ immutable(char).stringof);

    enum largeLengthTagBitOffset = 0; ///< bit position for large tag in length.
    enum smallLengthBitCount = 4;     ///< Number of bits used for the length of a small string.
    static assert(smallCapacity == 2^^smallLengthBitCount-1);

    enum metaBits = 3;               ///< Number of bits used for metadata.
    enum metaMask = (2^^metaBits-1); ///< Mask for metadata shifted to bottom.
    enum tagsBitCount = 1 + metaBits; ///< Number of bits used for small discriminator plus extra meta data.
    static assert(smallLengthBitCount + tagsBitCount == 8);

    /// Get the `metaBits` bits of metadata stored directly above the large tag bit.
    @property ubyte metadata() const @safe pure nothrow @nogc
    {
        // same shift as used by the setter; skips the tag bit
        return (small.length >> (largeLengthTagBitOffset + 1)) & metaMask; // get bits [1 .. 1+metaBits]
    }

    /// Set metadata to `data` while preserving length and small/large tag.
    @property void metadata(ubyte data) @trusted pure nothrow @nogc
    in(data < (1 << metaBits))
    {
        if (isLarge)
            raw.length = encodeLargeLength(length) | ((data & metaMask) << (largeLengthTagBitOffset + 1));
        else
            small.length = cast(ubyte)encodeSmallLength(length) | ((data & metaMask) << (largeLengthTagBitOffset + 1));
    }

    /// Decode raw length `rawLength` by shifting away tag bits.
    static size_t decodeRawLength(size_t rawLength) @safe pure nothrow @nogc
    {
        return rawLength >> tagsBitCount;
    }

    /// Encode `Large` length from `length`; the tag and metadata bits are left cleared.
    static size_t encodeLargeLength(size_t length) @safe pure nothrow @nogc
    {
        return (length << tagsBitCount);
    }

    /// Encode `Small` length from `length`; the small tag bit is set and the metadata bits are left cleared.
    static size_t encodeSmallLength(size_t length) @safe pure nothrow @nogc
    in(length <= smallCapacity)
    {
        return (length << tagsBitCount) | (1 << largeLengthTagBitOffset);
    }

    version(LittleEndian) // see: http://forum.dlang.org/posting/zifyahfohbwavwkwbgmw
    {
        /// Small (inline) representation; `length` overlaps the least significant byte of `Raw.length`.
        struct Small
        {
            /* TODO: only first 4 bits are needed to represent a length between
             * 0-15, use other 4 bits.
             */
            ubyte length = 0;
            immutable(char)[smallCapacity] data = [0,0,0,0,0,
                                                   0,0,0,0,0,
                                                   0,0,0,0,0]; // explicit init needed for `__traits(isZeroInit)` to be true.
        }
    }
    else
    {
        struct Small
        {
            immutable(char)[smallCapacity] data = [0,0,0,0,0,
                                                   0,0,0,0,0,
                                                   0,0,0,0,0]; // explicit init needed for `__traits(isZeroInit)` to be true.
            /* TODO: only first 4 bits are needed to represent a length between
             * 0-15, use other 4 bits.
             */
            ubyte length;
        }
        static assert(0, "TODO: add BigEndian support and test");
    }

    struct Raw                  // same memory layout as `immutable(char)[]`
    {
        size_t length = 0;      // can be bit-fiddled without GC allocation
        immutable(char)* ptr = null;
    }

    /// Overlapping views of the same two words of storage.
    union
    {
        Raw raw;
        Large large;
        Small small;
        size_t[2] words;
    }
}
version(unittest) static assert(SSOString.sizeof == string.sizeof);
421 
/** Returns: `x` lowercased.
 *
 * Small ASCII strings are converted bytewise without allocation. Large strings
 * are converted via `std.uni.asLowerCase` into a new `string`. Small non-ASCII
 * strings are converted with `std.uni.toLowerInPlace` on a copy of `x`.
 */
SSOString toLower()(const SSOString x) @trusted /* template-lazy */
{
    if (x.isSmallASCII)         // small ASCII fast-path
    {
        typeof(return) result = void;
        result.small.length = x.small.length; // same length and tag bits as `x`
        import std.ascii : toLower;
        // all `smallCapacity` bytes are converted so that `result` gets fully initialized
        foreach (const index; 0 .. x.smallCapacity)
            (cast(char[])(result.small.data))[index] = toLower(x.small.data[index]);
        return result;
    }
    else if (x.isLarge)
    {
        import std.uni : asLowerCase;
        import std.conv : to;
        return typeof(return)(x.opSlice().asLowerCase.to!string); // TODO: make .to!string nothrow
    }
    else                   // small non-ASCII path usually without GC-allocation
    {
        typeof(return) result = x; // copy
        import std.uni : toLowerInPlace;
        auto slice = cast(char[])(result.opSlice()); // need ref to slice
        toLowerInPlace(slice);
        if (slice is result.opSlice()) // converted in place with unchanged length
            return result;
        /* Otherwise `slice` has either been shortened or been rebound to
         * another buffer. In both cases `slice`, and not necessarily `result`,
         * holds the converted text, also when the two have the same length.
         */
        // version(none)
        // {
        //     import nxt.dbgio;
        //     dbg(`toLowerInPlace reallocated from "`,
        //         result.opSlice(), `" of length `, result.opSlice().length,
        //         ` to "`
        //         , slice, `" of length `, slice.length);
        // }
        return typeof(return)(slice);
    }
}
461 
/** Returns: `x` uppercased.
 *
 * Small ASCII strings are converted bytewise without allocation. Large strings
 * are converted via `std.uni.asUpperCase` into a new `string`. Small non-ASCII
 * strings are converted with `std.uni.toUpperInPlace` on a copy of `x`.
 */
SSOString toUpper()(const SSOString x) @trusted /* template-lazy */
{
    if (x.isSmallASCII)         // small ASCII fast-path
    {
        typeof(return) result = void;
        result.small.length = x.small.length; // same length and tag bits as `x`
        import std.ascii : toUpper;
        // all `smallCapacity` bytes are converted so that `result` gets fully initialized
        foreach (const index; 0 .. x.smallCapacity)
            (cast(char[])(result.small.data))[index] = toUpper(x.small.data[index]);
        return result;
    }
    else if (x.isLarge)
    {
        import std.uni : asUpperCase;
        import std.conv : to;
        return typeof(return)(x.opSlice().asUpperCase.to!string); // TODO: make .to!string nothrow
    }
    else                   // small non-ASCII path usually without GC-allocation
    {
        typeof(return) result = x; // copy
        import std.uni : toUpperInPlace;
        auto slice = cast(char[])(result.opSlice()); // need ref to slice
        toUpperInPlace(slice);
        if (slice is result.opSlice()) // converted in place with unchanged length
            return result;
        /* Otherwise `slice` has either been shortened or been rebound to
         * another buffer (happens for German double-s, also when the length in
         * bytes stays the same). In both cases `slice`, and not necessarily
         * `result`, holds the converted text.
         */
        // version(none)
        // {
        //     import nxt.dbgio;
        //     dbg(`toUpperInPlace reallocated from "`,
        //         result.opSlice(), `" of length `, result.opSlice().length,
        //         ` to "`
        //         , slice, `" of length `, slice.length);
        // }
        return typeof(return)(slice);
    }
}
501 
/// construct from non-immutable source is allowed in non-`@nogc`-scope
@safe pure nothrow unittest
{
    scope const char[] x0;
    const s0 = SSOString(x0);           // fits in small storage so no .idup
    assert(!s0.isLarge);
    assert(s0.length == 0);

    scope const char[] x16 = new char[16];
    const s16 = SSOString(x16);         // exceeds `smallCapacity` so will call .idup
    assert(s16.isLarge);
    assert(s16.length == 16);
}
513 
/// construct from non-immutable source is not allowed in `@nogc`-scope
@safe pure nothrow @nogc unittest
{
    scope const char[] source;
    // TODO: why does this fail? static assert(!__traits(compiles, { const _ = SSOString(source); }));
}
520 
/// verify `isNull` when @nogc constructing from small static array of `char`s
@trusted pure nothrow @nogc unittest
{
    // one instantiation for every length that fits in the small storage
    static foreach (const len; 0 .. SSOString.smallCapacity + 1)
    {{
        immutable(char)[len] chars;
        const str = SSOString(chars);
        assert(!str.isNull);
    }}
}
532 
/// verify `isNull` when constructing from large static array of `char`s
@trusted pure nothrow unittest
{
    // one instantiation for every length from `smallCapacity + 1` up to 31
    static foreach (const len; SSOString.smallCapacity + 1 .. 32)
    {{
        immutable(char)[len] chars;
        const str = SSOString(chars);
        assert(!str.isNull);
    }}
}
544 
/// verify `isNull` when constructing from dynamic array of `char`s
@trusted pure nothrow unittest
{
    // covers both small (0 .. 15) and large (16 .. 31) lengths
    foreach (const len; 0 .. 32)
    {
        scope chars = new immutable(char)[len];
        const str = SSOString(chars);
        assert(!str.isNull);
    }
}
554 
/// test behaviour of `==` and `is` operator
@trusted pure nothrow @nogc unittest
{
    const SSOString first = "42";
    const SSOString second = "42";
    const SSOString other = "43";

    // none is null and each one equals the literal it was built from
    assert(!first.isNull);
    assert(first == "42");
    assert(!second.isNull);
    assert(second == "42");
    assert(!other.isNull);
    assert(other == "43");

    // same contents compare equal, both as `SSOString` and as slices
    assert(first == second);
    assert(first == second[]);
    assert(first[] == second);
    assert(first[] == second[]);

    // a slice is identical to itself but not to the slice of another small string
    assert(first[] is first[]);
    assert(second[] is second[]);
    assert(first[] !is second[]);
    assert(first.ptr !is second.ptr);

    // different contents are neither equal nor identical
    assert(first != other);
    assert(first[] != other[]);
    assert(first !is other);
    assert(first[] !is other[]);
}
583 
/// construction, length, slicing and indexing of null, small and large strings
@safe pure nothrow @nogc unittest
{
    static assert(SSOString.smallCapacity == 15);

    import nxt.gc_traits : mustAddGCRange;
    static assert(mustAddGCRange!SSOString); // `Large large.ptr` must be scanned

    static assert(__traits(isZeroInit, SSOString));
    // TODO: assert(SSOString.init == SSOString.nullValue);

    // default-initialized value is a null, empty, large string
    auto s0 = SSOString.init;
    assert(s0.isNull);
    assert(s0.length == 0);
    assert(s0.isLarge);
    assert(s0[] == []);

    // mutable static array with exactly `smallCapacity` elements
    char[SSOString.smallCapacity] charsSmallCapacity = "123456789_12345"; // fits in small string
    const sSmallCapacity = SSOString(charsSmallCapacity);
    assert(!sSmallCapacity.isLarge);
    assert(sSmallCapacity.length == SSOString.smallCapacity);
    assert(sSmallCapacity == charsSmallCapacity);

    // empty (small) string
    const s0_ = SSOString("");
    assert(!s0_.isNull);         // empty is not null ...
    assert(s0 == s0_);           // ... but compares equal to the null string

    // small strings always hold their own copy of the characters
    const s7 = SSOString("0123456");
    assert(!s7.isNull);

    const s7_ = SSOString("0123456_"[0 .. $ - 1]);
    assert(s7.ptr !is s7_.ptr); // string data shall not overlap
    assert(s7 == s7_);

    const _s7 = SSOString("_0123456"[1 .. $]); // source from other string literal
    assert(s7.ptr !is _s7.ptr); // string data shall not overlap
    assert(s7 == _s7);

    assert(!s7.isLarge);
    assert(s7.length == 7);
    assert(s7[] == "0123456");
    assert(s7[] == "_0123456"[1 .. $]);
    assert(s7[] == "0123456_"[0 .. $ - 1]);
    assert(s7[0 .. 4] == "0123");

    // longest small string
    const s15 = SSOString("0123456789abcde");
    assert(!s15.isNull);
    static assert(is(typeof(s15[]) == const(char)[]));
    assert(!s15.isLarge);
    assert(s15.length == 15);
    assert(s15[] == "0123456789abcde");
    assert(s15[0 .. 4] == "0123");
    assert(s15[10 .. 15] == "abcde");
    assert(s15[10 .. $] == "abcde");

    // shortest large string
    const s16 = SSOString("0123456789abcdef");
    assert(!s16.isNull);
    static assert(is(typeof(s16[]) == const(char)[]));
    assert(s16.isLarge);

    const s16_ = SSOString("0123456789abcdef_"[0 .. s16.length]);
    assert(s16.length == s16_.length);
    assert(s16[] == s16_[]);
    assert(s16.ptr !is s16_.ptr); // string data shall not overlap
    assert(s16 == s16_);              // but contents is equal

    const _s16 = SSOString("_0123456789abcdef"[1 .. $]);
    assert(s16.length == _s16.length);
    assert(s16[] == _s16[]);    // contents is equal
    assert(s16 == _s16);        // contents is equal

    assert(s16.length == 16);
    assert(s16[] == "0123456789abcdef");
    assert(s16[0] == '0');
    assert(s16[10] == 'a');
    assert(s16[15] == 'f');
    assert(s16[0 .. 4] == "0123");
    assert(s16[10 .. 16] == "abcdef");
    assert(s16[10 .. $] == "abcdef");
}
664 
/// metadata for null string
@safe pure nothrow @nogc unittest
{
    auto str = SSOString.init;
    assert(str.isNull);
    // every value representable in the metadata bits round-trips without changing the length
    foreach (const meta; 0 .. 8)
    {
        str.metadata = meta;
        assert(str.metadata == meta);
        assert(str.length == 0);
    }
}
677 
/// metadata for small string
@safe pure nothrow @nogc unittest
{
    auto str = SSOString("0123456");
    assert(!str.isNull);
    assert(!str.isLarge);
    // setting metadata must leave length, tag and null-ness untouched
    foreach (const meta; 0 .. 8)
    {
        str.metadata = meta;
        assert(str.metadata == meta);
        assert(str.length == 7);
        assert(!str.isLarge);
        assert(!str.isNull);
    }
}
693 
/// metadata for small string with maximum length
@safe pure nothrow @nogc unittest
{
    auto str = SSOString("0123456789abcde");
    assert(str.length == SSOString.smallCapacity);
    assert(!str.isNull);
    assert(!str.isLarge);
    // setting metadata must leave length, tag and null-ness untouched
    foreach (const meta; 0 .. 8)
    {
        str.metadata = meta;
        assert(str.metadata == meta);
        assert(str.length == 15);
        assert(!str.isLarge);
        assert(!str.isNull);
    }
}
710 
/// metadata for large string with minimum length
@safe pure nothrow @nogc unittest
{
    auto str = SSOString("0123456789abcdef");
    assert(str.length == SSOString.smallCapacity + 1);
    assert(!str.isNull);
    assert(str.isLarge);
    assert(!str.empty);
    // setting metadata must leave length, tag and null-ness untouched
    foreach (const meta; 0 .. 8)
    {
        str.metadata = meta;
        assert(str.metadata == meta);
        assert(str.length == 16);
        assert(str.isLarge);
        assert(!str.isNull);
    }
}
728 
/// equality and equivalence
@safe pure nothrow @nogc unittest
{
    const nullStr = SSOString();
    const emptyStr = SSOString("");
    assert(nullStr == emptyStr);  // same (empty) contents
    assert(nullStr !is emptyStr); // but different representations
}
735 
/// hashing of null, empty and non-empty
@safe pure nothrow @nogc unittest
{
    // null and empty both hash to zero
    const nullHash = SSOString().toHash;
    const emptyHash = SSOString("").toHash;
    assert(nullHash == 0);
    assert(emptyHash == 0);

    // non-empty small and large strings do not
    const smallHash = SSOString("a").toHash;
    const largeHash = SSOString("0123456789abcdef").toHash;
    assert(smallHash != 0);
    assert(largeHash != 0);
}
744 
/// construct from static array larger than `smallCapacity`
@safe pure nothrow unittest
{
    // one element more than what fits in the small storage
    char[SSOString.smallCapacity + 1] chars;
    const str = SSOString(chars);
}
751 
// test construction from range
@safe pure unittest
{
    /* Construct from `text` decoded as a range of `dchar` and check both the
     * contents and whether the result is expected to be large.
     */
    static void check(const scope char[] text,
                      const bool expectLarge) @safe pure
    {
        import std.utf : byDchar;
        const scope str = SSOString(text.byDchar);
        assert(str == text);
        assert(str.isLarge == expectLarge);
    }

    // up to and including 15 `char`s stay small
    check("", false);
    check("_", false);
    check("123456789_12345", false);

    // 16 `char`s and more become large
    check("123456789_123456", true);
    check("123456789_123456789_123456789_", true);
}
769 
/// hole handling
@trusted pure nothrow @nogc unittest
{
    // only the dedicated hole value is a hole
    const hole = SSOString.asHole;
    assert(hole.isHole);

    // null, empty and non-empty strings are not
    assert(!SSOString.init.isHole);
    assert(!SSOString("").isHole);
    assert(!SSOString("a").isHole);
}
778 
/// DIP-1000 return ref escape analysis
@safe pure nothrow unittest
{
    // only checked when `hasPreviewDIP1000` (imported from `nxt.dip_traits`) is true
    static if (hasPreviewDIP1000)
    {
        // returning a pointer, slice, string or reference into a local `SSOString` must not compile
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { string f1() @safe pure nothrow { SSOString x; return x[]; } }));
        static assert(!__traits(compiles, { string f2() @safe pure nothrow { SSOString x; return x.toString; } }));
        static assert(!__traits(compiles, { ref immutable(char) g() @safe pure nothrow @nogc { SSOString x; return x[0]; } }));
    }
}
790 
/// ASCII purity and case-conversion
@safe pure nothrow @nogc unittest
{
    // small strings holding only ASCII characters
    assert(SSOString("a").isSmallASCII);
    assert(SSOString("b").isSmallASCII);
    assert(SSOString("z").isSmallASCII);
    assert(SSOString("_").isSmallASCII);
    assert(SSOString("abcd").isSmallASCII);
    assert(SSOString("123456789_12345").isSmallASCII);

    // ASCII-only but too long to be small
    assert(!SSOString("123456789_123456").isSmallASCII);

    // small but containing non-ASCII characters
    assert(!SSOString("123456789_123ö").isSmallASCII);
    assert(!SSOString("ö").isSmallASCII);
    assert(!SSOString("Ö").isSmallASCII);
    assert(!SSOString("åäö").isSmallASCII);
    assert(!SSOString("ö-värld").isSmallASCII);
}
810 
/// ASCII purity and case-conversion
@safe pure unittest
{
    // small ASCII
    assert(SSOString("A").toLower[] == "a");
    assert(SSOString("a").toUpper[] == "A");
    assert(SSOString("ABCDEFGHIJKLMNO").toLower[] == "abcdefghijklmno"); // small
    assert(SSOString("abcdefghijklmno").toUpper[] == "ABCDEFGHIJKLMNO"); // small

    // small non-ASCII
    assert(SSOString("ÅÄÖ").toLower[] == "åäö");
    assert(SSOString("åäö").toUpper[] == "ÅÄÖ");

    // large
    assert(SSOString("ABCDEFGHIJKLMNOP").toLower[] == "abcdefghijklmnop"); // large
    assert(SSOString("abcdefghijklmnop").toUpper[] == "ABCDEFGHIJKLMNOP"); // large

    // `toLowerInPlace` on a static buffer modifies the buffer itself
    import std.uni : toLowerInPlace;
    char[6] buffer = "ÅÄÖ";
    auto view = buffer[];
    toLowerInPlace(view);
    assert(buffer == "åäö");
    assert(view == "åäö");
}
830 
/// lexicographic comparison
@safe pure unittest
{
    const SSOString lhs = SSOString("a");
    immutable SSOString rhs = SSOString("b");

    assert(lhs == SSOString("a"));

    // ordering agrees between `SSOString`s and their slices
    assert(lhs < rhs);
    assert(rhs > lhs);
    assert(lhs[] < rhs[]);

    // reference behaviour of builtin strings
    assert("a" < "b");
    assert("a" < "å");
    assert("Å" < "å");

    // same ordering for non-ASCII contents
    assert(SSOString("a") < SSOString("å"));
    assert(SSOString("ÅÄÖ") < SSOString("åäö"));
}
849 
/// cast to bool
@safe pure unittest
{
    // mimics behaviour of casting of `string` to `bool`
    const nullStr = SSOString();
    const emptyStr = SSOString("");
    const nonEmptyStr = SSOString("abc");
    assert(!nullStr);           // null is false
    assert(emptyStr);           // empty but non-null is true
    assert(nonEmptyStr);
}
858 
/// to string conversion
@safe pure unittest
{
    // mutable small will GC-allocate
    {
        SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr !is &s.toString()[0]);
    }

    // const small will GC-allocate
    {
        const SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr !is &s.toString()[0]);
    }

    // immutable small will not allocate
    {
        immutable SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr is &s.toString()[0]);
        // TODO: check return via -dip1000
    }

    /* Forbid return of possibly locally scoped `Small` small stack object
     * regardless of head-mutability.
     */
    static if (hasPreviewDIP1000)
    {
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { const SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { immutable SSOString x; return x.ptr; } }));

        /** TODO: Enable the following line when DIP-1000 works for opSlice()
         *
         * See_Also: https://issues.dlang.org/show_bug.cgi?id=18792
         */
        // static assert(!__traits(compiles, { string f1() @safe pure nothrow { immutable SSOString x; return x[]; } }));
    }

    // large will never allocate regardless of head-mutability
    {
        SSOString s = SSOString("123456789_123456");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr is &s.toString()[0]); // `toString` returns the large data as is; shouldn't this change?
    }
}
907 
908 version(unittest)
909 {
910     import nxt.dip_traits : hasPreviewDIP1000;
911 }