1 module nxt.sso_string;
2 
/** Small-size-optimized (SSO) variant of `string`.
 *
 * Storage is placed inline in the `SSOString` itself (i.e. on the stack for a
 * local variable) if the number of `char`s is at most `smallCapacity`,
 * otherwise as a normal (large) `string`. The large `string` will be allocated
 * on the GC-heap if the `SSOString` is constructed from a non-`string`
 * (non-`immutable` `char[]`) parameter, which includes static arrays.
 *
 * Because `SSOString` doesn't have a destructor it can safely allocate using a
 * GC-backed region allocator without relying on a GC finalizer.
 *
 * In order to mimic `string/array/slice`-behaviour, opCast returns `false` for
 * `SSOString()` and `true` for `SSOString("")`. This requires `SSOString()` to
 * default to a large string in which large pointer is set to `null`.
 *
 * Layout of the first (least significant) byte, shared by the small and the
 * large representation:
 * - bit 0: discriminator, set for small and cleared for large,
 * - bits 1 .. 3: `metaBits` bits of user metadata,
 * - bits 4 and up: the length.
 *
 * Big-endian platform support hasn't been verified.
 *
 * See_Also: https://forum.dlang.org/post/pb87rn$2icb$1@digitalmars.com
 * See_Also: https://issues.dlang.org/show_bug.cgi?id=18792
 *
 * TODO Use extra bits in `Short.length` for these special text encodings:
 * - 5-bit lowercase English letter into 120/5 = 24 chars
 * - 5-bit uppercase English letter into 120/5 = 24 chars
 * - 6-bit mixedcase English letter into 120/6 = 20 chars
 *
 * TODO Add to Phobos' std.typecons or std.array or std.string
 */
struct SSOString
{
@safe:
    /** Pass the characters of `this` to `sink` in a single call. */
    @property void toString(scope void delegate(const(char)[]) @safe sink) const
    {
        sink(opSlice());
    }

pure:

    /** Construct from `source`, which potentially needs GC-allocation.
     *
     * A `source` of at most `smallCapacity` `char`s is copied into the inline
     * small storage. A longer `source` is stored as a large string: a `string`
     * slice is referenced as is, whereas a slice of non-`immutable` `char`s and
     * any static array is duplicated onto the GC-heap.
     */
    this(Chars)(const scope auto ref Chars source) @trusted nothrow
    if (is(Chars : const(char)[])) // `isCharArray`
    {
        static if (__traits(isStaticArray, Chars))
        {
            static if (source.length <= smallCapacity) // inferred @nogc
            {
                small.data[0 .. source.length] = source;
                small.length = cast(typeof(small.length))(encodeSmallLength(source.length));
            }
            else
            {
                /* `raw.ptr` is still `null` at this point so there is nothing
                 * to copy into, and a static array may live on the caller's
                 * stack so its address must not be retained. Duplicate onto the
                 * GC-heap regardless of the constness of the elements.
                 */
                raw.ptr = source.idup.ptr; // GC-allocate
                raw.length = encodeLargeLength(source.length);
            }
        }
        else                    // `Chars` is a (dynamic) array slice
        {
            if (source.length <= smallCapacity)
            {
                // cast away `immutable` of the inline storage during construction
                (cast(char*)small.data.ptr)[0 .. source.length] = source;
                small.length = cast(typeof(small.length))(encodeSmallLength(source.length));
            }
            else
            {
                static if (is(typeof(source[0]) == immutable(char)))
                {
                    raw.ptr = source.ptr; // already immutable so no duplication needed
                }
                else
                {
                    raw.ptr = source.idup.ptr; // GC-allocate
                }
                raw.length = encodeLargeLength(source.length);
            }
        }
    }

    /** Construct from `source` being a range of `dchar`.
     *
     * `source` is iterated twice: first to count the number of UTF-8 code units
     * needed and then to encode them into either the small or a newly
     * GC-allocated large storage.
     */
    this(Source)(scope Source source) @trusted
    if (is(typeof({ foreach (const dchar elem; Source.init) {} })) && // TODO `isConstRefIterable`
        is(typeof(Source.init.front) == dchar))
    {
        import std.utf : encode;

        // pre-calculate number of `char`s needed
        size_t charCount = 0;
        foreach (const e; source)
        {
            char[4] chars;      // TODO `= void`
            charCount += encode(chars, e);
        }

        if (charCount <= smallCapacity) // fits in small
        {
            size_t offset = 0;
            foreach (const e; source)
            {
                char[4] chars;
                const count = encode(chars, e);
                (cast(char[])(small.data))[offset .. offset + count] = chars[0 .. count];
                offset += count;
            }
            assert(offset <= smallCapacity);
            small.length = cast(typeof(small.length))(encodeSmallLength(offset));
        }
        else                    // needs large
        {
            large = new immutable(char)[charCount];
            size_t offset = 0;
            foreach (const e; source)
            {
                char[4] chars;
                const count = encode(chars, e);
                (cast(char[])(large))[offset .. offset + count] = chars[0 .. count]; // NOTE modifies immutable data
                offset += count;
            }
            // overwrite the plain length stored by the assignment to `large` with the encoded one
            raw.length = encodeLargeLength(charCount);
        }
    }

nothrow:

    /** Return `this` converted to a `string`, without any GC-allocation because
     * `this` is `immutable`.
     */
    @property string toString() immutable @trusted pure nothrow @nogc // never allocates
    {
        return opSlice();
    }

    /** Return `this` converted to a `string`, which potentially needs
     * GC-allocation (iff `this` is small, because the inline storage of a
     * non-`immutable` `this` cannot be exposed as a `string`).
     *
     * Implementation kept in sync with `opSlice`.
     */
    @property string toString() const return @trusted pure nothrow // may GC-allocate
    {
        if (isLarge)
        {
            // GC-allocated slice has immutable members so ok to cast
            return cast(typeof(return))raw.ptr[0 .. decodeRawLength(raw.length)]; // no allocation
        }
        else
        {
            return small.data.ptr[0 .. decodeRawLength(small.length)].idup; // need duplicate to make `immutable`
        }
    }

    @nogc:

    /** Get hash of `this`, with extra fast computation for the small case.
     *
     * The small case hashes the two machine words of the inline storage
     * directly instead of iterating over the characters.
     */
    @property hash_t toHash() const scope @trusted
    {
        version(LDC) pragma(inline, true);
        if (isLarge)
        {
            import core.internal.hash : hashOf;
            return hashOf(opSliceLarge()); // use default
        }
        else                    // fast path for small string
        {
            import nxt.hash_functions : lemireHash64;
            return (lemireHash64(words[0] >> 1) ^ // shift away LS-bit being a constant for a small string
                    lemireHash64(words[1]));
        }
    }

    /** Get length in number of `char`s (UTF-8 code units). */
    @property size_t length() const scope @trusted
    {
        pragma(inline, true);
        if (isLarge)
        {
            return decodeRawLength(large.length); // skip tag bits
        }
        else
        {
            return decodeRawLength(small.length); // skip tag bits
        }
    }
    /// ditto
    alias opDollar = length;

    /** Check if `this` is empty. */
    @property bool empty() const scope @safe pure nothrow @nogc
    {
        return length == 0;
    }

    /** Check if `this` is `null`, that is if the raw (encoded) length word is
     * zero. An empty small string is not `null` because its discriminator bit
     * is set.
     */
    @property bool isNull() const scope @trusted pure nothrow @nogc
    {
        return raw.length == 0;
    }

    /** Return a slice to either the whole large or whole small `string`.
     *
     * Implementation is kept in sync with `toString`.
     */
    inout(char)[] opSlice() inout return scope @trusted @nogc
    {
        pragma(inline, true);
        if (isLarge)
        {
            return opSliceLarge();
        }
        else
        {
            return opSliceSmall();
        }
    }

    /** Return a slice at `[i .. j]` to either the internally stored large or small `string`.
     *
     * Bounds are checked by the slicing of the whole-string slice.
     */
    inout(char)[] opSlice(size_t i, size_t j) inout return @safe
    {
        pragma(inline, true);
        return opSlice()[i .. j];
    }

    /// Return a slice to the whole large `string`. Requires `isLarge`.
    private inout(char)[] opSliceLarge() inout return scope @system @nogc
    {
        pragma(inline, true);
        version(unittest) assert(isLarge);
        return cast(typeof(return))raw.ptr[0 .. decodeRawLength(raw.length)]; // no allocation
        // alternative:  return large.ptr[0 .. large.length/2];
    }

    /// Return a slice to the inline small storage. Requires `!isLarge`.
    private inout(char)[] opSliceSmall() inout return scope @trusted @nogc
    {
        pragma(inline, true);
        version(unittest) assert(!isLarge);
        return cast(typeof(return))small.data.ptr[0 .. decodeRawLength(small.length)]; // scoped
    }

    /** Return the `index`ed `char` of `this`.
     */
    ref inout(char) opIndex(size_t index) inout return @trusted
    {
        pragma(inline, true);
        return opSlice()[index]; // does range check
    }

    /// Get pointer to the internally stored `char`s.
    @property private immutable(char)* ptr() const return @trusted
    {
        if (isLarge)
        {
            return large.ptr;   // pointer stored by the large representation
        }
        else
        {
            return small.data.ptr; // pointer into `this` itself
        }
    }

    /** Check if `this` is equal to `rhs`, comparing contents only. */
    bool opEquals()(const scope auto ref typeof(this) rhs) const scope @trusted
    {
        pragma(inline, true);
        return opSlice() == rhs.opSlice();
    }

    /** Check if the contents of `this` is equal to `rhs`. */
    bool opEquals()(const scope const(char)[] rhs) const scope @trusted
    {
        pragma(inline, true);
        return opSlice() == rhs;
    }

    /** Compare `this` with `that` using the built-in array comparison of their
     * contents.
     *
     * Returns: `-1`, `0` or `1`.
     *
     * See_Also: https://forum.dlang.org/post/muhfypwftdivluqdbmdf@forum.dlang.org
     */
    @property int opCmp()(const scope typeof(this) that) const scope // template-lazy
    {
        pragma(inline, true);
        auto a = this[];
        auto b = that[];
        return a < b ? -1 : (a > b);
        // import core.internal.array.comparison : __cmp; // instead of `std.algorithm.comparison : cmp`;
        // return __cmp(this[], that[]);
    }

    /** Convert to `bool`: `false` for a default-constructed (`null`) value and
     * `true` for every value constructed from a (possibly empty) string.
     */
    bool opCast(T : bool)() const scope @trusted
    {
        pragma(inline, true);
        if (isLarge)
        {
            return large !is null;
        }
        else
        {
            return small.length != 0; // discriminator bit makes this non-zero for small
        }
    }

    /** Check if is the same as to `rhs`.
     *
     * See_Also: https://forum.dlang.org/post/agzznbzkacfhyqvoezht@forum.dlang.org.
     */
    version(none)               // `is` operator cannot be overloaded. See: https://forum.dlang.org/post/prmrli$1146$1@digitalmars.com
    bool opBinary(string op)(const scope auto ref typeof(this) rhs) const scope @trusted
    if (op == `is`)         // TODO has not effect
    {
        pragma(inline, true);
        return opSlice() == rhs.opSlice();
    }

    /** Support trait `isNullable`. */
    static immutable nullValue = typeof(this).init;

    /** Support trait `isHoleable`. */
    static immutable holeValue = typeof(this).asHole();

    /** Check if this is a hole, meaning a removed/erased value. */
    bool isHole() const scope @safe nothrow @nogc
    {
        return words[0] == size_t.max;
    }

    /** Turn this into a hole, meaning a removed/erased value, by setting all
     * bits of both words.
     */
    void holeify() @system @nogc scope
    {
        words[0] = size_t.max;
        words[1] = size_t.max;
    }

    /** Returns: a holed `SSOString`, meaning a removed/erased value. */
    private static typeof(this) asHole() @system
    {
        typeof(return) result = void;
        result.holeify();
        return result;
    }

    /** Check if `this` is a small ASCII string.
     *
     * Tests the discriminator bit together with the most significant bit of
     * every inline data byte using two word-sized mask operations.
     */
    bool isSmallASCII() const scope @trusted
    {
        pragma(inline, true);
        static assert(largeLengthTagBitOffset == 0);// masks below assume the tag is bit 0 of lsbyte
        // should be fast on 64-bit platforms:
        return ((words[0] & 0x_80_80_80_80__80_80_80_01UL) == 1 && // bit 0 of lsbyte is set => small
                (words[1] & 0x_80_80_80_80__80_80_80_80UL) == 0);
    }

private:

    /** Returns: `true` iff this is a large string, otherwise `false`. */
    @property bool isLarge() const scope @trusted
    {
        pragma(inline, true);
        return !(large.length & (1 << largeLengthTagBitOffset)); // first bit discriminates small from large
    }

    alias Large = immutable(char)[];

    /// Maximum number of `char`s that can be stored inline.
    public enum smallCapacity = Large.sizeof - Small.length.sizeof;
    static assert(smallCapacity > 0, "No room for small source for immutable(char) being " ~ immutable(char).stringof);

    enum largeLengthTagBitOffset = 0; ///< bit position for large tag in length.
    enum smallLengthBitCount = 4;     ///< Number of bits needed to store a small length.
    static assert(smallCapacity == 2^^smallLengthBitCount-1);

    enum metaBits = 3;               ///< Number of bits used for metadata.
    enum metaMask = (2^^metaBits-1); ///< Mask for metadata shifted to bottom.
    enum tagsBitCount = 1 + metaBits; ///< Number of bits used for small discriminator plus extra meta data.
    static assert(smallLengthBitCount + tagsBitCount == 8);

    /// Get metadata stored in the `metaBits` bits following the discriminator bit.
    @property ubyte metadata() const @safe pure nothrow @nogc
    {
        // same shift as used by the setter: bits [1 .. 1+metaBits]
        return (small.length >> (largeLengthTagBitOffset + 1)) & metaMask;
    }

    /// Set metadata to `data`, which must be less than `2^^metaBits`, keeping length and discriminator.
    @property void metadata(ubyte data) @trusted pure nothrow @nogc
    {
        assert(data < (1 << metaBits));
        if (isLarge)
        {
            raw.length = encodeLargeLength(length) | ((data & metaMask) << (largeLengthTagBitOffset + 1));
        }
        else
        {
            small.length = cast(ubyte)encodeSmallLength(length) | ((data & metaMask) << (largeLengthTagBitOffset + 1));
        }
    }

    /// Decode raw length `rawLength` by shifting away tag bits.
    static size_t decodeRawLength(size_t rawLength) @safe pure nothrow @nogc
    {
        return rawLength >> tagsBitCount;
    }

    /// Encode `Large` length from `length` (discriminator bit cleared, metadata zero).
    static size_t encodeLargeLength(size_t length) @safe pure nothrow @nogc
    {
        return (length << tagsBitCount);
    }

    /// Encode `Small` length from `length` (discriminator bit set, metadata zero).
    static size_t encodeSmallLength(size_t length) @safe pure nothrow @nogc
    {
        assert(length <= smallCapacity);
        return (length << tagsBitCount) | (1 << largeLengthTagBitOffset);
    }

    version(LittleEndian) // see: http://forum.dlang.org/posting/zifyahfohbwavwkwbgmw
    {
        /// Inline representation: encoded length byte followed by the characters.
        struct Small
        {
            /* TODO only first 4 bits are needed to represent a length between
             * 0-15, use other 4 bits.
             */
            ubyte length = 0;
            immutable(char)[smallCapacity] data = [0,0,0,0,0,
                                                   0,0,0,0,0,
                                                   0,0,0,0,0]; // explicit init needed for `__traits(isZeroInit)` to be true.
        }
    }
    else
    {
        struct Small
        {
            immutable(char)[smallCapacity] data = [0,0,0,0,0,
                                                   0,0,0,0,0,
                                                   0,0,0,0,0]; // explicit init needed for `__traits(isZeroInit)` to be true.
            /* TODO only first 4 bits are needed to represent a length between
             * 0-15, use other 4 bits.
             */
            ubyte length;
        }
        static assert(0, "TODO add BigEndian support and test");
    }

    struct Raw                  // same memory layout as `immutable(char)[]`
    {
        size_t length = 0;      // can be bit-fiddled without GC allocation
        immutable(char)* ptr = null;
    }

    /// All views share the same storage.
    union
    {
        Raw raw;
        Large large;
        Small small;
        size_t[2] words;
    }
}
version(unittest) static assert(SSOString.sizeof == string.sizeof);
465 
/** Returns: `x` lowercased.
 *
 * Three paths are taken depending on `x`:
 * - small ASCII: every inline byte is lowercased with `std.ascii.toLower`
 *   without any allocation,
 * - large: lowercased via `std.uni.asLowerCase` into a new `string`,
 * - small non-ASCII: a copy of `x` is lowercased with
 *   `std.uni.toLowerInPlace`, usually without GC-allocation.
 */
SSOString toLower()(const SSOString x) @trusted // template-lazy
{
    if (x.isSmallASCII)         // small ASCII fast-path
    {
        typeof(return) result = void;
        result.small.length = x.small.length;
        // all `smallCapacity` bytes are written so `result` ends up fully initialized
        foreach (const index; 0 .. x.smallCapacity)
        {
            import std.ascii : toLower;
            (cast(char[])(result.small.data))[index] = toLower(x.small.data[index]);
        }
        return result;
    }
    else if (x.isLarge)
    {
        import std.uni : asLowerCase;
        import std.conv : to;
        return typeof(return)(x.opSlice().asLowerCase.to!string); // TODO make .to!string nothrow
    }
    else                   // small non-ASCII path usually without GC-allocation
    {
        typeof(return) result = x; // copy
        import std.uni : toLowerInPlace;
        auto slice = cast(char[])(result.opSlice()); // need ref to slice
        toLowerInPlace(slice);
        /* `result` holds the complete conversion only when `slice` still refers
         * to the very same memory extent. If `slice` was shortened or rebound
         * to a newly allocated buffer, even one of unchanged length, the
         * converted text lives in `slice` and `result` may be only partially
         * converted.
         */
        if (slice is result.opSlice()) // converted fully in place
        {
            return result;
        }
        else
        {
            version(none)
            {
                import nxt.dbgio;
                dbg(`toLowerInPlace reallocated from "`,
                    result.opSlice(), `" of length `, result.opSlice().length,
                    ` to "`
                    , slice, `" of length `, slice.length);
            }
            return typeof(return)(slice); // shrink or reallocation occurred
        }
    }
}
511 
/** Returns: `x` uppercased.
 *
 * Three paths are taken depending on `x`:
 * - small ASCII: every inline byte is uppercased with `std.ascii.toUpper`
 *   without any allocation,
 * - large: uppercased via `std.uni.asUpperCase` into a new `string`,
 * - small non-ASCII: a copy of `x` is uppercased with
 *   `std.uni.toUpperInPlace`, usually without GC-allocation.
 */
SSOString toUpper()(const SSOString x) @trusted // template-lazy
{
    if (x.isSmallASCII)         // small ASCII fast-path
    {
        typeof(return) result = void;
        result.small.length = x.small.length;
        // all `smallCapacity` bytes are written so `result` ends up fully initialized
        foreach (const index; 0 .. x.smallCapacity)
        {
            import std.ascii : toUpper;
            (cast(char[])(result.small.data))[index] = toUpper(x.small.data[index]);
        }
        return result;
    }
    else if (x.isLarge)
    {
        import std.uni : asUpperCase;
        import std.conv : to;
        return typeof(return)(x.opSlice().asUpperCase.to!string); // TODO make .to!string nothrow
    }
    else                   // small non-ASCII path usually without GC-allocation
    {
        typeof(return) result = x; // copy
        import std.uni : toUpperInPlace;
        auto slice = cast(char[])(result.opSlice()); // need ref to slice
        toUpperInPlace(slice);
        /* `result` holds the complete conversion only when `slice` still refers
         * to the very same memory extent. If `slice` was shortened or rebound
         * to a newly allocated buffer, even one of unchanged length (presumably
         * what happens for German double-s), the converted text lives in
         * `slice` and `result` may be only partially converted.
         */
        if (slice is result.opSlice()) // converted fully in place
        {
            return result;
        }
        else
        {
            version(none)
            {
                import nxt.dbgio;
                dbg(`toUpperInPlace reallocated from "`,
                    result.opSlice(), `" of length `, result.opSlice().length,
                    ` to "`
                    , slice, `" of length `, slice.length);
            }
            return typeof(return)(slice); // shrink or reallocation occurred
        }
    }
}
557 
/// construction from a slice of non-`immutable` `char`s compiles outside of `@nogc` code
@safe pure nothrow unittest
{
    alias S = SSOString;

    // an empty source takes the small branch, so nothing is duplicated
    scope const char[] emptyChars;
    const fromEmpty = S(emptyChars);

    // 16 `char`s exceed `smallCapacity`, so the source is `.idup`ed
    scope const char[] sixteenChars = new char[16];
    const fromSixteen = S(sixteenChars);
}
569 
/// construct from non-immutable source is not allowed in `@nogc`-scope
@safe pure nothrow @nogc unittest
{
    // Placeholder: only declares the source; the intended compile-time check is still disabled.
    scope const char[] s;
    // TODO why does this fail? static assert(!__traits(compiles, { const _ = SSOString(s); }));
}
576 
/// a value built in `@nogc` code from a static array that fits inline is never `null`
@trusted pure nothrow @nogc unittest
{
    // every static array length from 0 up to and including `smallCapacity`
    static foreach (const len; 0 .. SSOString.smallCapacity + 1)
    {{
        immutable(char)[len] chars;
        const built = SSOString(chars);
        assert(!built.isNull);
    }}
}
588 
/// a value built from a static array that is too long to fit inline is never `null`
@trusted pure nothrow unittest
{
    // static array lengths from `smallCapacity + 1` up to 31
    static foreach (const len; SSOString.smallCapacity + 1 .. 32)
    {{
        immutable(char)[len] chars;
        const built = SSOString(chars);
        assert(!built.isNull);
    }}
}
600 
/// a value built from a dynamic array of `immutable` `char`s is never `null`
@trusted pure nothrow unittest
{
    // lengths on both sides of `smallCapacity`
    foreach (const len; 0 .. 32)
    {
        scope chars = new immutable(char)[len];
        const built = SSOString(chars);
        assert(!built.isNull);
    }
}
610 
/// test behaviour of `==` and `is` operator
@trusted pure nothrow @nogc unittest
{
    const SSOString x = "42";
    assert(!x.isNull);
    assert(x == "42");

    const SSOString y = "42";
    assert(!y.isNull);
    assert(y == "42");

    // equal contents compare equal in every combination of value and slice
    assert(x == y);
    assert(x == y[]);
    assert(x[] == y);
    assert(x[] == y[]);
    // a slice is identical to another slice of the same value ...
    assert(x[] is x[]);
    assert(y[] is y[]);
    // ... but small strings keep their characters inside themselves, so two values never share them
    assert(x[] !is y[]);
    assert(x.ptr !is y.ptr);

    // different contents is neither equal nor identical
    const SSOString z = "43";
    assert(!z.isNull);
    assert(z == "43");
    assert(x != z);
    assert(x[] != z[]);
    assert(x !is z);
    assert(x[] !is z[]);
}
639 
/// null, small and large construction together with slicing and indexing
@safe pure nothrow @nogc unittest
{
    static assert(SSOString.smallCapacity == 15);

    import nxt.gc_traits : mustAddGCRange;
    static assert(mustAddGCRange!SSOString); // `Large large.ptr` must be scanned

    static assert(__traits(isZeroInit, SSOString));
    // TODO assert(SSOString.init == SSOString.nullValue);

    // default value is a null, empty, large string
    auto s0 = SSOString.init;
    assert(s0.isNull);
    assert(s0.length == 0);
    assert(s0.isLarge);
    assert(s0[] == []);

    // static array of exactly `smallCapacity` mutable `char`s is stored inline
    char[SSOString.smallCapacity] charsSmallCapacity = "123456789_12345"; // fits in small string
    const sSmallCapacity = SSOString(charsSmallCapacity);
    assert(!sSmallCapacity.isLarge);
    assert(sSmallCapacity.length == SSOString.smallCapacity);
    assert(sSmallCapacity == charsSmallCapacity);

    const s0_ = SSOString("");
    assert(!s0_.isNull);         // empty but not null
    assert(s0 == s0_);           // null and empty compare equal

    const s7 = SSOString("0123456");
    assert(!s7.isNull);

    const s7_ = SSOString("0123456_"[0 .. $ - 1]);
    assert(s7.ptr !is s7_.ptr); // string data shall not overlap
    assert(s7 == s7_);

    const _s7 = SSOString("_0123456"[1 .. $]); // source from other string literal
    assert(s7.ptr !is _s7.ptr); // string data shall not overlap
    assert(s7 == _s7);

    assert(!s7.isLarge);
    assert(s7.length == 7);
    assert(s7[] == "0123456");
    assert(s7[] == "_0123456"[1 .. $]);
    assert(s7[] == "0123456_"[0 .. $ - 1]);
    assert(s7[0 .. 4] == "0123");

    // longest string that is still small
    const s15 = SSOString("0123456789abcde");
    assert(!s15.isNull);
    static assert(is(typeof(s15[]) == const(char)[]));
    assert(!s15.isLarge);
    assert(s15.length == 15);
    assert(s15[] == "0123456789abcde");
    assert(s15[0 .. 4] == "0123");
    assert(s15[10 .. 15] == "abcde");
    assert(s15[10 .. $] == "abcde");

    // shortest string that is large
    const s16 = SSOString("0123456789abcdef");
    assert(!s16.isNull);
    static assert(is(typeof(s16[]) == const(char)[]));
    assert(s16.isLarge);

    const s16_ = SSOString("0123456789abcdef_"[0 .. s16.length]);
    assert(s16.length == s16_.length);
    assert(s16[] == s16_[]);
    assert(s16.ptr !is s16_.ptr); // string data shall not overlap
    assert(s16 == s16_);              // but contents is equal

    const _s16 = SSOString("_0123456789abcdef"[1 .. $]);
    assert(s16.length == _s16.length);
    assert(s16[] == _s16[]);    // contents is equal
    assert(s16 == _s16);        // contents is equal

    assert(s16.length == 16);
    assert(s16[] == "0123456789abcdef");
    assert(s16[0] == '0');
    assert(s16[10] == 'a');
    assert(s16[15] == 'f');
    assert(s16[0 .. 4] == "0123");
    assert(s16[10 .. 16] == "abcdef");
    assert(s16[10 .. $] == "abcdef");
}
720 
/// metadata round-trips on a null string without affecting its length
@safe pure nothrow @nogc unittest
{
    auto str = SSOString.init;
    assert(str.isNull);
    // all values representable in the metadata bits
    foreach (const meta; 0 .. 8)
    {
        str.metadata = meta;
        assert(str.metadata == meta);
        assert(str.length == 0);
    }
}
733 
/// metadata round-trips on a small string without affecting length, size class or nullness
@safe pure nothrow @nogc unittest
{
    auto str = SSOString("0123456");
    assert(!str.isNull);
    assert(!str.isLarge);
    // all values representable in the metadata bits
    foreach (const meta; 0 .. 8)
    {
        str.metadata = meta;
        assert(str.metadata == meta);
        assert(str.length == 7);
        assert(!str.isLarge);
        assert(!str.isNull);
    }
}
749 
/// metadata round-trips on the longest possible small string
@safe pure nothrow @nogc unittest
{
    auto str = SSOString("0123456789abcde");
    assert(str.length == SSOString.smallCapacity);
    assert(!str.isNull);
    assert(!str.isLarge);
    // all values representable in the metadata bits
    foreach (const meta; 0 .. 8)
    {
        str.metadata = meta;
        assert(str.metadata == meta);
        assert(str.length == 15);
        assert(!str.isLarge);
        assert(!str.isNull);
    }
}
766 
/// metadata round-trips on the shortest possible large string
@safe pure nothrow @nogc unittest
{
    auto str = SSOString("0123456789abcdef");
    assert(str.length == SSOString.smallCapacity + 1);
    assert(!str.isNull);
    assert(str.isLarge);
    assert(!str.empty);
    // all values representable in the metadata bits
    foreach (const meta; 0 .. 8)
    {
        str.metadata = meta;
        assert(str.metadata == meta);
        assert(str.length == 16);
        assert(str.isLarge);
        assert(!str.isNull);
    }
}
784 
/// equality and equivalence
@safe pure nothrow @nogc unittest
{
    // null and empty have equal contents ...
    assert(SSOString() == SSOString(""));
    // ... but differ in representation
    assert(SSOString() !is SSOString(""));
}
791 
/// hashing of null, empty and non-empty
@safe pure nothrow @nogc unittest
{
    // null (large path) and empty (small path) both hash to zero
    assert(SSOString().toHash == 0);
    assert(SSOString("").toHash == 0);
    // non-empty small and large strings
    assert(SSOString("a").toHash != 0);
    assert(SSOString("0123456789abcdef").toHash != 0);
}
800 
/// construction from a static array of mutable `char`s that is one element too long to fit inline
@safe pure nothrow unittest
{
    char[SSOString.smallCapacity + 1] oneTooMany;
    const built = SSOString(oneTooMany);
}
807 
// construction from a range of `dchar` yields the same contents and size class as the source
@safe pure unittest
{
    /// Build from `text` decoded as `dchar`s and check contents and whether it ended up large.
    static void check(const scope char[] text,
                      const bool expectLarge) @safe pure
    {
        import std.utf : byDchar;
        const scope built = SSOString(text.byDchar);
        assert(built == text);
        assert(built.isLarge == expectLarge);
    }
    check("", false);
    check("_", false);
    check("123456789_12345", false);  // exactly `smallCapacity`
    check("123456789_123456", true);  // one more than `smallCapacity`
    check("123456789_123456789_123456789_", true);
}
825 
/// hole handling
@trusted pure nothrow @nogc unittest
{
    // neither null, empty nor ordinary values are holes
    assert(!SSOString.init.isHole);
    assert(!SSOString("").isHole);
    assert(!SSOString("a").isHole);
    // only the dedicated hole value is
    assert(SSOString.asHole.isHole);
}
834 
/// DIP-1000 return ref escape analysis
@safe pure nothrow unittest
{
    // only checked when `isDIP1000` is true
    static if (isDIP1000)
    {
        // none of these ways of leaking the storage of a local `SSOString` shall compile
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { string f1() @safe pure nothrow { SSOString x; return x[]; } }));
        static assert(!__traits(compiles, { string f2() @safe pure nothrow { SSOString x; return x.toString; } }));
        static assert(!__traits(compiles, { ref immutable(char) g() @safe pure nothrow @nogc { SSOString x; return x[0]; } }));
    }
}
846 
/// detection of small ASCII strings
@safe pure nothrow @nogc unittest
{
    // these are all small ASCII
    assert( SSOString("a").isSmallASCII);
    assert( SSOString("b").isSmallASCII);
    assert( SSOString("z").isSmallASCII);
    assert( SSOString("_").isSmallASCII);
    assert( SSOString("abcd").isSmallASCII);
    assert( SSOString("123456789_12345").isSmallASCII);

    // these are not
    assert(!SSOString("123456789_123456").isSmallASCII); // too large
    assert(!SSOString("123456789_123ö").isSmallASCII); // small but contains non-ASCII
    assert(!SSOString("ö").isSmallASCII);
    assert(!SSOString("Ö").isSmallASCII);
    assert(!SSOString("åäö").isSmallASCII);
    assert(!SSOString("ö-värld").isSmallASCII);
}
866 
/// case-conversion of small ASCII, small non-ASCII and large strings
@safe pure unittest
{
    assert(SSOString("A").toLower[] == "a");
    assert(SSOString("a").toUpper[] == "A");
    assert(SSOString("ABCDEFGHIJKLMNO").toLower[] == "abcdefghijklmno"); // small
    assert(SSOString("abcdefghijklmno").toUpper[] == "ABCDEFGHIJKLMNO"); // small
    assert(SSOString("ÅÄÖ").toLower[] == "åäö"); // small non-ASCII
    assert(SSOString("åäö").toUpper[] == "ÅÄÖ"); // small non-ASCII
    assert(SSOString("ABCDEFGHIJKLMNOP").toLower[] == "abcdefghijklmnop"); // large
    assert(SSOString("abcdefghijklmnop").toUpper[] == "ABCDEFGHIJKLMNOP"); // large

    // reference behaviour of `toLowerInPlace` on a plain buffer: converted in place here
    char[6] x = "ÅÄÖ";
    import std.uni : toLowerInPlace;
    auto xref = x[];
    toLowerInPlace(xref);
    assert(x == "åäö");
    assert(xref == "åäö");
}
886 
/// lexicographic comparison
@safe pure unittest
{
    const SSOString a = SSOString("a");
    assert(a == SSOString("a"));

    immutable SSOString b = SSOString("b");

    // ordering works across differently qualified values and agrees with the slices
    assert(a < b);
    assert(b > a);
    assert(a[] < b[]);

    // ordering of `SSOString` mirrors that of the built-in `string`
    assert("a" < "b");
    assert("a" < "å");
    assert("Å" < "å");
    assert(SSOString("a") < SSOString("å"));
    assert(SSOString("ÅÄÖ") < SSOString("åäö"));
}
905 
/// cast to bool
@safe pure unittest
{
    // mimics behaviour of casting of `string` to `bool`
    assert(!SSOString());       // null is false
    assert(SSOString(""));      // empty but non-null is true
    assert(SSOString("abc"));
}
914 
/// to string conversion
@safe pure unittest
{
    // mutable small will GC-allocate
    {
        SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr !is &s.toString()[0]); // `toString` returned a duplicate
    }

    // const small will GC-allocate
    {
        const SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr !is &s.toString()[0]); // `toString` returned a duplicate
    }

    // immutable small will not allocate
    {
        immutable SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr is &s.toString()[0]); // `toString` returned the inline storage
        // TODO check return via -dip1000
    }

    /* Forbid return of possibly locally scoped `Small` small stack object
     * regardless of head-mutability.
     */
    static if (isDIP1000)
    {
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { const SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { immutable SSOString x; return x.ptr; } }));

        /** TODO Enable the following line when DIP-1000 works for opSlice()
         *
         * See_Also: https://issues.dlang.org/show_bug.cgi?id=18792
         */
        // static assert(!__traits(compiles, { string f1() @safe pure nothrow { immutable SSOString x; return x[]; } }));
    }

    // large will never allocate regardless of head-mutability
    {
        SSOString s = SSOString("123456789_123456");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr is &s.toString()[0]); // shouldn't this change?
    }
}
963 
964 version(unittest)
965 {
966     import nxt.dip_traits : isDIP1000;
967 }