1 module nxt.sso_string;
2 
3 /** Small-size-optimized (SSO) variant of `string`.
4  *
 * Storage is placed inline (on the stack for a local variable) if the number
 * of `char`s is at most `smallCapacity`, otherwise as a normal (large) `string`. The large `string`
7  * will be allocated on the GC-heap if the `SSOString` is constructed from a
8  * non-`string` (non-`immutable` `char[]`) parameter.
9  *
10  * Because `SSOString` doesn't have a destructor it can safely allocate using a
11  * GC-backed region allocator without relying on a GC finalizer.
12  *
13  * In order to mimic `string/array/slice`-behaviour, opCast returns `false` for
14  * `SSOString()` and `true` for `SSOString("")`. This requires `SSOString()` to
15  * default to a large string in which large pointer is set to `null`.
16  *
17  * Big-endian platform support hasn't been verified.
18  *
19  * See_Also: https://forum.dlang.org/post/pb87rn$2icb$1@digitalmars.com
20  * See_Also: https://issues.dlang.org/show_bug.cgi?id=18792
21  * See_Also: https://forum.dlang.org/post/agzznbzkacfhyqvoezht@forum.dlang.org
22  *
23  * TODO: Use extra bits in `Short.length` for these special text encodings:
24  * - 5-bit lowercase English letter into 128/5 = 25 chars
25  * - 5-bit uppercase English letter into 120/5 = 25 chars
26  * - 6-bit mixedcase English letter into 120/6 = 20 chars
27  *
28  * TODO: Move to Phobos' std.typecons or std.array or std.string
29  */
@safe struct SSOString
{
	/** Write the stored `char`s of `this` to `sink`.
	 *
	 * Params:
	 *	sink = callable invoked once with the slice returned by `opSlice()`.
	 */
	@property void toString(Sink)(ref scope Sink sink) const scope {
		sink(opSlice());
	}

pure:

	/** Construct from `source`, which potentially needs GC-allocation (iff
	 * `source.length > smallCapacity` and `source` is not a `string`).
	 *
	 * A static array longer than `smallCapacity` is always duplicated on the
	 * GC-heap, regardless of the mutability of its elements, because a static
	 * array is a value whose storage may not outlive the constructed
	 * `SSOString`.
	 *
	 * Params:
	 *	source = array of `char`s (static or dynamic) to copy or reference.
	 */
	this(Chars)(const scope auto ref Chars source) @trusted nothrow
	if (is(Chars : const(char)[])) // `isCharArray`
	{
		static if (__traits(isStaticArray, Chars)) {
			static if (source.length <= smallCapacity) { // inferred @nogc
				small.data[0 .. source.length] = source;
				small.length = cast(typeof(small.length))(encodeSmallLength(source.length));
			} else {
				// `raw.ptr` is still `null` here, so the elements cannot be
				// copied through it, and keeping a pointer into a static array
				// (possibly a stack or by-value parameter copy) would dangle.
				// Duplicate instead, also for `immutable` elements.
				raw.ptr = source[].idup.ptr; // GC-allocate
				raw.length = encodeLargeLength(source.length);
			}
		} else {					// `Chars` is a (dynamic) array slice
			if (source.length <= smallCapacity) {
				(cast(char*)small.data.ptr)[0 .. source.length] = source;
				small.length = cast(typeof(small.length))(encodeSmallLength(source.length));
			} else {
				static if (is(typeof(source[0]) == immutable(char)))
					raw.ptr = source.ptr; // already immutable so no duplication needed
				else
					raw.ptr = source.idup.ptr; // GC-allocate
				raw.length = encodeLargeLength(source.length);
			}
		}
	}

	/** Construct from `source` being a range of `dchar`.
	 *
	 * `source` is iterated twice: once to count the number of UTF-8 code units
	 * needed and once to encode them into either the small or the large
	 * storage.
	 *
	 * Params:
	 *	source = range whose `front` is a `dchar`.
	 */
	this(Source)(scope Source source) @trusted
	if (is(typeof({ foreach (const dchar elem; Source.init) {} })) && /+ TODO: `isConstRefIterable` +/
		is(typeof(Source.init.front) == dchar))
	{
		import std.utf : encode;

		// pre-calculate number of `char`s needed
		size_t charCount = 0;
		foreach (const e; source) {
			char[4] chars;	  /+ TODO: `= void` +/
			charCount += encode(chars, e);
		}

		if (charCount <= smallCapacity) { // fits in small
			size_t offset = 0;
			foreach (const e; source) {
				char[4] chars;
				const count = encode(chars, e);
				(cast(char[])(small.data))[offset .. offset + count] = chars[0 .. count];
				offset += count;
			}
			assert(offset <= smallCapacity);
			small.length = cast(typeof(small.length))(encodeSmallLength(offset));
		} else {				// needs large
			large = new immutable(char)[charCount];
			size_t offset = 0;
			foreach (const e; source) {
				char[4] chars;
				const count = encode(chars, e);
				(cast(char[])(large))[offset .. offset + count] = chars[0 .. count]; // NOTE modifies immutable data
				offset += count;
			}
			// overwrite the plain array length with the tagged (shifted) length
			raw.length = encodeLargeLength(charCount);
		}
	}

nothrow:

	/** Return `this` converted to a `string`, without any GC-allocation because
	 * `this` is `immutable`.
	 */
	@property string toString() immutable @trusted return pure nothrow @nogc { // never allocates
		version (D_Coverage) {} else pragma(inline, true);
		return opSlice();
	}

	/** Return `this` converted to a `string`, which potentially needs
	 * GC-allocation (iff `this` is stored as a small string).
	 *
	 * Implementation kept in sync with `opSlice`.
	 */
	@property string toString() const return @trusted pure nothrow { // may GC-allocate
		if (isLarge)
			// GC-allocated slice has immutable members so ok to cast
			return cast(typeof(return))raw.ptr[0 .. decodeRawLength(raw.length)]; // no allocation
		else
			return small.data.ptr[0 .. decodeRawLength(small.length)].idup; // need duplicate to make `immutable`
	}

	@nogc:

	/** Get hash of `this`, with extra fast computation for the small case.
	 */
	@property hash_t toHash() const scope @trusted {
		version (D_Coverage) {} else version (LDC) pragma(inline, true);
		import core.internal.hash : hashOf;
		import nxt.hash_functions : lemireHash64;
		if (isLarge)
			return hashOf(opSliceLarge()); // use default
		else					// fast path for small string
			return (lemireHash64(words[0] >> 1) ^ // shift away LS-bit being a constant for a small string
					lemireHash64(words[1]));
	}

	/** Get length. */
	@property size_t length() const scope @trusted {
		version (D_Coverage) {} else pragma(inline, true);
		if (isLarge)
			return decodeRawLength(large.length); // skip tag bits
		else
			return decodeRawLength(small.length); // skip tag bits
	}
	/// ditto
	alias opDollar = length;

	/** Check if `this` is empty. */
	bool empty() const @property scope pure nothrow @safe @nogc {
		return length == 0;
	}

	/** Check if `this` is `null`, that is if the raw length word is all zero. */
	@property bool isNull() const scope @trusted pure nothrow @nogc	{
		return raw.length == 0;
	}

	/** Return a slice to either the whole large or whole small `string`.
	 *
	 * Implementation is kept in sync with `toString`.
	 */
	inout(char)[] opSlice() inout scope return @trusted @nogc {
		version (D_Coverage) {} else pragma(inline, true);   /+ TODO: maybe remove +/
		if (isLarge)
			return opSliceLarge();
		else
			return opSliceSmall();
	}

	/** Return a slice at `[i .. j]` to either the internally stored large or small `string`.
	 *
	 * Implementation is kept in sync with `toString`.
	 */
	inout(char)[] opSlice(size_t i, size_t j) inout return @safe {
		version (D_Coverage) {} else pragma(inline, true);
		return opSlice()[i .. j];
	}

	/// Return a slice to the whole large `string`. Requires `isLarge`.
	private inout(char)[] opSliceLarge() inout return scope @system @nogc
	in { version (unittest) assert(isLarge); }
	do {
		version (D_Coverage) {} else pragma(inline, true);
		return cast(typeof(return))raw.ptr[0 .. decodeRawLength(raw.length)]; // no allocation
		// alternative:  return large.ptr[0 .. large.length/2];
	}

	/// Return a slice to the whole small `string`. Requires `!isLarge`.
	private inout(char)[] opSliceSmall() inout return @trusted @nogc
	in { version (unittest) assert(!isLarge); }
	do {
		version (D_Coverage) {} else pragma(inline, true);
		return cast(typeof(return))small.data.ptr[0 .. decodeRawLength(small.length)]; // scoped
	}

	/** Return the `index`ed `char` of `this`.
	 */
	ref inout(char) opIndex(size_t index) inout return @trusted	{
		version (D_Coverage) {} else pragma(inline, true);
		return opSlice()[index]; // does range check
	}

	/// Get pointer to the internally stored `char`s.
	@property private immutable(char)* ptr() const return @trusted {
		if (isLarge)
			return large.ptr;   // pointer of the large string
		else
			return small.data.ptr; // pointer into `this`
	}

	/** Check if `this` is equal to `rhs`. */
	bool opEquals()(const scope auto ref typeof(this) rhs) const scope @trusted	{
		version (D_Coverage) {} else pragma(inline, true);
		return opSlice() == rhs.opSlice();
	}

	/** Check if `this` is equal to `rhs`. */
	bool opEquals()(const scope const(char)[] rhs) const scope @trusted	{
		version (D_Coverage) {} else pragma(inline, true);
		return opSlice() == rhs;
	}

	/** Compare `this` with `that`.
	 *
	 * Returns: -1 if `this` is less than `that`, 1 if greater and 0 otherwise.
	 *
	 * See_Also: https://forum.dlang.org/post/muhfypwftdivluqdbmdf@forum.dlang.org
	 */
	@property int opCmp()(const scope typeof(this) that) const scope /*tlm*/ {
		version (D_Coverage) {} else pragma(inline, true);
		scope const a = this.opSlice();
		scope const b = that.opSlice();
		return a < b ? -1 : (a > b);
		// import core.internal.array.comparison : __cmp; // instead of `std.algorithm.comparison : cmp`;
		// return __cmp(this[], that[]);
	}

	/** Convert to `bool`: `false` for a large string being `null`, `true` for
	 * any small string (including the empty one) and any non-`null` large
	 * string.
	 */
	bool opCast(T : bool)() const scope @trusted {
		version (D_Coverage) {} else pragma(inline, true);
		if (isLarge)
			return large !is null;
		else
			return small.length != 0;
	}

	/** Support trait `isNullable`. */
	static immutable nullValue = typeof(this).init;

	/** Support trait `isHoleable`. */
	static immutable holeValue = typeof(this).asHole();

	/** Check if this is a hole, meaning a removed/erased value. */
	bool isHole() const scope @safe nothrow @nogc {
		return words[0] == size_t.max;
	}

	/** Turn this into a hole, meaning a removed/erased value. */
	void holeify() @system @nogc scope {
		words[0] = size_t.max;
		words[1] = size_t.max;
	}

	/** Returns: a holed `SSOString`, meaning a removed/erased value. */
	private static typeof(this) asHole() @system {
		typeof(return) result = void;
		result.holeify();
		return result;
	}

	/** Check if `this` is a small ASCII string. */
	bool isSmallASCII() const scope @trusted {
		version (D_Coverage) {} else pragma(inline, true);
		static assert(largeLengthTagBitOffset == 0); // masks below assume the tag is bit 0 of the lsbyte
		// should be fast on 64-bit platforms:
		return ((words[0] & 0x_80_80_80_80__80_80_80_01UL) == 1 && // bit 0 of lsbyte is set => small
				(words[1] & 0x_80_80_80_80__80_80_80_80UL) == 0);
	}

private:

	/** Returns: `true` iff this is a large string, otherwise `false.` */
	@property bool isLarge() const scope @trusted {
		version (D_Coverage) {} else pragma(inline, true);
		return !(large.length & (1 << largeLengthTagBitOffset)); // first bit discriminates small from large
	}

	alias Large = immutable(char)[];

	public enum smallCapacity = Large.sizeof - Small.length.sizeof;
	static assert(smallCapacity > 0, "No room for small source for immutable(char) being " ~ immutable(char).stringof);

	enum largeLengthTagBitOffset = 0; ///< bit position for large tag in length.
	enum smallLengthBitCount = 4;
	static assert(smallCapacity == 2^^smallLengthBitCount-1);

	enum metaBits = 3;			   ///< Number of bits used for metadata.
	enum metaMask = (2^^metaBits-1); ///< Mask for metadata shifted to bottom.
	enum tagsBitCount = 1 + metaBits; ///< Number of bits used for small discriminator plus extra meta data.
	static assert(smallLengthBitCount + tagsBitCount == 8);

	/// Get metadata stored in the `metaBits` bits directly above the tag bit.
	@property ubyte metadata() const pure nothrow @safe @nogc {
		// same shift as used by the setter
		return (small.length >> (largeLengthTagBitOffset + 1)) & metaMask; // get bits [1 .. 1+metaBits]
	}

	/// Set metadata to `data`, which must fit in `metaBits` bits.
	@property void metadata(ubyte data) @trusted pure nothrow @nogc in(data < (1 << metaBits)) {
		if (isLarge)
			raw.length = encodeLargeLength(length) | ((data & metaMask) << (largeLengthTagBitOffset + 1));
		else
			small.length = cast(ubyte)encodeSmallLength(length) | ((data & metaMask) << (largeLengthTagBitOffset + 1));
	}

	/// Decode raw length `rawLength` by shifting away tag bits.
	static size_t decodeRawLength(size_t rawLength) pure nothrow @safe @nogc {
		return rawLength >> tagsBitCount;
	}

	/// Encode `Large` length from `length` (tag bit left cleared).
	static size_t encodeLargeLength(size_t length) pure nothrow @safe @nogc {
		return (length << tagsBitCount);
	}

	/// Encode `Small` length from `length` (tag bit set).
	static size_t encodeSmallLength(size_t length) pure nothrow @safe @nogc
	in(length <= smallCapacity) {
		return (length << tagsBitCount) | (1 << largeLengthTagBitOffset);
	}

	version (LittleEndian) { // See: http://forum.dlang.org/posting/zifyahfohbwavwkwbgmw
		struct Small {
			/* TODO: only first 4 bits are needed to represent a length between
			 * 0-15, use other 4 bits.
			 */
			ubyte length = 0;
			immutable(char)[smallCapacity] data = [0,0,0,0,0,
												   0,0,0,0,0,
												   0,0,0,0,0]; // explicit init needed for `__traits(isZeroInit)` to be true.
		}
	} else {
		struct Small {
			immutable(char)[smallCapacity] data = [0,0,0,0,0,
												   0,0,0,0,0,
												   0,0,0,0,0]; // explicit init needed for `__traits(isZeroInit)` to be true.
			/* TODO: only first 4 bits are needed to represent a length between
			 * 0-15, use other 4 bits.
			 */
			ubyte length;
		}
		static assert(0, "TODO: add BigEndian support and test");
	}

	struct Raw {				  // same memory layout as `immutable(char)[]`
		size_t length = 0;	  // can be bit-fiddled without GC allocation
		immutable(char)* ptr = null;
	}

	// All four members overlay the same two machine words.
	union {
		Raw raw;
		Large large;
		Small small;
		size_t[2] words;
	}
}
// In unittest builds, verify that `SSOString` occupies exactly as much memory as a `string`.
version (unittest) static assert(SSOString.sizeof == string.sizeof);
370 
/** Returns: a lowercased copy of `x`.
 *
 * Small ASCII strings are converted byte by byte, large strings go through
 * `std.uni.asLowerCase`, and small non-ASCII strings are converted in place
 * on a copy when possible.
 */
SSOString toLower()(const SSOString x) @trusted /*tlm*/
{
	alias Result = typeof(return);

	if (x.isSmallASCII) {
		// small ASCII fast-path: convert every byte of the small buffer
		import std.ascii : toLower;
		Result lowered = void;
		lowered.small.length = x.small.length;
		foreach (const i; 0 .. x.smallCapacity)
			(cast(char[])(lowered.small.data))[i] = toLower(x.small.data[i]);
		return lowered;
	}

	if (x.isLarge) {
		import std.conv : to;
		import std.uni : asLowerCase;
		return Result(x.opSlice().asLowerCase.to!string); /+ TODO: make .to!string nothrow +/
	}

	// small non-ASCII path, usually without GC-allocation
	import std.uni : toLowerInPlace;
	Result copy = x;
	auto chars = cast(char[])(copy.opSlice()); // `toLowerInPlace` needs a ref to a slice
	toLowerInPlace(chars);
	if (chars !is copy.opSlice() && // slice no longer refers to the copy
		chars.length != copy.length) // and the length differs
		return Result(chars); // reallocation occurred
	// no reallocation, or same length (happens for German double-s)
	return copy;
}
405 
/** Returns: an uppercased copy of `x`.
 *
 * Small ASCII strings are converted byte by byte, large strings go through
 * `std.uni.asUpperCase`, and small non-ASCII strings are converted in place
 * on a copy when possible.
 */
SSOString toUpper()(const SSOString x) @trusted /*tlm*/
{
	alias Result = typeof(return);

	if (x.isSmallASCII) {
		// small ASCII fast-path: convert every byte of the small buffer
		import std.ascii : toUpper;
		Result uppered = void;
		uppered.small.length = x.small.length;
		foreach (const i; 0 .. x.smallCapacity)
			(cast(char[])(uppered.small.data))[i] = toUpper(x.small.data[i]);
		return uppered;
	}

	if (x.isLarge) {
		import std.conv : to;
		import std.uni : asUpperCase;
		return Result(x.opSlice().asUpperCase.to!string); /+ TODO: make .to!string nothrow +/
	}

	// small non-ASCII path, usually without GC-allocation
	import std.uni : toUpperInPlace;
	Result copy = x;
	auto chars = cast(char[])(copy.opSlice()); // `toUpperInPlace` needs a ref to a slice
	toUpperInPlace(chars);
	if (chars !is copy.opSlice() && // slice no longer refers to the copy
		chars.length != copy.length) // and the length differs
		return Result(chars); // reallocation occurred
	// no reallocation, or same length (happens for German double-s)
	return copy;
}
440 
/// constructing from a non-immutable source is allowed outside of `@nogc` code
pure nothrow @safe unittest {
	alias S = SSOString;

	scope const char[] emptyChars;
	const fromEmpty = SSOString(emptyChars); // fits in small, so no .idup

	scope const char[] sixteenChars = new char[16];
	const fromSixteen = SSOString(sixteenChars); // longer than `smallCapacity`, so calls .idup
}
451 
/// construct from non-immutable source is not allowed in `@nogc`-scope
pure nothrow @safe @nogc unittest {
	// Only the declaration is exercised; the compile-time check below is disabled.
	scope const char[] s;
	/+ TODO: why does this fail? static assert(!__traits(compiles, { const _ = SSOString(s); })); +/
}
457 
/// `isNull` is false when @nogc constructing from any small static array of `char`s
@trusted pure nothrow @nogc unittest {
	// one instantiation per length in `0 .. smallCapacity + 1`
	static foreach (const len; 0 .. SSOString.smallCapacity + 1) {
		{
			immutable(char)[len] chars;
			assert(!SSOString(chars).isNull);
		}
	}
}
467 
/// `isNull` is false when constructing from any large static array of `char`s
@trusted pure nothrow unittest {
	// one instantiation per length in `smallCapacity + 1 .. 32`
	static foreach (const len; SSOString.smallCapacity + 1 .. 32) {
		{
			immutable(char)[len] chars;
			assert(!SSOString(chars).isNull);
		}
	}
}
477 
/// `isNull` is false when constructing from dynamic arrays of `char`s of length 0 to 31
@trusted pure nothrow unittest {
	foreach (const len; 0 .. 32) {
		scope chars = new immutable(char)[len];
		assert(!SSOString(chars).isNull);
	}
}
485 
/// test behaviour of `==` and `is` operator
@trusted pure nothrow @nogc unittest {
	// two small strings with equal contents ...
	const SSOString x = "42";
	assert(!x.isNull);
	assert(x == "42");

	const SSOString y = "42";
	assert(!y.isNull);
	assert(y == "42");

	// ... compare equal by value, in all combinations of `SSOString` and slice
	assert(x == y);
	assert(x == y[]);
	assert(x[] == y);
	assert(x[] == y[]);
	// ... but their slices refer to distinct storage
	assert(x[] is x[]);
	assert(y[] is y[]);
	assert(x[] !is y[]);
	assert(x.ptr !is y.ptr);

	// a string with different contents is neither equal nor identical
	const SSOString z = "43";
	assert(!z.isNull);
	assert(z == "43");
	assert(x != z);
	assert(x[] != z[]);
	assert(x !is z);
	assert(x[] !is z[]);
}
513 
/// layout, null/empty handling and slicing of small and large strings
pure nothrow @safe @nogc unittest {
	static assert(SSOString.smallCapacity == 15);

	import nxt.gc_traits : mustAddGCRange;
	static assert(mustAddGCRange!SSOString); // `Large large.ptr` must be scanned

	static assert(__traits(isZeroInit, SSOString));
	/+ TODO: assert(SSOString.init == SSOString.nullValue); +/

	// default-initialized value is a null, empty, large string
	auto s0 = SSOString.init;
	assert(s0.isNull);
	assert(s0.length == 0);
	assert(s0.isLarge);
	assert(s0[] == []);

	// mutable static array of exactly `smallCapacity` chars
	char[SSOString.smallCapacity] charsSmallCapacity = "123456789_12345"; // fits in small string
	const sSmallCapacity = SSOString(charsSmallCapacity);
	assert(!sSmallCapacity.isLarge);
	assert(sSmallCapacity.length == SSOString.smallCapacity);
	() @trusted { assert(sSmallCapacity == charsSmallCapacity); }(); // TODO: -dip1000 without @trusted

	// empty string is not null but compares equal to the null string
	const s0_ = SSOString("");
	assert(!s0_.isNull);		 // differs from `s0` in `isNull` only
	() @trusted { assert(s0 == s0_); }(); // TODO: -dip1000 without @trusted

	// small string of 7 chars
	const s7 = SSOString("0123456");
	assert(!s7.isNull);

	const s7_ = SSOString("0123456_"[0 .. $ - 1]);
	assert(s7.ptr !is s7_.ptr); // string data shall not overlap
	() @trusted { assert(s7 == s7_); }(); // TODO: -dip1000 without @trusted

	const _s7 = SSOString("_0123456"[1 .. $]); // source from other string literal
	assert(s7.ptr !is _s7.ptr); // string data shall not overlap
	() @trusted { assert(s7 == _s7); }(); // TODO: -dip1000 without @trusted

	assert(!s7.isLarge);
	assert(s7.length == 7);
	assert(s7[] == "0123456");
	assert(s7[] == "_0123456"[1 .. $]);
	assert(s7[] == "0123456_"[0 .. $ - 1]);
	assert(s7[0 .. 4] == "0123");

	// small string of maximum length (15 chars)
	const s15 = SSOString("0123456789abcde");
	assert(!s15.isNull);
	static assert(is(typeof(s15[]) == const(char)[]));
	assert(!s15.isLarge);
	assert(s15.length == 15);
	assert(s15[] == "0123456789abcde");
	assert(s15[0 .. 4] == "0123");
	assert(s15[10 .. 15] == "abcde");
	assert(s15[10 .. $] == "abcde");

	// large string of minimum length (16 chars)
	const s16 = SSOString("0123456789abcdef");
	assert(!s16.isNull);
	static assert(is(typeof(s16[]) == const(char)[]));
	assert(s16.isLarge);

	const s16_ = SSOString("0123456789abcdef_"[0 .. s16.length]);
	assert(s16.length == s16_.length);
	assert(s16[] == s16_[]);
	assert(s16.ptr !is s16_.ptr); // string data shall not overlap
	() @trusted { assert(s16 == s16_); }(); // but contents is equal // TODO: -dip1000 without @trusted

	const _s16 = SSOString("_0123456789abcdef"[1 .. $]);
	assert(s16.length == _s16.length);
	assert(s16[] == _s16[]);	// contents is equal
	() @trusted { assert(s16 == _s16); }(); // contents is equal // TODO: -dip1000 without @trusted

	// indexing and slicing of the large string
	assert(s16.length == 16);
	assert(s16[] == "0123456789abcdef");
	assert(s16[0] == '0');
	assert(s16[10] == 'a');
	assert(s16[15] == 'f');
	assert(s16[0 .. 4] == "0123");
	assert(s16[10 .. 16] == "abcdef");
	assert(s16[10 .. $] == "abcdef");
}
593 
/// metadata can be stored in and read back from a null string without changing its length
pure nothrow @safe @nogc unittest {
	auto str = SSOString.init;
	assert(str.isNull);
	foreach (const meta; 0 .. 8) {
		str.metadata = meta;
		assert(str.metadata == meta);
		assert(str.length == 0);
	}
}
604 
/// metadata can be stored in and read back from a small string without affecting it
pure nothrow @safe @nogc unittest {
	auto str = SSOString("0123456");
	assert(!str.isNull);
	assert(!str.isLarge);
	foreach (const meta; 0 .. 8) {
		str.metadata = meta;
		assert(str.metadata == meta);
		assert(str.length == 7);
		assert(!str.isLarge);
		assert(!str.isNull);
	}
}
618 
/// metadata can be stored in and read back from a small string of maximum length
pure nothrow @safe @nogc unittest {
	auto str = SSOString("0123456789abcde");
	assert(str.length == SSOString.smallCapacity);
	assert(!str.isNull);
	assert(!str.isLarge);
	foreach (const meta; 0 .. 8) {
		str.metadata = meta;
		assert(str.metadata == meta);
		assert(str.length == 15);
		assert(!str.isLarge);
		assert(!str.isNull);
	}
}
633 
/// metadata can be stored in and read back from a large string of minimum length
pure nothrow @safe @nogc unittest {
	auto str = SSOString("0123456789abcdef");
	assert(str.length == SSOString.smallCapacity + 1);
	assert(!str.isNull);
	assert(str.isLarge);
	assert(!str.empty);
	foreach (const meta; 0 .. 8) {
		str.metadata = meta;
		assert(str.metadata == meta);
		assert(str.length == 16);
		assert(str.isLarge);
		assert(!str.isNull);
	}
}
649 
/// equality and equivalence
pure nothrow @safe @nogc unittest {
	// null and empty strings are equal by value (`==`) ...
	() @trusted { assert(SSOString() == SSOString("")); }(); // TODO: -dip1000 without @trusted
	// ... but not identical (`is`)
	() @trusted { assert(SSOString() !is SSOString("")); }(); // TODO: -dip1000 without @trusted
}
655 
/// hashing of null, empty and non-empty
pure nothrow @safe @nogc unittest {
	assert(SSOString().toHash == 0); // null (large path)
	assert(SSOString("").toHash == 0); // empty (small path)
	assert(SSOString("a").toHash != 0); // non-empty small
	assert(SSOString("0123456789abcdef").toHash != 0); // non-empty large
}
663 
/// constructing from a mutable static array one element longer than `smallCapacity`
pure nothrow @safe unittest {
	char[SSOString.smallCapacity + 1] minLargeChars;
	const _ = SSOString(minLargeChars);
}
669 
// construction from a range of `dchar` picks small or large storage by UTF-8 length
pure @safe unittest {
	static void check(const scope char[] chars, const bool expectLarge) @safe pure {
		import std.utf : byDchar;
		const scope built = SSOString(chars.byDchar);
		() @trusted { assert(built == chars); }(); // TODO: -dip1000 without @trusted
		assert(built.isLarge == expectLarge);
	}
	check("", false);
	check("_", false);
	check("123456789_12345", false);
	check("123456789_123456", true);
	check("123456789_123456789_123456789_", true);
}
684 
/// hole handling
@trusted pure nothrow @nogc unittest {
	// null, empty and non-empty strings are not holes
	assert(!SSOString.init.isHole);
	assert(!SSOString("").isHole);
	assert(!SSOString("a").isHole);
	// only the dedicated hole value is
	assert(SSOString.asHole.isHole);
}
692 
/// DIP-1000 return ref escape analysis
pure nothrow @safe unittest {
	// Only checked when compiling with DIP-1000 enabled: none of these
	// accessors may let a reference to a local `SSOString` escape.
	static if (hasPreviewDIP1000)
	{
		static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { SSOString x; return x.ptr; } }));
		static assert(!__traits(compiles, { string f1() @safe pure nothrow { SSOString x; return x[]; } }));
		static assert(!__traits(compiles, { string f2() @safe pure nothrow { SSOString x; return x.toString; } }));
		static assert(!__traits(compiles, { ref immutable(char) g() pure nothrow @safe @nogc { SSOString x; return x[0]; } }));
	}
}
703 
/// `isSmallASCII` is true only for small strings containing only ASCII
pure nothrow @safe @nogc unittest {
	// these are all small ASCII
	assert( SSOString("a").isSmallASCII);
	assert( SSOString("b").isSmallASCII);
	assert( SSOString("z").isSmallASCII);
	assert( SSOString("_").isSmallASCII);
	assert( SSOString("abcd").isSmallASCII);
	assert( SSOString("123456789_12345").isSmallASCII);

	// these are not
	assert(!SSOString("123456789_123456").isSmallASCII); // too large
	assert(!SSOString("123456789_123ö").isSmallASCII); // small but contains non-ASCII
	assert(!SSOString("ö").isSmallASCII);
	assert(!SSOString("Ö").isSmallASCII);
	assert(!SSOString("åäö").isSmallASCII);
	assert(!SSOString("ö-värld").isSmallASCII);
}
722 
/// case-conversion of small ASCII, small non-ASCII and large strings
pure @safe unittest {
	assert(SSOString("A").toLower[] == "a");
	assert(SSOString("a").toUpper[] == "A");
	assert(SSOString("ABCDEFGHIJKLMNO").toLower[] == "abcdefghijklmno"); // small
	assert(SSOString("abcdefghijklmno").toUpper[] == "ABCDEFGHIJKLMNO"); // small
	assert(SSOString("ÅÄÖ").toLower[] == "åäö");
	assert(SSOString("åäö").toUpper[] == "ÅÄÖ");
	assert(SSOString("ABCDEFGHIJKLMNOP").toLower[] == "abcdefghijklmnop"); // large
	assert(SSOString("abcdefghijklmnop").toUpper[] == "ABCDEFGHIJKLMNOP"); // large

	// `toLowerInPlace` on a plain buffer converts it without changing its length
	import std.uni : toLowerInPlace;
	char[6] buffer = "ÅÄÖ";
	auto bufferSlice = buffer[];
	() @trusted { toLowerInPlace(bufferSlice); }(); // TODO: -dip1000 without @trusted
	assert(buffer == "åäö");
	assert(bufferSlice == "åäö");
}
741 
/// lexicographic comparison agrees with that of plain `string`s
pure @safe unittest {
	const SSOString lesser = SSOString("a");
	() @trusted { assert(lesser == SSOString("a")); }(); // TODO: -dip1000 without @trusted

	immutable SSOString greater = SSOString("b");

	() @trusted { assert(lesser < greater); }(); // TODO: -dip1000 without @trusted
	() @trusted { assert(greater > lesser); }(); // TODO: -dip1000 without @trusted
	assert(lesser[] < greater[]);

	// reference ordering of plain string literals
	assert("a" < "b");
	assert("a" < "å");
	assert("Å" < "å");
	() @trusted { assert(SSOString("a") < SSOString("å")); }(); // TODO: -dip1000 without @trusted
	() @trusted { assert(SSOString("ÅÄÖ") < SSOString("åäö")); }(); // TODO: -dip1000 without @trusted
}
759 
/// cast to bool
pure @safe unittest {
	// mimics behaviour of casting of `string` to `bool`
	() @trusted { assert(!SSOString()); }(); // null is false // TODO: -dip1000 without @trusted
	() @trusted { assert(SSOString("")); }(); // empty but non-null is true // TODO: -dip1000 without @trusted
	() @trusted { assert(SSOString("abc")); }(); // non-empty is true // TODO: -dip1000 without @trusted
}
767 
/// to string conversion
pure @safe unittest {
	// mutable small will GC-allocate
	{
		SSOString s = SSOString("123456789_12345");
		assert(s.ptr is &s.opSlice()[0]);
		assert(s.ptr !is &s.toString()[0]);
	}

	// const small will GC-allocate
	{
		const SSOString s = SSOString("123456789_12345");
		assert(s.ptr is &s.opSlice()[0]);
		assert(s.ptr !is &s.toString()[0]);
	}

	// immutable small will not allocate
	{
		immutable SSOString s = SSOString("123456789_12345");
		assert(s.ptr is &s.opSlice()[0]);
		assert(s.ptr is &s.toString()[0]);
		/+ TODO: check return via -dip1000 +/
	}

	/* Forbid return of possibly locally scoped `Small` small stack object
	 * regardless of head-mutability.
	 */
	static if (hasPreviewDIP1000) {
		static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { SSOString x; return x.ptr; } }));
		static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { const SSOString x; return x.ptr; } }));
		static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { immutable SSOString x; return x.ptr; } }));

		/** TODO: Enable the following line when DIP-1000 works for opSlice()
		 *
		 * See_Also: https://issues.dlang.org/show_bug.cgi?id=18792
		 */
		// static assert(!__traits(compiles, { string f1() @safe pure nothrow { immutable SSOString x; return x[]; } }));
	}

	// large will never allocate regardless of head-mutability
	{
		SSOString s = SSOString("123456789_123456");
		assert(s.ptr is &s.opSlice()[0]);
		assert(s.ptr is &s.toString()[0]); // shouldn't this change?
	}
}
814 
815 version (unittest)
816 {
817 	import nxt.dip_traits : hasPreviewDIP1000;
818 }