module nxt.sso_string;

/** Small-size-optimized (SSO) variant of `string`.
 *
 * Storage is placed inline (inside the `SSOString` value itself) if the number
 * of `char`s is at most `smallCapacity`, otherwise as a normal (large)
 * `string`. The large `string` will be allocated on the GC-heap if the
 * `SSOString` is constructed from a non-`string` (non-`immutable` `char[]`)
 * parameter.
 *
 * Because `SSOString` doesn't have a destructor it can safely allocate using a
 * GC-backed region allocator without relying on a GC finalizer.
 *
 * In order to mimic `string/array/slice`-behaviour, opCast returns `false` for
 * `SSOString()` and `true` for `SSOString("")`. This requires `SSOString()` to
 * default to a large string in which large pointer is set to `null`.
 *
 * Big-endian platform support hasn't been verified.
 *
 * See_Also: https://forum.dlang.org/post/pb87rn$2icb$1@digitalmars.com
 * See_Also: https://issues.dlang.org/show_bug.cgi?id=18792
 * See_Also: https://forum.dlang.org/post/agzznbzkacfhyqvoezht@forum.dlang.org
 *
 * TODO: Use extra bits in `Short.length` for these special text encodings:
 * - 5-bit lowercase English letter into 128/5 = 25 chars
 * - 5-bit uppercase English letter into 120/5 = 24 chars
 * - 6-bit mixedcase English letter into 120/6 = 20 chars
 *
 * TODO: Add to Phobos' std.typecons or std.array or std.string
 */
struct SSOString
{
@safe:
    /** Feed the characters of `this` to `sink` in a single call. */
    @property void toString(scope void delegate(const(char)[]) @safe sink) const
    {
        sink(opSlice());
    }

pure:

    /** Construct from `source`.
     *
     * Sources of at most `smallCapacity` `char`s are copied into the inline
     * (small) storage. Larger sources are stored as a large string:
     * - a dynamic `immutable(char)[]` (`string`) is referenced as-is,
     * - any other dynamic array is duplicated on the GC-heap,
     * - a static array is always duplicated on the GC-heap, because its
     *   storage is a value that may not outlive the constructed `SSOString`.
     */
    this(Chars)(const scope auto ref Chars source) @trusted nothrow
    if (is(Chars : const(char)[])) // `isCharArray`
    {
        static if (__traits(isStaticArray, Chars))
        {
            static if (source.length <= smallCapacity) // inferred @nogc
            {
                small.data[0 .. source.length] = source;
                small.length = cast(typeof(small.length))(encodeSmallLength(source.length));
            }
            else
            {
                // `raw.ptr` is still `null` here, so the elements cannot be
                // copied "into" it; a fresh GC-allocated copy is needed
                // regardless of the element qualifier.
                raw.ptr = source[].idup.ptr; // GC-allocate
                raw.length = encodeLargeLength(source.length);
            }
        }
        else // `Chars` is a (dynamic) array slice
        {
            if (source.length <= smallCapacity)
            {
                (cast(char*)small.data.ptr)[0 .. source.length] = source;
                small.length = cast(typeof(small.length))(encodeSmallLength(source.length));
            }
            else
            {
                static if (is(typeof(source[0]) == immutable(char)))
                    raw.ptr = source.ptr; // already immutable so no duplication needed
                else
                    raw.ptr = source.idup.ptr; // GC-allocate
                raw.length = encodeLargeLength(source.length);
            }
        }
    }

    /** Construct from `source` being a range of `dchar`.
     *
     * `source` is iterated twice: once to count the number of UTF-8 code
     * units needed and once more to encode them into either the small or a
     * newly GC-allocated large storage.
     */
    this(Source)(scope Source source) @trusted
    if (is(typeof({ foreach (const dchar elem; Source.init) {} })) && // TODO: `isConstRefIterable`
        is(typeof(Source.init.front) == dchar))
    {
        import std.utf : encode;

        // pre-calculate number of `char`s needed
        size_t charCount = 0;
        foreach (const e; source)
        {
            char[4] chars;      // TODO: `= void`
            charCount += encode(chars, e);
        }

        if (charCount <= smallCapacity) // fits in small
        {
            size_t offset = 0;
            foreach (const e; source)
            {
                char[4] chars;
                const count = encode(chars, e);
                (cast(char[])(small.data))[offset .. offset + count] = chars[0 .. count];
                offset += count;
            }
            assert(offset <= smallCapacity);
            small.length = cast(typeof(small.length))(encodeSmallLength(offset));
        }
        else                    // needs large
        {
            large = new immutable(char)[charCount];
            size_t offset = 0;
            foreach (const e; source)
            {
                char[4] chars;
                const count = encode(chars, e);
                (cast(char[])(large))[offset .. offset + count] = chars[0 .. count]; // NOTE modifies immutable data
                offset += count;
            }
            // overwrite the plain length written by `large = ...` with the tagged encoding
            raw.length = encodeLargeLength(charCount);
        }
    }

nothrow:

    /** Return `this` converted to a `string`, without any GC-allocation because
     * `this` is `immutable`.
     */
    @property string toString() immutable @trusted return pure nothrow @nogc // never allocates
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice();
    }

    /** Return `this` converted to a `string`, which potentially needs
     * GC-allocation (iff `this` is small, because the inline storage cannot be
     * referenced as `immutable` through a `const` receiver).
     *
     * Implementation is kept in sync with `opSlice`.
     */
    @property string toString() const return @trusted pure nothrow // may GC-allocate
    {
        if (isLarge)
            // GC-allocated slice has immutable members so ok to cast
            return cast(typeof(return))raw.ptr[0 .. decodeRawLength(raw.length)]; // no allocation
        else
            return small.data.ptr[0 .. decodeRawLength(small.length)].idup; // need duplicate to make `immutable`
    }

@nogc:

    /** Get hash of `this`, with extra fast computation for the small case.
     *
     * NOTE(review): the small-case hash covers both raw words after shifting
     * away only the tag bit, so it presumably also depends on the metadata
     * bits; confirm that is intended.
     */
    @property hash_t toHash() const scope @trusted
    {
        version(D_Coverage) {} else version(LDC) pragma(inline, true);
        import core.internal.hash : hashOf;
        import nxt.hash_functions : lemireHash64;
        if (isLarge)
            return hashOf(opSliceLarge()); // use default
        else                    // fast path for small string
            return (lemireHash64(words[0] >> 1) ^ // shift away LS-bit being a constant for a small string
                    lemireHash64(words[1]));
    }

    /** Get length (number of `char`s). */
    @property size_t length() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        if (isLarge)
            return decodeRawLength(large.length); // skip tag bits
        else
            return decodeRawLength(small.length); // skip tag bits
    }
    /// ditto
    alias opDollar = length;

    /** Check if `this` is empty. */
    @property bool empty() const scope @safe pure nothrow @nogc
    {
        return length == 0;
    }

    /** Check if `this` is `null`.
     *
     * True only when the whole raw length word is zero, that is when the tag
     * bit marks a large string and both the encoded length and the metadata
     * bits are zero.
     */
    @property bool isNull() const scope @trusted pure nothrow @nogc
    {
        return raw.length == 0;
    }

    /** Return a slice to either the whole large or whole small `string`.
     *
     * Implementation is kept in sync with `toString`.
     */
    inout(char)[] opSlice() inout return @trusted @nogc
    {
        version(D_Coverage) {} else pragma(inline, true); // TODO: maybe remove
        if (isLarge)
            return opSliceLarge();
        else
            return opSliceSmall();
    }

    /** Return a slice at `[i .. j]` to either the internally stored large or small `string`.
     *
     * Bounds are checked by the slicing of the result of `opSlice()`.
     */
    inout(char)[] opSlice(size_t i, size_t j) inout return @safe
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice()[i .. j];
    }

    /// Slice of the large storage. Must only be called when `isLarge` is `true`.
    private inout(char)[] opSliceLarge() inout return scope @system @nogc
    {
        version(D_Coverage) {} else pragma(inline, true);
        version(unittest) assert(isLarge);
        return cast(typeof(return))raw.ptr[0 .. decodeRawLength(raw.length)]; // no allocation
        // alternative:  return large.ptr[0 .. large.length/2];
    }

    /// Slice of the small (inline) storage. Must only be called when `isLarge` is `false`.
    private inout(char)[] opSliceSmall() inout return @trusted @nogc
    {
        version(D_Coverage) {} else pragma(inline, true);
        version(unittest) assert(!isLarge);
        return cast(typeof(return))small.data.ptr[0 .. decodeRawLength(small.length)]; // scoped
    }

    /** Return the `index`ed `char` of `this`.
     */
    ref inout(char) opIndex(size_t index) inout return @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice()[index]; // does range check
    }

    /// Get pointer to the internally stored `char`s.
    @property private immutable(char)* ptr() const return @trusted
    {
        if (isLarge)
            return large.ptr;   // GC-heap pointer
        else
            return small.data.ptr; // pointer into `this`
    }

    /** Check if `this` is equal to `rhs`. */
    bool opEquals()(const scope auto ref typeof(this) rhs) const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice() == rhs.opSlice();
    }

    /** Check if `this` is equal to `rhs`. */
    bool opEquals()(const scope const(char)[] rhs) const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice() == rhs;
    }

    /** Compare `this` with `that`.
     *
     * Returns: `-1`, `0` or `1` according to the array comparison of the two
     * character slices.
     *
     * See_Also: https://forum.dlang.org/post/muhfypwftdivluqdbmdf@forum.dlang.org
     */
    @property int opCmp()(const scope typeof(this) that) const scope // template-lazy
    {
        version(D_Coverage) {} else pragma(inline, true);
        scope const a = this.opSlice();
        scope const b = that.opSlice();
        return a < b ? -1 : (a > b);
        // import core.internal.array.comparison : __cmp; // instead of `std.algorithm.comparison : cmp`;
        // return __cmp(this[], that[]);
    }

    /** Cast to `bool`: `false` for a large string whose slice is `null`,
     * `true` otherwise (a small string always has its tag bit set).
     */
    bool opCast(T : bool)() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        if (isLarge)
            return large !is null;
        else
            return small.length != 0;
    }

    /** Support trait `isNullable`. */
    static immutable nullValue = typeof(this).init;

    /** Support trait `isHoleable`. */
    static immutable holeValue = typeof(this).asHole();

    /** Check if this is a hole, meaning a removed/erased value. */
    bool isHole() const scope @safe nothrow @nogc
    {
        return words[0] == size_t.max;
    }

    /** Turn this into a hole, meaning a removed/erased value. */
    void holeify() @system @nogc scope
    {
        words[0] = size_t.max;
        words[1] = size_t.max;
    }

    /** Returns: a holed `SSOString`, meaning a removed/erased value. */
    private static typeof(this) asHole() @system
    {
        typeof(return) result = void; // both words are overwritten by `holeify`
        result.holeify();
        return result;
    }

    /** Check if `this` is a small ASCII string. */
    bool isSmallASCII() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        static assert(largeLengthTagBitOffset == 0);// bit 0 of lsbyte not set => small
        // should be fast on 64-bit platforms:
        return ((words[0] & 0x_80_80_80_80__80_80_80_01UL) == 1 && // bit 0 of lsbyte is set => small
                (words[1] & 0x_80_80_80_80__80_80_80_80UL) == 0);
    }

private:

    /** Returns: `true` iff this is a large string, otherwise `false.` */
    @property bool isLarge() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return !(large.length & (1 << largeLengthTagBitOffset)); // first bit discriminates small from large
    }

    alias Large = immutable(char)[];

    public enum smallCapacity = Large.sizeof - Small.length.sizeof;
    static assert(smallCapacity > 0, "No room for small source for immutable(char) being " ~ immutable(char).stringof);

    enum largeLengthTagBitOffset = 0; ///< bit position for large tag in length.
    enum smallLengthBitCount = 4;
    static assert(smallCapacity == 2^^smallLengthBitCount-1);

    enum metaBits = 3;               ///< Number of bits used for metadata.
    enum metaMask = (2^^metaBits-1); ///< Mask for metadata shifted to bottom.
    enum tagsBitCount = 1 + metaBits; ///< Number of bits used for small discriminator plus extra meta data.
    static assert(smallLengthBitCount + tagsBitCount == 8);

    /// Get metadata byte with first `metaBits` bits set.
    @property ubyte metadata() const @safe pure nothrow @nogc
    {
        // same shift as used by the setter: skip the tag bit
        return (small.length >> (largeLengthTagBitOffset + 1)) & metaMask; // get bits [1 .. 1+metaBits]
    }

    /// Set metadata, preserving the current length and small/large tag.
    @property void metadata(ubyte data) @trusted pure nothrow @nogc
    {
        assert(data < (1 << metaBits));
        if (isLarge)
            raw.length = encodeLargeLength(length) | ((data & metaMask) << (largeLengthTagBitOffset + 1));
        else
            small.length = cast(ubyte)encodeSmallLength(length) | ((data & metaMask) << (largeLengthTagBitOffset + 1));
    }

    /// Decode raw length `rawLength` by shifting away tag bits.
    static size_t decodeRawLength(size_t rawLength) @safe pure nothrow @nogc
    {
        return rawLength >> tagsBitCount;
    }

    /// Encode `Large` length from `length` (tag bit and metadata bits cleared).
    static size_t encodeLargeLength(size_t length) @safe pure nothrow @nogc
    {
        return (length << tagsBitCount);
    }

    /// Encode `Small` length from `length` (tag bit set, metadata bits cleared).
    static size_t encodeSmallLength(size_t length) @safe pure nothrow @nogc
    {
        assert(length <= smallCapacity);
        return (length << tagsBitCount) | (1 << largeLengthTagBitOffset);
    }

    version(LittleEndian) // see: http://forum.dlang.org/posting/zifyahfohbwavwkwbgmw
    {
        struct Small
        {
            /* TODO: only first 4 bits are needed to represent a length between
             * 0-15, use other 4 bits.
             */
            ubyte length = 0;
            immutable(char)[smallCapacity] data = [0,0,0,0,0,
                                                   0,0,0,0,0,
                                                   0,0,0,0,0]; // explicit init needed for `__traits(isZeroInit)` to be true.
        }
    }
    else
    {
        struct Small
        {
            immutable(char)[smallCapacity] data = [0,0,0,0,0,
                                                   0,0,0,0,0,
                                                   0,0,0,0,0]; // explicit init needed for `__traits(isZeroInit)` to be true.
            /* TODO: only first 4 bits are needed to represent a length between
             * 0-15, use other 4 bits.
             */
            ubyte length;
        }
        static assert(0, "TODO: add BigEndian support and test");
    }

    struct Raw                  // same memory layout as `immutable(char)[]`
    {
        size_t length = 0;      // can be bit-fiddled without GC allocation
        immutable(char)* ptr = null;
    }

    union
    {
        Raw raw;
        Large large;
        Small small;
        size_t[2] words;
    }
}
version(unittest) static assert(SSOString.sizeof == string.sizeof);

/** Returns: `x` lowercased. */
SSOString toLower()(const SSOString x) @trusted // template-lazy
{
    if (x.isSmallASCII)         // small ASCII fast-path
    {
        typeof(return) result = void; // length byte and all data bytes are written below
        result.small.length = x.small.length;
        import std.ascii : toLower;
        foreach (const index; 0 .. x.smallCapacity)
            (cast(char[])(result.small.data))[index] = toLower(x.small.data[index]);
        return result;
    }
    else if (x.isLarge)
    {
        import std.uni : asLowerCase;
        import std.conv : to;
        return typeof(return)(x.opSlice().asLowerCase.to!string); // TODO: make .to!string nothrow
    }
    else                        // small non-ASCII path usually without GC-allocation
    {
        typeof(return) result = x; // copy
        import std.uni : toLowerInPlace;
        auto slice = cast(char[])(result.opSlice()); // need ref to slice
        toLowerInPlace(slice);
        if (slice is result.opSlice()) // converted in place with unchanged length
            return result;
        // `slice` was either shortened in place or reallocated (possibly to a
        // buffer of the very same length), so `result` does not hold the
        // converted text and must be rebuilt from `slice`.
        return typeof(return)(slice);
    }
}

/** Returns: `x` uppercased. */
SSOString toUpper()(const SSOString x) @trusted // template-lazy
{
    if (x.isSmallASCII)         // small ASCII fast-path
    {
        typeof(return) result = void; // length byte and all data bytes are written below
        result.small.length = x.small.length;
        import std.ascii : toUpper;
        foreach (const index; 0 .. x.smallCapacity)
            (cast(char[])(result.small.data))[index] = toUpper(x.small.data[index]);
        return result;
    }
    else if (x.isLarge)
    {
        import std.uni : asUpperCase;
        import std.conv : to;
        return typeof(return)(x.opSlice().asUpperCase.to!string); // TODO: make .to!string nothrow
    }
    else                        // small non-ASCII path usually without GC-allocation
    {
        typeof(return) result = x; // copy
        import std.uni : toUpperInPlace;
        auto slice = cast(char[])(result.opSlice()); // need ref to slice
        toUpperInPlace(slice);
        if (slice is result.opSlice()) // converted in place with unchanged length
            return result;
        // `slice` was either shortened in place or reallocated (possibly to a
        // buffer of the very same length, as for German double-s), so `result`
        // does not hold the converted text and must be rebuilt from `slice`.
        return typeof(return)(slice);
    }
}

/// construct from non-immutable source is allowed in non-`@nogc`-scope
@safe pure nothrow unittest
{
    alias S = SSOString;

    scope const char[] x0;
    const s0 = SSOString(x0); // no .idup

    scope const char[] x16 = new char[16];
    const s16 = SSOString(x16); // will call .idup
}

/// construct from non-immutable source is not allowed in `@nogc`-scope
@safe pure nothrow @nogc unittest
{
    scope const char[] s;
    // TODO: why does this fail? static assert(!__traits(compiles, { const _ = SSOString(s); }));
}

/// verify `isNull` when @nogc constructing from small static array of `char`s
@trusted pure nothrow @nogc unittest
{
    static foreach (const n; 0 .. SSOString.smallCapacity + 1)
    {
        {
            immutable(char)[n] x;
            assert(!SSOString(x).isNull);
        }
    }
}

/// verify `isNull` when constructing from large static array of `char`s
@trusted pure nothrow unittest
{
    static foreach (const n; SSOString.smallCapacity + 1 .. 32)
    {
        {
            immutable(char)[n] x;
            assert(!SSOString(x).isNull);
            // contents must have been copied
            const s = SSOString(x);
            assert(s.length == n);
            assert(s[] == x[]);
        }
    }
}

/// verify `isNull` when constructing from dynamic array of `char`s
@trusted pure nothrow unittest
{
    foreach (const n; 0 .. 32)
    {
        scope x = new immutable(char)[n];
        assert(!SSOString(x).isNull);
    }
}

/// test behaviour of `==` and `is` operator
@trusted pure nothrow @nogc unittest
{
    const SSOString x = "42";
    assert(!x.isNull);
    assert(x == "42");

    const SSOString y = "42";
    assert(!y.isNull);
    assert(y == "42");

    assert(x == y);
    assert(x == y[]);
    assert(x[] == y);
    assert(x[] == y[]);
    assert(x[] is x[]);
    assert(y[] is y[]);
    assert(x[] !is y[]);
    assert(x.ptr !is y.ptr);

    const SSOString z = "43";
    assert(!z.isNull);
    assert(z == "43");
    assert(x != z);
    assert(x[] != z[]);
    assert(x !is z);
    assert(x[] !is z[]);
}

///
@safe pure nothrow @nogc unittest
{
    static assert(SSOString.smallCapacity == 15);

    import nxt.gc_traits : mustAddGCRange;
    static assert(mustAddGCRange!SSOString); // `Large large.ptr` must be scanned

    static assert(__traits(isZeroInit, SSOString));
    // TODO: assert(SSOString.init == SSOString.nullValue);

    auto s0 = SSOString.init;
    assert(s0.isNull);
    assert(s0.length == 0);
    assert(s0.isLarge);
    assert(s0[] == []);

    char[SSOString.smallCapacity] charsSmallCapacity = "123456789_12345"; // fits in small string
    const sSmallCapacity = SSOString(charsSmallCapacity);
    assert(!sSmallCapacity.isLarge);
    assert(sSmallCapacity.length == SSOString.smallCapacity);
    assert(sSmallCapacity == charsSmallCapacity);

    const s0_ = SSOString("");
    assert(!s0_.isNull);         // cannot distinguish
    assert(s0 == s0_);

    const s7 = SSOString("0123456");
    assert(!s7.isNull);

    const s7_ = SSOString("0123456_"[0 .. $ - 1]);
    assert(s7.ptr !is s7_.ptr); // string data shall not overlap
    assert(s7 == s7_);

    const _s7 = SSOString("_0123456"[1 .. $]); // source from other string literal
    assert(s7.ptr !is _s7.ptr); // string data shall not overlap
    assert(s7 == _s7);

    assert(!s7.isLarge);
    assert(s7.length == 7);
    assert(s7[] == "0123456");
    assert(s7[] == "_0123456"[1 .. $]);
    assert(s7[] == "0123456_"[0 .. $ - 1]);
    assert(s7[0 .. 4] == "0123");

    const s15 = SSOString("0123456789abcde");
    assert(!s15.isNull);
    static assert(is(typeof(s15[]) == const(char)[]));
    assert(!s15.isLarge);
    assert(s15.length == 15);
    assert(s15[] == "0123456789abcde");
    assert(s15[0 .. 4] == "0123");
    assert(s15[10 .. 15] == "abcde");
    assert(s15[10 .. $] == "abcde");

    const s16 = SSOString("0123456789abcdef");
    assert(!s16.isNull);
    static assert(is(typeof(s16[]) == const(char)[]));
    assert(s16.isLarge);

    const s16_ = SSOString("0123456789abcdef_"[0 .. s16.length]);
    assert(s16.length == s16_.length);
    assert(s16[] == s16_[]);
    assert(s16.ptr !is s16_.ptr); // string data shall not overlap
    assert(s16 == s16_);          // but contents is equal

    const _s16 = SSOString("_0123456789abcdef"[1 .. $]);
    assert(s16.length == _s16.length);
    assert(s16[] == _s16[]);    // contents is equal
    assert(s16 == _s16);        // contents is equal

    assert(s16.length == 16);
    assert(s16[] == "0123456789abcdef");
    assert(s16[0] == '0');
    assert(s16[10] == 'a');
    assert(s16[15] == 'f');
    assert(s16[0 .. 4] == "0123");
    assert(s16[10 .. 16] == "abcdef");
    assert(s16[10 .. $] == "abcdef");
}

/// metadata for null string
@safe pure nothrow @nogc unittest
{
    auto s = SSOString.init;
    assert(s.isNull);
    foreach (const i; 0 .. 8)
    {
        s.metadata = i;
        assert(s.metadata == i);
        assert(s.length == 0);
    }
}

/// metadata for small string
@safe pure nothrow @nogc unittest
{
    auto s = SSOString("0123456");
    assert(!s.isNull);
    assert(!s.isLarge);
    foreach (const i; 0 .. 8)
    {
        s.metadata = i;
        assert(s.metadata == i);
        assert(s.length == 7);
        assert(!s.isLarge);
        assert(!s.isNull);
    }
}

/// metadata for small string with maximum length
@safe pure nothrow @nogc unittest
{
    auto s = SSOString("0123456789abcde");
    assert(s.length == SSOString.smallCapacity);
    assert(!s.isNull);
    assert(!s.isLarge);
    foreach (const i; 0 .. 8)
    {
        s.metadata = i;
        assert(s.metadata == i);
        assert(s.length == 15);
        assert(!s.isLarge);
        assert(!s.isNull);
    }
}

/// metadata for large string with minimum length
@safe pure nothrow @nogc unittest
{
    auto s = SSOString("0123456789abcdef");
    assert(s.length == SSOString.smallCapacity + 1);
    assert(!s.isNull);
    assert(s.isLarge);
    assert(!s.empty);
    foreach (const i; 0 .. 8)
    {
        s.metadata = i;
        assert(s.metadata == i);
        assert(s.length == 16);
        assert(s.isLarge);
        assert(!s.isNull);
    }
}

/// equality and equivalence
@safe pure nothrow @nogc unittest
{
    assert(SSOString() == SSOString(""));
    assert(SSOString() !is SSOString(""));
}

/// hashing of null, empty and non-empty
@safe pure nothrow @nogc unittest
{
    assert(SSOString().toHash == 0);
    assert(SSOString("").toHash == 0);
    assert(SSOString("a").toHash != 0);
    assert(SSOString("0123456789abcdef").toHash != 0);
}

/// construct from static array larger than `smallCapacity`
@safe pure nothrow unittest
{
    char[SSOString.smallCapacity + 1] charsMinLargeCapacity;
    const _ = SSOString(charsMinLargeCapacity);
}

// test construction from range
@safe pure unittest
{
    static void test(const scope char[] x,
                     const bool isLarge) @safe pure
    {
        import std.utf : byDchar;
        const scope s = SSOString(x.byDchar);
        assert(s == x);
        assert(s.isLarge == isLarge);
    }
    test("", false);
    test("_", false);
    test("123456789_12345", false);
    test("123456789_123456", true);
    test("123456789_123456789_123456789_", true);
}

/// hole handling
@trusted pure nothrow @nogc unittest
{
    assert(!SSOString.init.isHole);
    assert(!SSOString("").isHole);
    assert(!SSOString("a").isHole);
    assert(SSOString.asHole.isHole);
}

/// DIP-1000 return ref escape analysis
@safe pure nothrow unittest
{
    static if (isDIP1000)
    {
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { string f1() @safe pure nothrow { SSOString x; return x[]; } }));
        static assert(!__traits(compiles, { string f2() @safe pure nothrow { SSOString x; return x.toString; } }));
        static assert(!__traits(compiles, { ref immutable(char) g() @safe pure nothrow @nogc { SSOString x; return x[0]; } }));
    }
}

/// ASCII purity and case-conversion
@safe pure nothrow @nogc unittest
{
    // these are all small ASCII
    assert( SSOString("a").isSmallASCII);
    assert( SSOString("b").isSmallASCII);
    assert( SSOString("z").isSmallASCII);
    assert( SSOString("_").isSmallASCII);
    assert( SSOString("abcd").isSmallASCII);
    assert( SSOString("123456789_12345").isSmallASCII);

    // these are not
    assert(!SSOString("123456789_123456").isSmallASCII); // too large
    assert(!SSOString("123456789_123ö").isSmallASCII);
    assert(!SSOString("ö").isSmallASCII);
    assert(!SSOString("Ö").isSmallASCII);
    assert(!SSOString("åäö").isSmallASCII);
    assert(!SSOString("ö-värld").isSmallASCII);
}

/// ASCII purity and case-conversion
@safe pure unittest
{
    assert(SSOString("A").toLower[] == "a");
    assert(SSOString("a").toUpper[] == "A");
    assert(SSOString("ABCDEFGHIJKLMNO").toLower[] == "abcdefghijklmno"); // small
    assert(SSOString("abcdefghijklmno").toUpper[] == "ABCDEFGHIJKLMNO"); // small
    assert(SSOString("ÅÄÖ").toLower[] == "åäö");
    assert(SSOString("åäö").toUpper[] == "ÅÄÖ");
    assert(SSOString("ABCDEFGHIJKLMNOP").toLower[] == "abcdefghijklmnop"); // large
    assert(SSOString("abcdefghijklmnop").toUpper[] == "ABCDEFGHIJKLMNOP"); // large

    // small non-ASCII conversion must agree with `std.uni`, also when the
    // in-place conversion has to reallocate without changing the length
    import std.uni : uniToUpper = toUpper, uniToLower = toLower;
    assert(SSOString("straße").toUpper[] == uniToUpper("straße"));
    assert(SSOString("STRASSE ß").toLower[] == uniToLower("STRASSE ß"));

    char[6] x = "ÅÄÖ";
    import std.uni : toLowerInPlace;
    auto xref = x[];
    toLowerInPlace(xref);
    assert(x == "åäö");
    assert(xref == "åäö");
}

/// lexicographic comparison
@safe pure unittest
{
    const SSOString a = SSOString("a");
    assert(a == SSOString("a"));

    immutable SSOString b = SSOString("b");

    assert(a < b);
    assert(b > a);
    assert(a[] < b[]);

    assert("a" < "b");
    assert("a" < "å");
    assert("Å" < "å");
    assert(SSOString("a") < SSOString("å"));
    assert(SSOString("ÅÄÖ") < SSOString("åäö"));
}

/// cast to bool
@safe pure unittest
{
    // mimics behaviour of casting of `string` to `bool`
    assert(!SSOString());
    assert(SSOString(""));
    assert(SSOString("abc"));
}

/// to string conversion
@safe pure unittest
{
    // mutable small will GC-allocate
    {
        SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr !is &s.toString()[0]);
    }

    // const small will GC-allocate
    {
        const SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr !is &s.toString()[0]);
    }

    // immutable small will not allocate
    {
        immutable SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr is &s.toString()[0]);
        // TODO: check return via -dip1000
    }

    /* Forbid return of possibly locally scoped `Small` small stack object
     * regardless of head-mutability.
     */
    static if (isDIP1000)
    {
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { const SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { immutable SSOString x; return x.ptr; } }));

        /** TODO: Enable the following line when DIP-1000 works for opSlice()
         *
         * See_Also: https://issues.dlang.org/show_bug.cgi?id=18792
         */
        // static assert(!__traits(compiles, { string f1() @safe pure nothrow { immutable SSOString x; return x[]; } }));
    }

    // large will never allocate regardless of head-mutability
    {
        SSOString s = SSOString("123456789_123456");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr is &s.toString()[0]); // shouldn't this change?
    }
}

version(unittest)
{
    import nxt.dip_traits : isDIP1000;
}