module nxt.sso_string;

/** Small-size-optimized (SSO) variant of `string`.
 *
 * Storage is placed in-place (inside the `SSOString` itself) if the number of
 * `char`s is at most `smallCapacity`, otherwise as a normal (large) `string`.
 * The large `string` will be allocated on the GC-heap if the `SSOString` is
 * constructed from a non-`string` (non-`immutable` `char[]`) parameter.
 *
 * Because `SSOString` doesn't have a destructor it can safely allocate using a
 * GC-backed region allocator without relying on a GC finalizer.
 *
 * In order to mimic `string/array/slice`-behaviour, opCast returns `false` for
 * `SSOString()` and `true` for `SSOString("")`. This requires `SSOString()` to
 * default to a large string in which large pointer is set to `null`.
 *
 * Big-endian platform support hasn't been verified.
 *
 * See_Also: https://forum.dlang.org/post/pb87rn$2icb$1@digitalmars.com
 * See_Also: https://issues.dlang.org/show_bug.cgi?id=18792
 *
 * TODO Use extra bits in `Short.length` for these special text encodings:
 * - 5-bit lowercase English letter into 120/5 = 24 chars
 * - 5-bit uppercase English letter into 120/5 = 24 chars
 * - 6-bit mixedcase English letter into 120/6 = 20 chars
 *
 * TODO Add to Phobos' std.typecons or std.array or std.string
 */
struct SSOString
{
    @safe:

    /** Feed the contents of `this` to `sink`. */
    @property void toString(scope void delegate(const(char)[]) @safe sink) const
    {
        sink(opSlice());
    }

    pure:

    /** Construct from `source`, which potentially needs GC-allocation (iff
     * `source.length > smallCapacity` and `source` is not a `string`).
     *
     * Small sources are copied into the in-place buffer. Large `immutable`
     * sources are adopted by pointer (no copy); all other large sources are
     * duplicated with `.idup`.
     */
    this(Chars)(const scope auto ref Chars source) @trusted nothrow
    if (is(Chars : const(char)[])) // `isCharArray`
    {
        static if (__traits(isStaticArray, Chars))
        {
            static if (source.length <= smallCapacity) // inferred @nogc
            {
                small.data[0 .. source.length] = source;
                small.length = cast(typeof(small.length))(encodeSmallLength(source.length));
            }
            else
            {
                static if (is(typeof(source[0]) == immutable(char)))
                {
                    // Already immutable so no duplication needed. Writing
                    // through `raw.ptr` here instead would dereference a
                    // pointer that is still `null`.
                    // NOTE(review): this keeps a pointer into the caller's
                    // static array, so that array presumably has to outlive
                    // `this` - confirm against callers.
                    raw.ptr = source.ptr;
                }
                else
                {
                    raw.ptr = source.idup.ptr; // GC-allocate
                }
                raw.length = encodeLargeLength(source.length);
            }
        }
        else                    // `Chars` is a (dynamic) array slice
        {
            if (source.length <= smallCapacity)
            {
                (cast(char*)small.data.ptr)[0 .. source.length] = source;
                small.length = cast(typeof(small.length))(encodeSmallLength(source.length));
            }
            else
            {
                static if (is(typeof(source[0]) == immutable(char)))
                {
                    raw.ptr = source.ptr; // already immutable so no duplication needed
                }
                else
                {
                    raw.ptr = source.idup.ptr; // GC-allocate
                }
                raw.length = encodeLargeLength(source.length);
            }
        }
    }

    /** Construct from `source` of `dchar`.
     *
     * `source` is iterated twice: first to count the number of UTF-8 code
     * units needed and then to encode them into either the small or the large
     * storage.
     */
    this(Source)(scope Source source) @trusted
    if (is(typeof({ foreach (const dchar elem; Source.init) {} })) && // TODO `isConstRefIterable`
        is(typeof(Source.init.front) == dchar))
    {
        import std.utf : encode;

        // pre-calculate number of `char`s needed
        size_t charCount = 0;
        foreach (const e; source)
        {
            char[4] chars;      // TODO `= void`
            charCount += encode(chars, e);
        }

        if (charCount <= smallCapacity) // fits in small
        {
            size_t offset = 0;
            foreach (const e; source)
            {
                char[4] chars;
                const count = encode(chars, e);
                (cast(char[])(small.data))[offset .. offset + count] = chars[0 .. count];
                offset += count;
            }
            assert(offset <= smallCapacity);
            small.length = cast(typeof(small.length))(encodeSmallLength(offset));
        }
        else                    // needs large
        {
            large = new immutable(char)[charCount];
            size_t offset = 0;
            foreach (const e; source)
            {
                char[4] chars;
                const count = encode(chars, e);
                (cast(char[])(large))[offset .. offset + count] = chars[0 .. count]; // NOTE modifies immutable data
                offset += count;
            }
            raw.length = encodeLargeLength(charCount);
        }
    }

    nothrow:

    /** Return `this` converted to a `string`, without any GC-allocation because
     * `this` is `immutable`.
     */
    @property string toString() immutable @trusted pure nothrow @nogc // never allocates
    {
        return opSlice();
    }

    /** Return `this` converted to a `string`, which potentially needs
     * GC-allocation (iff `this` is small, because the in-place buffer must be
     * duplicated to become `immutable`).
     *
     * implementation kept in sync with `opSlice`.
     */
    @property string toString() const return @trusted pure nothrow // may GC-allocate
    {
        if (isLarge)
        {
            // GC-allocated slice has immutable members so ok to cast
            return cast(typeof(return))raw.ptr[0 .. decodeRawLength(raw.length)]; // no allocation
        }
        else
        {
            return small.data.ptr[0 .. decodeRawLength(small.length)].idup; // need duplicate to make `immutable`
        }
    }

    @nogc:

    /** Get hash of `this`, with extra fast computation for the small case.
     */
    @property hash_t toHash() const scope @trusted
    {
        version(LDC) pragma(inline, true);
        if (isLarge)
        {
            import core.internal.hash : hashOf;
            return hashOf(opSliceLarge()); // use default
        }
        else                    // fast path for small string
        {
            import nxt.hash_functions : lemireHash64;
            return (lemireHash64(words[0] >> 1) ^ // shift away LS-bit being a constant for a small string
                    lemireHash64(words[1]));
        }
    }

    /** Get length. */
    @property size_t length() const scope @trusted
    {
        pragma(inline, true);
        if (isLarge)
        {
            return decodeRawLength(large.length); // skip tag bits
        }
        else
        {
            return decodeRawLength(small.length); // skip tag bits
        }
    }
    /// ditto
    alias opDollar = length;

    /** Check if `this` is empty. */
    @property bool empty() const scope @safe pure nothrow @nogc
    {
        return length == 0;
    }

    /** Check if `this` is `null`.
     *
     * A small string always has its tag bit set, so only a default-initialized
     * (large, zero-length, no metadata) value has a raw length of zero.
     */
    @property bool isNull() const scope @trusted pure nothrow @nogc
    {
        return raw.length == 0;
    }

    /** Return a slice to either the whole large or whole small `string`.
     *
     * Implementation is kept in sync with `toString`.
     */
    inout(char)[] opSlice() inout return scope @trusted @nogc
    {
        pragma(inline, true);
        if (isLarge)
        {
            return opSliceLarge();
        }
        else
        {
            return opSliceSmall();
        }
    }

    /** Return a slice at `[i .. j]` to either the internally stored large or small `string`.
     *
     * Implementation is kept in sync with `toString`.
     */
    inout(char)[] opSlice(size_t i, size_t j) inout return @safe
    {
        pragma(inline, true);
        return opSlice()[i .. j];
    }

    /// Return a slice to the whole large `string`. Requires `isLarge`.
    private inout(char)[] opSliceLarge() inout return scope @system @nogc
    {
        pragma(inline, true);
        version(unittest) assert(isLarge);
        return cast(typeof(return))raw.ptr[0 .. decodeRawLength(raw.length)]; // no allocation
        // alternative:  return large.ptr[0 .. large.length/2];
    }

    /// Return a slice to the whole small `string`. Requires `!isLarge`.
    private inout(char)[] opSliceSmall() inout return scope @trusted @nogc
    {
        pragma(inline, true);
        version(unittest) assert(!isLarge);
        return cast(typeof(return))small.data.ptr[0 .. decodeRawLength(small.length)]; // scoped
    }

    /** Return the `index`ed `char` of `this`.
     */
    ref inout(char) opIndex(size_t index) inout return @trusted
    {
        pragma(inline, true);
        return opSlice()[index]; // does range check
    }

    /// Get pointer to the internally stored `char`s.
    @property private immutable(char)* ptr() const return @trusted
    {
        if (isLarge)
        {
            return large.ptr;   // GC-heap pointer
        }
        else
        {
            return small.data.ptr; // pointer into `this`
        }
    }

    /** Check if `this` is equal to `rhs`. */
    bool opEquals()(const scope auto ref typeof(this) rhs) const scope @trusted
    {
        pragma(inline, true);
        return opSlice() == rhs.opSlice();
    }

    /** Check if `this` is equal to `rhs`. */
    bool opEquals()(const scope const(char)[] rhs) const scope @trusted
    {
        pragma(inline, true);
        return opSlice() == rhs;
    }

    /** Compare `this` with `that`.
     *
     * See_Also: https://forum.dlang.org/post/muhfypwftdivluqdbmdf@forum.dlang.org
     */
    @property int opCmp()(const scope typeof(this) that) const scope // template-lazy
    {
        pragma(inline, true);
        auto a = this[];
        auto b = that[];
        return a < b ? -1 : (a > b);
        // import core.internal.array.comparison : __cmp; // instead of `std.algorithm.comparison : cmp`;
        // return __cmp(this[], that[]);
    }

    /** Convert to `bool`: `false` for a null (default-initialized) value and
     * `true` otherwise, including for the empty string.
     */
    bool opCast(T : bool)() const scope @trusted
    {
        pragma(inline, true);
        if (isLarge)
        {
            return large !is null;
        }
        else
        {
            return small.length != 0;
        }
    }

    /** Check if `this` is the same as `rhs`.
     *
     * See_Also: https://forum.dlang.org/post/agzznbzkacfhyqvoezht@forum.dlang.org.
     */
    version(none)               // `is` operator cannot be overloaded. See: https://forum.dlang.org/post/prmrli$1146$1@digitalmars.com
    bool opBinary(string op)(const scope auto ref typeof(this) rhs) const scope @trusted
    if (op == `is`)         // TODO has no effect
    {
        pragma(inline, true);
        return opSlice() == rhs.opSlice();
    }

    /** Support trait `isNullable`. */
    static immutable nullValue = typeof(this).init;

    /** Support trait `isHoleable`. */
    static immutable holeValue = typeof(this).asHole();

    /** Check if `this` is a hole, meaning a removed/erased value. */
    bool isHole() const scope @safe nothrow @nogc
    {
        return words[0] == size_t.max;
    }

    /** Turn `this` into a hole, meaning a removed/erased value. */
    void holeify() @system @nogc scope
    {
        words[0] = size_t.max;
        words[1] = size_t.max;
    }

    /** Returns: a holed `SSOString`, meaning a removed/erased value. */
    private static typeof(this) asHole() @system
    {
        typeof(return) result = void;
        result.holeify();
        return result;
    }

    /** Check if `this` is a small ASCII string. */
    bool isSmallASCII() const scope @trusted
    {
        pragma(inline, true);
        static assert(largeLengthTagBitOffset == 0);// bit 0 of lsbyte not set => large
        // should be fast on 64-bit platforms:
        return ((words[0] & 0x_80_80_80_80__80_80_80_01UL) == 1 && // bit 0 of lsbyte is set => small
                (words[1] & 0x_80_80_80_80__80_80_80_80UL) == 0);
    }

private:

    /** Returns: `true` iff this is a large string, otherwise `false.` */
    @property bool isLarge() const scope @trusted
    {
        pragma(inline, true);
        return !(large.length & (1 << largeLengthTagBitOffset)); // first bit discriminates small from large
    }

    alias Large = immutable(char)[];

    public enum smallCapacity = Large.sizeof - Small.length.sizeof;
    static assert(smallCapacity > 0, "No room for small source for immutable(char) being " ~ immutable(char).stringof);

    enum largeLengthTagBitOffset = 0; ///< bit position for large tag in length.
    enum smallLengthBitCount = 4;
    static assert(smallCapacity == 2^^smallLengthBitCount-1);

    enum metaBits = 3;               ///< Number of bits used for metadata.
    enum metaMask = (2^^metaBits-1); ///< Mask for metadata shifted to bottom.
    enum tagsBitCount = 1 + metaBits; ///< Number of bits used for small discriminator plus extra meta data.
    static assert(smallLengthBitCount + tagsBitCount == 8);

    /// Get metadata byte with first `metaBits` bits set.
    @property ubyte metadata() const @safe pure nothrow @nogc
    {
        // Same shift amount as used by the setter below.
        return (small.length >> (largeLengthTagBitOffset + 1)) & metaMask; // get bits [1 .. 1+metaBits]
    }

    /// Set metadata, keeping the current length and small/large discriminator.
    @property void metadata(ubyte data) @trusted pure nothrow @nogc
    {
        assert(data < (1 << metaBits));
        if (isLarge)
        {
            raw.length = encodeLargeLength(length) | ((data & metaMask) << (largeLengthTagBitOffset + 1));
        }
        else
        {
            small.length = cast(ubyte)encodeSmallLength(length) | ((data & metaMask) << (largeLengthTagBitOffset + 1));
        }
    }

    /// Decode raw length `rawLength` by shifting away tag bits.
    static size_t decodeRawLength(size_t rawLength) @safe pure nothrow @nogc
    {
        return rawLength >> tagsBitCount;
    }

    /// Encode `Large` length from `Length`.
    static size_t encodeLargeLength(size_t length) @safe pure nothrow @nogc
    {
        return (length << tagsBitCount);
    }

    /// Encode `Small` length from `Length`.
    static size_t encodeSmallLength(size_t length) @safe pure nothrow @nogc
    {
        assert(length <= smallCapacity);
        return (length << tagsBitCount) | (1 << largeLengthTagBitOffset);
    }

    version(LittleEndian) // see: http://forum.dlang.org/posting/zifyahfohbwavwkwbgmw
    {
        struct Small
        {
            /* TODO only first 4 bits are needed to represent a length between
             * 0-15, use other 4 bits.
             */
            ubyte length = 0;
            immutable(char)[smallCapacity] data = [0,0,0,0,0,
                                                   0,0,0,0,0,
                                                   0,0,0,0,0]; // explicit init needed for `__traits(isZeroInit)` to be true.
        }
    }
    else
    {
        struct Small
        {
            immutable(char)[smallCapacity] data = [0,0,0,0,0,
                                                   0,0,0,0,0,
                                                   0,0,0,0,0]; // explicit init needed for `__traits(isZeroInit)` to be true.
            /* TODO only first 4 bits are needed to represent a length between
             * 0-15, use other 4 bits.
             */
            ubyte length;
        }
        static assert(0, "TODO add BigEndian support and test");
    }

    struct Raw                  // same memory layout as `immutable(char)[]`
    {
        size_t length = 0;      // can be bit-fiddled without GC allocation
        immutable(char)* ptr = null;
    }

    union
    {
        Raw raw;
        Large large;
        Small small;
        size_t[2] words;
    }
}
version(unittest) static assert(SSOString.sizeof == string.sizeof);

/** Returns: `x` lowercased.
 *
 * Small ASCII strings are converted byte-wise without allocation. Large
 * strings go through `std.uni.asLowerCase`. Small non-ASCII strings are
 * converted in place on a copy when possible.
 */
SSOString toLower()(const SSOString x) @trusted // template-lazy
{
    if (x.isSmallASCII)         // small ASCII fast-path
    {
        typeof(return) result = void;
        result.small.length = x.small.length;
        foreach (const index; 0 .. x.smallCapacity)
        {
            import std.ascii : toLower;
            (cast(char[])(result.small.data))[index] = toLower(x.small.data[index]);
        }
        return result;
    }
    else if (x.isLarge)
    {
        import std.uni : asLowerCase;
        import std.conv : to;
        return typeof(return)(x.opSlice().asLowerCase.to!string); // TODO make .to!string nothrow
    }
    else                        // small non-ASCII path usually without GC-allocation
    {
        typeof(return) result = x; // copy
        import std.uni : toLowerInPlace;
        auto slice = cast(char[])(result.opSlice()); // need ref to slice
        toLowerInPlace(slice);
        // `slice` is rebound when the conversion could not be completed inside
        // `result`. In that case `result` does not hold the converted text,
        // even if the new length happens to equal the old one, so only reuse
        // `result` when `slice` still refers to exactly its storage.
        if (slice is result.opSlice()) // converted in place, same length
        {
            return result;
        }
        else
        {
            version(none)
            {
                import nxt.dbgio;
                dbg(`toLowerInPlace reallocated from "`,
                    result.opSlice(), `" of length `, result.opSlice().length,
                    ` to "`
                    , slice, `" of length `, slice.length);
            }
            return typeof(return)(slice); // reallocation or length change occurred
        }
    }
}

/** Returns: `x` uppercased.
 *
 * Small ASCII strings are converted byte-wise without allocation. Large
 * strings go through `std.uni.asUpperCase`. Small non-ASCII strings are
 * converted in place on a copy when possible.
 */
SSOString toUpper()(const SSOString x) @trusted // template-lazy
{
    if (x.isSmallASCII)         // small ASCII fast-path
    {
        typeof(return) result = void;
        result.small.length = x.small.length;
        foreach (const index; 0 .. x.smallCapacity)
        {
            import std.ascii : toUpper;
            (cast(char[])(result.small.data))[index] = toUpper(x.small.data[index]);
        }
        return result;
    }
    else if (x.isLarge)
    {
        import std.uni : asUpperCase;
        import std.conv : to;
        return typeof(return)(x.opSlice().asUpperCase.to!string); // TODO make .to!string nothrow
    }
    else                        // small non-ASCII path usually without GC-allocation
    {
        typeof(return) result = x; // copy
        import std.uni : toUpperInPlace;
        auto slice = cast(char[])(result.opSlice()); // need ref to slice
        toUpperInPlace(slice);
        // `slice` is rebound when the conversion could not be completed inside
        // `result`. In that case `result` does not hold the converted text,
        // even if the new length happens to equal the old one, so only reuse
        // `result` when `slice` still refers to exactly its storage.
        if (slice is result.opSlice()) // converted in place, same length
        {
            return result;
        }
        else
        {
            version(none)
            {
                import nxt.dbgio;
                dbg(`toUpperInPlace reallocated from "`,
                    result.opSlice(), `" of length `, result.opSlice().length,
                    ` to "`
                    , slice, `" of length `, slice.length);
            }
            return typeof(return)(slice); // reallocation or length change occurred
        }
    }
}

/// construct from non-immutable source is allowed in non-`@nogc`-scope
@safe pure nothrow unittest
{
    alias S = SSOString;

    scope const char[] x0;
    const s0 = SSOString(x0); // no .idup

    scope const char[] x16 = new char[16];
    const s16 = SSOString(x16); // will call .idup
}

/// construct from non-immutable source is not allowed in `@nogc`-scope
@safe pure nothrow @nogc unittest
{
    scope const char[] s;
    // TODO why does this fail? static assert(!__traits(compiles, { const _ = SSOString(s); }));
}

/// verify `isNull` when @nogc constructing from small static array of `char`s
@trusted pure nothrow @nogc unittest
{
    static foreach (const n; 0 .. SSOString.smallCapacity + 1)
    {
        {
            immutable(char)[n] x;
            assert(!SSOString(x).isNull);
        }
    }
}

/// verify `isNull` when constructing from large static array of `char`s
@trusted pure nothrow unittest
{
    static foreach (const n; SSOString.smallCapacity + 1 .. 32)
    {
        {
            immutable(char)[n] x;
            assert(!SSOString(x).isNull);
        }
    }
}

/// verify contents when constructing from large immutable static array of `char`s
@trusted pure nothrow unittest
{
    immutable(char)[SSOString.smallCapacity + 1] x = "0123456789abcdef";
    const s = SSOString(x);
    assert(s.isLarge);
    assert(s.length == x.length);
    assert(s[] == x[]);
    assert(s.ptr is x.ptr);     // adopted without duplication
}

/// verify `isNull` when constructing from dynamic array of `char`s
@trusted pure nothrow unittest
{
    foreach (const n; 0 .. 32)
    {
        scope x = new immutable(char)[n];
        assert(!SSOString(x).isNull);
    }
}

/// test behaviour of `==` and `is` operator
@trusted pure nothrow @nogc unittest
{
    const SSOString x = "42";
    assert(!x.isNull);
    assert(x == "42");

    const SSOString y = "42";
    assert(!y.isNull);
    assert(y == "42");

    assert(x == y);
    assert(x == y[]);
    assert(x[] == y);
    assert(x[] == y[]);
    assert(x[] is x[]);
    assert(y[] is y[]);
    assert(x[] !is y[]);
    assert(x.ptr !is y.ptr);

    const SSOString z = "43";
    assert(!z.isNull);
    assert(z == "43");
    assert(x != z);
    assert(x[] != z[]);
    assert(x !is z);
    assert(x[] !is z[]);
}

///
@safe pure nothrow @nogc unittest
{
    static assert(SSOString.smallCapacity == 15);

    import nxt.gc_traits : mustAddGCRange;
    static assert(mustAddGCRange!SSOString); // `Large large.ptr` must be scanned

    static assert(__traits(isZeroInit, SSOString));
    // TODO assert(SSOString.init == SSOString.nullValue);

    auto s0 = SSOString.init;
    assert(s0.isNull);
    assert(s0.length == 0);
    assert(s0.isLarge);
    assert(s0[] == []);

    char[SSOString.smallCapacity] charsSmallCapacity = "123456789_12345"; // fits in small string
    const sSmallCapacity = SSOString(charsSmallCapacity);
    assert(!sSmallCapacity.isLarge);
    assert(sSmallCapacity.length == SSOString.smallCapacity);
    assert(sSmallCapacity == charsSmallCapacity);

    const s0_ = SSOString("");
    assert(!s0_.isNull);         // cannot distinguish
    assert(s0 == s0_);

    const s7 = SSOString("0123456");
    assert(!s7.isNull);

    const s7_ = SSOString("0123456_"[0 .. $ - 1]);
    assert(s7.ptr !is s7_.ptr); // string data shall not overlap
    assert(s7 == s7_);

    const _s7 = SSOString("_0123456"[1 .. $]); // source from other string literal
    assert(s7.ptr !is _s7.ptr); // string data shall not overlap
    assert(s7 == _s7);

    assert(!s7.isLarge);
    assert(s7.length == 7);
    assert(s7[] == "0123456");
    assert(s7[] == "_0123456"[1 .. $]);
    assert(s7[] == "0123456_"[0 .. $ - 1]);
    assert(s7[0 .. 4] == "0123");

    const s15 = SSOString("0123456789abcde");
    assert(!s15.isNull);
    static assert(is(typeof(s15[]) == const(char)[]));
    assert(!s15.isLarge);
    assert(s15.length == 15);
    assert(s15[] == "0123456789abcde");
    assert(s15[0 .. 4] == "0123");
    assert(s15[10 .. 15] == "abcde");
    assert(s15[10 .. $] == "abcde");

    const s16 = SSOString("0123456789abcdef");
    assert(!s16.isNull);
    static assert(is(typeof(s16[]) == const(char)[]));
    assert(s16.isLarge);

    const s16_ = SSOString("0123456789abcdef_"[0 .. s16.length]);
    assert(s16.length == s16_.length);
    assert(s16[] == s16_[]);
    assert(s16.ptr !is s16_.ptr); // string data shall not overlap
    assert(s16 == s16_);          // but contents is equal

    const _s16 = SSOString("_0123456789abcdef"[1 .. $]);
    assert(s16.length == _s16.length);
    assert(s16[] == _s16[]);    // contents is equal
    assert(s16 == _s16);        // contents is equal

    assert(s16.length == 16);
    assert(s16[] == "0123456789abcdef");
    assert(s16[0] == '0');
    assert(s16[10] == 'a');
    assert(s16[15] == 'f');
    assert(s16[0 .. 4] == "0123");
    assert(s16[10 .. 16] == "abcdef");
    assert(s16[10 .. $] == "abcdef");
}

/// metadata for null string
@safe pure nothrow @nogc unittest
{
    auto s = SSOString.init;
    assert(s.isNull);
    foreach (const i; 0 .. 8)
    {
        s.metadata = i;
        assert(s.metadata == i);
        assert(s.length == 0);
    }
}

/// metadata for small string
@safe pure nothrow @nogc unittest
{
    auto s = SSOString("0123456");
    assert(!s.isNull);
    assert(!s.isLarge);
    foreach (const i; 0 .. 8)
    {
        s.metadata = i;
        assert(s.metadata == i);
        assert(s.length == 7);
        assert(!s.isLarge);
        assert(!s.isNull);
    }
}

/// metadata for small string with maximum length
@safe pure nothrow @nogc unittest
{
    auto s = SSOString("0123456789abcde");
    assert(s.length == SSOString.smallCapacity);
    assert(!s.isNull);
    assert(!s.isLarge);
    foreach (const i; 0 .. 8)
    {
        s.metadata = i;
        assert(s.metadata == i);
        assert(s.length == 15);
        assert(!s.isLarge);
        assert(!s.isNull);
    }
}

/// metadata for large string with minimum length
@safe pure nothrow @nogc unittest
{
    auto s = SSOString("0123456789abcdef");
    assert(s.length == SSOString.smallCapacity + 1);
    assert(!s.isNull);
    assert(s.isLarge);
    assert(!s.empty);
    foreach (const i; 0 .. 8)
    {
        s.metadata = i;
        assert(s.metadata == i);
        assert(s.length == 16);
        assert(s.isLarge);
        assert(!s.isNull);
    }
}

/// equality and equivalence
@safe pure nothrow @nogc unittest
{
    assert(SSOString() == SSOString(""));
    assert(SSOString() !is SSOString(""));
}

/// hashing of null, empty and non-empty
@safe pure nothrow @nogc unittest
{
    assert(SSOString().toHash == 0);
    assert(SSOString("").toHash == 0);
    assert(SSOString("a").toHash != 0);
    assert(SSOString("0123456789abcdef").toHash != 0);
}

/// construct from static array larger than `smallCapacity`
@safe pure nothrow unittest
{
    char[SSOString.smallCapacity + 1] charsMinLargeCapacity;
    const _ = SSOString(charsMinLargeCapacity);
}

// test construction from range
@safe pure unittest
{
    static void test(const scope char[] x,
                     const bool isLarge) @safe pure
    {
        import std.utf : byDchar;
        const scope s = SSOString(x.byDchar);
        assert(s == x);
        assert(s.isLarge == isLarge);
    }
    test("", false);
    test("_", false);
    test("123456789_12345", false);
    test("123456789_123456", true);
    test("123456789_123456789_123456789_", true);
}

/// hole handling
@trusted pure nothrow @nogc unittest
{
    assert(!SSOString.init.isHole);
    assert(!SSOString("").isHole);
    assert(!SSOString("a").isHole);
    assert(SSOString.asHole.isHole);
}

/// DIP-1000 return ref escape analysis
@safe pure nothrow unittest
{
    static if (isDIP1000)
    {
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { string f1() @safe pure nothrow { SSOString x; return x[]; } }));
        static assert(!__traits(compiles, { string f2() @safe pure nothrow { SSOString x; return x.toString; } }));
        static assert(!__traits(compiles, { ref immutable(char) g() @safe pure nothrow @nogc { SSOString x; return x[0]; } }));
    }
}

/// ASCII purity and case-conversion
@safe pure nothrow @nogc unittest
{
    // these are all small ASCII
    assert( SSOString("a").isSmallASCII);
    assert( SSOString("b").isSmallASCII);
    assert( SSOString("z").isSmallASCII);
    assert( SSOString("_").isSmallASCII);
    assert( SSOString("abcd").isSmallASCII);
    assert( SSOString("123456789_12345").isSmallASCII);

    // these are not
    assert(!SSOString("123456789_123456").isSmallASCII); // too large
    assert(!SSOString("123456789_123ö").isSmallASCII);
    assert(!SSOString("ö").isSmallASCII);
    assert(!SSOString("Ö").isSmallASCII);
    assert(!SSOString("åäö").isSmallASCII);
    assert(!SSOString("ö-värld").isSmallASCII);
}

/// ASCII purity and case-conversion
@safe pure unittest
{
    assert(SSOString("A").toLower[] == "a");
    assert(SSOString("a").toUpper[] == "A");
    assert(SSOString("ABCDEFGHIJKLMNO").toLower[] == "abcdefghijklmno"); // small
    assert(SSOString("abcdefghijklmno").toUpper[] == "ABCDEFGHIJKLMNO"); // small
    assert(SSOString("ÅÄÖ").toLower[] == "åäö");
    assert(SSOString("åäö").toUpper[] == "ÅÄÖ");
    assert(SSOString("ABCDEFGHIJKLMNOP").toLower[] == "abcdefghijklmnop"); // large
    assert(SSOString("abcdefghijklmnop").toUpper[] == "ABCDEFGHIJKLMNOP"); // large

    char[6] x = "ÅÄÖ";
    import std.uni : toLowerInPlace;
    auto xref = x[];
    toLowerInPlace(xref);
    assert(x == "åäö");
    assert(xref == "åäö");
}

/// lexicographic comparison
@safe pure unittest
{
    const SSOString a = SSOString("a");
    assert(a == SSOString("a"));

    immutable SSOString b = SSOString("b");

    assert(a < b);
    assert(b > a);
    assert(a[] < b[]);

    assert("a" < "b");
    assert("a" < "å");
    assert("Å" < "å");
    assert(SSOString("a") < SSOString("å"));
    assert(SSOString("ÅÄÖ") < SSOString("åäö"));
}

/// cast to bool
@safe pure unittest
{
    // mimics behaviour of casting of `string` to `bool`
    assert(!SSOString());
    assert(SSOString(""));
    assert(SSOString("abc"));
}

/// to string conversion
@safe pure unittest
{
    // mutable small will GC-allocate
    {
        SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr !is &s.toString()[0]);
    }

    // const small will GC-allocate
    {
        const SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr !is &s.toString()[0]);
    }

    // immutable small will not allocate
    {
        immutable SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr is &s.toString()[0]);
        // TODO check return via -dip1000
    }

    /* Forbid return of possibly locally scoped `Small` small stack object
     * regardless of head-mutability.
     */
    static if (isDIP1000)
    {
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { const SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { immutable SSOString x; return x.ptr; } }));

        /** TODO Enable the following line when DIP-1000 works for opSlice()
         *
         * See_Also: https://issues.dlang.org/show_bug.cgi?id=18792
         */
        // static assert(!__traits(compiles, { string f1() @safe pure nothrow { immutable SSOString x; return x[]; } }));
    }

    // large will never allocate regardless of head-mutability
    {
        SSOString s = SSOString("123456789_123456");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr is &s.toString()[0]); // shouldn't this change?
    }
}

version(unittest)
{
    import nxt.dip_traits : isDIP1000;
}