module nxt.sso_string;

/** Small-size-optimized (SSO) variant of `string`.
 *
 * Storage is placed inline (inside the `SSOString` itself, so typically on the
 * stack) if the number of `char`s is at most `smallCapacity`, otherwise as a
 * normal (large) `string`. The large `string` will be allocated on the GC-heap
 * if the `SSOString` is constructed from a non-`string` (non-`immutable`
 * `char[]`) parameter or from a static array.
 *
 * Because `SSOString` doesn't have a destructor it can safely allocate using a
 * GC-backed region allocator without relying on a GC finalizer.
 *
 * In order to mimic `string/array/slice`-behaviour, opCast returns `false` for
 * `SSOString()` and `true` for `SSOString("")`. This requires `SSOString()` to
 * default to a large string in which large pointer is set to `null`.
 *
 * Big-endian platform support hasn't been verified.
 *
 * See_Also: https://forum.dlang.org/post/pb87rn$2icb$1@digitalmars.com
 * See_Also: https://issues.dlang.org/show_bug.cgi?id=18792
 * See_Also: https://forum.dlang.org/post/agzznbzkacfhyqvoezht@forum.dlang.org
 *
 * TODO Use extra bits in `Short.length` for these special text encodings
 * (the small buffer holds `smallCapacity` * 8 = 120 bits):
 * - 5-bit lowercase English letter into 120/5 = 24 chars
 * - 5-bit uppercase English letter into 120/5 = 24 chars
 * - 6-bit mixedcase English letter into 120/6 = 20 chars
 *
 * TODO Add to Phobos' std.typecons or std.array or std.string
 */
struct SSOString
{
@safe:
    /** Write the characters of `this` to `sink`. */
    @property void toString(scope void delegate(const(char)[]) @safe sink) const
    {
        sink(opSlice());
    }

pure:

    /** Construct from `source`, which potentially needs GC-allocation.
     *
     * Allocation happens iff `source.length > smallCapacity` and `source` is
     * either not a `string` (its elements are not `immutable`) or is a static
     * array. A large dynamic `immutable(char)[]` is borrowed without copying.
     */
    this(Chars)(const scope auto ref Chars source) @trusted nothrow
    if (is(Chars : const(char)[])) // `isCharArray`
    {
        static if (__traits(isStaticArray, Chars))
        {
            static if (source.length <= smallCapacity) // inferred @nogc
            {
                small.data[0 .. source.length] = source;
                small.length = cast(typeof(small.length))(encodeSmallLength(source.length));
            }
            else
            {
                /* A static array is a value type whose storage belongs to the
                 * caller (or, for an rvalue passed via `auto ref`, to this
                 * constructor's own parameter). Its address must therefore
                 * never be retained, not even when the elements are
                 * `immutable`. `raw.ptr` is also still `null` here, so the
                 * elements cannot be copied through it. Always duplicate.
                 */
                raw.ptr = source.idup.ptr; // GC-allocate
                raw.length = encodeLargeLength(source.length);
            }
        }
        else                    // `Chars` is a (dynamic) array slice
        {
            if (source.length <= smallCapacity)
            {
                (cast(char*)small.data.ptr)[0 .. source.length] = source;
                small.length = cast(typeof(small.length))(encodeSmallLength(source.length));
            }
            else
            {
                static if (is(typeof(source[0]) == immutable(char)))
                {
                    raw.ptr = source.ptr; // already immutable so no duplication needed
                }
                else
                {
                    raw.ptr = source.idup.ptr; // GC-allocate
                }
                raw.length = encodeLargeLength(source.length);
            }
        }
    }

    /** Construct from `source` of `dchar`.
     *
     * `source` is iterated twice: once to count the needed `char`s and once to
     * encode them.
     * NOTE(review): this presumably requires `source` to be re-iterable
     * (value-type/forward range) - confirm against callers.
     */
    this(Source)(scope Source source) @trusted
    if (is(typeof({ foreach (const dchar elem; Source.init) {} })) && // TODO `isConstRefIterable`
        is(typeof(Source.init.front) == dchar))
    {
        import std.utf : encode;

        // pre-calculate number of `char`s needed
        size_t charCount = 0;
        foreach (const e; source)
        {
            char[4] chars;      // TODO `= void`
            charCount += encode(chars, e);
        }

        if (charCount <= smallCapacity) // fits in small
        {
            size_t offset = 0;
            foreach (const e; source)
            {
                char[4] chars;
                const count = encode(chars, e);
                (cast(char[])(small.data))[offset .. offset + count] = chars[0 .. count];
                offset += count;
            }
            assert(offset <= smallCapacity);
            small.length = cast(typeof(small.length))(encodeSmallLength(offset));
        }
        else                    // needs large
        {
            large = new immutable(char)[charCount];
            size_t offset = 0;
            foreach (const e; source)
            {
                char[4] chars;
                const count = encode(chars, e);
                (cast(char[])(large))[offset .. offset + count] = chars[0 .. count]; // NOTE modifies immutable data
                offset += count;
            }
            // overwrite the plain slice length with the tagged (shifted) encoding
            raw.length = encodeLargeLength(charCount);
        }
    }

nothrow:

    /** Return `this` converted to a `string`, without any GC-allocation because
     * `this` is `immutable`.
     */
    @property string toString() immutable @trusted pure nothrow @nogc // never allocates
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice();
    }

    /** Return `this` converted to a `string`.
     *
     * GC-allocates iff `this` is small (`length <= smallCapacity`), because the
     * inline characters must be duplicated to become `immutable`. A large
     * string is returned as is without allocation.
     *
     * Implementation kept in sync with `opSlice`.
     */
    @property string toString() const return @trusted pure nothrow // may GC-allocate
    {
        if (isLarge)
        {
            // GC-allocated slice has immutable members so ok to cast
            return cast(typeof(return))raw.ptr[0 .. decodeRawLength(raw.length)]; // no allocation
        }
        else
        {
            return small.data.ptr[0 .. decodeRawLength(small.length)].idup; // need duplicate to make `immutable`
        }
    }

@nogc:

    /** Get hash of `this`, with extra fast computation for the small case.
     */
    @property hash_t toHash() const scope @trusted
    {
        version(D_Coverage) {} else version(LDC) pragma(inline, true);
        if (isLarge)
        {
            import core.internal.hash : hashOf;
            return hashOf(opSliceLarge()); // use default
        }
        else                    // fast path for small string
        {
            import nxt.hash_functions : lemireHash64;
            return (lemireHash64(words[0] >> 1) ^ // shift away LS-bit being a constant for a small string
                    lemireHash64(words[1]));
        }
    }

    /** Get length.
     */
    @property size_t length() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        if (isLarge)
        {
            return decodeRawLength(large.length); // skip tag bits
        }
        else
        {
            return decodeRawLength(small.length); // skip tag bits
        }
    }
    /// ditto
    alias opDollar = length;

    /** Check if `this` is empty. */
    @property bool empty() const scope @safe pure nothrow @nogc
    {
        return length == 0;
    }

    /** Check if `this` is `null`.
     *
     * A small string always has its tag bit set, so only a large string with
     * zero length and zero metadata bits is reported as `null`.
     */
    @property bool isNull() const scope @trusted pure nothrow @nogc
    {
        return raw.length == 0;
    }

    /** Return a slice to either the whole large or whole small `string`.
     *
     * Implementation is kept in sync with `toString`.
     */
    inout(char)[] opSlice() inout return scope @trusted @nogc
    {
        version(D_Coverage) {} else pragma(inline, true); // TODO: maybe remove
        if (isLarge)
        {
            return opSliceLarge();
        }
        else
        {
            return opSliceSmall();
        }
    }

    /** Return a slice at `[i .. j]` to either the internally stored large or small `string`.
     *
     * Implementation is kept in sync with `toString`.
     */
    inout(char)[] opSlice(size_t i, size_t j) inout return @safe
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice()[i .. j]; // does range check
    }

    /// Return a slice to the whole large `string`. Requires `isLarge`.
    private inout(char)[] opSliceLarge() inout return scope @system @nogc
    {
        version(D_Coverage) {} else pragma(inline, true);
        version(unittest) assert(isLarge);
        return cast(typeof(return))raw.ptr[0 .. decodeRawLength(raw.length)]; // no allocation
        // alternative:  return large.ptr[0 .. large.length/2];
    }

    /// Return a slice to the whole small (inline) `string`. Requires `!isLarge`.
    private inout(char)[] opSliceSmall() inout return scope @trusted @nogc
    {
        version(D_Coverage) {} else pragma(inline, true);
        version(unittest) assert(!isLarge);
        return cast(typeof(return))small.data.ptr[0 .. decodeRawLength(small.length)]; // scoped
    }

    /** Return the `index`ed `char` of `this`.
     */
    ref inout(char) opIndex(size_t index) inout return @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice()[index]; // does range check
    }

    /// Get pointer to the internally stored `char`s.
    @property private immutable(char)* ptr() const return @trusted
    {
        if (isLarge)
        {
            return large.ptr;   // GC-heap pointer
        }
        else
        {
            return small.data.ptr; // stack pointer
        }
    }

    /** Check if `this` is equal to `rhs`. */
    bool opEquals()(const scope auto ref typeof(this) rhs) const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice() == rhs.opSlice();
    }

    /** Check if `this` is equal to `rhs`. */
    bool opEquals()(const scope const(char)[] rhs) const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice() == rhs;
    }

    /** Compare `this` with `that`.
     *
     * Returns: `-1`, `0` or `1` when `this` orders before, equal to or after
     * `that`, using the built-in array comparison on the character slices.
     *
     * See_Also: https://forum.dlang.org/post/muhfypwftdivluqdbmdf@forum.dlang.org
     */
    @property int opCmp()(const scope typeof(this) that) const scope // template-lazy
    {
        version(D_Coverage) {} else pragma(inline, true);
        scope const a = this.opSlice();
        scope const b = that.opSlice();
        return a < b ? -1 : (a > b);
        // import core.internal.array.comparison : __cmp; // instead of `std.algorithm.comparison : cmp`;
        // return __cmp(this[], that[]);
    }

    /** Cast to `bool`: `false` for `SSOString()`, `true` for `SSOString("")`
     * and every other value constructed from characters.
     */
    bool opCast(T : bool)() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        if (isLarge)
        {
            return large !is null;
        }
        else
        {
            return small.length != 0; // tag bit is always set for a small string
        }
    }

    /** Support trait `isNullable`. */
    static immutable nullValue = typeof(this).init;

    /** Support trait `isHoleable`. */
    static immutable holeValue = typeof(this).asHole();

    /** Check if this is a hole, meaning a removed/erased value. */
    bool isHole() const scope @safe nothrow @nogc
    {
        return words[0] == size_t.max;
    }

    /** Make this a hole, meaning a removed/erased value. */
    void holeify() @system @nogc scope
    {
        words[0] = size_t.max;
        words[1] = size_t.max;
    }

    /** Returns: a holed `SSOString`, meaning a removed/erased value. */
    private static typeof(this) asHole() @system
    {
        typeof(return) result = void;
        result.holeify();
        return result;
    }

    /** Check if `this` is a small ASCII string. */
    bool isSmallASCII() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        static assert(largeLengthTagBitOffset == 0); // masks below assume the tag is bit 0 of the lsbyte
        // should be fast on 64-bit platforms:
        // NOTE(review): the 64-bit masks presumably assume `size_t.sizeof == 8` - confirm
        return ((words[0] & 0x_80_80_80_80__80_80_80_01UL) == 1 && // bit 0 of lsbyte is set => small
                (words[1] & 0x_80_80_80_80__80_80_80_80UL) == 0);
    }

private:

    /** Returns: `true` iff this is a large string, otherwise `false.` */
    @property bool isLarge() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return !(large.length & (1 << largeLengthTagBitOffset)); // first bit discriminates small from large
    }

    alias Large = immutable(char)[];

    public enum smallCapacity = Large.sizeof - Small.length.sizeof;
    static assert(smallCapacity > 0, "No room for small source for immutable(char) being " ~ immutable(char).stringof);

    enum largeLengthTagBitOffset = 0; ///< bit position for large tag in length.
    enum smallLengthBitCount = 4;
    static assert(smallCapacity == 2^^smallLengthBitCount-1);

    enum metaBits = 3;               ///< Number of bits used for metadata.
    enum metaMask = (2^^metaBits-1); ///< Mask for metadata shifted to bottom.
    enum tagsBitCount = 1 + metaBits; ///< Number of bits used for small discriminator plus extra meta data.
    static assert(smallLengthBitCount + tagsBitCount == 8);

    /// Get metadata byte with first `metaBits` bits set.
    @property ubyte metadata() const @safe pure nothrow @nogc
    {
        // same shift as used by the setter: metadata lives just above the tag bit
        return (small.length >> (largeLengthTagBitOffset + 1)) & metaMask; // get bits [1 .. 1+metaBits]
    }

    /// Set metadata to `data`, which must fit in `metaBits` bits. Length and small/large tag are kept.
    @property void metadata(ubyte data) @trusted pure nothrow @nogc
    {
        assert(data < (1 << metaBits));
        if (isLarge)
        {
            raw.length = encodeLargeLength(length) | ((data & metaMask) << (largeLengthTagBitOffset + 1));
        }
        else
        {
            small.length = cast(ubyte)encodeSmallLength(length) | ((data & metaMask) << (largeLengthTagBitOffset + 1));
        }
    }

    /// Decode raw length `rawLength` by shifting away tag bits.
    static size_t decodeRawLength(size_t rawLength) @safe pure nothrow @nogc
    {
        return rawLength >> tagsBitCount;
    }

    /// Encode `Large` length from `Length`.
    static size_t encodeLargeLength(size_t length) @safe pure nothrow @nogc
    {
        return (length << tagsBitCount);
    }

    /// Encode `Small` length from `Length`.
    static size_t encodeSmallLength(size_t length) @safe pure nothrow @nogc
    {
        assert(length <= smallCapacity);
        return (length << tagsBitCount) | (1 << largeLengthTagBitOffset);
    }

    version(LittleEndian) // see: http://forum.dlang.org/posting/zifyahfohbwavwkwbgmw
    {
        struct Small
        {
            /* TODO only first 4 bits are needed to represent a length between
             * 0-15, use other 4 bits.
             */
            ubyte length = 0;
            immutable(char)[smallCapacity] data = [0,0,0,0,0,
                                                   0,0,0,0,0,
                                                   0,0,0,0,0]; // explicit init needed for `__traits(isZeroInit)` to be true.
        }
    }
    else
    {
        struct Small
        {
            immutable(char)[smallCapacity] data = [0,0,0,0,0,
                                                   0,0,0,0,0,
                                                   0,0,0,0,0]; // explicit init needed for `__traits(isZeroInit)` to be true.
            /* TODO only first 4 bits are needed to represent a length between
             * 0-15, use other 4 bits.
             */
            ubyte length;
        }
        static assert(0, "TODO add BigEndian support and test");
    }

    struct Raw                  // same memory layout as `immutable(char)[]`
    {
        size_t length = 0;      // can be bit-fiddled without GC allocation
        immutable(char)* ptr = null;
    }

    union
    {
        Raw raw;
        Large large;
        Small small;
        size_t[2] words;
    }
}
version(unittest) static assert(SSOString.sizeof == string.sizeof);

/** Returns: `x` lowercased. */
SSOString toLower()(const SSOString x) @trusted // template-lazy
{
    if (x.isSmallASCII)         // small ASCII fast-path
    {
        typeof(return) result = void;
        result.small.length = x.small.length;
        // convert the whole inline buffer so that every byte of `result` gets initialized
        foreach (const index; 0 .. x.smallCapacity)
        {
            import std.ascii : toLower;
            (cast(char[])(result.small.data))[index] = toLower(x.small.data[index]);
        }
        return result;
    }
    else if (x.isLarge)
    {
        import std.uni : asLowerCase;
        import std.conv : to;
        return typeof(return)(x.opSlice().asLowerCase.to!string); // TODO make .to!string nothrow
    }
    else                        // small non-ASCII path usually without GC-allocation
    {
        typeof(return) result = x; // copy
        import std.uni : toLowerInPlace;
        auto slice = cast(char[])(result.opSlice()); // need ref to slice
        toLowerInPlace(slice);
        if (slice is result.opSlice() || // no reallocation
            slice.length == result.length) // or same length (happens for German double-s)
        {
            return result;
        }
        else
        {
            version(none)
            {
                import nxt.dbgio;
                dbg(`toLowerInPlace reallocated from "`,
                    result.opSlice(), `" of length `, result.opSlice().length,
                    ` to "`
                    , slice, `" of length `, slice.length);
            }
            return typeof(return)(slice); // reallocation occurred
        }
    }
}

/** Returns: `x` uppercased. */
SSOString toUpper()(const SSOString x) @trusted // template-lazy
{
    if (x.isSmallASCII)         // small ASCII fast-path
    {
        typeof(return) result = void;
        result.small.length = x.small.length;
        // convert the whole inline buffer so that every byte of `result` gets initialized
        foreach (const index; 0 .. x.smallCapacity)
        {
            import std.ascii : toUpper;
            (cast(char[])(result.small.data))[index] = toUpper(x.small.data[index]);
        }
        return result;
    }
    else if (x.isLarge)
    {
        import std.uni : asUpperCase;
        import std.conv : to;
        return typeof(return)(x.opSlice().asUpperCase.to!string); // TODO make .to!string nothrow
    }
    else                        // small non-ASCII path usually without GC-allocation
    {
        typeof(return) result = x; // copy
        import std.uni : toUpperInPlace;
        auto slice = cast(char[])(result.opSlice()); // need ref to slice
        toUpperInPlace(slice);
        if (slice is result.opSlice() || // no reallocation
            slice.length == result.length) // or same length (happens for German double-s)
        {
            return result;
        }
        else
        {
            version(none)
            {
                import nxt.dbgio;
                dbg(`toUpperInPlace reallocated from "`,
                    result.opSlice(), `" of length `, result.opSlice().length,
                    ` to "`
                    , slice, `" of length `, slice.length);
            }
            return typeof(return)(slice); // reallocation occurred
        }
    }
}

/// construct from non-immutable source is allowed in non-`@nogc`-scope
@safe pure nothrow unittest
{
    alias S = SSOString;

    scope const char[] x0;
    const s0 = SSOString(x0);   // no .idup

    scope const char[] x16 = new char[16];
    const s16 = SSOString(x16); // will call .idup
}

/// construct from non-immutable source is not allowed in `@nogc`-scope
@safe pure nothrow @nogc unittest
{
    scope const char[] s;
    // TODO why does this fail? static assert(!__traits(compiles, { const _ = SSOString(s); }));
}

/// verify `isNull` when @nogc constructing from small static array of `char`s
@trusted pure nothrow @nogc unittest
{
    static foreach (const n; 0 .. SSOString.smallCapacity + 1)
    {
        {
            immutable(char)[n] x;
            assert(!SSOString(x).isNull);
        }
    }
}

/// verify `isNull` when constructing from large static array of `char`s
@trusted pure nothrow unittest
{
    static foreach (const n; SSOString.smallCapacity + 1 .. 32)
    {
        {
            immutable(char)[n] x;
            assert(!SSOString(x).isNull);
        }
    }
}

/// verify contents when constructing from large immutable static array of `char`s
@trusted pure nothrow unittest
{
    immutable(char)[SSOString.smallCapacity + 1] x = "0123456789abcdef";
    const s = SSOString(x);
    assert(s.isLarge);
    assert(s.length == x.length);
    assert(s == x);
    assert(s[0] == '0');
    assert(s[15] == 'f');
    assert(s.ptr !is x.ptr);    // characters are copied, caller's storage is not borrowed
}

/// verify `isNull` when constructing from dynamic array of `char`s
@trusted pure nothrow unittest
{
    foreach (const n; 0 .. 32)
    {
        scope x = new immutable(char)[n];
        assert(!SSOString(x).isNull);
    }
}

/// test behaviour of `==` and `is` operator
@trusted pure nothrow @nogc unittest
{
    const SSOString x = "42";
    assert(!x.isNull);
    assert(x == "42");

    const SSOString y = "42";
    assert(!y.isNull);
    assert(y == "42");

    assert(x == y);
    assert(x == y[]);
    assert(x[] == y);
    assert(x[] == y[]);
    assert(x[] is x[]);
    assert(y[] is y[]);
    assert(x[] !is y[]);
    assert(x.ptr !is y.ptr);

    const SSOString z = "43";
    assert(!z.isNull);
    assert(z == "43");
    assert(x != z);
    assert(x[] != z[]);
    assert(x !is z);
    assert(x[] !is z[]);
}

///
@safe pure nothrow @nogc unittest
{
    static assert(SSOString.smallCapacity == 15);

    import nxt.gc_traits : mustAddGCRange;
    static assert(mustAddGCRange!SSOString); // `Large large.ptr` must be scanned

    static assert(__traits(isZeroInit, SSOString));
    // TODO assert(SSOString.init == SSOString.nullValue);

    auto s0 = SSOString.init;
    assert(s0.isNull);
    assert(s0.length == 0);
    assert(s0.isLarge);
    assert(s0[] == []);

    char[SSOString.smallCapacity] charsSmallCapacity = "123456789_12345"; // fits in small string
    const sSmallCapacity = SSOString(charsSmallCapacity);
    assert(!sSmallCapacity.isLarge);
    assert(sSmallCapacity.length == SSOString.smallCapacity);
    assert(sSmallCapacity == charsSmallCapacity);

    const s0_ = SSOString("");
    assert(!s0_.isNull);         // cannot distinguish
    assert(s0 == s0_);

    const s7 = SSOString("0123456");
    assert(!s7.isNull);

    const s7_ = SSOString("0123456_"[0 .. $ - 1]);
    assert(s7.ptr !is s7_.ptr); // string data shall not overlap
    assert(s7 == s7_);

    const _s7 = SSOString("_0123456"[1 .. $]); // source from other string literal
    assert(s7.ptr !is _s7.ptr); // string data shall not overlap
    assert(s7 == _s7);

    assert(!s7.isLarge);
    assert(s7.length == 7);
    assert(s7[] == "0123456");
    assert(s7[] == "_0123456"[1 .. $]);
    assert(s7[] == "0123456_"[0 .. $ - 1]);
    assert(s7[0 .. 4] == "0123");

    const s15 = SSOString("0123456789abcde");
    assert(!s15.isNull);
    static assert(is(typeof(s15[]) == const(char)[]));
    assert(!s15.isLarge);
    assert(s15.length == 15);
    assert(s15[] == "0123456789abcde");
    assert(s15[0 .. 4] == "0123");
    assert(s15[10 .. 15] == "abcde");
    assert(s15[10 .. $] == "abcde");

    const s16 = SSOString("0123456789abcdef");
    assert(!s16.isNull);
    static assert(is(typeof(s16[]) == const(char)[]));
    assert(s16.isLarge);

    const s16_ = SSOString("0123456789abcdef_"[0 .. s16.length]);
    assert(s16.length == s16_.length);
    assert(s16[] == s16_[]);
    assert(s16.ptr !is s16_.ptr); // string data shall not overlap
    assert(s16 == s16_);          // but contents is equal

    const _s16 = SSOString("_0123456789abcdef"[1 .. $]);
    assert(s16.length == _s16.length);
    assert(s16[] == _s16[]);    // contents is equal
    assert(s16 == _s16);        // contents is equal

    assert(s16.length == 16);
    assert(s16[] == "0123456789abcdef");
    assert(s16[0] == '0');
    assert(s16[10] == 'a');
    assert(s16[15] == 'f');
    assert(s16[0 .. 4] == "0123");
    assert(s16[10 .. 16] == "abcdef");
    assert(s16[10 .. $] == "abcdef");
}

/// metadata for null string
@safe pure nothrow @nogc unittest
{
    auto s = SSOString.init;
    assert(s.isNull);
    foreach (const i; 0 .. 8)
    {
        s.metadata = i;
        assert(s.metadata == i);
        assert(s.length == 0);
    }
}

/// metadata for small string
@safe pure nothrow @nogc unittest
{
    auto s = SSOString("0123456");
    assert(!s.isNull);
    assert(!s.isLarge);
    foreach (const i; 0 .. 8)
    {
        s.metadata = i;
        assert(s.metadata == i);
        assert(s.length == 7);
        assert(!s.isLarge);
        assert(!s.isNull);
    }
}

/// metadata for small string with maximum length
@safe pure nothrow @nogc unittest
{
    auto s = SSOString("0123456789abcde");
    assert(s.length == SSOString.smallCapacity);
    assert(!s.isNull);
    assert(!s.isLarge);
    foreach (const i; 0 .. 8)
    {
        s.metadata = i;
        assert(s.metadata == i);
        assert(s.length == 15);
        assert(!s.isLarge);
        assert(!s.isNull);
    }
}

/// metadata for large string with minimum length
@safe pure nothrow @nogc unittest
{
    auto s = SSOString("0123456789abcdef");
    assert(s.length == SSOString.smallCapacity + 1);
    assert(!s.isNull);
    assert(s.isLarge);
    assert(!s.empty);
    foreach (const i; 0 .. 8)
    {
        s.metadata = i;
        assert(s.metadata == i);
        assert(s.length == 16);
        assert(s.isLarge);
        assert(!s.isNull);
    }
}

/// equality and equivalence
@safe pure nothrow @nogc unittest
{
    assert(SSOString() == SSOString(""));
    assert(SSOString() !is SSOString(""));
}

/// hashing of null, empty and non-empty
@safe pure nothrow @nogc unittest
{
    assert(SSOString().toHash == 0);
    assert(SSOString("").toHash == 0);
    assert(SSOString("a").toHash != 0);
    assert(SSOString("0123456789abcdef").toHash != 0);
}

/// construct from static array larger than `smallCapacity`
@safe pure nothrow unittest
{
    char[SSOString.smallCapacity + 1] charsMinLargeCapacity;
    const _ = SSOString(charsMinLargeCapacity);
}

// test construction from range
@safe pure unittest
{
    static void test(const scope char[] x,
                     const bool isLarge) @safe pure
    {
        import std.utf : byDchar;
        const scope s = SSOString(x.byDchar);
        assert(s == x);
        assert(s.isLarge == isLarge);
    }
    test("", false);
    test("_", false);
    test("123456789_12345", false);
    test("123456789_123456", true);
    test("123456789_123456789_123456789_", true);
}

/// hole handling
@trusted pure nothrow @nogc unittest
{
    assert(!SSOString.init.isHole);
    assert(!SSOString("").isHole);
    assert(!SSOString("a").isHole);
    assert(SSOString.asHole.isHole);
}

/// DIP-1000 return ref escape analysis
@safe pure nothrow unittest
{
    static if (isDIP1000)
    {
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { string f1() @safe pure nothrow { SSOString x; return x[]; } }));
        static assert(!__traits(compiles, { string f2() @safe pure nothrow { SSOString x; return x.toString; } }));
        static assert(!__traits(compiles, { ref immutable(char) g() @safe pure nothrow @nogc { SSOString x; return x[0]; } }));
    }
}

/// ASCII purity and case-conversion
@safe pure nothrow @nogc unittest
{
    // these are all small ASCII
    assert( SSOString("a").isSmallASCII);
    assert( SSOString("b").isSmallASCII);
    assert( SSOString("z").isSmallASCII);
    assert( SSOString("_").isSmallASCII);
    assert( SSOString("abcd").isSmallASCII);
    assert( SSOString("123456789_12345").isSmallASCII);

    // these are not
    assert(!SSOString("123456789_123456").isSmallASCII); // too large
    assert(!SSOString("123456789_123ö").isSmallASCII);
    assert(!SSOString("ö").isSmallASCII);
    assert(!SSOString("Ö").isSmallASCII);
    assert(!SSOString("åäö").isSmallASCII);
    assert(!SSOString("ö-värld").isSmallASCII);
}

/// ASCII purity and case-conversion
@safe pure unittest
{
    assert(SSOString("A").toLower[] == "a");
    assert(SSOString("a").toUpper[] == "A");
    assert(SSOString("ABCDEFGHIJKLMNO").toLower[] == "abcdefghijklmno"); // small
    assert(SSOString("abcdefghijklmno").toUpper[] == "ABCDEFGHIJKLMNO"); // small
    assert(SSOString("ÅÄÖ").toLower[] == "åäö");
    assert(SSOString("åäö").toUpper[] == "ÅÄÖ");
    assert(SSOString("ABCDEFGHIJKLMNOP").toLower[] == "abcdefghijklmnop"); // large
    assert(SSOString("abcdefghijklmnop").toUpper[] == "ABCDEFGHIJKLMNOP"); // large

    char[6] x = "ÅÄÖ";
    import std.uni : toLowerInPlace;
    auto xref = x[];
    toLowerInPlace(xref);
    assert(x == "åäö");
    assert(xref == "åäö");
}

/// lexicographic comparison
@safe pure unittest
{
    const SSOString a = SSOString("a");
    assert(a == SSOString("a"));

    immutable SSOString b = SSOString("b");

    assert(a < b);
    assert(b > a);
    assert(a[] < b[]);

    assert("a" < "b");
    assert("a" < "å");
    assert("Å" < "å");
    assert(SSOString("a") < SSOString("å"));
    assert(SSOString("ÅÄÖ") < SSOString("åäö"));
}

/// cast to bool
@safe pure unittest
{
    // mimics behaviour of casting of `string` to `bool`
    assert(!SSOString());
    assert(SSOString(""));
    assert(SSOString("abc"));
}

/// to string conversion
@safe pure unittest
{
    // mutable small will GC-allocate
    {
        SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr !is &s.toString()[0]);
    }

    // const small will GC-allocate
    {
        const SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr !is &s.toString()[0]);
    }

    // immutable small will not allocate
    {
        immutable SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr is &s.toString()[0]);
        // TODO check return via -dip1000
    }

    /* Forbid return of possibly locally scoped `Small` small stack object
     * regardless of head-mutability.
     */
    static if (isDIP1000)
    {
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { const SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { immutable SSOString x; return x.ptr; } }));

        /** TODO Enable the following line when DIP-1000 works for opSlice()
         *
         * See_Also: https://issues.dlang.org/show_bug.cgi?id=18792
         */
        // static assert(!__traits(compiles, { string f1() @safe pure nothrow { immutable SSOString x; return x[]; } }));
    }

    // large will never allocate regardless of head-mutability
    {
        SSOString s = SSOString("123456789_123456");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr is &s.toString()[0]); // shouldn't this change?
    }
}

version(unittest)
{
    import nxt.dip_traits : isDIP1000;
}