module nxt.sso_string;

/** Small-size-optimized (SSO) variant of `string`.
 *
 * Storage is placed inline in the `SSOString` itself (i.e. on the stack for a
 * local variable) if the number of `char`s is at most `smallCapacity`,
 * otherwise as a normal (large) `string`. The large `string` will be allocated
 * on the GC-heap if the `SSOString` is constructed from a non-`string`
 * (non-`immutable` `char[]`) parameter.
 *
 * Because `SSOString` doesn't have a destructor it can safely allocate using a
 * GC-backed region allocator without relying on a GC finalizer.
 *
 * In order to mimic `string/array/slice`-behaviour, opCast returns `false` for
 * `SSOString()` and `true` for `SSOString("")`. This requires `SSOString()` to
 * default to a large string in which large pointer is set to `null`.
 *
 * Big-endian platform support hasn't been verified.
 *
 * See_Also: https://forum.dlang.org/post/pb87rn$2icb$1@digitalmars.com
 * See_Also: https://issues.dlang.org/show_bug.cgi?id=18792
 * See_Also: https://forum.dlang.org/post/agzznbzkacfhyqvoezht@forum.dlang.org
 *
 * TODO: Use extra bits in `Short.length` for these special text encodings:
 * - 5-bit lowercase English letter into 120/5 = 24 chars
 * - 5-bit uppercase English letter into 120/5 = 24 chars
 * - 6-bit mixedcase English letter into 120/6 = 20 chars
 *
 * TODO: Move to Phobos' std.typecons or std.array or std.string
 */
struct SSOString
{
@safe:
    /** Write the contents of `this` to `sink`. */
    @property void toString(Sink)(ref scope Sink sink) const scope
    {
        sink(opSlice());
    }

pure:

    /** Construct from `source`, which potentially needs GC-allocation (iff
     * `source.length > smallCapacity` and `source` is not a `string`).
     *
     * A static array longer than `smallCapacity` is always copied to the
     * GC-heap, regardless of the mutability of its elements, because a static
     * array is value storage whose lifetime is not controlled by `this`.
     */
    this(Chars)(const scope auto ref Chars source) @trusted nothrow
    if (is(Chars : const(char)[])) // `isCharArray`
    {
        static if (__traits(isStaticArray, Chars))
        {
            static if (source.length <= smallCapacity) // inferred @nogc
            {
                small.data[0 .. source.length] = source;
                small.length = cast(typeof(small.length))(encodeSmallLength(source.length));
            }
            else
            {
                // `raw.ptr` is `null` at this point, so the elements cannot be
                // copied "into" it; nor may the (possibly stack-allocated)
                // static array be aliased. Always duplicate onto the GC-heap.
                raw.ptr = source[].idup.ptr; // GC-allocate
                raw.length = encodeLargeLength(source.length);
            }
        }
        else                    // `Chars` is a (dynamic) array slice
        {
            if (source.length <= smallCapacity)
            {
                (cast(char*)small.data.ptr)[0 .. source.length] = source;
                small.length = cast(typeof(small.length))(encodeSmallLength(source.length));
            }
            else
            {
                static if (is(typeof(source[0]) == immutable(char)))
                    raw.ptr = source.ptr; // already immutable so no duplication needed
                else
                    raw.ptr = source.idup.ptr; // GC-allocate
                raw.length = encodeLargeLength(source.length);
            }
        }
    }

    /** Construct from `source` of `dchar`.
     *
     * `source` is iterated twice: once to count the number of UTF-8 code units
     * needed and once to encode them into either the small or large storage.
     */
    this(Source)(scope Source source) @trusted
    if (is(typeof({ foreach (const dchar elem; Source.init) {} })) && // TODO: `isConstRefIterable`
        is(typeof(Source.init.front) == dchar))
    {
        import std.utf : encode;

        // pre-calculate number of `char`s needed
        size_t charCount = 0;
        foreach (const e; source)
        {
            char[4] chars;      // TODO: `= void`
            charCount += encode(chars, e);
        }

        if (charCount <= smallCapacity) // fits in small
        {
            size_t offset = 0;
            foreach (const e; source)
            {
                char[4] chars;
                const count = encode(chars, e);
                (cast(char[])(small.data))[offset .. offset + count] = chars[0 .. count];
                offset += count;
            }
            assert(offset <= smallCapacity);
            small.length = cast(typeof(small.length))(encodeSmallLength(offset));
        }
        else                    // needs large
        {
            large = new immutable(char)[charCount];
            size_t offset = 0;
            foreach (const e; source)
            {
                char[4] chars;
                const count = encode(chars, e);
                (cast(char[])(large))[offset .. offset + count] = chars[0 .. count]; // NOTE modifies immutable data
                offset += count;
            }
            // must come last because it overwrites the plain length set via `large`
            raw.length = encodeLargeLength(charCount);
        }
    }

nothrow:

    /** Return `this` converted to a `string`, without any GC-allocation because
     * `this` is `immutable`.
     */
    @property string toString() immutable @trusted return pure nothrow @nogc // never allocates
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice();
    }

    /** Return `this` converted to a `string`, which potentially needs
     * GC-allocation (iff `this` is small).
     *
     * implementation kept in sync with `opSlice`.
     */
    @property string toString() const return @trusted pure nothrow // may GC-allocate
    {
        if (isLarge)
            // GC-allocated slice has immutable members so ok to cast
            return cast(typeof(return))raw.ptr[0 .. decodeRawLength(raw.length)]; // no allocation
        else
            return small.data.ptr[0 .. decodeRawLength(small.length)].idup; // need duplicate to make `immutable`
    }

@nogc:

    /** Get hash of `this`, with extra fast computation for the small case.
     */
    @property hash_t toHash() const scope @trusted
    {
        version(D_Coverage) {} else version(LDC) pragma(inline, true);
        import core.internal.hash : hashOf;
        import nxt.hash_functions : lemireHash64;
        if (isLarge)
            return hashOf(opSliceLarge()); // use default
        else                    // fast path for small string
            return (lemireHash64(words[0] >> 1) ^ // shift away LS-bit being a constant for a small string
                    lemireHash64(words[1]));
    }

    /** Get length. */
    @property size_t length() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        if (isLarge)
            return decodeRawLength(large.length); // shift away tag bits
        else
            return decodeRawLength(small.length); // shift away tag bits
    }
    /// ditto
    alias opDollar = length;

    /** Check if `this` is empty. */
    bool empty() const @property scope @safe pure nothrow @nogc
    {
        return length == 0;
    }

    /** Check if `this` is `null`. */
    @property bool isNull() const scope @trusted pure nothrow @nogc
    {
        return raw.length == 0;
    }

    /** Return a slice to either the whole large or whole small `string`.
     *
     * Implementation is kept in sync with `toString`.
     */
    inout(char)[] opSlice() inout scope return @trusted @nogc
    {
        version(D_Coverage) {} else pragma(inline, true); // TODO: maybe remove
        if (isLarge)
            return opSliceLarge();
        else
            return opSliceSmall();
    }

    /** Return a slice at `[i .. j]` to either the internally stored large or small `string`.
     *
     * Implementation is kept in sync with `toString`.
     */
    inout(char)[] opSlice(size_t i, size_t j) inout return @safe
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice()[i .. j];
    }

    /// Return a slice to the whole large `string`. Requires `isLarge`.
    private inout(char)[] opSliceLarge() inout return scope @system @nogc
    in { version(unittest) assert(isLarge); }
    do
    {
        version(D_Coverage) {} else pragma(inline, true);
        return cast(typeof(return))raw.ptr[0 .. decodeRawLength(raw.length)]; // no allocation
        // alternative:  return large.ptr[0 .. large.length/2];
    }

    /// Return a slice to the whole small `string`. Requires `!isLarge`.
    private inout(char)[] opSliceSmall() inout return @trusted @nogc
    in { version(unittest) assert(!isLarge); }
    do
    {
        version(D_Coverage) {} else pragma(inline, true);
        return cast(typeof(return))small.data.ptr[0 .. decodeRawLength(small.length)]; // scoped
    }

    /** Return the `index`ed `char` of `this`.
     */
    ref inout(char) opIndex(size_t index) inout return @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice()[index]; // does range check
    }

    /// Get pointer to the internally stored `char`s.
    @property private immutable(char)* ptr() const return @trusted
    {
        if (isLarge)
            return large.ptr;   // GC-heap pointer
        else
            return small.data.ptr; // stack pointer
    }

    /** Check if `this` is equal to `rhs`. */
    bool opEquals()(const scope auto ref typeof(this) rhs) const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice() == rhs.opSlice();
    }

    /** Check if `this` is equal to `rhs`. */
    bool opEquals()(const scope const(char)[] rhs) const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return opSlice() == rhs;
    }

    /** Compare `this` with `that`.
     *
     * Returns: `-1`, `0` or `1` according to the built-in array comparison of
     * the two slices.
     *
     * See_Also: https://forum.dlang.org/post/muhfypwftdivluqdbmdf@forum.dlang.org
     */
    @property int opCmp()(const scope typeof(this) that) const scope /* template-lazy */
    {
        version(D_Coverage) {} else pragma(inline, true);
        scope const a = this.opSlice();
        scope const b = that.opSlice();
        return a < b ? -1 : (a > b);
        // import core.internal.array.comparison : __cmp; // instead of `std.algorithm.comparison : cmp`;
        // return __cmp(this[], that[]);
    }

    /** Cast to `bool`: `false` for `SSOString()` and `true` for any small
     * string with non-zero length byte or any large non-`null` string.
     */
    bool opCast(T : bool)() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        if (isLarge)
            return large !is null;
        else
            return small.length != 0;
    }

    /** Support trait `isNullable`. */
    static immutable nullValue = typeof(this).init;

    /** Support trait `isHoleable`. */
    static immutable holeValue = typeof(this).asHole();

    /** Check if this a hole, meaning a removed/erase value. */
    bool isHole() const scope @safe nothrow @nogc
    {
        return words[0] == size_t.max;
    }

    /** Make this a hole, meaning a removed/erase value. */
    void holeify() @system @nogc scope
    {
        words[0] = size_t.max;
        words[1] = size_t.max;
    }

    /** Returns: a holed `SSOString`, meaning a removed/erase value. */
    private static typeof(this) asHole() @system
    {
        typeof(return) result = void;
        result.holeify();
        return result;
    }

    /** Check if `this` is a small ASCII string. */
    bool isSmallASCII() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        static assert(largeLengthTagBitOffset == 0); // mask below assumes bit 0 of lsbyte is the small/large tag
        // should be fast on 64-bit platforms:
        return ((words[0] & 0x_80_80_80_80__80_80_80_01UL) == 1 && // bit 0 of lsbyte is set => small
                (words[1] & 0x_80_80_80_80__80_80_80_80UL) == 0);
    }

private:

    /** Returns: `true` iff this is a large string, otherwise `false.` */
    @property bool isLarge() const scope @trusted
    {
        version(D_Coverage) {} else pragma(inline, true);
        return !(large.length & (1 << largeLengthTagBitOffset)); // first bit discriminates small from large
    }

    alias Large = immutable(char)[];

    /// Maximum number of `char`s that can be stored inline (small).
    public enum smallCapacity = Large.sizeof - Small.length.sizeof;
    static assert(smallCapacity > 0, "No room for small source for immutable(char) being " ~ immutable(char).stringof);

    enum largeLengthTagBitOffset = 0; ///< bit position for large tag in length.
    enum smallLengthBitCount = 4;
    static assert(smallCapacity == 2^^smallLengthBitCount-1);

    enum metaBits = 3;               ///< Number of bits used for metadata.
    enum metaMask = (2^^metaBits-1); ///< Mask for metadata shifted to bottom.
    enum tagsBitCount = 1 + metaBits; ///< Number of bits used for small discriminator plus extra meta data.
    static assert(smallLengthBitCount + tagsBitCount == 8);

    /// Get metadata byte with first `metaBits` bits set.
    @property ubyte metadata() const @safe pure nothrow @nogc
    {
        // Same shift as used by the setter below.
        return (small.length >> (largeLengthTagBitOffset + 1)) & metaMask; // get bits [1 .. 1+metaBits]
    }

    /// Set metadata.
    @property void metadata(ubyte data) @trusted pure nothrow @nogc
    in(data < (1 << metaBits))
    {
        if (isLarge)
            raw.length = encodeLargeLength(length) | ((data & metaMask) << (largeLengthTagBitOffset + 1));
        else
            small.length = cast(ubyte)encodeSmallLength(length) | ((data & metaMask) << (largeLengthTagBitOffset + 1));
    }

    /// Decode raw length `rawLength` by shifting away tag bits.
    static size_t decodeRawLength(size_t rawLength) @safe pure nothrow @nogc
    {
        return rawLength >> tagsBitCount;
    }

    /// Encode `Large` length from `Length`.
    static size_t encodeLargeLength(size_t length) @safe pure nothrow @nogc
    {
        return (length << tagsBitCount);
    }

    /// Encode `Small` length from `Length`.
    static size_t encodeSmallLength(size_t length) @safe pure nothrow @nogc
    in(length <= smallCapacity)
    {
        return (length << tagsBitCount) | (1 << largeLengthTagBitOffset);
    }

    version(LittleEndian) // see: http://forum.dlang.org/posting/zifyahfohbwavwkwbgmw
    {
        struct Small
        {
            /* TODO: only first 4 bits are needed to represent a length between
             * 0-15, use other 4 bits.
             */
            ubyte length = 0;
            immutable(char)[smallCapacity] data = [0,0,0,0,0,
                                                   0,0,0,0,0,
                                                   0,0,0,0,0]; // explicit init needed for `__traits(isZeroInit)` to be true.
        }
    }
    else
    {
        struct Small
        {
            immutable(char)[smallCapacity] data = [0,0,0,0,0,
                                                   0,0,0,0,0,
                                                   0,0,0,0,0]; // explicit init needed for `__traits(isZeroInit)` to be true.
            /* TODO: only first 4 bits are needed to represent a length between
             * 0-15, use other 4 bits.
             */
            ubyte length;
        }
        static assert(0, "TODO: add BigEndian support and test");
    }

    struct Raw                  // same memory layout as `immutable(char)[]`
    {
        size_t length = 0;      // can be bit-fiddled without GC allocation
        immutable(char)* ptr = null;
    }

    // All four members alias the same 2 words of storage.
    union
    {
        Raw raw;
        Large large;
        Small small;
        size_t[2] words;
    }
}
version(unittest) static assert(SSOString.sizeof == string.sizeof);

/** Returns: `x` lowercased. */
SSOString toLower()(const SSOString x) @trusted /* template-lazy */
{
    if (x.isSmallASCII)         // small ASCII fast-path
    {
        typeof(return) result = void;
        result.small.length = x.small.length;
        import std.ascii : toLower;
        // converts all `smallCapacity` bytes, including the unused tail
        foreach (const index; 0 .. x.smallCapacity)
            (cast(char[])(result.small.data))[index] = toLower(x.small.data[index]);
        return result;
    }
    else if (x.isLarge)
    {
        import std.uni : asLowerCase;
        import std.conv : to;
        return typeof(return)(x.opSlice().asLowerCase.to!string); // TODO: make .to!string nothrow
    }
    else                        // small non-ASCII path usually without GC-allocation
    {
        typeof(return) result = x; // copy
        import std.uni : toLowerInPlace;
        auto slice = cast(char[])(result.opSlice()); // need ref to slice
        toLowerInPlace(slice);
        if (slice is result.opSlice() || // no reallocation
            slice.length == result.length) // or same length (happens for German double-s)
            return result;
        else
            // version(none)
            // {
            //     import nxt.dbgio;
            //     dbg(`toLowerInPlace reallocated from "`,
            //         result.opSlice(), `" of length `, result.opSlice().length,
            //         ` to "`
            //         , slice, `" of length `, slice.length);
            // }
            return typeof(return)(slice); // reallocation occurred
    }
}

/** Returns: `x` uppercased. */
SSOString toUpper()(const SSOString x) @trusted /* template-lazy */
{
    if (x.isSmallASCII)         // small ASCII fast-path
    {
        typeof(return) result = void;
        result.small.length = x.small.length;
        import std.ascii : toUpper;
        // converts all `smallCapacity` bytes, including the unused tail
        foreach (const index; 0 .. x.smallCapacity)
            (cast(char[])(result.small.data))[index] = toUpper(x.small.data[index]);
        return result;
    }
    else if (x.isLarge)
    {
        import std.uni : asUpperCase;
        import std.conv : to;
        return typeof(return)(x.opSlice().asUpperCase.to!string); // TODO: make .to!string nothrow
    }
    else                        // small non-ASCII path usually without GC-allocation
    {
        typeof(return) result = x; // copy
        import std.uni : toUpperInPlace;
        auto slice = cast(char[])(result.opSlice()); // need ref to slice
        toUpperInPlace(slice);
        if (slice is result.opSlice() || // no reallocation
            slice.length == result.length) // or same length (happens for German double-s)
            return result;
        else
            // version(none)
            // {
            //     import nxt.dbgio;
            //     dbg(`toUpperInPlace reallocated from "`,
            //         result.opSlice(), `" of length `, result.opSlice().length,
            //         ` to "`
            //         , slice, `" of length `, slice.length);
            // }
            return typeof(return)(slice); // reallocation occurred
    }
}

/// construct from non-immutable source is allowed in non-`@nogc`-scope
@safe pure nothrow unittest
{
    alias S = SSOString;

    scope const char[] x0;
    const s0 = SSOString(x0); // no .idup

    scope const char[] x16 = new char[16];
    const s16 = SSOString(x16); // will call .idup
}

/// construct from non-immutable source is not allowed in `@nogc`-scope
@safe pure nothrow @nogc unittest
{
    scope const char[] s;
    // TODO: why does this fail? static assert(!__traits(compiles, { const _ = SSOString(s); }));
}

/// verify `isNull` when @nogc constructing from small static array of `char`s
@trusted pure nothrow @nogc unittest
{
    static foreach (const n; 0 .. SSOString.smallCapacity + 1)
    {
        {
            immutable(char)[n] x;
            assert(!SSOString(x).isNull);
        }
    }
}

/// verify `isNull` when constructing from large static array of `char`s
@trusted pure nothrow unittest
{
    static foreach (const n; SSOString.smallCapacity + 1 .. 32)
    {
        {
            immutable(char)[n] x;
            assert(!SSOString(x).isNull);
            assert(SSOString(x).length == n);
        }
    }
}

/// verify `isNull` when constructing from dynamic array of `char`s
@trusted pure nothrow unittest
{
    foreach (const n; 0 .. 32)
    {
        scope x = new immutable(char)[n];
        assert(!SSOString(x).isNull);
    }
}

/// test behaviour of `==` and `is` operator
@trusted pure nothrow @nogc unittest
{
    const SSOString x = "42";
    assert(!x.isNull);
    assert(x == "42");

    const SSOString y = "42";
    assert(!y.isNull);
    assert(y == "42");

    assert(x == y);
    assert(x == y[]);
    assert(x[] == y);
    assert(x[] == y[]);
    assert(x[] is x[]);
    assert(y[] is y[]);
    assert(x[] !is y[]);
    assert(x.ptr !is y.ptr);

    const SSOString z = "43";
    assert(!z.isNull);
    assert(z == "43");
    assert(x != z);
    assert(x[] != z[]);
    assert(x !is z);
    assert(x[] !is z[]);
}

///
@safe pure nothrow @nogc unittest
{
    static assert(SSOString.smallCapacity == 15);

    import nxt.gc_traits : mustAddGCRange;
    static assert(mustAddGCRange!SSOString); // `Large large.ptr` must be scanned

    static assert(__traits(isZeroInit, SSOString));
    // TODO: assert(SSOString.init == SSOString.nullValue);

    auto s0 = SSOString.init;
    assert(s0.isNull);
    assert(s0.length == 0);
    assert(s0.isLarge);
    assert(s0[] == []);

    char[SSOString.smallCapacity] charsSmallCapacity = "123456789_12345"; // fits in small string
    const sSmallCapacity = SSOString(charsSmallCapacity);
    assert(!sSmallCapacity.isLarge);
    assert(sSmallCapacity.length == SSOString.smallCapacity);
    assert(sSmallCapacity == charsSmallCapacity);

    const s0_ = SSOString("");
    assert(!s0_.isNull);         // cannot distinguish
    assert(s0 == s0_);

    const s7 = SSOString("0123456");
    assert(!s7.isNull);

    const s7_ = SSOString("0123456_"[0 .. $ - 1]);
    assert(s7.ptr !is s7_.ptr); // string data shall not overlap
    assert(s7 == s7_);

    const _s7 = SSOString("_0123456"[1 .. $]); // source from other string literal
    assert(s7.ptr !is _s7.ptr); // string data shall not overlap
    assert(s7 == _s7);

    assert(!s7.isLarge);
    assert(s7.length == 7);
    assert(s7[] == "0123456");
    assert(s7[] == "_0123456"[1 .. $]);
    assert(s7[] == "0123456_"[0 .. $ - 1]);
    assert(s7[0 .. 4] == "0123");

    const s15 = SSOString("0123456789abcde");
    assert(!s15.isNull);
    static assert(is(typeof(s15[]) == const(char)[]));
    assert(!s15.isLarge);
    assert(s15.length == 15);
    assert(s15[] == "0123456789abcde");
    assert(s15[0 .. 4] == "0123");
    assert(s15[10 .. 15] == "abcde");
    assert(s15[10 .. $] == "abcde");

    const s16 = SSOString("0123456789abcdef");
    assert(!s16.isNull);
    static assert(is(typeof(s16[]) == const(char)[]));
    assert(s16.isLarge);

    const s16_ = SSOString("0123456789abcdef_"[0 .. s16.length]);
    assert(s16.length == s16_.length);
    assert(s16[] == s16_[]);
    assert(s16.ptr !is s16_.ptr); // string data shall not overlap
    assert(s16 == s16_);          // but contents is equal

    const _s16 = SSOString("_0123456789abcdef"[1 .. $]);
    assert(s16.length == _s16.length);
    assert(s16[] == _s16[]);    // contents is equal
    assert(s16 == _s16);        // contents is equal

    assert(s16.length == 16);
    assert(s16[] == "0123456789abcdef");
    assert(s16[0] == '0');
    assert(s16[10] == 'a');
    assert(s16[15] == 'f');
    assert(s16[0 .. 4] == "0123");
    assert(s16[10 .. 16] == "abcdef");
    assert(s16[10 .. $] == "abcdef");
}

/// metadata for null string
@safe pure nothrow @nogc unittest
{
    auto s = SSOString.init;
    assert(s.isNull);
    foreach (const i; 0 .. 8)
    {
        s.metadata = i;
        assert(s.metadata == i);
        assert(s.length == 0);
    }
}

/// metadata for small string
@safe pure nothrow @nogc unittest
{
    auto s = SSOString("0123456");
    assert(!s.isNull);
    assert(!s.isLarge);
    foreach (const i; 0 .. 8)
    {
        s.metadata = i;
        assert(s.metadata == i);
        assert(s.length == 7);
        assert(!s.isLarge);
        assert(!s.isNull);
    }
}

/// metadata for small string with maximum length
@safe pure nothrow @nogc unittest
{
    auto s = SSOString("0123456789abcde");
    assert(s.length == SSOString.smallCapacity);
    assert(!s.isNull);
    assert(!s.isLarge);
    foreach (const i; 0 .. 8)
    {
        s.metadata = i;
        assert(s.metadata == i);
        assert(s.length == 15);
        assert(!s.isLarge);
        assert(!s.isNull);
    }
}

/// metadata for large string with minimum length
@safe pure nothrow @nogc unittest
{
    auto s = SSOString("0123456789abcdef");
    assert(s.length == SSOString.smallCapacity + 1);
    assert(!s.isNull);
    assert(s.isLarge);
    assert(!s.empty);
    foreach (const i; 0 .. 8)
    {
        s.metadata = i;
        assert(s.metadata == i);
        assert(s.length == 16);
        assert(s.isLarge);
        assert(!s.isNull);
    }
}

/// equality and equivalence
@safe pure nothrow @nogc unittest
{
    assert(SSOString() == SSOString(""));
    assert(SSOString() !is SSOString(""));
}

/// hashing of null, empty and non-empty
@safe pure nothrow @nogc unittest
{
    assert(SSOString().toHash == 0);
    assert(SSOString("").toHash == 0);
    assert(SSOString("a").toHash != 0);
    assert(SSOString("0123456789abcdef").toHash != 0);
}

/// construct from static array larger than `smallCapacity`
@safe pure nothrow unittest
{
    char[SSOString.smallCapacity + 1] charsMinLargeCapacity;
    const _ = SSOString(charsMinLargeCapacity);
}

// test construction from range
@safe pure unittest
{
    static void test(const scope char[] x,
                     const bool isLarge) @safe pure
    {
        import std.utf : byDchar;
        const scope s = SSOString(x.byDchar);
        assert(s == x);
        assert(s.isLarge == isLarge);
    }
    test("", false);
    test("_", false);
    test("123456789_12345", false);
    test("123456789_123456", true);
    test("123456789_123456789_123456789_", true);
}

/// hole handling
@trusted pure nothrow @nogc unittest
{
    assert(!SSOString.init.isHole);
    assert(!SSOString("").isHole);
    assert(!SSOString("a").isHole);
    assert(SSOString.asHole.isHole);
}

/// DIP-1000 return ref escape analysis
@safe pure nothrow unittest
{
    static if (hasPreviewDIP1000)
    {
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { string f1() @safe pure nothrow { SSOString x; return x[]; } }));
        static assert(!__traits(compiles, { string f2() @safe pure nothrow { SSOString x; return x.toString; } }));
        static assert(!__traits(compiles, { ref immutable(char) g() @safe pure nothrow @nogc { SSOString x; return x[0]; } }));
    }
}

/// ASCII purity and case-conversion
@safe pure nothrow @nogc unittest
{
    // these are all small ASCII
    assert( SSOString("a").isSmallASCII);
    assert( SSOString("b").isSmallASCII);
    assert( SSOString("z").isSmallASCII);
    assert( SSOString("_").isSmallASCII);
    assert( SSOString("abcd").isSmallASCII);
    assert( SSOString("123456789_12345").isSmallASCII);

    // these are not
    assert(!SSOString("123456789_123456").isSmallASCII); // too large
    assert(!SSOString("123456789_123ö").isSmallASCII);
    assert(!SSOString("ö").isSmallASCII);
    assert(!SSOString("Ö").isSmallASCII);
    assert(!SSOString("åäö").isSmallASCII);
    assert(!SSOString("ö-värld").isSmallASCII);
}

/// ASCII purity and case-conversion
@safe pure unittest
{
    assert(SSOString("A").toLower[] == "a");
    assert(SSOString("a").toUpper[] == "A");
    assert(SSOString("ABCDEFGHIJKLMNO").toLower[] == "abcdefghijklmno"); // small
    assert(SSOString("abcdefghijklmno").toUpper[] == "ABCDEFGHIJKLMNO"); // small
    assert(SSOString("ÅÄÖ").toLower[] == "åäö");
    assert(SSOString("åäö").toUpper[] == "ÅÄÖ");
    assert(SSOString("ABCDEFGHIJKLMNOP").toLower[] == "abcdefghijklmnop"); // large
    assert(SSOString("abcdefghijklmnop").toUpper[] == "ABCDEFGHIJKLMNOP"); // large

    char[6] x = "ÅÄÖ";
    import std.uni : toLowerInPlace;
    auto xref = x[];
    toLowerInPlace(xref);
    assert(x == "åäö");
    assert(xref == "åäö");
}

/// lexicographic comparison
@safe pure unittest
{
    const SSOString a = SSOString("a");
    assert(a == SSOString("a"));

    immutable SSOString b = SSOString("b");

    assert(a < b);
    assert(b > a);
    assert(a[] < b[]);

    assert("a" < "b");
    assert("a" < "å");
    assert("Å" < "å");
    assert(SSOString("a") < SSOString("å"));
    assert(SSOString("ÅÄÖ") < SSOString("åäö"));
}

/// cast to bool
@safe pure unittest
{
    // mimics behaviour of casting of `string` to `bool`
    assert(!SSOString());
    assert(SSOString(""));
    assert(SSOString("abc"));
}

/// to string conversion
@safe pure unittest
{
    // mutable small will GC-allocate
    {
        SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr !is &s.toString()[0]);
    }

    // const small will GC-allocate
    {
        const SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr !is &s.toString()[0]);
    }

    // immutable small will not allocate
    {
        immutable SSOString s = SSOString("123456789_12345");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr is &s.toString()[0]);
        // TODO: check return via -dip1000
    }

    /* Forbid return of possibly locally scoped `Small` small stack object
     * regardless of head-mutability.
     */
    static if (hasPreviewDIP1000)
    {
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { const SSOString x; return x.ptr; } }));
        static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { immutable SSOString x; return x.ptr; } }));

        /** TODO: Enable the following line when DIP-1000 works for opSlice()
         *
         * See_Also: https://issues.dlang.org/show_bug.cgi?id=18792
         */
        // static assert(!__traits(compiles, { string f1() @safe pure nothrow { immutable SSOString x; return x[]; } }));
    }

    // large will never allocate regardless of head-mutability
    {
        SSOString s = SSOString("123456789_123456");
        assert(s.ptr is &s.opSlice()[0]);
        assert(s.ptr is &s.toString()[0]); // shouldn't this change?
    }
}

version(unittest)
{
    import nxt.dip_traits : hasPreviewDIP1000;
}