/** Reference Counted Array.
    See_Also: http://dpaste.dzfl.pl/817283c163f5
 */
module nxt.rcstring;

import core.memory : GC;
// import core.stdc.stdlib;
// import core.stdc.string;
// import std.algorithm;

/** Reference Counted (RC) version of string.

    Instantiates `RCXString` with `immutable char` elements and the default
    small-string size and allocation function.
 */
alias RCString = RCXString!(immutable char);

/** Reference Counted Array.
    Configured with character type `E`, maximum length for the small string optimization,
    and the allocation function, which must have the same semantics as `realloc`.

    Params:
        E = element type; must be `immutable` and unsigned (see the static asserts below)
        maxSmallSize = maximum number of elements stored inline
        realloc = allocation function; defaults to `GC.realloc`

    See_Also: https://github.com/burner/std.rcstring
 */
struct RCXString(E = immutable char, size_t maxSmallSize = 23, alias realloc = GC.realloc)
{
// Every member below is `pure nothrow`.
pure nothrow:

// Preconditions
static assert(is(E == immutable), "Only immutable characters supported for now.");
static assert(E.alignof <= 4, "Character type must be 32-bit aligned at most.");
static assert(E.min == 0, "Character type must be unsigned.");
// `maxSmallSize + 1` elements: the inline elements plus one element holding the small length.
static assert((maxSmallSize + 1) * E.sizeof % size_t.sizeof == 0,
              "maxSmallSize + 1 must be a multiple of size_t.sizeof.");
static assert((maxSmallSize + 1) * E.sizeof >= 3 * size_t.sizeof,
              "maxSmallSize + 1 must be >= size_t.sizeof * 3.");
// The small length is stored in a single element, so it must be able to represent `maxSmallSize`.
static assert(maxSmallSize < E.max, "maxSmallSize must be less than E.max");

// Alias of `maxSmallSize` under a second name.
enum maxSmallLength = maxSmallSize;

private:
// import std.utf;
import core.lifetime : emplace;
import std.traits: isSomeChar, Unqual;

version(unittest) import std.stdio;

alias ME = Unqual!E; // mutable E

// `true` when `E` is a character type; gates the string-specific members and unittests.
enum isString = isSomeChar!E;

// Simple reference-counted buffer. The reference count is a `uint`. Layout is a size_t (the capacity)
// followed by the reference count followed by the payload.
struct RCBuffer
{
    size_t capacity; // number of payload elements the allocation has room for
    uint refCount;   // number of owners sharing this buffer

    // Data starts right after the refcount, no padding because of the static assert above
    ME* mptr() @nogc { return cast(ME*) (&refCount + 1); }
    E* ptr() @nogc { return cast(E*) mptr; }

    // Creates a new buffer with room for `capacity` elements, a reference count of 1, and the
    // payload initialized from `content`. `capacity` must be at least `content.length`.
    static RCBuffer* make(in size_t capacity, const(ME)[] content)
    {
        assert(capacity >= content.length);
        auto result = cast(RCBuffer*) realloc(null, size_t.sizeof + uint.sizeof + capacity * E.sizeof);
        result || assert(0); // allocation failure is fatal
        result.capacity = capacity;
        result.refCount = 1;
        result.mptr[0 .. content.length] = content;
        return result;
    }

    // Resizes the buffer to `capacity` elements. It is assumed the reference count is 1.
    // `p` is updated because `realloc` may move the allocation.
    static void resize(ref RCBuffer* p, in size_t capacity)
    {
        assert(p.refCount == 1);
        p = cast(RCBuffer*) realloc(p, size_t.sizeof + uint.sizeof + capacity * E.sizeof);
        p || assert(0); // allocation failure is fatal
        p.capacity = capacity;
    }

    unittest
    {
        auto p = make(101, null);
        assert(p.refCount == 1);
        assert(p.capacity == 101);
        resize(p, 203);
        assert(p.refCount == 1);
        assert(p.capacity == 203);
        realloc(p, 0);
    }
}

// Hosts a large string: an optional reference-counted buffer (`buf` is null when the string
// refers to immutable data it does not own), a pointer to the first element and a length.
struct Large
{
    // <layout>
    union
    {
        immutable RCBuffer* buf;
        RCBuffer* mbuf;
    }
    union
    {
        E* ptr;
        ME* mptr;
    }
    static if ((maxSmallSize + 1) * E.sizeof == 3 * size_t.sizeof)
    {
        /* The small buffer and the large buffer overlap. This means the large buffer must give up its last byte
         * as a discriminator.
         */
        size_t _length;
        enum maxLarge = size_t.max >> (8 * E.sizeof);
        version(BigEndian)
        {
            // Use the LSB to store the marker.
            // The marker is `E.max`, the same value the little-endian branch stores, so the
            // overlapping `smallLength` reads as a value greater than `maxSmallSize`.
            // (Previously this referenced an undefined `Marker.isRefCounted`.)
            size_t length() const @safe @nogc { return _length >> 8 * E.sizeof; }
            void length(size_t s) @safe @nogc
            {
                assert(s <= maxLarge);
                _length = size_t(E.max) | (s << (8 * E.sizeof));
            }
        }
        else version(LittleEndian)
        {
            // Use the MSB to store the marker
            private enum size_t mask = size_t(E.max) << (8 * (size_t.sizeof - E.sizeof));
            size_t length() const @safe @nogc { return _length & ~mask; }
            void length(size_t s) @safe @nogc { assert(s <= maxLarge); _length = s | mask; }
        }
        else
        {
            static assert(0, "Unspecified endianness.");
        }
    }
    else
    {
        // No tricks needed, store the size plainly
        size_t _length;
        size_t length() const @safe @nogc
        {
            return _length;
        }
        void length(size_t s) @safe @nogc
        {
            _length = s;
        }
    }
    // </layout>

    // Get length
    alias opDollar = length;

    // Initializes a Large given capacity and content. Capacity must be at least as large as content's size.
    this(in size_t capacity, const(ME)[] content)
    {
        assert(capacity >= content.length);
        mbuf = RCBuffer.make(capacity, content);
        mptr = mbuf.mptr;
        length = content.length;
    }

    // Initializes a Large from a string by copying it.
    this(const(ME)[] s)
    {
        this(s.length, s);
    }

    static if (isString) unittest
    {
        const(ME)[] s1 = "hello, world";
        auto lrg1 = Large(s1);
        assert(lrg1.length == 12);
        immutable lrg2 = immutable Large(s1);
        assert(lrg2.length == 12);
        const lrg3 = const Large(s1);
        assert(lrg3.length == 12);
    }

    // Initializes a Large from a static string by referring to it.
    this(immutable(ME)[] s)
    {
        assert(buf is null);
        ptr = s.ptr;
        length = s.length;
    }

    static if (isString) unittest
    {
        immutable ME[] s = "abcdef";
        auto lrg1 = Large(s);
        assert(lrg1.length == 6);
        assert(lrg1.buf is null);
    }

    // Decrements the reference count and frees buf if it goes down to zero.
    // Does nothing when there is no buffer.
    void decRef() nothrow
    {
        if (!mbuf) return;
        if (mbuf.refCount == 1) realloc(mbuf, 0);
        else --mbuf.refCount;
    }

    // Returns the elements `ptr[0 .. length]`.
    auto opSlice() inout return
    {
        assert(ptr);
        return ptr[0 .. length];
    }

    // Makes sure this string uniquely owns a buffer with room for at least `newCapacity`
    // elements counted from `ptr`. The contents and the length are preserved.
    void reserve(in size_t newCapacity)
    {
        immutable size = this.length;
        version(assert) scope(exit) assert(size == this.length);
        // Never allocate less than what is already stored: `RCBuffer.make` requires
        // capacity >= content.length.
        immutable needed = newCapacity > size ? newCapacity : size;
        if (!mbuf)
        {
            // Migrate from static string to allocated string
            mbuf = RCBuffer.make(needed, ptr[0 .. size]);
            ptr = mbuf.ptr;
            return;
        }
        if (mbuf.refCount > 1)
        {
            // Split this guy making its buffer unique
            --mbuf.refCount;
            mbuf = RCBuffer.make(needed, ptr[0 .. size]);
            ptr = mbuf.ptr;
            // size stays untouched
            return;
        }
        // Unique owner. `ptr` may have been advanced past the start of the payload, so
        // only `capacity - offset` elements are usable from `ptr`.
        immutable size_t offset = ptr - mbuf.ptr;
        if (mbuf.capacity - offset >= needed) return;
        // If offset is too large, it's worth decRef()ing and then allocating a new buffer
        if (offset * 2 >= needed)
        {
            auto newBuf = RCBuffer.make(needed, ptr[0 .. size]);
            decRef;
            mbuf = newBuf;
            ptr = mbuf.ptr;
        }
        else
        {
            // Keep the unused prefix, so the allocation must cover it as well.
            RCBuffer.resize(mbuf, offset + needed);
            ptr = mbuf.ptr + offset;
        }
    }

    unittest
    {
        Large obj;
        obj.reserve(1);
        assert(obj.mbuf !is null);
        assert(obj.mbuf.capacity >= 1);
        obj.reserve(1000);
        assert(obj.mbuf.capacity >= 1000);
        obj.reserve(10000);
        assert(obj.mbuf.capacity >= 10000);
    }

    static if (isString) unittest
    {
        // Room is measured from `ptr`, not from the start of the buffer
        const(ME)[] s = "0123456789";
        auto obj = Large(s);
        obj.ptr += 8;
        obj.length = 2;
        obj.reserve(6);
        assert(obj.mbuf.capacity - (obj.ptr - obj.mbuf.ptr) >= 6);
        assert(obj[] == "89");
        obj.decRef;
    }
}

// <layout>
union
{
    Large large;
    struct
    {
        union
        {
            E[maxSmallSize] small;
            ME[maxSmallSize] msmall;
        }
        ME smallLength;
    }
    size_t[(maxSmallSize + 1) / size_t.sizeof] ancillary; // used internally
}
// </layout>

// Hashes the string's contents, so equal contents give equal hashes whether stored small or large.
hash_t toHash() const @trusted
{
    import core.internal.hash : hashOf;
    return this.asSlice.hashOf;
}

static if (isString) unittest
{
    assert(RCXString("a").toHash ==
           RCXString("a").toHash);
    assert(RCXString("a").toHash !=
           RCXString("b").toHash);
    assert(RCXString("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").toHash ==
           RCXString("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").toHash);
    assert(RCXString("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").toHash !=
           RCXString("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb").toHash);
}

static if (isString) unittest
{
    RCXString x;
    assert(x.smallLength == 0);
    assert(x.length == 0);
    x.large.length = 133;
    assert(x.smallLength == E.max);
    assert(x.large.length == 133);
    x.large.length = 0x0088_8888_8888_8888;
    assert(x.large.length == 0x0088_8888_8888_8888);
    assert(x.smallLength == E.max);
}

// is this string small?
// True when the contents are stored inline, i.e. `smallLength` holds a real length.
// A large string stores a marker there that is greater than `maxSmallSize`.
bool isSmall() const @safe @nogc
{
    return smallLength <= maxSmallSize;
}

// Drops this string's reference to its large buffer, if it has one. Does nothing for small strings.
// NOTE(review): `@nogc` requires `large.decRef`, and therefore `realloc`, to be callable from
// `@nogc` code - confirm this holds for the default `GC.realloc`.
private void decRef() @nogc
{
    if (!isSmall) large.decRef;
}

// Return a slice with the string's contents
// Not public because it leaks the internals
auto asSlice() inout @nogc
{
    immutable s = smallLength;
    if (s <= maxSmallSize) return small.ptr[0 .. s];
    return large[];
}

public:

/// Returns the length of the string
size_t length() const @nogc
{
    immutable s = smallLength;
    return s <= maxSmallSize ? s : large.length;
}
/// Ditto
alias opDollar = length;

static if (isString) unittest
{
    auto s1 = RCXString("123456789_");
    assert(s1.length == 10);
    s1 ~= RCXString("123456789_123456789_123456789_123456789_12345");
    assert(s1.length == 55);
}

/// Needed for correct printing in other modules.
/// Returns the same slice as `asSlice`, i.e. a view of the internal storage rather than a copy.
static if (isString)
{
    string toArray() const @trusted
    {
        return this.asSlice;
    }
}

/** Construct a `RCXString` from a slice `s`.

    Slices of at most `maxSmallSize` elements are always copied into the
    inline storage. For longer slices: if the slice is immutable, assumes
    the slice is a literal or GC-allocated and does NOT copy it internally.

    Warning: Subsequently deallocating `s` will cause the `RCXString`
    to dangle. If the slice has `const` or mutable characters, creates
    and manages a copy internally.
 */
this(C)(C[] s)
if (is(Unqual!C == ME))
{
    if (s.length <= maxSmallSize)
    {
        // fits in small
        small[0 .. s.length] = s[]; // so copy it
        smallLength = cast(E)s.length;
    }
    else
    {
        // `Large` picks its constructor from the qualifier of `s`: immutable data is
        // referred to, anything else is copied into a new buffer.
        emplace(&large, s);
    }
}

// Test construction from immutable(ME)[], const(ME)[], and ME[]
static if (isString) unittest
{
    immutable(E)[] a = "123456789_";
    auto s1 = RCXString(a);
    assert(s1 == a);
    assert(s1.asSlice !is a, "Small strings must be copied");
    a = "123456789_123456789_123456789_123456789_";
    auto s2 = RCXString(a);
    assert(s2 == a);
    assert(s2.asSlice is a, "Large immutable strings shall not be copied");

    const(char)[] b = "123456789_";
    auto s3 = RCXString(b);
    assert(s3 == b);
    assert(s3.isSmall, "Small strings must be copied");
    b = "123456789_123456789_123456789_123456789_";
    auto s4 = RCXString(b);
    assert(s4 == b);
    assert(s4.asSlice !is b, "Large non-immutable strings shall be copied");

    char[] c = "123456789_".dup;
    auto s5 = RCXString(c);
    assert(s5 == c);
    assert(s5.isSmall, "Small strings must be copied");
    c = "123456789_123456789_123456789_123456789_".dup;
    auto s6 = RCXString(c);
    assert(s6 == c);
    assert(s6.asSlice !is c, "Large non-immutable strings shall be copied");
}

// Copies share the buffer and bump its reference count; assignment releases the old share.
static if (isString) unittest
{
    const(ME)[] s = "123456789_123456789_123456789_123456789_";
    auto s1 = RCXString(s);
    assert(s1.large.mbuf);
    auto s2 = s1;
    assert(s1.large.mbuf is s2.large.mbuf);
    assert(s1.large.mbuf.refCount == 2);
    s1 = s ~ "123";
    assert(s1.large.mbuf.refCount == 1);
    assert(s2.large.mbuf.refCount == 1);
    assert(s2 == s);
    assert(s1 == s ~ "123");
    const s3 = s1;
    assert(s1.large.mbuf.refCount == 2);
    immutable s4 = s1;
    //immutable s5 = s3;
    assert(s1.large.mbuf.refCount == 3);
}

// Postblit: a copy of a large string with a buffer takes one more reference to it.
// Small strings and large strings without a buffer need no bookkeeping.
this(this) @nogc
{
    if (!isSmall && large.mbuf) ++large.mbuf.refCount;
}

// Dtor decrements refcount and may deallocate
~this() nothrow @nogc
{
    decRef;
}

// Assigns an immutable slice. The previous contents are released; `s` is then handled
// exactly as by the constructor (copied when small, referred to when large).
void opAssign(immutable(ME)[] s)
{
    decRef;
    // Contents is immutable, we may assume it won't go away ever
    emplace(&this, s);
}

static if (isString) unittest
{
    immutable(ME)[] s = "123456789_";
    RCXString rcs;
    rcs = s;
    assert(rcs.isSmall);
    s = "123456789_123456789_123456789_123456789_";
    rcs = s;
    assert(!rcs.isSmall);
    assert(rcs.large.mbuf is null);
}

// Assigns a `const` or mutable slice by copying it. A uniquely owned large buffer
// with enough room is reused; otherwise the string is rebuilt.
void opAssign(const(ME)[] s)
{
    if (s.length <= maxSmallSize)
    {
        // Result is small. Stage the content on the stack first: `s` may be a view into
        // our own storage, which is about to be released or overwritten.
        ME[maxSmallSize] staged = void;
        staged[0 .. s.length] = s[];
        // Drop the large buffer (if any) before the small layout overwrites its pointer;
        // skipping this leaked the reference.
        decRef;
        msmall[0 .. s.length] = staged[0 .. s.length];
        smallLength = cast(E) s.length;
        return;
    }
    if (capacity >= s.length)
    {
        // Noice, there's room. capacity > maxSmallSize only for a uniquely owned large buffer.
        assert(!isSmall);
        large.mptr[0 .. s.length] = s;
        large.length = s.length;
    }
    else
    {
        // Tear down and rebuild
        decRef;
        emplace(&this, s);
    }
}

static if (isString) unittest
{
    const(ME)[] s = "123456789_123456789_123456789_123456789_";
    RCXString s1;
    s1 = s;
    assert(!s1.isSmall && s1.large.buf !is null);
    auto p = s1.ptr;
    s1 = s;
    assert(s1.ptr is p, "Wasteful reallocation");
    RCXString s2;
    s2 = s1;
    assert(s1.large.mbuf is s2.large.mbuf);
    assert(s1.large.mbuf.refCount == 2);
    s1 = "123456789_123456789_123456789_123456789_123456789_";
    assert(s1.large.mbuf !is s2.large.mbuf);
    assert(s1.large.mbuf is null);
    assert(s2.large.mbuf.refCount == 1);
    assert(s1 == "123456789_123456789_123456789_123456789_123456789_");
    assert(s2 == "123456789_123456789_123456789_123456789_");
}

static if (isString) unittest
{
    // Assigning small content to a large string must release its buffer reference
    const(ME)[] big = "123456789_123456789_123456789_123456789_";
    auto s1 = RCXString(big);
    auto s2 = s1;
    assert(s2.large.mbuf.refCount == 2);
    const(ME)[] tiny = "abc";
    s1 = tiny;
    assert(s1.isSmall && s1 == "abc");
    assert(s2.large.mbuf.refCount == 1);
    assert(s2 == big);
}

// Compares the contents with the slice `s`.
bool opEquals(const(ME)[] s) const @nogc
{
    if (isSmall) return s.length == smallLength && small[0 .. s.length] == s;
    return large[] == s;
}

// Compares the contents of two strings.
bool opEquals(in RCXString s) const => this == s.asSlice;

static if (isString) unittest
{
    const s1 = RCXString("123456789_123456789_123456789_123456789_123456789_");
    RCXString s2 = s1[0 .. 10];
    auto s3 = RCXString("123456789_");
    assert(s2 == s3);
}

/** Returns the maximum number of character this string can store without
    requesting more memory.
 */
size_t capacity() const @property @nogc
{
    /** This is subtle: if large.mbuf is null (i.e. the string had been constructed from a literal), then the
        capacity is maxSmallSize because that's what we can store without a memory (re)allocation. Same if refCount is
        greater than 1 - we can't reuse the memory.
     */
    if (isSmall || !large.mbuf || large.mbuf.refCount > 1) return maxSmallSize;
    // The payload starts right after the reference count. `large.ptr` may have been advanced
    // past that start (popFront, slicing); elements before it are not available any more.
    immutable size_t offset = large.ptr - cast(const(E)*) (&large.mbuf.refCount + 1);
    return large.mbuf.capacity - offset;
}

static if (isString) unittest
{
    auto s = RCXString("abc");
    assert(s.capacity == maxSmallSize);
    s = "123456789_123456789_123456789_123456789_123456789_";
    assert(s.capacity == maxSmallSize);
    const char[] lit = "123456789_123456789_123456789_123456789_123456789_";
    s = lit;
    assert(s.capacity >= 50);
}

/** Makes sure at least `capacity` elements can be stored without requesting
    more memory. The contents are preserved.
 */
void reserve(in size_t capacity)
{
    if (isSmall)
    {
        if (capacity <= maxSmallSize)
        {
            // stays small
            return;
        }
        // small to large
        immutable length = smallLength;
        auto newLayout = Large(capacity, small.ptr[0 .. length]);
        large = newLayout;
    }
    else
    {
        // large to large. Always delegate: `Large.reserve` also makes a shared or
        // literal-backed string uniquely owned, which returning early on the buffer's
        // capacity alone did not. Never ask for less than the current length.
        immutable len = large.length;
        large.reserve(capacity > len ? capacity : len);
    }
}

static if (isString) unittest
{
    RCXString s1;
    s1.reserve(1);
    assert(s1.capacity >= 1);
    s1.reserve(1023);
    assert(s1.capacity >= 1023);
    s1.reserve(10230);
    assert(s1.capacity >= 10230);
}

static if (isString) unittest
{
    // Reserving on a shared string must give it a buffer of its own
    const(ME)[] s = "123456789_123456789_123456789_123456789_";
    auto s1 = RCXString(s);
    auto s2 = s1;
    s1.reserve(30);
    assert(s1.capacity >= 30);
    assert(s1 == s && s2 == s);
    assert(s2.large.mbuf.refCount == 1);
}

/** Appends `s` to `this`.
 */
void opOpAssign(string s : "~")(const(ME)[] s)
{
    immutable length = this.length;
    immutable newLen = length + s.length;
    if (isSmall)
    {
        if (newLen <= maxSmallSize)
        {
            // stays small
            msmall[length .. newLen] = s;
            smallLength = cast(E) newLen;
        }
        else
        {
            // small to large
            auto newLayout = Large(newLen, small.ptr[0 .. length]);
            newLayout.mptr[length .. newLen][] = s;
            newLayout.length = newLen;
            large = newLayout;
            assert(!isSmall);
            assert(this.length == newLen);
        }
    }
    else
    {
        // large to large
        // `s` may be a view into our own storage (e.g. `x ~= x`). `reserve` can move or free
        // that storage, so remember where `s` sits and re-derive it afterwards.
        const(ME)* own = large.ptr;
        immutable aliased = s.length != 0 && s.ptr >= own && s.ptr + s.length <= own + length;
        immutable size_t aliasOffset = aliased ? s.ptr - own : 0;
        large.reserve(newLen);
        if (aliased) s = large.ptr[aliasOffset .. aliasOffset + s.length];
        large.mptr[length .. newLen][] = s;
        large.length = newLen;
    }
}

static if (isString) unittest
{
    auto s1 = RCXString("123456789_123456789_123456789_123456789_");
    s1 ~= s1;
    assert(s1 == "123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_");
    foreach (i; 0 .. 70) s1.popFront();
    assert(s1 == "123456789_");
    s1 ~= "abc";
    assert(s1 == "123456789_abc");
}

/// Ditto
void opOpAssign(string s : "~")(const auto ref RCXString s)
{
    this ~= s.asSlice;
}

static if (isString) unittest
{
    RCXString s1;
    s1 = "hello";
    assert(s1 == "hello");
    s1 ~= ", world! ";
    assert(s1 == "hello, world! ");
    s1 ~= s1;
    assert(s1 == "hello, world! hello, world! ");
    s1 ~= s1;
    assert(s1 == "hello, world! hello, world! hello, world! hello, world! ");
    auto s2 = RCXString("yah! ");
    assert(s2 == "yah! ");
    s2 ~= s1;
    assert(s2 == "yah! hello, world! hello, world! hello, world! hello, world! ");
    s2 = "123456789_123456789_123456789_123456789_";
    s2 ~= s2;
    assert(s2 == "123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_");
    auto s3 = s2;
    assert(s3.large.mbuf.refCount == 2);
    s2 ~= "123456789_";
    assert(s2.large.mbuf.refCount == 1);
    assert(s3.large.mbuf.refCount == 1);
    assert(s3 == "123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_");

    s2 = "123456789_123456789_123456789_123456789_";
    const s4 = RCXString(", world");
    s2 ~= s4;
    assert(s2 == "123456789_123456789_123456789_123456789_, world");
    s2 ~= const RCXString("!!!");
    assert(s2 == "123456789_123456789_123456789_123456789_, world!!!");
}

/// Returns `true` iff `this` is empty
bool empty() const @property @nogc => !length;

static if (isString)
{
    // Decodes the UTF-8 sequence starting at `p` without any validation beyond asserts.
    // `c` is kept signed so that its sign bit tells whether another continuation byte follows.
    private dchar unsafeDecode(const(ME)* p) const @nogc
    {
        byte c = *p;
        dchar res = c & 0b0111_1111;
        if (c >= 0) return res; // single-byte (ASCII) sequence
        assert(c < 0b1111_1000);
        dchar cover = 0b1000_0000;
        c <<= 1;
        assert(c < 0);
        do
        {
            ++p;
            assert((*p >> 6) == 0b10);
            cover <<= 5;
            // Append six payload bits; `cover` and 0b1000_0000 cancel the leftover length
            // bit of the lead byte and the continuation byte's tag bit.
            res = (res << 6) ^ *p ^ cover ^ 0b1000_0000;
            c <<= 1;
        } while(c < 0);
        return res;
    }
}

/// Returns the first code point of `this`.
auto front() const @property @nogc in(!empty)
{
    // TODO: make safe
    static if (isString)
        return unsafeDecode(ptr);
    else
        return ptr[0];
}

/// Returns the last code point of `this`.
static if (isString)
{
    dchar back() const @property @nogc in(!empty)
    {
        auto p = ptr + length - 1;
        if (*p < 0b1000_0000)
            return *p;
        // TODO: make safe
        // Walk back over continuation bytes (bit 6 clear) to the lead byte of the sequence.
        do
        {
            --p;
        } while (!(*p & 0b0100_0000));
        return unsafeDecode(p);
    }
}
else
    E back() const @property @nogc => ptr[length - 1];

/// Returns the `n`th code unit in `this`.
E opIndex(size_t n) const @nogc in(n < length) => ptr[n];

static if (isString) unittest
{
    auto s1 = RCXString("hello");
    assert(s1.front == 'h');
    assert(s1[1] == 'e');
    assert(s1.back == 'o');
    assert(s1[$ - 1] == 'o');
    s1 = RCXString("Ü");
    assert(s1.length == 2);
    assert(s1.front == 'Ü');
    assert(s1.back == 'Ü');
}

/// Discards the first code point (the first element when `E` is not a character type)
void popFront() @nogc
{
    assert(!empty && ptr);
    uint toPop = 1;
    // Only character strings hold UTF-8 sequences; like `front`, treat any other element
    // type as one element per item.
    static if (isString)
    {
        auto b = *ptr;
        if (b >= 0b1000_0000)
        {
            // The lead byte encodes the sequence length in its high bits
            toPop = (b | 0b0010_0000) != b ? 2
                : (b | 0b0001_0000) != b ? 3
                : 4;
            // A truncated sequence must not pop more than is stored
            if (toPop > length) toPop = cast(uint) length;
        }
    }
    if (isSmall)
    {
        // Must shuffle in place
        // TODO: make faster
        foreach (i; 0 .. length - toPop)
            msmall[i] = small[i + toPop];
        smallLength -= toPop;
    }
    else
    {
        // Large strings just advance the view; the buffer is untouched
        large.ptr += toPop;
        large.length = large.length - toPop;
    }
}

static if (isString) unittest
{
    auto s1 = RCXString("123456789_");
    auto s2 = s1;
    s1.popFront();
    assert(s1 == "23456789_");
    assert(s2 == "123456789_");
    s1 = RCXString("123456789_123456789_123456789_123456789_");
    s2 = s1;
    s1.popFront();
    assert(s1 == "23456789_123456789_123456789_123456789_");
    assert(s2 == "123456789_123456789_123456789_123456789_");
    s1 = "öü";
    s2 = s1;
    s1.popFront();
    assert(s1 == "ü");
    assert(s2 == "öü");
}

/// Discards the last code point (the last element when `E` is not a character type)
void popBack() @nogc
{
    assert(!empty && ptr);
    // Only character strings hold UTF-8 sequences; see `popFront`.
    static if (isString)
    {
        const first = ptr;
        auto p = first + length - 1;
        if (*p >= 0b1000_0000)
        {
            // TODO: make safe
            auto p1 = p;
            // Walk back over continuation bytes (bit 6 clear) to the lead byte, but never
            // past the first element, even if the data is malformed.
            while (p != first)
            {
                --p;
                if (*p & 0b0100_0000) break;
            }
            immutable diff = p1 - p + 1;
            assert(diff >= 1 && diff <= length);
            if (isSmall) smallLength -= diff;
            else large.length = large.length - diff;
            return;
        }
    }
    // hot path: a single element
    if (isSmall) --smallLength;
    else large.length = large.length - 1;
}

static if (isString) unittest
{
    auto s1 = RCXString("123456789_");
    auto s2 = s1;
    s1.popBack;
    assert(s1 == "123456789");
    assert(s2 == "123456789_");
    s1 = RCXString("123456789_123456789_123456789_123456789_");
    s2 = s1;
    s1.popBack;
    assert(s1 == "123456789_123456789_123456789_123456789");
    assert(s2 == "123456789_123456789_123456789_123456789_");
    s1 = "öü";
    s2 = s1;
    s1.popBack;
    assert(s1 == "ö");
    assert(s2 == "öü");
}

static if (!isString) unittest
{
    // Non-character elements are popped one at a time, whatever their value
    immutable(ME)[] a = [200, 1, 2];
    auto x = RCXString(a);
    x.popFront();
    assert(x.length == 2);
    assert(x[0] == 1);
    a = [1, 2, 200];
    x = RCXString(a);
    x.popBack();
    assert(x.length == 2);
    assert(x[1] == 2);
}

/// Returns a slice to the entire string or a portion of it.
auto opSlice() inout @nogc
{
    return this;
}

/// Ditto
auto opSlice(size_t b, size_t e) inout
{
    assert(b <= e && e <= length);
    auto ptr = this.ptr;
    auto sz = e - b;
    if (sz <= maxSmallSize)
    {
        // result is small: copy the elements into a fresh inline string
        RCXString result = void;
        result.msmall[0 .. sz] = ptr[b .. e];
        result.smallLength = cast(E) sz;
        return result;
    }
    // result is large: copy `this` (sharing its buffer, if any) and narrow the view
    assert(!isSmall);
    RCXString result = this;
    result.large.ptr += b;
    result.large.length = e - b;
    return result;
}

static if (isString) unittest
{
    immutable s = RCXString("123456789_123456789_123456789_123456789");
    RCXString s1 = s[0 .. 38];
    assert(!s1.isSmall && s1.large.buf is null);
}

// Unsafe! Returns a pointer to the beginning of the payload.
auto ptr() inout @nogc
{
    return isSmall ? small.ptr : large.ptr;
}

static if (isString) unittest
{
    auto s1 = RCXString("hello");
    auto s2 = s1[1 .. $ - 1];
    assert(s2 == "ell");
    s1 = "123456789_123456789_123456789_123456789_";
    s2 = s1[1 .. $ - 1];
    assert(s2 == "23456789_123456789_123456789_123456789");
}

/// Returns the concatenation of `this` with `s`.
RCXString opBinary(string op = "~")(const auto ref RCXString s) const
if (op == "~") // without the constraint every binary operator would concatenate
{
    return this ~ s.asSlice;
}

/// Ditto
RCXString opBinary(string op = "~")(const(ME)[] s) const
if (op == "~")
{
    immutable length = this.length;
    auto resultLen = length + s.length;
    RCXString result = void;
    if (resultLen <= maxSmallSize)
    {
        // noice: the result fits inline
        result.msmall.ptr[0 .. length] = ptr[0 .. length];
        result.msmall.ptr[length .. resultLen] = s[];
        result.smallLength = cast(E) resultLen;
        return result;
    }
    // Allocate the final size once, seeded with our contents, then append `s`
    emplace(&result.large, resultLen, this.asSlice);
    result ~= s;
    return result;
}

/// Returns the concatenation of `s` with `this`.
RCXString opBinaryRight(string op = "~")(const(E)[] s) const
if (op == "~")
{
    immutable length = this.length, resultLen = length + s.length;
    RCXString result = void;
    if (resultLen <= maxSmallSize)
    {
        // noice: the result fits inline.
        // Read through `ptr`: `this` may be a large string whose length has shrunk, in
        // which case `small` overlaps the large layout and does not hold the contents.
        result.msmall.ptr[0 .. s.length] = s[];
        result.msmall.ptr[s.length .. resultLen] = ptr[0 .. length];
        result.smallLength = cast(E) resultLen;
        return result;
    }
    // Allocate the final size once, seeded with `s`, then append our contents
    emplace(&result.large, resultLen, s);
    result ~= this;
    return result;
}

static if (isString) unittest
{
    auto s1 = RCXString("hello");
    auto s2 = s1 ~ ", world!";
    assert(s2 == "hello, world!");
    s1 = "123456789_123456789_123456789_123456789_";
    s2 = s1 ~ "abcdefghi_";
    assert(s2 == "123456789_123456789_123456789_123456789_abcdefghi_");
    s2 = s1 ~ s1;
    assert(s2 == "123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_");
    s2 = "abcdefghi_" ~ s1;
    assert(s2 == "abcdefghi_123456789_123456789_123456789_123456789_");

    // Only `~` is defined
    static assert(!__traits(compiles, s1 - s2));
    static assert(!__traits(compiles, s1 + "abc"));

    // A large string that has shrunk below the small limit still concatenates correctly
    foreach (i; 0 .. 35) s1.popFront();
    assert(!s1.isSmall && s1 == "6789_");
    s2 = "ab" ~ s1;
    assert(s2 == "ab6789_");
}
}

// The struct must also instantiate for a non-character element type
unittest
{
    alias RCI = RCXString!(immutable uint);
    RCI x;
}

/// verify UTF-8 storage
unittest
{
    string s = "åäö";
    RCString rcs = s;
    assert(rcs.length == 6);
    import std.algorithm : count;
    assert(rcs.count == 3);
    assert(rcs.front == 'å');
    rcs.popFront();
    assert(rcs.front == 'ä');
    rcs.popFront();
    assert(rcs.front == 'ö');
    rcs.popFront();
    assert(rcs.empty);
}

// Always enables the benchmark below in unittest builds
version = profile;

/// shows performance increase for SSO over built-in string
version(profile) unittest
{
    enum maxSmallSize = 23;
    alias S = RCXString!(immutable char, maxSmallSize);

    // `StopWatch` is provided by `std.datetime.stopwatch`; its `peek` returns a `Duration`
    import std.datetime.stopwatch : StopWatch;
    import std.stdio;

    enum n = 2^^21;

    StopWatch sw;

    sw.reset;
    sw.start;
    char[maxSmallSize] ss;
    foreach (i; 0 .. n)
    {
        auto x = S(ss);
    }
    sw.stop;
    immutable timeRCString = sw.peek;
    writeln("> RCString took ", timeRCString);

    sw.reset;
    sw.start;
    foreach (i; 0 .. n)
    {
        string x = ss.idup;
    }
    sw.stop;
    immutable timeString = sw.peek;
    writeln("> Builtin string took ", timeString);

    // Compare at full timer resolution and never divide by a zero measurement
    immutable rcTicks = timeRCString.total!"hnsecs";
    if (rcTicks > 0)
        writeln("> Speedup: ", cast(double) timeString.total!"hnsecs" / rcTicks);
}