1 /** Reference Counted Array. 2 See_Also: http://dpaste.dzfl.pl/817283c163f5 3 */ 4 module nxt.rcstring; 5 6 import core.memory : GC; 7 // import core.stdc.stdlib; 8 // import core.stdc.string; 9 // import std.algorithm; 10 11 /** Reference Counted (RC) version of string. 12 */ 13 alias RCString = RCXString!(immutable char); 14 15 /** Reference Counted Array. 16 Configured with character type `E`, maximum length for the small string optimization, 17 and the allocation function, which must have the same semantics as `realloc`. 18 19 See_Also: https://github.com/burner/std.rcstring 20 */ 21 struct RCXString(E = immutable char, size_t maxSmallSize = 23, alias realloc = GC.realloc) 22 { 23 pure nothrow: 24 25 // Preconditions 26 static assert(is(E == immutable), "Only immutable characters supported for now."); 27 static assert(E.alignof <= 4, "Character type must be 32-bit aligned at most."); 28 static assert(E.min == 0, "Character type must be unsigned."); 29 static assert((maxSmallSize + 1) * E.sizeof % size_t.sizeof == 0, 30 "maxSmallSize + 1 must be a multiple of size_t.sizeof."); 31 static assert((maxSmallSize + 1) * E.sizeof >= 3 * size_t.sizeof, 32 "maxSmallSize + 1 must be >= size_t.sizeof * 3."); 33 static assert(maxSmallSize < E.max, "maxSmallSize must be less than E.max"); 34 35 enum maxSmallLength = maxSmallSize; 36 37 private: 38 // import std.utf; 39 import core.lifetime : emplace; 40 import std.traits: isSomeChar, Unqual; 41 42 version(unittest) import std.stdio; 43 44 alias ME = Unqual!E; // mutable E 45 46 enum isString = isSomeChar!E; 47 48 // Simple reference-counted buffer. The reference count itself is a E. Layout is a size_t (the capacity) 49 // followed by the reference count followed by the payload. 50 struct RCBuffer 51 { 52 size_t capacity; 53 uint refCount; 54 55 // Data starts right after the refcount, no padding because of the static assert above 56 ME* mptr() @nogc { return cast(ME*) (&refCount + 1); } 57 E* ptr() @nogc { return cast(E*) mptr; } 58 59 // Create a new buffer given capacity and initializes payload. Capacity must be large enough. 60 static RCBuffer* make(size_t capacity, const(ME)[] content) 61 { 62 assert(capacity >= content.length); 63 auto result = cast(RCBuffer*) realloc(null, size_t.sizeof + uint.sizeof + capacity * E.sizeof); 64 result || assert(0); 65 result.capacity = capacity; 66 result.refCount = 1; 67 result.mptr[0 .. content.length] = content; 68 return result; 69 } 70 71 // Resize the buffer. It is assumed the reference count is 1. 72 static void resize(ref RCBuffer* p, size_t capacity) 73 { 74 assert(p.refCount == 1); 75 p = cast(RCBuffer*) realloc(p, size_t.sizeof + uint.sizeof + capacity * E.sizeof); 76 p || assert(0); 77 p.capacity = capacity; 78 } 79 80 unittest 81 { 82 auto p = make(101, null); 83 assert(p.refCount == 1); 84 assert(p.capacity == 101); 85 resize(p, 203); 86 assert(p.refCount == 1); 87 assert(p.capacity == 203); 88 realloc(p, 0); 89 } 90 } 91 92 // Hosts a large string 93 struct Large 94 { 95 // <layout> 96 union 97 { 98 immutable RCBuffer* buf; 99 RCBuffer* mbuf; 100 } 101 union 102 { 103 E* ptr; 104 ME* mptr; 105 } 106 static if ((maxSmallSize + 1) * E.sizeof == 3 * size_t.sizeof) 107 { 108 /* The small buffer and the large buffer overlap. This means the large buffer must give up its last byte 109 * as a discriminator. 110 */ 111 size_t _length; 112 enum maxLarge = size_t.max >> (8 * E.sizeof); 113 version(BigEndian) 114 { 115 // Use the LSB to store the marker 116 size_t length() const @safe @nogc { return _length >> 8 * E.sizeof; } 117 void length(size_t s) @safe @nogc { _length = Marker.isRefCounted | (s << (8 * E.sizeof)); } 118 } 119 else version(LittleEndian) 120 { 121 // Use the MSB to store the marker 122 private enum size_t mask = size_t(E.max) << (8 * (size_t.sizeof - E.sizeof)); 123 size_t length() const @safe @nogc { return _length & ~mask; } 124 void length(size_t s) @safe @nogc { assert(s <= maxLarge); _length = s | mask; } 125 } 126 else 127 { 128 static assert(0, "Unspecified endianness."); 129 } 130 } 131 else 132 { 133 // No tricks needed, store the size plainly 134 size_t _length; 135 size_t length() const @safe @nogc 136 { 137 return _length; 138 } 139 void length(size_t s) @safe @nogc 140 { 141 _length = s; 142 } 143 } 144 // </layout> 145 146 // Get length 147 alias opDollar = length; 148 149 // Initializes a Large given capacity and content. Capacity must be at least as large as content's size. 150 this(size_t capacity, const(ME)[] content) 151 { 152 assert(capacity >= content.length); 153 mbuf = RCBuffer.make(capacity, content); 154 mptr = mbuf.mptr; 155 length = content.length; 156 } 157 158 // Initializes a Large from a string by copying it. 159 this(const(ME)[] s) 160 { 161 this(s.length, s); 162 } 163 164 static if (isString) unittest 165 { 166 const(ME)[] s1 = "hello, world"; 167 auto lrg1 = Large(s1); 168 assert(lrg1.length == 12); 169 immutable lrg2 = immutable Large(s1); 170 assert(lrg2.length == 12); 171 const lrg3 = const Large(s1); 172 assert(lrg3.length == 12); 173 } 174 175 // Initializes a Large from a static string by referring to it. 176 this(immutable(ME)[] s) 177 { 178 assert(buf is null); 179 ptr = s.ptr; 180 length = s.length; 181 } 182 183 static if (isString) unittest 184 { 185 immutable ME[] s = "abcdef"; 186 auto lrg1 = Large(s); 187 assert(lrg1.length == 6); 188 assert(lrg1.buf is null); 189 } 190 191 // Decrements the reference count and frees buf if it goes down to zero. 192 void decRef() nothrow 193 { 194 if (!mbuf) return; 195 if (mbuf.refCount == 1) realloc(mbuf, 0); 196 else --mbuf.refCount; 197 } 198 199 auto opSlice() inout return 200 { 201 assert(ptr); 202 return ptr[0 .. length]; 203 } 204 205 // Makes sure there's room for at least newCap Chars. 206 void reserve(size_t newCap) 207 { 208 if (mbuf && mbuf.refCount == 1 && mbuf.capacity >= newCap) return; 209 immutable size = this.length; 210 version(assert) scope(exit) assert(size == this.length); 211 if (!mbuf) 212 { 213 // Migrate from static string to allocated string 214 mbuf = RCBuffer.make(newCap, ptr[0 .. size]); 215 ptr = mbuf.ptr; 216 return; 217 } 218 if (mbuf.refCount > 1) 219 { 220 // Split this guy making its buffer unique 221 --mbuf.refCount; 222 mbuf = RCBuffer.make(newCap, ptr[0 .. size]); 223 ptr = mbuf.ptr; 224 // size stays untouched 225 } 226 else 227 { 228 immutable offset = ptr - mbuf.ptr; 229 // If offset is too large, it's worth decRef()ing and then allocating a new buffer 230 if (offset * 2 >= newCap) 231 { 232 auto newBuf = RCBuffer.make(newCap, ptr[0 .. size]); 233 decRef; 234 mbuf = newBuf; 235 ptr = mbuf.ptr; 236 } 237 else 238 { 239 RCBuffer.resize(mbuf, newCap); 240 ptr = mbuf.ptr + offset; 241 } 242 } 243 } 244 245 unittest 246 { 247 Large obj; 248 obj.reserve(1); 249 assert(obj.mbuf !is null); 250 assert(obj.mbuf.capacity >= 1); 251 obj.reserve(1000); 252 assert(obj.mbuf.capacity >= 1000); 253 obj.reserve(10000); 254 assert(obj.mbuf.capacity >= 10000); 255 } 256 } 257 258 // <layout> 259 union 260 { 261 Large large; 262 struct 263 { 264 union 265 { 266 E[maxSmallSize] small; 267 ME[maxSmallSize] msmall; 268 } 269 ME smallLength; 270 } 271 size_t[(maxSmallSize + 1) / size_t.sizeof] ancillary; // used internally 272 } 273 // </layout> 274 275 hash_t toHash() const @trusted 276 { 277 import core.internal.hash : hashOf; 278 return this.asSlice.hashOf; 279 } 280 281 static if (isString) unittest 282 { 283 assert(RCXString("a").toHash == 284 RCXString("a").toHash); 285 assert(RCXString("a").toHash != 286 RCXString("b").toHash); 287 assert(RCXString("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").toHash == 288 RCXString("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").toHash); 289 assert(RCXString("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").toHash != 290 RCXString("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb").toHash); 291 } 292 293 static if (isString) unittest 294 { 295 RCXString x; 296 assert(x.smallLength == 0); 297 assert(x.length == 0); 298 x.large.length = 133; 299 assert(x.smallLength == E.max); 300 assert(x.large.length == 133); 301 x.large.length = 0x0088_8888_8888_8888; 302 assert(x.large.length == 0x0088_8888_8888_8888); 303 assert(x.smallLength == E.max); 304 } 305 306 // is this string small? 307 bool isSmall() const @safe @nogc 308 { 309 return smallLength <= maxSmallSize; 310 } 311 312 // release all memory associated with this 313 private void decRef() @nogc 314 { 315 if (!isSmall) large.decRef; 316 } 317 318 // Return a slice with the string's contents 319 // Not public because it leaks the internals 320 auto asSlice() inout @nogc 321 { 322 immutable s = smallLength; 323 if (s <= maxSmallSize) return small.ptr[0 .. s]; 324 return large[]; 325 } 326 327 public: 328 329 /// Returns the length of the string 330 size_t length() const @nogc 331 { 332 immutable s = smallLength; 333 return s <= maxSmallSize ? s : large.length; 334 } 335 /// Ditto 336 alias opDollar = length; 337 338 static if (isString) unittest 339 { 340 auto s1 = RCXString("123456789_"); 341 assert(s1.length == 10); 342 s1 ~= RCXString("123456789_123456789_123456789_123456789_12345"); 343 assert(s1.length == 55); 344 } 345 346 /// Needed for correct printing in other modules 347 static if (isString) 348 { 349 string toArray() const @trusted 350 { 351 return this.asSlice; 352 } 353 } 354 355 /** Construct a `RCXString` from a slice `s`. 356 357 If the slice is immutable, assumes the slice is a literal or 358 GC-allocated and does NOT copy it internally. 359 360 Warning: Subsequently deallocating `s` will cause the `RCXString` 361 to dangle. If the slice has `const` or mutable characters, creates 362 and manages a copy internally. 363 */ 364 this(C)(C[] s) 365 if (is(Unqual!C == ME)) 366 { 367 // Contents is immutable, we may assume it won't go away ever 368 if (s.length <= maxSmallSize) 369 { 370 // fits in small 371 small[0 .. s.length] = s[]; // so copy it 372 smallLength = cast(E)s.length; 373 } 374 else 375 { 376 emplace(&large, s); 377 } 378 } 379 380 // Test construction from immutable(ME)[], const(ME)[], and ME[] 381 static if (isString) unittest 382 { 383 immutable(E)[] a = "123456789_"; 384 auto s1 = RCXString(a); 385 assert(s1 == a); 386 assert(s1.asSlice !is a, "Small strings must be copied"); 387 a = "123456789_123456789_123456789_123456789_"; 388 auto s2 = RCXString(a); 389 assert(s2 == a); 390 assert(s2.asSlice is a, "Large immutable strings shall not be copied"); 391 392 const(char)[] b = "123456789_"; 393 auto s3 = RCXString(b); 394 assert(s3 == b); 395 assert(s3.isSmall, "Small strings must be copied"); 396 b = "123456789_123456789_123456789_123456789_"; 397 auto s4 = RCXString(b); 398 assert(s4 == b); 399 assert(s4.asSlice !is b, "Large non-immutable strings shall be copied"); 400 401 char[] c = "123456789_".dup; 402 auto s5 = RCXString(c); 403 assert(s5 == c); 404 assert(s5.isSmall, "Small strings must be copied"); 405 c = "123456789_123456789_123456789_123456789_".dup; 406 auto s6 = RCXString(c); 407 assert(s6 == c); 408 assert(s6.asSlice !is c, "Large non-immutable strings shall be copied"); 409 } 410 411 static if (isString) unittest 412 { 413 const(ME)[] s = "123456789_123456789_123456789_123456789_"; 414 auto s1 = RCXString(s); 415 assert(s1.large.mbuf); 416 auto s2 = s1; 417 assert(s1.large.mbuf is s2.large.mbuf); 418 assert(s1.large.mbuf.refCount == 2); 419 s1 = s ~ "123"; 420 assert(s1.large.mbuf.refCount == 1); 421 assert(s2.large.mbuf.refCount == 1); 422 assert(s2 == s); 423 assert(s1 == s ~ "123"); 424 const s3 = s1; 425 assert(s1.large.mbuf.refCount == 2); 426 immutable s4 = s1; 427 //immutable s5 = s3; 428 assert(s1.large.mbuf.refCount == 3); 429 } 430 431 // Postblit 432 this(this) @nogc 433 { 434 if (!isSmall && large.mbuf) ++large.mbuf.refCount; 435 } 436 437 // Dtor decrements refcount and may deallocate 438 ~this() @nogc 439 { 440 decRef; 441 } 442 443 // Assigns another string 444 void opAssign(immutable(ME)[] s) 445 { 446 decRef; 447 // Contents is immutable, we may assume it won't go away ever 448 emplace(&this, s); 449 } 450 451 static if (isString) unittest 452 { 453 immutable(ME)[] s = "123456789_"; 454 RCXString rcs; 455 rcs = s; 456 assert(rcs.isSmall); 457 s = "123456789_123456789_123456789_123456789_"; 458 rcs = s; 459 assert(!rcs.isSmall); 460 assert(rcs.large.mbuf is null); 461 } 462 463 // Assigns another string 464 void opAssign(const(ME)[] s) 465 { 466 if (capacity >= s.length) 467 { 468 // Noice, there's room 469 if (s.length <= maxSmallSize) 470 { 471 // Fits in small 472 msmall[0 .. s.length] = s[]; 473 smallLength = cast(E) s.length; 474 } 475 else 476 { 477 // Large it is 478 assert(!isSmall); 479 large.mptr[0 .. s.length] = s; 480 large.length = s.length; 481 } 482 } 483 else 484 { 485 // Tear down and rebuild 486 decRef; 487 emplace(&this, s); 488 } 489 } 490 491 static if (isString) unittest 492 { 493 const(ME)[] s = "123456789_123456789_123456789_123456789_"; 494 RCXString s1; 495 s1 = s; 496 assert(!s1.isSmall && s1.large.buf !is null); 497 auto p = s1.ptr; 498 s1 = s; 499 assert(s1.ptr is p, "Wasteful reallocation"); 500 RCXString s2; 501 s2 = s1; 502 assert(s1.large.mbuf is s2.large.mbuf); 503 assert(s1.large.mbuf.refCount == 2); 504 s1 = "123456789_123456789_123456789_123456789_123456789_"; 505 assert(s1.large.mbuf !is s2.large.mbuf); 506 assert(s1.large.mbuf is null); 507 assert(s2.large.mbuf.refCount == 1); 508 assert(s1 == "123456789_123456789_123456789_123456789_123456789_"); 509 assert(s2 == "123456789_123456789_123456789_123456789_"); 510 } 511 512 bool opEquals(const(ME)[] s) const @nogc 513 { 514 if (isSmall) return s.length == smallLength && small[0 .. s.length] == s; 515 return large[] == s; 516 } 517 518 bool opEquals(in RCXString s) const 519 { 520 return this == s.asSlice; 521 } 522 523 static if (isString) unittest 524 { 525 const s1 = RCXString("123456789_123456789_123456789_123456789_123456789_"); 526 RCXString s2 = s1[0 .. 10]; 527 auto s3 = RCXString("123456789_"); 528 assert(s2 == s3); 529 } 530 531 /** Returns the maximum number of character this string can store without 532 requesting more memory. 533 */ 534 size_t capacity() const @nogc 535 { 536 /** This is subtle: if large.mbuf is null (i.e. the string had been constructed from a literal), then the 537 capacity is maxSmallSize because that's what we can store without a memory (re)allocation. Same if refCount is 538 greater than 1 - we can't reuse the memory. 539 */ 540 return isSmall || !large.mbuf || large.mbuf.refCount > 1 ? maxSmallSize : large.mbuf.capacity; 541 } 542 543 static if (isString) unittest 544 { 545 auto s = RCXString("abc"); 546 assert(s.capacity == maxSmallSize); 547 s = "123456789_123456789_123456789_123456789_123456789_"; 548 assert(s.capacity == maxSmallSize); 549 const char[] lit = "123456789_123456789_123456789_123456789_123456789_"; 550 s = lit; 551 assert(s.capacity >= 50); 552 } 553 554 void reserve(size_t capacity) 555 { 556 if (isSmall) 557 { 558 if (capacity <= maxSmallSize) 559 { 560 // stays small 561 return; 562 } 563 // small to large 564 immutable length = smallLength; 565 auto newLayout = Large(capacity, small.ptr[0 .. length]); 566 large = newLayout; 567 } 568 else 569 { 570 // large to large 571 if (large.mbuf && large.mbuf.capacity >= capacity) return; 572 large.reserve(capacity); 573 } 574 } 575 576 static if (isString) unittest 577 { 578 RCXString s1; 579 s1.reserve(1); 580 assert(s1.capacity >= 1); 581 s1.reserve(1023); 582 assert(s1.capacity >= 1023); 583 s1.reserve(10230); 584 assert(s1.capacity >= 10230); 585 } 586 587 /** Appends `s` to `this`. 588 */ 589 void opOpAssign(string s : "~")(const(ME)[] s) 590 { 591 immutable length = this.length; 592 immutable newLen = length + s.length; 593 if (isSmall) 594 { 595 if (newLen <= maxSmallSize) 596 { 597 // stays small 598 msmall[length .. newLen] = s; 599 smallLength = cast(E) newLen; 600 } 601 else 602 { 603 // small to large 604 auto newLayout = Large(newLen, small.ptr[0 .. length]); 605 newLayout.mptr[length .. newLen][] = s; 606 newLayout.length = newLen; 607 large = newLayout; 608 assert(!isSmall); 609 assert(this.length == newLen); 610 } 611 } 612 else 613 { 614 // large to large 615 large.reserve(newLen); 616 large.mptr[length .. newLen][] = s; 617 large.length = newLen; 618 } 619 } 620 621 static if (isString) unittest 622 { 623 auto s1 = RCXString("123456789_123456789_123456789_123456789_"); 624 s1 ~= s1; 625 assert(s1 == "123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_"); 626 foreach (i; 0 .. 70) s1.popFront(); 627 assert(s1 == "123456789_"); 628 s1 ~= "abc"; 629 assert(s1 == "123456789_abc"); 630 } 631 632 /// Ditto 633 void opOpAssign(string s : "~")(const auto ref RCXString s) 634 { 635 this ~= s.asSlice; 636 } 637 638 static if (isString) unittest 639 { 640 RCXString s1; 641 s1 = "hello"; 642 assert(s1 == "hello"); 643 s1 ~= ", world! "; 644 assert(s1 == "hello, world! "); 645 s1 ~= s1; 646 assert(s1 == "hello, world! hello, world! "); 647 s1 ~= s1; 648 assert(s1 == "hello, world! hello, world! hello, world! hello, world! "); 649 auto s2 = RCXString("yah! "); 650 assert(s2 == "yah! "); 651 s2 ~= s1; 652 assert(s2 == "yah! hello, world! hello, world! hello, world! hello, world! "); 653 s2 = "123456789_123456789_123456789_123456789_"; 654 s2 ~= s2; 655 assert(s2 == "123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_"); 656 auto s3 = s2; 657 assert(s3.large.mbuf.refCount == 2); 658 s2 ~= "123456789_"; 659 assert(s2.large.mbuf.refCount == 1); 660 assert(s3.large.mbuf.refCount == 1); 661 assert(s3 == "123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_"); 662 663 s2 = "123456789_123456789_123456789_123456789_"; 664 const s4 = RCXString(", world"); 665 s2 ~= s4; 666 assert(s2 == "123456789_123456789_123456789_123456789_, world"); 667 s2 ~= const RCXString("!!!"); 668 assert(s2 == "123456789_123456789_123456789_123456789_, world!!!"); 669 } 670 671 /// Returns `true` iff `this` is empty 672 bool empty() const @nogc 673 { 674 return !length; 675 } 676 677 static if (isString) 678 { 679 private dchar unsafeDecode(const(ME)* p) const @nogc 680 { 681 byte c = *p; 682 dchar res = c & 0b0111_1111; 683 if (c >= 0) return res; 684 assert(c < 0b1111_1000); 685 dchar cover = 0b1000_0000; 686 c <<= 1; 687 assert(c < 0); 688 do 689 { 690 ++p; 691 assert((*p >> 6) == 0b10); 692 cover <<= 5; 693 res = (res << 6) ^ *p ^ cover ^ 0b1000_0000; 694 c <<= 1; 695 } while(c < 0); 696 return res; 697 } 698 } 699 700 /// Returns the first code point of `this`. 701 auto front() const @nogc 702 { 703 assert(!empty); 704 // TODO: make safe 705 static if (isString) 706 { 707 return unsafeDecode(ptr); 708 } 709 else 710 { 711 return ptr[0]; 712 } 713 } 714 715 /// Returns the last code point of `this`. 716 static if (isString) 717 { 718 dchar back() const @nogc 719 { 720 assert(!empty); 721 auto p = ptr + length - 1; 722 if (*p < 0b1000_0000) return *p; 723 // TODO: make safe 724 do 725 { 726 --p; 727 } while (!(*p & 0b0100_0000)); 728 return unsafeDecode(p); 729 } 730 } 731 else 732 { 733 E back() const @nogc 734 { 735 return ptr[length - 1]; 736 } 737 } 738 739 /// Returns the `n`th code unit in `this`. 740 E opIndex(size_t n) const @nogc 741 { 742 assert(n < length); 743 return ptr[n]; 744 } 745 746 static if (isString) unittest 747 { 748 auto s1 = RCXString("hello"); 749 assert(s1.front == 'h'); 750 assert(s1[1] == 'e'); 751 assert(s1.back == 'o'); 752 assert(s1[$ - 1] == 'o'); 753 s1 = RCXString("Ü"); 754 assert(s1.length == 2); 755 assert(s1.front == 'Ü'); 756 assert(s1.back == 'Ü'); 757 } 758 759 /// Discards the first code point 760 void popFront() @nogc 761 { 762 assert(!empty && ptr); 763 uint toPop = 1; 764 auto b = *ptr; 765 if (b >= 0b1000_0000) 766 { 767 toPop = (b | 0b0010_0000) != b ? 2 768 : (b | 0b0001_0000) != b ? 3 769 : 4; 770 } 771 if (isSmall) 772 { 773 // Must shuffle in place 774 // TODO: make faster 775 foreach (i; 0 .. length - toPop) 776 { 777 msmall[i] = small[i + toPop]; 778 } 779 smallLength -= toPop; 780 } 781 else 782 { 783 large.ptr += toPop; 784 large.length = large.length - toPop; 785 } 786 } 787 788 static if (isString) unittest 789 { 790 auto s1 = RCXString("123456789_"); 791 auto s2 = s1; 792 s1.popFront(); 793 assert(s1 == "23456789_"); 794 assert(s2 == "123456789_"); 795 s1 = RCXString("123456789_123456789_123456789_123456789_"); 796 s2 = s1; 797 s1.popFront(); 798 assert(s1 == "23456789_123456789_123456789_123456789_"); 799 assert(s2 == "123456789_123456789_123456789_123456789_"); 800 s1 = "öü"; 801 s2 = s1; 802 s1.popFront(); 803 assert(s1 == "ü"); 804 assert(s2 == "öü"); 805 } 806 807 /// Discards the last code point 808 void popBack() @nogc 809 { 810 assert(!empty && ptr); 811 auto p = ptr + length - 1; 812 if (*p < 0b1000_0000) 813 { 814 // hot path 815 if (isSmall) --smallLength; 816 else large.length = large.length - 1; 817 return; 818 } 819 // TODO: make safe 820 auto p1 = p; 821 do 822 { 823 --p; 824 } while (!(*p & 0b0100_0000)); 825 immutable diff = p1 - p + 1; 826 assert(diff > 1 && diff <= length); 827 if (isSmall) smallLength -= diff; 828 else large.length = large.length - diff; 829 } 830 831 static if (isString) unittest 832 { 833 auto s1 = RCXString("123456789_"); 834 auto s2 = s1; 835 s1.popBack; 836 assert(s1 == "123456789"); 837 assert(s2 == "123456789_"); 838 s1 = RCXString("123456789_123456789_123456789_123456789_"); 839 s2 = s1; 840 s1.popBack; 841 assert(s1 == "123456789_123456789_123456789_123456789"); 842 assert(s2 == "123456789_123456789_123456789_123456789_"); 843 s1 = "öü"; 844 s2 = s1; 845 s1.popBack; 846 assert(s1 == "ö"); 847 assert(s2 == "öü"); 848 } 849 850 /// Returns a slice to the entire string or a portion of it. 851 auto opSlice() inout @nogc 852 { 853 return this; 854 } 855 856 /// Ditto 857 auto opSlice(size_t b, size_t e) inout 858 { 859 assert(b <= e && e <= length); 860 auto ptr = this.ptr; 861 auto sz = e - b; 862 if (sz <= maxSmallSize) 863 { 864 // result is small 865 RCXString result = void; 866 result.msmall[0 .. sz] = ptr[b .. e]; 867 result.smallLength = cast(E) sz; 868 return result; 869 } 870 assert(!isSmall); 871 RCXString result = this; 872 result.large.ptr += b; 873 result.large.length = e - b; 874 return result; 875 } 876 877 static if (isString) unittest 878 { 879 immutable s = RCXString("123456789_123456789_123456789_123456789"); 880 RCXString s1 = s[0 .. 38]; 881 assert(!s1.isSmall && s1.large.buf is null); 882 } 883 884 // Unsafe! Returns a pointer to the beginning of the payload. 885 auto ptr() inout @nogc 886 { 887 return isSmall ? small.ptr : large.ptr; 888 } 889 890 static if (isString) unittest 891 { 892 auto s1 = RCXString("hello"); 893 auto s2 = s1[1 .. $ - 1]; 894 assert(s2 == "ell"); 895 s1 = "123456789_123456789_123456789_123456789_"; 896 s2 = s1[1 .. $ - 1]; 897 assert(s2 == "23456789_123456789_123456789_123456789"); 898 } 899 900 /// Returns the concatenation of `this` with `s`. 901 RCXString opBinary(string s = "~")(const auto ref RCXString s) const 902 { 903 return this ~ s.asSlice; 904 } 905 906 /// Ditto 907 RCXString opBinary(string s = "~")(const(ME)[] s) const 908 { 909 immutable length = this.length; 910 auto resultLen = length + s.length; 911 RCXString result = void; 912 if (resultLen <= maxSmallSize) 913 { 914 // noice 915 result.msmall.ptr[0 .. length] = ptr[0 .. length]; 916 result.msmall.ptr[length .. resultLen] = s[]; 917 result.smallLength = cast(E) resultLen; 918 return result; 919 } 920 emplace(&result.large, resultLen, this.asSlice); 921 result ~= s; 922 return result; 923 } 924 925 /// Returns the concatenation of `s` with `this`. 926 RCXString opBinaryRight(string s = "~")(const(E)[] s) const 927 { 928 immutable length = this.length, resultLen = length + s.length; 929 RCXString result = void; 930 if (resultLen <= maxSmallSize) 931 { 932 // noice 933 result.msmall.ptr[0 .. s.length] = s[]; 934 result.msmall.ptr[s.length .. resultLen] = small.ptr[0 .. length]; 935 result.smallLength = cast(E) resultLen; 936 return result; 937 } 938 emplace(&result.large, resultLen, s); 939 result ~= this; 940 return result; 941 } 942 943 static if (isString) unittest 944 { 945 auto s1 = RCXString("hello"); 946 auto s2 = s1 ~ ", world!"; 947 assert(s2 == "hello, world!"); 948 s1 = "123456789_123456789_123456789_123456789_"; 949 s2 = s1 ~ "abcdefghi_"; 950 assert(s2 == "123456789_123456789_123456789_123456789_abcdefghi_"); 951 s2 = s1 ~ s1; 952 assert(s2 == "123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_"); 953 s2 = "abcdefghi_" ~ s1; 954 assert(s2 == "abcdefghi_123456789_123456789_123456789_123456789_"); 955 } 956 } 957 958 unittest 959 { 960 alias RCI = RCXString!(immutable uint); 961 RCI x; 962 } 963 964 /// verify UTF-8 storage 965 unittest 966 { 967 string s = "åäö"; 968 RCString rcs = s; 969 assert(rcs.length == 6); 970 import std.algorithm : count; 971 assert(rcs.count == 3); 972 assert(rcs.front == 'å'); 973 rcs.popFront(); 974 assert(rcs.front == 'ä'); 975 rcs.popFront(); 976 assert(rcs.front == 'ö'); 977 rcs.popFront(); 978 assert(rcs.empty); 979 } 980 981 version = profile; 982 983 /// shows performance increase for SSO over built-in string 984 version(profile) unittest 985 { 986 enum maxSmallSize = 23; 987 alias S = RCXString!(immutable char, maxSmallSize); 988 989 import std.datetime: StopWatch, Duration; 990 import std.conv : to; 991 import std.stdio; 992 993 enum n = 2^^21; 994 995 StopWatch sw; 996 997 sw.reset; 998 sw.start; 999 char[maxSmallSize] ss; 1000 foreach (i; 0 .. n) 1001 { 1002 auto x = S(ss); 1003 } 1004 sw.stop; 1005 auto timeRCString = sw.peek().msecs; 1006 writeln("> RCString took ", sw.peek().to!Duration); 1007 1008 sw.reset; 1009 sw.start; 1010 foreach (i; 0 .. n) 1011 { 1012 string x = ss.idup; 1013 } 1014 sw.stop; 1015 auto timeString = sw.peek().msecs; 1016 writeln("> Builtin string took ", sw.peek().to!Duration); 1017 1018 writeln("> Speedup: ", timeString/timeRCString); 1019 }