/** Reference-counted string (array of characters) with small-string optimization.
    See_Also: http://dpaste.dzfl.pl/817283c163f5
 */
4 module nxt.rcstring;
5 
6 import core.memory : GC;
7 // import core.stdc.stdlib;
8 // import core.stdc.string;
9 // import std.algorithm;
10 
11 /** Reference Counted (RC) version of string.
12  */
13 alias RCString = RCXString!(immutable char);
14 
15 /** Reference Counted Array.
16     Configured with character type `E`, maximum length for the small string optimization,
17     and the allocation function, which must have the same semantics as `realloc`.
18 
19     See_Also: https://github.com/burner/std.rcstring
20 */
struct RCXString(E = immutable char, size_t maxSmallSize = 23, alias realloc = GC.realloc)
{
    pure nothrow:

    // Preconditions
    static assert(is(E == immutable), "Only immutable characters supported for now.");
    static assert(E.alignof <= 4, "Character type must be 32-bit aligned at most.");
    static assert(E.min == 0, "Character type must be unsigned.");
    static assert((maxSmallSize + 1) * E.sizeof % size_t.sizeof == 0,
                  "maxSmallSize + 1 must be a multiple of size_t.sizeof.");
    static assert((maxSmallSize + 1) * E.sizeof >= 3 * size_t.sizeof,
                  "maxSmallSize + 1 must be >= size_t.sizeof * 3.");
    static assert(maxSmallSize < E.max, "maxSmallSize must be less than E.max");

    enum maxSmallLength = maxSmallSize;

private:
    // import std.utf;
    import core.lifetime : emplace;
    import std.traits: isSomeChar, Unqual;

    version(unittest) import std.stdio;

    alias ME = Unqual!E; // mutable E

    enum isString = isSomeChar!E;

    // True when `Large` is exactly as big as the small layout, i.e. the last element of `Large._length` occupies
    // the same bytes as `smallLength` and doubles as the small/large discriminator.
    enum largeOverlaysSmall = (maxSmallSize + 1) * E.sizeof == 3 * size_t.sizeof;

    // Simple reference-counted buffer. Layout is a size_t (the capacity) followed by the reference count (a uint)
    // followed by the payload.
    struct RCBuffer
    {
        size_t capacity;
        uint refCount;

        // Data starts right after the refcount, no padding because of the static assert above
        ME* mptr() @nogc { return cast(ME*) (&refCount + 1); }
        E* ptr() @nogc { return cast(E*) mptr; }

        // Create a new buffer given capacity and initializes payload. Capacity must be large enough.
        // The new buffer starts with a reference count of 1. Aborts (assert(0)) if `realloc` returns null.
        static RCBuffer* make(in size_t capacity, const(ME)[] content)
        {
            assert(capacity >= content.length);
            auto result = cast(RCBuffer*) realloc(null, size_t.sizeof + uint.sizeof + capacity * E.sizeof);
            result || assert(0);
            result.capacity = capacity;
            result.refCount = 1;
            result.mptr[0 .. content.length] = content;
            return result;
        }

        // Resize the buffer. It is assumed the reference count is 1.
        // `p` is updated in place because `realloc` may move the block.
        static void resize(ref RCBuffer* p, in size_t capacity)
        {
            assert(p.refCount == 1);
            p = cast(RCBuffer*) realloc(p, size_t.sizeof + uint.sizeof + capacity * E.sizeof);
            p || assert(0);
            p.capacity = capacity;
        }

        unittest
        {
            auto p = make(101, null);
            assert(p.refCount == 1);
            assert(p.capacity == 101);
            resize(p, 203);
            assert(p.refCount == 1);
            assert(p.capacity == 203);
            realloc(p, 0);
        }
    }

    // Hosts a large string
    struct Large
    {
        // <layout>
        union
        {
            immutable RCBuffer* buf;
            RCBuffer* mbuf;
        }
        union
        {
            E* ptr;
            ME* mptr;
        }
        static if (largeOverlaysSmall)
        {
            /* The small buffer and the large buffer overlap. This means the large buffer must give up its last
             * element as a discriminator: it always holds `E.max`, which is greater than `maxSmallSize`.
             */
            size_t _length;
            enum maxLarge = size_t.max >> (8 * E.sizeof);
            version(BigEndian)
            {
                // The marker lives in the least significant element, which is last in memory
                size_t length() const @safe @nogc { return _length >> (8 * E.sizeof); }
                void length(size_t s) @safe @nogc
                {
                    assert(s <= maxLarge);
                    _length = (s << (8 * E.sizeof)) | size_t(E.max);
                }
            }
            else version(LittleEndian)
            {
                // The marker lives in the most significant element, which is last in memory
                private enum size_t mask = size_t(E.max) << (8 * (size_t.sizeof - E.sizeof));
                size_t length() const @safe @nogc { return _length & ~mask; }
                void length(size_t s) @safe @nogc { assert(s <= maxLarge); _length = s | mask; }
            }
            else
            {
                static assert(0, "Unspecified endianness.");
            }
        }
        else
        {
            // No tricks needed, store the size plainly. The owner sets the discriminator separately.
            size_t _length;
            size_t length() const @safe @nogc
            {
                return _length;
            }
            void length(size_t s) @safe @nogc
            {
                _length = s;
            }
        }
        // </layout>

        // Get length
        alias opDollar = length;

        // Initializes a Large given capacity and content. Capacity must be at least as large as content's size.
        this(in size_t capacity, const(ME)[] content)
        {
            assert(capacity >= content.length);
            mbuf = RCBuffer.make(capacity, content);
            mptr = mbuf.mptr;
            length = content.length;
        }

        // Initializes a Large from a string by copying it.
        this(const(ME)[] s)
        {
            this(s.length, s);
        }

        static if (isString) unittest
        {
            const(ME)[] s1 = "hello, world";
            auto lrg1 = Large(s1);
            assert(lrg1.length == 12);
            immutable lrg2 = immutable Large(s1);
            assert(lrg2.length == 12);
            const lrg3 = const Large(s1);
            assert(lrg3.length == 12);
        }

        // Initializes a Large from a static string by referring to it (no buffer is allocated).
        this(immutable(ME)[] s)
        {
            assert(buf is null);
            ptr = s.ptr;
            length = s.length;
        }

        static if (isString) unittest
        {
            immutable ME[] s = "abcdef";
            auto lrg1 = Large(s);
            assert(lrg1.length == 6);
            assert(lrg1.buf is null);
        }

        // Decrements the reference count and frees buf if it goes down to zero.
        void decRef() nothrow
        {
            if (!mbuf) return;
            if (mbuf.refCount == 1) realloc(mbuf, 0);
            else --mbuf.refCount;
        }

        // Number of elements between the start of the owned payload and `ptr` (non-zero after popFront or
        // slicing). Requires an owned buffer.
        size_t offset() const @nogc
        {
            assert(mbuf);
            return cast(size_t) (cast(const(ME)*) ptr - cast(const(ME)*) (&mbuf.refCount + 1));
        }

        auto opSlice() inout return
        {
            assert(ptr);
            return ptr[0 .. length];
        }

        // Makes sure this string uniquely owns a buffer with room for at least newCapacity elements counted
        // from `ptr` (not from the start of the allocation). Content and length are preserved.
        void reserve(in size_t newCapacity)
        {
            immutable size = this.length;
            version(assert) scope(exit) assert(size == this.length);
            // Never allocate less than what is needed to keep the current content
            immutable wanted = newCapacity < size ? size : newCapacity;
            if (!mbuf)
            {
                // Migrate from static string to allocated string
                mbuf = RCBuffer.make(wanted, ptr[0 .. size]);
                ptr = mbuf.ptr;
                return;
            }
            if (mbuf.refCount > 1)
            {
                // Split this guy making its buffer unique. Copy first, then drop our share of the old buffer.
                auto fresh = RCBuffer.make(wanted, ptr[0 .. size]);
                --mbuf.refCount;
                mbuf = fresh;
                ptr = mbuf.ptr;
                return;
            }
            immutable skipped = offset;
            // Room is measured from ptr, so the skipped prefix does not count
            if (mbuf.capacity - skipped >= wanted) return;
            if (skipped * 2 >= wanted)
            {
                // The dead prefix is large: allocate a compact buffer instead of growing the old one
                auto fresh = RCBuffer.make(wanted, ptr[0 .. size]);
                decRef;
                mbuf = fresh;
                ptr = mbuf.ptr;
            }
            else
            {
                // Grow in place, keeping the prefix so that ptr stays at the same offset
                RCBuffer.resize(mbuf, skipped + wanted);
                ptr = mbuf.ptr + skipped;
            }
        }

        unittest
        {
            Large obj;
            obj.reserve(1);
            assert(obj.mbuf !is null);
            assert(obj.mbuf.capacity >= 1);
            obj.reserve(1000);
            assert(obj.mbuf.capacity >= 1000);
            obj.reserve(10000);
            assert(obj.mbuf.capacity >= 10000);
        }
    }

    // <layout>
    union
    {
        Large large;
        struct
        {
            union
            {
                E[maxSmallSize] small;
                ME[maxSmallSize] msmall;
            }
            ME smallLength;
        }
        size_t[(maxSmallSize + 1) / size_t.sizeof] ancillary; // used internally
    }
    // </layout>

    // Flags the active layout as large. When `Large` overlays the small buffer its length setter already writes
    // the marker; otherwise `smallLength` is separate storage and must be set here.
    void markLarge() @nogc
    {
        static if (!largeOverlaysSmall) smallLength = ME.max;
    }

    // Exchanges the raw representation of `this` and `other` without running postblits or destructors.
    void swapRepr(ref RCXString other) @nogc
    {
        auto mine = cast(ubyte*) &this;
        auto theirs = cast(ubyte*) &other;
        foreach (i; 0 .. RCXString.sizeof)
        {
            immutable held = mine[i];
            mine[i] = theirs[i];
            theirs[i] = held;
        }
    }

    hash_t toHash() const @trusted
    {
        import core.internal.hash : hashOf;
        return this.asSlice.hashOf;
    }

    static if (isString) unittest
    {
        assert(RCXString("a").toHash ==
               RCXString("a").toHash);
        assert(RCXString("a").toHash !=
               RCXString("b").toHash);
        assert(RCXString("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").toHash ==
               RCXString("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").toHash);
        assert(RCXString("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").toHash !=
               RCXString("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb").toHash);
    }

    static if (isString) unittest
    {
        RCXString x;
        assert(x.smallLength == 0);
        assert(x.length == 0);
        x.large.length = 133;
        assert(x.smallLength == E.max);
        assert(x.large.length == 133);
        x.large.length = 0x0088_8888_8888_8888;
        assert(x.large.length == 0x0088_8888_8888_8888);
        assert(x.smallLength == E.max);
    }

    // is this string small?
    bool isSmall() const @safe @nogc
    {
        return smallLength <= maxSmallSize;
    }

    // release all memory associated with this
    // Not marked @nogc explicitly: that depends on the `realloc` parameter (GC.realloc is not @nogc) and is
    // inferred by the compiler.
    private void decRef()
    {
        if (!isSmall) large.decRef;
    }

    // Return a slice with the string's contents
    // Not public because it leaks the internals
    auto asSlice() inout @nogc
    {
        immutable s = smallLength;
        if (s <= maxSmallSize) return small.ptr[0 .. s];
        return large[];
    }

public:

    /// Returns the length of the string
    size_t length() const @nogc
    {
        immutable s = smallLength;
        return s <= maxSmallSize ? s : large.length;
    }
    /// Ditto
    alias opDollar = length;

    static if (isString) unittest
    {
        auto s1 = RCXString("123456789_");
        assert(s1.length == 10);
        s1 ~= RCXString("123456789_123456789_123456789_123456789_12345");
        assert(s1.length == 55);
    }

    /// Needed for correct printing in other modules
    static if (isString)
    {
        string toArray() const @trusted
        {
            return this.asSlice;
        }
    }

    /** Construct a `RCXString` from a slice `s`.

        Slices of at most `maxSmallSize` elements are always copied into the
        in-place small buffer. Longer slices are referenced without copying
        when immutable (assumed to be a literal or GC-allocated) and copied
        into a reference-counted buffer when `const` or mutable.

        Warning: Subsequently deallocating a long immutable `s` will cause
        the `RCXString` to dangle.
     */
    this(C)(C[] s)
        if (is(Unqual!C == ME))
    {
        if (s.length <= maxSmallSize)
        {
            // fits in small, so copy it
            msmall[0 .. s.length] = s[];
            smallLength = cast(ME) s.length;
        }
        else
        {
            // Overload resolution on `Large` picks "refer" for immutable content and "copy" otherwise
            emplace(&large, s);
            markLarge();
        }
    }

    // Test construction from immutable(ME)[], const(ME)[], and ME[]
    static if (isString) unittest
    {
        immutable(E)[] a = "123456789_";
        auto s1 = RCXString(a);
        assert(s1 == a);
        assert(s1.asSlice !is a, "Small strings must be copied");
        a = "123456789_123456789_123456789_123456789_";
        auto s2 = RCXString(a);
        assert(s2 == a);
        assert(s2.asSlice is a, "Large immutable strings shall not be copied");

        const(char)[] b = "123456789_";
        auto s3 = RCXString(b);
        assert(s3 == b);
        assert(s3.isSmall, "Small strings must be copied");
        b = "123456789_123456789_123456789_123456789_";
        auto s4 = RCXString(b);
        assert(s4 == b);
        assert(s4.asSlice !is b, "Large non-immutable strings shall be copied");

        char[] c = "123456789_".dup;
        auto s5 = RCXString(c);
        assert(s5 == c);
        assert(s5.isSmall, "Small strings must be copied");
        c = "123456789_123456789_123456789_123456789_".dup;
        auto s6 = RCXString(c);
        assert(s6 == c);
        assert(s6.asSlice !is c, "Large non-immutable strings shall be copied");
    }

    static if (isString) unittest
    {
        const(ME)[] s = "123456789_123456789_123456789_123456789_";
        auto s1 = RCXString(s);
        assert(s1.large.mbuf);
        auto s2 = s1;
        assert(s1.large.mbuf is s2.large.mbuf);
        assert(s1.large.mbuf.refCount == 2);
        s1 = s ~ "123";
        assert(s1.large.mbuf.refCount == 1);
        assert(s2.large.mbuf.refCount == 1);
        assert(s2 == s);
        assert(s1 == s ~ "123");
        const s3 = s1;
        assert(s1.large.mbuf.refCount == 2);
        immutable s4 = s1;
        //immutable s5 = s3;
        assert(s1.large.mbuf.refCount == 3);
    }

    // Postblit: a copy of a buffer-owning string shares the buffer
    this(this) @nogc
    {
        if (!isSmall && large.mbuf) ++large.mbuf.refCount;
    }

    // Dtor decrements refcount and may deallocate.
    // @nogc is inferred from the `realloc` parameter rather than promised unconditionally.
    ~this() nothrow
    {
        decRef;
    }

    /** Assigns the immutable slice `s`.

        Follows the constructor's rules: short slices are copied, long ones
        are referenced and assumed to live forever. A slice that lies inside
        the buffer already owned by `this` (e.g. obtained through `toArray`)
        simply re-slices that buffer.
     */
    void opAssign(immutable(ME)[] s)
    {
        if (!isSmall && large.mbuf)
        {
            auto lo = large.mbuf.ptr;
            if (s.ptr >= lo && s.ptr + s.length <= lo + large.mbuf.capacity)
            {
                // `s` is a view of our own payload: keep the buffer, just move the window
                large.ptr = s.ptr;
                large.length = s.length;
                return;
            }
        }
        // Build the new value first; the old representation is released when `replacement` dies
        auto replacement = RCXString(s);
        swapRepr(replacement);
    }

    static if (isString) unittest
    {
        immutable(ME)[] s = "123456789_";
        RCXString rcs;
        rcs = s;
        assert(rcs.isSmall);
        s = "123456789_123456789_123456789_123456789_";
        rcs = s;
        assert(!rcs.isSmall);
        assert(rcs.large.mbuf is null);
    }

    /** Assigns a copy of the `const` or mutable slice `s`.

        A uniquely owned buffer that is big enough is reused; otherwise the
        old contents are released and a new representation is built. `s` may
        alias the current contents of `this`.
     */
    void opAssign(const(ME)[] s)
    {
        if (s.length <= maxSmallSize)
        {
            // Fits in small. Stage the data first: `s` may point into the storage we are about to release
            // or overwrite.
            ME[maxSmallSize] staged = void;
            staged[0 .. s.length] = s[];
            decRef;
            msmall[0 .. s.length] = staged[0 .. s.length];
            smallLength = cast(ME) s.length;
            return;
        }
        if (!isSmall && large.mbuf && large.mbuf.refCount == 1 && large.mbuf.capacity >= s.length)
        {
            // Noice, there's room. Write from the start of the allocation (not from a possibly advanced ptr).
            // A forward element copy is overlap-safe because the destination never lies after the source
            // when `s` aliases this buffer.
            auto dst = large.mbuf.mptr;
            foreach (i; 0 .. s.length) dst[i] = s[i];
            large.mptr = dst;
            large.length = s.length;
            return;
        }
        // Tear down and rebuild; the copy is made before the old buffer is released
        auto replacement = RCXString(s);
        swapRepr(replacement);
    }

    static if (isString) unittest
    {
        const(ME)[] s = "123456789_123456789_123456789_123456789_";
        RCXString s1;
        s1 = s;
        assert(!s1.isSmall && s1.large.buf !is null);
        auto p = s1.ptr;
        s1 = s;
        assert(s1.ptr is p, "Wasteful reallocation");
        RCXString s2;
        s2 = s1;
        assert(s1.large.mbuf is s2.large.mbuf);
        assert(s1.large.mbuf.refCount == 2);
        s1 = "123456789_123456789_123456789_123456789_123456789_";
        assert(s1.large.mbuf !is s2.large.mbuf);
        assert(s1.large.mbuf is null);
        assert(s2.large.mbuf.refCount == 1);
        assert(s1 == "123456789_123456789_123456789_123456789_123456789_");
        assert(s2 == "123456789_123456789_123456789_123456789_");
    }

    /// Element-wise comparison with the slice `s`.
    bool opEquals(const(ME)[] s) const @nogc
    {
        if (isSmall) return s.length == smallLength && small[0 .. s.length] == s;
        return large[] == s;
    }

    /// Ditto
    bool opEquals(in RCXString s) const => this == s.asSlice;

    static if (isString) unittest
    {
        const s1 = RCXString("123456789_123456789_123456789_123456789_123456789_");
        RCXString s2 = s1[0 .. 10];
        auto s3 = RCXString("123456789_");
        assert(s2 == s3);
    }

    /** Returns the maximum number of character this string can store without
        requesting more memory.
     */
    size_t capacity() const @property @nogc
    {
        /** This is subtle: if large.mbuf is null (i.e. the string had been constructed from a literal), then the
            capacity is maxSmallSize because that's what we can store without a memory (re)allocation. Same if
            refCount is greater than 1 - we can't reuse the memory.
        */
        if (isSmall || !large.mbuf || large.mbuf.refCount > 1) return maxSmallSize;
        // Elements in front of ptr (dropped by popFront/slicing) are not available for growth
        return large.mbuf.capacity - large.offset;
    }

    static if (isString) unittest
    {
        auto s = RCXString("abc");
        assert(s.capacity == maxSmallSize);
        s = "123456789_123456789_123456789_123456789_123456789_";
        assert(s.capacity == maxSmallSize);
        const char[] lit = "123456789_123456789_123456789_123456789_123456789_";
        s = lit;
        assert(s.capacity >= 50);
    }

    /** Makes sure at least `capacity` elements can be stored without a
        further allocation. Requests that fit the small buffer of a small
        string are no-ops; for a large string this also makes the buffer
        uniquely owned.
     */
    void reserve(in size_t capacity)
    {
        if (isSmall)
        {
            if (capacity <= maxSmallSize)
            {
                // stays small
                return;
            }
            // small to large
            immutable length = smallLength;
            auto newLayout = Large(capacity, small.ptr[0 .. length]);
            large = newLayout;
            markLarge();
        }
        else
        {
            // large to large; Large.reserve decides whether anything has to happen
            large.reserve(capacity);
        }
    }

    static if (isString) unittest
    {
        RCXString s1;
        s1.reserve(1);
        assert(s1.capacity >= 1);
        s1.reserve(1023);
        assert(s1.capacity >= 1023);
        s1.reserve(10230);
        assert(s1.capacity >= 10230);
    }

    /** Appends `s` to `this`. `s` may be a view of the current contents of `this`.
     */
    void opOpAssign(string op : "~")(const(ME)[] s)
    {
        immutable length = this.length;
        immutable newLen = length + s.length;
        if (isSmall)
        {
            if (newLen <= maxSmallSize)
            {
                // stays small
                msmall[length .. newLen] = s;
                smallLength = cast(ME) newLen;
            }
            else
            {
                // small to large; the small buffer stays intact until the new layout is installed
                auto newLayout = Large(newLen, small.ptr[0 .. length]);
                newLayout.mptr[length .. newLen] = s;
                newLayout.length = newLen;
                large = newLayout;
                markLarge();
                assert(!isSmall);
                assert(this.length == newLen);
            }
        }
        else
        {
            // large to large. reserve() may move the payload, so remember where `s` sits inside it.
            auto base = large.ptr;
            immutable aliased = s.length && s.ptr >= base && s.ptr + s.length <= base + length;
            immutable aliasStart = aliased ? cast(size_t) (s.ptr - base) : 0;
            large.reserve(newLen);
            if (aliased) s = large.ptr[aliasStart .. aliasStart + s.length];
            large.mptr[length .. newLen] = s;
            large.length = newLen;
        }
    }

    static if (isString) unittest
    {
        auto s1 = RCXString("123456789_123456789_123456789_123456789_");
        s1 ~= s1;
        assert(s1 == "123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_");
        foreach (i; 0 .. 70) s1.popFront();
        assert(s1 == "123456789_");
        s1 ~= "abc";
        assert(s1 == "123456789_abc");
    }

    /// Ditto
    void opOpAssign(string op : "~")(const auto ref RCXString s)
    {
        this ~= s.asSlice;
    }

    static if (isString) unittest
    {
        RCXString s1;
        s1 = "hello";
        assert(s1 == "hello");
        s1 ~= ", world! ";
        assert(s1 == "hello, world! ");
        s1 ~= s1;
        assert(s1 == "hello, world! hello, world! ");
        s1 ~= s1;
        assert(s1 == "hello, world! hello, world! hello, world! hello, world! ");
        auto s2 = RCXString("yah! ");
        assert(s2 == "yah! ");
        s2 ~= s1;
        assert(s2 == "yah! hello, world! hello, world! hello, world! hello, world! ");
        s2 = "123456789_123456789_123456789_123456789_";
        s2 ~= s2;
        assert(s2 == "123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_");
        auto s3 = s2;
        assert(s3.large.mbuf.refCount == 2);
        s2 ~= "123456789_";
        assert(s2.large.mbuf.refCount == 1);
        assert(s3.large.mbuf.refCount == 1);
        assert(s3 == "123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_");

        s2 = "123456789_123456789_123456789_123456789_";
        const s4 = RCXString(", world");
        s2 ~= s4;
        assert(s2 == "123456789_123456789_123456789_123456789_, world");
        s2 ~= const RCXString("!!!");
        assert(s2 == "123456789_123456789_123456789_123456789_, world!!!");
    }

    /// Returns `true` iff `this` is empty
    bool empty() const @property @nogc => !length;

    static if (isString)
    {
        // Decodes the UTF-8 sequence starting at `p` without validating it (checks are asserts only).
        private dchar unsafeDecode(const(ME)* p) const @nogc
        {
            byte c = *p;
            dchar res = c & 0b0111_1111;
            if (c >= 0) return res;
            // Lead bytes announcing more than 4 code units are not UTF-8
            assert(cast(ubyte) c < 0b1111_1000);
            dchar cover = 0b1000_0000;
            c <<= 1;
            assert(c < 0);
            // One iteration per continuation byte; `cover` cancels the length bits of the lead byte
            do
            {
                ++p;
                assert((*p >> 6) == 0b10);
                cover <<= 5;
                res = (res << 6) ^ *p ^ cover ^ 0b1000_0000;
                c <<= 1;
            } while(c < 0);
            return res;
        }
    }

    /// Returns the first code point of `this`.
    auto front() const @property @nogc in(!empty)
    {
        // TODO: make safe
        static if (isString)
            return unsafeDecode(ptr);
        else
            return ptr[0];
    }

    /// Returns the last code point of `this`.
    static if (isString)
    {
        dchar back() const @property @nogc in(!empty)
        {
            auto first = ptr;
            auto p = first + length - 1;
            if (*p < 0b1000_0000)
                return *p;
            // TODO: make safe
            // Walk back over continuation bytes to the lead byte, never leaving the payload
            while (p > first && !(*p & 0b0100_0000))
                --p;
            return unsafeDecode(p);
        }
    }
    else
        E back() const @property @nogc => ptr[length - 1];

    /// Returns the `n`th code unit in `this`.
    E opIndex(size_t n) const @nogc in(n < length) => ptr[n];

    static if (isString) unittest
    {
        auto s1 = RCXString("hello");
        assert(s1.front == 'h');
        assert(s1[1] == 'e');
        assert(s1.back == 'o');
        assert(s1[$ - 1] == 'o');
        s1 = RCXString("Ü");
        assert(s1.length == 2);
        assert(s1.front == 'Ü');
        assert(s1.back == 'Ü');
    }

    /// Discards the first code point (the first element for non-character types)
    void popFront() @nogc
    {
        assert(!empty && ptr);
        size_t toPop = 1;
        static if (isString)
        {
            // The UTF-8 lead byte tells how many code units the code point spans
            immutable b = *ptr;
            if (b >= 0b1000_0000)
            {
                toPop = (b | 0b0010_0000) != b ? 2
                    : (b | 0b0001_0000) != b ? 3
                    : 4;
            }
        }
        immutable len = length;
        // A truncated trailing sequence must not make the length wrap around
        if (toPop > len) toPop = len;
        if (isSmall)
        {
            // Must shuffle in place
            // TODO: make faster
            foreach (i; 0 .. len - toPop)
                msmall[i] = small[i + toPop];
            smallLength = cast(ME) (len - toPop);
        }
        else
        {
            large.ptr += toPop;
            large.length = large.length - toPop;
        }
    }

    static if (isString) unittest
    {
        auto s1 = RCXString("123456789_");
        auto s2 = s1;
        s1.popFront();
        assert(s1 == "23456789_");
        assert(s2 == "123456789_");
        s1 = RCXString("123456789_123456789_123456789_123456789_");
        s2 = s1;
        s1.popFront();
        assert(s1 == "23456789_123456789_123456789_123456789_");
        assert(s2 == "123456789_123456789_123456789_123456789_");
        s1 = "öü";
        s2 = s1;
        s1.popFront();
        assert(s1 == "ü");
        assert(s2 == "öü");
    }

    /// Discards the last code point (the last element for non-character types)
    void popBack() @nogc
    {
        assert(!empty && ptr);
        size_t diff = 1;
        static if (isString)
        {
            auto first = ptr;
            auto p = first + length - 1;
            if (*p >= 0b1000_0000)
            {
                // TODO: make safe
                // Walk back over continuation bytes to the lead byte, never leaving the payload
                while (p > first && !(*p & 0b0100_0000))
                    --p;
                diff = cast(size_t) ((first + length) - p);
            }
        }
        assert(diff <= length);
        if (isSmall) smallLength = cast(ME) (smallLength - diff);
        else large.length = large.length - diff;
    }

    static if (isString) unittest
    {
        auto s1 = RCXString("123456789_");
        auto s2 = s1;
        s1.popBack;
        assert(s1 == "123456789");
        assert(s2 == "123456789_");
        s1 = RCXString("123456789_123456789_123456789_123456789_");
        s2 = s1;
        s1.popBack;
        assert(s1 == "123456789_123456789_123456789_123456789");
        assert(s2 == "123456789_123456789_123456789_123456789_");
        s1 = "öü";
        s2 = s1;
        s1.popBack;
        assert(s1 == "ö");
        assert(s2 == "öü");
    }

    /// Returns a slice to the entire string or a portion of it.
    auto opSlice() inout @nogc
    {
        return this;
    }

    /// Ditto
    auto opSlice(size_t b, size_t e) inout
    {
        assert(b <= e && e <= length);
        auto ptr = this.ptr;
        auto sz = e - b;
        if (sz <= maxSmallSize)
        {
            // result is small: copy the elements, no buffer is shared
            RCXString result = void;
            result.msmall[0 .. sz] = ptr[b .. e];
            result.smallLength = cast(E) sz;
            return result;
        }
        // result is large: share the buffer (the copy bumps the reference count) and narrow the window
        assert(!isSmall);
        RCXString result = this;
        result.large.ptr += b;
        result.large.length = e - b;
        return result;
    }

    static if (isString) unittest
    {
        immutable s = RCXString("123456789_123456789_123456789_123456789");
        RCXString s1 = s[0 .. 38];
        assert(!s1.isSmall && s1.large.buf is null);
    }

    // Unsafe! Returns a pointer to the beginning of the payload.
    auto ptr() inout @nogc
    {
        return isSmall ? small.ptr : large.ptr;
    }

    static if (isString) unittest
    {
        auto s1 = RCXString("hello");
        auto s2 = s1[1 .. $ - 1];
        assert(s2 == "ell");
        s1 = "123456789_123456789_123456789_123456789_";
        s2 = s1[1 .. $ - 1];
        assert(s2 == "23456789_123456789_123456789_123456789");
    }

    /// Returns the concatenation of `this` with `s`.
    RCXString opBinary(string op : "~")(const auto ref RCXString s) const
    {
        return this ~ s.asSlice;
    }

    /// Ditto
    RCXString opBinary(string op : "~")(const(ME)[] s) const
    {
        immutable length = this.length;
        immutable resultLen = length + s.length;
        RCXString result = void;
        if (resultLen <= maxSmallSize)
        {
            // noice
            result.msmall.ptr[0 .. length] = ptr[0 .. length];
            result.msmall.ptr[length .. resultLen] = s[];
            result.smallLength = cast(ME) resultLen;
            return result;
        }
        // Allocate the final size up front, seeded with our contents, then append
        emplace(&result.large, resultLen, this.asSlice);
        result.markLarge();
        result ~= s;
        return result;
    }

    /// Returns the concatenation of `s` with `this`.
    RCXString opBinaryRight(string op : "~")(const(ME)[] s) const
    {
        immutable length = this.length, resultLen = length + s.length;
        RCXString result = void;
        if (resultLen <= maxSmallSize)
        {
            // noice. Read through ptr: a short string may still use the large layout.
            result.msmall.ptr[0 .. s.length] = s[];
            result.msmall.ptr[s.length .. resultLen] = ptr[0 .. length];
            result.smallLength = cast(ME) resultLen;
            return result;
        }
        // Allocate the final size up front, seeded with `s`, then append our contents
        emplace(&result.large, resultLen, s);
        result.markLarge();
        result ~= this;
        return result;
    }

    static if (isString) unittest
    {
        auto s1 = RCXString("hello");
        auto s2 = s1 ~ ", world!";
        assert(s2 == "hello, world!");
        s1 = "123456789_123456789_123456789_123456789_";
        s2 = s1 ~ "abcdefghi_";
        assert(s2 == "123456789_123456789_123456789_123456789_abcdefghi_");
        s2 = s1 ~ s1;
        assert(s2 == "123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_");
        s2 = "abcdefghi_" ~ s1;
        assert(s2 == "abcdefghi_123456789_123456789_123456789_123456789_");
    }
}
935 
// Smoke test: the template must instantiate with a non-character element type.
unittest
{
    RCXString!(immutable uint) x;
}
941 
942 /// verify UTF-8 storage
unittest
{
    import std.algorithm : count;

    // Three code points, two UTF-8 code units each
    RCString rcs = "åäö";
    assert(rcs.length == 6);
    assert(rcs.count == 3);

    // Range iteration yields whole code points, in order
    foreach (dchar expected; "åäö")
    {
        assert(rcs.front == expected);
        rcs.popFront();
    }
    assert(rcs.empty);
}
958 
959 version = profile;
960 
961 /// shows performance increase for SSO over built-in string
version(profile) unittest
{
    // std.datetime no longer exports StopWatch; the current one lives in std.datetime.stopwatch
    // and its peek() returns a core.time.Duration.
    import std.datetime.stopwatch : AutoStart, StopWatch;
    import std.stdio : writeln;

    enum maxSmallSize = 23;
    alias S = RCXString!(immutable char, maxSmallSize);

    enum n = 2^^21;

    char[maxSmallSize] ss;

    // Construct n small strings; no allocation is expected since the content fits the small buffer
    auto sw = StopWatch(AutoStart.yes);
    foreach (i; 0 .. n)
    {
        auto x = S(ss[]);
    }
    sw.stop();
    immutable timeRCString = sw.peek;
    writeln("> RCString took ", timeRCString);

    // Same content through the built-in string, which allocates on every iteration
    sw.reset();
    sw.start();
    foreach (i; 0 .. n)
    {
        string x = ss.idup;
    }
    sw.stop();
    immutable timeString = sw.peek;
    writeln("> Builtin string took ", timeString);

    // Use the finest resolution and guard the division: the RCString loop may be too fast to measure
    immutable rcTicks = timeRCString.total!"hnsecs";
    if (rcTicks > 0)
        writeln("> Speedup: ", cast(double) timeString.total!"hnsecs" / rcTicks);
    else
        writeln("> Speedup: n/a (RCString loop too fast to measure)");
}