1 /** Reference Counted Array.
2     See_Also: http://dpaste.dzfl.pl/817283c163f5
3  */
4 module nxt.rcstring;
5 
6 import core.memory : GC;
7 // import core.stdc.stdlib;
8 // import core.stdc.string;
9 // import std.algorithm;
10 
11 /** Reference Counted (RC) version of string.
12  */
13 alias RCString = RCXString!(immutable char);
14 
15 /** Reference Counted Array.
16     Configured with character type `E`, maximum length for the small string optimization,
17     and the allocation function, which must have the same semantics as `realloc`.
18 
19     See_Also: https://github.com/burner/std.rcstring
20 */
21 struct RCXString(E = immutable char, size_t maxSmallSize = 23, alias realloc = GC.realloc)
22 {
23     pure nothrow:
24 
25     // Preconditions
26     static assert(is(E == immutable), "Only immutable characters supported for now.");
27     static assert(E.alignof <= 4, "Character type must be 32-bit aligned at most.");
28     static assert(E.min == 0, "Character type must be unsigned.");
29     static assert((maxSmallSize + 1) * E.sizeof % size_t.sizeof == 0,
30                   "maxSmallSize + 1 must be a multiple of size_t.sizeof.");
31     static assert((maxSmallSize + 1) * E.sizeof >= 3 * size_t.sizeof,
32                   "maxSmallSize + 1 must be >= size_t.sizeof * 3.");
33     static assert(maxSmallSize < E.max, "maxSmallSize must be less than E.max");
34 
35     enum maxSmallLength = maxSmallSize;
36 
37 private:
38     // import std.utf;
39     import core.lifetime : emplace;
40     import std.traits: isSomeChar, Unqual;
41 
42     version(unittest) import std.stdio;
43 
44     alias ME = Unqual!E; // mutable E
45 
46     enum isString = isSomeChar!E;
47 
48     // Simple reference-counted buffer. The reference count itself is a E. Layout is a size_t (the capacity)
49     // followed by the reference count followed by the payload.
50     struct RCBuffer
51     {
52         size_t capacity;
53         uint refCount;
54 
55         // Data starts right after the refcount, no padding because of the static assert above
56         ME* mptr() @nogc { return cast(ME*) (&refCount + 1); }
57         E* ptr() @nogc { return cast(E*) mptr; }
58 
59         // Create a new buffer given capacity and initializes payload. Capacity must be large enough.
60         static RCBuffer* make(size_t capacity, const(ME)[] content)
61         {
62             assert(capacity >= content.length);
63             auto result = cast(RCBuffer*) realloc(null, size_t.sizeof + uint.sizeof + capacity * E.sizeof);
64             result || assert(0);
65             result.capacity = capacity;
66             result.refCount = 1;
67             result.mptr[0 .. content.length] = content;
68             return result;
69         }
70 
71         // Resize the buffer. It is assumed the reference count is 1.
72         static void resize(ref RCBuffer* p, size_t capacity)
73         {
74             assert(p.refCount == 1);
75             p = cast(RCBuffer*) realloc(p, size_t.sizeof + uint.sizeof + capacity * E.sizeof);
76             p || assert(0);
77             p.capacity = capacity;
78         }
79 
80         unittest
81         {
82             auto p = make(101, null);
83             assert(p.refCount == 1);
84             assert(p.capacity == 101);
85             resize(p, 203);
86             assert(p.refCount == 1);
87             assert(p.capacity == 203);
88             realloc(p, 0);
89         }
90     }
91 
92     // Hosts a large string
93     struct Large
94     {
95         // <layout>
96         union
97         {
98             immutable RCBuffer* buf;
99             RCBuffer* mbuf;
100         }
101         union
102         {
103             E* ptr;
104             ME* mptr;
105         }
106         static if ((maxSmallSize + 1) * E.sizeof == 3 * size_t.sizeof)
107         {
108             /* The small buffer and the large buffer overlap. This means the large buffer must give up its last byte
109              * as a discriminator.
110              */
111             size_t _length;
112             enum maxLarge = size_t.max >> (8 * E.sizeof);
113             version(BigEndian)
114             {
115                 // Use the LSB to store the marker
116                 size_t length() const @safe @nogc { return _length >> 8 * E.sizeof; }
117                 void length(size_t s) @safe @nogc { _length = Marker.isRefCounted | (s << (8 * E.sizeof)); }
118             }
119             else version(LittleEndian)
120             {
121                 // Use the MSB to store the marker
122                 private enum size_t mask = size_t(E.max) << (8 * (size_t.sizeof - E.sizeof));
123                 size_t length() const @safe @nogc { return _length & ~mask; }
124                 void length(size_t s) @safe @nogc { assert(s <= maxLarge); _length = s | mask; }
125             }
126             else
127             {
128                 static assert(0, "Unspecified endianness.");
129             }
130         }
131         else
132         {
133             // No tricks needed, store the size plainly
134             size_t _length;
135             size_t length() const @safe @nogc
136             {
137                 return _length;
138             }
139             void length(size_t s) @safe @nogc
140             {
141                 _length = s;
142             }
143         }
144         // </layout>
145 
146         // Get length
147         alias opDollar = length;
148 
149         // Initializes a Large given capacity and content. Capacity must be at least as large as content's size.
150         this(size_t capacity, const(ME)[] content)
151         {
152             assert(capacity >= content.length);
153             mbuf = RCBuffer.make(capacity, content);
154             mptr = mbuf.mptr;
155             length = content.length;
156         }
157 
158         // Initializes a Large from a string by copying it.
159         this(const(ME)[] s)
160         {
161             this(s.length, s);
162         }
163 
164         static if (isString) unittest
165         {
166             const(ME)[] s1 = "hello, world";
167             auto lrg1 = Large(s1);
168             assert(lrg1.length == 12);
169             immutable lrg2 = immutable Large(s1);
170             assert(lrg2.length == 12);
171             const lrg3 = const Large(s1);
172             assert(lrg3.length == 12);
173         }
174 
175         // Initializes a Large from a static string by referring to it.
176         this(immutable(ME)[] s)
177         {
178             assert(buf is null);
179             ptr = s.ptr;
180             length = s.length;
181         }
182 
183         static if (isString) unittest
184         {
185             immutable ME[] s = "abcdef";
186             auto lrg1 = Large(s);
187             assert(lrg1.length == 6);
188             assert(lrg1.buf is null);
189         }
190 
191         // Decrements the reference count and frees buf if it goes down to zero.
192         void decRef() nothrow
193         {
194             if (!mbuf) return;
195             if (mbuf.refCount == 1) realloc(mbuf, 0);
196             else --mbuf.refCount;
197         }
198 
199         auto opSlice() inout return
200         {
201             assert(ptr);
202             return ptr[0 .. length];
203         }
204 
205         // Makes sure there's room for at least newCap Chars.
206         void reserve(size_t newCap)
207         {
208             if (mbuf && mbuf.refCount == 1 && mbuf.capacity >= newCap) return;
209             immutable size = this.length;
210             version(assert) scope(exit) assert(size == this.length);
211             if (!mbuf)
212             {
213                 // Migrate from static string to allocated string
214                 mbuf = RCBuffer.make(newCap, ptr[0 .. size]);
215                 ptr = mbuf.ptr;
216                 return;
217             }
218             if (mbuf.refCount > 1)
219             {
220                 // Split this guy making its buffer unique
221                 --mbuf.refCount;
222                 mbuf = RCBuffer.make(newCap, ptr[0 .. size]);
223                 ptr = mbuf.ptr;
224                 // size stays untouched
225             }
226             else
227             {
228                 immutable offset = ptr - mbuf.ptr;
229                 // If offset is too large, it's worth decRef()ing and then allocating a new buffer
230                 if (offset * 2 >= newCap)
231                 {
232                     auto newBuf = RCBuffer.make(newCap, ptr[0 .. size]);
233                     decRef;
234                     mbuf = newBuf;
235                     ptr = mbuf.ptr;
236                 }
237                 else
238                 {
239                     RCBuffer.resize(mbuf, newCap);
240                     ptr = mbuf.ptr + offset;
241                 }
242             }
243         }
244 
245         unittest
246         {
247             Large obj;
248             obj.reserve(1);
249             assert(obj.mbuf !is null);
250             assert(obj.mbuf.capacity >= 1);
251             obj.reserve(1000);
252             assert(obj.mbuf.capacity >= 1000);
253             obj.reserve(10000);
254             assert(obj.mbuf.capacity >= 10000);
255         }
256     }
257 
258     // <layout>
259     union
260     {
261         Large large;
262         struct
263         {
264             union
265             {
266                 E[maxSmallSize] small;
267                 ME[maxSmallSize] msmall;
268             }
269             ME smallLength;
270         }
271         size_t[(maxSmallSize + 1) / size_t.sizeof] ancillary; // used internally
272     }
273     // </layout>
274 
275     hash_t toHash() const @trusted
276     {
277         import core.internal.hash : hashOf;
278         return this.asSlice.hashOf;
279     }
280 
281     static if (isString) unittest
282     {
283         assert(RCXString("a").toHash ==
284                RCXString("a").toHash);
285         assert(RCXString("a").toHash !=
286                RCXString("b").toHash);
287         assert(RCXString("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").toHash ==
288                RCXString("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").toHash);
289         assert(RCXString("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").toHash !=
290                RCXString("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb").toHash);
291     }
292 
293     static if (isString) unittest
294     {
295         RCXString x;
296         assert(x.smallLength == 0);
297         assert(x.length == 0);
298         x.large.length = 133;
299         assert(x.smallLength == E.max);
300         assert(x.large.length == 133);
301         x.large.length = 0x0088_8888_8888_8888;
302         assert(x.large.length == 0x0088_8888_8888_8888);
303         assert(x.smallLength == E.max);
304     }
305 
306     // is this string small?
307     bool isSmall() const @safe @nogc
308     {
309         return smallLength <= maxSmallSize;
310     }
311 
312     // release all memory associated with this
313     private void decRef() @nogc
314     {
315         if (!isSmall) large.decRef;
316     }
317 
318     // Return a slice with the string's contents
319     // Not public because it leaks the internals
320     auto asSlice() inout @nogc
321     {
322         immutable s = smallLength;
323         if (s <= maxSmallSize) return small.ptr[0 .. s];
324         return large[];
325     }
326 
327 public:
328 
329     /// Returns the length of the string
330     size_t length() const @nogc
331     {
332         immutable s = smallLength;
333         return s <= maxSmallSize ? s : large.length;
334     }
335     /// Ditto
336     alias opDollar = length;
337 
338     static if (isString) unittest
339     {
340         auto s1 = RCXString("123456789_");
341         assert(s1.length == 10);
342         s1 ~= RCXString("123456789_123456789_123456789_123456789_12345");
343         assert(s1.length == 55);
344     }
345 
346     /// Needed for correct printing in other modules
347     static if (isString)
348     {
349         string toArray() const @trusted
350         {
351             return this.asSlice;
352         }
353     }
354 
355     /** Construct a `RCXString` from a slice `s`.
356 
357         If the slice is immutable, assumes the slice is a literal or
358         GC-allocated and does NOT copy it internally.
359 
360         Warning: Subsequently deallocating `s` will cause the `RCXString`
361         to dangle. If the slice has `const` or mutable characters, creates
362         and manages a copy internally.
363      */
364     this(C)(C[] s)
365         if (is(Unqual!C == ME))
366     {
367         // Contents is immutable, we may assume it won't go away ever
368         if (s.length <= maxSmallSize)
369         {
370             // fits in small
371             small[0 .. s.length] = s[]; // so copy it
372             smallLength = cast(E)s.length;
373         }
374         else
375         {
376             emplace(&large, s);
377         }
378     }
379 
380     // Test construction from immutable(ME)[], const(ME)[], and ME[]
381     static if (isString) unittest
382     {
383         immutable(E)[] a = "123456789_";
384         auto s1 = RCXString(a);
385         assert(s1 == a);
386         assert(s1.asSlice !is a, "Small strings must be copied");
387         a = "123456789_123456789_123456789_123456789_";
388         auto s2 = RCXString(a);
389         assert(s2 == a);
390         assert(s2.asSlice is a, "Large immutable strings shall not be copied");
391 
392         const(char)[] b = "123456789_";
393         auto s3 = RCXString(b);
394         assert(s3 == b);
395         assert(s3.isSmall, "Small strings must be copied");
396         b = "123456789_123456789_123456789_123456789_";
397         auto s4 = RCXString(b);
398         assert(s4 == b);
399         assert(s4.asSlice !is b, "Large non-immutable strings shall be copied");
400 
401         char[] c = "123456789_".dup;
402         auto s5 = RCXString(c);
403         assert(s5 == c);
404         assert(s5.isSmall, "Small strings must be copied");
405         c = "123456789_123456789_123456789_123456789_".dup;
406         auto s6 = RCXString(c);
407         assert(s6 == c);
408         assert(s6.asSlice !is c, "Large non-immutable strings shall be copied");
409     }
410 
411     static if (isString) unittest
412     {
413         const(ME)[] s = "123456789_123456789_123456789_123456789_";
414         auto s1 = RCXString(s);
415         assert(s1.large.mbuf);
416         auto s2 = s1;
417         assert(s1.large.mbuf is s2.large.mbuf);
418         assert(s1.large.mbuf.refCount == 2);
419         s1 = s ~ "123";
420         assert(s1.large.mbuf.refCount == 1);
421         assert(s2.large.mbuf.refCount == 1);
422         assert(s2 == s);
423         assert(s1 == s ~ "123");
424         const s3 = s1;
425         assert(s1.large.mbuf.refCount == 2);
426         immutable s4 = s1;
427         //immutable s5 = s3;
428         assert(s1.large.mbuf.refCount == 3);
429     }
430 
431     // Postblit
432     this(this) @nogc
433     {
434         if (!isSmall && large.mbuf) ++large.mbuf.refCount;
435     }
436 
437     // Dtor decrements refcount and may deallocate
438     ~this() @nogc
439     {
440         decRef;
441     }
442 
443     // Assigns another string
444     void opAssign(immutable(ME)[] s)
445     {
446         decRef;
447         // Contents is immutable, we may assume it won't go away ever
448         emplace(&this, s);
449     }
450 
451     static if (isString) unittest
452     {
453         immutable(ME)[] s = "123456789_";
454         RCXString rcs;
455         rcs = s;
456         assert(rcs.isSmall);
457         s = "123456789_123456789_123456789_123456789_";
458         rcs = s;
459         assert(!rcs.isSmall);
460         assert(rcs.large.mbuf is null);
461     }
462 
463     // Assigns another string
464     void opAssign(const(ME)[] s)
465     {
466         if (capacity >= s.length)
467         {
468             // Noice, there's room
469             if (s.length <= maxSmallSize)
470             {
471                 // Fits in small
472                 msmall[0 .. s.length] = s[];
473                 smallLength = cast(E) s.length;
474             }
475             else
476             {
477                 // Large it is
478                 assert(!isSmall);
479                 large.mptr[0 .. s.length] = s;
480                 large.length = s.length;
481             }
482         }
483         else
484         {
485             // Tear down and rebuild
486             decRef;
487             emplace(&this, s);
488         }
489     }
490 
491     static if (isString) unittest
492     {
493         const(ME)[] s = "123456789_123456789_123456789_123456789_";
494         RCXString s1;
495         s1 = s;
496         assert(!s1.isSmall && s1.large.buf !is null);
497         auto p = s1.ptr;
498         s1 = s;
499         assert(s1.ptr is p, "Wasteful reallocation");
500         RCXString s2;
501         s2 = s1;
502         assert(s1.large.mbuf is s2.large.mbuf);
503         assert(s1.large.mbuf.refCount == 2);
504         s1 = "123456789_123456789_123456789_123456789_123456789_";
505         assert(s1.large.mbuf !is s2.large.mbuf);
506         assert(s1.large.mbuf is null);
507         assert(s2.large.mbuf.refCount == 1);
508         assert(s1 == "123456789_123456789_123456789_123456789_123456789_");
509         assert(s2 == "123456789_123456789_123456789_123456789_");
510     }
511 
512     bool opEquals(const(ME)[] s) const @nogc
513     {
514         if (isSmall) return s.length == smallLength && small[0 .. s.length] == s;
515         return large[] == s;
516     }
517 
518     bool opEquals(in RCXString s) const
519     {
520         return this == s.asSlice;
521     }
522 
523     static if (isString) unittest
524     {
525         const s1 = RCXString("123456789_123456789_123456789_123456789_123456789_");
526         RCXString s2 = s1[0 .. 10];
527         auto s3 = RCXString("123456789_");
528         assert(s2 == s3);
529     }
530 
531     /** Returns the maximum number of character this string can store without
532         requesting more memory.
533      */
534     size_t capacity() const @nogc
535     {
536         /** This is subtle: if large.mbuf is null (i.e. the string had been constructed from a literal), then the
537             capacity is maxSmallSize because that's what we can store without a memory (re)allocation. Same if refCount is
538             greater than 1 - we can't reuse the memory.
539         */
540         return isSmall || !large.mbuf || large.mbuf.refCount > 1 ? maxSmallSize : large.mbuf.capacity;
541     }
542 
543     static if (isString) unittest
544     {
545         auto s = RCXString("abc");
546         assert(s.capacity == maxSmallSize);
547         s = "123456789_123456789_123456789_123456789_123456789_";
548         assert(s.capacity == maxSmallSize);
549         const char[] lit = "123456789_123456789_123456789_123456789_123456789_";
550         s = lit;
551         assert(s.capacity >= 50);
552     }
553 
554     void reserve(size_t capacity)
555     {
556         if (isSmall)
557         {
558             if (capacity <= maxSmallSize)
559             {
560                 // stays small
561                 return;
562             }
563             // small to large
564             immutable length = smallLength;
565             auto newLayout = Large(capacity, small.ptr[0 .. length]);
566             large = newLayout;
567         }
568         else
569         {
570             // large to large
571             if (large.mbuf && large.mbuf.capacity >= capacity) return;
572             large.reserve(capacity);
573         }
574     }
575 
576     static if (isString) unittest
577     {
578         RCXString s1;
579         s1.reserve(1);
580         assert(s1.capacity >= 1);
581         s1.reserve(1023);
582         assert(s1.capacity >= 1023);
583         s1.reserve(10230);
584         assert(s1.capacity >= 10230);
585     }
586 
587     /** Appends `s` to `this`.
588      */
589     void opOpAssign(string s : "~")(const(ME)[] s)
590     {
591         immutable length = this.length;
592         immutable newLen = length + s.length;
593         if (isSmall)
594         {
595             if (newLen <= maxSmallSize)
596             {
597                 // stays small
598                 msmall[length .. newLen] = s;
599                 smallLength = cast(E) newLen;
600             }
601             else
602             {
603                 // small to large
604                 auto newLayout = Large(newLen, small.ptr[0 .. length]);
605                 newLayout.mptr[length .. newLen][] = s;
606                 newLayout.length = newLen;
607                 large = newLayout;
608                 assert(!isSmall);
609                 assert(this.length == newLen);
610             }
611         }
612         else
613         {
614             // large to large
615             large.reserve(newLen);
616             large.mptr[length .. newLen][] = s;
617             large.length = newLen;
618         }
619     }
620 
621     static if (isString) unittest
622     {
623         auto s1 = RCXString("123456789_123456789_123456789_123456789_");
624         s1 ~= s1;
625         assert(s1 == "123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_");
626         foreach (i; 0 .. 70) s1.popFront();
627         assert(s1 == "123456789_");
628         s1 ~= "abc";
629         assert(s1 == "123456789_abc");
630     }
631 
632     /// Ditto
633     void opOpAssign(string s : "~")(const auto ref RCXString s)
634     {
635         this ~= s.asSlice;
636     }
637 
638     static if (isString) unittest
639     {
640         RCXString s1;
641         s1 = "hello";
642         assert(s1 == "hello");
643         s1 ~= ", world! ";
644         assert(s1 == "hello, world! ");
645         s1 ~= s1;
646         assert(s1 == "hello, world! hello, world! ");
647         s1 ~= s1;
648         assert(s1 == "hello, world! hello, world! hello, world! hello, world! ");
649         auto s2 = RCXString("yah! ");
650         assert(s2 == "yah! ");
651         s2 ~= s1;
652         assert(s2 == "yah! hello, world! hello, world! hello, world! hello, world! ");
653         s2 = "123456789_123456789_123456789_123456789_";
654         s2 ~= s2;
655         assert(s2 == "123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_");
656         auto s3 = s2;
657         assert(s3.large.mbuf.refCount == 2);
658         s2 ~= "123456789_";
659         assert(s2.large.mbuf.refCount == 1);
660         assert(s3.large.mbuf.refCount == 1);
661         assert(s3 == "123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_");
662 
663         s2 = "123456789_123456789_123456789_123456789_";
664         const s4 = RCXString(", world");
665         s2 ~= s4;
666         assert(s2 == "123456789_123456789_123456789_123456789_, world");
667         s2 ~= const RCXString("!!!");
668         assert(s2 == "123456789_123456789_123456789_123456789_, world!!!");
669     }
670 
671     /// Returns `true` iff `this` is empty
672     bool empty() const @nogc
673     {
674         return !length;
675     }
676 
677     static if (isString)
678     {
679         private dchar unsafeDecode(const(ME)* p) const @nogc
680         {
681             byte c = *p;
682             dchar res = c & 0b0111_1111;
683             if (c >= 0) return res;
684             assert(c < 0b1111_1000);
685             dchar cover = 0b1000_0000;
686             c <<= 1;
687             assert(c < 0);
688             do
689             {
690                 ++p;
691                 assert((*p >> 6) == 0b10);
692                 cover <<= 5;
693                 res = (res << 6) ^ *p ^ cover ^ 0b1000_0000;
694                 c <<= 1;
695             } while(c < 0);
696             return res;
697         }
698     }
699 
700     /// Returns the first code point of `this`.
701     auto front() const @nogc
702     {
703         assert(!empty);
704         // TODO make safe
705         static if (isString)
706         {
707             return unsafeDecode(ptr);
708         }
709         else
710         {
711             return ptr[0];
712         }
713     }
714 
715     /// Returns the last code point of `this`.
716     static if (isString)
717     {
718         dchar back() const @nogc
719         {
720             assert(!empty);
721             auto p = ptr + length - 1;
722             if (*p < 0b1000_0000) return *p;
723             // TODO make safe
724             do
725             {
726                 --p;
727             } while (!(*p & 0b0100_0000));
728             return unsafeDecode(p);
729         }
730     }
731     else
732     {
733         E back() const @nogc
734         {
735             return ptr[length - 1];
736         }
737     }
738 
739     /// Returns the `n`th code unit in `this`.
740     E opIndex(size_t n) const @nogc
741     {
742         assert(n < length);
743         return ptr[n];
744     }
745 
746     static if (isString) unittest
747     {
748         auto s1 = RCXString("hello");
749         assert(s1.front == 'h');
750         assert(s1[1] == 'e');
751         assert(s1.back == 'o');
752         assert(s1[$ - 1] == 'o');
753         s1 = RCXString("Ü");
754         assert(s1.length == 2);
755         assert(s1.front == 'Ü');
756         assert(s1.back == 'Ü');
757     }
758 
759     /// Discards the first code point
760     void popFront() @nogc
761     {
762         assert(!empty && ptr);
763         uint toPop = 1;
764         auto b = *ptr;
765         if (b >= 0b1000_0000)
766         {
767             toPop = (b | 0b0010_0000) != b ? 2
768                 : (b | 0b0001_0000) != b ? 3
769                 : 4;
770         }
771         if (isSmall)
772         {
773             // Must shuffle in place
774             // TODO make faster
775             foreach (i; 0 .. length - toPop)
776             {
777                 msmall[i] = small[i + toPop];
778             }
779             smallLength -= toPop;
780         }
781         else
782         {
783             large.ptr += toPop;
784             large.length = large.length - toPop;
785         }
786     }
787 
788     static if (isString) unittest
789     {
790         auto s1 = RCXString("123456789_");
791         auto s2 = s1;
792         s1.popFront();
793         assert(s1 == "23456789_");
794         assert(s2 == "123456789_");
795         s1 = RCXString("123456789_123456789_123456789_123456789_");
796         s2 = s1;
797         s1.popFront();
798         assert(s1 == "23456789_123456789_123456789_123456789_");
799         assert(s2 == "123456789_123456789_123456789_123456789_");
800         s1 = "öü";
801         s2 = s1;
802         s1.popFront();
803         assert(s1 == "ü");
804         assert(s2 == "öü");
805     }
806 
807     /// Discards the last code point
808     void popBack() @nogc
809     {
810         assert(!empty && ptr);
811         auto p = ptr + length - 1;
812         if (*p < 0b1000_0000)
813         {
814             // hot path
815             if (isSmall) --smallLength;
816             else large.length = large.length - 1;
817             return;
818         }
819         // TODO make safe
820         auto p1 = p;
821         do
822         {
823             --p;
824         } while (!(*p & 0b0100_0000));
825         immutable diff = p1 - p + 1;
826         assert(diff > 1 && diff <= length);
827         if (isSmall) smallLength -= diff;
828         else large.length = large.length - diff;
829     }
830 
831     static if (isString) unittest
832     {
833         auto s1 = RCXString("123456789_");
834         auto s2 = s1;
835         s1.popBack;
836         assert(s1 == "123456789");
837         assert(s2 == "123456789_");
838         s1 = RCXString("123456789_123456789_123456789_123456789_");
839         s2 = s1;
840         s1.popBack;
841         assert(s1 == "123456789_123456789_123456789_123456789");
842         assert(s2 == "123456789_123456789_123456789_123456789_");
843         s1 = "öü";
844         s2 = s1;
845         s1.popBack;
846         assert(s1 == "ö");
847         assert(s2 == "öü");
848     }
849 
850     /// Returns a slice to the entire string or a portion of it.
851     auto opSlice() inout @nogc
852     {
853         return this;
854     }
855 
856     /// Ditto
857     auto opSlice(size_t b, size_t e) inout
858     {
859         assert(b <= e && e <= length);
860         auto ptr = this.ptr;
861         auto sz = e - b;
862         if (sz <= maxSmallSize)
863         {
864             // result is small
865             RCXString result = void;
866             result.msmall[0 .. sz] = ptr[b .. e];
867             result.smallLength = cast(E) sz;
868             return result;
869         }
870         assert(!isSmall);
871         RCXString result = this;
872         result.large.ptr += b;
873         result.large.length = e - b;
874         return result;
875     }
876 
877     static if (isString) unittest
878     {
879         immutable s = RCXString("123456789_123456789_123456789_123456789");
880         RCXString s1 = s[0 .. 38];
881         assert(!s1.isSmall && s1.large.buf is null);
882     }
883 
884     // Unsafe! Returns a pointer to the beginning of the payload.
885     auto ptr() inout @nogc
886     {
887         return isSmall ? small.ptr : large.ptr;
888     }
889 
890     static if (isString) unittest
891     {
892         auto s1 = RCXString("hello");
893         auto s2 = s1[1 .. $ - 1];
894         assert(s2 == "ell");
895         s1 = "123456789_123456789_123456789_123456789_";
896         s2 = s1[1 .. $ - 1];
897         assert(s2 == "23456789_123456789_123456789_123456789");
898     }
899 
900     /// Returns the concatenation of `this` with `s`.
901     RCXString opBinary(string s = "~")(const auto ref RCXString s) const
902     {
903         return this ~ s.asSlice;
904     }
905 
906     /// Ditto
907     RCXString opBinary(string s = "~")(const(ME)[] s) const
908     {
909         immutable length = this.length;
910         auto resultLen = length + s.length;
911         RCXString result = void;
912         if (resultLen <= maxSmallSize)
913         {
914             // noice
915             result.msmall.ptr[0 .. length] = ptr[0 .. length];
916             result.msmall.ptr[length .. resultLen] = s[];
917             result.smallLength = cast(E) resultLen;
918             return result;
919         }
920         emplace(&result.large, resultLen, this.asSlice);
921         result ~= s;
922         return result;
923     }
924 
925     /// Returns the concatenation of `s` with `this`.
926     RCXString opBinaryRight(string s = "~")(const(E)[] s) const
927     {
928         immutable length = this.length, resultLen = length + s.length;
929         RCXString result = void;
930         if (resultLen <= maxSmallSize)
931         {
932             // noice
933             result.msmall.ptr[0 .. s.length] = s[];
934             result.msmall.ptr[s.length .. resultLen] = small.ptr[0 .. length];
935             result.smallLength = cast(E) resultLen;
936             return result;
937         }
938         emplace(&result.large, resultLen, s);
939         result ~= this;
940         return result;
941     }
942 
943     static if (isString) unittest
944     {
945         auto s1 = RCXString("hello");
946         auto s2 = s1 ~ ", world!";
947         assert(s2 == "hello, world!");
948         s1 = "123456789_123456789_123456789_123456789_";
949         s2 = s1 ~ "abcdefghi_";
950         assert(s2 == "123456789_123456789_123456789_123456789_abcdefghi_");
951         s2 = s1 ~ s1;
952         assert(s2 == "123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_");
953         s2 = "abcdefghi_" ~ s1;
954         assert(s2 == "abcdefghi_123456789_123456789_123456789_123456789_");
955     }
956 }
957 
958 unittest
959 {
960     alias RCI = RCXString!(immutable uint);
961     RCI x;
962 }
963 
964 /// verify UTF-8 storage
965 unittest
966 {
967     string s = "åäö";
968     RCString rcs = s;
969     assert(rcs.length == 6);
970     import std.algorithm : count;
971     assert(rcs.count == 3);
972     assert(rcs.front == 'å');
973     rcs.popFront();
974     assert(rcs.front == 'ä');
975     rcs.popFront();
976     assert(rcs.front == 'ö');
977     rcs.popFront();
978     assert(rcs.empty);
979 }
980 
981 version = profile;
982 
983 /// shows performance increase for SSO over built-in string
984 version(profile) unittest
985 {
986     enum maxSmallSize = 23;
987     alias S = RCXString!(immutable char, maxSmallSize);
988 
989     import std.datetime: StopWatch, Duration;
990     import std.conv : to;
991     import std.stdio;
992 
993     enum n = 2^^21;
994 
995     StopWatch sw;
996 
997     sw.reset;
998     sw.start;
999     char[maxSmallSize] ss;
1000     foreach (i; 0 .. n)
1001     {
1002         auto x = S(ss);
1003     }
1004     sw.stop;
1005     auto timeRCString = sw.peek().msecs;
1006     writeln("> RCString took ", sw.peek().to!Duration);
1007 
1008     sw.reset;
1009     sw.start;
1010     foreach (i; 0 .. n)
1011     {
1012         string x = ss.idup;
1013     }
1014     sw.stop;
1015     auto timeString = sw.peek().msecs;
1016     writeln("> Builtin string took ", sw.peek().to!Duration);
1017 
1018     writeln("> Speedup: ", timeString/timeRCString);
1019 }