SSOString

Small-size-optimized (SSO) variant of string.

Storage is placed on the stack if the number of chars is at most smallCapacity (i.e. length <= smallCapacity); otherwise it is stored as a normal (large) string. The large string will be allocated on the GC-heap if the SSOString is constructed from a non-string (non-immutable char[]) parameter.

Because SSOString doesn't have a destructor it can safely allocate using a GC-backed region allocator without relying on a GC finalizer.

In order to mimic string/array/slice behaviour, opCast returns false for SSOString() and true for SSOString(""). This requires SSOString() to default to a large string whose pointer is null.

NOTE big-endian platform support hasn't been verified.

TODO Add to Phobos' std.typecons or std.array or std.string

Constructors

this
this(Chars source)

Construct from source, which potentially needs GC-allocation (iff source.length > smallCapacity and source is not a string).

this
this(Source source)
Undocumented in source.

Members

Aliases

opDollar
alias opDollar = length

Get length.

Functions

holeify
void holeify()

Make this a hole, meaning a removed/erased value.

isHole
bool isHole()

Check if this is a hole, meaning a removed/erased value.

isSmallASCII
bool isSmallASCII()

Check if this is a small ASCII string.

opBinary
bool opBinary(typeof(this) rhs)

Check if this is the same as rhs.

opCast
bool opCast()
Undocumented in source. Be warned that the author may not have intended to support it.
opEquals
bool opEquals(typeof(this) rhs)

Check if this is equal to rhs.

opEquals
bool opEquals(const(E)[] rhs)

Check if this is equal to rhs.

opIndex
inout(E) opIndex(size_t index)

Return the indexed char of this.

opSlice
inout(E)[] opSlice()

Return a slice to either the whole large or whole small string.

opSlice
inout(E)[] opSlice(size_t i, size_t j)

Return a slice at [i .. j] to either the internally stored large or small string.

toLower
typeof(this) toLower()

Return this lowercased.

toUpper
typeof(this) toUpper()

Return this uppercased.

Manifest constants

smallCapacity
enum smallCapacity;
Undocumented in source.

Properties

empty
bool empty [@property getter]

Check if this is empty.

isNull
bool isNull [@property getter]

Check if this is null.

length
size_t length [@property getter]

Get length.

opCmp
typeof(this) opCmp [@property setter]

Compare this with that.

toHash
hash_t toHash [@property getter]

Get hash of this, with extra fast computation for the small case.

toString
void delegate(const(E)[]) @(safe) toString [@property setter]
Undocumented in source. Be warned that the author may not have intended to support it.
toString
string toString [@property getter]

Return this converted to a string, without any GC-allocation because this is immutable.

toString
string toString [@property getter]

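Return this converted to a string, which potentially needs GC-allocation (iff length <= smallCapacity, because a small string is stored inline and must be copied, whereas a large string is returned as is).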

Static variables

holeValue
auto holeValue;

Support trait isHoleable.

nullValue
auto nullValue;

Support trait isNullable.

Examples

construct from non-immutable source is allowed in non-@nogc context

alias S = SSOString;

const char[] x0;
const s0 = S(x0);           // no .idup

const char[] x16 = new char[16];
const s16 = S(x16);         // will call .idup

construct from non-immutable source is not allowed in @nogc context

alias S = SSOString;
const char[] s;
static assert(__traits(compiles, { const s0_ = S(s); }));

test behaviour of == and is operator

alias S = SSOString;

const S x = "42";
assert(!x.isNull);
assert(x == "42");

const S y = "42";
assert(!y.isNull);
assert(y == "42");

assert(x == y);
assert(x == y[]);
assert(x[] == y);
assert(x[] == y[]);
assert(x[] is x[]);
assert(y[] is y[]);
assert(x[] !is y[]);
assert(x.ptr !is y.ptr);

const S z = "43";
assert(!z.isNull);
assert(z == "43");
assert(x != z);
assert(x[] != z[]);
assert(x !is z);
assert(x[] !is z[]);

alias S = SSOString;

static assert(S.smallCapacity == 15);

import nxt.gc_traits : mustAddGCRange;
static assert(mustAddGCRange!S); // `Large large.ptr` must be scanned

static assert(__traits(isZeroInit, S));
// TODO assert(S.init == S.nullValue);

auto s0 = S.init;
assert(s0.isNull);
assert(s0.length == 0);
assert(s0.isLarge);
assert(s0[] == []);

char[S.smallCapacity] charsSmallCapacity = "123456789_12345"; // fits in small string
const sSmallCapacity = S(charsSmallCapacity);
assert(!sSmallCapacity.isLarge);
assert(sSmallCapacity.length == S.smallCapacity);
assert(sSmallCapacity == charsSmallCapacity);

const s0_ = S("");
assert(s0_.isNull);         // cannot distinguish
assert(s0 == s0_);

const s7 = S("0123456");
assert(!s7.isNull);

const s7_ = S("0123456_"[0 .. $ - 1]);
assert(s7.ptr !is s7_.ptr); // string data shall not overlap
assert(s7 == s7_);

const _s7 = S("_0123456"[1 .. $]); // source from other string literal
assert(s7.ptr !is _s7.ptr); // string data shall not overlap
assert(s7 == _s7);

assert(!s7.isLarge);
assert(s7.length == 7);
assert(s7[] == "0123456");
assert(s7[] == "_0123456"[1 .. $]);
assert(s7[] == "0123456_"[0 .. $ - 1]);
assert(s7[0 .. 4] == "0123");

const s15 = S("0123456789abcde");
assert(!s15.isNull);
static assert(is(typeof(s15[]) == const(char)[]));
assert(!s15.isLarge);
assert(s15.length == 15);
assert(s15[] == "0123456789abcde");
assert(s15[0 .. 4] == "0123");
assert(s15[10 .. 15] == "abcde");
assert(s15[10 .. $] == "abcde");

const s16 = S("0123456789abcdef");
assert(!s16.isNull);
static assert(is(typeof(s16[]) == const(char)[]));
assert(s16.isLarge);

const s16_ = S("0123456789abcdef_"[0 .. s16.length]);
assert(s16.length == s16_.length);
assert(s16[] == s16_[]);
assert(s16.ptr !is s16_.ptr); // string data shall not overlap
assert(s16 == s16_);              // but contents is equal

const _s16 = S("_0123456789abcdef"[1 .. $]);
assert(s16.length == _s16.length);
assert(s16[] == _s16[]);    // contents is equal
assert(s16 == _s16);        // contents is equal

assert(s16.length == 16);
assert(s16[] == "0123456789abcdef");
assert(s16[0] == '0');
assert(s16[10] == 'a');
assert(s16[15] == 'f');
assert(s16[0 .. 4] == "0123");
assert(s16[10 .. 16] == "abcdef");
assert(s16[10 .. $] == "abcdef");

metadata for null string

alias S = SSOString;
auto s = S.init;
assert(s.isNull);
foreach (const i; 0 .. 8)
{
    s.metadata = i;
    assert(s.metadata == i);
    assert(s.length == 0);
    // TODO assert(!s.isNull);
}

metadata for small string

alias S = SSOString;
auto s = S("0123456");
assert(!s.isNull);
assert(!s.isLarge);
foreach (const i; 0 .. 8)
{
    s.metadata = i;
    assert(s.metadata == i);
    assert(s.length == 7);
    assert(!s.isLarge);
    assert(!s.isNull);
}

metadata for small string with maximum length

alias S = SSOString;
auto s = S("0123456789abcde");
assert(s.length == S.smallCapacity);
assert(!s.isNull);
assert(!s.isLarge);
foreach (const i; 0 .. 8)
{
    s.metadata = i;
    assert(s.metadata == i);
    assert(s.length == 15);
    assert(!s.isLarge);
    assert(!s.isNull);
}

metadata for large string with minimum length

alias S = SSOString;
auto s = S("0123456789abcdef");
assert(s.length == S.smallCapacity + 1);
assert(!s.isNull);
assert(s.isLarge);
foreach (const i; 0 .. 8)
{
    s.metadata = i;
    assert(s.metadata == i);
    assert(s.length == 16);
    assert(s.isLarge);
    assert(!s.isNull);
}

construct from static array larger than smallCapacity

alias S = SSOString;
char[S.smallCapacity + 1] charsMinLargeCapacity;
const _ = S(charsMinLargeCapacity);

hole handling

alias S = SSOString;
assert(!S.init.isHole);
assert(!S("").isHole);
assert(!S("a").isHole);
assert(S.asHole.isHole);

DIP-1000 return ref escape analysis

static if (isDIP1000)
{
    alias S = SSOString;
    static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { S x; return x.ptr; } }));
    static assert(!__traits(compiles, { string f1() @safe pure nothrow { S x; return x[]; } }));
    static assert(!__traits(compiles, { string f2() @safe pure nothrow { S x; return x.toString; } }));
    static assert(!__traits(compiles, { ref immutable(char) g() @safe pure nothrow @nogc { S x; return x[0]; } }));
}

ASCII purity (isSmallASCII)

alias S = SSOString;

// these are all small ASCII
assert( S("a").isSmallASCII);
assert( S("b").isSmallASCII);
assert( S("z").isSmallASCII);
assert( S("_").isSmallASCII);
assert( S("abcd").isSmallASCII);
assert( S("123456789_12345").isSmallASCII);

// these are not
assert(!S("123456789_123456").isSmallASCII); // too large
assert(!S("123456789_123ö").isSmallASCII);
assert(!S("ö").isSmallASCII);
assert(!S("Ö").isSmallASCII);
assert(!S("åäö").isSmallASCII);
assert(!S("ö-värld").isSmallASCII);

case-conversion (toLower and toUpper)

alias S = SSOString;
assert(S("A").toLower[] == "a");
assert(S("a").toUpper[] == "A");
assert(S("ABCDEFGHIJKLMNO").toLower[] == "abcdefghijklmno"); // small
assert(S("abcdefghijklmno").toUpper[] == "ABCDEFGHIJKLMNO"); // small
assert(S("ÅÄÖ").toLower[] == "åäö");
assert(S("åäö").toUpper[] == "ÅÄÖ");
assert(S("ABCDEFGHIJKLMNOP").toLower[] == "abcdefghijklmnop"); // large
assert(S("abcdefghijklmnop").toUpper[] == "ABCDEFGHIJKLMNOP"); // large

char[6] x = "ÅÄÖ";
import std.uni : toLowerInPlace;
auto xref = x[];
toLowerInPlace(xref);
assert(x == "åäö");
assert(xref == "åäö");

lexicographic comparison

alias S = SSOString;

const S a = S("a");
assert(a == S("a"));

immutable S b = S("b");

assert(a < b);
assert(b > a);
assert(a[] < b[]);

assert("a" < "b");
assert("a" < "å");
assert("Å" < "å");
assert(S("a") < S("å"));
assert(S("ÅÄÖ") < S("åäö"));

cast to bool

alias S = SSOString;
// mimics behaviour of casting of `string` to `bool`
assert(!S());
assert(S(""));
assert(S("abc"));

to string conversion

alias S = SSOString;

// mutable small will GC-allocate
{
    S s = S("123456789_12345");
    assert(s.ptr is &s.opSlice()[0]);
    assert(s.ptr !is &s.toString()[0]);
}

// const small will GC-allocate
{
    const S s = S("123456789_12345");
    assert(s.ptr is &s.opSlice()[0]);
    assert(s.ptr !is &s.toString()[0]);
}

// immutable small will not allocate
{
    immutable S s = S("123456789_12345");
    assert(s.ptr is &s.opSlice()[0]);
    assert(s.ptr is &s.toString()[0]);
    // TODO check return via -dip1000
}

/* Forbid return of possibly locally scoped `Small` small stack object
 * regardless of head-mutability.
 */
static if (isDIP1000)
{
    static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { S x; return x.ptr; } }));
    static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { const S x; return x.ptr; } }));
    static assert(!__traits(compiles, { immutable(char)* f1() @safe pure nothrow { immutable S x; return x.ptr; } }));

    /** TODO Enable the following line when DIP-1000 works for opSlice()
     *
     * See_Also: https://issues.dlang.org/show_bug.cgi?id=18792
     */
    // static assert(!__traits(compiles, { string f1() @safe pure nothrow { immutable S x; return x[]; } }));
}

// large will never allocate regardless of head-mutability
{
    S s = S("123456789_123456");
    assert(s.ptr is &s.opSlice()[0]);
    assert(s.ptr is &s.toString()[0]); // shouldn't this change?
}
import std.stdio;
writeln(SSOString("alpha"));

See Also

https://forum.dlang.org/post/pb87rn$2icb$1@digitalmars.com https://issues.dlang.org/show_bug.cgi?id=18792

TODO Use extra bits in Short.length for these special text encodings (the 15-byte small payload holds 120 bits):
- 5-bit lowercase English letter into 120/5 = 24 chars
- 5-bit uppercase English letter into 120/5 = 24 chars
- 6-bit mixedcase English letter into 120/6 = 20 chars

Meta