1 module nxt.stringcache;
2 
3 /**
4  * The string cache is used for string interning.
5  *
6  * It will only story a single copy of any string that it is asked to hold.
7  * Interned strings can be compared for equality by comparing their $(B .ptr)
8  * field.
9  *
10  * Default and postbilt constructors are disabled. When a StringCache goes out
11  * of scope, the memory held by it is freed.
12  *
13  * See_Also: $(LINK http://en.wikipedia.org/wiki/String_interning)
14  */
15 struct StringCache
16 {
17 public:
18 
19     @disable this();
20     @disable this(this);
21 
22     /**
23      * Params: bucketCount = the initial number of buckets. Must be a
24      * power of two
25      */
26     this(size_t bucketCount)
27     {
28         buckets = (cast(Node**) calloc((Node*).sizeof, bucketCount))[0 .. bucketCount];
29     }
30 
31     ~this() @nogc
32     {
33         Block* current = rootBlock;
34         while (current !is null)
35         {
36             Block* prev = current;
37             current = current.next;
38             free(cast(void*) prev.bytes.ptr);
39             free(cast(void*) prev);
40         }
41         foreach (nodePointer; buckets)
42         {
43             Node* currentNode = nodePointer;
44             while (currentNode !is null)
45             {
46                 Node* prev = currentNode;
47                 currentNode = currentNode.next;
48                 free(prev);
49             }
50         }
51         rootBlock = null;
52         free(buckets.ptr);
53         buckets = null;
54     }
55 
56     /**
57      * Caches a string.
58      */
59     string intern(const(ubyte)[] str) pure nothrow @safe
60     {
61         if (str is null || str.length == 0)
62             return "";
63         immutable uint hash = hashBytes(str);
64         return intern(str, hash);
65     }
66 
67     /**
68      * ditto
69      */
70     string intern(string str) pure nothrow @trusted
71     {
72         return intern(cast(ubyte[]) str);
73     }
74 
75     /**
76      * Caches a string as above, but uses the given hash code instead of
77      * calculating one itself. Use this alongside $(LREF hashStep)() can reduce the
78      * amount of work necessary when lexing dynamic tokens.
79      */
80     string intern(const(ubyte)[] str, uint hash) pure nothrow @safe
81         in
82         {
83             assert (str.length > 0);
84         }
85     do
86     {
87         return _intern(str, hash);
88 //		string s = _intern(str, hash);
89 //		size_t* ptr = s in debugMap;
90 //		if (ptr is null)
91 //			debugMap[s] = cast(size_t) s.ptr;
92 //		else
93 //			assert (*ptr == cast(size_t) s.ptr);
94 //        return s;
95     }
96 
97     /**
98      * Incremental hashing.
99      * Params:
100      *     b = the byte to add to the hash
101      *     h = the hash that has been calculated so far
102      * Returns: the new hash code for the string.
103      */
104     static uint hashStep(ubyte b, uint h) pure nothrow @safe
105     {
106         return (h ^ sbox[b]) * 3;
107     }
108 
109     /**
110      * The default bucket count for the string cache.
111      */
112     static enum defaultBucketCount = 4096;
113 
114     size_t allocated() pure nothrow @safe @property
115     {
116         return _allocated;
117     }
118 
119 private:
120 
121     string _intern(const(ubyte)[] bytes, uint hash) pure nothrow @trusted
122     {
123         if (bytes is null || bytes.length == 0)
124             return "";
125         immutable size_t index = hash & (buckets.length - 1);
126         Node* s = find(bytes, hash);
127         if (s !is null)
128             return cast(string) s.str;
129         _allocated += bytes.length;
130         ubyte[] mem = allocate(bytes.length);
131         mem[] = bytes[];
132         Node* node = cast(Node*) malloc(Node.sizeof);
133         node.str = mem;
134         node.hash = hash;
135         node.next = buckets[index];
136         buckets[index] = node;
137         return cast(string) mem;
138     }
139 
140     Node* find(const(ubyte)[] bytes, uint hash) pure nothrow @trusted
141     {
142         import std.algorithm;
143         immutable size_t index = hash & (buckets.length - 1);
144         Node* node = buckets[index];
145         while (node !is null)
146         {
147             if (node.hash == hash && bytes.equal(cast(ubyte[]) node.str))
148                 return node;
149             node = node.next;
150         }
151         return node;
152     }
153 
154     static uint hashBytes(const(ubyte)[] data) pure nothrow @trusted
155         in
156         {
157             assert (data !is null);
158             assert (data.length > 0);
159         }
160     do
161     {
162         uint hash = 0;
163         foreach (ubyte b; data)
164         {
165             hash ^= sbox[b];
166             hash *= 3;
167         }
168         return hash;
169     }
170 
171     ubyte[] allocate(size_t numBytes) pure nothrow @trusted
172         in
173         {
174             assert (numBytes != 0);
175         }
176     out (result)
177         {
178             assert (result.length == numBytes);
179         }
180     do
181     {
182         if (numBytes > (blockSize / 4))
183             return (cast(ubyte*) malloc(numBytes))[0 .. numBytes];
184         Block* r = rootBlock;
185         size_t i = 0;
186         while  (i <= 3 && r !is null)
187         {
188 
189             immutable size_t available = r.bytes.length;
190             immutable size_t oldUsed = r.used;
191             immutable size_t newUsed = oldUsed + numBytes;
192             if (newUsed <= available)
193             {
194                 r.used = newUsed;
195                 return r.bytes[oldUsed .. newUsed];
196             }
197             i++;
198             r = r.next;
199         }
200         Block* b = cast(Block*) malloc(Block.sizeof);
201         b.bytes = (cast(ubyte*) malloc(blockSize))[0 .. blockSize];
202         b.used = numBytes;
203         b.next = rootBlock;
204         rootBlock = b;
205         return b.bytes[0 .. numBytes];
206     }
207 
208     static struct Node
209     {
210         ubyte[] str;
211         uint hash;
212         Node* next;
213     }
214 
215     static struct Block
216     {
217         ubyte[] bytes;
218         size_t used;
219         Block* next;
220     }
221 
222     static enum blockSize = 1024 * 16;
223 
224     static immutable uint[] sbox = [
225         0xF53E1837, 0x5F14C86B, 0x9EE3964C, 0xFA796D53,
226         0x32223FC3, 0x4D82BC98, 0xA0C7FA62, 0x63E2C982,
227         0x24994A5B, 0x1ECE7BEE, 0x292B38EF, 0xD5CD4E56,
228         0x514F4303, 0x7BE12B83, 0x7192F195, 0x82DC7300,
229         0x084380B4, 0x480B55D3, 0x5F430471, 0x13F75991,
230         0x3F9CF22C, 0x2FE0907A, 0xFD8E1E69, 0x7B1D5DE8,
231         0xD575A85C, 0xAD01C50A, 0x7EE00737, 0x3CE981E8,
232         0x0E447EFA, 0x23089DD6, 0xB59F149F, 0x13600EC7,
233         0xE802C8E6, 0x670921E4, 0x7207EFF0, 0xE74761B0,
234         0x69035234, 0xBFA40F19, 0xF63651A0, 0x29E64C26,
235         0x1F98CCA7, 0xD957007E, 0xE71DDC75, 0x3E729595,
236         0x7580B7CC, 0xD7FAF60B, 0x92484323, 0xA44113EB,
237         0xE4CBDE08, 0x346827C9, 0x3CF32AFA, 0x0B29BCF1,
238         0x6E29F7DF, 0xB01E71CB, 0x3BFBC0D1, 0x62EDC5B8,
239         0xB7DE789A, 0xA4748EC9, 0xE17A4C4F, 0x67E5BD03,
240         0xF3B33D1A, 0x97D8D3E9, 0x09121BC0, 0x347B2D2C,
241         0x79A1913C, 0x504172DE, 0x7F1F8483, 0x13AC3CF6,
242         0x7A2094DB, 0xC778FA12, 0xADF7469F, 0x21786B7B,
243         0x71A445D0, 0xA8896C1B, 0x656F62FB, 0x83A059B3,
244         0x972DFE6E, 0x4122000C, 0x97D9DA19, 0x17D5947B,
245         0xB1AFFD0C, 0x6EF83B97, 0xAF7F780B, 0x4613138A,
246         0x7C3E73A6, 0xCF15E03D, 0x41576322, 0x672DF292,
247         0xB658588D, 0x33EBEFA9, 0x938CBF06, 0x06B67381,
248         0x07F192C6, 0x2BDA5855, 0x348EE0E8, 0x19DBB6E3,
249         0x3222184B, 0xB69D5DBA, 0x7E760B88, 0xAF4D8154,
250         0x007A51AD, 0x35112500, 0xC9CD2D7D, 0x4F4FB761,
251         0x694772E3, 0x694C8351, 0x4A7E3AF5, 0x67D65CE1,
252         0x9287DE92, 0x2518DB3C, 0x8CB4EC06, 0xD154D38F,
253         0xE19A26BB, 0x295EE439, 0xC50A1104, 0x2153C6A7,
254         0x82366656, 0x0713BC2F, 0x6462215A, 0x21D9BFCE,
255         0xBA8EACE6, 0xAE2DF4C1, 0x2A8D5E80, 0x3F7E52D1,
256         0x29359399, 0xFEA1D19C, 0x18879313, 0x455AFA81,
257         0xFADFE838, 0x62609838, 0xD1028839, 0x0736E92F,
258         0x3BCA22A3, 0x1485B08A, 0x2DA7900B, 0x852C156D,
259         0xE8F24803, 0x00078472, 0x13F0D332, 0x2ACFD0CF,
260         0x5F747F5C, 0x87BB1E2F, 0xA7EFCB63, 0x23F432F0,
261         0xE6CE7C5C, 0x1F954EF6, 0xB609C91B, 0x3B4571BF,
262         0xEED17DC0, 0xE556CDA0, 0xA7846A8D, 0xFF105F94,
263         0x52B7CCDE, 0x0E33E801, 0x664455EA, 0xF2C70414,
264         0x73E7B486, 0x8F830661, 0x8B59E826, 0xBB8AEDCA,
265         0xF3D70AB9, 0xD739F2B9, 0x4A04C34A, 0x88D0F089,
266         0xE02191A2, 0xD89D9C78, 0x192C2749, 0xFC43A78F,
267         0x0AAC88CB, 0x9438D42D, 0x9E280F7A, 0x36063802,
268         0x38E8D018, 0x1C42A9CB, 0x92AAFF6C, 0xA24820C5,
269         0x007F077F, 0xCE5BC543, 0x69668D58, 0x10D6FF74,
270         0xBE00F621, 0x21300BBE, 0x2E9E8F46, 0x5ACEA629,
271         0xFA1F86C7, 0x52F206B8, 0x3EDF1A75, 0x6DA8D843,
272         0xCF719928, 0x73E3891F, 0xB4B95DD6, 0xB2A42D27,
273         0xEDA20BBF, 0x1A58DBDF, 0xA449AD03, 0x6DDEF22B,
274         0x900531E6, 0x3D3BFF35, 0x5B24ABA2, 0x472B3E4C,
275         0x387F2D75, 0x4D8DBA36, 0x71CB5641, 0xE3473F3F,
276         0xF6CD4B7F, 0xBF7D1428, 0x344B64D0, 0xC5CDFCB6,
277         0xFE2E0182, 0x2C37A673, 0xDE4EB7A3, 0x63FDC933,
278         0x01DC4063, 0x611F3571, 0xD167BFAF, 0x4496596F,
279         0x3DEE0689, 0xD8704910, 0x7052A114, 0x068C9EC5,
280         0x75D0E766, 0x4D54CC20, 0xB44ECDE2, 0x4ABC653E,
281         0x2C550A21, 0x1A52C0DB, 0xCFED03D0, 0x119BAFE2,
282         0x876A6133, 0xBC232088, 0x435BA1B2, 0xAE99BBFA,
283         0xBB4F08E4, 0xA62B5F49, 0x1DA4B695, 0x336B84DE,
284         0xDC813D31, 0x00C134FB, 0x397A98E6, 0x151F0E64,
285         0xD9EB3E69, 0xD3C7DF60, 0xD2F2C336, 0x2DDD067B,
286         0xBD122835, 0xB0B3BD3A, 0xB0D54E46, 0x8641F1E4,
287         0xA0B38F96, 0x51D39199, 0x37A6AD75, 0xDF84EE41,
288         0x3C034CBA, 0xACDA62FC, 0x11923B8B, 0x45EF170A,
289         ];
290 
291 //	deprecated size_t[string] debugMap;
292     size_t _allocated;
293     Node*[] buckets;
294     Block* rootBlock;
295 }
296 
297 private extern (C) void* calloc(size_t, size_t) nothrow pure @nogc;
298 private extern (C) void* malloc(size_t) nothrow pure @nogc;
299 private extern (C) void free(void*) nothrow pure @nogc;