1 module nxt.stringcache;
2 
3 /**
4  * The string cache is used for string interning.
5  *
6  * It will only story a single copy of any string that it is asked to hold.
7  * Interned strings can be compared for equality by comparing their $(B .ptr)
8  * field.
9  *
10  * Default and postbilt constructors are disabled. When a StringCache goes out
11  * of scope, the memory held by it is freed.
12  *
13  * See_Also: $(LINK http://en.wikipedia.org/wiki/String_interning)
14  */
15 struct StringCache
16 {
17 public:
18 
19 	@disable this();
20 	this(this) @disable;
21 
22 	/**
23 	 * Params: bucketCount = the initial number of buckets. Must be a
24 	 * power of two
25 	 */
26 	this(size_t bucketCount)
27 	{
28 		buckets = (cast(Node**) calloc((Node*).sizeof, bucketCount))[0 .. bucketCount];
29 	}
30 
31 	~this() nothrow @nogc
32 	{
33 		Block* current = rootBlock;
34 		while (current !is null)
35 		{
36 			Block* prev = current;
37 			current = current.next;
38 			free(cast(void*) prev.bytes.ptr);
39 			free(cast(void*) prev);
40 		}
41 		foreach (nodePointer; buckets)
42 		{
43 			Node* currentNode = nodePointer;
44 			while (currentNode !is null)
45 			{
46 				Node* prev = currentNode;
47 				currentNode = currentNode.next;
48 				free(prev);
49 			}
50 		}
51 		rootBlock = null;
52 		free(buckets.ptr);
53 		buckets = null;
54 	}
55 
56 	/**
57 	 * Caches a string.
58 	 */
59 	string intern(const(ubyte)[] str) pure nothrow @safe
60 	{
61 		if (str is null || str.length == 0)
62 			return "";
63 		immutable uint hash = hashBytes(str);
64 		return intern(str, hash);
65 	}
66 
67 	/**
68 	 * ditto
69 	 */
70 	string intern(string str) pure nothrow @trusted
71 	{
72 		return intern(cast(ubyte[]) str);
73 	}
74 
75 	/**
76 	 * Caches a string as above, but uses the given hash code instead of
77 	 * calculating one itself. Use this alongside $(LREF hashStep)() can reduce the
78 	 * amount of work necessary when lexing dynamic tokens.
79 	 */
80 	string intern(const(ubyte)[] str, uint hash) pure nothrow @safe
81 		in
82 		{
83 			assert (str.length > 0);
84 		}
85 	do
86 	{
87 		return _intern(str, hash);
88 //		string s = _intern(str, hash);
89 //		size_t* ptr = s in debugMap;
90 //		if (ptr is null)
91 //			debugMap[s] = cast(size_t) s.ptr;
92 //		else
93 //			assert (*ptr == cast(size_t) s.ptr);
94 //		return s;
95 	}
96 
97 	/**
98 	 * Incremental hashing.
99 	 * Params:
100 	 *	 b = the byte to add to the hash
101 	 *	 h = the hash that has been calculated so far
102 	 * Returns: the new hash code for the string.
103 	 */
104 	static uint hashStep(ubyte b, uint h) pure nothrow @safe
105 	{
106 		return (h ^ sbox[b]) * 3;
107 	}
108 
109 	/**
110 	 * The default bucket count for the string cache.
111 	 */
112 	static enum defaultBucketCount = 4096;
113 
114 	size_t allocated() pure nothrow @safe @property
115 	{
116 		return _allocated;
117 	}
118 
119 private:
120 
121 	string _intern(const(ubyte)[] bytes, uint hash) pure nothrow @trusted
122 	{
123 		if (bytes is null || bytes.length == 0)
124 			return "";
125 		immutable size_t index = hash & (buckets.length - 1);
126 		Node* s = find(bytes, hash);
127 		if (s !is null)
128 			return cast(string) s.str;
129 		_allocated += bytes.length;
130 		ubyte[] mem = allocate(bytes.length);
131 		mem[] = bytes[];
132 		Node* node = cast(Node*) malloc(Node.sizeof);
133 		node.str = mem;
134 		node.hash = hash;
135 		node.next = buckets[index];
136 		buckets[index] = node;
137 		return cast(string) mem;
138 	}
139 
140 	Node* find(const(ubyte)[] bytes, uint hash) pure nothrow @trusted
141 	{
142 		import std.algorithm;
143 		immutable size_t index = hash & (buckets.length - 1);
144 		Node* node = buckets[index];
145 		while (node !is null)
146 		{
147 			if (node.hash == hash && bytes.equal(cast(ubyte[]) node.str))
148 				return node;
149 			node = node.next;
150 		}
151 		return node;
152 	}
153 
154 	static uint hashBytes(const(ubyte)[] data) pure nothrow @trusted
155 		in
156 		{
157 			assert (data !is null);
158 			assert (data.length > 0);
159 		}
160 	do
161 	{
162 		uint hash = 0;
163 		foreach (ubyte b; data)
164 		{
165 			hash ^= sbox[b];
166 			hash *= 3;
167 		}
168 		return hash;
169 	}
170 
171 	ubyte[] allocate(size_t numBytes) pure nothrow @trusted
172 		in
173 		{
174 			assert (numBytes != 0);
175 		}
176 	out (result)
177 		{
178 			assert (result.length == numBytes);
179 		}
180 	do
181 	{
182 		if (numBytes > (blockSize / 4))
183 			return (cast(ubyte*) malloc(numBytes))[0 .. numBytes];
184 		Block* r = rootBlock;
185 		size_t i = 0;
186 		while  (i <= 3 && r !is null)
187 		{
188 
189 			immutable size_t available = r.bytes.length;
190 			immutable size_t oldUsed = r.used;
191 			immutable size_t newUsed = oldUsed + numBytes;
192 			if (newUsed <= available)
193 			{
194 				r.used = newUsed;
195 				return r.bytes[oldUsed .. newUsed];
196 			}
197 			i++;
198 			r = r.next;
199 		}
200 		Block* b = cast(Block*) malloc(Block.sizeof);
201 		b.bytes = (cast(ubyte*) malloc(blockSize))[0 .. blockSize];
202 		b.used = numBytes;
203 		b.next = rootBlock;
204 		rootBlock = b;
205 		return b.bytes[0 .. numBytes];
206 	}
207 
208 	static struct Node
209 	{
210 		ubyte[] str;
211 		uint hash;
212 		Node* next;
213 	}
214 
215 	static struct Block
216 	{
217 		ubyte[] bytes;
218 		size_t used;
219 		Block* next;
220 	}
221 
222 	static enum blockSize = 1024 * 16;
223 
224 	static immutable uint[] sbox = [
225 		0xF53E1837, 0x5F14C86B, 0x9EE3964C, 0xFA796D53,
226 		0x32223FC3, 0x4D82BC98, 0xA0C7FA62, 0x63E2C982,
227 		0x24994A5B, 0x1ECE7BEE, 0x292B38EF, 0xD5CD4E56,
228 		0x514F4303, 0x7BE12B83, 0x7192F195, 0x82DC7300,
229 		0x084380B4, 0x480B55D3, 0x5F430471, 0x13F75991,
230 		0x3F9CF22C, 0x2FE0907A, 0xFD8E1E69, 0x7B1D5DE8,
231 		0xD575A85C, 0xAD01C50A, 0x7EE00737, 0x3CE981E8,
232 		0x0E447EFA, 0x23089DD6, 0xB59F149F, 0x13600EC7,
233 		0xE802C8E6, 0x670921E4, 0x7207EFF0, 0xE74761B0,
234 		0x69035234, 0xBFA40F19, 0xF63651A0, 0x29E64C26,
235 		0x1F98CCA7, 0xD957007E, 0xE71DDC75, 0x3E729595,
236 		0x7580B7CC, 0xD7FAF60B, 0x92484323, 0xA44113EB,
237 		0xE4CBDE08, 0x346827C9, 0x3CF32AFA, 0x0B29BCF1,
238 		0x6E29F7DF, 0xB01E71CB, 0x3BFBC0D1, 0x62EDC5B8,
239 		0xB7DE789A, 0xA4748EC9, 0xE17A4C4F, 0x67E5BD03,
240 		0xF3B33D1A, 0x97D8D3E9, 0x09121BC0, 0x347B2D2C,
241 		0x79A1913C, 0x504172DE, 0x7F1F8483, 0x13AC3CF6,
242 		0x7A2094DB, 0xC778FA12, 0xADF7469F, 0x21786B7B,
243 		0x71A445D0, 0xA8896C1B, 0x656F62FB, 0x83A059B3,
244 		0x972DFE6E, 0x4122000C, 0x97D9DA19, 0x17D5947B,
245 		0xB1AFFD0C, 0x6EF83B97, 0xAF7F780B, 0x4613138A,
246 		0x7C3E73A6, 0xCF15E03D, 0x41576322, 0x672DF292,
247 		0xB658588D, 0x33EBEFA9, 0x938CBF06, 0x06B67381,
248 		0x07F192C6, 0x2BDA5855, 0x348EE0E8, 0x19DBB6E3,
249 		0x3222184B, 0xB69D5DBA, 0x7E760B88, 0xAF4D8154,
250 		0x007A51AD, 0x35112500, 0xC9CD2D7D, 0x4F4FB761,
251 		0x694772E3, 0x694C8351, 0x4A7E3AF5, 0x67D65CE1,
252 		0x9287DE92, 0x2518DB3C, 0x8CB4EC06, 0xD154D38F,
253 		0xE19A26BB, 0x295EE439, 0xC50A1104, 0x2153C6A7,
254 		0x82366656, 0x0713BC2F, 0x6462215A, 0x21D9BFCE,
255 		0xBA8EACE6, 0xAE2DF4C1, 0x2A8D5E80, 0x3F7E52D1,
256 		0x29359399, 0xFEA1D19C, 0x18879313, 0x455AFA81,
257 		0xFADFE838, 0x62609838, 0xD1028839, 0x0736E92F,
258 		0x3BCA22A3, 0x1485B08A, 0x2DA7900B, 0x852C156D,
259 		0xE8F24803, 0x00078472, 0x13F0D332, 0x2ACFD0CF,
260 		0x5F747F5C, 0x87BB1E2F, 0xA7EFCB63, 0x23F432F0,
261 		0xE6CE7C5C, 0x1F954EF6, 0xB609C91B, 0x3B4571BF,
262 		0xEED17DC0, 0xE556CDA0, 0xA7846A8D, 0xFF105F94,
263 		0x52B7CCDE, 0x0E33E801, 0x664455EA, 0xF2C70414,
264 		0x73E7B486, 0x8F830661, 0x8B59E826, 0xBB8AEDCA,
265 		0xF3D70AB9, 0xD739F2B9, 0x4A04C34A, 0x88D0F089,
266 		0xE02191A2, 0xD89D9C78, 0x192C2749, 0xFC43A78F,
267 		0x0AAC88CB, 0x9438D42D, 0x9E280F7A, 0x36063802,
268 		0x38E8D018, 0x1C42A9CB, 0x92AAFF6C, 0xA24820C5,
269 		0x007F077F, 0xCE5BC543, 0x69668D58, 0x10D6FF74,
270 		0xBE00F621, 0x21300BBE, 0x2E9E8F46, 0x5ACEA629,
271 		0xFA1F86C7, 0x52F206B8, 0x3EDF1A75, 0x6DA8D843,
272 		0xCF719928, 0x73E3891F, 0xB4B95DD6, 0xB2A42D27,
273 		0xEDA20BBF, 0x1A58DBDF, 0xA449AD03, 0x6DDEF22B,
274 		0x900531E6, 0x3D3BFF35, 0x5B24ABA2, 0x472B3E4C,
275 		0x387F2D75, 0x4D8DBA36, 0x71CB5641, 0xE3473F3F,
276 		0xF6CD4B7F, 0xBF7D1428, 0x344B64D0, 0xC5CDFCB6,
277 		0xFE2E0182, 0x2C37A673, 0xDE4EB7A3, 0x63FDC933,
278 		0x01DC4063, 0x611F3571, 0xD167BFAF, 0x4496596F,
279 		0x3DEE0689, 0xD8704910, 0x7052A114, 0x068C9EC5,
280 		0x75D0E766, 0x4D54CC20, 0xB44ECDE2, 0x4ABC653E,
281 		0x2C550A21, 0x1A52C0DB, 0xCFED03D0, 0x119BAFE2,
282 		0x876A6133, 0xBC232088, 0x435BA1B2, 0xAE99BBFA,
283 		0xBB4F08E4, 0xA62B5F49, 0x1DA4B695, 0x336B84DE,
284 		0xDC813D31, 0x00C134FB, 0x397A98E6, 0x151F0E64,
285 		0xD9EB3E69, 0xD3C7DF60, 0xD2F2C336, 0x2DDD067B,
286 		0xBD122835, 0xB0B3BD3A, 0xB0D54E46, 0x8641F1E4,
287 		0xA0B38F96, 0x51D39199, 0x37A6AD75, 0xDF84EE41,
288 		0x3C034CBA, 0xACDA62FC, 0x11923B8B, 0x45EF170A,
289 		];
290 
291 //	deprecated size_t[string] debugMap;
292 	size_t _allocated;
293 	Node*[] buckets;
294 	Block* rootBlock;
295 }
296 
297 private extern (C) void* calloc(size_t, size_t) nothrow pure @nogc;
298 private extern (C) void* malloc(size_t) nothrow pure @nogc;
299 private extern (C) void free(void*) nothrow pure @nogc;