1 module nxt.stringcache; 2 3 /** 4 * The string cache is used for string interning. 5 * 6 * It will only story a single copy of any string that it is asked to hold. 7 * Interned strings can be compared for equality by comparing their $(B .ptr) 8 * field. 9 * 10 * Default and postbilt constructors are disabled. When a StringCache goes out 11 * of scope, the memory held by it is freed. 12 * 13 * See_Also: $(LINK http://en.wikipedia.org/wiki/String_interning) 14 */ 15 struct StringCache 16 { 17 public: 18 19 @disable this(); 20 @disable this(this); 21 22 /** 23 * Params: bucketCount = the initial number of buckets. Must be a 24 * power of two 25 */ 26 this(size_t bucketCount) 27 { 28 buckets = (cast(Node**) calloc((Node*).sizeof, bucketCount))[0 .. bucketCount]; 29 } 30 31 ~this() nothrow @nogc 32 { 33 Block* current = rootBlock; 34 while (current !is null) 35 { 36 Block* prev = current; 37 current = current.next; 38 free(cast(void*) prev.bytes.ptr); 39 free(cast(void*) prev); 40 } 41 foreach (nodePointer; buckets) 42 { 43 Node* currentNode = nodePointer; 44 while (currentNode !is null) 45 { 46 Node* prev = currentNode; 47 currentNode = currentNode.next; 48 free(prev); 49 } 50 } 51 rootBlock = null; 52 free(buckets.ptr); 53 buckets = null; 54 } 55 56 /** 57 * Caches a string. 58 */ 59 string intern(const(ubyte)[] str) pure nothrow @safe 60 { 61 if (str is null || str.length == 0) 62 return ""; 63 immutable uint hash = hashBytes(str); 64 return intern(str, hash); 65 } 66 67 /** 68 * ditto 69 */ 70 string intern(string str) pure nothrow @trusted 71 { 72 return intern(cast(ubyte[]) str); 73 } 74 75 /** 76 * Caches a string as above, but uses the given hash code instead of 77 * calculating one itself. Use this alongside $(LREF hashStep)() can reduce the 78 * amount of work necessary when lexing dynamic tokens. 79 */ 80 string intern(const(ubyte)[] str, uint hash) pure nothrow @safe 81 in 82 { 83 assert (str.length > 0); 84 } 85 do 86 { 87 return _intern(str, hash); 88 // string s = _intern(str, hash); 89 // size_t* ptr = s in debugMap; 90 // if (ptr is null) 91 // debugMap[s] = cast(size_t) s.ptr; 92 // else 93 // assert (*ptr == cast(size_t) s.ptr); 94 // return s; 95 } 96 97 /** 98 * Incremental hashing. 99 * Params: 100 * b = the byte to add to the hash 101 * h = the hash that has been calculated so far 102 * Returns: the new hash code for the string. 103 */ 104 static uint hashStep(ubyte b, uint h) pure nothrow @safe 105 { 106 return (h ^ sbox[b]) * 3; 107 } 108 109 /** 110 * The default bucket count for the string cache. 111 */ 112 static enum defaultBucketCount = 4096; 113 114 size_t allocated() pure nothrow @safe @property 115 { 116 return _allocated; 117 } 118 119 private: 120 121 string _intern(const(ubyte)[] bytes, uint hash) pure nothrow @trusted 122 { 123 if (bytes is null || bytes.length == 0) 124 return ""; 125 immutable size_t index = hash & (buckets.length - 1); 126 Node* s = find(bytes, hash); 127 if (s !is null) 128 return cast(string) s.str; 129 _allocated += bytes.length; 130 ubyte[] mem = allocate(bytes.length); 131 mem[] = bytes[]; 132 Node* node = cast(Node*) malloc(Node.sizeof); 133 node.str = mem; 134 node.hash = hash; 135 node.next = buckets[index]; 136 buckets[index] = node; 137 return cast(string) mem; 138 } 139 140 Node* find(const(ubyte)[] bytes, uint hash) pure nothrow @trusted 141 { 142 import std.algorithm; 143 immutable size_t index = hash & (buckets.length - 1); 144 Node* node = buckets[index]; 145 while (node !is null) 146 { 147 if (node.hash == hash && bytes.equal(cast(ubyte[]) node.str)) 148 return node; 149 node = node.next; 150 } 151 return node; 152 } 153 154 static uint hashBytes(const(ubyte)[] data) pure nothrow @trusted 155 in 156 { 157 assert (data !is null); 158 assert (data.length > 0); 159 } 160 do 161 { 162 uint hash = 0; 163 foreach (ubyte b; data) 164 { 165 hash ^= sbox[b]; 166 hash *= 3; 167 } 168 return hash; 169 } 170 171 ubyte[] allocate(size_t numBytes) pure nothrow @trusted 172 in 173 { 174 assert (numBytes != 0); 175 } 176 out (result) 177 { 178 assert (result.length == numBytes); 179 } 180 do 181 { 182 if (numBytes > (blockSize / 4)) 183 return (cast(ubyte*) malloc(numBytes))[0 .. numBytes]; 184 Block* r = rootBlock; 185 size_t i = 0; 186 while (i <= 3 && r !is null) 187 { 188 189 immutable size_t available = r.bytes.length; 190 immutable size_t oldUsed = r.used; 191 immutable size_t newUsed = oldUsed + numBytes; 192 if (newUsed <= available) 193 { 194 r.used = newUsed; 195 return r.bytes[oldUsed .. newUsed]; 196 } 197 i++; 198 r = r.next; 199 } 200 Block* b = cast(Block*) malloc(Block.sizeof); 201 b.bytes = (cast(ubyte*) malloc(blockSize))[0 .. blockSize]; 202 b.used = numBytes; 203 b.next = rootBlock; 204 rootBlock = b; 205 return b.bytes[0 .. numBytes]; 206 } 207 208 static struct Node 209 { 210 ubyte[] str; 211 uint hash; 212 Node* next; 213 } 214 215 static struct Block 216 { 217 ubyte[] bytes; 218 size_t used; 219 Block* next; 220 } 221 222 static enum blockSize = 1024 * 16; 223 224 static immutable uint[] sbox = [ 225 0xF53E1837, 0x5F14C86B, 0x9EE3964C, 0xFA796D53, 226 0x32223FC3, 0x4D82BC98, 0xA0C7FA62, 0x63E2C982, 227 0x24994A5B, 0x1ECE7BEE, 0x292B38EF, 0xD5CD4E56, 228 0x514F4303, 0x7BE12B83, 0x7192F195, 0x82DC7300, 229 0x084380B4, 0x480B55D3, 0x5F430471, 0x13F75991, 230 0x3F9CF22C, 0x2FE0907A, 0xFD8E1E69, 0x7B1D5DE8, 231 0xD575A85C, 0xAD01C50A, 0x7EE00737, 0x3CE981E8, 232 0x0E447EFA, 0x23089DD6, 0xB59F149F, 0x13600EC7, 233 0xE802C8E6, 0x670921E4, 0x7207EFF0, 0xE74761B0, 234 0x69035234, 0xBFA40F19, 0xF63651A0, 0x29E64C26, 235 0x1F98CCA7, 0xD957007E, 0xE71DDC75, 0x3E729595, 236 0x7580B7CC, 0xD7FAF60B, 0x92484323, 0xA44113EB, 237 0xE4CBDE08, 0x346827C9, 0x3CF32AFA, 0x0B29BCF1, 238 0x6E29F7DF, 0xB01E71CB, 0x3BFBC0D1, 0x62EDC5B8, 239 0xB7DE789A, 0xA4748EC9, 0xE17A4C4F, 0x67E5BD03, 240 0xF3B33D1A, 0x97D8D3E9, 0x09121BC0, 0x347B2D2C, 241 0x79A1913C, 0x504172DE, 0x7F1F8483, 0x13AC3CF6, 242 0x7A2094DB, 0xC778FA12, 0xADF7469F, 0x21786B7B, 243 0x71A445D0, 0xA8896C1B, 0x656F62FB, 0x83A059B3, 244 0x972DFE6E, 0x4122000C, 0x97D9DA19, 0x17D5947B, 245 0xB1AFFD0C, 0x6EF83B97, 0xAF7F780B, 0x4613138A, 246 0x7C3E73A6, 0xCF15E03D, 0x41576322, 0x672DF292, 247 0xB658588D, 0x33EBEFA9, 0x938CBF06, 0x06B67381, 248 0x07F192C6, 0x2BDA5855, 0x348EE0E8, 0x19DBB6E3, 249 0x3222184B, 0xB69D5DBA, 0x7E760B88, 0xAF4D8154, 250 0x007A51AD, 0x35112500, 0xC9CD2D7D, 0x4F4FB761, 251 0x694772E3, 0x694C8351, 0x4A7E3AF5, 0x67D65CE1, 252 0x9287DE92, 0x2518DB3C, 0x8CB4EC06, 0xD154D38F, 253 0xE19A26BB, 0x295EE439, 0xC50A1104, 0x2153C6A7, 254 0x82366656, 0x0713BC2F, 0x6462215A, 0x21D9BFCE, 255 0xBA8EACE6, 0xAE2DF4C1, 0x2A8D5E80, 0x3F7E52D1, 256 0x29359399, 0xFEA1D19C, 0x18879313, 0x455AFA81, 257 0xFADFE838, 0x62609838, 0xD1028839, 0x0736E92F, 258 0x3BCA22A3, 0x1485B08A, 0x2DA7900B, 0x852C156D, 259 0xE8F24803, 0x00078472, 0x13F0D332, 0x2ACFD0CF, 260 0x5F747F5C, 0x87BB1E2F, 0xA7EFCB63, 0x23F432F0, 261 0xE6CE7C5C, 0x1F954EF6, 0xB609C91B, 0x3B4571BF, 262 0xEED17DC0, 0xE556CDA0, 0xA7846A8D, 0xFF105F94, 263 0x52B7CCDE, 0x0E33E801, 0x664455EA, 0xF2C70414, 264 0x73E7B486, 0x8F830661, 0x8B59E826, 0xBB8AEDCA, 265 0xF3D70AB9, 0xD739F2B9, 0x4A04C34A, 0x88D0F089, 266 0xE02191A2, 0xD89D9C78, 0x192C2749, 0xFC43A78F, 267 0x0AAC88CB, 0x9438D42D, 0x9E280F7A, 0x36063802, 268 0x38E8D018, 0x1C42A9CB, 0x92AAFF6C, 0xA24820C5, 269 0x007F077F, 0xCE5BC543, 0x69668D58, 0x10D6FF74, 270 0xBE00F621, 0x21300BBE, 0x2E9E8F46, 0x5ACEA629, 271 0xFA1F86C7, 0x52F206B8, 0x3EDF1A75, 0x6DA8D843, 272 0xCF719928, 0x73E3891F, 0xB4B95DD6, 0xB2A42D27, 273 0xEDA20BBF, 0x1A58DBDF, 0xA449AD03, 0x6DDEF22B, 274 0x900531E6, 0x3D3BFF35, 0x5B24ABA2, 0x472B3E4C, 275 0x387F2D75, 0x4D8DBA36, 0x71CB5641, 0xE3473F3F, 276 0xF6CD4B7F, 0xBF7D1428, 0x344B64D0, 0xC5CDFCB6, 277 0xFE2E0182, 0x2C37A673, 0xDE4EB7A3, 0x63FDC933, 278 0x01DC4063, 0x611F3571, 0xD167BFAF, 0x4496596F, 279 0x3DEE0689, 0xD8704910, 0x7052A114, 0x068C9EC5, 280 0x75D0E766, 0x4D54CC20, 0xB44ECDE2, 0x4ABC653E, 281 0x2C550A21, 0x1A52C0DB, 0xCFED03D0, 0x119BAFE2, 282 0x876A6133, 0xBC232088, 0x435BA1B2, 0xAE99BBFA, 283 0xBB4F08E4, 0xA62B5F49, 0x1DA4B695, 0x336B84DE, 284 0xDC813D31, 0x00C134FB, 0x397A98E6, 0x151F0E64, 285 0xD9EB3E69, 0xD3C7DF60, 0xD2F2C336, 0x2DDD067B, 286 0xBD122835, 0xB0B3BD3A, 0xB0D54E46, 0x8641F1E4, 287 0xA0B38F96, 0x51D39199, 0x37A6AD75, 0xDF84EE41, 288 0x3C034CBA, 0xACDA62FC, 0x11923B8B, 0x45EF170A, 289 ]; 290 291 // deprecated size_t[string] debugMap; 292 size_t _allocated; 293 Node*[] buckets; 294 Block* rootBlock; 295 } 296 297 private extern (C) void* calloc(size_t, size_t) nothrow pure @nogc; 298 private extern (C) void* malloc(size_t) nothrow pure @nogc; 299 private extern (C) void free(void*) nothrow pure @nogc;