1 /** xxHash is an extremely fast non-cryptographic hash algorithm, working at 2 speeds close to RAM limits. It is proposed in two flavors, 32 and 64 bits. 3 4 TODO merge into xxhash-d 5 TODO make endian-aware 6 */ 7 module nxt.xxhash64; 8 9 @safe pure nothrow @nogc: 10 11 /** xxHash-64, based on Yann Collet's descriptions 12 13 How to use: 14 15 ulong myseed = 0; 16 XXHash64 myhash(myseed); 17 myhash.put(pointerToSomeBytes, numberOfBytes); 18 myhash.put(pointerToSomeMoreBytes, numberOfMoreBytes); // call put() as often as you like to ... 19 20 and compute hash: 21 22 ulong result = myhash.hash(); 23 24 or all of the above in one single line: 25 26 ulong result2 = XXHash64::hashOf(mypointer, numBytes, myseed); 27 28 See_Also: http://cyan4973.github.io/xxHash/ 29 See_Also: http://create.stephan-brumme.com/xxhash/ 30 31 TODO make endian-aware 32 **/ 33 struct XXHash64 34 { 35 @safe pure nothrow @nogc: 36 37 /** 38 * Constructs XXHash64 with `seed`. 39 */ 40 this(ulong seed) 41 { 42 _seed = seed; 43 } 44 45 /** (Re)initialize. 46 */ 47 void start() 48 { 49 _state[0] = _seed + prime1 + prime2; 50 _state[1] = _seed + prime2; 51 _state[2] = _seed; 52 _state[3] = _seed - prime1; 53 _bufferSize = 0; 54 _totalLength = 0; 55 } 56 57 /** Use this to feed the hash with data. 58 59 Also implements the $(XREF range, OutputRange) interface for $(D ubyte) and $(D const(ubyte)[]). 60 */ 61 void put(scope const(ubyte)[] data...) @trusted 62 { 63 auto ptr = data.ptr; 64 auto length = data.length; 65 66 _totalLength += length; 67 68 // unprocessed old data plus new data still fit in temporary buffer ? 69 if (_bufferSize + length < bufferMaxSize) 70 { 71 // just add new data 72 while (length-- > 0) 73 { 74 _buffer[_bufferSize++] = *ptr++; 75 } 76 return; 77 } 78 79 // point beyond last byte 80 const(ubyte)* end = ptr + length; 81 const(ubyte)* endBlock = end - bufferMaxSize; 82 83 // some data left from previous update ? 84 if (_bufferSize > 0) 85 { 86 // make sure temporary buffer is full (16 bytes) 87 while (_bufferSize < bufferMaxSize) 88 { 89 _buffer[_bufferSize++] = *ptr++; 90 } 91 92 // process these 32 bytes (4x8) 93 process(_buffer.ptr, _state[0], _state[1], _state[2], _state[3]); 94 } 95 96 // copying _state to local variables helps optimizer A LOT 97 ulong s0 = _state[0], s1 = _state[1], s2 = _state[2], s3 = _state[3]; 98 // 32 bytes at once 99 while (ptr <= endBlock) 100 { 101 // local variables s0..s3 instead of _state[0].._state[3] are much faster 102 process(ptr, s0, s1, s2, s3); 103 ptr += 32; 104 } 105 // copy back 106 _state[0] = s0; _state[1] = s1; _state[2] = s2; _state[3] = s3; 107 108 // copy remainder to temporary buffer 109 _bufferSize = end - ptr; 110 foreach (const i; 0 .. _bufferSize) 111 { 112 _buffer[i] = ptr[i]; 113 } 114 } 115 116 /** Returns: the finished XXHash64 hash. 117 This also calls $(LREF start) to reset the internal _state. 118 */ 119 ulong finishUlong() @trusted 120 { 121 // fold 256 bit _state into one single 64 bit value 122 ulong result; 123 if (_totalLength >= bufferMaxSize) 124 { 125 result = (rotateLeft(_state[0], 1) + 126 rotateLeft(_state[1], 7) + 127 rotateLeft(_state[2], 12) + 128 rotateLeft(_state[3], 18)); 129 result = (result ^ processSingle(0, _state[0])) * prime1 + prime4; 130 result = (result ^ processSingle(0, _state[1])) * prime1 + prime4; 131 result = (result ^ processSingle(0, _state[2])) * prime1 + prime4; 132 result = (result ^ processSingle(0, _state[3])) * prime1 + prime4; 133 } 134 else 135 { 136 // internal _state wasn't set in put(), therefore original seed is still stored in state2 137 result = _state[2] + prime5; 138 } 139 140 result += _totalLength; 141 142 // process remaining bytes in temporary buffer 143 const(ubyte)* data = _buffer.ptr; 144 145 // point beyond last byte 146 const(ubyte)* end = data + _bufferSize; 147 148 // at least 8 bytes left ? => eat 8 bytes per step 149 for (; data + 8 <= end; data += 8) 150 { 151 result = rotateLeft(result ^ processSingle(0, *cast(ulong*)data), 27) * prime1 + prime4; 152 } 153 154 // 4 bytes left ? => eat those 155 if (data + 4 <= end) 156 { 157 result = rotateLeft(result ^ (*cast(uint*)data) * prime1, 23) * prime2 + prime3; 158 data += 4; 159 } 160 161 // take care of remaining 0..3 bytes, eat 1 byte per step 162 while (data != end) 163 { 164 result = rotateLeft(result ^ (*data++) * prime5, 11) * prime1; 165 } 166 167 // mix bits 168 result ^= result >> 33; 169 result *= prime2; 170 result ^= result >> 29; 171 result *= prime3; 172 result ^= result >> 32; 173 174 start(); 175 176 return result; 177 } 178 179 /** Returns: the finished XXHash64 hash. 180 This also calls $(LREF start) to reset the internal _state. 181 */ 182 ubyte[8] finish() @trusted 183 { 184 import std.bitmanip : swapEndian; 185 _result = swapEndian(finishUlong()); 186 return (cast(ubyte*)&_result)[0 .. typeof(return).sizeof]; 187 } 188 189 ulong get() 190 { 191 return _result; 192 } 193 194 private: 195 /// magic constants 196 enum ulong prime1 = 11400714785074694791UL; 197 enum ulong prime2 = 14029467366897019727UL; 198 enum ulong prime3 = 1609587929392839161UL; 199 enum ulong prime4 = 9650029242287828579UL; 200 enum ulong prime5 = 2870177450012600261UL; 201 202 /// temporarily store up to 31 bytes between multiple put() calls 203 enum bufferMaxSize = 31+1; 204 205 ulong[4] _state; 206 ulong _bufferSize; 207 ulong _totalLength; 208 ulong _seed; 209 ubyte[bufferMaxSize] _buffer; 210 ulong _result; 211 212 /// rotate bits, should compile to a single CPU instruction (ROL) 213 static ulong rotateLeft(ulong x, ubyte bits) 214 { 215 return (x << bits) | (x >> (64 - bits)); 216 } 217 218 /// process a single 64 bit value 219 static ulong processSingle(ulong previous, ulong data) 220 { 221 return rotateLeft(previous + data * prime2, 31) * prime1; 222 } 223 224 /// process a block of 4x4 bytes, this is the main part of the XXHash32 algorithm 225 static void process(const(ubyte)* data, 226 out ulong state0, 227 out ulong state1, 228 out ulong state2, 229 out ulong state3) @trusted 230 { 231 const(ulong)* block = cast(const(ulong)*)data; 232 state0 = processSingle(state0, block[0]); 233 state1 = processSingle(state1, block[1]); 234 state2 = processSingle(state2, block[2]); 235 state3 = processSingle(state3, block[3]); 236 } 237 } 238 239 /** Compute xxHash-64 of input `data`, with optional seed `seed`. 240 */ 241 ulong xxhash64Of(in ubyte[] data, ulong seed = 0) 242 { 243 auto xh = XXHash64(seed); 244 xh.start(); 245 xh.put(data); 246 return xh.finishUlong(); 247 } 248 249 /** Compute xxHash-64 of input string `data`, with optional seed `seed`. 250 */ 251 ulong xxhash64Of(in char[] data, ulong seed = 0) 252 @trusted 253 { 254 return xxhash64Of(cast(ubyte[])data, seed); 255 } 256 257 /// test simple `xxhash64Of` 258 unittest 259 { 260 assert(xxhash64Of("") == 17241709254077376921UL); 261 262 ubyte[8] x = [1, 2, 3, 4, 5, 6, 7, 8]; 263 assert(xxhash64Of(x[]) == 9316896406413536788UL); 264 265 // tests copied from https://pypi.python.org/pypi/xxhash/0.6.0 266 assert(xxhash64Of(`xxhash`) == 3665147885093898016UL); 267 assert(xxhash64Of(`xxhash`, 20141025) == 13067679811253438005UL); 268 } 269 270 version(unittest) 271 { 272 import std.digest : hexDigest, isDigest; 273 static assert(isDigest!(XXHash64)); 274 } 275 276 /// `std.digest` conformance 277 unittest 278 { 279 import std.digest; 280 assert(hexDigest!XXHash64(`xxhash`) == `32DD38952C4BC720`); 281 }