1 /** xxHash is an extremely fast non-cryptographic hash algorithm, working at 2 speeds close to RAM limits. It is proposed in two flavors, 32 and 64 bits. 3 4 TODO: merge into xxhash-d 5 TODO: make endian-aware 6 */ 7 module nxt.xxhash64; 8 9 @safe pure nothrow @nogc: 10 11 /** xxHash-64, based on Yann Collet's descriptions 12 13 How to use: 14 15 ulong myseed = 0; 16 XXHash64 myhash(myseed); 17 myhash.put(pointerToSomeBytes, numberOfBytes); 18 myhash.put(pointerToSomeMoreBytes, numberOfMoreBytes); // call put() as often as you like to ... 19 20 and compute hash: 21 22 ulong result = myhash.hash(); 23 24 or all of the above in one single line: 25 26 ulong result2 = XXHash64::hashOf(mypointer, numBytes, myseed); 27 28 See_Also: http://cyan4973.github.io/xxHash/ 29 See_Also: http://create.stephan-brumme.com/xxhash/ 30 31 TODO: make endian-aware 32 **/ 33 struct XXHash64 34 { 35 @safe pure nothrow @nogc: 36 37 /** 38 * Constructs XXHash64 with `seed`. 39 */ 40 this(ulong seed) 41 { 42 _seed = seed; 43 } 44 45 /** (Re)initialize. 46 */ 47 void start() 48 { 49 _state[0] = _seed + prime1 + prime2; 50 _state[1] = _seed + prime2; 51 _state[2] = _seed; 52 _state[3] = _seed - prime1; 53 _bufferSize = 0; 54 _totalLength = 0; 55 } 56 57 /** Use this to feed the hash with data. 58 59 Also implements the $(XREF range, OutputRange) interface for $(D ubyte) and $(D const(ubyte)[]). 60 */ 61 void put(scope const(ubyte)[] data...) @trusted 62 { 63 auto ptr = data.ptr; 64 auto length = data.length; 65 66 _totalLength += length; 67 68 // unprocessed old data plus new data still fit in temporary buffer ? 69 if (_bufferSize + length < bufferMaxSize) 70 { 71 // just add new data 72 while (length-- > 0) 73 _buffer[_bufferSize++] = *ptr++; 74 return; 75 } 76 77 // point beyond last byte 78 const(ubyte)* end = ptr + length; 79 const(ubyte)* endBlock = end - bufferMaxSize; 80 81 // some data left from previous update ? 82 if (_bufferSize > 0) 83 { 84 // make sure temporary buffer is full (16 bytes) 85 while (_bufferSize < bufferMaxSize) 86 _buffer[_bufferSize++] = *ptr++; 87 88 // process these 32 bytes (4x8) 89 process(_buffer.ptr, _state[0], _state[1], _state[2], _state[3]); 90 } 91 92 // copying _state to local variables helps optimizer A LOT 93 ulong s0 = _state[0], s1 = _state[1], s2 = _state[2], s3 = _state[3]; 94 // 32 bytes at once 95 while (ptr <= endBlock) 96 { 97 // local variables s0..s3 instead of _state[0].._state[3] are much faster 98 process(ptr, s0, s1, s2, s3); 99 ptr += 32; 100 } 101 // copy back 102 _state[0] = s0; _state[1] = s1; _state[2] = s2; _state[3] = s3; 103 104 // copy remainder to temporary buffer 105 _bufferSize = end - ptr; 106 foreach (const i; 0 .. _bufferSize) 107 _buffer[i] = ptr[i]; 108 } 109 110 /** Returns: the finished XXHash64 hash. 111 This also calls $(LREF start) to reset the internal _state. 112 */ 113 ulong finishUlong() @trusted 114 { 115 // fold 256 bit _state into one single 64 bit value 116 ulong result; 117 if (_totalLength >= bufferMaxSize) 118 { 119 result = (rotateLeft(_state[0], 1) + 120 rotateLeft(_state[1], 7) + 121 rotateLeft(_state[2], 12) + 122 rotateLeft(_state[3], 18)); 123 result = (result ^ processSingle(0, _state[0])) * prime1 + prime4; 124 result = (result ^ processSingle(0, _state[1])) * prime1 + prime4; 125 result = (result ^ processSingle(0, _state[2])) * prime1 + prime4; 126 result = (result ^ processSingle(0, _state[3])) * prime1 + prime4; 127 } 128 else 129 // internal _state wasn't set in put(), therefore original seed is still stored in state2 130 result = _state[2] + prime5; 131 132 result += _totalLength; 133 134 // process remaining bytes in temporary buffer 135 const(ubyte)* data = _buffer.ptr; 136 137 // point beyond last byte 138 const(ubyte)* end = data + _bufferSize; 139 140 // at least 8 bytes left ? => eat 8 bytes per step 141 for (; data + 8 <= end; data += 8) 142 result = rotateLeft(result ^ processSingle(0, *cast(ulong*)data), 27) * prime1 + prime4; 143 144 // 4 bytes left ? => eat those 145 if (data + 4 <= end) 146 { 147 result = rotateLeft(result ^ (*cast(uint*)data) * prime1, 23) * prime2 + prime3; 148 data += 4; 149 } 150 151 // take care of remaining 0..3 bytes, eat 1 byte per step 152 while (data != end) 153 result = rotateLeft(result ^ (*data++) * prime5, 11) * prime1; 154 155 // mix bits 156 result ^= result >> 33; 157 result *= prime2; 158 result ^= result >> 29; 159 result *= prime3; 160 result ^= result >> 32; 161 162 start(); 163 164 return result; 165 } 166 167 /** Returns: the finished XXHash64 hash. 168 This also calls $(LREF start) to reset the internal _state. 169 */ 170 ubyte[8] finish() @trusted 171 { 172 import std.bitmanip : swapEndian; 173 _result = swapEndian(finishUlong()); 174 return (cast(ubyte*)&_result)[0 .. typeof(return).sizeof]; 175 } 176 177 ulong get() 178 { 179 return _result; 180 } 181 182 private: 183 /// magic constants 184 enum ulong prime1 = 11400714785074694791UL; 185 enum ulong prime2 = 14029467366897019727UL; 186 enum ulong prime3 = 1609587929392839161UL; 187 enum ulong prime4 = 9650029242287828579UL; 188 enum ulong prime5 = 2870177450012600261UL; 189 190 /// temporarily store up to 31 bytes between multiple put() calls 191 enum bufferMaxSize = 31+1; 192 193 ulong[4] _state; 194 ulong _bufferSize; 195 ulong _totalLength; 196 ulong _seed; 197 ubyte[bufferMaxSize] _buffer; 198 ulong _result; 199 200 /// rotate bits, should compile to a single CPU instruction (ROL) 201 static ulong rotateLeft(ulong x, ubyte bits) 202 { 203 return (x << bits) | (x >> (64 - bits)); 204 } 205 206 /// process a single 64 bit value 207 static ulong processSingle(ulong previous, ulong data) 208 { 209 return rotateLeft(previous + data * prime2, 31) * prime1; 210 } 211 212 /// process a block of 4x4 bytes, this is the main part of the XXHash32 algorithm 213 static void process(const(ubyte)* data, 214 out ulong state0, 215 out ulong state1, 216 out ulong state2, 217 out ulong state3) @trusted 218 { 219 const(ulong)* block = cast(const(ulong)*)data; 220 state0 = processSingle(state0, block[0]); 221 state1 = processSingle(state1, block[1]); 222 state2 = processSingle(state2, block[2]); 223 state3 = processSingle(state3, block[3]); 224 } 225 } 226 227 /** Compute xxHash-64 of input `data`, with optional seed `seed`. 228 */ 229 ulong xxhash64Of(in ubyte[] data, ulong seed = 0) 230 { 231 auto xh = XXHash64(seed); 232 xh.start(); 233 xh.put(data); 234 return xh.finishUlong(); 235 } 236 237 /** Compute xxHash-64 of input string `data`, with optional seed `seed`. 238 */ 239 ulong xxhash64Of(in char[] data, ulong seed = 0) @trusted 240 { 241 return xxhash64Of(cast(ubyte[])data, seed); 242 } 243 244 /// test simple `xxhash64Of` 245 unittest 246 { 247 assert(xxhash64Of("") == 17241709254077376921UL); 248 249 ubyte[8] x = [1, 2, 3, 4, 5, 6, 7, 8]; 250 assert(xxhash64Of(x[]) == 9316896406413536788UL); 251 252 // tests copied from https://pypi.python.org/pypi/xxhash/0.6.0 253 assert(xxhash64Of(`xxhash`) == 3665147885093898016UL); 254 assert(xxhash64Of(`xxhash`, 20141025) == 13067679811253438005UL); 255 } 256 257 version(unittest) 258 { 259 import std.digest : hexDigest, isDigest; 260 static assert(isDigest!(XXHash64)); 261 } 262 263 /// `std.digest` conformance 264 unittest 265 { 266 import std.digest; 267 assert(hexDigest!XXHash64(`xxhash`) == `32DD38952C4BC720`); 268 }