1 /** Integer Sorting Algorithms. 2 Copyright: Per Nordlöw 2022-. 3 License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0). 4 Authors: $(WEB Per Nordlöw) 5 */ 6 module nxt.integer_sorting; 7 8 version = nxt_benchmark; 9 // version = show; 10 11 import std.range.primitives : isRandomAccessRange, ElementType; 12 import std.traits : isNumeric; 13 import std.meta : AliasSeq; 14 15 import nxt.bijections; 16 17 /** Radix sort of `input`. 18 19 Note that this implementation of non-inplace radix sort only requires 20 `input` to be a `BidirectionalRange` not a `RandomAccessRange`. 21 22 Note that `input` can be a `BidirectionalRange` aswell as 23 `RandomAccessRange`. 24 25 `radixBitCount` is the number of bits in radix (digit) 26 27 TODO: make `radixBitCount` a template parameter either 8 or 16, 28 ElementType.sizeof must be a multiple of radixBitCount 29 30 TODO: input[] = y[] not needed when input is mutable 31 32 TODO: Restrict fun. 33 34 TODO: Choose fastDigitDiscardal based on elementMin and elementMax (if they 35 are given) 36 37 See_Also: https://probablydance.com/2016/12/27/i-wrote-a-faster-sorting-algorithm/ 38 See_Also: https://github.com/skarupke/ska_sort/blob/master/ska_sort.hpp 39 See_Also: http://forum.dlang.org/thread/vmytpazcusauxypkwdbn@forum.dlang.org#post-vmytpazcusauxypkwdbn:40forum.dlang.org 40 */ 41 auto radixSort(R, 42 alias fun = "a", 43 bool descending = false, 44 bool requestDigitDiscardal = false, 45 bool inPlace = false)(R input, 46 /* ElementType!R elementMin = ElementType!(R).max, */ 47 /* ElementType!R elementMax = ElementType!(R).min */) 48 49 @trusted 50 if (isRandomAccessRange!R && 51 (isNumeric!(ElementType!R))) 52 { 53 import std.range : assumeSorted; 54 import std.algorithm.sorting : isSorted; /+ TODO: move this to radixSort when know how map less to descending +/ 55 import std.algorithm.comparison : min, max; 56 import std.range.primitives : front; 57 58 immutable n = input.length; // number of elements 59 alias E = ElementType!R; 60 enum elementBitCount = 8*E.sizeof; // total number of bits needed to code each element 61 62 /* Lookup number of radix bits from sizeof `ElementType`. 63 These give optimal performance on Intel Core i7. 64 */ 65 static if (elementBitCount == 8 || 66 elementBitCount == 24) 67 enum radixBitCount = 8; 68 else static if (elementBitCount == 16 || 69 elementBitCount == 32 || 70 elementBitCount == 64) 71 enum radixBitCount = 16; 72 else 73 static assert(0, "TODO: handle element type " ~ e.stringof); 74 75 /+ TODO: activate this: subtract min from all values and then immutable uint elementBitCount = is_min(a_max) ? 8*sizeof(E) : binlog(a_max); and add it back. +/ 76 enum digitCount = elementBitCount / radixBitCount; // number of `digitCount` in radix `radixBitCount` 77 static assert(elementBitCount % radixBitCount == 0, 78 "Precision of ElementType must be evenly divisble by bit-precision of Radix."); 79 80 enum doDigitDiscardal = requestDigitDiscardal && digitCount >= 2; 81 82 enum radix = cast(typeof(radixBitCount))1 << radixBitCount; // bin count 83 enum mask = radix-1; // radix bit mask 84 85 alias UE = typeof(input.front.bijectToUnsigned); // get unsigned integer type of same precision as \tparam E. 86 87 import nxt.container.fixed_dynamic_array : FixedDynamicArray; 88 89 static if (inPlace) // most-significant digit (MSD) first in-place radix sort 90 { 91 static assert(!descending, "TODO: Implement descending version"); 92 93 foreach (immutable digitOffsetReversed; 0 .. digitCount) // for each `digitOffset` (in base `radix`) starting with least significant (LSD-first) 94 { 95 immutable digitOffset = digitCount - 1 - digitOffsetReversed; 96 immutable digitBitshift = digitOffset*radixBitCount; // digit bit shift 97 98 // [lowOffsets[i], highOffsets[i]] will become slices into `input` 99 size_t[radix] lowOffsets; // low offsets for each bin 100 size_t[radix] highOffsets; // high offsets for each bin 101 102 // calculate counts 103 foreach (immutable j; 0 .. n) // for each element index `j` in `input` 104 { 105 immutable UE currentUnsignedValue = cast(UE)input[j].bijectToUnsigned(descending); 106 immutable i = (currentUnsignedValue >> digitBitshift) & mask; // digit (index) 107 ++highOffsets[i]; // increase histogram bin counter 108 } 109 110 // bin boundaries: accumulate bin counters array 111 lowOffsets[0] = 0; // first low is always zero 112 foreach (immutable j; 1 .. radix) // for each successive bin counter 113 { 114 lowOffsets[j] = highOffsets[j - 1]; // previous roof becomes current floor 115 highOffsets[j] += highOffsets[j - 1]; // accumulate bin counter 116 } 117 assert(highOffsets[radix - 1] == n); // should equal high offset of last bin 118 } 119 120 // /** \em unstable in-place (permutate) reorder/sort `input` 121 // * access `input`'s elements in \em reverse to \em reuse filled caches from previous forward iteration. 122 // * \see `in_place_indexed_reorder` 123 // */ 124 // for (int r = radix - 1; r >= 0; --r) // for each radix digit r in reverse order (cache-friendly) 125 // { 126 // while (binStat[r]) // as long as elements left in r:th bucket 127 // { 128 // immutable uint i0 = binStat[r].pop_back(); // index to first element of permutation 129 // immutable E e0 = input[i0]; // value of first/current element of permutation 130 // while (true) 131 // { 132 // immutable int rN = (e0.bijectToUnsigned(descending) >> digitBitshift) & mask; // next digit (index) 133 // if (r == rN) // if permutation cycle closed (back to same digit) 134 // break; 135 // immutable ai = binStat[rN].pop_back(); // array index 136 // swap(input[ai], e0); // do swap 137 // } 138 // input[i0] = e0; // complete cycle 139 // } 140 // } 141 142 /+ TODO: copy reorder algorithm into local function that calls itself in the recursion step +/ 143 /+ TODO: call this local function +/ 144 145 assert(input.isSorted!"a < b"); 146 } 147 else // standard radix sort 148 { 149 // non-in-place requires temporary `y`. TODO: we could allocate these as 150 // a stack-allocated array for small arrays and gain extra speed. 151 auto tempStorage = FixedDynamicArray!E.makeUninitializedOfLength(n); 152 auto tempSlice = tempStorage[]; 153 154 static if (doDigitDiscardal) 155 { 156 UE ors = 0; // digits diff(xor)-or-sum 157 } 158 159 foreach (immutable digitOffset; 0 .. digitCount) // for each `digitOffset` (in base `radix`) starting with least significant (LSD-first) 160 { 161 immutable digitBitshift = digitOffset*radixBitCount; // digit bit shift 162 163 static if (doDigitDiscardal) 164 if (digitOffset != 0) // if first iteration already performed we have bit statistics 165 if ((! ((ors >> digitBitshift) & mask))) // if bits in digit[d] are either all \em zero or 166 continue; // no sorting is needed for this digit 167 168 // calculate counts 169 size_t[radix] highOffsets; // histogram buckets count and later upper-limits/walls for values in `input` 170 UE previousUnsignedValue = cast(UE)input[0].bijectToUnsigned(descending); 171 foreach (immutable j; 0 .. n) // for each element index `j` in `input` 172 { 173 immutable UE currentUnsignedValue = cast(UE)input[j].bijectToUnsigned(descending); 174 static if (doDigitDiscardal) 175 if (digitOffset == 0) // first iteration calculates statistics 176 { 177 ors |= previousUnsignedValue ^ currentUnsignedValue; // accumulate bit change statistics 178 // ors |= currentUnsignedValue; // accumulate bits statistics 179 } 180 immutable i = (currentUnsignedValue >> digitBitshift) & mask; // digit (index) 181 ++highOffsets[i]; // increase histogram bin counter 182 previousUnsignedValue = currentUnsignedValue; 183 } 184 185 static if (doDigitDiscardal) 186 if (digitOffset == 0) // if first iteration already performed we have bit statistics 187 if ((! ((ors >> digitBitshift) & mask))) // if bits in digit[d] are either all \em zero or 188 continue; // no sorting is needed for this digit 189 190 // bin boundaries: accumulate bin counters array 191 foreach (immutable j; 1 .. radix) // for each successive bin counter 192 highOffsets[j] += highOffsets[j - 1]; // accumulate bin counter 193 assert(highOffsets[radix - 1] == n); // should equal high offset of last bin 194 195 // reorder. access `input`'s elements in \em reverse to \em reuse filled caches from previous forward iteration. 196 // \em stable reorder from `input` to `tempSlice` using normal counting sort (see `counting_sort` above). 197 enum unrollFactor = 1; 198 assert((n % unrollFactor) == 0, "TODO: Add reordering for remainder"); // is evenly divisible by unroll factor 199 for (size_t j = n - 1; j < n; j -= unrollFactor) // for each element `j` in reverse order. when `j` wraps around `j` < `n` is no longer true 200 { 201 static foreach (k; 0 .. unrollFactor) // inlined (unrolled) loop 202 { 203 immutable i = (input[j - k].bijectToUnsigned(descending) >> digitBitshift) & mask; // digit (index) 204 tempSlice[--highOffsets[i]] = input[j - k]; // reorder into tempSlice 205 } 206 } 207 assert(highOffsets[0] == 0); // should equal low offset of first bin 208 209 static if (digitCount & 1) // if odd number of digit passes 210 { 211 static if (__traits(compiles, input[] == tempSlice[])) 212 input[] = tempSlice[]; // faster than std.algorithm.copy() because input never overlap tempSlice 213 else 214 { 215 import std.algorithm.mutation : copy; 216 copy(tempSlice[], input[]); /+ TODO: use memcpy +/ 217 } 218 } 219 else 220 { 221 import std.algorithm.mutation : swap; 222 swap(input, tempSlice); 223 } 224 } 225 } 226 227 static if (descending) 228 return input.assumeSorted!"a > b"; 229 else 230 return input.assumeSorted!"a < b"; 231 } 232 233 version (nxt_benchmark) 234 @safe unittest { 235 version (show) import std.stdio : write, writef, writeln; 236 237 /** Test `radixSort` with element-type `E`. */ 238 void test(E)(int n) @safe 239 { 240 version (show) writef("%8-s, %10-s, ", E.stringof, n); 241 242 import nxt.container.dynamic_array : Array = DynamicArray; 243 244 import std.traits : isIntegral, isSigned, isUnsigned; 245 import nxt.random_ex : randInPlace, randInPlaceWithElementRange; 246 import std.algorithm.sorting : sort, isSorted; 247 import std.algorithm.mutation : SwapStrategy; 248 import std.algorithm.comparison : min, max, equal; 249 import std.range : retro; 250 import std.datetime.stopwatch : StopWatch, AutoStart; 251 auto sw = StopWatch(); 252 immutable nMax = 5; 253 254 // generate random 255 alias A = Array!E; 256 A a; 257 a.length = n; 258 static if (isUnsigned!E) { 259 // a[].randInPlaceWithElementRange(cast(E)0, cast(E)uint.max); 260 a[].randInPlace(); 261 } else { 262 a[].randInPlace(); 263 } 264 version (show) write("original random: ", a[0 .. min(nMax, $)], ", "); 265 266 // standard quick sort 267 auto qa = a.dupShallow; 268 269 sw.reset; 270 sw.start(); 271 qa[].sort!("a < b", SwapStrategy.stable)(); 272 sw.stop; 273 immutable sortTimeUsecs = sw.peek.total!"usecs"; 274 version (show) write("quick sorted: ", qa[0 .. min(nMax, $)], ", "); 275 assert(qa[].isSorted); 276 277 // reverse radix sort 278 { 279 auto b = a.dupShallow; 280 b[].radixSort!(typeof(b[]), "a", true)(); 281 version (show) write("reverse radix sorted: ", b[0 .. min(nMax, $)], ", "); 282 assert(b[].retro.equal(qa[])); 283 } 284 285 // standard radix sort 286 { 287 auto b = a.dupShallow; 288 289 sw.reset; 290 sw.start(); 291 b[].radixSort!(typeof(b[]), "b", false)(); 292 sw.stop; 293 immutable radixTime1 = sw.peek.total!"usecs"; 294 295 version (show) writef("%9-s, ", cast(real)sortTimeUsecs / radixTime1); 296 assert(b[].equal(qa[])); 297 } 298 299 // standard radix sort fast-discardal 300 { 301 auto b = a.dupShallow; 302 303 sw.reset; 304 sw.start(); 305 b[].radixSort!(typeof(b[]), "b", false, true)(); 306 sw.stop; 307 immutable radixTime = sw.peek.total!"usecs"; 308 309 assert(b[].equal(qa[])); 310 311 version (show) 312 { 313 writeln("standard radix sorted with fast-discardal: ", 314 b[0 .. min(nMax, $)]); 315 } 316 version (show) writef("%9-s, ", cast(real)sortTimeUsecs / radixTime); 317 } 318 319 // inplace-place radix sort 320 // static if (is(E == uint)) 321 // { 322 // auto b = a.dupShallow; 323 324 // sw.reset; 325 // sw.start(); 326 // b[].radixSort!(typeof(b[]), "b", false, false, true)(); 327 // sw.stop; 328 // immutable radixTime = sw.peek.usecs; 329 330 // assert(b[].equal(qa[])); 331 332 // version (show) 333 // { 334 // writeln("in-place radix sorted with fast-discardal: ", 335 // b[0 .. min(nMax, $)]); 336 // } 337 // writef("%9-s, ", cast(real)sortTimeUsecs / radixTime); 338 // } 339 340 version (show) writeln(""); 341 } 342 343 import std.meta : AliasSeq; 344 immutable n = 1_00_000; 345 version (show) writeln("EType, eCount, radixSort (speed-up), radixSort with fast discardal (speed-up), in-place radixSort (speed-up)"); 346 foreach (immutable ix, T; AliasSeq!(byte, short, int, long)) 347 { 348 test!T(n); // test signed 349 import std.traits : Unsigned; 350 test!(Unsigned!T)(n); // test unsigned 351 } 352 test!float(n); 353 test!double(n); 354 } 355 356 @safe unittest { 357 import std.algorithm.sorting : sort, isSorted; 358 import std.algorithm.mutation : swap; 359 import std.meta : AliasSeq; 360 import nxt.container.dynamic_array : Array = DynamicArray; 361 import nxt.random_ex : randInPlace; 362 immutable n = 10_000; 363 foreach (ix, E; AliasSeq!(byte, ubyte, short, ushort, int, uint, long, ulong, float, double)) { 364 alias A = Array!E; 365 A a; 366 a.length = n; 367 a[].randInPlace(); 368 auto b = a.dupShallow; 369 assert(a[].radixSort() == b[].sort()); 370 swap(a, b); 371 } 372 } 373 374 version (unittest) { 375 import nxt.construction : dupShallow; 376 }