1 /** Integer Sorting Algorithms.
2 	Copyright: Per Nordlöw 2022-.
3 	License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
4 	Authors: $(WEB Per Nordlöw)
5  */
6 module nxt.integer_sorting;
7 
8 version = nxt_benchmark;
9 // version = show;
10 
11 import std.range.primitives : isRandomAccessRange, ElementType;
12 import std.traits : isNumeric;
13 import std.meta : AliasSeq;
14 
15 import nxt.bijections;
16 
17 /** Radix sort of `input`.
18 
19 	Note that this implementation of non-inplace radix sort only requires
20 	`input` to be a `BidirectionalRange` not a `RandomAccessRange`.
21 
22 	Note that `input` can be a `BidirectionalRange` aswell as
23 	`RandomAccessRange`.
24 
25 	`radixBitCount` is the number of bits in radix (digit)
26 
27 	TODO: make `radixBitCount` a template parameter either 8 or 16,
28 	ElementType.sizeof must be a multiple of radixBitCount
29 
30 	TODO: input[] = y[] not needed when input is mutable
31 
32 	TODO: Restrict fun.
33 
34 	TODO: Choose fastDigitDiscardal based on elementMin and elementMax (if they
35 	are given)
36 
37 	See_Also: https://probablydance.com/2016/12/27/i-wrote-a-faster-sorting-algorithm/
38 	See_Also: https://github.com/skarupke/ska_sort/blob/master/ska_sort.hpp
39 	See_Also: http://forum.dlang.org/thread/vmytpazcusauxypkwdbn@forum.dlang.org#post-vmytpazcusauxypkwdbn:40forum.dlang.org
40  */
41 auto radixSort(R,
42 			   alias fun = "a",
43 			   bool descending = false,
44 			   bool requestDigitDiscardal = false,
45 			   bool inPlace = false)(R input,
46 									 /* ElementType!R elementMin = ElementType!(R).max, */
47 									 /* ElementType!R elementMax = ElementType!(R).min */)
48 
49 	@trusted
50 if (isRandomAccessRange!R &&
51 	(isNumeric!(ElementType!R)))
52 {
53 	import std.range : assumeSorted;
54 	import std.algorithm.sorting : isSorted; /+ TODO: move this to radixSort when know how map less to descending +/
55 	import std.algorithm.comparison : min, max;
56 	import std.range.primitives : front;
57 
58 	immutable n = input.length; // number of elements
59 	alias E = ElementType!R;
60 	enum elementBitCount = 8*E.sizeof; // total number of bits needed to code each element
61 
62 	/* Lookup number of radix bits from sizeof `ElementType`.
63 	   These give optimal performance on Intel Core i7.
64 	*/
65 	static if (elementBitCount == 8 ||
66 			   elementBitCount == 24)
67 		enum radixBitCount = 8;
68 	else static if (elementBitCount == 16 ||
69 					elementBitCount == 32 ||
70 					elementBitCount == 64)
71 		enum radixBitCount = 16;
72 	else
73 		static assert(0, "TODO: handle element type " ~ e.stringof);
74 
75 	/+ TODO: activate this: subtract min from all values and then immutable uint elementBitCount = is_min(a_max) ? 8*sizeof(E) : binlog(a_max); and add it back. +/
76 	enum digitCount = elementBitCount / radixBitCount;		 // number of `digitCount` in radix `radixBitCount`
77 	static assert(elementBitCount % radixBitCount == 0,
78 				  "Precision of ElementType must be evenly divisble by bit-precision of Radix.");
79 
80 	enum doDigitDiscardal = requestDigitDiscardal && digitCount >= 2;
81 
82 	enum radix = cast(typeof(radixBitCount))1 << radixBitCount;	// bin count
83 	enum mask = radix-1;									 // radix bit mask
84 
85 	alias UE = typeof(input.front.bijectToUnsigned); // get unsigned integer type of same precision as \tparam E.
86 
87 	import nxt.container.fixed_dynamic_array : FixedDynamicArray;
88 
89 	static if (inPlace) // most-significant digit (MSD) first in-place radix sort
90 	{
91 		static assert(!descending, "TODO: Implement descending version");
92 
93 		foreach (immutable digitOffsetReversed; 0 .. digitCount) // for each `digitOffset` (in base `radix`) starting with least significant (LSD-first)
94 		{
95 			immutable digitOffset = digitCount - 1 - digitOffsetReversed;
96 			immutable digitBitshift = digitOffset*radixBitCount; // digit bit shift
97 
98 			// [lowOffsets[i], highOffsets[i]] will become slices into `input`
99 			size_t[radix] lowOffsets; // low offsets for each bin
100 			size_t[radix] highOffsets; // high offsets for each bin
101 
102 			// calculate counts
103 			foreach (immutable j; 0 .. n) // for each element index `j` in `input`
104 			{
105 				immutable UE currentUnsignedValue = cast(UE)input[j].bijectToUnsigned(descending);
106 				immutable i = (currentUnsignedValue >> digitBitshift) & mask; // digit (index)
107 				++highOffsets[i];   // increase histogram bin counter
108 			}
109 
110 			// bin boundaries: accumulate bin counters array
111 			lowOffsets[0] = 0;			 // first low is always zero
112 			foreach (immutable j; 1 .. radix) // for each successive bin counter
113 			{
114 				lowOffsets[j] = highOffsets[j - 1]; // previous roof becomes current floor
115 				highOffsets[j] += highOffsets[j - 1]; // accumulate bin counter
116 			}
117 			assert(highOffsets[radix - 1] == n); // should equal high offset of last bin
118 		}
119 
120 		// /** \em unstable in-place (permutate) reorder/sort `input`
121 		//  * access `input`'s elements in \em reverse to \em reuse filled caches from previous forward iteration.
122 		//  * \see `in_place_indexed_reorder`
123 		//  */
124 		// for (int r = radix - 1; r >= 0; --r) // for each radix digit r in reverse order (cache-friendly)
125 		// {
126 		//	 while (binStat[r])  // as long as elements left in r:th bucket
127 		//	 {
128 		//		 immutable uint i0 = binStat[r].pop_back(); // index to first element of permutation
129 		//		 immutable E	e0 = input[i0]; // value of first/current element of permutation
130 		//		 while (true)
131 		//		 {
132 		//			 immutable int rN = (e0.bijectToUnsigned(descending) >> digitBitshift) & mask; // next digit (index)
133 		//			 if (r == rN) // if permutation cycle closed (back to same digit)
134 		//				 break;
135 		//			 immutable ai = binStat[rN].pop_back(); // array index
136 		//			 swap(input[ai], e0); // do swap
137 		//		 }
138 		//		 input[i0] = e0;		 // complete cycle
139 		//	 }
140 		// }
141 
142 		/+ TODO: copy reorder algorithm into local function that calls itself in the recursion step +/
143 		/+ TODO: call this local function +/
144 
145 		assert(input.isSorted!"a < b");
146 	}
147 	else						// standard radix sort
148 	{
149 		// non-in-place requires temporary `y`. TODO: we could allocate these as
150 		// a stack-allocated array for small arrays and gain extra speed.
151 		auto tempStorage = FixedDynamicArray!E.makeUninitializedOfLength(n);
152 		auto tempSlice = tempStorage[];
153 
154 		static if (doDigitDiscardal)
155 		{
156 			UE ors  = 0;		 // digits diff(xor)-or-sum
157 		}
158 
159 		foreach (immutable digitOffset; 0 .. digitCount) // for each `digitOffset` (in base `radix`) starting with least significant (LSD-first)
160 		{
161 			immutable digitBitshift = digitOffset*radixBitCount;   // digit bit shift
162 
163 			static if (doDigitDiscardal)
164 				if (digitOffset != 0) // if first iteration already performed we have bit statistics
165 					if ((! ((ors >> digitBitshift) & mask))) // if bits in digit[d] are either all \em zero or
166 						continue;			   // no sorting is needed for this digit
167 
168 			// calculate counts
169 			size_t[radix] highOffsets; // histogram buckets count and later upper-limits/walls for values in `input`
170 			UE previousUnsignedValue = cast(UE)input[0].bijectToUnsigned(descending);
171 			foreach (immutable j; 0 .. n) // for each element index `j` in `input`
172 			{
173 				immutable UE currentUnsignedValue = cast(UE)input[j].bijectToUnsigned(descending);
174 				static if (doDigitDiscardal)
175 					if (digitOffset == 0) // first iteration calculates statistics
176 					{
177 						ors |= previousUnsignedValue ^ currentUnsignedValue; // accumulate bit change statistics
178 						// ors |= currentUnsignedValue; // accumulate bits statistics
179 					}
180 				immutable i = (currentUnsignedValue >> digitBitshift) & mask; // digit (index)
181 				++highOffsets[i];			  // increase histogram bin counter
182 				previousUnsignedValue = currentUnsignedValue;
183 			}
184 
185 			static if (doDigitDiscardal)
186 				if (digitOffset == 0) // if first iteration already performed we have bit statistics
187 					if ((! ((ors >> digitBitshift) & mask))) // if bits in digit[d] are either all \em zero or
188 						continue;			   // no sorting is needed for this digit
189 
190 			// bin boundaries: accumulate bin counters array
191 			foreach (immutable j; 1 .. radix) // for each successive bin counter
192 				highOffsets[j] += highOffsets[j - 1]; // accumulate bin counter
193 			assert(highOffsets[radix - 1] == n); // should equal high offset of last bin
194 
195 			// reorder. access `input`'s elements in \em reverse to \em reuse filled caches from previous forward iteration.
196 			// \em stable reorder from `input` to `tempSlice` using normal counting sort (see `counting_sort` above).
197 			enum unrollFactor = 1;
198 			assert((n % unrollFactor) == 0, "TODO: Add reordering for remainder"); // is evenly divisible by unroll factor
199 			for (size_t j = n - 1; j < n; j -= unrollFactor) // for each element `j` in reverse order. when `j` wraps around `j` < `n` is no longer true
200 			{
201 				static foreach (k; 0 .. unrollFactor) // inlined (unrolled) loop
202 				{
203 					immutable i = (input[j - k].bijectToUnsigned(descending) >> digitBitshift) & mask; // digit (index)
204 					tempSlice[--highOffsets[i]] = input[j - k]; // reorder into tempSlice
205 				}
206 			}
207 			assert(highOffsets[0] == 0); // should equal low offset of first bin
208 
209 			static if (digitCount & 1) // if odd number of digit passes
210 			{
211 				static if (__traits(compiles, input[] == tempSlice[]))
212 					input[] = tempSlice[]; // faster than std.algorithm.copy() because input never overlap tempSlice
213 				else
214 				{
215 					import std.algorithm.mutation : copy;
216 					copy(tempSlice[], input[]); /+ TODO: use memcpy +/
217 				}
218 			}
219 			else
220 			{
221 				import std.algorithm.mutation : swap;
222 				swap(input, tempSlice);
223 			}
224 		}
225 	}
226 
227 	static if (descending)
228 		return input.assumeSorted!"a > b";
229 	else
230 		return input.assumeSorted!"a < b";
231 }
232 
233 version (nxt_benchmark)
234 @safe unittest {
235 	version (show) import std.stdio : write, writef, writeln;
236 
237 	/** Test `radixSort` with element-type `E`. */
238 	void test(E)(int n) @safe
239 	{
240 		version (show) writef("%8-s, %10-s, ", E.stringof, n);
241 
242 		import nxt.container.dynamic_array : Array = DynamicArray;
243 
244 		import std.traits : isIntegral, isSigned, isUnsigned;
245 		import nxt.random_ex : randInPlace, randInPlaceWithElementRange;
246 		import std.algorithm.sorting : sort, isSorted;
247 		import std.algorithm.mutation : SwapStrategy;
248 		import std.algorithm.comparison : min, max, equal;
249 		import std.range : retro;
250 		import std.datetime.stopwatch : StopWatch, AutoStart;
251 		auto sw = StopWatch();
252 		immutable nMax = 5;
253 
254 		// generate random
255 		alias A = Array!E;
256 		A a;
257 		a.length = n;
258 		static if (isUnsigned!E) {
259 			// a[].randInPlaceWithElementRange(cast(E)0, cast(E)uint.max);
260 			a[].randInPlace();
261 		} else {
262 			a[].randInPlace();
263 		}
264 		version (show) write("original random: ", a[0 .. min(nMax, $)], ", ");
265 
266 		// standard quick sort
267 		auto qa = a.dupShallow;
268 
269 		sw.reset;
270 		sw.start();
271 		qa[].sort!("a < b", SwapStrategy.stable)();
272 		sw.stop;
273 		immutable sortTimeUsecs = sw.peek.total!"usecs";
274 		version (show) write("quick sorted: ", qa[0 .. min(nMax, $)], ", ");
275 		assert(qa[].isSorted);
276 
277 		// reverse radix sort
278 		{
279 			auto b = a.dupShallow;
280 			b[].radixSort!(typeof(b[]), "a", true)();
281 			version (show) write("reverse radix sorted: ", b[0 .. min(nMax, $)], ", ");
282 			assert(b[].retro.equal(qa[]));
283 		}
284 
285 		// standard radix sort
286 		{
287 			auto b = a.dupShallow;
288 
289 			sw.reset;
290 			sw.start();
291 			b[].radixSort!(typeof(b[]), "b", false)();
292 			sw.stop;
293 			immutable radixTime1 = sw.peek.total!"usecs";
294 
295 			version (show) writef("%9-s, ", cast(real)sortTimeUsecs / radixTime1);
296 			assert(b[].equal(qa[]));
297 		}
298 
299 		// standard radix sort fast-discardal
300 		{
301 			auto b = a.dupShallow;
302 
303 			sw.reset;
304 			sw.start();
305 			b[].radixSort!(typeof(b[]), "b", false, true)();
306 			sw.stop;
307 			immutable radixTime = sw.peek.total!"usecs";
308 
309 			assert(b[].equal(qa[]));
310 
311 			version (show)
312 			{
313 				writeln("standard radix sorted with fast-discardal: ",
314 						b[0 .. min(nMax, $)]);
315 			}
316 			version (show) writef("%9-s, ", cast(real)sortTimeUsecs / radixTime);
317 		}
318 
319 		// inplace-place radix sort
320 		// static if (is(E == uint))
321 		// {
322 		//	 auto b = a.dupShallow;
323 
324 		//	 sw.reset;
325 		//	 sw.start();
326 		//	 b[].radixSort!(typeof(b[]), "b", false, false, true)();
327 		//	 sw.stop;
328 		//	 immutable radixTime = sw.peek.usecs;
329 
330 		//	 assert(b[].equal(qa[]));
331 
332 		//	 version (show)
333 		//	 {
334 		//		 writeln("in-place radix sorted with fast-discardal: ",
335 		//				 b[0 .. min(nMax, $)]);
336 		//	 }
337 		//	 writef("%9-s, ", cast(real)sortTimeUsecs / radixTime);
338 		// }
339 
340 		version (show) writeln("");
341 	}
342 
343 	import std.meta : AliasSeq;
344 	immutable n = 1_00_000;
345 	version (show) writeln("EType, eCount, radixSort (speed-up), radixSort with fast discardal (speed-up), in-place radixSort (speed-up)");
346 	foreach (immutable ix, T; AliasSeq!(byte, short, int, long))
347 	{
348 		test!T(n); // test signed
349 		import std.traits : Unsigned;
350 		test!(Unsigned!T)(n); // test unsigned
351 	}
352 	test!float(n);
353 	test!double(n);
354 }
355 
356 @safe unittest {
357 	import std.algorithm.sorting : sort, isSorted;
358 	import std.algorithm.mutation : swap;
359 	import std.meta : AliasSeq;
360 	import nxt.container.dynamic_array : Array = DynamicArray;
361 	import nxt.random_ex : randInPlace;
362 	immutable n = 10_000;
363 	foreach (ix, E; AliasSeq!(byte, ubyte, short, ushort, int, uint, long, ulong, float, double)) {
364 		alias A = Array!E;
365 		A a;
366 		a.length = n;
367 		a[].randInPlace();
368 		auto b = a.dupShallow;
369 		assert(a[].radixSort() == b[].sort());
370 		swap(a, b);
371 	}
372 }
373 
374 version (unittest) {
375 	import nxt.construction : dupShallow;
376 }