1 // Written in the D programming language
2 
3 /**
4 Copyright: Copyright 2011-
5 License:   $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
6 Authors:   $(WEB erdani.com, Andrei Alexandrescu) and Kato Shoichi
Source:    $(PHOBOSSRC std/_benchmark.d)
8 
9 Synopsis:
10 ----
11 module nxt.module_one;
12 import std.benchmark, std.file;
13 
14 void benchmark_fileWrite()
15 {
16 	std.file.write("/tmp/deleteme", "hello, world!");
17 }
18 
19 void benchmark_fileRead()
20 {
21 	std.file.read("/tmp/deleteme");
22 }
23 
24 mixin(scheduleForBenchmarking);
25 
26 void main()
27 {
28 	printBenchmarks();
29 }
30 ----
31 
32 The code above prints:
33 
34 ---
35 ===============================================================================
module_one                                            relative ns/iter  iter/s
===============================================================================
fileWrite                                                       144.2K    6.9K
fileRead                                                         27.1K   36.9K
40 ===============================================================================
41 ---
42  */
43 module std.benchmark;
44 
45 import std.algorithm, std.datetime, std.range, std.stdio, std.traits, std.typecons;
46 version (unittest) import std.random;
47 
48 debug = std_benchmark;
49 
50 /**
51 Benchmarks one or more functions for speed assessment and comparison,
52 and prints results as formatted text. A baseline timing that accounts
53 for benchmarking overheads is kept along with the results and
54 automatically deducted from all timings. A timing indistinguishable
55 from the baseline looping overhead appears with a run time of zero and
56 indicates a function that does too little work to be timed.
57 
58 Measurement is done in epochs. For each function benchmarked, the
59 smallest time is taken over all epochs.
60 
61 Benchmark results report time per iteration.
62 
63 Params:
64 
funs = If one or more `funs` are provided, they must come as pairs
66 in which the first element is a string (the name of the benchmark) and
67 the second element is the alias of a function (the actual
68 benchmark). Each alias must refer to a function that takes either no
69 arguments or one integral argument (which is the iterations count).
70 target = `File` where output is printed.
71 
72 Example:
73 ---
74 printBenchmarks!(
75 	"file write", { std.file.write("/tmp/deleteme", "hello, world!"); },
76 	"file read",  { std.file.read("/tmp/deleteme"); })
77 	();
78 ---
79 
80 The example above outputs to `stdout`:
81 
82 ---
83 ===============================================================================
Benchmark                                             relative ns/iter  iter/s
===============================================================================
file write                                                      144.3K    6.9K
file read                                                        26.3K   38.0K
88 ===============================================================================
89 ---
90 
91 With internal iteration, the results would be similar:
92 
93 Example:
94 ---
95 printBenchmarks!(
96 	"file write", { std.file.write("/tmp/deleteme", "hello, world!"); },
97 	"file read",  (uint n) {
98 		foreach (i; 0 .. n) std.file.read("/tmp/deleteme");
99 	})
100 	();
101 ---
102 
103 In the example above, the framework iterates the first lambda many
104 times and collects timing information. For the second lambda, instead
105 of doing the iteration, the framework simply passes increasing values
106 of `n` to the lambda. In the end time per iteration is measured, so
107 the performance profile (and the printout) would be virtually
108 identical to the one in the previous example.
109 
110 If the call to `printBenchmarks` does not provide a name for some
111 benchmarks, the name of the benchmarked function is used. (For
112 lambdas, the name is e.g. $(D __lambda5).)
113 
114 Example:
115 ---
116 void benchmark_fileWrite()
117 {
118 	std.file.write("/tmp/deleteme", "hello, world!");
119 }
120 printBenchmarks!(
121 	benchmark_fileWrite,
122 	"file read", { std.file.read("/tmp/deleteme"); },
123 	{ std.file.read("/tmp/deleteme"); })
124 	();
125 ---
126 
127 The example above outputs to `stdout`:
128 
129 ---
130 ===============================================================================
Benchmark                                             relative ns/iter  iter/s
===============================================================================
fileWrite()                                                      76.0K   13.2K
file read                                                        28.6K   34.9K
__lambda2                                                        27.8K   36.0K
136 ===============================================================================
137 ---
138 
139 If the name of the benchmark starts with $(D "benchmark_") or $(D
140 "benchmark_relative_"), that prefix does not appear in the printed
141 name. If the prefix is $(D "benchmark_relative_"), the "relative"
142 column is filled in the output. The relative performance is computed
143 relative to the last non-relative benchmark completed and expressed in
144 either times (suffix $(D "x")) or percentage (suffix $(D "%")). The
percentage is, for example, $(D 100.0%) if the benchmark runs at the
same speed as the baseline, $(D 200.0%) if the benchmark is twice as
fast as the baseline, and $(D 50.0%) if the benchmark is half as fast.
148 
149 Example:
150 ---
151 printBenchmarks!(
152 	"file write", { std.file.write("/tmp/deleteme", "hello, world!"); },
153 	"benchmark_relative_file read",  { std.file.read("/tmp/deleteme"); },
154 	"benchmark_relative_array creation",  { new char[32]; })
155 	();
156 ---
157 
158 This example has one baseline and two relative tests. The output looks
159 as follows:
160 
161 ---
162 ===============================================================================
Benchmark                                             relative ns/iter  iter/s
===============================================================================
file write                                                      140.2K    7.1K
file read                                      517.8%    27.1K     36.9K
array creation                           1284.3%   116.0     8.6M
168 ===============================================================================
169 ---
170 
According to the data above, file reading is $(D 5.178) times faster
than file writing, whereas array creation is $(D 12.843) times faster
than file writing.
174 
175 If no functions are passed as `funs`, calling `printBenchmarks`
176 prints the previously registered per-module benchmarks.
177 Refer to $(LREF scheduleForBenchmarking).
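
Example:
---
// Assumes participating modules mixed in scheduleForBenchmarking
void main()
{
	printBenchmarks();
}
---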
178  */
179 void printBenchmarks(funs...)(File target = stdout)
180 {
181 	BenchmarkResult[] results;
182 	static if (funs.length > 0)
183 	{
184 		benchmark!(funs)(results);
185 	}
186 	else
187 	{
188 		runBenchmarks(results);
189 	}
	printResults(results, target);
191 }
192 
193 /**
194 Benchmarks functions and appends the results to the `results`
195 parameter. This function is used by $(LREF printBenchmarks), and the
196 `funs` parameter has the same requirements as for that function.
197 
198 Using `benchmark` directly is recommended for custom processing and
199 printing of _benchmark results.
200 
201 Example:
202 ---
BenchmarkResult[] timings;
204 benchmark!(
205 	"file write", { std.file.write("/tmp/deleteme", "hello, world!"); },
206 	"file read",  { std.file.read("/tmp/deleteme"); })
207 	(timings);
208 foreach (ref datum; timings)
209 {
210 	writefln("%s: %s ns/iteration", datum.benchmarkName,
211 		datum.perIteration.to!("nsecs", uint));
212 }
213 ---
214  */
215 void benchmark(funs...)(ref BenchmarkResult[] results, string moduleName = null)
216 {
217 	auto times = benchmarkImpl!(onlyAliases!funs)();
218 	uint curBenchmark = 0;
219 	string name;
220 	TickDuration lastBaseline;
221 
222 	foreach (i, fun; funs)
223 	{
224 		static if (is(typeof(fun): string))
225 		{
226 			name = fun;
227 		}
228 		else
229 		{
230 			if (!name) name = funs[i].stringof;
231 			const isRelative = skipOver(name, "benchmark_") &&
232 				skipOver(name, "relative_");
233 			double relative;
234 			if (isRelative)
235 			{
236 				relative = cast(double) lastBaseline.to!("nsecs", uint) /
237 					times[curBenchmark].to!("nsecs", uint);
238 			}
239 			else
240 			{
241 				lastBaseline = times[curBenchmark];
242 			}
243 			results ~= BenchmarkResult(
244 				moduleName,
245 				name,
246 				times[curBenchmark++],
247 				relative);
248 			name = null;
249 		}
250 	}
251 }
252 
253 /**
254    Result of one function's benchmark.
255 */
256 struct BenchmarkResult
257 {
258 	/**
259 	   Module name in which the function resides (`null` if not applicable).
260 	*/
261 	string moduleName;
262 	/**
263 	   Name of the benchmark (sans the `benchmark_` or $(D
264 	   benchmark_relative_) prefix, if any).
265 	*/
266 	string benchmarkName;
267 	/**
268 	   Time per iteration.
269 	*/
270 	TickDuration perIteration;
271 	/**
	   Relative timing (if the benchmark is a _relative one). Refer to
	   $(LREF scheduleForBenchmarking) for what constitutes a relative
	   benchmark. For relative benchmarks, `relative` is $(D 1.0) if
	   the benchmark has the same speed as its baseline, $(D 2.0) if
	   the benchmark is twice as fast, and $(D 0.5) if the benchmark
	   has half the speed. For non-_relative benchmarks, $(D relative)
	   is set to `NaN`, which is testable with $(XREF math,isNaN).
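
	   Example (a minimal sketch; assumes `results` was filled by
	   $(LREF benchmark) or $(LREF runBenchmarks)):
	   ---
	   import std.math : isNaN;
	   foreach (r; results)
	   {
	       if (r.relative.isNaN)
	           writefln("%s: baseline", r.benchmarkName);
	       else
	           writefln("%s: %.3fx baseline", r.benchmarkName, r.relative);
	   }
	   ---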
280 	 */
281 	double relative;
282 }
283 
284 /*
285   Given a bunch of aliases, benchmarks them all and returns their
286   respective durations.
287 */
288 private TickDuration[fun.length] benchmarkImpl(fun...)()
289 {
290 	immutable uint epochs = 1000;
291 	TickDuration minSignificantDuration = TickDuration.from!"usecs"(50);
292 
293 	// Baseline function. Use asm inside the body to avoid
294 	// optimizations.
295 	static void baseline() { asm { nop; } }
296 
297 	// Use a local pointer to avoid TLS access overhead
298 	auto theStopWatch = &.theStopWatch;
299 
300 	// All functions to look at include the baseline and the measured
301 	// functions.
302 	import std.meta;
303 	alias AliasSeq!(baseline, fun) allFuns;
304 	TickDuration baselineTimePerIteration;
305 	TickDuration[fun.length] result;
306 
307 	// MEASUREMENTS START HERE
308 
309 	foreach (i, measured; allFuns)
310 	{
311 		TickDuration bestEpoch;
312 
313 		// So we have several epochs, and bestEpoch will track the
314 		// minimum time across epochs.
315 		foreach (epoch; 0 .. epochs)
316 		{
317 			// Within each epoch, we call the function repeatedly
318 			// until we collect at least a total time of
319 			// minSignificantDuration.
320 			for (uint repeats = 1; repeats < 1_000_000_000; repeats *= 10)
321 			{
322 				auto elapsed = callFun!(allFuns[i])(repeats, *theStopWatch);
323 
324 				if (elapsed < minSignificantDuration)
325 				{
326 					// Crappy measurement, try again with more repeats
327 					continue;
328 				}
329 
330 				// Good measurement, record it if it's better than the
331 				// minimum.
332 
333 				// Removing this useless test exposes a codegen bug.
334 				if (!repeats) throw new Exception(null);
335 
336 				auto timePerIteration = elapsed / cast(double) repeats;
337 				if (bestEpoch == bestEpoch.init || timePerIteration < bestEpoch)
338 				{
339 					bestEpoch = timePerIteration;
340 				}
341 				break;
342 			}
343 		}
344 
345 		// Store the final result
346 		static if (i == 0)
347 		{
348 			baselineTimePerIteration = bestEpoch;
349 		}
350 		else
351 		{
352 			result[i - 1] = bestEpoch;
353 		}
354 	}
355 
356 	// MEASUREMENTS JUST ENDED
357 
358 	// Subtract the baseline from all results
359 	foreach (i, f; fun)
360 	{
361 		//writeln(__traits(identifier, fun[i]));
362 		if ((result[i] -= baselineTimePerIteration) < TickDuration.init) {
363 			result[i] = TickDuration.init;
364 		}
365 	}
366 
367 	// Return result sans the baseline
368 	return result;
369 }
370 
371 unittest {
372 	import std.conv : to;
373 
374 	debug(std_benchmark) {} else void writefln(S...)(S args){}
375 
376 	int a = 123_456_789;
377 	auto r = benchmarkImpl!(
378 		{auto b = to!string(a);},
379 		{auto b = to!wstring(a);},
380 		{auto b = to!dstring(a);})
381 		();
382 	auto names = [ "intToString", "intToWstring", "intToDstring" ];
383 	foreach (i, timing; r)
384 		writefln("%s: %sns/call", names[i], r[i].to!("nsecs", int)());
385 }
386 
387 // Verify Example 2
388 unittest {
389 	import std.conv;
390 
391 	debug (std_benchmark) {} else void writefln(S...)(S args){}
392 
393 	int a = 123_456_789;
394 	auto r = benchmarkImpl!(
395 		(uint n) { foreach (i; 0 .. n) auto b = to!string(a); },
396 		(uint n) { foreach (i; 0 .. n) auto b = to!wstring(a); },
397 		(uint n) { foreach (i; 0 .. n) auto b = to!dstring(a); })
398 		();
399 	auto names = [ "intToString", "intToWstring", "intToDstring" ];
400 	foreach (i, timing; r)
401 		writefln("%s: %sns/call", names[i], r[i].to!("nsecs", int)());
402 }
403 
404 /*
405  * Calls a function either by alias or by string. Returns the duration
406  * per iteration.
407  */
408 private TickDuration callFun(alias measured)(uint repeats, ref StopWatch theStopWatch)
409 {
410 	with (theStopWatch) running ? reset() : start();
411 
412 	static if (is(typeof(measured(repeats))))
413 	{
414 		// Internal iteration
415 		measured(repeats);
416 	}
417 	else
418 	{
419 		// External iteration
420 		foreach (j; 0 .. repeats)
421 		{
422 			measured();
423 		}
424 	}
425 
426 	return theStopWatch.peek();
427 }
428 
429 /*
430  * Given a bunch of aliases and strings, keeps only aliases of symbols
431  * that start with "benchmark_".
432  */
433 private template onlyBenchmarks(T...)
434 {
435 	import std.meta;
436 
437 	static if (!T.length)
438 	{
439 		alias T onlyBenchmarks;
440 	}
441 	else static if (T[0].stringof.length > 10 && T[0].stringof[0 .. 10] == "benchmark_")
442 	{
443 		alias AliasSeq!(T[0], onlyBenchmarks!(T[1 .. $])) onlyBenchmarks;
444 	}
445 	else
446 	{
447 		alias AliasSeq!(onlyBenchmarks!(T[1 .. $])) onlyBenchmarks;
448 	}
449 }
450 
451 /*
452   Eliminates strings, keeping only aliases.
453  */
454 private template onlyAliases(T...)
455 {
456 	import std.meta;
457 
458 	static if (!T.length)
459 	{
460 		alias T onlyAliases;
461 	}
462 	else static if (!is(typeof(T[0]) : string))
463 	{
464 		alias AliasSeq!(T[0], onlyAliases!(T[1 .. $])) onlyAliases;
465 	}
466 	else
467 	{
468 		alias AliasSeq!(onlyAliases!(T[1 .. $])) onlyAliases;
469 	}
470 }
471 
472 unittest {
473 	alias onlyAliases!("hello", int, "world", double) aliases;
474 	static assert(aliases.length == 2);
475 	static assert(is(int == aliases[0]));
476 	static assert(is(double == aliases[1]));
477 }
478 
479 debug (std_benchmark) mixin(scheduleForBenchmarking);
480 
481 /*
Generates benchmarking code for a bunch of function names passed as
strings. Intended to be used during compilation as part of $(D
scheduleForBenchmarking). The generated call registers the benchmarks
under the given module name.
 */
string benchmarkModuleCodegen()(string moduleName, string[] funs...)
{
	auto result = q{addBenchmarks(& benchmark!(};
	foreach (fun; funs)
	{
		import std.algorithm;
		if (!startsWith(fun, "benchmark_"))
		{
			continue;
		}
		result ~= fun;
		result ~= ", ";
	}
	// A trailing comma, if any, is valid in template argument lists.
	result ~= `), "` ~ moduleName ~ `");`;
500 	return result;
501 }
502 
503 /**
504 The examples shown so far feature simple, ad-hoc benchmarks, but $(D
505 std.benchmark) allows for superior automation and systematic use aimed
506 at large projects.
507 
508 The code $(D mixin(scheduleForBenchmarking)) planted at module level
509 schedules the entire module for benchmarking. The actual benchmarking
510 can be done globally by calling $(LREF printBenchmarks) (with no
511 arguments) or $(LREF runBenchmarks). Either call is usually made from
512 `main`.
513 
514 In a multi-module application, several modules may define benchmarks,
515 and `printBenchmarks` distinguishes each visually.
516 
517 Example:
518 ---
519 module nxt.acme;
520 import std.file, std.array;
521 
522 void benchmark_fileWrite()
523 {
524 	std.file.write("/tmp/deleteme", "hello, world!");
525 }
526 
527 void benchmark_relative_fileRead()
528 {
529 	std.file.read("/tmp/deleteme");
530 }
531 
532 mixin(scheduleForBenchmarking);
533 ---
534 
Typically the `mixin` is guarded by a `version` declaration so that benchmarks are compiled in only when desired.
536 
537 Example:
538 ---
539 version (benchmark_enabled) mixin(scheduleForBenchmarking);
540 ---
541  */
542 @property string scheduleForBenchmarking()
543 {
	return `
shared static this()
{
	// .stringof of the enclosing module yields "module <name>"; strip
	// the "module " prefix to recover the name.
	mixin("enum code = benchmarkModuleCodegen(\"" ~ .stringof[7 .. $] ~
			"\", __traits(allMembers, " ~ .stringof[7 .. $] ~ "));");
	mixin(code);
}`;
551 }
552 
553 debug (std_benchmark)
554 {
555 	private void benchmark_fileWrite()
556 	{
557 		import std.file;
558 		std.file.write("/tmp/deleteme", "hello, world!");
559 	}
560 
561 	private void benchmark_fileRead()
562 	{
563 		import std.file;
564 		std.file.read("/tmp/deleteme");
565 	}
566 
567 	private void benchmark_appendBuiltin(uint n)
568 	{
569 		string a;
570 		foreach (i; 0 .. n)
571 		{
572 			a ~= 'x';
573 		}
574 	}
575 
576 	private void benchmark_relative_appendAppender(uint n)
577 	{
578 		import std.range;
579 		auto a = appender!string();
580 		foreach (i; 0 .. n)
581 		{
582 			put(a, 'x');
583 		}
584 	}
585 
586 	private void benchmark_relative_appendConcat(uint n)
587 	{
588 		string a;
589 		foreach (i; 0 .. n)
590 		{
591 			a = a ~ 'x';
592 		}
593 	}
594 }
595 
596 /*
597 Adds benchmarks to the global queue of benchmarks to be executed. Most
598 frequently used implicitly by $(LREF scheduleForBenchmarking).
599  */
600 void addBenchmarks(
601 	void function(ref BenchmarkResult[], string) fun,
602 	string moduleName)
603 {
604 	allBenchmarks ~= tuple(moduleName, fun);
605 }
606 
607 // One benchmark stopwatch
608 private StopWatch theStopWatch;
609 
610 /*
611 Array containing all benchmarks to be executed. Usually they are
612 executed by calling `runBenchmarks` or `printBenchmarks`.
613  */
614 private Tuple!(string, void function(ref BenchmarkResult[], string))[]
615 allBenchmarks;
616 
617 /**
618 
619 Performs all benchmarks previously scheduled with $(LREF
620 scheduleForBenchmarking), application-wide. Usually not called
621 directly, but instead as support for $(LREF printBenchmarks). User
622 code may be interested in using this function directly for
623 e.g. results formatting.
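
Example:
---
// A minimal sketch: collect raw results, then format them yourself
BenchmarkResult[] results;
runBenchmarks(results);
foreach (r; results)
	writefln("%s/%s: %s ns/iter", r.moduleName, r.benchmarkName,
		r.perIteration.to!("nsecs", uint));
---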
624 
625   */
626 void runBenchmarks(ref BenchmarkResult[] results)
627 {
628 	foreach (b; allBenchmarks)
629 	{
630 		b[1](results, b[0]);
631 	}
632 }
633 
634 /**
635 Prints benchmark results as described with $(LREF
636 printBenchmarks). This is useful if benchmark postprocessing is
637 desired before printing.
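
Example:
---
// A sketch: sort results by time per iteration before printing
BenchmarkResult[] results;
runBenchmarks(results);
sort!((a, b) => a.perIteration < b.perIteration)(results);
printResults(results);
---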
638  */
639 void printResults(in BenchmarkResult[] data, File target = stdout)
640 {
641 	if (data.empty)
642 	{
643 		return;
644 	}
645 
646 	enum columns = 79;
647 	string thisModule, thisGroup;
648 
649 	void printModuleHeader()
650 	{
651 		target.writefln(
652 			"==================================================" ~
653 			"=============================\n" ~
654 			"%-*s%8s%8s%8s\n" ~
655 			"=================================================" ~
656 			"==============================", columns - 24,
657 			thisModule is null ? "Benchmark" : thisModule,
658 			"relative", "t/iter", "iter/s");
659 	}
660 
661 	foreach (i, datum; data)
662 	{
663 		if (thisModule != datum.moduleName || i == 0)
664 		{
665 			thisModule = datum.moduleName;
666 			printModuleHeader();
667 		}
668 		double itersPerSecond = 1.0 / datum.perIteration.to!("seconds", double)();
669 		auto name = datum.benchmarkName;
670 		if (datum.relative is double.init)
671 		{
672 			// Write without relative information
673 			target.writefln("%-*s %6.1ms  %6.1m",
674 					columns - 16, name,
675 					datum.perIteration.to!("seconds", double)(),
676 					itersPerSecond);
677 		}
678 		else
679 		{
680 			// Write with relative information
681 			double relative = datum.relative;
682 			string fmt;
683 			if (relative < 1000)
684 			{
685 				// represent relative speed as percent
686 				fmt = "%-*s %7.1f%%  %6.1m  %6.1m";
687 				relative *= 100;
688 			}
689 			else
690 			{
691 				// represent relative speed as multiplication factor
692 				fmt = "%-*s  %6.1mx  %6.1m  %6.1m";
693 			}
694 			target.writefln(fmt,
695 					columns - 25, name,
696 					relative,
697 					datum.perIteration.to!("seconds", double)(),
698 					itersPerSecond);
699 		}
700 	}
701 	target.writeln(repeat('=', columns));
702 }
703 
704 /**
705 Suspends and resumes the current benchmark, respectively. This is useful
706 if the benchmark needs to set things up before performing the
707 measurement.
708 
709 Example:
710 ----
711 import std.algorithm;
712 void benchmark_findDouble(uint n)
713 {
714 	// Fill an array of random numbers and an array of random indexes
715 	benchmarkSuspend();
716 	auto array = new double[n];
717 	auto indexes = new size_t[n];
718 	foreach (i; 0 .. n)
719 	{
720 		array[i] = uniform(0.0, 1000.0);
721 		indexes[i] = uniform(0, n);
722 	}
723 	benchmarkResume();
724 
725 	// The actual benchmark begins here
726 	foreach (i; 0 .. n)
727 	{
728 		auto balance = std.algorithm.find(array, array[indexes[i]]);
729 	}
730 }
unittest {
	printBenchmarks!benchmark_findDouble();
}
734 ----
735  */
736 void benchmarkSuspend()
737 {
738 	theStopWatch.stop();
739 }
740 
741 /// Ditto
742 void benchmarkResume()
743 {
744 	theStopWatch.start();
745 }
746 
747 unittest {
748 	void benchmark_findDouble(uint n)
749 	{
750 		// Fill an array of random numbers and an array of random indexes
751 		benchmarkSuspend();
752 		auto array = new double[n];
753 		auto indexes = new size_t[n];
754 		foreach (i; 0 .. n)
755 		{
756 			array[i] = uniform(0.0, 1000.0);
757 			indexes[i] = uniform(0, n);
758 		}
759 		benchmarkResume();
760 
761 		// The actual benchmark begins here
762 		foreach (i; 0 .. n)
763 		{
764 			auto balance = std.algorithm.find(array, array[indexes[i]]);
765 		}
766 	}
767 
768 	//writeln(benchmark!benchmark_findDouble(10_000)[0].to!("msecs",
769 	//uint));
770 	benchmarkImpl!benchmark_findDouble();
771 }
772 
773 /++
774    Aliases from $(D std.datetime).
775   +/
776 alias std.datetime.AutoStart AutoStart;
777 /// ditto
778 alias std.datetime.StopWatch StopWatch;
779 /// ditto
780 alias std.datetime.ComparingBenchmarkResult ComparingBenchmarkResult;
781 /// ditto
782 alias std.datetime.comparingBenchmark comparingBenchmark;
783 /// ditto
784 alias std.datetime.measureTime measureTime;