1 // Written in the D programming language
2 
3 /**
4 Copyright: Copyright 2011-
5 License:   $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
6 Authors:   $(WEB erdani.com, Andrei Alexandrescu) and Kato Shoichi
7 Source:    $(PHOBOSSRC std/_benchmark.d)
8 
9 Synopsis:
10 ----
11 module nxt.module_one;
12 import std.benchmark, std.file;
13 
14 void benchmark_fileWrite()
15 {
16     std.file.write("/tmp/deleteme", "hello, world!");
17 }
18 
19 void benchmark_fileRead()
20 {
21     std.file.read("/tmp/deleteme");
22 }
23 
24 mixin(scheduleForBenchmarking);
25 
26 void main()
27 {
28     printBenchmarks();
29 }
30 ----
31 
32 The code above prints:
33 
34 ---
35 ===============================================================================
36 module_one                                             relative ns/iter  iter/s
37 ===============================================================================
38 fileWrite                                                        144.2K    6.9K
39 fileRead                                                          27.1K   36.9K
40 ===============================================================================
41 ---
42  */
43 module std.benchmark;
44 
45 import std.algorithm, std.datetime, std.range, std.stdio, std.traits, std.typecons;
46 version(unittest) import std.random;
47 
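// Enables this module's own sample benchmarks and verbose unittests
// (the debug (std_benchmark) blocks below).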
48 debug = std_benchmark;
49 
50 /**
51 Benchmarks one or more functions for speed assessment and comparison,
52 and prints results as formatted text. A baseline timing that accounts
53 for benchmarking overheads is kept along with the results and
54 automatically deducted from all timings. A timing indistinguishable
55 from the baseline looping overhead appears with a run time of zero and
56 indicates a function that does too little work to be timed.
57 
58 Measurement is done in epochs. For each function benchmarked, the
59 smallest time is taken over all epochs.
60 
61 Benchmark results report time per iteration.
62 
63 Params:
64 
funs = If one or more `funs` are provided, they must come as pairs
in which the first element is a string (the name of the benchmark) and
the second element is the alias of a function (the actual
benchmark). Each alias must refer to a function that takes either no
arguments or one integral argument (the iteration count).
70 target = `File` where output is printed.
71 
72 Example:
73 ---
74 printBenchmarks!(
75     "file write", { std.file.write("/tmp/deleteme", "hello, world!"); },
76     "file read",  { std.file.read("/tmp/deleteme"); })
77     ();
78 ---
79 
80 The example above outputs to `stdout`:
81 
82 ---
83 ===============================================================================
84 Benchmark                                              relative ns/iter  iter/s
85 ===============================================================================
86 file write                                                       144.3K    6.9K
87 file read                                                         26.3K   38.0K
88 ===============================================================================
89 ---
90 
91 With internal iteration, the results would be similar:
92 
93 Example:
94 ---
95 printBenchmarks!(
96     "file write", { std.file.write("/tmp/deleteme", "hello, world!"); },
97     "file read",  (uint n) {
98         foreach (i; 0 .. n) std.file.read("/tmp/deleteme");
99     })
100     ();
101 ---
102 
103 In the example above, the framework iterates the first lambda many
104 times and collects timing information. For the second lambda, instead
105 of doing the iteration, the framework simply passes increasing values
106 of `n` to the lambda. In the end time per iteration is measured, so
107 the performance profile (and the printout) would be virtually
108 identical to the one in the previous example.
109 
110 If the call to `printBenchmarks` does not provide a name for some
111 benchmarks, the name of the benchmarked function is used. (For
112 lambdas, the name is e.g. $(D __lambda5).)
113 
114 Example:
115 ---
116 void benchmark_fileWrite()
117 {
118     std.file.write("/tmp/deleteme", "hello, world!");
119 }
120 printBenchmarks!(
121     benchmark_fileWrite,
122     "file read", { std.file.read("/tmp/deleteme"); },
123     { std.file.read("/tmp/deleteme"); })
124     ();
125 ---
126 
127 The example above outputs to `stdout`:
128 
129 ---
130 ===============================================================================
131 Benchmark                                              relative ns/iter  iter/s
132 ===============================================================================
fileWrite()                                                       76.0K   13.2K
134 file read                                                         28.6K   34.9K
135 __lambda2                                                         27.8K   36.0K
136 ===============================================================================
137 ---
138 
139 If the name of the benchmark starts with $(D "benchmark_") or $(D
140 "benchmark_relative_"), that prefix does not appear in the printed
141 name. If the prefix is $(D "benchmark_relative_"), the "relative"
142 column is filled in the output. The relative performance is computed
143 relative to the last non-relative benchmark completed and expressed in
144 either times (suffix $(D "x")) or percentage (suffix $(D "%")). The
percentage is, for example, $(D 100.0%) if the benchmark runs at the
same speed as the baseline, $(D 200.0%) if the benchmark is twice as
fast as the baseline, and $(D 50.0%) if the benchmark is half as fast.
148 
149 Example:
150 ---
151 printBenchmarks!(
152     "file write", { std.file.write("/tmp/deleteme", "hello, world!"); },
153     "benchmark_relative_file read",  { std.file.read("/tmp/deleteme"); },
154     "benchmark_relative_array creation",  { new char[32]; })
155     ();
156 ---
157 
158 This example has one baseline and two relative tests. The output looks
159 as follows:
160 
161 ---
162 ===============================================================================
163 Benchmark                                              relative ns/iter  iter/s
164 ===============================================================================
165 file write                                                       140.2K    7.1K
166 file read                                                517.8%   27.1K   36.9K
array creation                                           1.2Kx   116.0     8.6M
168 ===============================================================================
169 ---
170 
171 According to the data above, file reading is $(D 5.178) times faster
172 than file writing, whereas array creation is $(D 1200) times faster
173 than file writing.
174 
175 If no functions are passed as `funs`, calling `printBenchmarks`
176 prints the previously registered per-module benchmarks.
177 Refer to $(LREF scheduleForBenchmarking).
178  */
179 void printBenchmarks(funs...)(File target = stdout)
180 {
181     BenchmarkResult[] results;
182     static if (funs.length > 0)
183     {
184         benchmark!(funs)(results);
185     }
186     else
187     {
188         runBenchmarks(results);
189     }
    printResults(results, target);
191 }
192 
193 /**
194 Benchmarks functions and appends the results to the `results`
195 parameter. This function is used by $(LREF printBenchmarks), and the
196 `funs` parameter has the same requirements as for that function.
197 
198 Using `benchmark` directly is recommended for custom processing and
199 printing of _benchmark results.
200 
201 Example:
202 ---
BenchmarkResult[] timings;
204 benchmark!(
205     "file write", { std.file.write("/tmp/deleteme", "hello, world!"); },
206     "file read",  { std.file.read("/tmp/deleteme"); })
207     (timings);
208 foreach (ref datum; timings)
209 {
210     writefln("%s: %s ns/iteration", datum.benchmarkName,
211         datum.perIteration.to!("nsecs", uint));
212 }
213 ---
214  */
215 void benchmark(funs...)(ref BenchmarkResult[] results, string moduleName = null)
216 {
217     auto times = benchmarkImpl!(onlyAliases!funs)();
218     uint curBenchmark = 0;
219     string name;
220     TickDuration lastBaseline;
221 
222     foreach (i, fun; funs)
223     {
224         static if (is(typeof(fun): string))
225         {
226             name = fun;
227         }
228         else
229         {
230             if (!name) name = funs[i].stringof;
231             const isRelative = skipOver(name, "benchmark_") &&
232                 skipOver(name, "relative_");
233             double relative;
234             if (isRelative)
235             {
236                 relative = cast(double) lastBaseline.to!("nsecs", uint) /
237                     times[curBenchmark].to!("nsecs", uint);
238             }
239             else
240             {
241                 lastBaseline = times[curBenchmark];
242             }
243             results ~= BenchmarkResult(
244                 moduleName,
245                 name,
246                 times[curBenchmark++],
247                 relative);
248             name = null;
249         }
250     }
251 }
252 
253 /**
254    Result of one function's benchmark.
255 */
256 struct BenchmarkResult
257 {
258     /**
259        Module name in which the function resides (`null` if not applicable).
260     */
261     string moduleName;
262     /**
263        Name of the benchmark (sans the `benchmark_` or $(D
264        benchmark_relative_) prefix, if any).
265     */
266     string benchmarkName;
267     /**
268        Time per iteration.
269     */
270     TickDuration perIteration;
271     /**
       Relative timing (if the benchmark is a _relative one). Refer to
       $(LREF printBenchmarks) for what constitutes a relative
       benchmark. For relative benchmarks, `relative` is
       $(D 1.0) if the benchmark has the same speed as its baseline,
       $(D 2.0) if the benchmark is twice as fast, and $(D 0.5) if the
       benchmark has half the speed. For non-_relative benchmarks, $(D
       relative) is set to `NaN`, which is testable with $(XREF
       math,isNaN).
280      */
281     double relative;
282 }
283 
284 /*
285   Given a bunch of aliases, benchmarks them all and returns their
286   respective durations.
287 */
288 private TickDuration[fun.length] benchmarkImpl(fun...)()
289 {
290     immutable uint epochs = 1000;
291     TickDuration minSignificantDuration = TickDuration.from!"usecs"(50);
292 
293     // Baseline function. Use asm inside the body to avoid
294     // optimizations.
295     static void baseline() { asm { nop; } }
296 
297     // Use a local pointer to avoid TLS access overhead
298     auto theStopWatch = &.theStopWatch;
299 
300     // All functions to look at include the baseline and the measured
301     // functions.
302     import std.meta;
303     alias AliasSeq!(baseline, fun) allFuns;
304     TickDuration baselineTimePerIteration;
305     TickDuration[fun.length] result;
306 
307     // MEASUREMENTS START HERE
308 
309     foreach (i, measured; allFuns)
310     {
311         TickDuration bestEpoch;
312 
313         // So we have several epochs, and bestEpoch will track the
314         // minimum time across epochs.
315         foreach (epoch; 0 .. epochs)
316         {
317             // Within each epoch, we call the function repeatedly
318             // until we collect at least a total time of
319             // minSignificantDuration.
320             for (uint repeats = 1; repeats < 1_000_000_000; repeats *= 10)
321             {
322                 auto elapsed = callFun!(allFuns[i])(repeats, *theStopWatch);
323 
324                 if (elapsed < minSignificantDuration)
325                 {
326                     // Crappy measurement, try again with more repeats
327                     continue;
328                 }
329 
330                 // Good measurement, record it if it's better than the
331                 // minimum.
332 
333                 // Removing this useless test exposes a codegen bug.
334                 if (!repeats) throw new Exception(null);
335 
336                 auto timePerIteration = elapsed / cast(double) repeats;
337                 if (bestEpoch == bestEpoch.init || timePerIteration < bestEpoch)
338                 {
339                     bestEpoch = timePerIteration;
340                 }
341                 break;
342             }
343         }
344 
345         // Store the final result
346         static if (i == 0)
347         {
348             baselineTimePerIteration = bestEpoch;
349         }
350         else
351         {
352             result[i - 1] = bestEpoch;
353         }
354     }
355 
356     // MEASUREMENTS JUST ENDED
357 
358     // Subtract the baseline from all results
359     foreach (i, f; fun)
360     {
361         //writeln(__traits(identifier, fun[i]));
362         if ((result[i] -= baselineTimePerIteration) < TickDuration.init) {
363             result[i] = TickDuration.init;
364         }
365     }
366 
367     // Return result sans the baseline
368     return result;
369 }
370 
371 unittest
372 {
373     import std.conv : to;
374 
375     debug(std_benchmark) {} else void writefln(S...)(S args){}
376 
377     int a = 123_456_789;
378     auto r = benchmarkImpl!(
379         {auto b = to!string(a);},
380         {auto b = to!wstring(a);},
381         {auto b = to!dstring(a);})
382         ();
383     auto names = [ "intToString", "intToWstring", "intToDstring" ];
384     foreach (i, timing; r)
385         writefln("%s: %sns/call", names[i], r[i].to!("nsecs", int)());
386 }
387 
388 // Verify Example 2
389 unittest
390 {
391     import std.conv;
392 
393     debug (std_benchmark) {} else void writefln(S...)(S args){}
394 
395     int a = 123_456_789;
396     auto r = benchmarkImpl!(
397         (uint n) { foreach (i; 0 .. n) auto b = to!string(a); },
398         (uint n) { foreach (i; 0 .. n) auto b = to!wstring(a); },
399         (uint n) { foreach (i; 0 .. n) auto b = to!dstring(a); })
400         ();
401     auto names = [ "intToString", "intToWstring", "intToDstring" ];
402     foreach (i, timing; r)
403         writefln("%s: %sns/call", names[i], r[i].to!("nsecs", int)());
404 }
405 
406 /*
407  * Calls a function either by alias or by string. Returns the duration
408  * per iteration.
409  */
410 private TickDuration callFun(alias measured)(uint repeats, ref StopWatch theStopWatch)
411 {
412     with (theStopWatch) running ? reset() : start();
413 
414     static if (is(typeof(measured(repeats))))
415     {
416         // Internal iteration
417         measured(repeats);
418     }
419     else
420     {
421         // External iteration
422         foreach (j; 0 .. repeats)
423         {
424             measured();
425         }
426     }
427 
428     return theStopWatch.peek();
429 }
430 
431 /*
432  * Given a bunch of aliases and strings, keeps only aliases of symbols
433  * that start with "benchmark_".
434  */
435 private template onlyBenchmarks(T...)
436 {
437     import std.meta;
438 
439     static if (!T.length)
440     {
441         alias T onlyBenchmarks;
442     }
443     else static if (T[0].stringof.length > 10 && T[0].stringof[0 .. 10] == "benchmark_")
444     {
445         alias AliasSeq!(T[0], onlyBenchmarks!(T[1 .. $])) onlyBenchmarks;
446     }
447     else
448     {
449         alias AliasSeq!(onlyBenchmarks!(T[1 .. $])) onlyBenchmarks;
450     }
451 }
452 
453 /*
454   Eliminates strings, keeping only aliases.
455  */
456 private template onlyAliases(T...)
457 {
458     import std.meta;
459 
460     static if (!T.length)
461     {
462         alias T onlyAliases;
463     }
464     else static if (!is(typeof(T[0]) : string))
465     {
466         alias AliasSeq!(T[0], onlyAliases!(T[1 .. $])) onlyAliases;
467     }
468     else
469     {
470         alias AliasSeq!(onlyAliases!(T[1 .. $])) onlyAliases;
471     }
472 }
473 
474 unittest
475 {
476     alias onlyAliases!("hello", int, "world", double) aliases;
477     static assert(aliases.length == 2);
478     static assert(is(int == aliases[0]));
479     static assert(is(double == aliases[1]));
480 }
481 
482 debug (std_benchmark) mixin(scheduleForBenchmarking);
483 
484 /*
485 Generates benchmarking code for a bunch of functions passed as
486 strings. Intended to be used during compilation as part of $(D
487 scheduleForBenchmarking).
488  */
489 string benchmarkModuleCodegen()(string[] funs...)
490 {
491     auto result = q{addBenchmarks(& benchmark!(};
492     foreach (fun; funs)
493     {
494         import std.algorithm;
495         if (!startsWith(fun, "benchmark_"))
496         {
497             continue;
498         }
499         result ~= fun;
500         result ~= ", ";
501     }
502     result ~= q{), "test");};
503     return result;
504 }
505 
506 /**
507 The examples shown so far feature simple, ad-hoc benchmarks, but $(D
508 std.benchmark) allows for superior automation and systematic use aimed
509 at large projects.
510 
511 The code $(D mixin(scheduleForBenchmarking)) planted at module level
512 schedules the entire module for benchmarking. The actual benchmarking
513 can be done globally by calling $(LREF printBenchmarks) (with no
514 arguments) or $(LREF runBenchmarks). Either call is usually made from
515 `main`.
516 
517 In a multi-module application, several modules may define benchmarks,
518 and `printBenchmarks` distinguishes each visually.
519 
520 Example:
521 ---
522 module nxt.acme;
523 import std.file, std.array;
524 
525 void benchmark_fileWrite()
526 {
527     std.file.write("/tmp/deleteme", "hello, world!");
528 }
529 
530 void benchmark_relative_fileRead()
531 {
532     std.file.read("/tmp/deleteme");
533 }
534 
535 mixin(scheduleForBenchmarking);
536 ---
537 
538 Typically the `mixin` is guarded by a version so benchmarks are only run if desired.
539 
540 Example:
541 ---
542 version (benchmark_enabled) mixin(scheduleForBenchmarking);
543 ---
544  */
545 @property string scheduleForBenchmarking()
546 {
    return `
shared static this()
{
    mixin("enum code = benchmarkModuleCodegen(\"" ~ .stringof[7 .. $] ~
            "\", __traits(allMembers, " ~ .stringof[7 .. $] ~ "));");
    mixin(code);
}`;
554 }
555 
556 debug (std_benchmark)
557 {
558     private void benchmark_fileWrite()
559     {
560         import std.file;
561         std.file.write("/tmp/deleteme", "hello, world!");
562     }
563 
564     private void benchmark_fileRead()
565     {
566         import std.file;
567         std.file.read("/tmp/deleteme");
568     }
569 
570     private void benchmark_appendBuiltin(uint n)
571     {
572         string a;
573         foreach (i; 0 .. n)
574         {
575             a ~= 'x';
576         }
577     }
578 
579     private void benchmark_relative_appendAppender(uint n)
580     {
581         import std.range;
582         auto a = appender!string();
583         foreach (i; 0 .. n)
584         {
585             put(a, 'x');
586         }
587     }
588 
589     private void benchmark_relative_appendConcat(uint n)
590     {
591         string a;
592         foreach (i; 0 .. n)
593         {
594             a = a ~ 'x';
595         }
596     }
597 }
598 
599 /*
600 Adds benchmarks to the global queue of benchmarks to be executed. Most
601 frequently used implicitly by $(LREF scheduleForBenchmarking).
602  */
603 void addBenchmarks(
604     void function(ref BenchmarkResult[], string) fun,
605     string moduleName)
606 {
607     allBenchmarks ~= tuple(moduleName, fun);
608 }
609 
610 // One benchmark stopwatch
611 private StopWatch theStopWatch;
612 
613 /*
614 Array containing all benchmarks to be executed. Usually they are
615 executed by calling `runBenchmarks` or `printBenchmarks`.
616  */
617 private Tuple!(string, void function(ref BenchmarkResult[], string))[]
618 allBenchmarks;
619 
620 /**
621 
622 Performs all benchmarks previously scheduled with $(LREF
623 scheduleForBenchmarking), application-wide. Usually not called
624 directly, but instead as support for $(LREF printBenchmarks). User
625 code may be interested in using this function directly for
626 e.g. results formatting.
627 
628   */
629 void runBenchmarks(ref BenchmarkResult[] results)
630 {
631     foreach (b; allBenchmarks)
632     {
633         b[1](results, b[0]);
634     }
635 }
636 
637 /**
638 Prints benchmark results as described with $(LREF
639 printBenchmarks). This is useful if benchmark postprocessing is
640 desired before printing.
641  */
642 void printResults(in BenchmarkResult[] data, File target = stdout)
643 {
644     if (data.empty)
645     {
646         return;
647     }
648 
649     enum columns = 79;
    string thisModule;
651 
652     void printModuleHeader()
653     {
654         target.writefln(
655             "==================================================" ~
656             "=============================\n" ~
657             "%-*s%8s%8s%8s\n" ~
658             "=================================================" ~
659             "==============================", columns - 24,
660             thisModule is null ? "Benchmark" : thisModule,
661             "relative", "t/iter", "iter/s");
662     }
663 
664     foreach (i, datum; data)
665     {
666         if (thisModule != datum.moduleName || i == 0)
667         {
668             thisModule = datum.moduleName;
669             printModuleHeader();
670         }
671         double itersPerSecond = 1.0 / datum.perIteration.to!("seconds", double)();
672         auto name = datum.benchmarkName;
673         if (datum.relative is double.init)
674         {
675             // Write without relative information
676             target.writefln("%-*s %6.1ms  %6.1m",
677                     columns - 16, name,
678                     datum.perIteration.to!("seconds", double)(),
679                     itersPerSecond);
680         }
681         else
682         {
683             // Write with relative information
684             double relative = datum.relative;
685             string fmt;
686             if (relative < 1000)
687             {
688                 // represent relative speed as percent
689                 fmt = "%-*s %7.1f%%  %6.1m  %6.1m";
690                 relative *= 100;
691             }
692             else
693             {
694                 // represent relative speed as multiplication factor
695                 fmt = "%-*s  %6.1mx  %6.1m  %6.1m";
696             }
697             target.writefln(fmt,
698                     columns - 25, name,
699                     relative,
700                     datum.perIteration.to!("seconds", double)(),
701                     itersPerSecond);
702         }
703     }
704     target.writeln(repeat('=', columns));
705 }
706 
707 /**
708 Suspends and resumes the current benchmark, respectively. This is useful
709 if the benchmark needs to set things up before performing the
710 measurement.
711 
712 Example:
713 ----
714 import std.algorithm;
715 void benchmark_findDouble(uint n)
716 {
717     // Fill an array of random numbers and an array of random indexes
718     benchmarkSuspend();
719     auto array = new double[n];
720     auto indexes = new size_t[n];
721     foreach (i; 0 .. n)
722     {
723         array[i] = uniform(0.0, 1000.0);
724         indexes[i] = uniform(0, n);
725     }
726     benchmarkResume();
727 
728     // The actual benchmark begins here
729     foreach (i; 0 .. n)
730     {
731         auto balance = std.algorithm.find(array, array[indexes[i]]);
732     }
733 }
unittest
{
    BenchmarkResult[] results;
    benchmark!benchmark_findDouble(results);
    writeln(results[0].perIteration.to!("msecs", uint));
}
738 ----
739  */
740 void benchmarkSuspend()
741 {
742     theStopWatch.stop();
743 }
744 
745 /// Ditto
746 void benchmarkResume()
747 {
748     theStopWatch.start();
749 }
750 
751 unittest
752 {
753     void benchmark_findDouble(uint n)
754     {
755         // Fill an array of random numbers and an array of random indexes
756         benchmarkSuspend();
757         auto array = new double[n];
758         auto indexes = new size_t[n];
759         foreach (i; 0 .. n)
760         {
761             array[i] = uniform(0.0, 1000.0);
762             indexes[i] = uniform(0, n);
763         }
764         benchmarkResume();
765 
766         // The actual benchmark begins here
767         foreach (i; 0 .. n)
768         {
769             auto balance = std.algorithm.find(array, array[indexes[i]]);
770         }
771     }
772 
773     //writeln(benchmark!benchmark_findDouble(10_000)[0].to!("msecs",
774     //uint));
775     benchmarkImpl!benchmark_findDouble();
776 }
777 
778 /++
779    Aliases from $(D std.datetime).
780   +/
781 alias std.datetime.AutoStart AutoStart;
782 /// ditto
783 alias std.datetime.StopWatch StopWatch;
784 /// ditto
785 alias std.datetime.ComparingBenchmarkResult ComparingBenchmarkResult;
786 /// ditto
787 alias std.datetime.comparingBenchmark comparingBenchmark;
788 /// ditto
789 alias std.datetime.measureTime measureTime;