// Written in the D programming language

/**
Copyright: Copyright 2011-
License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
Authors: $(WEB erdani.com, Andrei Alexandrescu) and Kato Shoichi
Source: $(PHOBOSSRC std/_benchmark.d)

Synopsis:
----
module nxt.module_one;
import std.benchmark, std.file;

void benchmark_fileWrite()
{
    std.file.write("/tmp/deleteme", "hello, world!");
}

void benchmark_fileRead()
{
    std.file.read("/tmp/deleteme");
}

mixin(scheduleForBenchmarking);

void main()
{
    printBenchmarks();
}
----

The code above prints:

---
===============================================================================
module_one                                    relative    ns/iter    iter/s
===============================================================================
fileWrite                                                   144.2K      6.9K
fileRead                                                     27.1K     36.9K
===============================================================================
---
*/
module std.benchmark;

import std.algorithm, std.datetime, std.range, std.stdio, std.traits, std.typecons;
version(unittest) import std.random;

debug = std_benchmark;

/**
Benchmarks one or more functions for speed assessment and comparison,
and prints results as formatted text. A baseline timing that accounts
for benchmarking overheads is kept along with the results and
automatically deducted from all timings. A timing indistinguishable
from the baseline looping overhead appears with a run time of zero and
indicates a function that does too little work to be timed.

Measurement is done in epochs. For each function benchmarked, the
smallest time is taken over all epochs.

Benchmark results report time per iteration.

Params:

funs = If one or more `funs` are provided, they must come as pairs
in which the first element is a string (the name of the benchmark) and
the second element is the alias of a function (the actual
benchmark). Each alias must refer to a function that takes either no
arguments or one integral argument (which is the iterations count).
target = `File` where output is printed.

Example:
---
printBenchmarks!(
    "file write", { std.file.write("/tmp/deleteme", "hello, world!"); },
    "file read", { std.file.read("/tmp/deleteme"); })
    ();
---

The example above outputs to `stdout`:

---
===============================================================================
Benchmark                                     relative    ns/iter    iter/s
===============================================================================
file write                                                  144.3K      6.9K
file read                                                    26.3K     38.0K
===============================================================================
---

With internal iteration, the results would be similar:

Example:
---
printBenchmarks!(
    "file write", { std.file.write("/tmp/deleteme", "hello, world!"); },
    "file read", (uint n) {
        foreach (i; 0 .. n) std.file.read("/tmp/deleteme");
    })
    ();
---

In the example above, the framework iterates the first lambda many
times and collects timing information. For the second lambda, instead
of doing the iteration, the framework simply passes increasing values
of `n` to the lambda. In the end, time per iteration is measured, so
the performance profile (and the printout) would be virtually
identical to the one in the previous example.
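
The same internal iteration applies to named functions that take a single
`uint` iteration count. For instance (an illustrative sketch; the function
and its name are arbitrary):

---
void benchmark_append(uint n)
{
    string a;
    foreach (i; 0 .. n)
    {
        a ~= 'x';
    }
}
printBenchmarks!(benchmark_append)();
---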

If the call to `printBenchmarks` does not provide a name for some
benchmarks, the name of the benchmarked function is used. (For
lambdas, the name is e.g. $(D __lambda5).)

Example:
---
void benchmark_fileWrite()
{
    std.file.write("/tmp/deleteme", "hello, world!");
}
printBenchmarks!(
    benchmark_fileWrite,
    "file read", { std.file.read("/tmp/deleteme"); },
    { std.file.read("/tmp/deleteme"); })
    ();
---

The example above outputs to `stdout`:

---
===============================================================================
Benchmark                                     relative    ns/iter    iter/s
===============================================================================
fileWrite()                                                  76.0K     13.2K
file read                                                    28.6K     34.9K
__lambda2                                                    27.8K     36.0K
===============================================================================
---

If the name of the benchmark starts with $(D "benchmark_") or $(D
"benchmark_relative_"), that prefix does not appear in the printed
name. If the prefix is $(D "benchmark_relative_"), the "relative"
column is filled in the output. The relative performance is computed
relative to the last non-relative benchmark completed and expressed in
either times (suffix $(D "x")) or percentage (suffix $(D "%")). The
percentage is, for example, $(D 100.0%) if the benchmark runs at the
same speed as the baseline, $(D 200.0%) if the benchmark is twice as
fast as the baseline, and $(D 50.0%) if the benchmark is half as fast.

Example:
---
printBenchmarks!(
    "file write", { std.file.write("/tmp/deleteme", "hello, world!"); },
    "benchmark_relative_file read", { std.file.read("/tmp/deleteme"); },
    "benchmark_relative_array creation", { new char[32]; })
    ();
---

This example has one baseline and two relative tests. The output looks
as follows:

---
===============================================================================
Benchmark                                     relative    ns/iter    iter/s
===============================================================================
file write                                                  140.2K      7.1K
file read                                      517.8%       27.1K     36.9K
array creation                                1208.6x       116.0      8.6M
===============================================================================
---

According to the data above, file reading is $(D 5.178) times faster
than file writing, whereas array creation is roughly $(D 1200) times
faster than file writing.

If no functions are passed as `funs`, calling `printBenchmarks`
prints the previously registered per-module benchmarks.
Refer to $(LREF scheduleForBenchmarking).
*/
void printBenchmarks(funs...)(File target = stdout)
{
    BenchmarkResult[] results;
    static if (funs.length > 0)
    {
        benchmark!(funs)(results);
    }
    else
    {
        runBenchmarks(results);
    }
    printResults(results, target);
}

/**
Benchmarks functions and appends the results to the `results`
parameter. This function is used by $(LREF printBenchmarks), and the
`funs` parameter has the same requirements as for that function.

Using `benchmark` directly is recommended for custom processing and
printing of _benchmark results.

Example:
---
BenchmarkResult[] timings;
benchmark!(
    "file write", { std.file.write("/tmp/deleteme", "hello, world!"); },
    "file read", { std.file.read("/tmp/deleteme"); })
    (timings);
foreach (ref datum; timings)
{
    writefln("%s: %s ns/iteration", datum.benchmarkName,
        datum.perIteration.to!("nsecs", uint));
}
---
*/
void benchmark(funs...)(ref BenchmarkResult[] results, string moduleName = null)
{
    auto times = benchmarkImpl!(onlyAliases!funs)();
    uint curBenchmark = 0;
    string name;
    TickDuration lastBaseline;

    foreach (i, fun; funs)
    {
        static if (is(typeof(fun) : string))
        {
            name = fun;
        }
        else
        {
            if (!name) name = funs[i].stringof;
            const isRelative = skipOver(name, "benchmark_") &&
                skipOver(name, "relative_");
            double relative;
            if (isRelative)
            {
                relative = cast(double) lastBaseline.to!("nsecs", uint) /
                    times[curBenchmark].to!("nsecs", uint);
            }
            else
            {
                lastBaseline = times[curBenchmark];
            }
            results ~= BenchmarkResult(
                moduleName,
                name,
                times[curBenchmark++],
                relative);
            name = null;
        }
    }
}

/**
Result of one function's benchmark.
*/
struct BenchmarkResult
{
    /**
    Module name in which the function resides (`null` if not applicable).
    */
    string moduleName;
    /**
    Name of the benchmark (sans the `benchmark_` or $(D
    benchmark_relative_) prefix, if any).
    */
    string benchmarkName;
    /**
    Time per iteration.
    */
    TickDuration perIteration;
    /**
    Relative timing (if the benchmark is a _relative one). Refer to the
    documentation of $(LREF printBenchmarks) for what constitutes a
    relative benchmark. For relative benchmarks, `relative` is
    $(D 1.0) if the benchmark has the same speed as its baseline,
    $(D 2.0) if the benchmark is twice as fast, and $(D 0.5) if the
    benchmark has half the speed. For non-_relative benchmarks, $(D
    relative) is set to `NaN`, which is testable with $(XREF
    math,isNaN).
    */
    double relative;
}

/*
Given a bunch of aliases, benchmarks them all and returns their
respective durations.
*/
private TickDuration[fun.length] benchmarkImpl(fun...)()
{
    immutable uint epochs = 1000;
    TickDuration minSignificantDuration = TickDuration.from!"usecs"(50);

    // Baseline function. Use asm inside the body to avoid
    // optimizations.
    static void baseline() { asm { nop; } }

    // Use a local pointer to avoid TLS access overhead
    auto theStopWatch = &.theStopWatch;

    // All functions to look at include the baseline and the measured
    // functions.
    import std.meta;
    alias AliasSeq!(baseline, fun) allFuns;
    TickDuration baselineTimePerIteration;
    TickDuration[fun.length] result;

    // MEASUREMENTS START HERE

    foreach (i, measured; allFuns)
    {
        TickDuration bestEpoch;

        // So we have several epochs, and bestEpoch will track the
        // minimum time across epochs.
        foreach (epoch; 0 .. epochs)
        {
            // Within each epoch, we call the function repeatedly
            // until we collect at least a total time of
            // minSignificantDuration.
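            // The repeat count grows by a factor of 10 per retry
            // (1, 10, 100, ...), so a too-short measurement is simply
            // redone with more iterations; the cap below keeps the
            // loop bounded.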
            for (uint repeats = 1; repeats < 1_000_000_000; repeats *= 10)
            {
                auto elapsed = callFun!(allFuns[i])(repeats, *theStopWatch);

                if (elapsed < minSignificantDuration)
                {
                    // Crappy measurement, try again with more repeats
                    continue;
                }

                // Good measurement, record it if it's better than the
                // minimum.

                // Removing this useless test exposes a codegen bug.
                if (!repeats) throw new Exception(null);

                auto timePerIteration = elapsed / cast(double) repeats;
                if (bestEpoch == bestEpoch.init || timePerIteration < bestEpoch)
                {
                    bestEpoch = timePerIteration;
                }
                break;
            }
        }

        // Store the final result
        static if (i == 0)
        {
            baselineTimePerIteration = bestEpoch;
        }
        else
        {
            result[i - 1] = bestEpoch;
        }
    }

    // MEASUREMENTS JUST ENDED

    // Subtract the baseline from all results
    foreach (i, f; fun)
    {
        //writeln(__traits(identifier, fun[i]));
        if ((result[i] -= baselineTimePerIteration) < TickDuration.init)
        {
            result[i] = TickDuration.init;
        }
    }

    // Return result sans the baseline
    return result;
}

unittest
{
    import std.conv : to;

    debug (std_benchmark) {} else void writefln(S...)(S args){}

    int a = 123_456_789;
    auto r = benchmarkImpl!(
        {auto b = to!string(a);},
        {auto b = to!wstring(a);},
        {auto b = to!dstring(a);})
        ();
    auto names = [ "intToString", "intToWstring", "intToDstring" ];
    foreach (i, timing; r)
        writefln("%s: %sns/call", names[i], r[i].to!("nsecs", int)());
}

// Verify Example 2
unittest
{
    import std.conv;

    debug (std_benchmark) {} else void writefln(S...)(S args){}

    int a = 123_456_789;
    auto r = benchmarkImpl!(
        (uint n) { foreach (i; 0 .. n) auto b = to!string(a); },
        (uint n) { foreach (i; 0 .. n) auto b = to!wstring(a); },
        (uint n) { foreach (i; 0 .. n) auto b = to!dstring(a); })
        ();
    auto names = [ "intToString", "intToWstring", "intToDstring" ];
    foreach (i, timing; r)
        writefln("%s: %sns/call", names[i], r[i].to!("nsecs", int)());
}

/*
 * Calls a function, either passing it the iteration count (internal
 * iteration) or looping over it `repeats` times (external iteration).
 * Returns the total elapsed time.
 */
private TickDuration callFun(alias measured)(uint repeats, ref StopWatch theStopWatch)
{
    with (theStopWatch) running ? reset() : start();

    static if (is(typeof(measured(repeats))))
    {
        // Internal iteration
        measured(repeats);
    }
    else
    {
        // External iteration
        foreach (j; 0 .. repeats)
        {
            measured();
        }
    }

    return theStopWatch.peek();
}

/*
 * Given a bunch of aliases and strings, keeps only aliases of symbols
 * whose names start with "benchmark_".
 */
private template onlyBenchmarks(T...)
{
    import std.meta;

    static if (!T.length)
    {
        alias T onlyBenchmarks;
    }
    else static if (T[0].stringof.length > 10 && T[0].stringof[0 .. 10] == "benchmark_")
    {
        alias AliasSeq!(T[0], onlyBenchmarks!(T[1 .. $])) onlyBenchmarks;
    }
    else
    {
        alias AliasSeq!(onlyBenchmarks!(T[1 .. $])) onlyBenchmarks;
    }
}

/*
Eliminates strings, keeping only aliases.
*/
private template onlyAliases(T...)
{
    import std.meta;

    static if (!T.length)
    {
        alias T onlyAliases;
    }
    else static if (!is(typeof(T[0]) : string))
    {
        alias AliasSeq!(T[0], onlyAliases!(T[1 .. $])) onlyAliases;
    }
    else
    {
        alias AliasSeq!(onlyAliases!(T[1 .. $])) onlyAliases;
    }
}

unittest
{
    alias onlyAliases!("hello", int, "world", double) aliases;
    static assert(aliases.length == 2);
    static assert(is(int == aliases[0]));
    static assert(is(double == aliases[1]));
}

debug (std_benchmark) mixin(scheduleForBenchmarking);

/*
Generates benchmarking code for a bunch of functions passed as
strings, registering them under the given module name. Intended to be
used during compilation as part of $(D scheduleForBenchmarking).
*/
string benchmarkModuleCodegen()(string moduleName, string[] funs...)
{
    auto result = q{addBenchmarks(& benchmark!(};
    foreach (fun; funs)
    {
        import std.algorithm;
        if (!startsWith(fun, "benchmark_"))
        {
            continue;
        }
        result ~= fun;
        result ~= ", ";
    }
    // Forward the module's name so results are grouped per module in
    // the printed output.
    result ~= "), \"" ~ moduleName ~ "\");";
    return result;
}

/**
The examples shown so far feature simple, ad-hoc benchmarks, but $(D
std.benchmark) also supports automated, systematic benchmarking of
entire modules in larger projects.

The code $(D mixin(scheduleForBenchmarking)) planted at module level
schedules the entire module for benchmarking. The actual benchmarking
can be done globally by calling $(LREF printBenchmarks) (with no
arguments) or $(LREF runBenchmarks). Either call is usually made from
`main`.

In a multi-module application, several modules may define benchmarks,
and `printBenchmarks` distinguishes each visually.

Example:
---
module nxt.acme;
import std.file, std.array;

void benchmark_fileWrite()
{
    std.file.write("/tmp/deleteme", "hello, world!");
}

void benchmark_relative_fileRead()
{
    std.file.read("/tmp/deleteme");
}

mixin(scheduleForBenchmarking);
---

Typically the `mixin` is guarded by a `version` declaration so
benchmarks are compiled in only when desired.

Example:
---
version (benchmark_enabled) mixin(scheduleForBenchmarking);
---
*/
@property string scheduleForBenchmarking()
{
    return `
    shared static this()
    {
        mixin("enum code = benchmarkModuleCodegen(\"" ~ .stringof[7 .. $]
            ~ "\", __traits(allMembers, " ~ .stringof[7 .. $] ~ "));");
        mixin(code);
    }`;
}

debug (std_benchmark)
{
    private void benchmark_fileWrite()
    {
        import std.file;
        std.file.write("/tmp/deleteme", "hello, world!");
    }

    private void benchmark_fileRead()
    {
        import std.file;
        std.file.read("/tmp/deleteme");
    }

    private void benchmark_appendBuiltin(uint n)
    {
        string a;
        foreach (i; 0 .. n)
        {
            a ~= 'x';
        }
    }

    private void benchmark_relative_appendAppender(uint n)
    {
        import std.array, std.range;
        auto a = appender!string();
        foreach (i; 0 .. n)
        {
            put(a, 'x');
        }
    }

    private void benchmark_relative_appendConcat(uint n)
    {
        string a;
        foreach (i; 0 .. n)
        {
            a = a ~ 'x';
        }
    }
}

/*
Adds benchmarks to the global queue of benchmarks to be executed. Most
frequently used implicitly by $(LREF scheduleForBenchmarking).
*/
void addBenchmarks(
    void function(ref BenchmarkResult[], string) fun,
    string moduleName)
{
    allBenchmarks ~= tuple(moduleName, fun);
}

// One benchmark stopwatch
private StopWatch theStopWatch;

/*
Array containing all benchmarks to be executed. Usually they are
executed by calling `runBenchmarks` or `printBenchmarks`.
*/
private Tuple!(string, void function(ref BenchmarkResult[], string))[]
    allBenchmarks;

/**
Performs all benchmarks previously scheduled with $(LREF
scheduleForBenchmarking), application-wide. Usually not called
directly, but instead as support for $(LREF printBenchmarks). User
code may be interested in calling this function directly, e.g. for
custom formatting of results.
*/
void runBenchmarks(ref BenchmarkResult[] results)
{
    foreach (b; allBenchmarks)
    {
        b[1](results, b[0]);
    }
}

/**
Prints benchmark results as described with $(LREF
printBenchmarks). This is useful if benchmark postprocessing is
desired before printing.
*/
void printResults(in BenchmarkResult[] data, File target = stdout)
{
    if (data.empty)
    {
        return;
    }

    enum columns = 79;
    string thisModule, thisGroup;

    void printModuleHeader()
    {
        target.writefln(
            "==================================================" ~
            "=============================\n" ~
            "%-*s%8s%8s%8s\n" ~
            "=================================================" ~
            "==============================", columns - 24,
            thisModule is null ? "Benchmark" : thisModule,
            "relative", "t/iter", "iter/s");
    }

    foreach (i, datum; data)
    {
        if (thisModule != datum.moduleName || i == 0)
        {
            thisModule = datum.moduleName;
            printModuleHeader();
        }
        double itersPerSecond = 1.0 / datum.perIteration.to!("seconds", double)();
        auto name = datum.benchmarkName;
        if (datum.relative is double.init)
        {
            // Write without relative information
            target.writefln("%-*s %6.1ms %6.1m",
                columns - 16, name,
                datum.perIteration.to!("seconds", double)(),
                itersPerSecond);
        }
        else
        {
            // Write with relative information
            double relative = datum.relative;
            string fmt;
            if (relative < 1000)
            {
                // represent relative speed as percent
                fmt = "%-*s %7.1f%% %6.1m %6.1m";
                relative *= 100;
            }
            else
            {
                // represent relative speed as multiplication factor
                fmt = "%-*s %6.1mx %6.1m %6.1m";
            }
            target.writefln(fmt,
                columns - 25, name,
                relative,
                datum.perIteration.to!("seconds", double)(),
                itersPerSecond);
        }
    }
    target.writeln(repeat('=', columns));
}

/**
Suspends and resumes the current benchmark, respectively. This is useful
if the benchmark needs to set things up before performing the
measurement.

Example:
----
import std.algorithm, std.random;
void benchmark_findDouble(uint n)
{
    // Fill an array of random numbers and an array of random indexes
    benchmarkSuspend();
    auto array = new double[n];
    auto indexes = new size_t[n];
    foreach (i; 0 .. n)
    {
        array[i] = uniform(0.0, 1000.0);
        indexes[i] = uniform(0, n);
    }
    benchmarkResume();

    // The actual benchmark begins here
    foreach (i; 0 .. n)
    {
        auto balance = std.algorithm.find(array, array[indexes[i]]);
    }
}
unittest
{
    BenchmarkResult[] results;
    benchmark!benchmark_findDouble(results);
    // Inspect or print `results` as needed, e.g. with printResults(results).
}
----
*/
void benchmarkSuspend()
{
    theStopWatch.stop();
}

/// Ditto
void benchmarkResume()
{
    theStopWatch.start();
}

unittest
{
    void benchmark_findDouble(uint n)
    {
        // Fill an array of random numbers and an array of random indexes
        benchmarkSuspend();
        auto array = new double[n];
        auto indexes = new size_t[n];
        foreach (i; 0 .. n)
        {
            array[i] = uniform(0.0, 1000.0);
            indexes[i] = uniform(0, n);
        }
        benchmarkResume();

        // The actual benchmark begins here
        foreach (i; 0 .. n)
        {
            auto balance = std.algorithm.find(array, array[indexes[i]]);
        }
    }

    //writeln(benchmark!benchmark_findDouble(10_000)[0].to!("msecs", uint));
    benchmarkImpl!benchmark_findDouble();
}

/++
Aliases from $(D std.datetime).
+/
alias std.datetime.AutoStart AutoStart;
/// ditto
alias std.datetime.StopWatch StopWatch;
/// ditto
alias std.datetime.ComparingBenchmarkResult ComparingBenchmarkResult;
/// ditto
alias std.datetime.comparingBenchmark comparingBenchmark;
/// ditto
alias std.datetime.measureTime measureTime;
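
// Usage sketch: exercises `benchmark` directly and checks the `relative`
// field described in `BenchmarkResult`. The benchmark names and lambda
// bodies here are arbitrary examples.
unittest
{
    import std.math : isNaN;

    BenchmarkResult[] results;
    benchmark!(
        "append builtin", (uint n) { string s; foreach (i; 0 .. n) s ~= 'x'; },
        "benchmark_relative_append concat", (uint n) {
            string s;
            foreach (i; 0 .. n) s = s ~ 'x';
        })
        (results);

    assert(results.length == 2);
    // The first result is a baseline, so its `relative` field is NaN.
    assert(results[0].relative.isNaN);
    assert(results[0].benchmarkName == "append builtin");
    // The second is a relative benchmark: `relative` holds the speed ratio
    // against the baseline, and the prefix is stripped from the name.
    assert(!results[1].relative.isNaN);
    assert(results[1].benchmarkName == "append concat");
}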