// Written in the D programming language

/**
Copyright: Copyright 2011-
License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
Authors: $(WEB erdani.com, Andrei Alexandrescu) and Kato Shoichi
Source: $(PHOBOSSRC std/_benchmark.d)

Synopsis:
----
module nxt.module_one;
import std.benchmark, std.file;

void benchmark_fileWrite()
{
    std.file.write("/tmp/deleteme", "hello, world!");
}

void benchmark_fileRead()
{
    std.file.read("/tmp/deleteme");
}

mixin(scheduleForBenchmarking);

void main()
{
    printBenchmarks();
}
----

The code above prints:

---
===============================================================================
module_one                                     relative  ns/iter   iter/s
===============================================================================
fileWrite                                                 144.2K     6.9K
fileRead                                                   27.1K    36.9K
===============================================================================
---
*/
module std.benchmark;

import std.algorithm, std.datetime, std.range, std.stdio, std.traits, std.typecons;
version (unittest) import std.random;

debug = std_benchmark;

/**
Benchmarks one or more functions for speed assessment and comparison,
and prints results as formatted text. A baseline timing that accounts
for benchmarking overheads is kept along with the results and
automatically deducted from all timings. A timing indistinguishable
from the baseline looping overhead appears with a run time of zero and
indicates a function that does too little work to be timed.

Measurement is done in epochs. For each function benchmarked, the
smallest time is taken over all epochs.

Benchmark results report time per iteration.

Params:

funs = If one or more `funs` are provided, they must come as pairs
in which the first element is a string (the name of the benchmark) and
the second element is the alias of a function (the actual
benchmark). Each alias must refer to a function that takes either no
arguments or one integral argument (which is the iterations count).
target = `File` where output is printed.

Example:
---
printBenchmarks!(
    "file write", { std.file.write("/tmp/deleteme", "hello, world!"); },
    "file read", { std.file.read("/tmp/deleteme"); })
    ();
---

The example above outputs to `stdout`:

---
===============================================================================
Benchmark                                      relative  ns/iter   iter/s
===============================================================================
file write                                                144.3K     6.9K
file read                                                  26.3K    38.0K
===============================================================================
---

With internal iteration, the results would be similar:

Example:
---
printBenchmarks!(
    "file write", { std.file.write("/tmp/deleteme", "hello, world!"); },
    "file read", (uint n) {
        foreach (i; 0 .. n) std.file.read("/tmp/deleteme");
    })
    ();
---

In the example above, the framework iterates the first lambda many
times and collects timing information. For the second lambda, instead
of doing the iteration, the framework simply passes increasing values
of `n` to the lambda. In the end, time per iteration is measured, so
the performance profile (and the printout) would be virtually
identical to the one in the previous example.

If the call to `printBenchmarks` does not provide a name for some
benchmarks, the name of the benchmarked function is used. (For
lambdas, the name is e.g. $(D __lambda5).)

Example:
---
void benchmark_fileWrite()
{
    std.file.write("/tmp/deleteme", "hello, world!");
}
printBenchmarks!(
    benchmark_fileWrite,
    "file read", { std.file.read("/tmp/deleteme"); },
    { std.file.read("/tmp/deleteme"); })
    ();
---

The example above outputs to `stdout`:

---
===============================================================================
Benchmark                                      relative  ns/iter   iter/s
===============================================================================
fileWrite()                                                76.0K    13.2K
file read                                                  28.6K    34.9K
__lambda2                                                  27.8K    36.0K
===============================================================================
---

If the name of the benchmark starts with $(D "benchmark_") or $(D
"benchmark_relative_"), that prefix does not appear in the printed
name. If the prefix is $(D "benchmark_relative_"), the "relative"
column is filled in the output. The relative performance is computed
relative to the last non-relative benchmark completed and expressed in
either times (suffix $(D "x")) or percentage (suffix $(D "%")). The
percentage is, for example, $(D 100.0%) if the benchmark runs at the
same speed as the baseline, $(D 200.0%) if the benchmark is twice as
fast as the baseline, and $(D 50%) if the benchmark is half as fast.

Example:
---
printBenchmarks!(
    "file write", { std.file.write("/tmp/deleteme", "hello, world!"); },
    "benchmark_relative_file read", { std.file.read("/tmp/deleteme"); },
    "benchmark_relative_array creation", { new char[32]; })
    ();
---

This example has one baseline and two relative tests. The output looks
as follows:

---
===============================================================================
Benchmark                                      relative  ns/iter   iter/s
===============================================================================
file write                                                140.2K     7.1K
file read                                        517.8%    27.1K    36.9K
array creation                                    1.2Kx    116.0     8.6M
===============================================================================
---

According to the data above, file reading is $(D 5.178) times faster
than file writing, whereas array creation is about $(D 1200) times
faster than file writing.

If no functions are passed as `funs`, calling `printBenchmarks`
prints the previously registered per-module benchmarks.
Refer to $(LREF scheduleForBenchmarking).
*/
void printBenchmarks(funs...)(File target = stdout)
{
    BenchmarkResult[] results;
    static if (funs.length > 0)
    {
        benchmark!(funs)(results);
    }
    else
    {
        runBenchmarks(results);
    }
    printResults(results);
}

/**
Benchmarks functions and appends the results to the `results`
parameter. This function is used by $(LREF printBenchmarks), and the
`funs` parameter has the same requirements as for that function.

Using `benchmark` directly is recommended for custom processing and
printing of _benchmark results.

Example:
---
BenchmarkResult[] timings;
benchmark!(
    "file write", { std.file.write("/tmp/deleteme", "hello, world!"); },
    "file read", { std.file.read("/tmp/deleteme"); })
    (timings);
foreach (ref datum; timings)
{
    writefln("%s: %s ns/iteration", datum.benchmarkName,
        datum.perIteration.to!("nsecs", uint));
}
---
*/
void benchmark(funs...)(ref BenchmarkResult[] results, string moduleName = null)
{
    auto times = benchmarkImpl!(onlyAliases!funs)();
    uint curBenchmark = 0;
    string name;
    TickDuration lastBaseline;

    foreach (i, fun; funs)
    {
        static if (is(typeof(fun) : string))
        {
            name = fun;
        }
        else
        {
            if (!name) name = funs[i].stringof;
            const isRelative = skipOver(name, "benchmark_") &&
                skipOver(name, "relative_");
            double relative;
            if (isRelative)
            {
                relative = cast(double) lastBaseline.to!("nsecs", uint) /
                    times[curBenchmark].to!("nsecs", uint);
            }
            else
            {
                lastBaseline = times[curBenchmark];
            }
            results ~= BenchmarkResult(
                moduleName,
                name,
                times[curBenchmark++],
                relative);
            name = null;
        }
    }
}

/**
Result of one function's benchmark.
*/
struct BenchmarkResult
{
    /**
    Module name in which the function resides (`null` if not applicable).
    */
    string moduleName;
    /**
    Name of the benchmark (sans the `benchmark_` or $(D
    benchmark_relative_) prefix, if any).
    */
    string benchmarkName;
    /**
    Time per iteration.
    */
    TickDuration perIteration;
    /**
    Relative timing (if the benchmark is a _relative one). Refer to the
    documentation of $(LREF printBenchmarks) for what constitutes a
    relative benchmark. For relative benchmarks, `relative` is
    $(D 1.0) if the benchmark has the same speed as its baseline,
    $(D 2.0) if the benchmark is twice as fast, and $(D 0.5) if the
    benchmark has half the speed. For non-_relative benchmarks, $(D
    relative) is set to `NaN`, which is testable with $(XREF
    math,isNaN).
    */
    double relative;
}

/*
Given a bunch of aliases, benchmarks them all and returns their
respective durations.
*/
private TickDuration[fun.length] benchmarkImpl(fun...)()
{
    immutable uint epochs = 1000;
    TickDuration minSignificantDuration = TickDuration.from!"usecs"(50);

    // Baseline function. Use asm inside the body to avoid
    // optimizations.
    static void baseline() { asm { nop; } }

    // Use a local pointer to avoid TLS access overhead
    auto theStopWatch = &.theStopWatch;

    // All functions to look at include the baseline and the measured
    // functions.
    import std.meta;
    alias AliasSeq!(baseline, fun) allFuns;
    TickDuration baselineTimePerIteration;
    TickDuration[fun.length] result;

    // MEASUREMENTS START HERE

    foreach (i, measured; allFuns)
    {
        TickDuration bestEpoch;

        // So we have several epochs, and bestEpoch will track the
        // minimum time across epochs.
        foreach (epoch; 0 .. epochs)
        {
            // Within each epoch, we call the function repeatedly
            // until we collect at least a total time of
            // minSignificantDuration.
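            // The loop below grows the repeat count geometrically (a
            // factor of 10 per attempt), so very cheap functions quickly
            // reach a measurable total; the per-iteration time is then
            // elapsed / repeats, and only the best epoch is kept.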
            for (uint repeats = 1; repeats < 1_000_000_000; repeats *= 10)
            {
                auto elapsed = callFun!(allFuns[i])(repeats, *theStopWatch);

                if (elapsed < minSignificantDuration)
                {
                    // Crappy measurement, try again with more repeats
                    continue;
                }

                // Good measurement, record it if it's better than the
                // minimum.

                // Removing this useless test exposes a codegen bug.
                if (!repeats) throw new Exception(null);

                auto timePerIteration = elapsed / cast(double) repeats;
                if (bestEpoch == bestEpoch.init || timePerIteration < bestEpoch)
                {
                    bestEpoch = timePerIteration;
                }
                break;
            }
        }

        // Store the final result
        static if (i == 0)
        {
            baselineTimePerIteration = bestEpoch;
        }
        else
        {
            result[i - 1] = bestEpoch;
        }
    }

    // MEASUREMENTS JUST ENDED

    // Subtract the baseline from all results
    foreach (i, f; fun)
    {
        //writeln(__traits(identifier, fun[i]));
        if ((result[i] -= baselineTimePerIteration) < TickDuration.init)
        {
            result[i] = TickDuration.init;
        }
    }

    // Return result sans the baseline
    return result;
}

unittest {
    import std.conv : to;

    debug (std_benchmark) {} else void writefln(S...)(S args) {}

    int a = 123_456_789;
    auto r = benchmarkImpl!(
        { auto b = to!string(a); },
        { auto b = to!wstring(a); },
        { auto b = to!dstring(a); })
        ();
    auto names = [ "intToString", "intToWstring", "intToDstring" ];
    foreach (i, timing; r)
        writefln("%s: %sns/call", names[i], r[i].to!("nsecs", int)());
}

// Verify Example 2
unittest {
    import std.conv;

    debug (std_benchmark) {} else void writefln(S...)(S args) {}

    int a = 123_456_789;
    auto r = benchmarkImpl!(
        (uint n) { foreach (i; 0 .. n) { auto b = to!string(a); } },
        (uint n) { foreach (i; 0 .. n) { auto b = to!wstring(a); } },
        (uint n) { foreach (i; 0 .. n) { auto b = to!dstring(a); } })
        ();
    auto names = [ "intToString", "intToWstring", "intToDstring" ];
    foreach (i, timing; r)
        writefln("%s: %sns/call", names[i], r[i].to!("nsecs", int)());
}

/*
 * Calls a function either by alias or by string. Returns the duration
 * per iteration.
 */
private TickDuration callFun(alias measured)(uint repeats, ref StopWatch theStopWatch)
{
    with (theStopWatch) running ? reset() : start();

    static if (is(typeof(measured(repeats))))
    {
        // Internal iteration
        measured(repeats);
    }
    else
    {
        // External iteration
        foreach (j; 0 .. repeats)
        {
            measured();
        }
    }

    return theStopWatch.peek();
}

/*
 * Given a bunch of aliases and strings, keeps only aliases of symbols
 * that start with "benchmark_".
 */
private template onlyBenchmarks(T...)
{
    import std.meta;

    static if (!T.length)
    {
        alias T onlyBenchmarks;
    }
    else static if (T[0].stringof.length > 10 && T[0].stringof[0 .. 10] == "benchmark_")
    {
        alias AliasSeq!(T[0], onlyBenchmarks!(T[1 .. $])) onlyBenchmarks;
    }
    else
    {
        alias AliasSeq!(onlyBenchmarks!(T[1 .. $])) onlyBenchmarks;
    }
}

/*
Eliminates strings, keeping only aliases.
*/
private template onlyAliases(T...)
{
    import std.meta;

    static if (!T.length)
    {
        alias T onlyAliases;
    }
    else static if (!is(typeof(T[0]) : string))
    {
        alias AliasSeq!(T[0], onlyAliases!(T[1 .. $])) onlyAliases;
    }
    else
    {
        alias AliasSeq!(onlyAliases!(T[1 .. $])) onlyAliases;
    }
}

unittest {
    alias onlyAliases!("hello", int, "world", double) aliases;
    static assert(aliases.length == 2);
    static assert(is(int == aliases[0]));
    static assert(is(double == aliases[1]));
}

debug (std_benchmark) mixin(scheduleForBenchmarking);

/*
Generates benchmarking code for a bunch of functions passed as
strings. Intended to be used during compilation as part of $(D
scheduleForBenchmarking).
*/
string benchmarkModuleCodegen()(string[] funs...)
{
    auto result = q{addBenchmarks(& benchmark!(};
    foreach (fun; funs)
    {
        import std.algorithm;
        if (!startsWith(fun, "benchmark_"))
        {
            continue;
        }
        result ~= fun;
        result ~= ", ";
    }
    result ~= q{), "test");};
    return result;
}

/**
The examples shown so far feature simple, ad-hoc benchmarks, but $(D
std.benchmark) allows for superior automation and systematic use aimed
at large projects.

The code $(D mixin(scheduleForBenchmarking)) planted at module level
schedules the entire module for benchmarking. The actual benchmarking
can be done globally by calling $(LREF printBenchmarks) (with no
arguments) or $(LREF runBenchmarks). Either call is usually made from
`main`.

In a multi-module application, several modules may define benchmarks,
and `printBenchmarks` distinguishes each visually.

Example:
---
module nxt.acme;
import std.file, std.array;

void benchmark_fileWrite()
{
    std.file.write("/tmp/deleteme", "hello, world!");
}

void benchmark_relative_fileRead()
{
    std.file.read("/tmp/deleteme");
}

mixin(scheduleForBenchmarking);
---

Typically the `mixin` is guarded by a version so benchmarks are only
run if desired.

Example:
---
version (benchmark_enabled) mixin(scheduleForBenchmarking);
---
*/
@property string scheduleForBenchmarking()
{
    return `
shared static this()
{
    mixin("enum code = benchmarkModuleCodegen(__traits(allMembers, "~
        .stringof[7 .. $]~"));");
    mixin(code);
}`;
}

debug (std_benchmark)
{
    private void benchmark_fileWrite()
    {
        import std.file;
        std.file.write("/tmp/deleteme", "hello, world!");
    }

    private void benchmark_fileRead()
    {
        import std.file;
        std.file.read("/tmp/deleteme");
    }

    private void benchmark_appendBuiltin(uint n)
    {
        string a;
        foreach (i; 0 .. n)
        {
            a ~= 'x';
        }
    }

    private void benchmark_relative_appendAppender(uint n)
    {
        import std.range;
        auto a = appender!string();
        foreach (i; 0 .. n)
        {
            put(a, 'x');
        }
    }

    private void benchmark_relative_appendConcat(uint n)
    {
        string a;
        foreach (i; 0 .. n)
        {
            a = a ~ 'x';
        }
    }
}

/*
Adds benchmarks to the global queue of benchmarks to be executed. Most
frequently used implicitly by $(LREF scheduleForBenchmarking).
*/
void addBenchmarks(
    void function(ref BenchmarkResult[], string) fun,
    string moduleName)
{
    allBenchmarks ~= tuple(moduleName, fun);
}

// One benchmark stopwatch
private StopWatch theStopWatch;

/*
Array containing all benchmarks to be executed. Usually they are
executed by calling `runBenchmarks` or `printBenchmarks`.
*/
private Tuple!(string, void function(ref BenchmarkResult[], string))[]
    allBenchmarks;

/**
Performs all benchmarks previously scheduled with $(LREF
scheduleForBenchmarking), application-wide. Usually not called
directly, but instead as support for $(LREF printBenchmarks). User
code may be interested in using this function directly for
e.g. results formatting.
*/
void runBenchmarks(ref BenchmarkResult[] results)
{
    foreach (b; allBenchmarks)
    {
        b[1](results, b[0]);
    }
}

/**
Prints benchmark results as described with $(LREF
printBenchmarks). This is useful if benchmark postprocessing is
desired before printing.
*/
void printResults(in BenchmarkResult[] data, File target = stdout)
{
    if (data.empty)
    {
        return;
    }

    enum columns = 79;
    string thisModule, thisGroup;

    void printModuleHeader()
    {
        target.writefln(
            "==================================================" ~
            "=============================\n" ~
            "%-*s%8s%8s%8s\n" ~
            "=================================================" ~
            "==============================", columns - 24,
            thisModule is null ? "Benchmark" : thisModule,
            "relative", "t/iter", "iter/s");
    }

    foreach (i, datum; data)
    {
        if (thisModule != datum.moduleName || i == 0)
        {
            thisModule = datum.moduleName;
            printModuleHeader();
        }
        double itersPerSecond = 1.0 / datum.perIteration.to!("seconds", double)();
        auto name = datum.benchmarkName;
        if (datum.relative is double.init)
        {
            // Write without relative information
            target.writefln("%-*s %6.1ms %6.1m",
                columns - 16, name,
                datum.perIteration.to!("seconds", double)(),
                itersPerSecond);
        }
        else
        {
            // Write with relative information
            double relative = datum.relative;
            string fmt;
            if (relative < 1000)
            {
                // represent relative speed as percent
                fmt = "%-*s %7.1f%% %6.1m %6.1m";
                relative *= 100;
            }
            else
            {
                // represent relative speed as multiplication factor
                fmt = "%-*s %6.1mx %6.1m %6.1m";
            }
            target.writefln(fmt,
                columns - 25, name,
                relative,
                datum.perIteration.to!("seconds", double)(),
                itersPerSecond);
        }
    }
    target.writeln(repeat('=', columns));
}

/**
Suspends and resumes the current benchmark, respectively. This is useful
if the benchmark needs to set things up before performing the
measurement.

Example:
----
import std.algorithm, std.random;
void benchmark_findDouble(uint n)
{
    // Fill an array of random numbers and an array of random indexes
    benchmarkSuspend();
    auto array = new double[n];
    auto indexes = new size_t[n];
    foreach (i; 0 .. n)
    {
        array[i] = uniform(0.0, 1000.0);
        indexes[i] = uniform(0, n);
    }
    benchmarkResume();

    // The actual benchmark begins here
    foreach (i; 0 .. n)
    {
        auto balance = std.algorithm.find(array, array[indexes[i]]);
    }
}
unittest {
    BenchmarkResult[] results;
    benchmark!benchmark_findDouble(results);
    writefln("%s ns/iteration", results[0].perIteration.to!("nsecs", uint));
}
----
*/
void benchmarkSuspend()
{
    theStopWatch.stop();
}

/// Ditto
void benchmarkResume()
{
    theStopWatch.start();
}

unittest {
    void benchmark_findDouble(uint n)
    {
        // Fill an array of random numbers and an array of random indexes
        benchmarkSuspend();
        auto array = new double[n];
        auto indexes = new size_t[n];
        foreach (i; 0 .. n)
        {
            array[i] = uniform(0.0, 1000.0);
            indexes[i] = uniform(0, n);
        }
        benchmarkResume();

        // The actual benchmark begins here
        foreach (i; 0 .. n)
        {
            auto balance = std.algorithm.find(array, array[indexes[i]]);
        }
    }

    //writeln(benchmark!benchmark_findDouble(10_000)[0].to!("msecs",
    //uint));
    benchmarkImpl!benchmark_findDouble();
}

/++
Aliases from $(D std.datetime).
+/
alias std.datetime.AutoStart AutoStart;
/// ditto
alias std.datetime.StopWatch StopWatch;
/// ditto
alias std.datetime.ComparingBenchmarkResult ComparingBenchmarkResult;
/// ditto
alias std.datetime.comparingBenchmark comparingBenchmark;
/// ditto
alias std.datetime.measureTime measureTime;
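
// Usage sketch: consumes BenchmarkResult values directly, as the
// documentation of `benchmark` recommends for custom processing, instead of
// going through printResults. The benchmarked function below is illustrative
// only and is not part of the module's API.
unittest {
    import std.math : isNaN;

    debug (std_benchmark) {} else void writefln(S...)(S args) {}

    // Illustrative benchmark: append characters to a string, with the
    // iteration count driven internally by the framework.
    static void benchmark_concatChars(uint n)
    {
        string s;
        foreach (i; 0 .. n)
        {
            s ~= 'x';
        }
    }

    BenchmarkResult[] results;
    benchmark!(benchmark_concatChars)(results);
    foreach (ref r; results)
    {
        // Non-relative benchmarks carry a NaN `relative` field.
        assert(r.relative.isNaN);
        writefln("%s: %s ns/iter", r.benchmarkName,
            r.perIteration.to!("nsecs", double));
    }
}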