1// Copyright 2015 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15#include "benchmark/benchmark.h" 16#include "benchmark_api_internal.h" 17#include "internal_macros.h" 18 19#ifndef BENCHMARK_OS_WINDOWS 20#ifndef BENCHMARK_OS_FUCHSIA 21#include <sys/resource.h> 22#endif 23#include <sys/time.h> 24#include <unistd.h> 25#endif 26 27#include <algorithm> 28#include <atomic> 29#include <condition_variable> 30#include <cstdio> 31#include <cstdlib> 32#include <fstream> 33#include <iostream> 34#include <memory> 35#include <string> 36#include <thread> 37 38#include "check.h" 39#include "colorprint.h" 40#include "commandlineflags.h" 41#include "complexity.h" 42#include "counter.h" 43#include "internal_macros.h" 44#include "log.h" 45#include "mutex.h" 46#include "re.h" 47#include "statistics.h" 48#include "string_util.h" 49#include "thread_manager.h" 50#include "thread_timer.h" 51 52DEFINE_bool(benchmark_list_tests, false, 53 "Print a list of benchmarks. This option overrides all other " 54 "options."); 55 56DEFINE_string(benchmark_filter, ".", 57 "A regular expression that specifies the set of benchmarks " 58 "to execute. If this flag is empty, no benchmarks are run. " 59 "If this flag is the string \"all\", all benchmarks linked " 60 "into the process are run."); 61 62DEFINE_double(benchmark_min_time, 0.5, 63 "Minimum number of seconds we should run benchmark before " 64 "results are considered significant. For cpu-time based " 65 "tests, this is the lower bound on the total cpu time " 66 "used by all threads that make up the test. For real-time " 67 "based tests, this is the lower bound on the elapsed time " 68 "of the benchmark execution, regardless of number of " 69 "threads."); 70 71DEFINE_int32(benchmark_repetitions, 1, 72 "The number of runs of each benchmark. If greater than 1, the " 73 "mean and standard deviation of the runs will be reported."); 74 75DEFINE_bool(benchmark_report_aggregates_only, false, 76 "Report the result of each benchmark repetitions. When 'true' is " 77 "specified only the mean, standard deviation, and other statistics " 78 "are reported for repeated benchmarks."); 79 80DEFINE_string(benchmark_format, "console", 81 "The format to use for console output. Valid values are " 82 "'console', 'json', or 'csv'."); 83 84DEFINE_string(benchmark_out_format, "json", 85 "The format to use for file output. Valid values are " 86 "'console', 'json', or 'csv'."); 87 88DEFINE_string(benchmark_out, "", "The file to write additional output to"); 89 90DEFINE_string(benchmark_color, "auto", 91 "Whether to use colors in the output. Valid values: " 92 "'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use " 93 "colors if the output is being sent to a terminal and the TERM " 94 "environment variable is set to a terminal type that supports " 95 "colors."); 96 97DEFINE_bool(benchmark_counters_tabular, false, 98 "Whether to use tabular format when printing user counters to " 99 "the console. Valid values: 'true'/'yes'/1, 'false'/'no'/0." 100 "Defaults to false."); 101 102DEFINE_int32(v, 0, "The level of verbose logging to output"); 103 104namespace benchmark { 105 106namespace { 107static const size_t kMaxIterations = 1000000000; 108} // end namespace 109 110namespace internal { 111 112void UseCharPointer(char const volatile*) {} 113 114namespace { 115 116BenchmarkReporter::Run CreateRunReport( 117 const benchmark::internal::Benchmark::Instance& b, 118 const internal::ThreadManager::Result& results, 119 double seconds) { 120 // Create report about this benchmark run. 121 BenchmarkReporter::Run report; 122 123 report.benchmark_name = b.name; 124 report.error_occurred = results.has_error_; 125 report.error_message = results.error_message_; 126 report.report_label = results.report_label_; 127 // This is the total iterations across all threads. 128 report.iterations = results.iterations; 129 report.time_unit = b.time_unit; 130 131 if (!report.error_occurred) { 132 double bytes_per_second = 0; 133 if (results.bytes_processed > 0 && seconds > 0.0) { 134 bytes_per_second = (results.bytes_processed / seconds); 135 } 136 double items_per_second = 0; 137 if (results.items_processed > 0 && seconds > 0.0) { 138 items_per_second = (results.items_processed / seconds); 139 } 140 141 if (b.use_manual_time) { 142 report.real_accumulated_time = results.manual_time_used; 143 } else { 144 report.real_accumulated_time = results.real_time_used; 145 } 146 report.cpu_accumulated_time = results.cpu_time_used; 147 report.bytes_per_second = bytes_per_second; 148 report.items_per_second = items_per_second; 149 report.complexity_n = results.complexity_n; 150 report.complexity = b.complexity; 151 report.complexity_lambda = b.complexity_lambda; 152 report.statistics = b.statistics; 153 report.counters = results.counters; 154 internal::Finish(&report.counters, seconds, b.threads); 155 } 156 return report; 157} 158 159// Execute one thread of benchmark b for the specified number of iterations. 160// Adds the stats collected for the thread into *total. 161void RunInThread(const benchmark::internal::Benchmark::Instance* b, 162 size_t iters, int thread_id, 163 internal::ThreadManager* manager) { 164 internal::ThreadTimer timer; 165 State st(iters, b->arg, thread_id, b->threads, &timer, manager); 166 b->benchmark->Run(st); 167 CHECK(st.iterations() >= st.max_iterations) 168 << "Benchmark returned before State::KeepRunning() returned false!"; 169 { 170 MutexLock l(manager->GetBenchmarkMutex()); 171 internal::ThreadManager::Result& results = manager->results; 172 results.iterations += st.iterations(); 173 results.cpu_time_used += timer.cpu_time_used(); 174 results.real_time_used += timer.real_time_used(); 175 results.manual_time_used += timer.manual_time_used(); 176 results.bytes_processed += st.bytes_processed(); 177 results.items_processed += st.items_processed(); 178 results.complexity_n += st.complexity_length_n(); 179 internal::Increment(&results.counters, st.counters); 180 } 181 manager->NotifyThreadComplete(); 182} 183 184std::vector<BenchmarkReporter::Run> RunBenchmark( 185 const benchmark::internal::Benchmark::Instance& b, 186 std::vector<BenchmarkReporter::Run>* complexity_reports) { 187 std::vector<BenchmarkReporter::Run> reports; // return value 188 189 const bool has_explicit_iteration_count = b.iterations != 0; 190 size_t iters = has_explicit_iteration_count ? b.iterations : 1; 191 std::unique_ptr<internal::ThreadManager> manager; 192 std::vector<std::thread> pool(b.threads - 1); 193 const int repeats = 194 b.repetitions != 0 ? b.repetitions : FLAGS_benchmark_repetitions; 195 const bool report_aggregates_only = 196 repeats != 1 && 197 (b.report_mode == internal::RM_Unspecified 198 ? FLAGS_benchmark_report_aggregates_only 199 : b.report_mode == internal::RM_ReportAggregatesOnly); 200 for (int repetition_num = 0; repetition_num < repeats; repetition_num++) { 201 for (;;) { 202 // Try benchmark 203 VLOG(2) << "Running " << b.name << " for " << iters << "\n"; 204 205 manager.reset(new internal::ThreadManager(b.threads)); 206 for (std::size_t ti = 0; ti < pool.size(); ++ti) { 207 pool[ti] = std::thread(&RunInThread, &b, iters, 208 static_cast<int>(ti + 1), manager.get()); 209 } 210 RunInThread(&b, iters, 0, manager.get()); 211 manager->WaitForAllThreads(); 212 for (std::thread& thread : pool) thread.join(); 213 internal::ThreadManager::Result results; 214 { 215 MutexLock l(manager->GetBenchmarkMutex()); 216 results = manager->results; 217 } 218 manager.reset(); 219 // Adjust real/manual time stats since they were reported per thread. 220 results.real_time_used /= b.threads; 221 results.manual_time_used /= b.threads; 222 223 VLOG(2) << "Ran in " << results.cpu_time_used << "/" 224 << results.real_time_used << "\n"; 225 226 // Base decisions off of real time if requested by this benchmark. 227 double seconds = results.cpu_time_used; 228 if (b.use_manual_time) { 229 seconds = results.manual_time_used; 230 } else if (b.use_real_time) { 231 seconds = results.real_time_used; 232 } 233 234 const double min_time = 235 !IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time; 236 237 // Determine if this run should be reported; Either it has 238 // run for a sufficient amount of time or because an error was reported. 239 const bool should_report = repetition_num > 0 240 || has_explicit_iteration_count // An exact iteration count was requested 241 || results.has_error_ 242 || iters >= kMaxIterations // No chance to try again, we hit the limit. 243 || seconds >= min_time // the elapsed time is large enough 244 // CPU time is specified but the elapsed real time greatly exceeds the 245 // minimum time. Note that user provided timers are except from this 246 // sanity check. 247 || ((results.real_time_used >= 5 * min_time) && !b.use_manual_time); 248 249 if (should_report) { 250 BenchmarkReporter::Run report = CreateRunReport(b, results, seconds); 251 if (!report.error_occurred && b.complexity != oNone) 252 complexity_reports->push_back(report); 253 reports.push_back(report); 254 break; 255 } 256 257 // See how much iterations should be increased by 258 // Note: Avoid division by zero with max(seconds, 1ns). 259 double multiplier = min_time * 1.4 / std::max(seconds, 1e-9); 260 // If our last run was at least 10% of FLAGS_benchmark_min_time then we 261 // use the multiplier directly. Otherwise we use at most 10 times 262 // expansion. 263 // NOTE: When the last run was at least 10% of the min time the max 264 // expansion should be 14x. 265 bool is_significant = (seconds / min_time) > 0.1; 266 multiplier = is_significant ? multiplier : std::min(10.0, multiplier); 267 if (multiplier <= 1.0) multiplier = 2.0; 268 double next_iters = std::max(multiplier * iters, iters + 1.0); 269 if (next_iters > kMaxIterations) { 270 next_iters = kMaxIterations; 271 } 272 VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n"; 273 iters = static_cast<int>(next_iters + 0.5); 274 } 275 } 276 // Calculate additional statistics 277 auto stat_reports = ComputeStats(reports); 278 if ((b.complexity != oNone) && b.last_benchmark_instance) { 279 auto additional_run_stats = ComputeBigO(*complexity_reports); 280 stat_reports.insert(stat_reports.end(), additional_run_stats.begin(), 281 additional_run_stats.end()); 282 complexity_reports->clear(); 283 } 284 285 if (report_aggregates_only) reports.clear(); 286 reports.insert(reports.end(), stat_reports.begin(), stat_reports.end()); 287 return reports; 288} 289 290} // namespace 291} // namespace internal 292 293State::State(size_t max_iters, const std::vector<int64_t>& ranges, int thread_i, 294 int n_threads, internal::ThreadTimer* timer, 295 internal::ThreadManager* manager) 296 : total_iterations_(0), 297 batch_leftover_(0), 298 max_iterations(max_iters), 299 started_(false), 300 finished_(false), 301 error_occurred_(false), 302 range_(ranges), 303 bytes_processed_(0), 304 items_processed_(0), 305 complexity_n_(0), 306 counters(), 307 thread_index(thread_i), 308 threads(n_threads), 309 timer_(timer), 310 manager_(manager) { 311 CHECK(max_iterations != 0) << "At least one iteration must be run"; 312 CHECK_LT(thread_index, threads) << "thread_index must be less than threads"; 313 314 // Note: The use of offsetof below is technically undefined until C++17 315 // because State is not a standard layout type. However, all compilers 316 // currently provide well-defined behavior as an extension (which is 317 // demonstrated since constexpr evaluation must diagnose all undefined 318 // behavior). However, GCC and Clang also warn about this use of offsetof, 319 // which must be suppressed. 320#ifdef __GNUC__ 321#pragma GCC diagnostic push 322#pragma GCC diagnostic ignored "-Winvalid-offsetof" 323#endif 324 // Offset tests to ensure commonly accessed data is on the first cache line. 325 const int cache_line_size = 64; 326 static_assert(offsetof(State, error_occurred_) <= 327 (cache_line_size - sizeof(error_occurred_)), ""); 328#ifdef __GNUC__ 329#pragma GCC diagnostic pop 330#endif 331} 332 333void State::PauseTiming() { 334 // Add in time accumulated so far 335 CHECK(started_ && !finished_ && !error_occurred_); 336 timer_->StopTimer(); 337} 338 339void State::ResumeTiming() { 340 CHECK(started_ && !finished_ && !error_occurred_); 341 timer_->StartTimer(); 342} 343 344void State::SkipWithError(const char* msg) { 345 CHECK(msg); 346 error_occurred_ = true; 347 { 348 MutexLock l(manager_->GetBenchmarkMutex()); 349 if (manager_->results.has_error_ == false) { 350 manager_->results.error_message_ = msg; 351 manager_->results.has_error_ = true; 352 } 353 } 354 total_iterations_ = 0; 355 if (timer_->running()) timer_->StopTimer(); 356} 357 358void State::SetIterationTime(double seconds) { 359 timer_->SetIterationTime(seconds); 360} 361 362void State::SetLabel(const char* label) { 363 MutexLock l(manager_->GetBenchmarkMutex()); 364 manager_->results.report_label_ = label; 365} 366 367void State::StartKeepRunning() { 368 CHECK(!started_ && !finished_); 369 started_ = true; 370 total_iterations_ = error_occurred_ ? 0 : max_iterations; 371 manager_->StartStopBarrier(); 372 if (!error_occurred_) ResumeTiming(); 373} 374 375void State::FinishKeepRunning() { 376 CHECK(started_ && (!finished_ || error_occurred_)); 377 if (!error_occurred_) { 378 PauseTiming(); 379 } 380 // Total iterations has now wrapped around past 0. Fix this. 381 total_iterations_ = 0; 382 finished_ = true; 383 manager_->StartStopBarrier(); 384} 385 386namespace internal { 387namespace { 388 389void RunBenchmarks(const std::vector<Benchmark::Instance>& benchmarks, 390 BenchmarkReporter* console_reporter, 391 BenchmarkReporter* file_reporter) { 392 // Note the file_reporter can be null. 393 CHECK(console_reporter != nullptr); 394 395 // Determine the width of the name field using a minimum width of 10. 396 bool has_repetitions = FLAGS_benchmark_repetitions > 1; 397 size_t name_field_width = 10; 398 size_t stat_field_width = 0; 399 for (const Benchmark::Instance& benchmark : benchmarks) { 400 name_field_width = 401 std::max<size_t>(name_field_width, benchmark.name.size()); 402 has_repetitions |= benchmark.repetitions > 1; 403 404 for(const auto& Stat : *benchmark.statistics) 405 stat_field_width = std::max<size_t>(stat_field_width, Stat.name_.size()); 406 } 407 if (has_repetitions) name_field_width += 1 + stat_field_width; 408 409 // Print header here 410 BenchmarkReporter::Context context; 411 context.name_field_width = name_field_width; 412 413 // Keep track of running times of all instances of current benchmark 414 std::vector<BenchmarkReporter::Run> complexity_reports; 415 416 // We flush streams after invoking reporter methods that write to them. This 417 // ensures users get timely updates even when streams are not line-buffered. 418 auto flushStreams = [](BenchmarkReporter* reporter) { 419 if (!reporter) return; 420 std::flush(reporter->GetOutputStream()); 421 std::flush(reporter->GetErrorStream()); 422 }; 423 424 if (console_reporter->ReportContext(context) && 425 (!file_reporter || file_reporter->ReportContext(context))) { 426 flushStreams(console_reporter); 427 flushStreams(file_reporter); 428 for (const auto& benchmark : benchmarks) { 429 std::vector<BenchmarkReporter::Run> reports = 430 RunBenchmark(benchmark, &complexity_reports); 431 console_reporter->ReportRuns(reports); 432 if (file_reporter) file_reporter->ReportRuns(reports); 433 flushStreams(console_reporter); 434 flushStreams(file_reporter); 435 } 436 } 437 console_reporter->Finalize(); 438 if (file_reporter) file_reporter->Finalize(); 439 flushStreams(console_reporter); 440 flushStreams(file_reporter); 441} 442 443std::unique_ptr<BenchmarkReporter> CreateReporter( 444 std::string const& name, ConsoleReporter::OutputOptions output_opts) { 445 typedef std::unique_ptr<BenchmarkReporter> PtrType; 446 if (name == "console") { 447 return PtrType(new ConsoleReporter(output_opts)); 448 } else if (name == "json") { 449 return PtrType(new JSONReporter); 450 } else if (name == "csv") { 451 return PtrType(new CSVReporter); 452 } else { 453 std::cerr << "Unexpected format: '" << name << "'\n"; 454 std::exit(1); 455 } 456} 457 458} // end namespace 459 460bool IsZero(double n) { 461 return std::abs(n) < std::numeric_limits<double>::epsilon(); 462} 463 464ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) { 465 int output_opts = ConsoleReporter::OO_Defaults; 466 if ((FLAGS_benchmark_color == "auto" && IsColorTerminal()) || 467 IsTruthyFlagValue(FLAGS_benchmark_color)) { 468 output_opts |= ConsoleReporter::OO_Color; 469 } else { 470 output_opts &= ~ConsoleReporter::OO_Color; 471 } 472 if(force_no_color) { 473 output_opts &= ~ConsoleReporter::OO_Color; 474 } 475 if(FLAGS_benchmark_counters_tabular) { 476 output_opts |= ConsoleReporter::OO_Tabular; 477 } else { 478 output_opts &= ~ConsoleReporter::OO_Tabular; 479 } 480 return static_cast< ConsoleReporter::OutputOptions >(output_opts); 481} 482 483} // end namespace internal 484 485size_t RunSpecifiedBenchmarks() { 486 return RunSpecifiedBenchmarks(nullptr, nullptr); 487} 488 489size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter) { 490 return RunSpecifiedBenchmarks(console_reporter, nullptr); 491} 492 493size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter, 494 BenchmarkReporter* file_reporter) { 495 std::string spec = FLAGS_benchmark_filter; 496 if (spec.empty() || spec == "all") 497 spec = "."; // Regexp that matches all benchmarks 498 499 // Setup the reporters 500 std::ofstream output_file; 501 std::unique_ptr<BenchmarkReporter> default_console_reporter; 502 std::unique_ptr<BenchmarkReporter> default_file_reporter; 503 if (!console_reporter) { 504 default_console_reporter = internal::CreateReporter( 505 FLAGS_benchmark_format, internal::GetOutputOptions()); 506 console_reporter = default_console_reporter.get(); 507 } 508 auto& Out = console_reporter->GetOutputStream(); 509 auto& Err = console_reporter->GetErrorStream(); 510 511 std::string const& fname = FLAGS_benchmark_out; 512 if (fname.empty() && file_reporter) { 513 Err << "A custom file reporter was provided but " 514 "--benchmark_out=<file> was not specified." 515 << std::endl; 516 std::exit(1); 517 } 518 if (!fname.empty()) { 519 output_file.open(fname); 520 if (!output_file.is_open()) { 521 Err << "invalid file name: '" << fname << std::endl; 522 std::exit(1); 523 } 524 if (!file_reporter) { 525 default_file_reporter = internal::CreateReporter( 526 FLAGS_benchmark_out_format, ConsoleReporter::OO_None); 527 file_reporter = default_file_reporter.get(); 528 } 529 file_reporter->SetOutputStream(&output_file); 530 file_reporter->SetErrorStream(&output_file); 531 } 532 533 std::vector<internal::Benchmark::Instance> benchmarks; 534 if (!FindBenchmarksInternal(spec, &benchmarks, &Err)) return 0; 535 536 if (benchmarks.empty()) { 537 Err << "Failed to match any benchmarks against regex: " << spec << "\n"; 538 return 0; 539 } 540 541 if (FLAGS_benchmark_list_tests) { 542 for (auto const& benchmark : benchmarks) Out << benchmark.name << "\n"; 543 } else { 544 internal::RunBenchmarks(benchmarks, console_reporter, file_reporter); 545 } 546 547 return benchmarks.size(); 548} 549 550namespace internal { 551 552void PrintUsageAndExit() { 553 fprintf(stdout, 554 "benchmark" 555 " [--benchmark_list_tests={true|false}]\n" 556 " [--benchmark_filter=<regex>]\n" 557 " [--benchmark_min_time=<min_time>]\n" 558 " [--benchmark_repetitions=<num_repetitions>]\n" 559 " [--benchmark_report_aggregates_only={true|false}\n" 560 " [--benchmark_format=<console|json|csv>]\n" 561 " [--benchmark_out=<filename>]\n" 562 " [--benchmark_out_format=<json|console|csv>]\n" 563 " [--benchmark_color={auto|true|false}]\n" 564 " [--benchmark_counters_tabular={true|false}]\n" 565 " [--v=<verbosity>]\n"); 566 exit(0); 567} 568 569void ParseCommandLineFlags(int* argc, char** argv) { 570 using namespace benchmark; 571 BenchmarkReporter::Context::executable_name = argv[0]; 572 for (int i = 1; i < *argc; ++i) { 573 if (ParseBoolFlag(argv[i], "benchmark_list_tests", 574 &FLAGS_benchmark_list_tests) || 575 ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) || 576 ParseDoubleFlag(argv[i], "benchmark_min_time", 577 &FLAGS_benchmark_min_time) || 578 ParseInt32Flag(argv[i], "benchmark_repetitions", 579 &FLAGS_benchmark_repetitions) || 580 ParseBoolFlag(argv[i], "benchmark_report_aggregates_only", 581 &FLAGS_benchmark_report_aggregates_only) || 582 ParseStringFlag(argv[i], "benchmark_format", &FLAGS_benchmark_format) || 583 ParseStringFlag(argv[i], "benchmark_out", &FLAGS_benchmark_out) || 584 ParseStringFlag(argv[i], "benchmark_out_format", 585 &FLAGS_benchmark_out_format) || 586 ParseStringFlag(argv[i], "benchmark_color", &FLAGS_benchmark_color) || 587 // "color_print" is the deprecated name for "benchmark_color". 588 // TODO: Remove this. 589 ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) || 590 ParseBoolFlag(argv[i], "benchmark_counters_tabular", 591 &FLAGS_benchmark_counters_tabular) || 592 ParseInt32Flag(argv[i], "v", &FLAGS_v)) { 593 for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1]; 594 595 --(*argc); 596 --i; 597 } else if (IsFlag(argv[i], "help")) { 598 PrintUsageAndExit(); 599 } 600 } 601 for (auto const* flag : 602 {&FLAGS_benchmark_format, &FLAGS_benchmark_out_format}) 603 if (*flag != "console" && *flag != "json" && *flag != "csv") { 604 PrintUsageAndExit(); 605 } 606 if (FLAGS_benchmark_color.empty()) { 607 PrintUsageAndExit(); 608 } 609} 610 611int InitializeStreams() { 612 static std::ios_base::Init init; 613 return 0; 614} 615 616} // end namespace internal 617 618void Initialize(int* argc, char** argv) { 619 internal::ParseCommandLineFlags(argc, argv); 620 internal::LogLevel() = FLAGS_v; 621} 622 623bool ReportUnrecognizedArguments(int argc, char** argv) { 624 for (int i = 1; i < argc; ++i) { 625 fprintf(stderr, "%s: error: unrecognized command-line flag: %s\n", argv[0], argv[i]); 626 } 627 return argc > 1; 628} 629 630} // end namespace benchmark 631