1// Copyright 2015 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "benchmark/benchmark.h"
16#include "benchmark_api_internal.h"
17#include "internal_macros.h"
18
19#ifndef BENCHMARK_OS_WINDOWS
20#ifndef BENCHMARK_OS_FUCHSIA
21#include <sys/resource.h>
22#endif
23#include <sys/time.h>
24#include <unistd.h>
25#endif
26
27#include <algorithm>
28#include <atomic>
29#include <condition_variable>
30#include <cstdio>
31#include <cstdlib>
32#include <fstream>
33#include <iostream>
34#include <memory>
35#include <string>
36#include <thread>
37
38#include "check.h"
39#include "colorprint.h"
40#include "commandlineflags.h"
41#include "complexity.h"
42#include "counter.h"
43#include "internal_macros.h"
44#include "log.h"
45#include "mutex.h"
46#include "re.h"
47#include "statistics.h"
48#include "string_util.h"
49#include "thread_manager.h"
50#include "thread_timer.h"
51
52DEFINE_bool(benchmark_list_tests, false,
53            "Print a list of benchmarks. This option overrides all other "
54            "options.");
55
56DEFINE_string(benchmark_filter, ".",
57              "A regular expression that specifies the set of benchmarks "
58              "to execute.  If this flag is empty, no benchmarks are run.  "
59              "If this flag is the string \"all\", all benchmarks linked "
60              "into the process are run.");
61
62DEFINE_double(benchmark_min_time, 0.5,
63              "Minimum number of seconds we should run benchmark before "
64              "results are considered significant.  For cpu-time based "
65              "tests, this is the lower bound on the total cpu time "
66              "used by all threads that make up the test.  For real-time "
67              "based tests, this is the lower bound on the elapsed time "
68              "of the benchmark execution, regardless of number of "
69              "threads.");
70
71DEFINE_int32(benchmark_repetitions, 1,
72             "The number of runs of each benchmark. If greater than 1, the "
73             "mean and standard deviation of the runs will be reported.");
74
75DEFINE_bool(benchmark_report_aggregates_only, false,
76            "Report the result of each benchmark repetitions. When 'true' is "
77            "specified only the mean, standard deviation, and other statistics "
78            "are reported for repeated benchmarks.");
79
80DEFINE_string(benchmark_format, "console",
81              "The format to use for console output. Valid values are "
82              "'console', 'json', or 'csv'.");
83
84DEFINE_string(benchmark_out_format, "json",
85              "The format to use for file output. Valid values are "
86              "'console', 'json', or 'csv'.");
87
88DEFINE_string(benchmark_out, "", "The file to write additional output to");
89
90DEFINE_string(benchmark_color, "auto",
91              "Whether to use colors in the output.  Valid values: "
92              "'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use "
93              "colors if the output is being sent to a terminal and the TERM "
94              "environment variable is set to a terminal type that supports "
95              "colors.");
96
97DEFINE_bool(benchmark_counters_tabular, false,
98            "Whether to use tabular format when printing user counters to "
99            "the console.  Valid values: 'true'/'yes'/1, 'false'/'no'/0."
100            "Defaults to false.");
101
102DEFINE_int32(v, 0, "The level of verbose logging to output");
103
104namespace benchmark {
105
106namespace {
107static const size_t kMaxIterations = 1000000000;
108}  // end namespace
109
110namespace internal {
111
112void UseCharPointer(char const volatile*) {}
113
114namespace {
115
116BenchmarkReporter::Run CreateRunReport(
117    const benchmark::internal::Benchmark::Instance& b,
118    const internal::ThreadManager::Result& results,
119    double seconds) {
120  // Create report about this benchmark run.
121  BenchmarkReporter::Run report;
122
123  report.benchmark_name = b.name;
124  report.error_occurred = results.has_error_;
125  report.error_message = results.error_message_;
126  report.report_label = results.report_label_;
127  // This is the total iterations across all threads.
128  report.iterations = results.iterations;
129  report.time_unit = b.time_unit;
130
131  if (!report.error_occurred) {
132    double bytes_per_second = 0;
133    if (results.bytes_processed > 0 && seconds > 0.0) {
134      bytes_per_second = (results.bytes_processed / seconds);
135    }
136    double items_per_second = 0;
137    if (results.items_processed > 0 && seconds > 0.0) {
138      items_per_second = (results.items_processed / seconds);
139    }
140
141    if (b.use_manual_time) {
142      report.real_accumulated_time = results.manual_time_used;
143    } else {
144      report.real_accumulated_time = results.real_time_used;
145    }
146    report.cpu_accumulated_time = results.cpu_time_used;
147    report.bytes_per_second = bytes_per_second;
148    report.items_per_second = items_per_second;
149    report.complexity_n = results.complexity_n;
150    report.complexity = b.complexity;
151    report.complexity_lambda = b.complexity_lambda;
152    report.statistics = b.statistics;
153    report.counters = results.counters;
154    internal::Finish(&report.counters, seconds, b.threads);
155  }
156  return report;
157}
158
159// Execute one thread of benchmark b for the specified number of iterations.
160// Adds the stats collected for the thread into *total.
161void RunInThread(const benchmark::internal::Benchmark::Instance* b,
162                 size_t iters, int thread_id,
163                 internal::ThreadManager* manager) {
164  internal::ThreadTimer timer;
165  State st(iters, b->arg, thread_id, b->threads, &timer, manager);
166  b->benchmark->Run(st);
167  CHECK(st.iterations() >= st.max_iterations)
168      << "Benchmark returned before State::KeepRunning() returned false!";
169  {
170    MutexLock l(manager->GetBenchmarkMutex());
171    internal::ThreadManager::Result& results = manager->results;
172    results.iterations += st.iterations();
173    results.cpu_time_used += timer.cpu_time_used();
174    results.real_time_used += timer.real_time_used();
175    results.manual_time_used += timer.manual_time_used();
176    results.bytes_processed += st.bytes_processed();
177    results.items_processed += st.items_processed();
178    results.complexity_n += st.complexity_length_n();
179    internal::Increment(&results.counters, st.counters);
180  }
181  manager->NotifyThreadComplete();
182}
183
184std::vector<BenchmarkReporter::Run> RunBenchmark(
185    const benchmark::internal::Benchmark::Instance& b,
186    std::vector<BenchmarkReporter::Run>* complexity_reports) {
187  std::vector<BenchmarkReporter::Run> reports;  // return value
188
189  const bool has_explicit_iteration_count = b.iterations != 0;
190  size_t iters = has_explicit_iteration_count ? b.iterations : 1;
191  std::unique_ptr<internal::ThreadManager> manager;
192  std::vector<std::thread> pool(b.threads - 1);
193  const int repeats =
194      b.repetitions != 0 ? b.repetitions : FLAGS_benchmark_repetitions;
195  const bool report_aggregates_only =
196      repeats != 1 &&
197      (b.report_mode == internal::RM_Unspecified
198           ? FLAGS_benchmark_report_aggregates_only
199           : b.report_mode == internal::RM_ReportAggregatesOnly);
200  for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
201    for (;;) {
202      // Try benchmark
203      VLOG(2) << "Running " << b.name << " for " << iters << "\n";
204
205      manager.reset(new internal::ThreadManager(b.threads));
206      for (std::size_t ti = 0; ti < pool.size(); ++ti) {
207        pool[ti] = std::thread(&RunInThread, &b, iters,
208                               static_cast<int>(ti + 1), manager.get());
209      }
210      RunInThread(&b, iters, 0, manager.get());
211      manager->WaitForAllThreads();
212      for (std::thread& thread : pool) thread.join();
213      internal::ThreadManager::Result results;
214      {
215        MutexLock l(manager->GetBenchmarkMutex());
216        results = manager->results;
217      }
218      manager.reset();
219      // Adjust real/manual time stats since they were reported per thread.
220      results.real_time_used /= b.threads;
221      results.manual_time_used /= b.threads;
222
223      VLOG(2) << "Ran in " << results.cpu_time_used << "/"
224              << results.real_time_used << "\n";
225
226      // Base decisions off of real time if requested by this benchmark.
227      double seconds = results.cpu_time_used;
228      if (b.use_manual_time) {
229        seconds = results.manual_time_used;
230      } else if (b.use_real_time) {
231        seconds = results.real_time_used;
232      }
233
234      const double min_time =
235          !IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time;
236
237      // Determine if this run should be reported; Either it has
238      // run for a sufficient amount of time or because an error was reported.
239      const bool should_report =  repetition_num > 0
240        || has_explicit_iteration_count  // An exact iteration count was requested
241        || results.has_error_
242        || iters >= kMaxIterations  // No chance to try again, we hit the limit.
243        || seconds >= min_time  // the elapsed time is large enough
244        // CPU time is specified but the elapsed real time greatly exceeds the
245        // minimum time. Note that user provided timers are except from this
246        // sanity check.
247        || ((results.real_time_used >= 5 * min_time) && !b.use_manual_time);
248
249      if (should_report) {
250        BenchmarkReporter::Run report = CreateRunReport(b, results, seconds);
251        if (!report.error_occurred && b.complexity != oNone)
252          complexity_reports->push_back(report);
253        reports.push_back(report);
254        break;
255      }
256
257      // See how much iterations should be increased by
258      // Note: Avoid division by zero with max(seconds, 1ns).
259      double multiplier = min_time * 1.4 / std::max(seconds, 1e-9);
260      // If our last run was at least 10% of FLAGS_benchmark_min_time then we
261      // use the multiplier directly. Otherwise we use at most 10 times
262      // expansion.
263      // NOTE: When the last run was at least 10% of the min time the max
264      // expansion should be 14x.
265      bool is_significant = (seconds / min_time) > 0.1;
266      multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
267      if (multiplier <= 1.0) multiplier = 2.0;
268      double next_iters = std::max(multiplier * iters, iters + 1.0);
269      if (next_iters > kMaxIterations) {
270        next_iters = kMaxIterations;
271      }
272      VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
273      iters = static_cast<int>(next_iters + 0.5);
274    }
275  }
276  // Calculate additional statistics
277  auto stat_reports = ComputeStats(reports);
278  if ((b.complexity != oNone) && b.last_benchmark_instance) {
279    auto additional_run_stats = ComputeBigO(*complexity_reports);
280    stat_reports.insert(stat_reports.end(), additional_run_stats.begin(),
281                        additional_run_stats.end());
282    complexity_reports->clear();
283  }
284
285  if (report_aggregates_only) reports.clear();
286  reports.insert(reports.end(), stat_reports.begin(), stat_reports.end());
287  return reports;
288}
289
290}  // namespace
291}  // namespace internal
292
293State::State(size_t max_iters, const std::vector<int64_t>& ranges, int thread_i,
294             int n_threads, internal::ThreadTimer* timer,
295             internal::ThreadManager* manager)
296    : total_iterations_(0),
297      batch_leftover_(0),
298      max_iterations(max_iters),
299      started_(false),
300      finished_(false),
301      error_occurred_(false),
302      range_(ranges),
303      bytes_processed_(0),
304      items_processed_(0),
305      complexity_n_(0),
306      counters(),
307      thread_index(thread_i),
308      threads(n_threads),
309      timer_(timer),
310      manager_(manager) {
311  CHECK(max_iterations != 0) << "At least one iteration must be run";
312  CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
313
314  // Note: The use of offsetof below is technically undefined until C++17
315  // because State is not a standard layout type. However, all compilers
316  // currently provide well-defined behavior as an extension (which is
317  // demonstrated since constexpr evaluation must diagnose all undefined
318  // behavior). However, GCC and Clang also warn about this use of offsetof,
319  // which must be suppressed.
320#ifdef __GNUC__
321#pragma GCC diagnostic push
322#pragma GCC diagnostic ignored "-Winvalid-offsetof"
323#endif
324  // Offset tests to ensure commonly accessed data is on the first cache line.
325  const int cache_line_size = 64;
326  static_assert(offsetof(State, error_occurred_) <=
327                (cache_line_size - sizeof(error_occurred_)), "");
328#ifdef __GNUC__
329#pragma GCC diagnostic pop
330#endif
331}
332
333void State::PauseTiming() {
334  // Add in time accumulated so far
335  CHECK(started_ && !finished_ && !error_occurred_);
336  timer_->StopTimer();
337}
338
339void State::ResumeTiming() {
340  CHECK(started_ && !finished_ && !error_occurred_);
341  timer_->StartTimer();
342}
343
344void State::SkipWithError(const char* msg) {
345  CHECK(msg);
346  error_occurred_ = true;
347  {
348    MutexLock l(manager_->GetBenchmarkMutex());
349    if (manager_->results.has_error_ == false) {
350      manager_->results.error_message_ = msg;
351      manager_->results.has_error_ = true;
352    }
353  }
354  total_iterations_ = 0;
355  if (timer_->running()) timer_->StopTimer();
356}
357
358void State::SetIterationTime(double seconds) {
359  timer_->SetIterationTime(seconds);
360}
361
362void State::SetLabel(const char* label) {
363  MutexLock l(manager_->GetBenchmarkMutex());
364  manager_->results.report_label_ = label;
365}
366
367void State::StartKeepRunning() {
368  CHECK(!started_ && !finished_);
369  started_ = true;
370  total_iterations_ = error_occurred_ ? 0 : max_iterations;
371  manager_->StartStopBarrier();
372  if (!error_occurred_) ResumeTiming();
373}
374
375void State::FinishKeepRunning() {
376  CHECK(started_ && (!finished_ || error_occurred_));
377  if (!error_occurred_) {
378    PauseTiming();
379  }
380  // Total iterations has now wrapped around past 0. Fix this.
381  total_iterations_ = 0;
382  finished_ = true;
383  manager_->StartStopBarrier();
384}
385
386namespace internal {
387namespace {
388
389void RunBenchmarks(const std::vector<Benchmark::Instance>& benchmarks,
390                           BenchmarkReporter* console_reporter,
391                           BenchmarkReporter* file_reporter) {
392  // Note the file_reporter can be null.
393  CHECK(console_reporter != nullptr);
394
395  // Determine the width of the name field using a minimum width of 10.
396  bool has_repetitions = FLAGS_benchmark_repetitions > 1;
397  size_t name_field_width = 10;
398  size_t stat_field_width = 0;
399  for (const Benchmark::Instance& benchmark : benchmarks) {
400    name_field_width =
401        std::max<size_t>(name_field_width, benchmark.name.size());
402    has_repetitions |= benchmark.repetitions > 1;
403
404    for(const auto& Stat : *benchmark.statistics)
405      stat_field_width = std::max<size_t>(stat_field_width, Stat.name_.size());
406  }
407  if (has_repetitions) name_field_width += 1 + stat_field_width;
408
409  // Print header here
410  BenchmarkReporter::Context context;
411  context.name_field_width = name_field_width;
412
413  // Keep track of running times of all instances of current benchmark
414  std::vector<BenchmarkReporter::Run> complexity_reports;
415
416  // We flush streams after invoking reporter methods that write to them. This
417  // ensures users get timely updates even when streams are not line-buffered.
418  auto flushStreams = [](BenchmarkReporter* reporter) {
419    if (!reporter) return;
420    std::flush(reporter->GetOutputStream());
421    std::flush(reporter->GetErrorStream());
422  };
423
424  if (console_reporter->ReportContext(context) &&
425      (!file_reporter || file_reporter->ReportContext(context))) {
426    flushStreams(console_reporter);
427    flushStreams(file_reporter);
428    for (const auto& benchmark : benchmarks) {
429      std::vector<BenchmarkReporter::Run> reports =
430          RunBenchmark(benchmark, &complexity_reports);
431      console_reporter->ReportRuns(reports);
432      if (file_reporter) file_reporter->ReportRuns(reports);
433      flushStreams(console_reporter);
434      flushStreams(file_reporter);
435    }
436  }
437  console_reporter->Finalize();
438  if (file_reporter) file_reporter->Finalize();
439  flushStreams(console_reporter);
440  flushStreams(file_reporter);
441}
442
443std::unique_ptr<BenchmarkReporter> CreateReporter(
444    std::string const& name, ConsoleReporter::OutputOptions output_opts) {
445  typedef std::unique_ptr<BenchmarkReporter> PtrType;
446  if (name == "console") {
447    return PtrType(new ConsoleReporter(output_opts));
448  } else if (name == "json") {
449    return PtrType(new JSONReporter);
450  } else if (name == "csv") {
451    return PtrType(new CSVReporter);
452  } else {
453    std::cerr << "Unexpected format: '" << name << "'\n";
454    std::exit(1);
455  }
456}
457
458}  // end namespace
459
460bool IsZero(double n) {
461  return std::abs(n) < std::numeric_limits<double>::epsilon();
462}
463
464ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) {
465  int output_opts = ConsoleReporter::OO_Defaults;
466  if ((FLAGS_benchmark_color == "auto" && IsColorTerminal()) ||
467      IsTruthyFlagValue(FLAGS_benchmark_color)) {
468    output_opts |= ConsoleReporter::OO_Color;
469  } else {
470    output_opts &= ~ConsoleReporter::OO_Color;
471  }
472  if(force_no_color) {
473    output_opts &= ~ConsoleReporter::OO_Color;
474  }
475  if(FLAGS_benchmark_counters_tabular) {
476    output_opts |= ConsoleReporter::OO_Tabular;
477  } else {
478    output_opts &= ~ConsoleReporter::OO_Tabular;
479  }
480  return static_cast< ConsoleReporter::OutputOptions >(output_opts);
481}
482
483}  // end namespace internal
484
485size_t RunSpecifiedBenchmarks() {
486  return RunSpecifiedBenchmarks(nullptr, nullptr);
487}
488
489size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter) {
490  return RunSpecifiedBenchmarks(console_reporter, nullptr);
491}
492
493size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter,
494                              BenchmarkReporter* file_reporter) {
495  std::string spec = FLAGS_benchmark_filter;
496  if (spec.empty() || spec == "all")
497    spec = ".";  // Regexp that matches all benchmarks
498
499  // Setup the reporters
500  std::ofstream output_file;
501  std::unique_ptr<BenchmarkReporter> default_console_reporter;
502  std::unique_ptr<BenchmarkReporter> default_file_reporter;
503  if (!console_reporter) {
504    default_console_reporter = internal::CreateReporter(
505          FLAGS_benchmark_format, internal::GetOutputOptions());
506    console_reporter = default_console_reporter.get();
507  }
508  auto& Out = console_reporter->GetOutputStream();
509  auto& Err = console_reporter->GetErrorStream();
510
511  std::string const& fname = FLAGS_benchmark_out;
512  if (fname.empty() && file_reporter) {
513    Err << "A custom file reporter was provided but "
514           "--benchmark_out=<file> was not specified."
515        << std::endl;
516    std::exit(1);
517  }
518  if (!fname.empty()) {
519    output_file.open(fname);
520    if (!output_file.is_open()) {
521      Err << "invalid file name: '" << fname << std::endl;
522      std::exit(1);
523    }
524    if (!file_reporter) {
525      default_file_reporter = internal::CreateReporter(
526          FLAGS_benchmark_out_format, ConsoleReporter::OO_None);
527      file_reporter = default_file_reporter.get();
528    }
529    file_reporter->SetOutputStream(&output_file);
530    file_reporter->SetErrorStream(&output_file);
531  }
532
533  std::vector<internal::Benchmark::Instance> benchmarks;
534  if (!FindBenchmarksInternal(spec, &benchmarks, &Err)) return 0;
535
536  if (benchmarks.empty()) {
537    Err << "Failed to match any benchmarks against regex: " << spec << "\n";
538    return 0;
539  }
540
541  if (FLAGS_benchmark_list_tests) {
542    for (auto const& benchmark : benchmarks) Out << benchmark.name << "\n";
543  } else {
544    internal::RunBenchmarks(benchmarks, console_reporter, file_reporter);
545  }
546
547  return benchmarks.size();
548}
549
550namespace internal {
551
552void PrintUsageAndExit() {
553  fprintf(stdout,
554          "benchmark"
555          " [--benchmark_list_tests={true|false}]\n"
556          "          [--benchmark_filter=<regex>]\n"
557          "          [--benchmark_min_time=<min_time>]\n"
558          "          [--benchmark_repetitions=<num_repetitions>]\n"
559          "          [--benchmark_report_aggregates_only={true|false}\n"
560          "          [--benchmark_format=<console|json|csv>]\n"
561          "          [--benchmark_out=<filename>]\n"
562          "          [--benchmark_out_format=<json|console|csv>]\n"
563          "          [--benchmark_color={auto|true|false}]\n"
564          "          [--benchmark_counters_tabular={true|false}]\n"
565          "          [--v=<verbosity>]\n");
566  exit(0);
567}
568
569void ParseCommandLineFlags(int* argc, char** argv) {
570  using namespace benchmark;
571  BenchmarkReporter::Context::executable_name = argv[0];
572  for (int i = 1; i < *argc; ++i) {
573    if (ParseBoolFlag(argv[i], "benchmark_list_tests",
574                      &FLAGS_benchmark_list_tests) ||
575        ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
576        ParseDoubleFlag(argv[i], "benchmark_min_time",
577                        &FLAGS_benchmark_min_time) ||
578        ParseInt32Flag(argv[i], "benchmark_repetitions",
579                       &FLAGS_benchmark_repetitions) ||
580        ParseBoolFlag(argv[i], "benchmark_report_aggregates_only",
581                      &FLAGS_benchmark_report_aggregates_only) ||
582        ParseStringFlag(argv[i], "benchmark_format", &FLAGS_benchmark_format) ||
583        ParseStringFlag(argv[i], "benchmark_out", &FLAGS_benchmark_out) ||
584        ParseStringFlag(argv[i], "benchmark_out_format",
585                        &FLAGS_benchmark_out_format) ||
586        ParseStringFlag(argv[i], "benchmark_color", &FLAGS_benchmark_color) ||
587        // "color_print" is the deprecated name for "benchmark_color".
588        // TODO: Remove this.
589        ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) ||
590        ParseBoolFlag(argv[i], "benchmark_counters_tabular",
591                        &FLAGS_benchmark_counters_tabular) ||
592        ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
593      for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1];
594
595      --(*argc);
596      --i;
597    } else if (IsFlag(argv[i], "help")) {
598      PrintUsageAndExit();
599    }
600  }
601  for (auto const* flag :
602       {&FLAGS_benchmark_format, &FLAGS_benchmark_out_format})
603    if (*flag != "console" && *flag != "json" && *flag != "csv") {
604      PrintUsageAndExit();
605    }
606  if (FLAGS_benchmark_color.empty()) {
607    PrintUsageAndExit();
608  }
609}
610
611int InitializeStreams() {
612  static std::ios_base::Init init;
613  return 0;
614}
615
616}  // end namespace internal
617
618void Initialize(int* argc, char** argv) {
619  internal::ParseCommandLineFlags(argc, argv);
620  internal::LogLevel() = FLAGS_v;
621}
622
623bool ReportUnrecognizedArguments(int argc, char** argv) {
624  for (int i = 1; i < argc; ++i) {
625    fprintf(stderr, "%s: error: unrecognized command-line flag: %s\n", argv[0], argv[i]);
626  }
627  return argc > 1;
628}
629
630}  // end namespace benchmark
631