1// Copyright 2015 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15#include "benchmark_register.h" 16 17#ifndef BENCHMARK_OS_WINDOWS 18#ifndef BENCHMARK_OS_FUCHSIA 19#include <sys/resource.h> 20#endif 21#include <sys/time.h> 22#include <unistd.h> 23#endif 24 25#include <algorithm> 26#include <atomic> 27#include <condition_variable> 28#include <cstdio> 29#include <cstdlib> 30#include <cstring> 31#include <fstream> 32#include <iostream> 33#include <memory> 34#include <sstream> 35#include <thread> 36 37#include "benchmark/benchmark.h" 38#include "benchmark_api_internal.h" 39#include "check.h" 40#include "commandlineflags.h" 41#include "complexity.h" 42#include "internal_macros.h" 43#include "log.h" 44#include "mutex.h" 45#include "re.h" 46#include "statistics.h" 47#include "string_util.h" 48#include "timers.h" 49 50namespace benchmark { 51 52namespace { 53// For non-dense Range, intermediate values are powers of kRangeMultiplier. 54static const int kRangeMultiplier = 8; 55// The size of a benchmark family determines is the number of inputs to repeat 56// the benchmark on. If this is "large" then warn the user during configuration. 57static const size_t kMaxFamilySize = 100; 58} // end namespace 59 60namespace internal { 61 62//=============================================================================// 63// BenchmarkFamilies 64//=============================================================================// 65 66// Class for managing registered benchmarks. Note that each registered 67// benchmark identifies a family of related benchmarks to run. 68class BenchmarkFamilies { 69 public: 70 static BenchmarkFamilies* GetInstance(); 71 72 // Registers a benchmark family and returns the index assigned to it. 73 size_t AddBenchmark(std::unique_ptr<Benchmark> family); 74 75 // Clear all registered benchmark families. 76 void ClearBenchmarks(); 77 78 // Extract the list of benchmark instances that match the specified 79 // regular expression. 80 bool FindBenchmarks(std::string re, 81 std::vector<BenchmarkInstance>* benchmarks, 82 std::ostream* Err); 83 84 private: 85 BenchmarkFamilies() {} 86 87 std::vector<std::unique_ptr<Benchmark>> families_; 88 Mutex mutex_; 89}; 90 91BenchmarkFamilies* BenchmarkFamilies::GetInstance() { 92 static BenchmarkFamilies instance; 93 return &instance; 94} 95 96size_t BenchmarkFamilies::AddBenchmark(std::unique_ptr<Benchmark> family) { 97 MutexLock l(mutex_); 98 size_t index = families_.size(); 99 families_.push_back(std::move(family)); 100 return index; 101} 102 103void BenchmarkFamilies::ClearBenchmarks() { 104 MutexLock l(mutex_); 105 families_.clear(); 106 families_.shrink_to_fit(); 107} 108 109bool BenchmarkFamilies::FindBenchmarks( 110 std::string spec, std::vector<BenchmarkInstance>* benchmarks, 111 std::ostream* ErrStream) { 112 CHECK(ErrStream); 113 auto& Err = *ErrStream; 114 // Make regular expression out of command-line flag 115 std::string error_msg; 116 Regex re; 117 bool isNegativeFilter = false; 118 if (spec[0] == '-') { 119 spec.replace(0, 1, ""); 120 isNegativeFilter = true; 121 } 122 if (!re.Init(spec, &error_msg)) { 123 Err << "Could not compile benchmark re: " << error_msg << std::endl; 124 return false; 125 } 126 127 // Special list of thread counts to use when none are specified 128 const std::vector<int> one_thread = {1}; 129 130 MutexLock l(mutex_); 131 for (std::unique_ptr<Benchmark>& family : families_) { 132 // Family was deleted or benchmark doesn't match 133 if (!family) continue; 134 135 if (family->ArgsCnt() == -1) { 136 family->Args({}); 137 } 138 const std::vector<int>* thread_counts = 139 (family->thread_counts_.empty() 140 ? &one_thread 141 : &static_cast<const std::vector<int>&>(family->thread_counts_)); 142 const size_t family_size = family->args_.size() * thread_counts->size(); 143 // The benchmark will be run at least 'family_size' different inputs. 144 // If 'family_size' is very large warn the user. 145 if (family_size > kMaxFamilySize) { 146 Err << "The number of inputs is very large. " << family->name_ 147 << " will be repeated at least " << family_size << " times.\n"; 148 } 149 // reserve in the special case the regex ".", since we know the final 150 // family size. 151 if (spec == ".") benchmarks->reserve(family_size); 152 153 for (auto const& args : family->args_) { 154 for (int num_threads : *thread_counts) { 155 BenchmarkInstance instance; 156 instance.name = family->name_; 157 instance.benchmark = family.get(); 158 instance.aggregation_report_mode = family->aggregation_report_mode_; 159 instance.arg = args; 160 instance.time_unit = family->time_unit_; 161 instance.range_multiplier = family->range_multiplier_; 162 instance.min_time = family->min_time_; 163 instance.iterations = family->iterations_; 164 instance.repetitions = family->repetitions_; 165 instance.use_real_time = family->use_real_time_; 166 instance.use_manual_time = family->use_manual_time_; 167 instance.complexity = family->complexity_; 168 instance.complexity_lambda = family->complexity_lambda_; 169 instance.statistics = &family->statistics_; 170 instance.threads = num_threads; 171 172 // Add arguments to instance name 173 size_t arg_i = 0; 174 for (auto const& arg : args) { 175 instance.name += "/"; 176 177 if (arg_i < family->arg_names_.size()) { 178 const auto& arg_name = family->arg_names_[arg_i]; 179 if (!arg_name.empty()) { 180 instance.name += 181 StrFormat("%s:", family->arg_names_[arg_i].c_str()); 182 } 183 } 184 185 // we know that the args are always non-negative (see 'AddRange()'), 186 // thus print as 'unsigned'. BUT, do a cast due to the 32-bit builds. 187 instance.name += StrFormat("%lu", static_cast<unsigned long>(arg)); 188 ++arg_i; 189 } 190 191 if (!IsZero(family->min_time_)) 192 instance.name += StrFormat("/min_time:%0.3f", family->min_time_); 193 if (family->iterations_ != 0) { 194 instance.name += 195 StrFormat("/iterations:%lu", 196 static_cast<unsigned long>(family->iterations_)); 197 } 198 if (family->repetitions_ != 0) 199 instance.name += StrFormat("/repeats:%d", family->repetitions_); 200 201 if (family->use_manual_time_) { 202 instance.name += "/manual_time"; 203 } else if (family->use_real_time_) { 204 instance.name += "/real_time"; 205 } 206 207 // Add the number of threads used to the name 208 if (!family->thread_counts_.empty()) { 209 instance.name += StrFormat("/threads:%d", instance.threads); 210 } 211 212 if ((re.Match(instance.name) && !isNegativeFilter) || 213 (!re.Match(instance.name) && isNegativeFilter)) { 214 instance.last_benchmark_instance = (&args == &family->args_.back()); 215 benchmarks->push_back(std::move(instance)); 216 } 217 } 218 } 219 } 220 return true; 221} 222 223Benchmark* RegisterBenchmarkInternal(Benchmark* bench) { 224 std::unique_ptr<Benchmark> bench_ptr(bench); 225 BenchmarkFamilies* families = BenchmarkFamilies::GetInstance(); 226 families->AddBenchmark(std::move(bench_ptr)); 227 return bench; 228} 229 230// FIXME: This function is a hack so that benchmark.cc can access 231// `BenchmarkFamilies` 232bool FindBenchmarksInternal(const std::string& re, 233 std::vector<BenchmarkInstance>* benchmarks, 234 std::ostream* Err) { 235 return BenchmarkFamilies::GetInstance()->FindBenchmarks(re, benchmarks, Err); 236} 237 238//=============================================================================// 239// Benchmark 240//=============================================================================// 241 242Benchmark::Benchmark(const char* name) 243 : name_(name), 244 aggregation_report_mode_(ARM_Unspecified), 245 time_unit_(kNanosecond), 246 range_multiplier_(kRangeMultiplier), 247 min_time_(0), 248 iterations_(0), 249 repetitions_(0), 250 use_real_time_(false), 251 use_manual_time_(false), 252 complexity_(oNone), 253 complexity_lambda_(nullptr) { 254 ComputeStatistics("mean", StatisticsMean); 255 ComputeStatistics("median", StatisticsMedian); 256 ComputeStatistics("stddev", StatisticsStdDev); 257} 258 259Benchmark::~Benchmark() {} 260 261Benchmark* Benchmark::Arg(int64_t x) { 262 CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); 263 args_.push_back({x}); 264 return this; 265} 266 267Benchmark* Benchmark::Unit(TimeUnit unit) { 268 time_unit_ = unit; 269 return this; 270} 271 272Benchmark* Benchmark::Range(int64_t start, int64_t limit) { 273 CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); 274 std::vector<int64_t> arglist; 275 AddRange(&arglist, start, limit, range_multiplier_); 276 277 for (int64_t i : arglist) { 278 args_.push_back({i}); 279 } 280 return this; 281} 282 283Benchmark* Benchmark::Ranges( 284 const std::vector<std::pair<int64_t, int64_t>>& ranges) { 285 CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size())); 286 std::vector<std::vector<int64_t>> arglists(ranges.size()); 287 std::size_t total = 1; 288 for (std::size_t i = 0; i < ranges.size(); i++) { 289 AddRange(&arglists[i], ranges[i].first, ranges[i].second, 290 range_multiplier_); 291 total *= arglists[i].size(); 292 } 293 294 std::vector<std::size_t> ctr(arglists.size(), 0); 295 296 for (std::size_t i = 0; i < total; i++) { 297 std::vector<int64_t> tmp; 298 tmp.reserve(arglists.size()); 299 300 for (std::size_t j = 0; j < arglists.size(); j++) { 301 tmp.push_back(arglists[j].at(ctr[j])); 302 } 303 304 args_.push_back(std::move(tmp)); 305 306 for (std::size_t j = 0; j < arglists.size(); j++) { 307 if (ctr[j] + 1 < arglists[j].size()) { 308 ++ctr[j]; 309 break; 310 } 311 ctr[j] = 0; 312 } 313 } 314 return this; 315} 316 317Benchmark* Benchmark::ArgName(const std::string& name) { 318 CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); 319 arg_names_ = {name}; 320 return this; 321} 322 323Benchmark* Benchmark::ArgNames(const std::vector<std::string>& names) { 324 CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(names.size())); 325 arg_names_ = names; 326 return this; 327} 328 329Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) { 330 CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); 331 CHECK_GE(start, 0); 332 CHECK_LE(start, limit); 333 for (int64_t arg = start; arg <= limit; arg += step) { 334 args_.push_back({arg}); 335 } 336 return this; 337} 338 339Benchmark* Benchmark::Args(const std::vector<int64_t>& args) { 340 CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(args.size())); 341 args_.push_back(args); 342 return this; 343} 344 345Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) { 346 custom_arguments(this); 347 return this; 348} 349 350Benchmark* Benchmark::RangeMultiplier(int multiplier) { 351 CHECK(multiplier > 1); 352 range_multiplier_ = multiplier; 353 return this; 354} 355 356Benchmark* Benchmark::MinTime(double t) { 357 CHECK(t > 0.0); 358 CHECK(iterations_ == 0); 359 min_time_ = t; 360 return this; 361} 362 363Benchmark* Benchmark::Iterations(size_t n) { 364 CHECK(n > 0); 365 CHECK(IsZero(min_time_)); 366 iterations_ = n; 367 return this; 368} 369 370Benchmark* Benchmark::Repetitions(int n) { 371 CHECK(n > 0); 372 repetitions_ = n; 373 return this; 374} 375 376Benchmark* Benchmark::ReportAggregatesOnly(bool value) { 377 aggregation_report_mode_ = value ? ARM_ReportAggregatesOnly : ARM_Default; 378 return this; 379} 380 381Benchmark* Benchmark::DisplayAggregatesOnly(bool value) { 382 // If we were called, the report mode is no longer 'unspecified', in any case. 383 aggregation_report_mode_ = static_cast<AggregationReportMode>( 384 aggregation_report_mode_ | ARM_Default); 385 386 if (value) { 387 aggregation_report_mode_ = static_cast<AggregationReportMode>( 388 aggregation_report_mode_ | ARM_DisplayReportAggregatesOnly); 389 } else { 390 aggregation_report_mode_ = static_cast<AggregationReportMode>( 391 aggregation_report_mode_ & ~ARM_DisplayReportAggregatesOnly); 392 } 393 394 return this; 395} 396 397Benchmark* Benchmark::UseRealTime() { 398 CHECK(!use_manual_time_) 399 << "Cannot set UseRealTime and UseManualTime simultaneously."; 400 use_real_time_ = true; 401 return this; 402} 403 404Benchmark* Benchmark::UseManualTime() { 405 CHECK(!use_real_time_) 406 << "Cannot set UseRealTime and UseManualTime simultaneously."; 407 use_manual_time_ = true; 408 return this; 409} 410 411Benchmark* Benchmark::Complexity(BigO complexity) { 412 complexity_ = complexity; 413 return this; 414} 415 416Benchmark* Benchmark::Complexity(BigOFunc* complexity) { 417 complexity_lambda_ = complexity; 418 complexity_ = oLambda; 419 return this; 420} 421 422Benchmark* Benchmark::ComputeStatistics(std::string name, 423 StatisticsFunc* statistics) { 424 statistics_.emplace_back(name, statistics); 425 return this; 426} 427 428Benchmark* Benchmark::Threads(int t) { 429 CHECK_GT(t, 0); 430 thread_counts_.push_back(t); 431 return this; 432} 433 434Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) { 435 CHECK_GT(min_threads, 0); 436 CHECK_GE(max_threads, min_threads); 437 438 AddRange(&thread_counts_, min_threads, max_threads, 2); 439 return this; 440} 441 442Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads, 443 int stride) { 444 CHECK_GT(min_threads, 0); 445 CHECK_GE(max_threads, min_threads); 446 CHECK_GE(stride, 1); 447 448 for (auto i = min_threads; i < max_threads; i += stride) { 449 thread_counts_.push_back(i); 450 } 451 thread_counts_.push_back(max_threads); 452 return this; 453} 454 455Benchmark* Benchmark::ThreadPerCpu() { 456 thread_counts_.push_back(CPUInfo::Get().num_cpus); 457 return this; 458} 459 460void Benchmark::SetName(const char* name) { name_ = name; } 461 462int Benchmark::ArgsCnt() const { 463 if (args_.empty()) { 464 if (arg_names_.empty()) return -1; 465 return static_cast<int>(arg_names_.size()); 466 } 467 return static_cast<int>(args_.front().size()); 468} 469 470//=============================================================================// 471// FunctionBenchmark 472//=============================================================================// 473 474void FunctionBenchmark::Run(State& st) { func_(st); } 475 476} // end namespace internal 477 478void ClearRegisteredBenchmarks() { 479 internal::BenchmarkFamilies::GetInstance()->ClearBenchmarks(); 480} 481 482} // end namespace benchmark 483