1// Copyright 2015 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15#include "internal_macros.h" 16 17#ifdef BENCHMARK_OS_WINDOWS 18#include <shlwapi.h> 19#undef StrCat // Don't let StrCat in string_util.h be renamed to lstrcatA 20#include <versionhelpers.h> 21#include <windows.h> 22#else 23#include <fcntl.h> 24#ifndef BENCHMARK_OS_FUCHSIA 25#include <sys/resource.h> 26#endif 27#include <sys/time.h> 28#include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD 29#include <unistd.h> 30#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || \ 31 defined BENCHMARK_OS_NETBSD || defined BENCHMARK_OS_OPENBSD 32#define BENCHMARK_HAS_SYSCTL 33#include <sys/sysctl.h> 34#endif 35#endif 36#if defined(BENCHMARK_OS_SOLARIS) 37#include <kstat.h> 38#endif 39 40#include <algorithm> 41#include <array> 42#include <bitset> 43#include <cerrno> 44#include <climits> 45#include <cstdint> 46#include <cstdio> 47#include <cstdlib> 48#include <cstring> 49#include <fstream> 50#include <iostream> 51#include <iterator> 52#include <limits> 53#include <memory> 54#include <sstream> 55 56#include "check.h" 57#include "cycleclock.h" 58#include "internal_macros.h" 59#include "log.h" 60#include "sleep.h" 61#include "string_util.h" 62 63namespace benchmark { 64namespace { 65 66void PrintImp(std::ostream& out) { out << std::endl; } 67 68template <class First, class... Rest> 69void PrintImp(std::ostream& out, First&& f, Rest&&... rest) { 70 out << std::forward<First>(f); 71 PrintImp(out, std::forward<Rest>(rest)...); 72} 73 74template <class... Args> 75BENCHMARK_NORETURN void PrintErrorAndDie(Args&&... args) { 76 PrintImp(std::cerr, std::forward<Args>(args)...); 77 std::exit(EXIT_FAILURE); 78} 79 80#ifdef BENCHMARK_HAS_SYSCTL 81 82/// ValueUnion - A type used to correctly alias the byte-for-byte output of 83/// `sysctl` with the result type it's to be interpreted as. 84struct ValueUnion { 85 union DataT { 86 uint32_t uint32_value; 87 uint64_t uint64_value; 88 // For correct aliasing of union members from bytes. 89 char bytes[8]; 90 }; 91 using DataPtr = std::unique_ptr<DataT, decltype(&std::free)>; 92 93 // The size of the data union member + its trailing array size. 94 size_t Size; 95 DataPtr Buff; 96 97 public: 98 ValueUnion() : Size(0), Buff(nullptr, &std::free) {} 99 100 explicit ValueUnion(size_t BuffSize) 101 : Size(sizeof(DataT) + BuffSize), 102 Buff(::new (std::malloc(Size)) DataT(), &std::free) {} 103 104 ValueUnion(ValueUnion&& other) = default; 105 106 explicit operator bool() const { return bool(Buff); } 107 108 char* data() const { return Buff->bytes; } 109 110 std::string GetAsString() const { return std::string(data()); } 111 112 int64_t GetAsInteger() const { 113 if (Size == sizeof(Buff->uint32_value)) 114 return static_cast<int32_t>(Buff->uint32_value); 115 else if (Size == sizeof(Buff->uint64_value)) 116 return static_cast<int64_t>(Buff->uint64_value); 117 BENCHMARK_UNREACHABLE(); 118 } 119 120 uint64_t GetAsUnsigned() const { 121 if (Size == sizeof(Buff->uint32_value)) 122 return Buff->uint32_value; 123 else if (Size == sizeof(Buff->uint64_value)) 124 return Buff->uint64_value; 125 BENCHMARK_UNREACHABLE(); 126 } 127 128 template <class T, int N> 129 std::array<T, N> GetAsArray() { 130 const int ArrSize = sizeof(T) * N; 131 CHECK_LE(ArrSize, Size); 132 std::array<T, N> Arr; 133 std::memcpy(Arr.data(), data(), ArrSize); 134 return Arr; 135 } 136}; 137 138ValueUnion GetSysctlImp(std::string const& Name) { 139#if defined BENCHMARK_OS_OPENBSD 140 int mib[2]; 141 142 mib[0] = CTL_HW; 143 if ((Name == "hw.ncpu") || (Name == "hw.cpuspeed")){ 144 ValueUnion buff(sizeof(int)); 145 146 if (Name == "hw.ncpu") { 147 mib[1] = HW_NCPU; 148 } else { 149 mib[1] = HW_CPUSPEED; 150 } 151 152 if (sysctl(mib, 2, buff.data(), &buff.Size, nullptr, 0) == -1) { 153 return ValueUnion(); 154 } 155 return buff; 156 } 157 return ValueUnion(); 158#else 159 size_t CurBuffSize = 0; 160 if (sysctlbyname(Name.c_str(), nullptr, &CurBuffSize, nullptr, 0) == -1) 161 return ValueUnion(); 162 163 ValueUnion buff(CurBuffSize); 164 if (sysctlbyname(Name.c_str(), buff.data(), &buff.Size, nullptr, 0) == 0) 165 return buff; 166 return ValueUnion(); 167#endif 168} 169 170BENCHMARK_MAYBE_UNUSED 171bool GetSysctl(std::string const& Name, std::string* Out) { 172 Out->clear(); 173 auto Buff = GetSysctlImp(Name); 174 if (!Buff) return false; 175 Out->assign(Buff.data()); 176 return true; 177} 178 179template <class Tp, 180 class = typename std::enable_if<std::is_integral<Tp>::value>::type> 181bool GetSysctl(std::string const& Name, Tp* Out) { 182 *Out = 0; 183 auto Buff = GetSysctlImp(Name); 184 if (!Buff) return false; 185 *Out = static_cast<Tp>(Buff.GetAsUnsigned()); 186 return true; 187} 188 189template <class Tp, size_t N> 190bool GetSysctl(std::string const& Name, std::array<Tp, N>* Out) { 191 auto Buff = GetSysctlImp(Name); 192 if (!Buff) return false; 193 *Out = Buff.GetAsArray<Tp, N>(); 194 return true; 195} 196#endif 197 198template <class ArgT> 199bool ReadFromFile(std::string const& fname, ArgT* arg) { 200 *arg = ArgT(); 201 std::ifstream f(fname.c_str()); 202 if (!f.is_open()) return false; 203 f >> *arg; 204 return f.good(); 205} 206 207bool CpuScalingEnabled(int num_cpus) { 208 // We don't have a valid CPU count, so don't even bother. 209 if (num_cpus <= 0) return false; 210#ifndef BENCHMARK_OS_WINDOWS 211 // On Linux, the CPUfreq subsystem exposes CPU information as files on the 212 // local file system. If reading the exported files fails, then we may not be 213 // running on Linux, so we silently ignore all the read errors. 214 std::string res; 215 for (int cpu = 0; cpu < num_cpus; ++cpu) { 216 std::string governor_file = 217 StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor"); 218 if (ReadFromFile(governor_file, &res) && res != "performance") return true; 219 } 220#endif 221 return false; 222} 223 224int CountSetBitsInCPUMap(std::string Val) { 225 auto CountBits = [](std::string Part) { 226 using CPUMask = std::bitset<sizeof(std::uintptr_t) * CHAR_BIT>; 227 Part = "0x" + Part; 228 CPUMask Mask(std::stoul(Part, nullptr, 16)); 229 return static_cast<int>(Mask.count()); 230 }; 231 size_t Pos; 232 int total = 0; 233 while ((Pos = Val.find(',')) != std::string::npos) { 234 total += CountBits(Val.substr(0, Pos)); 235 Val = Val.substr(Pos + 1); 236 } 237 if (!Val.empty()) { 238 total += CountBits(Val); 239 } 240 return total; 241} 242 243BENCHMARK_MAYBE_UNUSED 244std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() { 245 std::vector<CPUInfo::CacheInfo> res; 246 std::string dir = "/sys/devices/system/cpu/cpu0/cache/"; 247 int Idx = 0; 248 while (true) { 249 CPUInfo::CacheInfo info; 250 std::string FPath = StrCat(dir, "index", Idx++, "/"); 251 std::ifstream f(StrCat(FPath, "size").c_str()); 252 if (!f.is_open()) break; 253 std::string suffix; 254 f >> info.size; 255 if (f.fail()) 256 PrintErrorAndDie("Failed while reading file '", FPath, "size'"); 257 if (f.good()) { 258 f >> suffix; 259 if (f.bad()) 260 PrintErrorAndDie( 261 "Invalid cache size format: failed to read size suffix"); 262 else if (f && suffix != "K") 263 PrintErrorAndDie("Invalid cache size format: Expected bytes ", suffix); 264 else if (suffix == "K") 265 info.size *= 1000; 266 } 267 if (!ReadFromFile(StrCat(FPath, "type"), &info.type)) 268 PrintErrorAndDie("Failed to read from file ", FPath, "type"); 269 if (!ReadFromFile(StrCat(FPath, "level"), &info.level)) 270 PrintErrorAndDie("Failed to read from file ", FPath, "level"); 271 std::string map_str; 272 if (!ReadFromFile(StrCat(FPath, "shared_cpu_map"), &map_str)) 273 PrintErrorAndDie("Failed to read from file ", FPath, "shared_cpu_map"); 274 info.num_sharing = CountSetBitsInCPUMap(map_str); 275 res.push_back(info); 276 } 277 278 return res; 279} 280 281#ifdef BENCHMARK_OS_MACOSX 282std::vector<CPUInfo::CacheInfo> GetCacheSizesMacOSX() { 283 std::vector<CPUInfo::CacheInfo> res; 284 std::array<uint64_t, 4> CacheCounts{{0, 0, 0, 0}}; 285 GetSysctl("hw.cacheconfig", &CacheCounts); 286 287 struct { 288 std::string name; 289 std::string type; 290 int level; 291 uint64_t num_sharing; 292 } Cases[] = {{"hw.l1dcachesize", "Data", 1, CacheCounts[1]}, 293 {"hw.l1icachesize", "Instruction", 1, CacheCounts[1]}, 294 {"hw.l2cachesize", "Unified", 2, CacheCounts[2]}, 295 {"hw.l3cachesize", "Unified", 3, CacheCounts[3]}}; 296 for (auto& C : Cases) { 297 int val; 298 if (!GetSysctl(C.name, &val)) continue; 299 CPUInfo::CacheInfo info; 300 info.type = C.type; 301 info.level = C.level; 302 info.size = val; 303 info.num_sharing = static_cast<int>(C.num_sharing); 304 res.push_back(std::move(info)); 305 } 306 return res; 307} 308#elif defined(BENCHMARK_OS_WINDOWS) 309std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() { 310 std::vector<CPUInfo::CacheInfo> res; 311 DWORD buffer_size = 0; 312 using PInfo = SYSTEM_LOGICAL_PROCESSOR_INFORMATION; 313 using CInfo = CACHE_DESCRIPTOR; 314 315 using UPtr = std::unique_ptr<PInfo, decltype(&std::free)>; 316 GetLogicalProcessorInformation(nullptr, &buffer_size); 317 UPtr buff((PInfo*)malloc(buffer_size), &std::free); 318 if (!GetLogicalProcessorInformation(buff.get(), &buffer_size)) 319 PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: ", 320 GetLastError()); 321 322 PInfo* it = buff.get(); 323 PInfo* end = buff.get() + (buffer_size / sizeof(PInfo)); 324 325 for (; it != end; ++it) { 326 if (it->Relationship != RelationCache) continue; 327 using BitSet = std::bitset<sizeof(ULONG_PTR) * CHAR_BIT>; 328 BitSet B(it->ProcessorMask); 329 // To prevent duplicates, only consider caches where CPU 0 is specified 330 if (!B.test(0)) continue; 331 CInfo* Cache = &it->Cache; 332 CPUInfo::CacheInfo C; 333 C.num_sharing = static_cast<int>(B.count()); 334 C.level = Cache->Level; 335 C.size = Cache->Size; 336 C.type = "Unknown"; 337 switch (Cache->Type) { 338 case CacheUnified: 339 C.type = "Unified"; 340 break; 341 case CacheInstruction: 342 C.type = "Instruction"; 343 break; 344 case CacheData: 345 C.type = "Data"; 346 break; 347 case CacheTrace: 348 C.type = "Trace"; 349 break; 350 } 351 res.push_back(C); 352 } 353 return res; 354} 355#endif 356 357std::vector<CPUInfo::CacheInfo> GetCacheSizes() { 358#ifdef BENCHMARK_OS_MACOSX 359 return GetCacheSizesMacOSX(); 360#elif defined(BENCHMARK_OS_WINDOWS) 361 return GetCacheSizesWindows(); 362#else 363 return GetCacheSizesFromKVFS(); 364#endif 365} 366 367int GetNumCPUs() { 368#ifdef BENCHMARK_HAS_SYSCTL 369 int NumCPU = -1; 370 if (GetSysctl("hw.ncpu", &NumCPU)) return NumCPU; 371 fprintf(stderr, "Err: %s\n", strerror(errno)); 372 std::exit(EXIT_FAILURE); 373#elif defined(BENCHMARK_OS_WINDOWS) 374 SYSTEM_INFO sysinfo; 375 // Use memset as opposed to = {} to avoid GCC missing initializer false 376 // positives. 377 std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO)); 378 GetSystemInfo(&sysinfo); 379 return sysinfo.dwNumberOfProcessors; // number of logical 380 // processors in the current 381 // group 382#elif defined(BENCHMARK_OS_SOLARIS) 383 // Returns -1 in case of a failure. 384 int NumCPU = sysconf(_SC_NPROCESSORS_ONLN); 385 if (NumCPU < 0) { 386 fprintf(stderr, 387 "sysconf(_SC_NPROCESSORS_ONLN) failed with error: %s\n", 388 strerror(errno)); 389 } 390 return NumCPU; 391#else 392 int NumCPUs = 0; 393 int MaxID = -1; 394 std::ifstream f("/proc/cpuinfo"); 395 if (!f.is_open()) { 396 std::cerr << "failed to open /proc/cpuinfo\n"; 397 return -1; 398 } 399 const std::string Key = "processor"; 400 std::string ln; 401 while (std::getline(f, ln)) { 402 if (ln.empty()) continue; 403 size_t SplitIdx = ln.find(':'); 404 std::string value; 405 if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1); 406 if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) { 407 NumCPUs++; 408 if (!value.empty()) { 409 int CurID = std::stoi(value); 410 MaxID = std::max(CurID, MaxID); 411 } 412 } 413 } 414 if (f.bad()) { 415 std::cerr << "Failure reading /proc/cpuinfo\n"; 416 return -1; 417 } 418 if (!f.eof()) { 419 std::cerr << "Failed to read to end of /proc/cpuinfo\n"; 420 return -1; 421 } 422 f.close(); 423 424 if ((MaxID + 1) != NumCPUs) { 425 fprintf(stderr, 426 "CPU ID assignments in /proc/cpuinfo seem messed up." 427 " This is usually caused by a bad BIOS.\n"); 428 } 429 return NumCPUs; 430#endif 431 BENCHMARK_UNREACHABLE(); 432} 433 434double GetCPUCyclesPerSecond() { 435#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN 436 long freq; 437 438 // If the kernel is exporting the tsc frequency use that. There are issues 439 // where cpuinfo_max_freq cannot be relied on because the BIOS may be 440 // exporintg an invalid p-state (on x86) or p-states may be used to put the 441 // processor in a new mode (turbo mode). Essentially, those frequencies 442 // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as 443 // well. 444 if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq) 445 // If CPU scaling is in effect, we want to use the *maximum* frequency, 446 // not whatever CPU speed some random processor happens to be using now. 447 || ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", 448 &freq)) { 449 // The value is in kHz (as the file name suggests). For example, on a 450 // 2GHz warpstation, the file contains the value "2000000". 451 return freq * 1000.0; 452 } 453 454 const double error_value = -1; 455 double bogo_clock = error_value; 456 457 std::ifstream f("/proc/cpuinfo"); 458 if (!f.is_open()) { 459 std::cerr << "failed to open /proc/cpuinfo\n"; 460 return error_value; 461 } 462 463 auto startsWithKey = [](std::string const& Value, std::string const& Key) { 464 if (Key.size() > Value.size()) return false; 465 auto Cmp = [&](char X, char Y) { 466 return std::tolower(X) == std::tolower(Y); 467 }; 468 return std::equal(Key.begin(), Key.end(), Value.begin(), Cmp); 469 }; 470 471 std::string ln; 472 while (std::getline(f, ln)) { 473 if (ln.empty()) continue; 474 size_t SplitIdx = ln.find(':'); 475 std::string value; 476 if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1); 477 // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only 478 // accept positive values. Some environments (virtual machines) report zero, 479 // which would cause infinite looping in WallTime_Init. 480 if (startsWithKey(ln, "cpu MHz")) { 481 if (!value.empty()) { 482 double cycles_per_second = std::stod(value) * 1000000.0; 483 if (cycles_per_second > 0) return cycles_per_second; 484 } 485 } else if (startsWithKey(ln, "bogomips")) { 486 if (!value.empty()) { 487 bogo_clock = std::stod(value) * 1000000.0; 488 if (bogo_clock < 0.0) bogo_clock = error_value; 489 } 490 } 491 } 492 if (f.bad()) { 493 std::cerr << "Failure reading /proc/cpuinfo\n"; 494 return error_value; 495 } 496 if (!f.eof()) { 497 std::cerr << "Failed to read to end of /proc/cpuinfo\n"; 498 return error_value; 499 } 500 f.close(); 501 // If we found the bogomips clock, but nothing better, we'll use it (but 502 // we're not happy about it); otherwise, fallback to the rough estimation 503 // below. 504 if (bogo_clock >= 0.0) return bogo_clock; 505 506#elif defined BENCHMARK_HAS_SYSCTL 507 constexpr auto* FreqStr = 508#if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD) 509 "machdep.tsc_freq"; 510#elif defined BENCHMARK_OS_OPENBSD 511 "hw.cpuspeed"; 512#else 513 "hw.cpufrequency"; 514#endif 515 unsigned long long hz = 0; 516#if defined BENCHMARK_OS_OPENBSD 517 if (GetSysctl(FreqStr, &hz)) return hz * 1000000; 518#else 519 if (GetSysctl(FreqStr, &hz)) return hz; 520#endif 521 fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", 522 FreqStr, strerror(errno)); 523 524#elif defined BENCHMARK_OS_WINDOWS 525 // In NT, read MHz from the registry. If we fail to do so or we're in win9x 526 // then make a crude estimate. 527 DWORD data, data_size = sizeof(data); 528 if (IsWindowsXPOrGreater() && 529 SUCCEEDED( 530 SHGetValueA(HKEY_LOCAL_MACHINE, 531 "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", 532 "~MHz", nullptr, &data, &data_size))) 533 return static_cast<double>((int64_t)data * 534 (int64_t)(1000 * 1000)); // was mhz 535#elif defined (BENCHMARK_OS_SOLARIS) 536 kstat_ctl_t *kc = kstat_open(); 537 if (!kc) { 538 std::cerr << "failed to open /dev/kstat\n"; 539 return -1; 540 } 541 kstat_t *ksp = kstat_lookup(kc, (char*)"cpu_info", -1, (char*)"cpu_info0"); 542 if (!ksp) { 543 std::cerr << "failed to lookup in /dev/kstat\n"; 544 return -1; 545 } 546 if (kstat_read(kc, ksp, NULL) < 0) { 547 std::cerr << "failed to read from /dev/kstat\n"; 548 return -1; 549 } 550 kstat_named_t *knp = 551 (kstat_named_t*)kstat_data_lookup(ksp, (char*)"current_clock_Hz"); 552 if (!knp) { 553 std::cerr << "failed to lookup data in /dev/kstat\n"; 554 return -1; 555 } 556 if (knp->data_type != KSTAT_DATA_UINT64) { 557 std::cerr << "current_clock_Hz is of unexpected data type: " 558 << knp->data_type << "\n"; 559 return -1; 560 } 561 double clock_hz = knp->value.ui64; 562 kstat_close(kc); 563 return clock_hz; 564#endif 565 // If we've fallen through, attempt to roughly estimate the CPU clock rate. 566 const int estimate_time_ms = 1000; 567 const auto start_ticks = cycleclock::Now(); 568 SleepForMilliseconds(estimate_time_ms); 569 return static_cast<double>(cycleclock::Now() - start_ticks); 570} 571 572} // end namespace 573 574const CPUInfo& CPUInfo::Get() { 575 static const CPUInfo* info = new CPUInfo(); 576 return *info; 577} 578 579CPUInfo::CPUInfo() 580 : num_cpus(GetNumCPUs()), 581 cycles_per_second(GetCPUCyclesPerSecond()), 582 caches(GetCacheSizes()), 583 scaling_enabled(CpuScalingEnabled(num_cpus)) {} 584 585} // end namespace benchmark 586