sanitizer_allocator.h revision 245614
//===-- sanitizer_allocator.h -----------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Specialized memory allocator for ThreadSanitizer, MemorySanitizer, etc.
//
//===----------------------------------------------------------------------===//

#ifndef SANITIZER_ALLOCATOR_H
#define SANITIZER_ALLOCATOR_H

#include "sanitizer_internal_defs.h"
#include "sanitizer_common.h"
#include "sanitizer_libc.h"
#include "sanitizer_list.h"
#include "sanitizer_mutex.h"
#include "sanitizer_lfstack.h"

namespace __sanitizer {

// SizeClassMap maps allocation sizes into size classes and back.
// Class 0 corresponds to size 0.
// Classes 1 - 16 correspond to sizes 8 - 128 (size = class_id * 8).
// Next 8 classes: 128 + i * 16 (i = 1 to 8).
// Next 8 classes: 256 + i * 32 (i = 1 to 8).
// ...
// Next 8 classes: 2^k + i * 2^(k-3) (i = 1 to 8).
// Last class corresponds to kMaxSize = 1 << kMaxSizeLog.
//
// This structure of the size class map gives us:
// - Efficient table-free class-to-size and size-to-class functions.
// - The difference between two consecutive size classes is between 6% and 12%.
//
// This class also gives a hint to a thread-caching allocator about the number
// of chunks that need to be cached per-thread:
// - kMaxNumCached is the maximal number of chunks per size class.
// - (1 << kMaxBytesCachedLog) is the maximal number of bytes per size class.
//
// Part of output of SizeClassMap::Print():
// c00 => s: 0 diff: +0 00% l 0 cached: 0 0; id 0
// c01 => s: 8 diff: +8 00% l 3 cached: 256 2048; id 1
// c02 => s: 16 diff: +8 100% l 4 cached: 256 4096; id 2
// ...
// c07 => s: 56 diff: +8 16% l 5 cached: 256 14336; id 7
//
// c08 => s: 64 diff: +8 14% l 6 cached: 256 16384; id 8
// ...
// c15 => s: 120 diff: +8 07% l 6 cached: 256 30720; id 15
//
// c16 => s: 128 diff: +8 06% l 7 cached: 256 32768; id 16
// c17 => s: 144 diff: +16 12% l 7 cached: 227 32688; id 17
// ...
// c23 => s: 240 diff: +16 07% l 7 cached: 136 32640; id 23
//
// c24 => s: 256 diff: +16 06% l 8 cached: 128 32768; id 24
// c25 => s: 288 diff: +32 12% l 8 cached: 113 32544; id 25
// ...
// c31 => s: 480 diff: +32 07% l 8 cached: 68 32640; id 31
//
// c32 => s: 512 diff: +32 06% l 9 cached: 64 32768; id 32


template <uptr kMaxSizeLog, uptr kMaxNumCachedT, uptr kMaxBytesCachedLog,
          uptr kMinBatchClassT>
class SizeClassMap {
  static const uptr kMinSizeLog = 3;
  static const uptr kMidSizeLog = kMinSizeLog + 4;
  static const uptr kMinSize = 1 << kMinSizeLog;
  static const uptr kMidSize = 1 << kMidSizeLog;
  static const uptr kMidClass = kMidSize / kMinSize;
  static const uptr S = 3;
  static const uptr M = (1 << S) - 1;

 public:
  static const uptr kMaxNumCached = kMaxNumCachedT;
  struct TransferBatch {
    TransferBatch *next;
    uptr count;
    void *batch[kMaxNumCached];
  };

  static const uptr kMinBatchClass = kMinBatchClassT;
  static const uptr kMaxSize = 1 << kMaxSizeLog;
  static const uptr kNumClasses =
      kMidClass + ((kMaxSizeLog - kMidSizeLog) << S) + 1;
  COMPILER_CHECK(kNumClasses >= 32 && kNumClasses <= 256);
  static const uptr kNumClassesRounded =
      kNumClasses == 32 ? 32 :
      kNumClasses <= 64 ? 64 :
      kNumClasses <= 128 ? 128 : 256;

  static uptr Size(uptr class_id) {
    if (class_id <= kMidClass)
      return kMinSize * class_id;
    class_id -= kMidClass;
    uptr t = kMidSize << (class_id >> S);
    return t + (t >> S) * (class_id & M);
  }

  static uptr ClassID(uptr size) {
    if (size <= kMidSize)
      return (size + kMinSize - 1) >> kMinSizeLog;
    if (size > kMaxSize) return 0;
    uptr l = SANITIZER_WORDSIZE - 1 - __builtin_clzl(size);
    uptr hbits = (size >> (l - S)) & M;
    uptr lbits = size & ((1 << (l - S)) - 1);
    uptr l1 = l - kMidSizeLog;
    return kMidClass + (l1 << S) + hbits + (lbits > 0);
  }

  static uptr MaxCached(uptr class_id) {
    if (class_id == 0) return 0;
    uptr n = (1UL << kMaxBytesCachedLog) / Size(class_id);
    return Max(1UL, Min(kMaxNumCached, n));
  }

  static void Print() {
    uptr prev_s = 0;
    uptr total_cached = 0;
    for (uptr i = 0; i < kNumClasses; i++) {
      uptr s = Size(i);
      if (s >= kMidSize / 2 && (s & (s - 1)) == 0)
        Printf("\n");
      uptr d = s - prev_s;
      uptr p = prev_s ? (d * 100 / prev_s) : 0;
      uptr l = SANITIZER_WORDSIZE - 1 - __builtin_clzl(s);
      uptr cached = MaxCached(i) * s;
      Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd "
             "cached: %zd %zd; id %zd\n",
             i, Size(i), d, p, l, MaxCached(i), cached, ClassID(s));
      total_cached += cached;
      prev_s = s;
    }
    Printf("Total cached: %zd\n", total_cached);
  }

  static void Validate() {
    for (uptr c = 1; c < kNumClasses; c++) {
      // Printf("Validate: c%zd\n", c);
      uptr s = Size(c);
      CHECK_EQ(ClassID(s), c);
      if (c != kNumClasses - 1)
        CHECK_EQ(ClassID(s + 1), c + 1);
      CHECK_EQ(ClassID(s - 1), c);
      if (c)
        CHECK_GT(Size(c), Size(c-1));
    }
    CHECK_EQ(ClassID(kMaxSize + 1), 0);

    for (uptr s = 1; s <= kMaxSize; s++) {
      uptr c = ClassID(s);
      // Printf("s%zd => c%zd\n", s, c);
      CHECK_LT(c, kNumClasses);
      CHECK_GE(Size(c), s);
      if (c > 0)
        CHECK_LT(Size(c-1), s);
    }

    // TransferBatch for kMinBatchClass must fit into the block itself.
    const uptr batch_size = sizeof(TransferBatch)
        - sizeof(void*)  // NOLINT
            * (kMaxNumCached - MaxCached(kMinBatchClass));
    CHECK_LE(batch_size, Size(kMinBatchClass));
    // TransferBatch for kMinBatchClass-1 must not fit into the block itself.
    const uptr batch_size1 = sizeof(TransferBatch)
        - sizeof(void*)  // NOLINT
            * (kMaxNumCached - MaxCached(kMinBatchClass - 1));
    CHECK_GT(batch_size1, Size(kMinBatchClass - 1));
  }
};

typedef SizeClassMap<17, 256, 16, FIRST_32_SECOND_64(33, 36)>
    DefaultSizeClassMap;
typedef SizeClassMap<17, 64, 14, FIRST_32_SECOND_64(25, 28)>
    CompactSizeClassMap;
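
// Example (illustrative only; the concrete numbers below are not defined by
// this header, they simply follow from the formulas above and can be
// re-checked with Validate()/Print()). For DefaultSizeClassMap the mapping
// round-trips like this:
//   uptr c1 = DefaultSizeClassMap::ClassID(100);  // c1 == 13 (8-byte steps up to 128)
//   uptr s1 = DefaultSizeClassMap::Size(c1);      // s1 == 104 >= 100
//   uptr c2 = DefaultSizeClassMap::ClassID(200);  // c2 == 21
//   uptr s2 = DefaultSizeClassMap::Size(c2);      // s2 == 208 >= 200
// Requests larger than kMaxSize (1 << 17 here) map to class 0 and must be
// served by a secondary allocator.
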
template<class SizeClassAllocator> struct SizeClassAllocatorLocalCache;

// Allocators call these callbacks on mmap/munmap.
struct NoOpMapUnmapCallback {
  void OnMap(uptr p, uptr size) const { }
  void OnUnmap(uptr p, uptr size) const { }
};

// SizeClassAllocator64 -- allocator for 64-bit address space.
//
// Space: a portion of address space of kSpaceSize bytes starting at
// a fixed address (kSpaceBeg). Both constants are powers of two and
// kSpaceBeg is kSpaceSize-aligned.
// At the beginning the entire space is mprotect-ed, then small parts of it
// are mapped on demand.
//
// Region: a part of Space dedicated to a single size class.
// There are kNumClasses Regions of equal size.
//
// UserChunk: a piece of memory returned to the user.
// MetaChunk: kMetadataSize bytes of metadata associated with a UserChunk.
//
// A Region looks like this:
// UserChunk1 ... UserChunkN <gap> MetaChunkN ... MetaChunk1
template <const uptr kSpaceBeg, const uptr kSpaceSize,
          const uptr kMetadataSize, class SizeClassMap,
          class MapUnmapCallback = NoOpMapUnmapCallback>
class SizeClassAllocator64 {
 public:
  typedef typename SizeClassMap::TransferBatch Batch;
  typedef SizeClassAllocator64<kSpaceBeg, kSpaceSize, kMetadataSize,
      SizeClassMap, MapUnmapCallback> ThisT;
  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;

  void Init() {
    CHECK_EQ(kSpaceBeg,
             reinterpret_cast<uptr>(Mprotect(kSpaceBeg, kSpaceSize)));
    MapWithCallback(kSpaceEnd, AdditionalSize());
  }

  void MapWithCallback(uptr beg, uptr size) {
    CHECK_EQ(beg, reinterpret_cast<uptr>(MmapFixedOrDie(beg, size)));
    MapUnmapCallback().OnMap(beg, size);
  }

  void UnmapWithCallback(uptr beg, uptr size) {
    MapUnmapCallback().OnUnmap(beg, size);
    UnmapOrDie(reinterpret_cast<void *>(beg), size);
  }

  static bool CanAllocate(uptr size, uptr alignment) {
    return size <= SizeClassMap::kMaxSize &&
        alignment <= SizeClassMap::kMaxSize;
  }

  Batch *NOINLINE AllocateBatch(AllocatorCache *c, uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    RegionInfo *region = GetRegionInfo(class_id);
    Batch *b = region->free_list.Pop();
    if (b == 0)
      b = PopulateFreeList(c, class_id, region);
    region->n_allocated += b->count;
    return b;
  }

  void NOINLINE DeallocateBatch(uptr class_id, Batch *b) {
    RegionInfo *region = GetRegionInfo(class_id);
    region->free_list.Push(b);
    region->n_freed += b->count;
  }

  static bool PointerIsMine(void *p) {
    return reinterpret_cast<uptr>(p) / kSpaceSize == kSpaceBeg / kSpaceSize;
  }

  static uptr GetSizeClass(void *p) {
    return (reinterpret_cast<uptr>(p) / kRegionSize) % kNumClassesRounded;
  }

  void *GetBlockBegin(void *p) {
    uptr class_id = GetSizeClass(p);
    uptr size = SizeClassMap::Size(class_id);
    uptr chunk_idx = GetChunkIdx((uptr)p, size);
    uptr reg_beg = (uptr)p & ~(kRegionSize - 1);
    uptr beg = chunk_idx * size;
    uptr next_beg = beg + size;
    RegionInfo *region = GetRegionInfo(class_id);
    if (region->mapped_user >= next_beg)
      return reinterpret_cast<void*>(reg_beg + beg);
    return 0;
  }

  static uptr GetActuallyAllocatedSize(void *p) {
    CHECK(PointerIsMine(p));
    return SizeClassMap::Size(GetSizeClass(p));
  }

  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }

  void *GetMetaData(void *p) {
    uptr class_id = GetSizeClass(p);
    uptr size = SizeClassMap::Size(class_id);
    uptr chunk_idx = GetChunkIdx(reinterpret_cast<uptr>(p), size);
    return reinterpret_cast<void*>(kSpaceBeg + (kRegionSize * (class_id + 1)) -
                                   (1 + chunk_idx) * kMetadataSize);
  }

  uptr TotalMemoryUsed() {
    uptr res = 0;
    for (uptr i = 0; i < kNumClasses; i++)
      res += GetRegionInfo(i)->allocated_user;
    return res;
  }

  // Test-only.
  void TestOnlyUnmap() {
    UnmapWithCallback(kSpaceBeg, kSpaceSize + AdditionalSize());
  }

  void PrintStats() {
    uptr total_mapped = 0;
    uptr n_allocated = 0;
    uptr n_freed = 0;
    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
      RegionInfo *region = GetRegionInfo(class_id);
      total_mapped += region->mapped_user;
      n_allocated += region->n_allocated;
      n_freed += region->n_freed;
    }
    Printf("Stats: SizeClassAllocator64: %zdM mapped in %zd allocations; "
           "remains %zd\n",
           total_mapped >> 20, n_allocated, n_allocated - n_freed);
    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
      RegionInfo *region = GetRegionInfo(class_id);
      if (region->mapped_user == 0) continue;
      Printf("  %02zd (%zd): total: %zd K allocs: %zd remains: %zd\n",
             class_id,
             SizeClassMap::Size(class_id),
             region->mapped_user >> 10,
             region->n_allocated,
             region->n_allocated - region->n_freed);
    }
  }

  typedef SizeClassMap SizeClassMapT;
  static const uptr kNumClasses = SizeClassMap::kNumClasses;
  static const uptr kNumClassesRounded = SizeClassMap::kNumClassesRounded;

 private:
  static const uptr kRegionSize = kSpaceSize / kNumClassesRounded;
  static const uptr kSpaceEnd = kSpaceBeg + kSpaceSize;
  COMPILER_CHECK(kSpaceBeg % kSpaceSize == 0);
  // kRegionSize must be >= 2^32.
  COMPILER_CHECK((kRegionSize) >= (1ULL << (SANITIZER_WORDSIZE / 2)));
  // Populate the free list with at most this number of bytes at once
  // or with one element if its size is greater.
  static const uptr kPopulateSize = 1 << 14;
  // Call mmap for user memory with at least this size.
  static const uptr kUserMapSize = 1 << 15;
  // Call mmap for metadata memory with at least this size.
  static const uptr kMetaMapSize = 1 << 16;

  struct RegionInfo {
    BlockingMutex mutex;
    LFStack<Batch> free_list;
    uptr allocated_user;  // Bytes allocated for user memory.
    uptr allocated_meta;  // Bytes allocated for metadata.
    uptr mapped_user;  // Bytes mapped for user memory.
    uptr mapped_meta;  // Bytes mapped for metadata.
    uptr n_allocated, n_freed;  // Just stats.
  };
  COMPILER_CHECK(sizeof(RegionInfo) >= kCacheLineSize);

  static uptr AdditionalSize() {
    return RoundUpTo(sizeof(RegionInfo) * kNumClassesRounded,
                     GetPageSizeCached());
  }

  RegionInfo *GetRegionInfo(uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    RegionInfo *regions = reinterpret_cast<RegionInfo*>(kSpaceBeg + kSpaceSize);
    return &regions[class_id];
  }

  static uptr GetChunkIdx(uptr chunk, uptr size) {
    uptr offset = chunk % kRegionSize;
    // Here we divide by a non-constant. This is costly.
    // size always fits into 32 bits; if the offset fits too, use the cheaper
    // 32-bit division, otherwise fall back to the full-width one.
    if (offset >> (SANITIZER_WORDSIZE / 2))
      return offset / size;
    return (u32)offset / (u32)size;
  }

  Batch *NOINLINE PopulateFreeList(AllocatorCache *c, uptr class_id,
                                   RegionInfo *region) {
    BlockingMutexLock l(&region->mutex);
    Batch *b = region->free_list.Pop();
    if (b)
      return b;
    uptr size = SizeClassMap::Size(class_id);
    uptr count = size < kPopulateSize ? SizeClassMap::MaxCached(class_id) : 1;
    uptr beg_idx = region->allocated_user;
    uptr end_idx = beg_idx + count * size;
    uptr region_beg = kSpaceBeg + kRegionSize * class_id;
    if (end_idx + size > region->mapped_user) {
      // Do the mmap for the user memory.
      uptr map_size = kUserMapSize;
      while (end_idx + size > region->mapped_user + map_size)
        map_size += kUserMapSize;
      CHECK_GE(region->mapped_user + map_size, end_idx);
      MapWithCallback(region_beg + region->mapped_user, map_size);
      region->mapped_user += map_size;
    }
    uptr total_count = (region->mapped_user - beg_idx - size)
        / size / count * count;
    region->allocated_meta += total_count * kMetadataSize;
    if (region->allocated_meta > region->mapped_meta) {
      uptr map_size = kMetaMapSize;
      while (region->allocated_meta > region->mapped_meta + map_size)
        map_size += kMetaMapSize;
      // Do the mmap for the metadata.
      CHECK_GE(region->mapped_meta + map_size, region->allocated_meta);
      MapWithCallback(region_beg + kRegionSize -
                      region->mapped_meta - map_size, map_size);
      region->mapped_meta += map_size;
    }
    CHECK_LE(region->allocated_meta, region->mapped_meta);
    if (region->allocated_user + region->allocated_meta > kRegionSize) {
      Printf("Out of memory. Dying.\n");
      Printf("The process has exhausted %zuMB for size class %zu.\n",
             kRegionSize / 1024 / 1024, size);
      Die();
    }
    for (;;) {
      if (class_id < SizeClassMap::kMinBatchClass)
        b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
      else
        b = (Batch*)(region_beg + beg_idx);
      b->count = count;
      for (uptr i = 0; i < count; i++)
        b->batch[i] = (void*)(region_beg + beg_idx + i * size);
      region->allocated_user += count * size;
      CHECK_LE(region->allocated_user, region->mapped_user);
      beg_idx += count * size;
      if (beg_idx + count * size + size > region->mapped_user)
        break;
      region->free_list.Push(b);
    }
    return b;
  }
};
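
// Example (illustrative only; the constants and the ExamplePrimary name are
// placeholders, not something this header defines). A plausible 64-bit
// instantiation could look like:
//
//   typedef SizeClassAllocator64<0x600000000000ULL /* kSpaceBeg  */,
//                                0x40000000000ULL  /* kSpaceSize */,
//                                16                /* kMetadataSize */,
//                                DefaultSizeClassMap> ExamplePrimary;
//
// With DefaultSizeClassMap this yields kNumClassesRounded == 128 regions of
// kRegionSize == 0x40000000000 / 128 == 2^35 bytes each, which satisfies the
// COMPILER_CHECKs above. User chunks are not handed out by this class
// directly; they travel in TransferBatches via AllocateBatch/DeallocateBatch
// and the per-thread SizeClassAllocatorLocalCache defined below.
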
// SizeClassAllocator32 -- allocator for 32-bit address space.
// This allocator can theoretically be used on a 64-bit arch, but there it is
// less efficient than SizeClassAllocator64.
//
// [kSpaceBeg, kSpaceBeg + kSpaceSize) is the range of addresses which can
// be returned by MmapOrDie().
//
// Region:
// a result of a single call to MmapAlignedOrDie(kRegionSize, kRegionSize).
// Since the regions are aligned by kRegionSize, there are exactly
// kNumPossibleRegions possible regions in the address space and so we keep
// a u8 array possible_regions[kNumPossibleRegions] to store the size classes.
// 0 size class means the region is not used by the allocator.
//
// One Region is used to allocate chunks of a single size class.
// A Region looks like this:
// UserChunk1 .. UserChunkN <gap> MetaChunkN .. MetaChunk1
//
// In order to avoid false sharing the objects of this class should be
// cache-line aligned.
template <const uptr kSpaceBeg, const u64 kSpaceSize,
          const uptr kMetadataSize, class SizeClassMap,
          class MapUnmapCallback = NoOpMapUnmapCallback>
class SizeClassAllocator32 {
 public:
  typedef typename SizeClassMap::TransferBatch Batch;
  typedef SizeClassAllocator32<kSpaceBeg, kSpaceSize, kMetadataSize,
      SizeClassMap, MapUnmapCallback> ThisT;
  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;

  void Init() {
    state_ = reinterpret_cast<State *>(MapWithCallback(sizeof(State)));
  }

  void *MapWithCallback(uptr size) {
    size = RoundUpTo(size, GetPageSizeCached());
    void *res = MmapOrDie(size, "SizeClassAllocator32");
    MapUnmapCallback().OnMap((uptr)res, size);
    return res;
  }
  void UnmapWithCallback(uptr beg, uptr size) {
    MapUnmapCallback().OnUnmap(beg, size);
    UnmapOrDie(reinterpret_cast<void *>(beg), size);
  }

  static bool CanAllocate(uptr size, uptr alignment) {
    return size <= SizeClassMap::kMaxSize &&
        alignment <= SizeClassMap::kMaxSize;
  }

  void *GetMetaData(void *p) {
    CHECK(PointerIsMine(p));
    uptr mem = reinterpret_cast<uptr>(p);
    uptr beg = ComputeRegionBeg(mem);
    uptr size = SizeClassMap::Size(GetSizeClass(p));
    u32 offset = mem - beg;
    uptr n = offset / (u32)size;  // 32-bit division
    uptr meta = (beg + kRegionSize) - (n + 1) * kMetadataSize;
    return reinterpret_cast<void*>(meta);
  }

  Batch *NOINLINE AllocateBatch(AllocatorCache *c, uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    SizeClassInfo *sci = GetSizeClassInfo(class_id);
    SpinMutexLock l(&sci->mutex);
    if (sci->free_list.empty())
      PopulateFreeList(c, sci, class_id);
    CHECK(!sci->free_list.empty());
    Batch *b = sci->free_list.front();
    sci->free_list.pop_front();
    return b;
  }

  void NOINLINE DeallocateBatch(uptr class_id, Batch *b) {
    CHECK_LT(class_id, kNumClasses);
    SizeClassInfo *sci = GetSizeClassInfo(class_id);
    SpinMutexLock l(&sci->mutex);
    sci->free_list.push_front(b);
  }

  bool PointerIsMine(void *p) {
    return GetSizeClass(p) != 0;
  }

  uptr GetSizeClass(void *p) {
    return state_->possible_regions[ComputeRegionId(reinterpret_cast<uptr>(p))];
  }

  void *GetBlockBegin(void *p) {
    CHECK(PointerIsMine(p));
    uptr mem = reinterpret_cast<uptr>(p);
    uptr beg = ComputeRegionBeg(mem);
    uptr size = SizeClassMap::Size(GetSizeClass(p));
    u32 offset = mem - beg;
    u32 n = offset / (u32)size;  // 32-bit division
    uptr res = beg + (n * (u32)size);
    return reinterpret_cast<void*>(res);
  }

  uptr GetActuallyAllocatedSize(void *p) {
    CHECK(PointerIsMine(p));
    return SizeClassMap::Size(GetSizeClass(p));
  }

  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }

  uptr TotalMemoryUsed() {
    // No need to lock here.
    uptr res = 0;
    for (uptr i = 0; i < kNumPossibleRegions; i++)
      if (state_->possible_regions[i])
        res += kRegionSize;
    return res;
  }

  void TestOnlyUnmap() {
    for (uptr i = 0; i < kNumPossibleRegions; i++)
      if (state_->possible_regions[i])
        UnmapWithCallback((i * kRegionSize), kRegionSize);
    UnmapWithCallback(reinterpret_cast<uptr>(state_), sizeof(State));
  }

  void PrintStats() {
  }

  typedef SizeClassMap SizeClassMapT;
  static const uptr kNumClasses = SizeClassMap::kNumClasses;

 private:
  static const uptr kRegionSizeLog = SANITIZER_WORDSIZE == 64 ? 24 : 20;
  static const uptr kRegionSize = 1 << kRegionSizeLog;
  static const uptr kNumPossibleRegions = kSpaceSize / kRegionSize;

  struct SizeClassInfo {
    SpinMutex mutex;
    IntrusiveList<Batch> free_list;
    char padding[kCacheLineSize - sizeof(uptr) - sizeof(IntrusiveList<Batch>)];
  };
  COMPILER_CHECK(sizeof(SizeClassInfo) == kCacheLineSize);

  uptr ComputeRegionId(uptr mem) {
    uptr res = mem >> kRegionSizeLog;
    CHECK_LT(res, kNumPossibleRegions);
    return res;
  }

  uptr ComputeRegionBeg(uptr mem) {
    return mem & ~(kRegionSize - 1);
  }

  uptr AllocateRegion(uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    uptr res = reinterpret_cast<uptr>(MmapAlignedOrDie(kRegionSize, kRegionSize,
                                      "SizeClassAllocator32"));
    MapUnmapCallback().OnMap(res, kRegionSize);
    CHECK_EQ(0U, (res & (kRegionSize - 1)));
    CHECK_EQ(0U, state_->possible_regions[ComputeRegionId(res)]);
    state_->possible_regions[ComputeRegionId(res)] = class_id;
    return res;
  }

  SizeClassInfo *GetSizeClassInfo(uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    return &state_->size_class_info_array[class_id];
  }

  void PopulateFreeList(AllocatorCache *c, SizeClassInfo *sci, uptr class_id) {
    uptr size = SizeClassMap::Size(class_id);
    uptr reg = AllocateRegion(class_id);
    uptr n_chunks = kRegionSize / (size + kMetadataSize);
    uptr max_count = SizeClassMap::MaxCached(class_id);
    Batch *b = 0;
    for (uptr i = reg; i < reg + n_chunks * size; i += size) {
      if (b == 0) {
        if (class_id < SizeClassMap::kMinBatchClass)
          b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
        else
          b = (Batch*)i;
        b->count = 0;
      }
      b->batch[b->count++] = (void*)i;
      if (b->count == max_count) {
        sci->free_list.push_back(b);
        b = 0;
      }
    }
    if (b)
      sci->free_list.push_back(b);
  }

  struct State {
    u8 possible_regions[kNumPossibleRegions];
    SizeClassInfo size_class_info_array[kNumClasses];
  };
  State *state_;
};
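
// Example (illustrative only): where metadata lives in a region of this
// allocator. Assuming a 32-bit target (kRegionSize == 1 << 20),
// kMetadataSize == 16 and a 64-byte size class, the chunk at beg + 640
// (i.e. n == 10) has its metadata at
//   (beg + (1 << 20)) - (10 + 1) * 16 == beg + 1048400,
// so user chunks grow up from the region start while metadata grows down
// from the region end, matching the layout comment above.
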
// Objects of this type should be used as local caches for SizeClassAllocator64
// or SizeClassAllocator32. Since the typical use of this class is to have one
// object per thread in TLS, it has to be POD.
template<class SizeClassAllocator>
struct SizeClassAllocatorLocalCache {
  typedef SizeClassAllocator Allocator;
  static const uptr kNumClasses = SizeClassAllocator::kNumClasses;

  // Don't need to call Init if the object is a global (i.e. zero-initialized).
  void Init() {
    internal_memset(this, 0, sizeof(*this));
  }

  void *Allocate(SizeClassAllocator *allocator, uptr class_id) {
    CHECK_NE(class_id, 0UL);
    CHECK_LT(class_id, kNumClasses);
    PerClass *c = &per_class_[class_id];
    if (UNLIKELY(c->count == 0))
      Refill(allocator, class_id);
    void *res = c->batch[--c->count];
    PREFETCH(c->batch[c->count - 1]);
    return res;
  }

  void Deallocate(SizeClassAllocator *allocator, uptr class_id, void *p) {
    CHECK_NE(class_id, 0UL);
    CHECK_LT(class_id, kNumClasses);
    PerClass *c = &per_class_[class_id];
    if (UNLIKELY(c->count == c->max_count))
      Drain(allocator, class_id);
    c->batch[c->count++] = p;
  }

  void Drain(SizeClassAllocator *allocator) {
    for (uptr class_id = 0; class_id < kNumClasses; class_id++) {
      PerClass *c = &per_class_[class_id];
      while (c->count > 0)
        Drain(allocator, class_id);
    }
  }

  // private:
  typedef typename SizeClassAllocator::SizeClassMapT SizeClassMap;
  typedef typename SizeClassMap::TransferBatch Batch;
  struct PerClass {
    uptr count;
    uptr max_count;
    void *batch[2 * SizeClassMap::kMaxNumCached];
  };
  PerClass per_class_[kNumClasses];

  void InitCache() {
    if (per_class_[0].max_count)
      return;
    for (uptr i = 0; i < kNumClasses; i++) {
      PerClass *c = &per_class_[i];
      c->max_count = 2 * SizeClassMap::MaxCached(i);
    }
  }

  void NOINLINE Refill(SizeClassAllocator *allocator, uptr class_id) {
    InitCache();
    PerClass *c = &per_class_[class_id];
    Batch *b = allocator->AllocateBatch(this, class_id);
    for (uptr i = 0; i < b->count; i++)
      c->batch[i] = b->batch[i];
    c->count = b->count;
    if (class_id < SizeClassMap::kMinBatchClass)
      Deallocate(allocator, SizeClassMap::ClassID(sizeof(Batch)), b);
  }

  void NOINLINE Drain(SizeClassAllocator *allocator, uptr class_id) {
    InitCache();
    PerClass *c = &per_class_[class_id];
    Batch *b;
    if (class_id < SizeClassMap::kMinBatchClass)
      b = (Batch*)Allocate(allocator, SizeClassMap::ClassID(sizeof(Batch)));
    else
      b = (Batch*)c->batch[0];
    uptr cnt = Min(c->max_count / 2, c->count);
    for (uptr i = 0; i < cnt; i++) {
      b->batch[i] = c->batch[i];
      c->batch[i] = c->batch[i + c->max_count / 2];
    }
    b->count = cnt;
    c->count -= cnt;
    allocator->DeallocateBatch(class_id, b);
  }
};
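
// Example (illustrative only; ExamplePrimary is the placeholder typedef from
// the SizeClassAllocator64 sketch above, and THREADLOCAL is assumed to be the
// TLS macro from sanitizer_internal_defs.h). Typical per-thread use:
//
//   static ExamplePrimary example_allocator;                       // shared
//   static THREADLOCAL ExamplePrimary::AllocatorCache example_cache;
//   // The cache is POD and zero-initialized, so no explicit Init() is needed.
//
//   uptr c = ExamplePrimary::SizeClassMapT::ClassID(40);
//   void *p = example_cache.Allocate(&example_allocator, c);
//   example_cache.Deallocate(&example_allocator, c, p);
//   // On thread exit, return all cached chunks to the shared allocator:
//   example_cache.Drain(&example_allocator);
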
// This class can (de)allocate only large chunks of memory using mmap/unmap.
// The main purpose of this allocator is to cover large and rare allocation
// sizes not covered by more efficient allocators (e.g. SizeClassAllocator64).
template <class MapUnmapCallback = NoOpMapUnmapCallback>
class LargeMmapAllocator {
 public:
  void Init() {
    internal_memset(this, 0, sizeof(*this));
    page_size_ = GetPageSizeCached();
  }

  void *Allocate(uptr size, uptr alignment) {
    CHECK(IsPowerOfTwo(alignment));
    uptr map_size = RoundUpMapSize(size);
    if (alignment > page_size_)
      map_size += alignment;
    if (map_size < size) return 0;  // Overflow.
    uptr map_beg = reinterpret_cast<uptr>(
        MmapOrDie(map_size, "LargeMmapAllocator"));
    MapUnmapCallback().OnMap(map_beg, map_size);
    uptr map_end = map_beg + map_size;
    uptr res = map_beg + page_size_;
    if (res & (alignment - 1))  // Align.
      res += alignment - (res & (alignment - 1));
    CHECK_EQ(0, res & (alignment - 1));
    CHECK_LE(res + size, map_end);
    Header *h = GetHeader(res);
    h->size = size;
    h->map_beg = map_beg;
    h->map_size = map_size;
    uptr size_log = SANITIZER_WORDSIZE - __builtin_clzl(map_size) - 1;
    CHECK_LT(size_log, ARRAY_SIZE(stats.by_size_log));
    {
      SpinMutexLock l(&mutex_);
      uptr idx = n_chunks_++;
      CHECK_LT(idx, kMaxNumChunks);
      h->chunk_idx = idx;
      chunks_[idx] = h;
      stats.n_allocs++;
      stats.currently_allocated += map_size;
      stats.max_allocated = Max(stats.max_allocated, stats.currently_allocated);
      stats.by_size_log[size_log]++;
    }
    return reinterpret_cast<void*>(res);
  }

  void Deallocate(void *p) {
    Header *h = GetHeader(p);
    {
      SpinMutexLock l(&mutex_);
      uptr idx = h->chunk_idx;
      CHECK_EQ(chunks_[idx], h);
      CHECK_LT(idx, n_chunks_);
      chunks_[idx] = chunks_[n_chunks_ - 1];
      chunks_[idx]->chunk_idx = idx;
      n_chunks_--;
      stats.n_frees++;
      stats.currently_allocated -= h->map_size;
    }
    MapUnmapCallback().OnUnmap(h->map_beg, h->map_size);
    UnmapOrDie(reinterpret_cast<void*>(h->map_beg), h->map_size);
  }

  uptr TotalMemoryUsed() {
    SpinMutexLock l(&mutex_);
    uptr res = 0;
    for (uptr i = 0; i < n_chunks_; i++) {
      Header *h = chunks_[i];
      CHECK_EQ(h->chunk_idx, i);
      res += RoundUpMapSize(h->size);
    }
    return res;
  }

  bool PointerIsMine(void *p) {
    return GetBlockBegin(p) != 0;
  }

  uptr GetActuallyAllocatedSize(void *p) {
    return RoundUpTo(GetHeader(p)->size, page_size_);
  }

  // At least page_size_/2 metadata bytes are available.
  void *GetMetaData(void *p) {
    // Too slow: CHECK_EQ(p, GetBlockBegin(p));
    CHECK(IsAligned(reinterpret_cast<uptr>(p), page_size_));
    return GetHeader(p) + 1;
  }

  void *GetBlockBegin(void *ptr) {
    uptr p = reinterpret_cast<uptr>(ptr);
    SpinMutexLock l(&mutex_);
    uptr nearest_chunk = 0;
    // Cache-friendly linear search.
    for (uptr i = 0; i < n_chunks_; i++) {
      uptr ch = reinterpret_cast<uptr>(chunks_[i]);
      if (p < ch) continue;  // p is to the left of this chunk, skip it.
      if (p - ch < p - nearest_chunk)
        nearest_chunk = ch;
    }
    if (!nearest_chunk)
      return 0;
    Header *h = reinterpret_cast<Header *>(nearest_chunk);
    CHECK_GE(nearest_chunk, h->map_beg);
    CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
    CHECK_LE(nearest_chunk, p);
    if (h->map_beg + h->map_size < p)
      return 0;
    return GetUser(h);
  }

  void PrintStats() {
    Printf("Stats: LargeMmapAllocator: allocated %zd times, "
           "remains %zd (%zd K) max %zd M; by size logs: ",
           stats.n_allocs, stats.n_allocs - stats.n_frees,
           stats.currently_allocated >> 10, stats.max_allocated >> 20);
    for (uptr i = 0; i < ARRAY_SIZE(stats.by_size_log); i++) {
      uptr c = stats.by_size_log[i];
      if (!c) continue;
      Printf("%zd:%zd; ", i, c);
    }
    Printf("\n");
  }

 private:
  static const int kMaxNumChunks = 1 << FIRST_32_SECOND_64(15, 18);
  struct Header {
    uptr map_beg;
    uptr map_size;
    uptr size;
    uptr chunk_idx;
  };

  Header *GetHeader(uptr p) {
    CHECK_EQ(p % page_size_, 0);
    return reinterpret_cast<Header*>(p - page_size_);
  }
  Header *GetHeader(void *p) { return GetHeader(reinterpret_cast<uptr>(p)); }

  void *GetUser(Header *h) {
    CHECK_EQ((uptr)h % page_size_, 0);
    return reinterpret_cast<void*>(reinterpret_cast<uptr>(h) + page_size_);
  }

  uptr RoundUpMapSize(uptr size) {
    return RoundUpTo(size, page_size_) + page_size_;
  }

  uptr page_size_;
  Header *chunks_[kMaxNumChunks];
  uptr n_chunks_;
  struct Stats {
    uptr n_allocs, n_frees, currently_allocated, max_allocated, by_size_log[64];
  } stats;
  SpinMutex mutex_;
};

// This class implements a complete memory allocator by using two
// internal allocators:
// PrimaryAllocator is efficient, but may not allocate some sizes (alignments).
// When allocating 2^x bytes it should return 2^x aligned chunk.
// PrimaryAllocator is used via a local AllocatorCache.
// SecondaryAllocator can allocate anything, but is not efficient.
template <class PrimaryAllocator, class AllocatorCache,
          class SecondaryAllocator>  // NOLINT
class CombinedAllocator {
 public:
  void Init() {
    primary_.Init();
    secondary_.Init();
  }

  void *Allocate(AllocatorCache *cache, uptr size, uptr alignment,
                 bool cleared = false) {
    // Returning 0 on malloc(0) may break a lot of code.
    if (size == 0)
      size = 1;
    if (size + alignment < size)
      return 0;
    if (alignment > 8)
      size = RoundUpTo(size, alignment);
    void *res;
    if (primary_.CanAllocate(size, alignment))
      res = cache->Allocate(&primary_, primary_.ClassID(size));
    else
      res = secondary_.Allocate(size, alignment);
    if (alignment > 8)
      CHECK_EQ(reinterpret_cast<uptr>(res) & (alignment - 1), 0);
    if (cleared && res)
      internal_memset(res, 0, size);
    return res;
  }

  void Deallocate(AllocatorCache *cache, void *p) {
    if (!p) return;
    if (primary_.PointerIsMine(p))
      cache->Deallocate(&primary_, primary_.GetSizeClass(p), p);
    else
      secondary_.Deallocate(p);
  }

  void *Reallocate(AllocatorCache *cache, void *p, uptr new_size,
                   uptr alignment) {
    if (!p)
      return Allocate(cache, new_size, alignment);
    if (!new_size) {
      Deallocate(cache, p);
      return 0;
    }
    CHECK(PointerIsMine(p));
    uptr old_size = GetActuallyAllocatedSize(p);
    uptr memcpy_size = Min(new_size, old_size);
    void *new_p = Allocate(cache, new_size, alignment);
    if (new_p)
      internal_memcpy(new_p, p, memcpy_size);
    Deallocate(cache, p);
    return new_p;
  }

  bool PointerIsMine(void *p) {
    if (primary_.PointerIsMine(p))
      return true;
    return secondary_.PointerIsMine(p);
  }

  bool FromPrimary(void *p) {
    return primary_.PointerIsMine(p);
  }

  void *GetMetaData(void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetMetaData(p);
    return secondary_.GetMetaData(p);
  }

  void *GetBlockBegin(void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetBlockBegin(p);
    return secondary_.GetBlockBegin(p);
  }

  uptr GetActuallyAllocatedSize(void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetActuallyAllocatedSize(p);
    return secondary_.GetActuallyAllocatedSize(p);
  }

  uptr TotalMemoryUsed() {
    return primary_.TotalMemoryUsed() + secondary_.TotalMemoryUsed();
  }

  void TestOnlyUnmap() { primary_.TestOnlyUnmap(); }

  void SwallowCache(AllocatorCache *cache) {
    cache->Drain(&primary_);
  }

  void PrintStats() {
    primary_.PrintStats();
    secondary_.PrintStats();
  }

 private:
  PrimaryAllocator primary_;
  SecondaryAllocator secondary_;
};
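
// Example (illustrative only; all Example* names reuse the placeholder
// typedefs from the sketches above, and example_cache is the per-thread cache
// from the local-cache sketch). A complete allocator combines the primary,
// its per-thread cache and the mmap-based secondary:
//
//   typedef SizeClassAllocatorLocalCache<ExamplePrimary> ExampleCache;
//   typedef LargeMmapAllocator<> ExampleSecondary;
//   typedef CombinedAllocator<ExamplePrimary, ExampleCache,
//                             ExampleSecondary> ExampleAllocator;
//
//   static ExampleAllocator example_combined;
//   example_combined.Init();
//   // Small requests go to the primary through the per-thread cache; large
//   // or strongly-aligned ones fall through to LargeMmapAllocator:
//   void *p = example_combined.Allocate(&example_cache, 96, 8);
//   void *q = example_combined.Allocate(&example_cache, 1 << 20, 8);
//   example_combined.Deallocate(&example_cache, p);
//   example_combined.Deallocate(&example_cache, q);
//   // When a thread dies, its cached chunks are returned to the primary:
//   example_combined.SwallowCache(&example_cache);
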
}  // namespace __sanitizer

#endif  // SANITIZER_ALLOCATOR_H