//===-- sanitizer_allocator.h ------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Specialized memory allocator for ThreadSanitizer, MemorySanitizer, etc.
//
//===----------------------------------------------------------------------===//

#ifndef SANITIZER_ALLOCATOR_H
#define SANITIZER_ALLOCATOR_H

#include "sanitizer_internal_defs.h"
#include "sanitizer_common.h"
#include "sanitizer_libc.h"
#include "sanitizer_list.h"
#include "sanitizer_mutex.h"
#include "sanitizer_lfstack.h"

namespace __sanitizer {

// SizeClassMap maps allocation sizes into size classes and back.
// Class 0 corresponds to size 0.
// Classes 1 - 16 correspond to sizes 8 - 128 (size = class_id * 8).
// Next 8 classes: 128 + i * 16 (i = 1 to 8).
// Next 8 classes: 256 + i * 32 (i = 1 to 8).
// ...
// Next 8 classes: 2^k + i * 2^(k-3) (i = 1 to 8).
// The last class corresponds to kMaxSize = 1 << kMaxSizeLog.
//
// This structure of the size class map gives us:
//   - Efficient table-free class-to-size and size-to-class functions.
//   - The difference between two consecutive size classes is between 6% and 12%.
//
// This class also gives a hint to a thread-caching allocator about the number
// of chunks that need to be cached per thread:
//  - kMaxNumCached is the maximum number of chunks per size class.
//  - (1 << kMaxBytesCachedLog) is the maximum number of bytes per size class.
//
// Part of output of SizeClassMap::Print():
//    c00 => s: 0 diff: +0 00% l 0 cached: 0 0; id 0
//    c01 => s: 8 diff: +8 00% l 3 cached: 256 2048; id 1
//    c02 => s: 16 diff: +8 100% l 4 cached: 256 4096; id 2
//    ...
//    c07 => s: 56 diff: +8 16% l 5 cached: 256 14336; id 7
//
//    c08 => s: 64 diff: +8 14% l 6 cached: 256 16384; id 8
//    ...
//    c15 => s: 120 diff: +8 07% l 6 cached: 256 30720; id 15
//
//    c16 => s: 128 diff: +8 06% l 7 cached: 256 32768; id 16
//    c17 => s: 144 diff: +16 12% l 7 cached: 227 32688; id 17
//    ...
//    c23 => s: 240 diff: +16 07% l 7 cached: 136 32640; id 23
//
//    c24 => s: 256 diff: +16 06% l 8 cached: 128 32768; id 24
//    c25 => s: 288 diff: +32 12% l 8 cached: 113 32544; id 25
//    ...
//    c31 => s: 480 diff: +32 07% l 8 cached: 68 32640; id 31
//
//    c32 => s: 512 diff: +32 06% l 9 cached: 64 32768; id 32


template <uptr kMaxSizeLog, uptr kMaxNumCachedT, uptr kMaxBytesCachedLog,
          uptr kMinBatchClassT>
class SizeClassMap {
  static const uptr kMinSizeLog = 3;
  static const uptr kMidSizeLog = kMinSizeLog + 4;
  static const uptr kMinSize = 1 << kMinSizeLog;
  static const uptr kMidSize = 1 << kMidSizeLog;
  static const uptr kMidClass = kMidSize / kMinSize;
  static const uptr S = 3;
  static const uptr M = (1 << S) - 1;

 public:
  static const uptr kMaxNumCached = kMaxNumCachedT;
  struct TransferBatch {
    TransferBatch *next;
    uptr count;
    void *batch[kMaxNumCached];
  };

  static const uptr kMinBatchClass = kMinBatchClassT;
  static const uptr kMaxSize = 1 << kMaxSizeLog;
  static const uptr kNumClasses =
      kMidClass + ((kMaxSizeLog - kMidSizeLog) << S) + 1;
  COMPILER_CHECK(kNumClasses >= 32 && kNumClasses <= 256);
  static const uptr kNumClassesRounded =
      kNumClasses == 32  ? 32 :
      kNumClasses <= 64  ? 64 :
      kNumClasses <= 128 ? 128 : 256;

  static uptr Size(uptr class_id) {
    if (class_id <= kMidClass)
      return kMinSize * class_id;
    class_id -= kMidClass;
    uptr t = kMidSize << (class_id >> S);
    return t + (t >> S) * (class_id & M);
  }

  static uptr ClassID(uptr size) {
    if (size <= kMidSize)
      return (size + kMinSize - 1) >> kMinSizeLog;
    if (size > kMaxSize) return 0;
    uptr l = SANITIZER_WORDSIZE - 1 - __builtin_clzl(size);
    uptr hbits = (size >> (l - S)) & M;
    uptr lbits = size & ((1 << (l - S)) - 1);
    uptr l1 = l - kMidSizeLog;
    return kMidClass + (l1 << S) + hbits + (lbits > 0);
  }

  static uptr MaxCached(uptr class_id) {
    if (class_id == 0) return 0;
    uptr n = (1UL << kMaxBytesCachedLog) / Size(class_id);
    return Max(1UL, Min(kMaxNumCached, n));
  }

  static void Print() {
    uptr prev_s = 0;
    uptr total_cached = 0;
    for (uptr i = 0; i < kNumClasses; i++) {
      uptr s = Size(i);
      if (s >= kMidSize / 2 && (s & (s - 1)) == 0)
        Printf("\n");
      uptr d = s - prev_s;
      uptr p = prev_s ? (d * 100 / prev_s) : 0;
      uptr l = SANITIZER_WORDSIZE - 1 - __builtin_clzl(s);
      uptr cached = MaxCached(i) * s;
      Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd "
             "cached: %zd %zd; id %zd\n",
             i, Size(i), d, p, l, MaxCached(i), cached, ClassID(s));
      total_cached += cached;
      prev_s = s;
    }
    Printf("Total cached: %zd\n", total_cached);
  }

  static void Validate() {
    for (uptr c = 1; c < kNumClasses; c++) {
      // Printf("Validate: c%zd\n", c);
      uptr s = Size(c);
      CHECK_EQ(ClassID(s), c);
      if (c != kNumClasses - 1)
        CHECK_EQ(ClassID(s + 1), c + 1);
      CHECK_EQ(ClassID(s - 1), c);
      if (c)
        CHECK_GT(Size(c), Size(c-1));
    }
    CHECK_EQ(ClassID(kMaxSize + 1), 0);

    for (uptr s = 1; s <= kMaxSize; s++) {
      uptr c = ClassID(s);
      // Printf("s%zd => c%zd\n", s, c);
      CHECK_LT(c, kNumClasses);
      CHECK_GE(Size(c), s);
      if (c > 0)
        CHECK_LT(Size(c-1), s);
    }

    // TransferBatch for kMinBatchClass must fit into the block itself.
    const uptr batch_size = sizeof(TransferBatch)
        - sizeof(void*)  // NOLINT
            * (kMaxNumCached - MaxCached(kMinBatchClass));
    CHECK_LE(batch_size, Size(kMinBatchClass));
    // TransferBatch for kMinBatchClass-1 must not fit into the block itself.
    const uptr batch_size1 = sizeof(TransferBatch)
        - sizeof(void*)  // NOLINT
            * (kMaxNumCached - MaxCached(kMinBatchClass - 1));
    CHECK_GT(batch_size1, Size(kMinBatchClass - 1));
  }
};

typedef SizeClassMap<17, 256, 16, FIRST_32_SECOND_64(33, 36)>
    DefaultSizeClassMap;
typedef SizeClassMap<17, 64, 14, FIRST_32_SECOND_64(25, 28)>
    CompactSizeClassMap;
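
// Illustrative sketch (not part of the original header): how the mapping above
// is typically exercised. The concrete numbers are derived from the comment at
// the top of SizeClassMap and are assumptions, not normative values.
//
//   uptr c = DefaultSizeClassMap::ClassID(17);     // smallest class with size >= 17
//   uptr s = DefaultSizeClassMap::Size(c);         // 24: classes below 128 go in 8-byte steps
//   CHECK_GE(s, 17);
//   CHECK_EQ(DefaultSizeClassMap::ClassID(s), c);  // Size() and ClassID() round-trip
//   DefaultSizeClassMap::Validate();               // checks these invariants for all classes
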
template<class SizeClassAllocator> struct SizeClassAllocatorLocalCache;

// Allocators call these callbacks on mmap/munmap.
struct NoOpMapUnmapCallback {
  void OnMap(uptr p, uptr size) const { }
  void OnUnmap(uptr p, uptr size) const { }
};

// SizeClassAllocator64 -- allocator for 64-bit address space.
//
// Space: a portion of address space of kSpaceSize bytes starting at
// a fixed address (kSpaceBeg). Both constants are powers of two and
// kSpaceBeg is kSpaceSize-aligned.
// At the beginning the entire space is mprotect-ed, then small parts of it
// are mapped on demand.
//
// Region: a part of Space dedicated to a single size class.
// There are kNumClasses Regions of equal size.
//
// UserChunk: a piece of memory returned to user.
// MetaChunk: kMetadataSize bytes of metadata associated with a UserChunk.
//
// A Region looks like this:
// UserChunk1 ... UserChunkN <gap> MetaChunkN ... MetaChunk1
template <const uptr kSpaceBeg, const uptr kSpaceSize,
          const uptr kMetadataSize, class SizeClassMap,
          class MapUnmapCallback = NoOpMapUnmapCallback>
class SizeClassAllocator64 {
 public:
  typedef typename SizeClassMap::TransferBatch Batch;
  typedef SizeClassAllocator64<kSpaceBeg, kSpaceSize, kMetadataSize,
      SizeClassMap, MapUnmapCallback> ThisT;
  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;

  void Init() {
    CHECK_EQ(kSpaceBeg,
             reinterpret_cast<uptr>(Mprotect(kSpaceBeg, kSpaceSize)));
    MapWithCallback(kSpaceEnd, AdditionalSize());
  }

  void MapWithCallback(uptr beg, uptr size) {
    CHECK_EQ(beg, reinterpret_cast<uptr>(MmapFixedOrDie(beg, size)));
    MapUnmapCallback().OnMap(beg, size);
  }

  void UnmapWithCallback(uptr beg, uptr size) {
    MapUnmapCallback().OnUnmap(beg, size);
    UnmapOrDie(reinterpret_cast<void *>(beg), size);
  }

  static bool CanAllocate(uptr size, uptr alignment) {
    return size <= SizeClassMap::kMaxSize &&
      alignment <= SizeClassMap::kMaxSize;
  }

  Batch *NOINLINE AllocateBatch(AllocatorCache *c, uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    RegionInfo *region = GetRegionInfo(class_id);
    Batch *b = region->free_list.Pop();
    if (b == 0)
      b = PopulateFreeList(c, class_id, region);
    region->n_allocated += b->count;
    return b;
  }

  void NOINLINE DeallocateBatch(uptr class_id, Batch *b) {
    RegionInfo *region = GetRegionInfo(class_id);
    region->free_list.Push(b);
    region->n_freed += b->count;
  }

  static bool PointerIsMine(void *p) {
    return reinterpret_cast<uptr>(p) / kSpaceSize == kSpaceBeg / kSpaceSize;
  }

  static uptr GetSizeClass(void *p) {
    return (reinterpret_cast<uptr>(p) / kRegionSize) % kNumClassesRounded;
  }

  void *GetBlockBegin(void *p) {
    uptr class_id = GetSizeClass(p);
    uptr size = SizeClassMap::Size(class_id);
    uptr chunk_idx = GetChunkIdx((uptr)p, size);
    uptr reg_beg = (uptr)p & ~(kRegionSize - 1);
    uptr beg = chunk_idx * size;
    uptr next_beg = beg + size;
    RegionInfo *region = GetRegionInfo(class_id);
    if (region->mapped_user >= next_beg)
      return reinterpret_cast<void*>(reg_beg + beg);
    return 0;
  }

  static uptr GetActuallyAllocatedSize(void *p) {
    CHECK(PointerIsMine(p));
    return SizeClassMap::Size(GetSizeClass(p));
  }

  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }

  void *GetMetaData(void *p) {
    uptr class_id = GetSizeClass(p);
    uptr size = SizeClassMap::Size(class_id);
    uptr chunk_idx = GetChunkIdx(reinterpret_cast<uptr>(p), size);
    return reinterpret_cast<void*>(kSpaceBeg + (kRegionSize * (class_id + 1)) -
                                   (1 + chunk_idx) * kMetadataSize);
  }

  uptr TotalMemoryUsed() {
    uptr res = 0;
    for (uptr i = 0; i < kNumClasses; i++)
      res += GetRegionInfo(i)->allocated_user;
    return res;
  }

  // Test-only.
  void TestOnlyUnmap() {
    UnmapWithCallback(kSpaceBeg, kSpaceSize + AdditionalSize());
  }

  void PrintStats() {
    uptr total_mapped = 0;
    uptr n_allocated = 0;
    uptr n_freed = 0;
    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
      RegionInfo *region = GetRegionInfo(class_id);
      total_mapped += region->mapped_user;
      n_allocated += region->n_allocated;
      n_freed += region->n_freed;
    }
    Printf("Stats: SizeClassAllocator64: %zdM mapped in %zd allocations; "
           "remains %zd\n",
           total_mapped >> 20, n_allocated, n_allocated - n_freed);
    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
      RegionInfo *region = GetRegionInfo(class_id);
      if (region->mapped_user == 0) continue;
      Printf("  %02zd (%zd): total: %zd K allocs: %zd remains: %zd\n",
             class_id,
             SizeClassMap::Size(class_id),
             region->mapped_user >> 10,
             region->n_allocated,
             region->n_allocated - region->n_freed);
    }
  }

  typedef SizeClassMap SizeClassMapT;
  static const uptr kNumClasses = SizeClassMap::kNumClasses;
  static const uptr kNumClassesRounded = SizeClassMap::kNumClassesRounded;

 private:
  static const uptr kRegionSize = kSpaceSize / kNumClassesRounded;
  static const uptr kSpaceEnd = kSpaceBeg + kSpaceSize;
  COMPILER_CHECK(kSpaceBeg % kSpaceSize == 0);
  // kRegionSize must be >= 2^32.
  COMPILER_CHECK((kRegionSize) >= (1ULL << (SANITIZER_WORDSIZE / 2)));
  // Populate the free list with at most this number of bytes at once
  // or with one element if its size is greater.
  static const uptr kPopulateSize = 1 << 14;
  // Call mmap for user memory with at least this size.
  static const uptr kUserMapSize = 1 << 15;
  // Call mmap for metadata memory with at least this size.
  static const uptr kMetaMapSize = 1 << 16;

  struct RegionInfo {
    BlockingMutex mutex;
    LFStack<Batch> free_list;
    uptr allocated_user;  // Bytes allocated for user memory.
    uptr allocated_meta;  // Bytes allocated for metadata.
    uptr mapped_user;  // Bytes mapped for user memory.
    uptr mapped_meta;  // Bytes mapped for metadata.
    uptr n_allocated, n_freed;  // Just stats.
  };
  COMPILER_CHECK(sizeof(RegionInfo) >= kCacheLineSize);

  static uptr AdditionalSize() {
    return RoundUpTo(sizeof(RegionInfo) * kNumClassesRounded,
                     GetPageSizeCached());
  }

  RegionInfo *GetRegionInfo(uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    RegionInfo *regions = reinterpret_cast<RegionInfo*>(kSpaceBeg + kSpaceSize);
    return &regions[class_id];
  }

  static uptr GetChunkIdx(uptr chunk, uptr size) {
    u32 offset = chunk % kRegionSize;
    // Here we divide by a non-constant. This is costly.
    // We require that kRegionSize is at least 2^32 so that offset is 32-bit.
    // We save 2x by using 32-bit div, but may need to use a 256-way switch.
    return offset / (u32)size;
  }

  Batch *NOINLINE PopulateFreeList(AllocatorCache *c, uptr class_id,
                                   RegionInfo *region) {
    BlockingMutexLock l(&region->mutex);
    Batch *b = region->free_list.Pop();
    if (b)
      return b;
    uptr size = SizeClassMap::Size(class_id);
    uptr count = size < kPopulateSize ? SizeClassMap::MaxCached(class_id) : 1;
    uptr beg_idx = region->allocated_user;
    uptr end_idx = beg_idx + count * size;
    uptr region_beg = kSpaceBeg + kRegionSize * class_id;
    if (end_idx + size > region->mapped_user) {
      // Do the mmap for the user memory.
      uptr map_size = kUserMapSize;
      while (end_idx + size > region->mapped_user + map_size)
        map_size += kUserMapSize;
      CHECK_GE(region->mapped_user + map_size, end_idx);
      MapWithCallback(region_beg + region->mapped_user, map_size);
      region->mapped_user += map_size;
    }
    uptr total_count = (region->mapped_user - beg_idx - size)
        / size / count * count;
    region->allocated_meta += total_count * kMetadataSize;
    if (region->allocated_meta > region->mapped_meta) {
      uptr map_size = kMetaMapSize;
      while (region->allocated_meta > region->mapped_meta + map_size)
        map_size += kMetaMapSize;
      // Do the mmap for the metadata.
      CHECK_GE(region->mapped_meta + map_size, region->allocated_meta);
      MapWithCallback(region_beg + kRegionSize -
                      region->mapped_meta - map_size, map_size);
      region->mapped_meta += map_size;
    }
    CHECK_LE(region->allocated_meta, region->mapped_meta);
    if (region->allocated_user + region->allocated_meta > kRegionSize) {
      Printf("Out of memory. Dying.\n");
      Printf("The process has exhausted %zuMB for size class %zu.\n",
          kRegionSize / 1024 / 1024, size);
      Die();
    }
    for (;;) {
      if (class_id < SizeClassMap::kMinBatchClass)
        b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
      else
        b = (Batch*)(region_beg + beg_idx);
      b->count = count;
      for (uptr i = 0; i < count; i++)
        b->batch[i] = (void*)(region_beg + beg_idx + i * size);
      region->allocated_user += count * size;
      CHECK_LE(region->allocated_user, region->mapped_user);
      beg_idx += count * size;
      if (beg_idx + count * size + size > region->mapped_user)
        break;
      region->free_list.Push(b);
    }
    return b;
  }
};
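
// Illustrative sketch (assumption, not part of the original header): a typical
// 64-bit instantiation as a tool might write it. The address-space constants
// below are made-up placeholders; a real tool picks values that fit its own
// shadow-memory layout. They do satisfy the constraints above: both are powers
// of two, kSpaceBeg is kSpaceSize-aligned, and kRegionSize ends up >= 2^32.
//
//   static const uptr kAllocatorSpace = 0x600000000000ULL;  // hypothetical kSpaceBeg
//   static const uptr kAllocatorSize  = 0x010000000000ULL;  // hypothetical kSpaceSize (1 TiB)
//   typedef SizeClassAllocator64<kAllocatorSpace, kAllocatorSize,
//       16 /*kMetadataSize*/, DefaultSizeClassMap> Primary;
//   // Primary primary; primary.Init();
//   // Chunks are then obtained through an AllocatorCache
//   // (see SizeClassAllocatorLocalCache below).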

// SizeClassAllocator32 -- allocator for 32-bit address space.
// This allocator can theoretically be used on a 64-bit arch, but there it is
// less efficient than SizeClassAllocator64.
//
// [kSpaceBeg, kSpaceBeg + kSpaceSize) is the range of addresses which can
// be returned by MmapOrDie().
//
// Region:
//   a result of a single call to MmapAlignedOrDie(kRegionSize, kRegionSize).
// Since the regions are aligned by kRegionSize, there are exactly
// kNumPossibleRegions possible regions in the address space and so we keep
// a u8 array possible_regions[kNumPossibleRegions] to store the size classes.
// A size class of 0 means the region is not used by the allocator.
//
// One Region is used to allocate chunks of a single size class.
// A Region looks like this:
// UserChunk1 .. UserChunkN <gap> MetaChunkN .. MetaChunk1
//
// In order to avoid false sharing the objects of this class should be
// cache-line aligned.
template <const uptr kSpaceBeg, const u64 kSpaceSize,
          const uptr kMetadataSize, class SizeClassMap,
          class MapUnmapCallback = NoOpMapUnmapCallback>
class SizeClassAllocator32 {
 public:
  typedef typename SizeClassMap::TransferBatch Batch;
  typedef SizeClassAllocator32<kSpaceBeg, kSpaceSize, kMetadataSize,
      SizeClassMap, MapUnmapCallback> ThisT;
  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;

  void Init() {
    state_ = reinterpret_cast<State *>(MapWithCallback(sizeof(State)));
  }

  void *MapWithCallback(uptr size) {
    size = RoundUpTo(size, GetPageSizeCached());
    void *res = MmapOrDie(size, "SizeClassAllocator32");
    MapUnmapCallback().OnMap((uptr)res, size);
    return res;
  }
  void UnmapWithCallback(uptr beg, uptr size) {
    MapUnmapCallback().OnUnmap(beg, size);
    UnmapOrDie(reinterpret_cast<void *>(beg), size);
  }

  static bool CanAllocate(uptr size, uptr alignment) {
    return size <= SizeClassMap::kMaxSize &&
      alignment <= SizeClassMap::kMaxSize;
  }

  void *GetMetaData(void *p) {
    CHECK(PointerIsMine(p));
    uptr mem = reinterpret_cast<uptr>(p);
    uptr beg = ComputeRegionBeg(mem);
    uptr size = SizeClassMap::Size(GetSizeClass(p));
    u32 offset = mem - beg;
    uptr n = offset / (u32)size;  // 32-bit division
    uptr meta = (beg + kRegionSize) - (n + 1) * kMetadataSize;
    return reinterpret_cast<void*>(meta);
  }

  Batch *NOINLINE AllocateBatch(AllocatorCache *c, uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    SizeClassInfo *sci = GetSizeClassInfo(class_id);
    SpinMutexLock l(&sci->mutex);
    if (sci->free_list.empty())
      PopulateFreeList(c, sci, class_id);
    CHECK(!sci->free_list.empty());
    Batch *b = sci->free_list.front();
    sci->free_list.pop_front();
    return b;
  }

  void NOINLINE DeallocateBatch(uptr class_id, Batch *b) {
    CHECK_LT(class_id, kNumClasses);
    SizeClassInfo *sci = GetSizeClassInfo(class_id);
    SpinMutexLock l(&sci->mutex);
    sci->free_list.push_front(b);
  }

  bool PointerIsMine(void *p) {
    return GetSizeClass(p) != 0;
  }

  uptr GetSizeClass(void *p) {
    return state_->possible_regions[ComputeRegionId(reinterpret_cast<uptr>(p))];
  }

  void *GetBlockBegin(void *p) {
    CHECK(PointerIsMine(p));
    uptr mem = reinterpret_cast<uptr>(p);
    uptr beg = ComputeRegionBeg(mem);
    uptr size = SizeClassMap::Size(GetSizeClass(p));
    u32 offset = mem - beg;
    u32 n = offset / (u32)size;  // 32-bit division
    uptr res = beg + (n * (u32)size);
    return reinterpret_cast<void*>(res);
  }

  uptr GetActuallyAllocatedSize(void *p) {
    CHECK(PointerIsMine(p));
    return SizeClassMap::Size(GetSizeClass(p));
  }

  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }

  uptr TotalMemoryUsed() {
    // No need to lock here.
    uptr res = 0;
    for (uptr i = 0; i < kNumPossibleRegions; i++)
      if (state_->possible_regions[i])
        res += kRegionSize;
    return res;
  }

  void TestOnlyUnmap() {
    for (uptr i = 0; i < kNumPossibleRegions; i++)
      if (state_->possible_regions[i])
        UnmapWithCallback((i * kRegionSize), kRegionSize);
    UnmapWithCallback(reinterpret_cast<uptr>(state_), sizeof(State));
  }

  void PrintStats() {
  }

  typedef SizeClassMap SizeClassMapT;
  static const uptr kNumClasses = SizeClassMap::kNumClasses;

 private:
  static const uptr kRegionSizeLog = SANITIZER_WORDSIZE == 64 ? 24 : 20;
  static const uptr kRegionSize = 1 << kRegionSizeLog;
  static const uptr kNumPossibleRegions = kSpaceSize / kRegionSize;

  struct SizeClassInfo {
    SpinMutex mutex;
    IntrusiveList<Batch> free_list;
    char padding[kCacheLineSize - sizeof(uptr) - sizeof(IntrusiveList<Batch>)];
  };
  COMPILER_CHECK(sizeof(SizeClassInfo) == kCacheLineSize);

  uptr ComputeRegionId(uptr mem) {
    uptr res = mem >> kRegionSizeLog;
    CHECK_LT(res, kNumPossibleRegions);
    return res;
  }

  uptr ComputeRegionBeg(uptr mem) {
    return mem & ~(kRegionSize - 1);
  }

  uptr AllocateRegion(uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    uptr res = reinterpret_cast<uptr>(MmapAlignedOrDie(kRegionSize, kRegionSize,
                                      "SizeClassAllocator32"));
    MapUnmapCallback().OnMap(res, kRegionSize);
    CHECK_EQ(0U, (res & (kRegionSize - 1)));
    CHECK_EQ(0U, state_->possible_regions[ComputeRegionId(res)]);
    state_->possible_regions[ComputeRegionId(res)] = class_id;
    return res;
  }

  SizeClassInfo *GetSizeClassInfo(uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    return &state_->size_class_info_array[class_id];
  }

  void PopulateFreeList(AllocatorCache *c, SizeClassInfo *sci, uptr class_id) {
    uptr size = SizeClassMap::Size(class_id);
    uptr reg = AllocateRegion(class_id);
    uptr n_chunks = kRegionSize / (size + kMetadataSize);
    uptr max_count = SizeClassMap::MaxCached(class_id);
    Batch *b = 0;
    for (uptr i = reg; i < reg + n_chunks * size; i += size) {
      if (b == 0) {
        if (class_id < SizeClassMap::kMinBatchClass)
          b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
        else
          b = (Batch*)i;
        b->count = 0;
      }
      b->batch[b->count++] = (void*)i;
      if (b->count == max_count) {
        sci->free_list.push_back(b);
        b = 0;
      }
    }
    if (b)
      sci->free_list.push_back(b);
  }

  struct State {
    u8 possible_regions[kNumPossibleRegions];
    SizeClassInfo size_class_info_array[kNumClasses];
  };
  State *state_;
};
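
// Illustrative sketch (assumption, not part of the original header): a 32-bit
// instantiation covering the whole 4 GB address space. The template arguments
// are placeholders chosen only to satisfy the parameters documented above.
//
//   typedef SizeClassAllocator32<0 /*kSpaceBeg*/, 1ULL << 32 /*kSpaceSize*/,
//       16 /*kMetadataSize*/, CompactSizeClassMap> Primary32;
//   // Primary32 primary; primary.Init();
//   // Regions are mmap-ed lazily as size classes are first used, and
//   // possible_regions[] records which class owns each region.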

// Objects of this type should be used as local caches for SizeClassAllocator64
// or SizeClassAllocator32. Since the typical use of this class is to have one
// object per thread in TLS, it has to be POD.
template<class SizeClassAllocator>
struct SizeClassAllocatorLocalCache {
  typedef SizeClassAllocator Allocator;
  static const uptr kNumClasses = SizeClassAllocator::kNumClasses;

  // Don't need to call Init if the object is a global (i.e. zero-initialized).
  void Init() {
    internal_memset(this, 0, sizeof(*this));
  }

  void *Allocate(SizeClassAllocator *allocator, uptr class_id) {
    CHECK_NE(class_id, 0UL);
    CHECK_LT(class_id, kNumClasses);
    PerClass *c = &per_class_[class_id];
    if (UNLIKELY(c->count == 0))
      Refill(allocator, class_id);
    void *res = c->batch[--c->count];
    PREFETCH(c->batch[c->count - 1]);
    return res;
  }

  void Deallocate(SizeClassAllocator *allocator, uptr class_id, void *p) {
    CHECK_NE(class_id, 0UL);
    CHECK_LT(class_id, kNumClasses);
    PerClass *c = &per_class_[class_id];
    if (UNLIKELY(c->count == c->max_count))
      Drain(allocator, class_id);
    c->batch[c->count++] = p;
  }

  void Drain(SizeClassAllocator *allocator) {
    for (uptr class_id = 0; class_id < kNumClasses; class_id++) {
      PerClass *c = &per_class_[class_id];
      while (c->count > 0)
        Drain(allocator, class_id);
    }
  }

  // private:
  typedef typename SizeClassAllocator::SizeClassMapT SizeClassMap;
  typedef typename SizeClassMap::TransferBatch Batch;
  struct PerClass {
    uptr count;
    uptr max_count;
    void *batch[2 * SizeClassMap::kMaxNumCached];
  };
  PerClass per_class_[kNumClasses];

  void InitCache() {
    if (per_class_[0].max_count)
      return;
    for (uptr i = 0; i < kNumClasses; i++) {
      PerClass *c = &per_class_[i];
      c->max_count = 2 * SizeClassMap::MaxCached(i);
    }
  }

  void NOINLINE Refill(SizeClassAllocator *allocator, uptr class_id) {
    InitCache();
    PerClass *c = &per_class_[class_id];
    Batch *b = allocator->AllocateBatch(this, class_id);
    for (uptr i = 0; i < b->count; i++)
      c->batch[i] = b->batch[i];
    c->count = b->count;
    if (class_id < SizeClassMap::kMinBatchClass)
      Deallocate(allocator, SizeClassMap::ClassID(sizeof(Batch)), b);
  }

  void NOINLINE Drain(SizeClassAllocator *allocator, uptr class_id) {
    InitCache();
    PerClass *c = &per_class_[class_id];
    Batch *b;
    if (class_id < SizeClassMap::kMinBatchClass)
      b = (Batch*)Allocate(allocator, SizeClassMap::ClassID(sizeof(Batch)));
    else
      b = (Batch*)c->batch[0];
    uptr cnt = Min(c->max_count / 2, c->count);
    for (uptr i = 0; i < cnt; i++) {
      b->batch[i] = c->batch[i];
      c->batch[i] = c->batch[i + c->max_count / 2];
    }
    b->count = cnt;
    c->count -= cnt;
    allocator->DeallocateBatch(class_id, b);
  }
};
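
// Illustrative sketch (assumption, not part of the original header): how a
// tool typically wires this cache to a primary allocator. "Primary" and
// THREADLOCAL are placeholders for whatever the tool actually uses.
//
//   typedef SizeClassAllocatorLocalCache<Primary> Cache;
//   // static Primary primary;            // shared; Init() called once at startup
//   // static THREADLOCAL Cache cache;    // one per thread; zero-initialized, so POD
//   // void *p = cache.Allocate(&primary, primary.ClassID(40));
//   // cache.Deallocate(&primary, primary.ClassID(40), p);
//   // On thread exit: cache.Drain(&primary);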

// This class can (de)allocate only large chunks of memory using mmap/unmap.
// The main purpose of this allocator is to cover large and rare allocation
// sizes not covered by more efficient allocators (e.g. SizeClassAllocator64).
template <class MapUnmapCallback = NoOpMapUnmapCallback>
class LargeMmapAllocator {
 public:
  void Init() {
    internal_memset(this, 0, sizeof(*this));
    page_size_ = GetPageSizeCached();
  }

  void *Allocate(uptr size, uptr alignment) {
    CHECK(IsPowerOfTwo(alignment));
    uptr map_size = RoundUpMapSize(size);
    if (alignment > page_size_)
      map_size += alignment;
    if (map_size < size) return 0;  // Overflow.
    uptr map_beg = reinterpret_cast<uptr>(
        MmapOrDie(map_size, "LargeMmapAllocator"));
    MapUnmapCallback().OnMap(map_beg, map_size);
    uptr map_end = map_beg + map_size;
    uptr res = map_beg + page_size_;
    if (res & (alignment - 1))  // Align.
      res += alignment - (res & (alignment - 1));
    CHECK_EQ(0, res & (alignment - 1));
    CHECK_LE(res + size, map_end);
    Header *h = GetHeader(res);
    h->size = size;
    h->map_beg = map_beg;
    h->map_size = map_size;
    uptr size_log = SANITIZER_WORDSIZE - __builtin_clzl(map_size) - 1;
    CHECK_LT(size_log, ARRAY_SIZE(stats.by_size_log));
    {
      SpinMutexLock l(&mutex_);
      uptr idx = n_chunks_++;
      CHECK_LT(idx, kMaxNumChunks);
      h->chunk_idx = idx;
      chunks_[idx] = h;
      stats.n_allocs++;
      stats.currently_allocated += map_size;
      stats.max_allocated = Max(stats.max_allocated, stats.currently_allocated);
      stats.by_size_log[size_log]++;
    }
    return reinterpret_cast<void*>(res);
  }

  void Deallocate(void *p) {
    Header *h = GetHeader(p);
    {
      SpinMutexLock l(&mutex_);
      uptr idx = h->chunk_idx;
      CHECK_EQ(chunks_[idx], h);
      CHECK_LT(idx, n_chunks_);
      chunks_[idx] = chunks_[n_chunks_ - 1];
      chunks_[idx]->chunk_idx = idx;
      n_chunks_--;
      stats.n_frees++;
      stats.currently_allocated -= h->map_size;
    }
    MapUnmapCallback().OnUnmap(h->map_beg, h->map_size);
    UnmapOrDie(reinterpret_cast<void*>(h->map_beg), h->map_size);
  }

  uptr TotalMemoryUsed() {
    SpinMutexLock l(&mutex_);
    uptr res = 0;
    for (uptr i = 0; i < n_chunks_; i++) {
      Header *h = chunks_[i];
      CHECK_EQ(h->chunk_idx, i);
      res += RoundUpMapSize(h->size);
    }
    return res;
  }

  bool PointerIsMine(void *p) {
    return GetBlockBegin(p) != 0;
  }

  uptr GetActuallyAllocatedSize(void *p) {
    return RoundUpTo(GetHeader(p)->size, page_size_);
  }

  // At least page_size_/2 metadata bytes are available.
  void *GetMetaData(void *p) {
    // Too slow: CHECK_EQ(p, GetBlockBegin(p));
    CHECK(IsAligned(reinterpret_cast<uptr>(p), page_size_));
    return GetHeader(p) + 1;
  }

  void *GetBlockBegin(void *ptr) {
    uptr p = reinterpret_cast<uptr>(ptr);
    SpinMutexLock l(&mutex_);
    uptr nearest_chunk = 0;
    // Cache-friendly linear search.
    for (uptr i = 0; i < n_chunks_; i++) {
      uptr ch = reinterpret_cast<uptr>(chunks_[i]);
      if (p < ch) continue;  // p is to the left of this chunk; skip it.
      if (p - ch < p - nearest_chunk)
        nearest_chunk = ch;
    }
    if (!nearest_chunk)
      return 0;
    Header *h = reinterpret_cast<Header *>(nearest_chunk);
    CHECK_GE(nearest_chunk, h->map_beg);
    CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
    CHECK_LE(nearest_chunk, p);
    if (h->map_beg + h->map_size < p)
      return 0;
    return GetUser(h);
  }

  void PrintStats() {
    Printf("Stats: LargeMmapAllocator: allocated %zd times, "
           "remains %zd (%zd K) max %zd M; by size logs: ",
           stats.n_allocs, stats.n_allocs - stats.n_frees,
           stats.currently_allocated >> 10, stats.max_allocated >> 20);
    for (uptr i = 0; i < ARRAY_SIZE(stats.by_size_log); i++) {
      uptr c = stats.by_size_log[i];
      if (!c) continue;
      Printf("%zd:%zd; ", i, c);
    }
    Printf("\n");
  }

 private:
  static const int kMaxNumChunks = 1 << FIRST_32_SECOND_64(15, 18);
  struct Header {
    uptr map_beg;
    uptr map_size;
    uptr size;
    uptr chunk_idx;
  };

  Header *GetHeader(uptr p) {
    CHECK_EQ(p % page_size_, 0);
    return reinterpret_cast<Header*>(p - page_size_);
  }
  Header *GetHeader(void *p) { return GetHeader(reinterpret_cast<uptr>(p)); }

  void *GetUser(Header *h) {
    CHECK_EQ((uptr)h % page_size_, 0);
    return reinterpret_cast<void*>(reinterpret_cast<uptr>(h) + page_size_);
  }

  uptr RoundUpMapSize(uptr size) {
    return RoundUpTo(size, page_size_) + page_size_;
  }

  uptr page_size_;
  Header *chunks_[kMaxNumChunks];
  uptr n_chunks_;
  struct Stats {
    uptr n_allocs, n_frees, currently_allocated, max_allocated, by_size_log[64];
  } stats;
  SpinMutex mutex_;
};
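
// Illustrative sketch (assumption, not part of the original header): each
// secondary allocation lives in its own mapping, with the Header placed one
// page before the pointer handed back to the user:
//
//   map_beg ... [page holding Header][user data, size bytes] ... map_end
//
//   LargeMmapAllocator<> a; a.Init();
//   void *p = a.Allocate(1 << 20, 8);   // ~1 MB chunk, default 8-byte alignment
//   CHECK(a.PointerIsMine(p));
//   a.Deallocate(p);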

// This class implements a complete memory allocator by using two
// internal allocators:
// PrimaryAllocator is efficient, but may not allocate some sizes (alignments).
//  When allocating 2^x bytes it should return a 2^x-aligned chunk.
// PrimaryAllocator is used via a local AllocatorCache.
// SecondaryAllocator can allocate anything, but is not efficient.
template <class PrimaryAllocator, class AllocatorCache,
          class SecondaryAllocator>  // NOLINT
class CombinedAllocator {
 public:
  void Init() {
    primary_.Init();
    secondary_.Init();
  }

  void *Allocate(AllocatorCache *cache, uptr size, uptr alignment,
                 bool cleared = false) {
    // Returning 0 on malloc(0) may break a lot of code.
    if (size == 0)
      size = 1;
    if (size + alignment < size)
      return 0;
    if (alignment > 8)
      size = RoundUpTo(size, alignment);
    void *res;
    if (primary_.CanAllocate(size, alignment))
      res = cache->Allocate(&primary_, primary_.ClassID(size));
    else
      res = secondary_.Allocate(size, alignment);
    if (alignment > 8)
      CHECK_EQ(reinterpret_cast<uptr>(res) & (alignment - 1), 0);
    if (cleared && res)
      internal_memset(res, 0, size);
    return res;
  }

  void Deallocate(AllocatorCache *cache, void *p) {
    if (!p) return;
    if (primary_.PointerIsMine(p))
      cache->Deallocate(&primary_, primary_.GetSizeClass(p), p);
    else
      secondary_.Deallocate(p);
  }

  void *Reallocate(AllocatorCache *cache, void *p, uptr new_size,
                   uptr alignment) {
    if (!p)
      return Allocate(cache, new_size, alignment);
    if (!new_size) {
      Deallocate(cache, p);
      return 0;
    }
    CHECK(PointerIsMine(p));
    uptr old_size = GetActuallyAllocatedSize(p);
    uptr memcpy_size = Min(new_size, old_size);
    void *new_p = Allocate(cache, new_size, alignment);
    if (new_p)
      internal_memcpy(new_p, p, memcpy_size);
    Deallocate(cache, p);
    return new_p;
  }

  bool PointerIsMine(void *p) {
    if (primary_.PointerIsMine(p))
      return true;
    return secondary_.PointerIsMine(p);
  }

  bool FromPrimary(void *p) {
    return primary_.PointerIsMine(p);
  }

  void *GetMetaData(void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetMetaData(p);
    return secondary_.GetMetaData(p);
  }

  void *GetBlockBegin(void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetBlockBegin(p);
    return secondary_.GetBlockBegin(p);
  }

  uptr GetActuallyAllocatedSize(void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetActuallyAllocatedSize(p);
    return secondary_.GetActuallyAllocatedSize(p);
  }

  uptr TotalMemoryUsed() {
    return primary_.TotalMemoryUsed() + secondary_.TotalMemoryUsed();
  }

  void TestOnlyUnmap() { primary_.TestOnlyUnmap(); }

  void SwallowCache(AllocatorCache *cache) {
    cache->Drain(&primary_);
  }

  void PrintStats() {
    primary_.PrintStats();
    secondary_.PrintStats();
  }

 private:
  PrimaryAllocator primary_;
  SecondaryAllocator secondary_;
};
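
// Illustrative sketch (assumption, not part of the original header): how a
// sanitizer runtime typically assembles a complete allocator from the pieces
// above. "Primary" stands for a SizeClassAllocator64/32 instantiation such as
// the hypothetical ones sketched earlier; all names are placeholders.
//
//   typedef SizeClassAllocatorLocalCache<Primary> AllocatorCache;
//   typedef LargeMmapAllocator<> Secondary;
//   typedef CombinedAllocator<Primary, AllocatorCache, Secondary> Allocator;
//
//   // Allocator a; a.Init();
//   // AllocatorCache cache;                      // usually one per thread
//   // void *p = a.Allocate(&cache, 100, 8);      // small: served by the primary
//   // void *q = a.Allocate(&cache, 1 << 20, 8);  // > kMaxSize: falls to the secondary
//   // a.Deallocate(&cache, p); a.Deallocate(&cache, q);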

}  // namespace __sanitizer

#endif  // SANITIZER_ALLOCATOR_H