//===-- local_cache.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef SCUDO_LOCAL_CACHE_H_
#define SCUDO_LOCAL_CACHE_H_

#include "internal_defs.h"
#include "report.h"
#include "stats.h"

namespace scudo {

18351282Sdimtemplate <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
19351282Sdim  typedef typename SizeClassAllocator::SizeClassMap SizeClassMap;
20351282Sdim
21351282Sdim  struct TransferBatch {
22351282Sdim    static const u32 MaxNumCached = SizeClassMap::MaxNumCachedHint;
23351282Sdim    void setFromArray(void **Array, u32 N) {
24351282Sdim      DCHECK_LE(N, MaxNumCached);
25351282Sdim      Count = N;
26360784Sdim      memcpy(Batch, Array, sizeof(void *) * Count);
27351282Sdim    }
28351282Sdim    void clear() { Count = 0; }
29351282Sdim    void add(void *P) {
30351282Sdim      DCHECK_LT(Count, MaxNumCached);
31351282Sdim      Batch[Count++] = P;
32351282Sdim    }
33351282Sdim    void copyToArray(void **Array) const {
34360784Sdim      memcpy(Array, Batch, sizeof(void *) * Count);
35351282Sdim    }
36351282Sdim    u32 getCount() const { return Count; }
37351282Sdim    void *get(u32 I) const {
38351282Sdim      DCHECK_LE(I, Count);
39351282Sdim      return Batch[I];
40351282Sdim    }
41351282Sdim    static u32 getMaxCached(uptr Size) {
42351282Sdim      return Min(MaxNumCached, SizeClassMap::getMaxCachedHint(Size));
43351282Sdim    }
44351282Sdim    TransferBatch *Next;
45351282Sdim
46351282Sdim  private:
47351282Sdim    u32 Count;
48351282Sdim    void *Batch[MaxNumCached];
49351282Sdim  };
50351282Sdim
51351282Sdim  void initLinkerInitialized(GlobalStats *S, SizeClassAllocator *A) {
52351282Sdim    Stats.initLinkerInitialized();
53360784Sdim    if (LIKELY(S))
54351282Sdim      S->link(&Stats);
55351282Sdim    Allocator = A;
56351282Sdim  }
57351282Sdim
58351282Sdim  void init(GlobalStats *S, SizeClassAllocator *A) {
59351282Sdim    memset(this, 0, sizeof(*this));
60351282Sdim    initLinkerInitialized(S, A);
61351282Sdim  }
62351282Sdim
63351282Sdim  void destroy(GlobalStats *S) {
64351282Sdim    drain();
65360784Sdim    if (LIKELY(S))
66351282Sdim      S->unlink(&Stats);
67351282Sdim  }
68351282Sdim
69351282Sdim  void *allocate(uptr ClassId) {
70360784Sdim    DCHECK_LT(ClassId, NumClasses);
71351282Sdim    PerClass *C = &PerClassArray[ClassId];
72351282Sdim    if (C->Count == 0) {
73351282Sdim      if (UNLIKELY(!refill(C, ClassId)))
74351282Sdim        return nullptr;
75351282Sdim      DCHECK_GT(C->Count, 0);
76351282Sdim    }
77351282Sdim    // We read ClassSize first before accessing Chunks because it's adjacent to
78351282Sdim    // Count, while Chunks might be further off (depending on Count). That keeps
79351282Sdim    // the memory accesses in close quarters.
80351282Sdim    const uptr ClassSize = C->ClassSize;
81351282Sdim    void *P = C->Chunks[--C->Count];
82351282Sdim    // The jury is still out as to whether any kind of PREFETCH here increases
83351282Sdim    // performance. It definitely decreases performance on Android though.
84351282Sdim    // if (!SCUDO_ANDROID) PREFETCH(P);
85351282Sdim    Stats.add(StatAllocated, ClassSize);
86360784Sdim    Stats.sub(StatFree, ClassSize);
87351282Sdim    return P;
88351282Sdim  }
89351282Sdim
90351282Sdim  void deallocate(uptr ClassId, void *P) {
91351282Sdim    CHECK_LT(ClassId, NumClasses);
92351282Sdim    PerClass *C = &PerClassArray[ClassId];
93351282Sdim    // We still have to initialize the cache in the event that the first heap
94351282Sdim    // operation in a thread is a deallocation.
95351282Sdim    initCacheMaybe(C);
96351282Sdim    if (C->Count == C->MaxCount)
97351282Sdim      drain(C, ClassId);
98351282Sdim    // See comment in allocate() about memory accesses.
99351282Sdim    const uptr ClassSize = C->ClassSize;
100351282Sdim    C->Chunks[C->Count++] = P;
101351282Sdim    Stats.sub(StatAllocated, ClassSize);
102360784Sdim    Stats.add(StatFree, ClassSize);
103351282Sdim  }
104351282Sdim
105351282Sdim  void drain() {
106351282Sdim    for (uptr I = 0; I < NumClasses; I++) {
107351282Sdim      PerClass *C = &PerClassArray[I];
108351282Sdim      while (C->Count > 0)
109351282Sdim        drain(C, I);
110351282Sdim    }
111351282Sdim  }
112351282Sdim
113351282Sdim  TransferBatch *createBatch(uptr ClassId, void *B) {
114351282Sdim    if (ClassId != SizeClassMap::BatchClassId)
115351282Sdim      B = allocate(SizeClassMap::BatchClassId);
116351282Sdim    return reinterpret_cast<TransferBatch *>(B);
117351282Sdim  }
118351282Sdim
119351282Sdim  LocalStats &getStats() { return Stats; }
120351282Sdim
121351282Sdimprivate:
122351282Sdim  static const uptr NumClasses = SizeClassMap::NumClasses;
123351282Sdim  struct PerClass {
124351282Sdim    u32 Count;
125351282Sdim    u32 MaxCount;
126351282Sdim    uptr ClassSize;
127351282Sdim    void *Chunks[2 * TransferBatch::MaxNumCached];
128351282Sdim  };
129351282Sdim  PerClass PerClassArray[NumClasses];
130351282Sdim  LocalStats Stats;
131351282Sdim  SizeClassAllocator *Allocator;
132351282Sdim
133351282Sdim  ALWAYS_INLINE void initCacheMaybe(PerClass *C) {
134351282Sdim    if (LIKELY(C->MaxCount))
135351282Sdim      return;
136351282Sdim    initCache();
137351282Sdim    DCHECK_NE(C->MaxCount, 0U);
138351282Sdim  }
139351282Sdim
140351282Sdim  NOINLINE void initCache() {
141351282Sdim    for (uptr I = 0; I < NumClasses; I++) {
142351282Sdim      PerClass *P = &PerClassArray[I];
143351282Sdim      const uptr Size = SizeClassAllocator::getSizeByClassId(I);
144351282Sdim      P->MaxCount = 2 * TransferBatch::getMaxCached(Size);
145351282Sdim      P->ClassSize = Size;
146351282Sdim    }
147351282Sdim  }
148351282Sdim
149351282Sdim  void destroyBatch(uptr ClassId, void *B) {
150351282Sdim    if (ClassId != SizeClassMap::BatchClassId)
151351282Sdim      deallocate(SizeClassMap::BatchClassId, B);
152351282Sdim  }
153351282Sdim
154351282Sdim  NOINLINE bool refill(PerClass *C, uptr ClassId) {
155351282Sdim    initCacheMaybe(C);
156351282Sdim    TransferBatch *B = Allocator->popBatch(this, ClassId);
157351282Sdim    if (UNLIKELY(!B))
158351282Sdim      return false;
159351282Sdim    DCHECK_GT(B->getCount(), 0);
160360784Sdim    C->Count = B->getCount();
161351282Sdim    B->copyToArray(C->Chunks);
162351282Sdim    destroyBatch(ClassId, B);
163351282Sdim    return true;
164351282Sdim  }
165351282Sdim
166351282Sdim  NOINLINE void drain(PerClass *C, uptr ClassId) {
167351282Sdim    const u32 Count = Min(C->MaxCount / 2, C->Count);
168351282Sdim    const uptr FirstIndexToDrain = C->Count - Count;
169351282Sdim    TransferBatch *B = createBatch(ClassId, C->Chunks[FirstIndexToDrain]);
170351282Sdim    if (UNLIKELY(!B))
171351282Sdim      reportOutOfMemory(
172351282Sdim          SizeClassAllocator::getSizeByClassId(SizeClassMap::BatchClassId));
173351282Sdim    B->setFromArray(&C->Chunks[FirstIndexToDrain], Count);
174351282Sdim    C->Count -= Count;
175351282Sdim    Allocator->pushBatch(ClassId, B);
176351282Sdim  }
177351282Sdim};

} // namespace scudo

#endif // SCUDO_LOCAL_CACHE_H_