//===-- tsan_clock.h --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#ifndef TSAN_CLOCK_H
#define TSAN_CLOCK_H

#include "tsan_defs.h"
#include "tsan_dense_alloc.h"

namespace __tsan {

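// Allocator of 512-byte ClockBlocks, addressed by u32 indices.
// A note on the template arguments (assuming the DenseSlabAlloc<T, L1, L2>
// parameter order from tsan_dense_alloc.h): 1 << 22 first-level entries
// times 1 << 10 blocks each cover the full 32-bit index space.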
typedef DenseSlabAlloc<ClockBlock, 1 << 22, 1 << 10> ClockAlloc;
typedef DenseSlabAllocCache ClockCache;

// The clock that lives in sync variables (mutexes, atomics, etc).
class SyncClock {
 public:
  SyncClock();
  ~SyncClock();

  uptr size() const;

  // These are used only in tests.
  u64 get(unsigned tid) const;
  u64 get_clean(unsigned tid) const;

  void Resize(ClockCache *c, uptr nclk);
  void Reset(ClockCache *c);

  void DebugDump(int(*printf)(const char *s, ...));

  // Clock element iterator.
  // Note: it iterates only over the table without regard to dirty entries;
  // see the usage sketch after begin()/end() below.
  class Iter {
   public:
    explicit Iter(SyncClock* parent);
    Iter& operator++();
    bool operator!=(const Iter& other);
    ClockElem &operator*();

   private:
    SyncClock *parent_;
    // [pos_, end_) is the current contiguous range of clock elements.
    ClockElem *pos_;
    ClockElem *end_;
    int block_;  // Number of the current second-level block.

    NOINLINE void Next();
  };

  Iter begin();
  Iter end();
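  // For example, given a SyncClock `sync_clock`, the iterator supports
  // range-based iteration over stored elements (a sketch; assumes ClockElem
  // exposes an `epoch` bitfield as declared in tsan_defs.h, and remember
  // that active dirty entries are not visited):
  //   for (ClockElem &ce : sync_clock)
  //     printf("epoch=%llu\n", (unsigned long long)ce.epoch);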

 private:
  friend class ThreadClock;
  friend class Iter;
  static const uptr kDirtyTids = 2;

  struct Dirty {
    u32 tid() const { return tid_ == kShortInvalidTid ? kInvalidTid : tid_; }
    void set_tid(u32 tid) {
      tid_ = tid == kInvalidTid ? kShortInvalidTid : tid;
    }
    u64 epoch : kClkBits;

   private:
    // Full kInvalidTid won't fit into Dirty::tid_.
    static const u64 kShortInvalidTid = (1ull << (64 - kClkBits)) - 1;
    u64 tid_ : 64 - kClkBits;  // kInvalidTid if not active
  };

  static_assert(sizeof(Dirty) == 8, "Dirty is not 64bit");
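  // Worked example of the Dirty bit layout: with kClkBits == 42 (the value
  // defined in tsan_defs.h; an assumption stated here, not checked in this
  // header), epoch occupies the low 42 bits, tid_ the remaining 22 bits,
  // and kShortInvalidTid == (1ull << 22) - 1 == 0x3fffff.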

  unsigned release_store_tid_;
  unsigned release_store_reused_;
  Dirty dirty_[kDirtyTids];
  // If size_ is 0, tab_ is nullptr.
  // If size_ <= 64 (kClockCount), tab_ contains pointer to an array with
  // 64 ClockElem's (ClockBlock::clock).
  // Otherwise, tab_ points to an array with up to 127 u32 elements,
  // each pointing to a second-level 512-byte block with 64 ClockElem's.
  // Unused space in the first-level ClockBlock is used to store additional
  // clock elements.
  // The last u32 element in the first-level ClockBlock is always used as
  // a reference counter.
  //
  // See the following scheme for details.
  // All memory blocks are 512 bytes (allocated from ClockAlloc).
  // Clock (clk) elements are 64 bits.
  // Idx and ref are 32 bits.
  //
  // tab_
  //    |
  //    \/
  //    +----------------------------------------------------+
  //    | clk128 | clk129 | ...unused... | idx1 | idx0 | ref |
  //    +----------------------------------------------------+
  //                                        |      |
  //                                        |      \/
  //                                        |      +----------------+
  //                                        |      | clk0 ... clk63 |
  //                                        |      +----------------+
  //                                        \/
  //                                        +------------------+
  //                                        | clk64 ... clk127 |
  //                                        +------------------+
  //
  // Note: dirty entries, if active, always override what's stored in the clock.
  ClockBlock *tab_;
  u32 tab_idx_;
  u16 size_;
  u16 blocks_;  // Number of second-level blocks.
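
  // For illustration, under the scheme above a lookup of thread `tid` in a
  // two-level clock resolves roughly as follows (a sketch of the indexing
  // arithmetic only, not the exact implementation; `clock_alloc` is a
  // hypothetical handle to the global ClockAlloc, and kClockCount == 64
  // per the diagram):
  //   uptr bi = tid / ClockBlock::kClockCount;  // second-level block number
  //   uptr ei = tid % ClockBlock::kClockCount;  // element within the block
  //   ClockBlock *b = clock_alloc->Map(get_block(bi));
  //   ClockElem &e = b->clock[ei];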

  void Unshare(ClockCache *c);
  bool IsShared() const;
  bool Cachable() const;
  void ResetImpl();
  void FlushDirty();
  uptr capacity() const;
  u32 get_block(uptr bi) const;
  void append_block(u32 idx);
  ClockElem &elem(unsigned tid) const;
};

// The clock that lives in threads.
class ThreadClock {
 public:
  typedef DenseSlabAllocCache Cache;

  explicit ThreadClock(unsigned tid, unsigned reused = 0);

  u64 get(unsigned tid) const;
  void set(ClockCache *c, unsigned tid, u64 v);
  void set(u64 v);
  void tick();
  uptr size() const;

  void acquire(ClockCache *c, SyncClock *src);
  void releaseStoreAcquire(ClockCache *c, SyncClock *src);
  void release(ClockCache *c, SyncClock *dst);
  void acq_rel(ClockCache *c, SyncClock *dst);
  void ReleaseStore(ClockCache *c, SyncClock *dst);
  void ResetCached(ClockCache *c);
  void NoteGlobalAcquire(u64 v);

  void DebugReset();
  void DebugDump(int(*printf)(const char *s, ...));

 private:
  static const uptr kDirtyTids = SyncClock::kDirtyTids;
  // Index of the thread associated with the clock ("current thread").
  const unsigned tid_;
  const unsigned reused_;  // tid_ reuse count.
  // Current thread time when it acquired something from other threads.
  u64 last_acquire_;

  // Last time another thread has done a global acquire of this thread's clock.
  // It helps to avoid the problem described in:
  // https://github.com/golang/go/issues/39186
  // See test/tsan/java_finalizer2.cpp for a regression test.
  // Note: the failure is _extremely_ hard to hit, so if you are trying
  // to reproduce it, you may want to run something like:
  // $ go get golang.org/x/tools/cmd/stress
  // $ stress -p=64 ./a.out
  //
  // The crux of the problem is roughly as follows.
  // A number of O(1) optimizations in the clocks algorithm assume proper
  // transitive cumulative propagation of clock values. The AcquireGlobal
  // operation may produce an inconsistent non-linearizable view of
  // thread clocks. Namely, it may acquire a later value from a thread
  // with a higher ID, but fail to acquire an earlier value from a thread
  // with a lower ID. If a thread that executed AcquireGlobal then releases
  // to a sync clock, it will spoil the sync clock with the inconsistent
  // values. If another thread later releases to the sync clock, the optimized
  // algorithm may break.
  //
  // The exact sequence of events that leads to the failure:
  // - thread 1 executes AcquireGlobal
  // - thread 1 acquires value 1 for thread 2
  // - thread 2 increments clock to 2
  // - thread 2 releases to sync object 1
  // - thread 3 is at time 1
  // - thread 3 acquires from sync object 1
  // - thread 3 increments clock to 2
  // - thread 1 acquires value 2 for thread 3
  // - thread 1 releases to sync object 2
  // - sync object 2 clock has 1 for thread 2 and 2 for thread 3
  // - thread 3 releases to sync object 2
  // - thread 3 sees value 2 in the clock for itself
  //   and decides that it has already released to the clock
  //   and did not acquire anything from other threads after that
  //   (the last_acquire_ check in the release operation)
  // - thread 3 does not update the value for thread 2 in the clock from 1 to 2
  // - thread 4 acquires from sync object 2
  // - thread 4 detects a false race with thread 2
  //   as it should have been synchronized with thread 2 up to time 2,
  //   but because of the broken clock it is now synchronized only up to time 1
  //
  // The global_acquire_ value helps to prevent this scenario.
  // Namely, thread 3 will not trust any of its own clock values up to
  // global_acquire_ for the purposes of the last_acquire_ optimization.
  atomic_uint64_t global_acquire_;
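
  // Roughly, the fix distrusts the fast path unless the thread's own epoch
  // stored in the destination clock postdates both last_acquire_ and
  // global_acquire_. A hedged sketch of such a check (pseudocode along the
  // lines of HasAcquiredAfterRelease() below, not the exact source):
  //   const u64 my_epoch = dst->elem(tid_).epoch;
  //   bool acquired_after_release =
  //       my_epoch <= last_acquire_ ||
  //       my_epoch <= atomic_load_relaxed(&global_acquire_);
  //   // If acquired_after_release, release() must do a full update.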

  // Cached SyncClock (without dirty entries and release_store_tid_).
  // We reuse it for subsequent store-release operations without intervening
  // acquire operations. Since it is shared (and thus constant), the clock
  // value for the current thread is then stored in dirty entries in the
  // SyncClock. We hold a reference to the table while it is cached here.
  u32 cached_idx_;
  u16 cached_size_;
  u16 cached_blocks_;
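  // E.g. two back-to-back ReleaseStore() calls to different sync objects can
  // share the cached table; each SyncClock then records only the current
  // thread's latest epoch in a dirty entry (an illustration of the intent
  // described above, not additional machinery).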

  // Number of active elements in the clk_ table (the rest is zeros).
  uptr nclk_;
  u64 clk_[kMaxTidInClock];  // Fixed size vector clock.

  bool IsAlreadyAcquired(const SyncClock *src) const;
  bool HasAcquiredAfterRelease(const SyncClock *dst) const;
  void UpdateCurrentThread(ClockCache *c, SyncClock *dst) const;
};
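
// Example of the intended happens-before protocol (a hedged sketch, not a
// supported API walkthrough; assumes `cache` is a ClockCache owned by the
// caller, e.g. a per-thread DenseSlabAllocCache):
//   ThreadClock thr1(/*tid=*/1), thr2(/*tid=*/2);
//   SyncClock sync;
//   thr1.tick();                  // thr1 performs some work
//   thr1.release(&cache, &sync);  // e.g. mutex unlock by thr1
//   thr2.acquire(&cache, &sync);  // e.g. mutex lock by thr2
//   // Now thr2.get(1) reflects thr1's epoch at release time.
//   sync.Reset(&cache);           // return clock blocks to the cache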

ALWAYS_INLINE u64 ThreadClock::get(unsigned tid) const {
  DCHECK_LT(tid, kMaxTidInClock);
  return clk_[tid];
}

ALWAYS_INLINE void ThreadClock::set(u64 v) {
  DCHECK_GE(v, clk_[tid_]);
  clk_[tid_] = v;
}

ALWAYS_INLINE void ThreadClock::tick() {
  clk_[tid_]++;
}

ALWAYS_INLINE uptr ThreadClock::size() const {
  return nclk_;
}

ALWAYS_INLINE void ThreadClock::NoteGlobalAcquire(u64 v) {
  // Here we rely on the fact that AcquireGlobal is protected by
  // ThreadRegistryLock, thus only one thread at a time executes it
  // and values passed to this function should not go backwards.
  CHECK_LE(atomic_load_relaxed(&global_acquire_), v);
  atomic_store_relaxed(&global_acquire_, v);
}

ALWAYS_INLINE SyncClock::Iter SyncClock::begin() {
  return Iter(this);
}

ALWAYS_INLINE SyncClock::Iter SyncClock::end() {
  return Iter(nullptr);
}

ALWAYS_INLINE uptr SyncClock::size() const {
  return size_;
}

ALWAYS_INLINE SyncClock::Iter::Iter(SyncClock* parent)
    : parent_(parent)
    , pos_(nullptr)
    , end_(nullptr)
    , block_(-1) {
  if (parent)
    Next();
}

ALWAYS_INLINE SyncClock::Iter& SyncClock::Iter::operator++() {
  pos_++;
  if (UNLIKELY(pos_ >= end_))
    Next();
  return *this;
}

ALWAYS_INLINE bool SyncClock::Iter::operator!=(const SyncClock::Iter& other) {
  return parent_ != other.parent_;
}

ALWAYS_INLINE ClockElem &SyncClock::Iter::operator*() {
  return *pos_;
}
}  // namespace __tsan

#endif  // TSAN_CLOCK_H