// tsan_clock.h revision 1.4
//===-- tsan_clock.h --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#ifndef TSAN_CLOCK_H
#define TSAN_CLOCK_H

#include "tsan_defs.h"
#include "tsan_dense_alloc.h"

namespace __tsan {

// Slab allocator (and its per-thread cache) used for 512-byte ClockBlock's.
typedef DenseSlabAlloc<ClockBlock, 1 << 22, 1 << 10> ClockAlloc;
typedef DenseSlabAllocCache ClockCache;

// The clock that lives in sync variables (mutexes, atomics, etc).
class SyncClock {
 public:
  SyncClock();
  ~SyncClock();

  uptr size() const;

  // These are used only in tests.
  u64 get(unsigned tid) const;
  u64 get_clean(unsigned tid) const;

  void Resize(ClockCache *c, uptr nclk);
  void Reset(ClockCache *c);

  void DebugDump(int(*printf)(const char *s, ...));

  // Clock element iterator.
  // Note: it iterates only over the table without regard to dirty entries.
  class Iter {
   public:
    explicit Iter(SyncClock* parent);
    Iter& operator++();
    bool operator!=(const Iter& other);
    ClockElem &operator*();

   private:
    SyncClock *parent_;
    // [pos_, end_) is the current continuous range of clock elements.
    ClockElem *pos_;
    ClockElem *end_;
    int block_;  // Current number of second level block.

    NOINLINE void Next();
  };

  Iter begin();
  Iter end();

 private:
  friend class ThreadClock;
  friend class Iter;
  static const uptr kDirtyTids = 2;

  // A per-thread clock value that has not yet been flushed into the table.
  // Packs (tid, epoch) into a single 64-bit word (see static_assert below).
  struct Dirty {
    u32 tid() const { return tid_ == kShortInvalidTid ? kInvalidTid : tid_; }
    void set_tid(u32 tid) {
      tid_ = tid == kInvalidTid ? kShortInvalidTid : tid;
    }
    u64 epoch : kClkBits;

   private:
    // Full kInvalidTid won't fit into Dirty::tid.
    static const u64 kShortInvalidTid = (1ull << (64 - kClkBits)) - 1;
    u64 tid_ : 64 - kClkBits;  // kInvalidTid if not active
  };

  static_assert(sizeof(Dirty) == 8, "Dirty is not 64bit");

  unsigned release_store_tid_;
  unsigned release_store_reused_;
  Dirty dirty_[kDirtyTids];
  // If size_ is 0, tab_ is nullptr.
  // If size <= 64 (kClockCount), tab_ contains pointer to an array with
  // 64 ClockElem's (ClockBlock::clock).
  // Otherwise, tab_ points to an array with up to 127 u32 elements,
  // each pointing to the second-level 512b block with 64 ClockElem's.
  // Unused space in the first level ClockBlock is used to store additional
  // clock elements.
  // The last u32 element in the first level ClockBlock is always used as
  // reference counter.
  //
  // See the following scheme for details.
  // All memory blocks are 512 bytes (allocated from ClockAlloc).
  // Clock (clk) elements are 64 bits.
  // Idx and ref are 32 bits.
  //
  // tab_
  //    |
  //    \/
  //    +----------------------------------------------------+
  //    | clk128 | clk129 | ...unused... | idx1 | idx0 | ref |
  //    +----------------------------------------------------+
  //                                          |      |
  //                                          |      \/
  //                                          |      +----------------+
  //                                          |      | clk0 ... clk63 |
  //                                          |      +----------------+
  //                                          \/
  //                                          +------------------+
  //                                          | clk64 ... clk127 |
  //                                          +------------------+
  //
  // Note: dirty entries, if active, always override what's stored in the clock.
  ClockBlock *tab_;
  u32 tab_idx_;
  u16 size_;
  u16 blocks_;  // Number of second level blocks.

  void Unshare(ClockCache *c);
  bool IsShared() const;
  bool Cachable() const;
  void ResetImpl();
  void FlushDirty();
  uptr capacity() const;
  u32 get_block(uptr bi) const;
  void append_block(u32 idx);
  ClockElem &elem(unsigned tid) const;
};

// The clock that lives in threads.
class ThreadClock {
 public:
  typedef DenseSlabAllocCache Cache;

  explicit ThreadClock(unsigned tid, unsigned reused = 0);

  u64 get(unsigned tid) const;
  void set(ClockCache *c, unsigned tid, u64 v);
  void set(u64 v);
  void tick();
  uptr size() const;

  void acquire(ClockCache *c, SyncClock *src);
  void releaseStoreAcquire(ClockCache *c, SyncClock *src);
  void release(ClockCache *c, SyncClock *dst);
  void acq_rel(ClockCache *c, SyncClock *dst);
  void ReleaseStore(ClockCache *c, SyncClock *dst);
  void ResetCached(ClockCache *c);
  void NoteGlobalAcquire(u64 v);

  void DebugReset();
  void DebugDump(int(*printf)(const char *s, ...));

 private:
  static const uptr kDirtyTids = SyncClock::kDirtyTids;
  // Index of the thread associated with the clock ("current thread").
  const unsigned tid_;
  const unsigned reused_;  // tid_ reuse count.
  // Current thread time when it acquired something from other threads.
  u64 last_acquire_;

  // Last time another thread has done a global acquire of this thread's clock.
  // It helps to avoid problem described in:
  // https://github.com/golang/go/issues/39186
  // See test/tsan/java_finalizer2.cpp for a regression test.
  // Note the failure is _extremely_ hard to hit, so if you are trying
  // to reproduce it, you may want to run something like:
  // $ go get golang.org/x/tools/cmd/stress
  // $ stress -p=64 ./a.out
  //
  // The crux of the problem is roughly as follows.
  // A number of O(1) optimizations in the clocks algorithm assume proper
  // transitive cumulative propagation of clock values. The AcquireGlobal
  // operation may produce an inconsistent non-linearizable view of
  // thread clocks. Namely, it may acquire a later value from a thread
  // with a higher ID, but fail to acquire an earlier value from a thread
  // with a lower ID. If a thread that executed AcquireGlobal then releases
  // to a sync clock, it will spoil the sync clock with the inconsistent
  // values. If another thread later releases to the sync clock, the optimized
  // algorithm may break.
  //
  // The exact sequence of events that leads to the failure.
  // - thread 1 executes AcquireGlobal
  // - thread 1 acquires value 1 for thread 2
  // - thread 2 increments clock to 2
  // - thread 2 releases to sync object 1
  // - thread 3 at time 1
  // - thread 3 acquires from sync object 1
  // - thread 3 increments clock to 2
  // - thread 1 acquires value 2 for thread 3
  // - thread 1 releases to sync object 2
  // - sync object 2 clock has 1 for thread 2 and 2 for thread 3
  // - thread 3 releases to sync object 2
  // - thread 3 sees value 2 in the clock for itself
  //   and decides that it has already released to the clock
  //   and did not acquire anything from other threads after that
  //   (the last_acquire_ check in release operation)
  // - thread 3 does not update the value for thread 2 in the clock from 1 to 2
  // - thread 4 acquires from sync object 2
  // - thread 4 detects a false race with thread 2
  //   as it should have been synchronized with thread 2 up to time 2,
  //   but because of the broken clock it is now synchronized only up to time 1
  //
  // The global_acquire_ value helps to prevent this scenario.
  // Namely, thread 3 will not trust any own clock values up to global_acquire_
  // for the purposes of the last_acquire_ optimization.
  atomic_uint64_t global_acquire_;

  // Cached SyncClock (without dirty entries and release_store_tid_).
  // We reuse it for subsequent store-release operations without intervening
  // acquire operations. Since it is shared (and thus constant), clock value
  // for the current thread is then stored in dirty entries in the SyncClock.
  // We host a reference to the table while it is cached here.
  u32 cached_idx_;
  u16 cached_size_;
  u16 cached_blocks_;

  // Number of active elements in the clk_ table (the rest is zeros).
  uptr nclk_;
  u64 clk_[kMaxTidInClock];  // Fixed size vector clock.

  bool IsAlreadyAcquired(const SyncClock *src) const;
  bool HasAcquiredAfterRelease(const SyncClock *dst) const;
  void UpdateCurrentThread(ClockCache *c, SyncClock *dst) const;
};

// Returns the epoch stored for thread `tid` in this thread's vector clock.
ALWAYS_INLINE u64 ThreadClock::get(unsigned tid) const {
  DCHECK_LT(tid, kMaxTidInClock);
  return clk_[tid];
}

// Sets the current thread's own epoch; values must not go backwards.
ALWAYS_INLINE void ThreadClock::set(u64 v) {
  DCHECK_GE(v, clk_[tid_]);
  clk_[tid_] = v;
}

// Advances the current thread's own epoch by one.
ALWAYS_INLINE void ThreadClock::tick() {
  clk_[tid_]++;
}

ALWAYS_INLINE uptr ThreadClock::size() const {
  return nclk_;
}

ALWAYS_INLINE void ThreadClock::NoteGlobalAcquire(u64 v) {
  // Here we rely on the fact that AcquireGlobal is protected by
  // ThreadRegistryLock, thus only one thread at a time executes it
  // and values passed to this function should not go backwards.
  CHECK_LE(atomic_load_relaxed(&global_acquire_), v);
  atomic_store_relaxed(&global_acquire_, v);
}

ALWAYS_INLINE SyncClock::Iter SyncClock::begin() {
  return Iter(this);
}

// The end iterator is represented by a null parent_ (see operator!=).
ALWAYS_INLINE SyncClock::Iter SyncClock::end() {
  return Iter(nullptr);
}

ALWAYS_INLINE uptr SyncClock::size() const {
  return size_;
}

ALWAYS_INLINE SyncClock::Iter::Iter(SyncClock* parent)
    : parent_(parent)
    , pos_(nullptr)
    , end_(nullptr)
    , block_(-1) {
  if (parent)
    Next();
}

ALWAYS_INLINE SyncClock::Iter& SyncClock::Iter::operator++() {
  pos_++;
  if (UNLIKELY(pos_ >= end_))
    Next();
  return *this;
}

// Iterators compare equal iff both are exhausted (parent_ set to null by
// Next()) or both refer to the same live clock; position is not compared.
ALWAYS_INLINE bool SyncClock::Iter::operator!=(const SyncClock::Iter& other) {
  return parent_ != other.parent_;
}

ALWAYS_INLINE ClockElem &SyncClock::Iter::operator*() {
  return *pos_;
}
}  // namespace __tsan

#endif  // TSAN_CLOCK_H