1//===-- tsan_clock.cc -----------------------------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file is a part of ThreadSanitizer (TSan), a race detector. 11// 12//===----------------------------------------------------------------------===// 13#include "tsan_clock.h" 14#include "tsan_rtl.h" 15#include "sanitizer_common/sanitizer_placement_new.h" 16 17// SyncClock and ThreadClock implement vector clocks for sync variables 18// (mutexes, atomic variables, file descriptors, etc) and threads, respectively. 19// ThreadClock contains fixed-size vector clock for maximum number of threads. 20// SyncClock contains growable vector clock for currently necessary number of 21// threads. 22// Together they implement very simple model of operations, namely: 23// 24// void ThreadClock::acquire(const SyncClock *src) { 25// for (int i = 0; i < kMaxThreads; i++) 26// clock[i] = max(clock[i], src->clock[i]); 27// } 28// 29// void ThreadClock::release(SyncClock *dst) const { 30// for (int i = 0; i < kMaxThreads; i++) 31// dst->clock[i] = max(dst->clock[i], clock[i]); 32// } 33// 34// void ThreadClock::ReleaseStore(SyncClock *dst) const { 35// for (int i = 0; i < kMaxThreads; i++) 36// dst->clock[i] = clock[i]; 37// } 38// 39// void ThreadClock::acq_rel(SyncClock *dst) { 40// acquire(dst); 41// release(dst); 42// } 43// 44// Conformance to this model is extensively verified in tsan_clock_test.cc. 45// However, the implementation is significantly more complex. The complexity 46// allows to implement important classes of use cases in O(1) instead of O(N). 47// 48// The use cases are: 49// 1. Singleton/once atomic that has a single release-store operation followed 50// by zillions of acquire-loads (the acquire-load is O(1)). 51// 2. Thread-local mutex (both lock and unlock can be O(1)). 52// 3. Leaf mutex (unlock is O(1)). 53// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)). 54// 5. An atomic with a single writer (writes can be O(1)). 55// The implementation dynamically adopts to workload. So if an atomic is in 56// read-only phase, these reads will be O(1); if it later switches to read/write 57// phase, the implementation will correctly handle that by switching to O(N). 58// 59// Thread-safety note: all const operations on SyncClock's are conducted under 60// a shared lock; all non-const operations on SyncClock's are conducted under 61// an exclusive lock; ThreadClock's are private to respective threads and so 62// do not need any protection. 63// 64// Description of ThreadClock state: 65// clk_ - fixed size vector clock. 66// nclk_ - effective size of the vector clock (the rest is zeros). 67// tid_ - index of the thread associated with he clock ("current thread"). 68// last_acquire_ - current thread time when it acquired something from 69// other threads. 70// 71// Description of SyncClock state: 72// clk_ - variable size vector clock, low kClkBits hold timestamp, 73// the remaining bits hold "acquired" flag (the actual value is thread's 74// reused counter); 75// if acquried == thr->reused_, then the respective thread has already 76// acquired this clock (except possibly dirty_tids_). 77// dirty_tids_ - holds up to two indeces in the vector clock that other threads 78// need to acquire regardless of "acquired" flag value; 79// release_store_tid_ - denotes that the clock state is a result of 80// release-store operation by the thread with release_store_tid_ index. 81// release_store_reused_ - reuse count of release_store_tid_. 82 83// We don't have ThreadState in these methods, so this is an ugly hack that 84// works only in C++. 85#ifndef SANITIZER_GO 86# define CPP_STAT_INC(typ) StatInc(cur_thread(), typ) 87#else 88# define CPP_STAT_INC(typ) (void)0 89#endif 90 91namespace __tsan { 92 93ThreadClock::ThreadClock(unsigned tid, unsigned reused) 94 : tid_(tid) 95 , reused_(reused + 1) { // 0 has special meaning 96 CHECK_LT(tid, kMaxTidInClock); 97 CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits); 98 nclk_ = tid_ + 1; 99 last_acquire_ = 0; 100 internal_memset(clk_, 0, sizeof(clk_)); 101 clk_[tid_].reused = reused_; 102} 103 104void ThreadClock::acquire(ClockCache *c, const SyncClock *src) { 105 DCHECK_LE(nclk_, kMaxTid); 106 DCHECK_LE(src->size_, kMaxTid); 107 CPP_STAT_INC(StatClockAcquire); 108 109 // Check if it's empty -> no need to do anything. 110 const uptr nclk = src->size_; 111 if (nclk == 0) { 112 CPP_STAT_INC(StatClockAcquireEmpty); 113 return; 114 } 115 116 // Check if we've already acquired src after the last release operation on src 117 bool acquired = false; 118 if (nclk > tid_) { 119 CPP_STAT_INC(StatClockAcquireLarge); 120 if (src->elem(tid_).reused == reused_) { 121 CPP_STAT_INC(StatClockAcquireRepeat); 122 for (unsigned i = 0; i < kDirtyTids; i++) { 123 unsigned tid = src->dirty_tids_[i]; 124 if (tid != kInvalidTid) { 125 u64 epoch = src->elem(tid).epoch; 126 if (clk_[tid].epoch < epoch) { 127 clk_[tid].epoch = epoch; 128 acquired = true; 129 } 130 } 131 } 132 if (acquired) { 133 CPP_STAT_INC(StatClockAcquiredSomething); 134 last_acquire_ = clk_[tid_].epoch; 135 } 136 return; 137 } 138 } 139 140 // O(N) acquire. 141 CPP_STAT_INC(StatClockAcquireFull); 142 nclk_ = max(nclk_, nclk); 143 for (uptr i = 0; i < nclk; i++) { 144 u64 epoch = src->elem(i).epoch; 145 if (clk_[i].epoch < epoch) { 146 clk_[i].epoch = epoch; 147 acquired = true; 148 } 149 } 150 151 // Remember that this thread has acquired this clock. 152 if (nclk > tid_) 153 src->elem(tid_).reused = reused_; 154 155 if (acquired) { 156 CPP_STAT_INC(StatClockAcquiredSomething); 157 last_acquire_ = clk_[tid_].epoch; 158 } 159} 160 161void ThreadClock::release(ClockCache *c, SyncClock *dst) const { 162 DCHECK_LE(nclk_, kMaxTid); 163 DCHECK_LE(dst->size_, kMaxTid); 164 165 if (dst->size_ == 0) { 166 // ReleaseStore will correctly set release_store_tid_, 167 // which can be important for future operations. 168 ReleaseStore(c, dst); 169 return; 170 } 171 172 CPP_STAT_INC(StatClockRelease); 173 // Check if we need to resize dst. 174 if (dst->size_ < nclk_) 175 dst->Resize(c, nclk_); 176 177 // Check if we had not acquired anything from other threads 178 // since the last release on dst. If so, we need to update 179 // only dst->elem(tid_). 180 if (dst->elem(tid_).epoch > last_acquire_) { 181 UpdateCurrentThread(dst); 182 if (dst->release_store_tid_ != tid_ || 183 dst->release_store_reused_ != reused_) 184 dst->release_store_tid_ = kInvalidTid; 185 return; 186 } 187 188 // O(N) release. 189 CPP_STAT_INC(StatClockReleaseFull); 190 // First, remember whether we've acquired dst. 191 bool acquired = IsAlreadyAcquired(dst); 192 if (acquired) 193 CPP_STAT_INC(StatClockReleaseAcquired); 194 // Update dst->clk_. 195 for (uptr i = 0; i < nclk_; i++) { 196 ClockElem &ce = dst->elem(i); 197 ce.epoch = max(ce.epoch, clk_[i].epoch); 198 ce.reused = 0; 199 } 200 // Clear 'acquired' flag in the remaining elements. 201 if (nclk_ < dst->size_) 202 CPP_STAT_INC(StatClockReleaseClearTail); 203 for (uptr i = nclk_; i < dst->size_; i++) 204 dst->elem(i).reused = 0; 205 for (unsigned i = 0; i < kDirtyTids; i++) 206 dst->dirty_tids_[i] = kInvalidTid; 207 dst->release_store_tid_ = kInvalidTid; 208 dst->release_store_reused_ = 0; 209 // If we've acquired dst, remember this fact, 210 // so that we don't need to acquire it on next acquire. 211 if (acquired) 212 dst->elem(tid_).reused = reused_; 213} 214 215void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const { 216 DCHECK_LE(nclk_, kMaxTid); 217 DCHECK_LE(dst->size_, kMaxTid); 218 CPP_STAT_INC(StatClockStore); 219 220 // Check if we need to resize dst. 221 if (dst->size_ < nclk_) 222 dst->Resize(c, nclk_); 223 224 if (dst->release_store_tid_ == tid_ && 225 dst->release_store_reused_ == reused_ && 226 dst->elem(tid_).epoch > last_acquire_) { 227 CPP_STAT_INC(StatClockStoreFast); 228 UpdateCurrentThread(dst); 229 return; 230 } 231 232 // O(N) release-store. 233 CPP_STAT_INC(StatClockStoreFull); 234 for (uptr i = 0; i < nclk_; i++) { 235 ClockElem &ce = dst->elem(i); 236 ce.epoch = clk_[i].epoch; 237 ce.reused = 0; 238 } 239 // Clear the tail of dst->clk_. 240 if (nclk_ < dst->size_) { 241 for (uptr i = nclk_; i < dst->size_; i++) { 242 ClockElem &ce = dst->elem(i); 243 ce.epoch = 0; 244 ce.reused = 0; 245 } 246 CPP_STAT_INC(StatClockStoreTail); 247 } 248 for (unsigned i = 0; i < kDirtyTids; i++) 249 dst->dirty_tids_[i] = kInvalidTid; 250 dst->release_store_tid_ = tid_; 251 dst->release_store_reused_ = reused_; 252 // Rememeber that we don't need to acquire it in future. 253 dst->elem(tid_).reused = reused_; 254} 255 256void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) { 257 CPP_STAT_INC(StatClockAcquireRelease); 258 acquire(c, dst); 259 ReleaseStore(c, dst); 260} 261 262// Updates only single element related to the current thread in dst->clk_. 263void ThreadClock::UpdateCurrentThread(SyncClock *dst) const { 264 // Update the threads time, but preserve 'acquired' flag. 265 dst->elem(tid_).epoch = clk_[tid_].epoch; 266 267 for (unsigned i = 0; i < kDirtyTids; i++) { 268 if (dst->dirty_tids_[i] == tid_) { 269 CPP_STAT_INC(StatClockReleaseFast1); 270 return; 271 } 272 if (dst->dirty_tids_[i] == kInvalidTid) { 273 CPP_STAT_INC(StatClockReleaseFast2); 274 dst->dirty_tids_[i] = tid_; 275 return; 276 } 277 } 278 // Reset all 'acquired' flags, O(N). 279 CPP_STAT_INC(StatClockReleaseSlow); 280 for (uptr i = 0; i < dst->size_; i++) 281 dst->elem(i).reused = 0; 282 for (unsigned i = 0; i < kDirtyTids; i++) 283 dst->dirty_tids_[i] = kInvalidTid; 284} 285 286// Checks whether the current threads has already acquired src. 287bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const { 288 if (src->elem(tid_).reused != reused_) 289 return false; 290 for (unsigned i = 0; i < kDirtyTids; i++) { 291 unsigned tid = src->dirty_tids_[i]; 292 if (tid != kInvalidTid) { 293 if (clk_[tid].epoch < src->elem(tid).epoch) 294 return false; 295 } 296 } 297 return true; 298} 299 300void SyncClock::Resize(ClockCache *c, uptr nclk) { 301 CPP_STAT_INC(StatClockReleaseResize); 302 if (RoundUpTo(nclk, ClockBlock::kClockCount) <= 303 RoundUpTo(size_, ClockBlock::kClockCount)) { 304 // Growing within the same block. 305 // Memory is already allocated, just increase the size. 306 size_ = nclk; 307 return; 308 } 309 if (nclk <= ClockBlock::kClockCount) { 310 // Grow from 0 to one-level table. 311 CHECK_EQ(size_, 0); 312 CHECK_EQ(tab_, 0); 313 CHECK_EQ(tab_idx_, 0); 314 size_ = nclk; 315 tab_idx_ = ctx->clock_alloc.Alloc(c); 316 tab_ = ctx->clock_alloc.Map(tab_idx_); 317 internal_memset(tab_, 0, sizeof(*tab_)); 318 return; 319 } 320 // Growing two-level table. 321 if (size_ == 0) { 322 // Allocate first level table. 323 tab_idx_ = ctx->clock_alloc.Alloc(c); 324 tab_ = ctx->clock_alloc.Map(tab_idx_); 325 internal_memset(tab_, 0, sizeof(*tab_)); 326 } else if (size_ <= ClockBlock::kClockCount) { 327 // Transform one-level table to two-level table. 328 u32 old = tab_idx_; 329 tab_idx_ = ctx->clock_alloc.Alloc(c); 330 tab_ = ctx->clock_alloc.Map(tab_idx_); 331 internal_memset(tab_, 0, sizeof(*tab_)); 332 tab_->table[0] = old; 333 } 334 // At this point we have first level table allocated. 335 // Add second level tables as necessary. 336 for (uptr i = RoundUpTo(size_, ClockBlock::kClockCount); 337 i < nclk; i += ClockBlock::kClockCount) { 338 u32 idx = ctx->clock_alloc.Alloc(c); 339 ClockBlock *cb = ctx->clock_alloc.Map(idx); 340 internal_memset(cb, 0, sizeof(*cb)); 341 CHECK_EQ(tab_->table[i/ClockBlock::kClockCount], 0); 342 tab_->table[i/ClockBlock::kClockCount] = idx; 343 } 344 size_ = nclk; 345} 346 347// Sets a single element in the vector clock. 348// This function is called only from weird places like AcquireGlobal. 349void ThreadClock::set(unsigned tid, u64 v) { 350 DCHECK_LT(tid, kMaxTid); 351 DCHECK_GE(v, clk_[tid].epoch); 352 clk_[tid].epoch = v; 353 if (nclk_ <= tid) 354 nclk_ = tid + 1; 355 last_acquire_ = clk_[tid_].epoch; 356} 357 358void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) { 359 printf("clock=["); 360 for (uptr i = 0; i < nclk_; i++) 361 printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch); 362 printf("] reused=["); 363 for (uptr i = 0; i < nclk_; i++) 364 printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused); 365 printf("] tid=%u/%u last_acq=%llu", 366 tid_, reused_, last_acquire_); 367} 368 369SyncClock::SyncClock() 370 : release_store_tid_(kInvalidTid) 371 , release_store_reused_() 372 , tab_() 373 , tab_idx_() 374 , size_() { 375 for (uptr i = 0; i < kDirtyTids; i++) 376 dirty_tids_[i] = kInvalidTid; 377} 378 379SyncClock::~SyncClock() { 380 // Reset must be called before dtor. 381 CHECK_EQ(size_, 0); 382 CHECK_EQ(tab_, 0); 383 CHECK_EQ(tab_idx_, 0); 384} 385 386void SyncClock::Reset(ClockCache *c) { 387 if (size_ == 0) { 388 // nothing 389 } else if (size_ <= ClockBlock::kClockCount) { 390 // One-level table. 391 ctx->clock_alloc.Free(c, tab_idx_); 392 } else { 393 // Two-level table. 394 for (uptr i = 0; i < size_; i += ClockBlock::kClockCount) 395 ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]); 396 ctx->clock_alloc.Free(c, tab_idx_); 397 } 398 tab_ = 0; 399 tab_idx_ = 0; 400 size_ = 0; 401 release_store_tid_ = kInvalidTid; 402 release_store_reused_ = 0; 403 for (uptr i = 0; i < kDirtyTids; i++) 404 dirty_tids_[i] = kInvalidTid; 405} 406 407ClockElem &SyncClock::elem(unsigned tid) const { 408 DCHECK_LT(tid, size_); 409 if (size_ <= ClockBlock::kClockCount) 410 return tab_->clock[tid]; 411 u32 idx = tab_->table[tid / ClockBlock::kClockCount]; 412 ClockBlock *cb = ctx->clock_alloc.Map(idx); 413 return cb->clock[tid % ClockBlock::kClockCount]; 414} 415 416void SyncClock::DebugDump(int(*printf)(const char *s, ...)) { 417 printf("clock=["); 418 for (uptr i = 0; i < size_; i++) 419 printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch); 420 printf("] reused=["); 421 for (uptr i = 0; i < size_; i++) 422 printf("%s%llu", i == 0 ? "" : ",", elem(i).reused); 423 printf("] release_store_tid=%d/%d dirty_tids=%d/%d", 424 release_store_tid_, release_store_reused_, 425 dirty_tids_[0], dirty_tids_[1]); 426} 427} // namespace __tsan 428