//===-- tsan_clock.cc -----------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#include "tsan_clock.h"
#include "tsan_rtl.h"
#include "sanitizer_common/sanitizer_placement_new.h"

// SyncClock and ThreadClock implement vector clocks for sync variables
// (mutexes, atomic variables, file descriptors, etc.) and threads,
// respectively. ThreadClock contains a fixed-size vector clock for the
// maximum number of threads. SyncClock contains a growable vector clock for
// the currently necessary number of threads.
// Together they implement a very simple model of operations, namely:
//
//   void ThreadClock::acquire(const SyncClock *src) {
//     for (int i = 0; i < kMaxThreads; i++)
//       clock[i] = max(clock[i], src->clock[i]);
//   }
//
//   void ThreadClock::release(SyncClock *dst) const {
//     for (int i = 0; i < kMaxThreads; i++)
//       dst->clock[i] = max(dst->clock[i], clock[i]);
//   }
//
//   void ThreadClock::ReleaseStore(SyncClock *dst) const {
//     for (int i = 0; i < kMaxThreads; i++)
//       dst->clock[i] = clock[i];
//   }
//
//   void ThreadClock::acq_rel(SyncClock *dst) {
//     acquire(dst);
//     release(dst);
//   }
//
// Conformance to this model is extensively verified in tsan_clock_test.cc.
// However, the implementation is significantly more complex. The complexity
// allows implementing important classes of use cases in O(1) instead of O(N).
//
// The use cases are:
// 1. Singleton/once atomic that has a single release-store operation followed
//    by zillions of acquire-loads (the acquire-load is O(1)).
// 2. Thread-local mutex (both lock and unlock can be O(1)).
// 3. Leaf mutex (unlock is O(1)).
// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
// 5. An atomic with a single writer (writes can be O(1)).
// The implementation dynamically adapts to the workload. So if an atomic is
// in a read-only phase, reads will be O(1); if it later switches to a
// read/write phase, the implementation will correctly handle that by
// switching to O(N).
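//
// As an illustrative sketch of use case 1 (hypothetical user code, not part
// of this file): one thread publishes a flag once with a release-store, and
// many readers then spin on an acquire-load. Only each reader's first
// acquire pays O(N) and stamps its reuse counter into the clock; subsequent
// acquires see a matching stamp and return after checking only the dirty
// entries, i.e. in O(1):
//
//   std::atomic<int> g_ready;  // hypothetical user code
//   // writer:  g_ready.store(1, std::memory_order_release);
//   // readers: while (!g_ready.load(std::memory_order_acquire)) {}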
//
// Thread-safety note: all const operations on SyncClocks are conducted under
// a shared lock; all non-const operations on SyncClocks are conducted under
// an exclusive lock; ThreadClocks are private to their respective threads
// and so do not need any protection.
//
// Description of ThreadClock state:
// clk_ - fixed size vector clock.
// nclk_ - effective size of the vector clock (the rest is zeros).
// tid_ - index of the thread associated with the clock ("current thread").
// last_acquire_ - the current thread's time when it last acquired something
//   from other threads.
//
// Description of SyncClock state:
// clk_ - variable size vector clock, low kClkBits hold timestamp,
//   the remaining bits hold the "acquired" flag (the actual value is the
//   thread's reuse counter);
//   if acquired == thr->reused_, then the respective thread has already
//   acquired this clock (except possibly dirty_tids_).
// dirty_tids_ - holds up to two indices in the vector clock that other
//   threads need to acquire regardless of the "acquired" flag value;
// release_store_tid_ - denotes that the clock state is the result of a
//   release-store operation by the thread with index release_store_tid_.
// release_store_reused_ - reuse count of release_store_tid_.
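//
// As an illustrative sketch (conceptual, mirroring the acquire() fast path
// below): if thread 3 performs two releases on the same mutex with no
// intervening acquires, the second release only bumps its own element and
// records index 3 in dirty_tids_. A later acquire by a thread whose
// "acquired" stamp still matches its reuse counter then merges just the
// dirty entries:
//
//   for (unsigned i = 0; i < kDirtyTids; i++) {
//     unsigned tid = src->dirty_tids_[i];
//     if (tid != kInvalidTid)
//       clk_[tid].epoch = max(clk_[tid].epoch, src->elem(tid).epoch);
//   }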

// We don't have ThreadState in these methods, so this is an ugly hack that
// works only in C++.
#ifndef SANITIZER_GO
# define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
#else
# define CPP_STAT_INC(typ) (void)0
#endif

namespace __tsan {

ThreadClock::ThreadClock(unsigned tid, unsigned reused)
    : tid_(tid)
    , reused_(reused + 1) {  // 0 has special meaning
  CHECK_LT(tid, kMaxTidInClock);
  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
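  // The shift round-trip above keeps only the low 64 - kClkBits bits, so the
  // CHECK fires if the reuse counter can no longer be packed alongside a
  // kClkBits-wide timestamp.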
  nclk_ = tid_ + 1;
  last_acquire_ = 0;
  internal_memset(clk_, 0, sizeof(clk_));
  clk_[tid_].reused = reused_;
}

void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(src->size_, kMaxTid);
  CPP_STAT_INC(StatClockAcquire);

  // Check if it's empty -> no need to do anything.
  const uptr nclk = src->size_;
  if (nclk == 0) {
    CPP_STAT_INC(StatClockAcquireEmpty);
    return;
  }

  // Check if we've already acquired src after the last release
  // operation on src.
  bool acquired = false;
  if (nclk > tid_) {
    CPP_STAT_INC(StatClockAcquireLarge);
    if (src->elem(tid_).reused == reused_) {
      CPP_STAT_INC(StatClockAcquireRepeat);
      for (unsigned i = 0; i < kDirtyTids; i++) {
        unsigned tid = src->dirty_tids_[i];
        if (tid != kInvalidTid) {
          u64 epoch = src->elem(tid).epoch;
          if (clk_[tid].epoch < epoch) {
            clk_[tid].epoch = epoch;
            acquired = true;
          }
        }
      }
      if (acquired) {
        CPP_STAT_INC(StatClockAcquiredSomething);
        last_acquire_ = clk_[tid_].epoch;
      }
      return;
    }
  }

  // O(N) acquire.
  CPP_STAT_INC(StatClockAcquireFull);
  nclk_ = max(nclk_, nclk);
  for (uptr i = 0; i < nclk; i++) {
    u64 epoch = src->elem(i).epoch;
    if (clk_[i].epoch < epoch) {
      clk_[i].epoch = epoch;
      acquired = true;
    }
  }

  // Remember that this thread has acquired this clock.
  if (nclk > tid_)
    src->elem(tid_).reused = reused_;

  if (acquired) {
    CPP_STAT_INC(StatClockAcquiredSomething);
    last_acquire_ = clk_[tid_].epoch;
  }
}

void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);

  if (dst->size_ == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(c, dst);
    return;
  }

  CPP_STAT_INC(StatClockRelease);
  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  // Check if we have not acquired anything from other threads
  // since the last release on dst. If so, we only need to update
  // dst->elem(tid_).
  if (dst->elem(tid_).epoch > last_acquire_) {
    UpdateCurrentThread(dst);
    if (dst->release_store_tid_ != tid_ ||
        dst->release_store_reused_ != reused_)
      dst->release_store_tid_ = kInvalidTid;
    return;
  }

  // O(N) release.
  CPP_STAT_INC(StatClockReleaseFull);
  // First, remember whether we've acquired dst.
  bool acquired = IsAlreadyAcquired(dst);
  if (acquired)
    CPP_STAT_INC(StatClockReleaseAcquired);
  // Update dst->clk_.
  for (uptr i = 0; i < nclk_; i++) {
    ClockElem &ce = dst->elem(i);
    ce.epoch = max(ce.epoch, clk_[i].epoch);
    ce.reused = 0;
  }
  // Clear the 'acquired' flag in the remaining elements.
  if (nclk_ < dst->size_)
    CPP_STAT_INC(StatClockReleaseClearTail);
  for (uptr i = nclk_; i < dst->size_; i++)
    dst->elem(i).reused = 0;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = kInvalidTid;
  dst->release_store_reused_ = 0;
  // If we've acquired dst, remember this fact,
  // so that we don't need to acquire it on the next acquire.
  if (acquired)
    dst->elem(tid_).reused = reused_;
}

void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);
  CPP_STAT_INC(StatClockStore);

  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  if (dst->release_store_tid_ == tid_ &&
      dst->release_store_reused_ == reused_ &&
      dst->elem(tid_).epoch > last_acquire_) {
    CPP_STAT_INC(StatClockStoreFast);
    UpdateCurrentThread(dst);
    return;
  }

  // O(N) release-store.
  CPP_STAT_INC(StatClockStoreFull);
  for (uptr i = 0; i < nclk_; i++) {
    ClockElem &ce = dst->elem(i);
    ce.epoch = clk_[i].epoch;
    ce.reused = 0;
  }
  // Clear the tail of dst->clk_.
  if (nclk_ < dst->size_) {
    for (uptr i = nclk_; i < dst->size_; i++) {
      ClockElem &ce = dst->elem(i);
      ce.epoch = 0;
      ce.reused = 0;
    }
    CPP_STAT_INC(StatClockStoreTail);
  }
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = tid_;
  dst->release_store_reused_ = reused_;
  // Remember that we don't need to acquire it in the future.
  dst->elem(tid_).reused = reused_;
}

void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
  CPP_STAT_INC(StatClockAcquireRelease);
  acquire(c, dst);
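  // After the acquire above our clock dominates dst, so the release-store
  // below is equivalent to a plain release.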
  ReleaseStore(c, dst);
}

// Updates only the single element related to the current thread
// in dst->clk_.
void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
  // Update the thread's time, but preserve the 'acquired' flag.
  dst->elem(tid_).epoch = clk_[tid_].epoch;

  for (unsigned i = 0; i < kDirtyTids; i++) {
    if (dst->dirty_tids_[i] == tid_) {
      CPP_STAT_INC(StatClockReleaseFast1);
      return;
    }
    if (dst->dirty_tids_[i] == kInvalidTid) {
      CPP_STAT_INC(StatClockReleaseFast2);
      dst->dirty_tids_[i] = tid_;
      return;
    }
  }
  // Reset all 'acquired' flags, O(N).
  CPP_STAT_INC(StatClockReleaseSlow);
  for (uptr i = 0; i < dst->size_; i++)
    dst->elem(i).reused = 0;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
}

// Checks whether the current thread has already acquired src.
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
  if (src->elem(tid_).reused != reused_)
    return false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    unsigned tid = src->dirty_tids_[i];
    if (tid != kInvalidTid) {
      if (clk_[tid].epoch < src->elem(tid).epoch)
        return false;
    }
  }
  return true;
}

void SyncClock::Resize(ClockCache *c, uptr nclk) {
  CPP_STAT_INC(StatClockReleaseResize);
  if (RoundUpTo(nclk, ClockBlock::kClockCount) <=
      RoundUpTo(size_, ClockBlock::kClockCount)) {
    // Growing within the same block.
    // Memory is already allocated, just increase the size.
    size_ = nclk;
    return;
  }
  if (nclk <= ClockBlock::kClockCount) {
    // Grow from 0 to a one-level table.
    CHECK_EQ(size_, 0);
    CHECK_EQ(tab_, 0);
    CHECK_EQ(tab_idx_, 0);
    size_ = nclk;
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    return;
  }
  // Growing a two-level table.
  if (size_ == 0) {
    // Allocate the first-level table.
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
  } else if (size_ <= ClockBlock::kClockCount) {
    // Transform the one-level table into a two-level table.
    u32 old = tab_idx_;
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    tab_->table[0] = old;
  }
  // At this point we have the first-level table allocated.
  // Add second-level tables as necessary.
  for (uptr i = RoundUpTo(size_, ClockBlock::kClockCount);
      i < nclk; i += ClockBlock::kClockCount) {
    u32 idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *cb = ctx->clock_alloc.Map(idx);
    internal_memset(cb, 0, sizeof(*cb));
    CHECK_EQ(tab_->table[i/ClockBlock::kClockCount], 0);
    tab_->table[i/ClockBlock::kClockCount] = idx;
  }
  size_ = nclk;
}
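
// As an illustrative sketch of the resulting layout (with K standing for
// ClockBlock::kClockCount): while size_ <= K the single allocated block is
// used directly as an array of elements, so elem(i) is simply
// tab_->clock[i]. Once size_ > K the block becomes a first-level index and
// elem(i) resolves through two levels, as SyncClock::elem() below does:
//
//   u32 idx = tab_->table[i / K];                // first level: block index
//   ClockBlock *cb = ctx->clock_alloc.Map(idx);  // map index to block
//   return cb->clock[i % K];                     // second level: element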

// Sets a single element in the vector clock.
// This function is called only from weird places like AcquireGlobal.
void ThreadClock::set(unsigned tid, u64 v) {
  DCHECK_LT(tid, kMaxTid);
  DCHECK_GE(v, clk_[tid].epoch);
  clk_[tid].epoch = v;
  if (nclk_ <= tid)
    nclk_ = tid + 1;
  last_acquire_ = clk_[tid_].epoch;
}

void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
  printf("] reused=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
  printf("] tid=%u/%u last_acq=%llu",
      tid_, reused_, last_acquire_);
}

SyncClock::SyncClock()
    : release_store_tid_(kInvalidTid)
    , release_store_reused_()
    , tab_()
    , tab_idx_()
    , size_() {
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_tids_[i] = kInvalidTid;
}

SyncClock::~SyncClock() {
  // Reset must be called before the dtor.
  CHECK_EQ(size_, 0);
  CHECK_EQ(tab_, 0);
  CHECK_EQ(tab_idx_, 0);
}

void SyncClock::Reset(ClockCache *c) {
  if (size_ == 0) {
    // nothing
  } else if (size_ <= ClockBlock::kClockCount) {
    // One-level table.
    ctx->clock_alloc.Free(c, tab_idx_);
  } else {
    // Two-level table.
    for (uptr i = 0; i < size_; i += ClockBlock::kClockCount)
      ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]);
    ctx->clock_alloc.Free(c, tab_idx_);
  }
  tab_ = 0;
  tab_idx_ = 0;
  size_ = 0;
  release_store_tid_ = kInvalidTid;
  release_store_reused_ = 0;
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_tids_[i] = kInvalidTid;
}

ClockElem &SyncClock::elem(unsigned tid) const {
  DCHECK_LT(tid, size_);
  if (size_ <= ClockBlock::kClockCount)
    return tab_->clock[tid];
  u32 idx = tab_->table[tid / ClockBlock::kClockCount];
  ClockBlock *cb = ctx->clock_alloc.Map(idx);
  return cb->clock[tid % ClockBlock::kClockCount];
}

void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
  printf("] reused=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
  printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
      release_store_tid_, release_store_reused_,
      dirty_tids_[0], dirty_tids_[1]);
}
}  // namespace __tsan