//===-- sanitizer_deadlock_detector.h ---------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of Sanitizer runtime.
// The deadlock detector maintains a directed graph of lock acquisitions.
// When a lock event happens, the detector checks if the locks already held by
// the current thread are reachable from the newly acquired lock.
//
// The detector can handle only a fixed number of simultaneously live locks
// (a lock is alive if it has been locked at least once and has not been
// destroyed). When the maximal number of locks is reached, the entire graph
// is flushed and a new lock epoch is started. The node ids from the old
// epochs cannot be used with any of the detector methods except for
// nodeBelongsToCurrentEpoch().
//
// FIXME: this is work in progress, nothing really works yet.
//
//===----------------------------------------------------------------------===//

#ifndef SANITIZER_DEADLOCK_DETECTOR_H
#define SANITIZER_DEADLOCK_DETECTOR_H

#include "sanitizer_bvgraph.h"
#include "sanitizer_common.h"

namespace __sanitizer {

// Thread-local state for DeadlockDetector.
// It contains the locks currently held by the owning thread.
template <class BV>
class DeadlockDetectorTLS {
 public:
  // No CTOR.
  void clear() {
    bv_.clear();
    epoch_ = 0;
    n_recursive_locks = 0;
    n_all_locks_ = 0;
  }

  bool empty() const { return bv_.empty(); }

  void ensureCurrentEpoch(uptr current_epoch) {
    if (epoch_ == current_epoch) return;
    bv_.clear();
    epoch_ = current_epoch;
    n_recursive_locks = 0;
    n_all_locks_ = 0;
  }

  uptr getEpoch() const { return epoch_; }

  // Returns true if this is the first (non-recursive) acquisition of this
  // lock.
  bool addLock(uptr lock_id, uptr current_epoch, u32 stk) {
    CHECK_EQ(epoch_, current_epoch);
    if (!bv_.setBit(lock_id)) {
      // The lock is already held by this thread; it must be recursive.
      CHECK_LT(n_recursive_locks, ARRAY_SIZE(recursive_locks));
      recursive_locks[n_recursive_locks++] = lock_id;
      return false;
    }
    CHECK_LT(n_all_locks_, ARRAY_SIZE(all_locks_with_contexts_));
    // lock_id < BV::kSize, so it can be cast to a smaller int.
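    // Record the lock together with its acquisition stack so that
    // findLockContext() can later recover the context for reports.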
    u32 lock_id_short = static_cast<u32>(lock_id);
    LockWithContext l = {lock_id_short, stk};
    all_locks_with_contexts_[n_all_locks_++] = l;
    return true;
  }

  void removeLock(uptr lock_id) {
    if (n_recursive_locks) {
      for (sptr i = n_recursive_locks - 1; i >= 0; i--) {
        if (recursive_locks[i] == lock_id) {
          n_recursive_locks--;
          Swap(recursive_locks[i], recursive_locks[n_recursive_locks]);
          return;
        }
      }
    }
    if (!bv_.clearBit(lock_id))
      return;  // Probably addLock happened before the flush.
    if (n_all_locks_) {
      for (sptr i = n_all_locks_ - 1; i >= 0; i--) {
        if (all_locks_with_contexts_[i].lock == static_cast<u32>(lock_id)) {
          Swap(all_locks_with_contexts_[i],
               all_locks_with_contexts_[n_all_locks_ - 1]);
          n_all_locks_--;
          break;
        }
      }
    }
  }

  u32 findLockContext(uptr lock_id) {
    for (uptr i = 0; i < n_all_locks_; i++)
      if (all_locks_with_contexts_[i].lock == static_cast<u32>(lock_id))
        return all_locks_with_contexts_[i].stk;
    return 0;
  }

  const BV &getLocks(uptr current_epoch) const {
    CHECK_EQ(epoch_, current_epoch);
    return bv_;
  }

  uptr getNumLocks() const { return n_all_locks_; }
  uptr getLock(uptr idx) const { return all_locks_with_contexts_[idx].lock; }

 private:
  BV bv_;
  uptr epoch_;
  uptr recursive_locks[64];
  uptr n_recursive_locks;
  struct LockWithContext {
    u32 lock;
    u32 stk;
  };
  LockWithContext all_locks_with_contexts_[64];
  uptr n_all_locks_;
};

// DeadlockDetector.
// For deadlock detection to work we need one global DeadlockDetector object
// and one DeadlockDetectorTLS object per every thread.
// This class is not thread-safe: unless explicitly stated otherwise, all of
// its methods must be protected by an external lock.
template <class BV>
class DeadlockDetector {
 public:
  typedef BV BitVector;

  uptr size() const { return g_.size(); }

  // No CTOR.
  void clear() {
    current_epoch_ = 0;
    available_nodes_.clear();
    recycled_nodes_.clear();
    g_.clear();
    n_edges_ = 0;
  }

  // Allocate a new deadlock detector node.
  // If we are out of available nodes, first try to recycle some.
  // If there is nothing to recycle, flush the graph and increment the epoch.
  // Associate 'data' (an opaque user object) with the new node.
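  // Note: node ids encode the epoch: node == current_epoch_ + index, and the
  // epoch is always a multiple of size(). This is what lets nodeToEpoch() and
  // nodeBelongsToCurrentEpoch() reject ids from earlier epochs.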
  uptr newNode(uptr data) {
    if (!available_nodes_.empty())
      return getAvailableNode(data);
    if (!recycled_nodes_.empty()) {
      for (sptr i = n_edges_ - 1; i >= 0; i--) {
        if (recycled_nodes_.getBit(edges_[i].from) ||
            recycled_nodes_.getBit(edges_[i].to)) {
          Swap(edges_[i], edges_[n_edges_ - 1]);
          n_edges_--;
        }
      }
      CHECK(available_nodes_.empty());
      // removeEdgesFrom was called in removeNode.
      g_.removeEdgesTo(recycled_nodes_);
      available_nodes_.setUnion(recycled_nodes_);
      recycled_nodes_.clear();
      return getAvailableNode(data);
    }
    // We are out of vacant nodes. Flush and increment the current_epoch_.
    current_epoch_ += size();
    recycled_nodes_.clear();
    available_nodes_.setAll();
    g_.clear();
    n_edges_ = 0;
    return getAvailableNode(data);
  }

  // Get data associated with the node created by newNode().
  uptr getData(uptr node) const { return data_[nodeToIndex(node)]; }

  bool nodeBelongsToCurrentEpoch(uptr node) {
    return node && (node / size() * size()) == current_epoch_;
  }

  void removeNode(uptr node) {
    uptr idx = nodeToIndex(node);
    CHECK(!available_nodes_.getBit(idx));
    CHECK(recycled_nodes_.setBit(idx));
    g_.removeEdgesFrom(idx);
  }

  void ensureCurrentEpoch(DeadlockDetectorTLS<BV> *dtls) {
    dtls->ensureCurrentEpoch(current_epoch_);
  }

  // Returns true if there is a cycle in the graph after this lock event.
  // Ideally should be called before the lock is acquired so that we can
  // report a deadlock before a real deadlock happens.
  bool onLockBefore(DeadlockDetectorTLS<BV> *dtls, uptr cur_node) {
    ensureCurrentEpoch(dtls);
    uptr cur_idx = nodeToIndex(cur_node);
    return g_.isReachable(cur_idx, dtls->getLocks(current_epoch_));
  }

  u32 findLockContext(DeadlockDetectorTLS<BV> *dtls, uptr node) {
    return dtls->findLockContext(nodeToIndex(node));
  }

  // Add cur_node to the set of locks currently held by dtls.
  void onLockAfter(DeadlockDetectorTLS<BV> *dtls, uptr cur_node, u32 stk = 0) {
    ensureCurrentEpoch(dtls);
    uptr cur_idx = nodeToIndex(cur_node);
    dtls->addLock(cur_idx, current_epoch_, stk);
  }

  // Experimental *racy* fast path function.
  // Returns true if all edges from the currently held locks to cur_node exist.
  bool hasAllEdges(DeadlockDetectorTLS<BV> *dtls, uptr cur_node) {
    uptr local_epoch = dtls->getEpoch();
    // Read from current_epoch_ is racy.
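    // If the epochs disagree, conservatively return false so that the caller
    // (e.g. onLockFast()) falls back to the externally locked slow path.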
    if (cur_node && local_epoch == current_epoch_ &&
        local_epoch == nodeToEpoch(cur_node)) {
      uptr cur_idx = nodeToIndexUnchecked(cur_node);
      for (uptr i = 0, n = dtls->getNumLocks(); i < n; i++) {
        if (!g_.hasEdge(dtls->getLock(i), cur_idx))
          return false;
      }
      return true;
    }
    return false;
  }

  // Adds edges from currently held locks to cur_node,
  // returns the number of added edges, and puts the sources of added edges
  // into added_edges[].
  // Should be called before onLockAfter.
  uptr addEdges(DeadlockDetectorTLS<BV> *dtls, uptr cur_node, u32 stk,
                int unique_tid) {
    ensureCurrentEpoch(dtls);
    uptr cur_idx = nodeToIndex(cur_node);
    uptr added_edges[40];
    uptr n_added_edges = g_.addEdges(dtls->getLocks(current_epoch_), cur_idx,
                                     added_edges, ARRAY_SIZE(added_edges));
    for (uptr i = 0; i < n_added_edges; i++) {
      if (n_edges_ < ARRAY_SIZE(edges_)) {
        Edge e = {(u16)added_edges[i], (u16)cur_idx,
                  dtls->findLockContext(added_edges[i]), stk,
                  unique_tid};
        edges_[n_edges_++] = e;
      }
    }
    return n_added_edges;
  }

  bool findEdge(uptr from_node, uptr to_node, u32 *stk_from, u32 *stk_to,
                int *unique_tid) {
    uptr from_idx = nodeToIndex(from_node);
    uptr to_idx = nodeToIndex(to_node);
    for (uptr i = 0; i < n_edges_; i++) {
      if (edges_[i].from == from_idx && edges_[i].to == to_idx) {
        *stk_from = edges_[i].stk_from;
        *stk_to = edges_[i].stk_to;
        *unique_tid = edges_[i].unique_tid;
        return true;
      }
    }
    return false;
  }

  // Test-only function. Handles the before/after lock events,
  // returns true if there is a cycle.
  bool onLock(DeadlockDetectorTLS<BV> *dtls, uptr cur_node, u32 stk = 0) {
    ensureCurrentEpoch(dtls);
    bool is_reachable = !isHeld(dtls, cur_node) && onLockBefore(dtls, cur_node);
    addEdges(dtls, cur_node, stk, 0);
    onLockAfter(dtls, cur_node, stk);
    return is_reachable;
  }

  // Handles the try_lock event, returns false.
  // When a try_lock event happens (i.e. a try_lock call succeeds) we need
  // to add this lock to the currently held locks, but we should not try to
  // change the lock graph or to detect a cycle. We may want to investigate
  // whether a more aggressive strategy is possible for try_lock.
  bool onTryLock(DeadlockDetectorTLS<BV> *dtls, uptr cur_node, u32 stk = 0) {
    ensureCurrentEpoch(dtls);
    uptr cur_idx = nodeToIndex(cur_node);
    dtls->addLock(cur_idx, current_epoch_, stk);
    return false;
  }

  // Returns true iff dtls is empty (no locks are currently held) and we can
  // add the node to the currently held locks w/o changing the global state.
  // This operation is thread-safe as it only touches the dtls.
  bool onFirstLock(DeadlockDetectorTLS<BV> *dtls, uptr node, u32 stk = 0) {
    if (!dtls->empty()) return false;
    if (dtls->getEpoch() && dtls->getEpoch() == nodeToEpoch(node)) {
      dtls->addLock(nodeToIndexUnchecked(node), nodeToEpoch(node), stk);
      return true;
    }
    return false;
  }

  // Finds a path between the lock 'cur_node' (currently not held in dtls)
  // and some currently held lock, returns the length of the path
  // or 0 on failure.
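  // On success, path[0] == cur_node, the last element is one of the currently
  // held locks, and all entries are node ids (not raw graph indices).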
  uptr findPathToLock(DeadlockDetectorTLS<BV> *dtls, uptr cur_node, uptr *path,
                      uptr path_size) {
    tmp_bv_.copyFrom(dtls->getLocks(current_epoch_));
    uptr idx = nodeToIndex(cur_node);
    CHECK(!tmp_bv_.getBit(idx));
    uptr res = g_.findShortestPath(idx, tmp_bv_, path, path_size);
    for (uptr i = 0; i < res; i++)
      path[i] = indexToNode(path[i]);
    if (res)
      CHECK_EQ(path[0], cur_node);
    return res;
  }

  // Handle the unlock event.
  // This operation is thread-safe as it only touches the dtls.
  void onUnlock(DeadlockDetectorTLS<BV> *dtls, uptr node) {
    if (dtls->getEpoch() == nodeToEpoch(node))
      dtls->removeLock(nodeToIndexUnchecked(node));
  }

  // Tries to handle the lock event w/o writing to global state.
  // Returns true on success.
  // This operation is thread-safe as it only touches the dtls
  // (modulo racy nature of hasAllEdges).
  bool onLockFast(DeadlockDetectorTLS<BV> *dtls, uptr node, u32 stk = 0) {
    if (hasAllEdges(dtls, node)) {
      dtls->addLock(nodeToIndexUnchecked(node), nodeToEpoch(node), stk);
      return true;
    }
    return false;
  }

  bool isHeld(DeadlockDetectorTLS<BV> *dtls, uptr node) const {
    return dtls->getLocks(current_epoch_).getBit(nodeToIndex(node));
  }

  uptr testOnlyGetEpoch() const { return current_epoch_; }
  bool testOnlyHasEdge(uptr l1, uptr l2) {
    return g_.hasEdge(nodeToIndex(l1), nodeToIndex(l2));
  }
  // idx1 and idx2 are raw indices to g_, not lock IDs.
  bool testOnlyHasEdgeRaw(uptr idx1, uptr idx2) {
    return g_.hasEdge(idx1, idx2);
  }

  void Print() {
    for (uptr from = 0; from < size(); from++)
      for (uptr to = 0; to < size(); to++)
        if (g_.hasEdge(from, to))
          Printf("  %zx => %zx\n", from, to);
  }

 private:
  void check_idx(uptr idx) const { CHECK_LT(idx, size()); }

  void check_node(uptr node) const {
    CHECK_GE(node, size());
    CHECK_EQ(current_epoch_, nodeToEpoch(node));
  }

  uptr indexToNode(uptr idx) const {
    check_idx(idx);
    return idx + current_epoch_;
  }

  uptr nodeToIndexUnchecked(uptr node) const { return node % size(); }

  uptr nodeToIndex(uptr node) const {
    check_node(node);
    return nodeToIndexUnchecked(node);
  }

  uptr nodeToEpoch(uptr node) const { return node / size() * size(); }

  uptr getAvailableNode(uptr data) {
    uptr idx = available_nodes_.getAndClearFirstOne();
    data_[idx] = data;
    return indexToNode(idx);
  }

  struct Edge {
    u16 from;
    u16 to;
    u32 stk_from;
    u32 stk_to;
    int unique_tid;
  };

  uptr current_epoch_;
  BV available_nodes_;
  BV recycled_nodes_;
  BV tmp_bv_;
  BVGraph<BV> g_;
  uptr data_[BV::kSize];
  Edge edges_[BV::kSize * 32];
  uptr n_edges_;
};

}  // namespace __sanitizer

#endif  // SANITIZER_DEADLOCK_DETECTOR_H
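
// A minimal usage sketch (mirrors the test-only API; 'MyBV' is a placeholder
// for a bit vector type such as those in sanitizer_bitvector.h, and in real
// use the detector must be guarded by an external mutex):
//
//   DeadlockDetector<MyBV> d;
//   DeadlockDetectorTLS<MyBV> dtls;
//   d.clear();
//   dtls.clear();                 // No CTORs; clear() initializes.
//   uptr a = d.newNode(0), b = d.newNode(0);
//   d.onLock(&dtls, a);           // Hold a.
//   d.onLock(&dtls, b);           // Hold b; adds edge a => b.
//   d.onUnlock(&dtls, b);
//   d.onUnlock(&dtls, a);
//   d.onLock(&dtls, b);           // Hold b.
//   d.onLock(&dtls, a);           // Returns true: cycle a => b => a.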