1/** 2 * testcode/checklocks.c - wrapper on locks that checks access. 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36#include "config.h" 37#include <signal.h> 38#include "util/locks.h" /* include before checklocks.h */ 39#include "testcode/checklocks.h" 40 41/** 42 * \file 43 * Locks that are checked. 44 * 45 * Ugly hack: uses the fact that workers start with an int thread_num, and 46 * are passed to thread_create to make the thread numbers here the same as 47 * those used for logging which is nice. 48 * 49 * Todo: 50 * - debug status print, of thread lock stacks, and current waiting. 51 */ 52#ifdef USE_THREAD_DEBUG 53 54/** How long to wait before lock attempt is a failure. */ 55#define CHECK_LOCK_TIMEOUT 120 /* seconds */ 56/** How long to wait before join attempt is a failure. */ 57#define CHECK_JOIN_TIMEOUT 120 /* seconds */ 58 59/** if key has been created */ 60static int key_created = 0; 61/** if the key was deleted, i.e. we have quit */ 62static int key_deleted = 0; 63/** we hide the thread debug info with this key. */ 64static ub_thread_key_type thr_debug_key; 65/** the list of threads, so all threads can be examined. NULL if unused. */ 66static struct thr_check* thread_infos[THRDEBUG_MAX_THREADS]; 67/** do we check locking order */ 68int check_locking_order = 1; 69/** the pid of this runset, reasonably unique. */ 70static pid_t check_lock_pid; 71 72/** print all possible debug info on the state of the system */ 73static void total_debug_info(void); 74/** print pretty lock error and exit (decl for NORETURN attribute) */ 75static void lock_error(struct checked_lock* lock, const char* func, 76 const char* file, int line, const char* err) ATTR_NORETURN; 77 78/** print pretty lock error and exit */ 79static void lock_error(struct checked_lock* lock, 80 const char* func, const char* file, int line, const char* err) 81{ 82 log_err("lock error (description follows)"); 83 log_err("Created at %s %s:%d", lock->create_func, 84 lock->create_file, lock->create_line); 85 if(lock->holder_func && lock->holder_file) 86 log_err("Previously %s %s:%d", lock->holder_func, 87 lock->holder_file, lock->holder_line); 88 log_err("At %s %s:%d", func, file, line); 89 log_err("Error for %s lock: %s", 90 (lock->type==check_lock_mutex)?"mutex": ( 91 (lock->type==check_lock_spinlock)?"spinlock": ( 92 (lock->type==check_lock_rwlock)?"rwlock": "badtype")), err); 93 log_err("complete status display:"); 94 total_debug_info(); 95 fatal_exit("bailing out"); 96} 97 98/** 99 * Obtain lock on debug lock structure. This could be a deadlock by the caller. 100 * The debug code itself does not deadlock. Anyway, check with timeouts. 101 * @param lock: on what to acquire lock. 102 * @param func: user level caller identification. 103 * @param file: user level caller identification. 104 * @param line: user level caller identification. 105 */ 106static void 107acquire_locklock(struct checked_lock* lock, 108 const char* func, const char* file, int line) 109{ 110 struct timespec to; 111 int err; 112 int contend = 0; 113 /* first try; inc contention counter if not immediately */ 114 if((err = pthread_mutex_trylock(&lock->lock))) { 115 if(err==EBUSY) 116 contend++; 117 else fatal_exit("error in mutex_trylock: %s", strerror(err)); 118 } 119 if(!err) 120 return; /* immediate success */ 121 to.tv_sec = time(NULL) + CHECK_LOCK_TIMEOUT; 122 to.tv_nsec = 0; 123 err = pthread_mutex_timedlock(&lock->lock, &to); 124 if(err) { 125 log_err("in acquiring locklock: %s", strerror(err)); 126 lock_error(lock, func, file, line, "acquire locklock"); 127 } 128 /* since we hold the lock, we can edit the contention_count */ 129 lock->contention_count += contend; 130} 131 132/** add protected region */ 133void 134lock_protect(void *p, void* area, size_t size) 135{ 136 struct checked_lock* lock = *(struct checked_lock**)p; 137 struct protected_area* e = (struct protected_area*)malloc( 138 sizeof(struct protected_area)); 139 if(!e) 140 fatal_exit("lock_protect: out of memory"); 141 e->region = area; 142 e->size = size; 143 e->hold = malloc(size); 144 if(!e->hold) 145 fatal_exit("lock_protect: out of memory"); 146 memcpy(e->hold, e->region, e->size); 147 148 acquire_locklock(lock, __func__, __FILE__, __LINE__); 149 e->next = lock->prot; 150 lock->prot = e; 151 LOCKRET(pthread_mutex_unlock(&lock->lock)); 152} 153 154/** remove protected region */ 155void 156lock_unprotect(void* mangled, void* area) 157{ 158 struct checked_lock* lock = *(struct checked_lock**)mangled; 159 struct protected_area* p, **prevp; 160 if(!lock) 161 return; 162 acquire_locklock(lock, __func__, __FILE__, __LINE__); 163 p = lock->prot; 164 prevp = &lock->prot; 165 while(p) { 166 if(p->region == area) { 167 *prevp = p->next; 168 free(p->hold); 169 free(p); 170 LOCKRET(pthread_mutex_unlock(&lock->lock)); 171 return; 172 } 173 prevp = &p->next; 174 p = p->next; 175 } 176 LOCKRET(pthread_mutex_unlock(&lock->lock)); 177} 178 179/** 180 * Check protected memory region. Memory compare. Exit on error. 181 * @param lock: which lock to check. 182 * @param func: location we are now (when failure is detected). 183 * @param file: location we are now (when failure is detected). 184 * @param line: location we are now (when failure is detected). 185 */ 186static void 187prot_check(struct checked_lock* lock, 188 const char* func, const char* file, int line) 189{ 190 struct protected_area* p = lock->prot; 191 while(p) { 192 if(memcmp(p->hold, p->region, p->size) != 0) { 193 log_hex("memory prev", p->hold, p->size); 194 log_hex("memory here", p->region, p->size); 195 lock_error(lock, func, file, line, 196 "protected area modified"); 197 } 198 p = p->next; 199 } 200} 201 202/** Copy protected memory region */ 203static void 204prot_store(struct checked_lock* lock) 205{ 206 struct protected_area* p = lock->prot; 207 while(p) { 208 memcpy(p->hold, p->region, p->size); 209 p = p->next; 210 } 211} 212 213/** get memory held by lock */ 214size_t 215lock_get_mem(void* pp) 216{ 217 size_t s; 218 struct checked_lock* lock = *(struct checked_lock**)pp; 219 struct protected_area* p; 220 s = sizeof(struct checked_lock); 221 acquire_locklock(lock, __func__, __FILE__, __LINE__); 222 for(p = lock->prot; p; p = p->next) { 223 s += sizeof(struct protected_area); 224 s += p->size; 225 } 226 LOCKRET(pthread_mutex_unlock(&lock->lock)); 227 return s; 228} 229 230/** write lock trace info to file, while you hold those locks */ 231static void 232ordercheck_locklock(struct thr_check* thr, struct checked_lock* lock) 233{ 234 int info[4]; 235 if(!check_locking_order) return; 236 if(!thr->holding_first) return; /* no older lock, no info */ 237 /* write: <lock id held> <lock id new> <file> <line> */ 238 info[0] = thr->holding_first->create_thread; 239 info[1] = thr->holding_first->create_instance; 240 info[2] = lock->create_thread; 241 info[3] = lock->create_instance; 242 if(fwrite(info, 4*sizeof(int), 1, thr->order_info) != 1 || 243 fwrite(lock->holder_file, strlen(lock->holder_file)+1, 1, 244 thr->order_info) != 1 || 245 fwrite(&lock->holder_line, sizeof(int), 1, 246 thr->order_info) != 1) 247 log_err("fwrite: %s", strerror(errno)); 248} 249 250/** write ordercheck lock creation details to file */ 251static void 252ordercheck_lockcreate(struct thr_check* thr, struct checked_lock* lock) 253{ 254 /* write: <ffff = create> <lock id> <file> <line> */ 255 int cmd = -1; 256 if(!check_locking_order) return; 257 258 if( fwrite(&cmd, sizeof(int), 1, thr->order_info) != 1 || 259 fwrite(&lock->create_thread, sizeof(int), 1, 260 thr->order_info) != 1 || 261 fwrite(&lock->create_instance, sizeof(int), 1, 262 thr->order_info) != 1 || 263 fwrite(lock->create_file, strlen(lock->create_file)+1, 1, 264 thr->order_info) != 1 || 265 fwrite(&lock->create_line, sizeof(int), 1, 266 thr->order_info) != 1) 267 log_err("fwrite: %s", strerror(errno)); 268} 269 270/** alloc struct, init lock empty */ 271void 272checklock_init(enum check_lock_type type, struct checked_lock** lock, 273 const char* func, const char* file, int line) 274{ 275 struct checked_lock* e = (struct checked_lock*)calloc(1, 276 sizeof(struct checked_lock)); 277 struct thr_check *thr = (struct thr_check*)pthread_getspecific( 278 thr_debug_key); 279 if(!e) 280 fatal_exit("%s %s %d: out of memory", func, file, line); 281 if(!thr) { 282 /* this is called when log_init() calls lock_init() 283 * functions, and the test check code has not yet 284 * been initialised. But luckily, the checklock_start() 285 * routine can be called multiple times without ill effect. 286 */ 287 checklock_start(); 288 thr = (struct thr_check*)pthread_getspecific(thr_debug_key); 289 } 290 if(!thr) 291 fatal_exit("%s %s %d: lock_init no thread info", func, file, 292 line); 293 *lock = e; 294 e->type = type; 295 e->create_func = func; 296 e->create_file = file; 297 e->create_line = line; 298 e->create_thread = thr->num; 299 e->create_instance = thr->locks_created++; 300 ordercheck_lockcreate(thr, e); 301 LOCKRET(pthread_mutex_init(&e->lock, NULL)); 302 switch(e->type) { 303 case check_lock_mutex: 304 LOCKRET(pthread_mutex_init(&e->u.mutex, NULL)); 305 break; 306 case check_lock_spinlock: 307 LOCKRET(pthread_spin_init(&e->u.spinlock, PTHREAD_PROCESS_PRIVATE)); 308 break; 309 case check_lock_rwlock: 310 LOCKRET(pthread_rwlock_init(&e->u.rwlock, NULL)); 311 break; 312 default: 313 log_assert(0); 314 } 315} 316 317/** delete prot items */ 318static void 319prot_clear(struct checked_lock* lock) 320{ 321 struct protected_area* p=lock->prot, *np; 322 while(p) { 323 np = p->next; 324 free(p->hold); 325 free(p); 326 p = np; 327 } 328} 329 330/** check if type is OK for the lock given */ 331static void 332checktype(enum check_lock_type type, struct checked_lock* lock, 333 const char* func, const char* file, int line) 334{ 335 if(!lock) 336 fatal_exit("use of null/deleted lock at %s %s:%d", 337 func, file, line); 338 if(type != lock->type) { 339 lock_error(lock, func, file, line, "wrong lock type"); 340 } 341} 342 343/** check if OK, free struct */ 344void 345checklock_destroy(enum check_lock_type type, struct checked_lock** lock, 346 const char* func, const char* file, int line) 347{ 348 const size_t contention_interest = 1; /* promille contented locks */ 349 struct checked_lock* e; 350 if(!lock) 351 return; 352 e = *lock; 353 if(!e) 354 return; 355 checktype(type, e, func, file, line); 356 357 /* check if delete is OK */ 358 acquire_locklock(e, func, file, line); 359 if(e->hold_count != 0) 360 lock_error(e, func, file, line, "delete while locked."); 361 if(e->wait_count != 0) 362 lock_error(e, func, file, line, "delete while waited on."); 363 prot_check(e, func, file, line); 364 *lock = NULL; /* use after free will fail */ 365 LOCKRET(pthread_mutex_unlock(&e->lock)); 366 367 /* contention, look at fraction in trouble. */ 368 if(e->history_count > 1 && 369 1000*e->contention_count/e->history_count > contention_interest) { 370 log_info("lock created %s %s %d has contention %u of %u (%d%%)", 371 e->create_func, e->create_file, e->create_line, 372 (unsigned int)e->contention_count, 373 (unsigned int)e->history_count, 374 (int)(100*e->contention_count/e->history_count)); 375 } 376 377 /* delete it */ 378 LOCKRET(pthread_mutex_destroy(&e->lock)); 379 prot_clear(e); 380 /* since nobody holds the lock - see check above, no need to unlink 381 * from the thread-held locks list. */ 382 switch(e->type) { 383 case check_lock_mutex: 384 LOCKRET(pthread_mutex_destroy(&e->u.mutex)); 385 break; 386 case check_lock_spinlock: 387 LOCKRET(pthread_spin_destroy(&e->u.spinlock)); 388 break; 389 case check_lock_rwlock: 390 LOCKRET(pthread_rwlock_destroy(&e->u.rwlock)); 391 break; 392 default: 393 log_assert(0); 394 } 395 memset(e, 0, sizeof(struct checked_lock)); 396 free(e); 397} 398 399/** finish acquiring lock, shared between _(rd|wr||)lock() routines */ 400static void 401finish_acquire_lock(struct thr_check* thr, struct checked_lock* lock, 402 const char* func, const char* file, int line) 403{ 404 thr->waiting = NULL; 405 lock->wait_count --; 406 lock->holder = thr; 407 lock->hold_count ++; 408 lock->holder_func = func; 409 lock->holder_file = file; 410 lock->holder_line = line; 411 ordercheck_locklock(thr, lock); 412 413 /* insert in thread lock list, as first */ 414 lock->prev_held_lock[thr->num] = NULL; 415 lock->next_held_lock[thr->num] = thr->holding_first; 416 if(thr->holding_first) 417 /* no need to lock it, since this thread already holds the 418 * lock (since it is on this list) and we only edit thr->num 419 * member in array. So it is safe. */ 420 thr->holding_first->prev_held_lock[thr->num] = lock; 421 else thr->holding_last = lock; 422 thr->holding_first = lock; 423} 424 425/** 426 * Locking routine. 427 * @param type: as passed by user. 428 * @param lock: as passed by user. 429 * @param func: caller location. 430 * @param file: caller location. 431 * @param line: caller location. 432 * @param tryfunc: the pthread_mutex_trylock or similar function. 433 * @param timedfunc: the pthread_mutex_timedlock or similar function. 434 * Uses absolute timeout value. 435 * @param arg: what to pass to tryfunc and timedlock. 436 * @param exclusive: if lock must be exclusive (only one allowed). 437 * @param getwr: if attempts to get writelock (or readlock) for rwlocks. 438 */ 439static void 440checklock_lockit(enum check_lock_type type, struct checked_lock* lock, 441 const char* func, const char* file, int line, 442 int (*tryfunc)(void*), int (*timedfunc)(void*, struct timespec*), 443 void* arg, int exclusive, int getwr) 444{ 445 int err; 446 int contend = 0; 447 struct thr_check *thr = (struct thr_check*)pthread_getspecific( 448 thr_debug_key); 449 checktype(type, lock, func, file, line); 450 if(!thr) lock_error(lock, func, file, line, "no thread info"); 451 452 acquire_locklock(lock, func, file, line); 453 lock->wait_count ++; 454 thr->waiting = lock; 455 if(exclusive && lock->hold_count > 0 && lock->holder == thr) 456 lock_error(lock, func, file, line, "thread already owns lock"); 457 if(type==check_lock_rwlock && getwr && lock->writeholder == thr) 458 lock_error(lock, func, file, line, "thread already has wrlock"); 459 LOCKRET(pthread_mutex_unlock(&lock->lock)); 460 461 /* first try; if busy increase contention counter */ 462 if((err=tryfunc(arg))) { 463 struct timespec to; 464 if(err != EBUSY) log_err("trylock: %s", strerror(err)); 465 to.tv_sec = time(NULL) + CHECK_LOCK_TIMEOUT; 466 to.tv_nsec = 0; 467 if((err=timedfunc(arg, &to))) { 468 if(err == ETIMEDOUT) 469 lock_error(lock, func, file, line, 470 "timeout possible deadlock"); 471 log_err("timedlock: %s", strerror(err)); 472 } 473 contend ++; 474 } 475 /* got the lock */ 476 477 acquire_locklock(lock, func, file, line); 478 lock->contention_count += contend; 479 lock->history_count++; 480 if(exclusive && lock->hold_count > 0) 481 lock_error(lock, func, file, line, "got nonexclusive lock"); 482 if(type==check_lock_rwlock && getwr && lock->writeholder) 483 lock_error(lock, func, file, line, "got nonexclusive wrlock"); 484 if(type==check_lock_rwlock && getwr) 485 lock->writeholder = thr; 486 /* check the memory areas for unauthorized changes, 487 * between last unlock time and current lock time. 488 * we check while holding the lock (threadsafe). 489 */ 490 if(getwr || exclusive) 491 prot_check(lock, func, file, line); 492 finish_acquire_lock(thr, lock, func, file, line); 493 LOCKRET(pthread_mutex_unlock(&lock->lock)); 494} 495 496/** helper for rdlock: try */ 497static int try_rd(void* arg) 498{ return pthread_rwlock_tryrdlock((pthread_rwlock_t*)arg); } 499/** helper for rdlock: timed */ 500static int timed_rd(void* arg, struct timespec* to) 501{ return pthread_rwlock_timedrdlock((pthread_rwlock_t*)arg, to); } 502 503/** check if OK, lock */ 504void 505checklock_rdlock(enum check_lock_type type, struct checked_lock* lock, 506 const char* func, const char* file, int line) 507{ 508 if(key_deleted) 509 return; 510 511 log_assert(type == check_lock_rwlock); 512 checklock_lockit(type, lock, func, file, line, 513 try_rd, timed_rd, &lock->u.rwlock, 0, 0); 514} 515 516/** helper for wrlock: try */ 517static int try_wr(void* arg) 518{ return pthread_rwlock_trywrlock((pthread_rwlock_t*)arg); } 519/** helper for wrlock: timed */ 520static int timed_wr(void* arg, struct timespec* to) 521{ return pthread_rwlock_timedwrlock((pthread_rwlock_t*)arg, to); } 522 523/** check if OK, lock */ 524void 525checklock_wrlock(enum check_lock_type type, struct checked_lock* lock, 526 const char* func, const char* file, int line) 527{ 528 if(key_deleted) 529 return; 530 log_assert(type == check_lock_rwlock); 531 checklock_lockit(type, lock, func, file, line, 532 try_wr, timed_wr, &lock->u.rwlock, 0, 1); 533} 534 535/** helper for lock mutex: try */ 536static int try_mutex(void* arg) 537{ return pthread_mutex_trylock((pthread_mutex_t*)arg); } 538/** helper for lock mutex: timed */ 539static int timed_mutex(void* arg, struct timespec* to) 540{ return pthread_mutex_timedlock((pthread_mutex_t*)arg, to); } 541 542/** helper for lock spinlock: try */ 543static int try_spinlock(void* arg) 544{ return pthread_spin_trylock((pthread_spinlock_t*)arg); } 545/** helper for lock spinlock: timed */ 546static int timed_spinlock(void* arg, struct timespec* to) 547{ 548 int err; 549 /* spin for 5 seconds. (ouch for the CPU, but it beats forever) */ 550 while( (err=try_spinlock(arg)) == EBUSY) { 551#ifndef S_SPLINT_S 552 if(time(NULL) >= to->tv_sec) 553 return ETIMEDOUT; 554 usleep(1000); /* in 1/1000000s of a second */ 555#endif 556 } 557 return err; 558} 559 560/** check if OK, lock */ 561void 562checklock_lock(enum check_lock_type type, struct checked_lock* lock, 563 const char* func, const char* file, int line) 564{ 565 if(key_deleted) 566 return; 567 log_assert(type != check_lock_rwlock); 568 switch(type) { 569 case check_lock_mutex: 570 checklock_lockit(type, lock, func, file, line, 571 try_mutex, timed_mutex, &lock->u.mutex, 1, 0); 572 break; 573 case check_lock_spinlock: 574 /* void* cast needed because 'volatile' on some OS */ 575 checklock_lockit(type, lock, func, file, line, 576 try_spinlock, timed_spinlock, 577 (void*)&lock->u.spinlock, 1, 0); 578 break; 579 default: 580 log_assert(0); 581 } 582} 583 584/** check if OK, unlock */ 585void 586checklock_unlock(enum check_lock_type type, struct checked_lock* lock, 587 const char* func, const char* file, int line) 588{ 589 struct thr_check *thr; 590 if(key_deleted) 591 return; 592 thr = (struct thr_check*)pthread_getspecific(thr_debug_key); 593 checktype(type, lock, func, file, line); 594 if(!thr) lock_error(lock, func, file, line, "no thread info"); 595 596 acquire_locklock(lock, func, file, line); 597 /* was this thread even holding this lock? */ 598 if(thr->holding_first != lock && 599 lock->prev_held_lock[thr->num] == NULL) { 600 lock_error(lock, func, file, line, "unlock nonlocked lock"); 601 } 602 if(lock->hold_count <= 0) 603 lock_error(lock, func, file, line, "too many unlocks"); 604 605 /* store this point as last touched by */ 606 lock->holder = thr; 607 lock->hold_count --; 608 lock->holder_func = func; 609 lock->holder_file = file; 610 lock->holder_line = line; 611 612 /* delete from thread holder list */ 613 /* no need to lock other lockstructs, because they are all on the 614 * held-locks list, and this thread holds their locks. 615 * we only touch the thr->num members, so it is safe. */ 616 if(thr->holding_first == lock) 617 thr->holding_first = lock->next_held_lock[thr->num]; 618 if(thr->holding_last == lock) 619 thr->holding_last = lock->prev_held_lock[thr->num]; 620 if(lock->next_held_lock[thr->num]) 621 lock->next_held_lock[thr->num]->prev_held_lock[thr->num] = 622 lock->prev_held_lock[thr->num]; 623 if(lock->prev_held_lock[thr->num]) 624 lock->prev_held_lock[thr->num]->next_held_lock[thr->num] = 625 lock->next_held_lock[thr->num]; 626 lock->next_held_lock[thr->num] = NULL; 627 lock->prev_held_lock[thr->num] = NULL; 628 629 if(type==check_lock_rwlock && lock->writeholder == thr) { 630 lock->writeholder = NULL; 631 prot_store(lock); 632 } else if(type != check_lock_rwlock) { 633 /* store memory areas that are protected, for later checks */ 634 prot_store(lock); 635 } 636 LOCKRET(pthread_mutex_unlock(&lock->lock)); 637 638 /* unlock it */ 639 switch(type) { 640 case check_lock_mutex: 641 LOCKRET(pthread_mutex_unlock(&lock->u.mutex)); 642 break; 643 case check_lock_spinlock: 644 LOCKRET(pthread_spin_unlock(&lock->u.spinlock)); 645 break; 646 case check_lock_rwlock: 647 LOCKRET(pthread_rwlock_unlock(&lock->u.rwlock)); 648 break; 649 default: 650 log_assert(0); 651 } 652} 653 654/** open order info debug file, thr->num must be valid */ 655static void 656open_lockorder(struct thr_check* thr) 657{ 658 char buf[24]; 659 time_t t; 660 snprintf(buf, sizeof(buf), "ublocktrace.%d", thr->num); 661 thr->order_info = fopen(buf, "w"); 662 if(!thr->order_info) 663 fatal_exit("could not open %s: %s", buf, strerror(errno)); 664 thr->locks_created = 0; 665 t = time(NULL); 666 /* write: <time_stamp> <runpid> <thread_num> */ 667 if(fwrite(&t, sizeof(t), 1, thr->order_info) != 1 || 668 fwrite(&thr->num, sizeof(thr->num), 1, thr->order_info) != 1 || 669 fwrite(&check_lock_pid, sizeof(check_lock_pid), 1, 670 thr->order_info) != 1) 671 log_err("fwrite: %s", strerror(errno)); 672} 673 674/** checklock thread main, Inits thread structure */ 675static void* checklock_main(void* arg) 676{ 677 struct thr_check* thr = (struct thr_check*)arg; 678 void* ret; 679 thr->id = pthread_self(); 680 /* Hack to get same numbers as in log file */ 681 thr->num = *(int*)(thr->arg); 682 log_assert(thr->num < THRDEBUG_MAX_THREADS); 683 /* as an aside, due to this, won't work for libunbound bg thread */ 684 if(thread_infos[thr->num] != NULL) 685 log_warn("thread warning, thr->num %d not NULL", thr->num); 686 thread_infos[thr->num] = thr; 687 LOCKRET(pthread_setspecific(thr_debug_key, thr)); 688 if(check_locking_order) 689 open_lockorder(thr); 690 ret = thr->func(thr->arg); 691 thread_infos[thr->num] = NULL; 692 if(check_locking_order) 693 fclose(thr->order_info); 694 free(thr); 695 return ret; 696} 697 698/** init the main thread */ 699void checklock_start(void) 700{ 701 if(key_deleted) 702 return; 703 if(!key_created) { 704 struct thr_check* thisthr = (struct thr_check*)calloc(1, 705 sizeof(struct thr_check)); 706 if(!thisthr) 707 fatal_exit("thrcreate: out of memory"); 708 key_created = 1; 709 check_lock_pid = getpid(); 710 LOCKRET(pthread_key_create(&thr_debug_key, NULL)); 711 LOCKRET(pthread_setspecific(thr_debug_key, thisthr)); 712 thread_infos[0] = thisthr; 713 if(check_locking_order) 714 open_lockorder(thisthr); 715 } 716} 717 718/** stop checklocks */ 719void checklock_stop(void) 720{ 721 if(key_created) { 722 int i; 723 key_deleted = 1; 724 if(check_locking_order) 725 fclose(thread_infos[0]->order_info); 726 free(thread_infos[0]); 727 thread_infos[0] = NULL; 728 for(i = 0; i < THRDEBUG_MAX_THREADS; i++) 729 log_assert(thread_infos[i] == NULL); 730 /* should have been cleaned up. */ 731 LOCKRET(pthread_key_delete(thr_debug_key)); 732 key_created = 0; 733 } 734} 735 736/** allocate debug info and create thread */ 737void 738checklock_thrcreate(pthread_t* id, void* (*func)(void*), void* arg) 739{ 740 struct thr_check* thr = (struct thr_check*)calloc(1, 741 sizeof(struct thr_check)); 742 if(!thr) 743 fatal_exit("thrcreate: out of memory"); 744 if(!key_created) { 745 checklock_start(); 746 } 747 thr->func = func; 748 thr->arg = arg; 749 LOCKRET(pthread_create(id, NULL, checklock_main, thr)); 750} 751 752/** count number of thread infos */ 753static int 754count_thread_infos(void) 755{ 756 int cnt = 0; 757 int i; 758 for(i=0; i<THRDEBUG_MAX_THREADS; i++) 759 if(thread_infos[i]) 760 cnt++; 761 return cnt; 762} 763 764/** print lots of info on a lock */ 765static void 766lock_debug_info(struct checked_lock* lock) 767{ 768 if(!lock) return; 769 log_info("+++ Lock %llx, %d %d create %s %s %d", 770 (unsigned long long)(size_t)lock, 771 lock->create_thread, lock->create_instance, 772 lock->create_func, lock->create_file, lock->create_line); 773 log_info("lock type: %s", 774 (lock->type==check_lock_mutex)?"mutex": ( 775 (lock->type==check_lock_spinlock)?"spinlock": ( 776 (lock->type==check_lock_rwlock)?"rwlock": "badtype"))); 777 log_info("lock contention %u, history:%u, hold:%d, wait:%d", 778 (unsigned)lock->contention_count, (unsigned)lock->history_count, 779 lock->hold_count, lock->wait_count); 780 log_info("last touch %s %s %d", lock->holder_func, lock->holder_file, 781 lock->holder_line); 782 log_info("holder thread %d, writeholder thread %d", 783 lock->holder?lock->holder->num:-1, 784 lock->writeholder?lock->writeholder->num:-1); 785} 786 787/** print debug locks held by a thread */ 788static void 789held_debug_info(struct thr_check* thr, struct checked_lock* lock) 790{ 791 if(!lock) return; 792 lock_debug_info(lock); 793 held_debug_info(thr, lock->next_held_lock[thr->num]); 794} 795 796/** print debug info for a thread */ 797static void 798thread_debug_info(struct thr_check* thr) 799{ 800 struct checked_lock* w = NULL; 801 struct checked_lock* f = NULL; 802 struct checked_lock* l = NULL; 803 if(!thr) return; 804 log_info("pthread id is %x", (int)thr->id); 805 log_info("thread func is %llx", (unsigned long long)(size_t)thr->func); 806 log_info("thread arg is %llx (%d)", 807 (unsigned long long)(size_t)thr->arg, 808 (thr->arg?*(int*)thr->arg:0)); 809 log_info("thread num is %d", thr->num); 810 log_info("locks created %d", thr->locks_created); 811 log_info("open file for lockinfo: %s", 812 thr->order_info?"yes, flushing":"no"); 813 fflush(thr->order_info); 814 w = thr->waiting; 815 f = thr->holding_first; 816 l = thr->holding_last; 817 log_info("thread waiting for a lock: %s %llx", w?"yes":"no", 818 (unsigned long long)(size_t)w); 819 lock_debug_info(w); 820 log_info("thread holding first: %s, last: %s", f?"yes":"no", 821 l?"yes":"no"); 822 held_debug_info(thr, f); 823} 824 825static void 826total_debug_info(void) 827{ 828 int i; 829 log_info("checklocks: supervising %d threads.", 830 count_thread_infos()); 831 if(!key_created) { 832 log_info("No thread debug key created yet"); 833 } 834 for(i=0; i<THRDEBUG_MAX_THREADS; i++) { 835 if(thread_infos[i]) { 836 log_info("*** Thread %d information: ***", i); 837 thread_debug_info(thread_infos[i]); 838 } 839 } 840} 841 842/** signal handler for join timeout, Exits */ 843static RETSIGTYPE joinalarm(int ATTR_UNUSED(sig)) 844{ 845 log_err("join thread timeout. hangup or deadlock. Info follows."); 846 total_debug_info(); 847 fatal_exit("join thread timeout. hangup or deadlock."); 848} 849 850/** wait for thread with a timeout */ 851void 852checklock_thrjoin(pthread_t thread) 853{ 854 /* wait with a timeout */ 855 if(signal(SIGALRM, joinalarm) == SIG_ERR) 856 fatal_exit("signal(): %s", strerror(errno)); 857 (void)alarm(CHECK_JOIN_TIMEOUT); 858 LOCKRET(pthread_join(thread, NULL)); 859 (void)alarm(0); 860} 861 862#endif /* USE_THREAD_DEBUG */ 863