1/**
2 * testcode/checklocks.c - wrapper on locks that checks access.
3 *
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5 *
6 * This software is open source.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36#include "config.h"
37#include <signal.h>
38#include "util/locks.h"   /* include before checklocks.h */
39#include "testcode/checklocks.h"
40
41/**
42 * \file
43 * Locks that are checked.
44 *
45 * Ugly hack: uses the fact that workers start with an int thread_num, and
46 * are passed to thread_create to make the thread numbers here the same as
47 * those used for logging which is nice.
48 *
49 * Todo:
50 *	 - debug status print, of thread lock stacks, and current waiting.
51 */
52#ifdef USE_THREAD_DEBUG
53
54/** How long to wait before lock attempt is a failure. */
55#define CHECK_LOCK_TIMEOUT 120 /* seconds */
56/** How long to wait before join attempt is a failure. */
57#define CHECK_JOIN_TIMEOUT 120 /* seconds */
58
59/** if key has been created */
60static int key_created = 0;
61/** if the key was deleted, i.e. we have quit */
62static int key_deleted = 0;
63/** we hide the thread debug info with this key. */
64static ub_thread_key_type thr_debug_key;
65/** the list of threads, so all threads can be examined. NULL if unused. */
66static struct thr_check* thread_infos[THRDEBUG_MAX_THREADS];
67/** do we check locking order */
68int check_locking_order = 1;
69/** the pid of this runset, reasonably unique. */
70static pid_t check_lock_pid;
71
72/** print all possible debug info on the state of the system */
73static void total_debug_info(void);
74/** print pretty lock error and exit (decl for NORETURN attribute) */
75static void lock_error(struct checked_lock* lock, const char* func,
76	const char* file, int line, const char* err) ATTR_NORETURN;
77
78/** print pretty lock error and exit */
79static void lock_error(struct checked_lock* lock,
80	const char* func, const char* file, int line, const char* err)
81{
82	log_err("lock error (description follows)");
83	log_err("Created at %s %s:%d", lock->create_func,
84		lock->create_file, lock->create_line);
85	if(lock->holder_func && lock->holder_file)
86		log_err("Previously %s %s:%d", lock->holder_func,
87			lock->holder_file, lock->holder_line);
88	log_err("At %s %s:%d", func, file, line);
89	log_err("Error for %s lock: %s",
90		(lock->type==check_lock_mutex)?"mutex": (
91		(lock->type==check_lock_spinlock)?"spinlock": (
92		(lock->type==check_lock_rwlock)?"rwlock": "badtype")), err);
93	log_err("complete status display:");
94	total_debug_info();
95	fatal_exit("bailing out");
96}
97
98/**
99 * Obtain lock on debug lock structure. This could be a deadlock by the caller.
100 * The debug code itself does not deadlock. Anyway, check with timeouts.
101 * @param lock: on what to acquire lock.
102 * @param func: user level caller identification.
103 * @param file: user level caller identification.
104 * @param line: user level caller identification.
105 */
106static void
107acquire_locklock(struct checked_lock* lock,
108	const char* func, const char* file, int line)
109{
110	struct timespec to;
111	int err;
112	int contend = 0;
113	/* first try; inc contention counter if not immediately */
114	if((err = pthread_mutex_trylock(&lock->lock))) {
115		if(err==EBUSY)
116			contend++;
117		else fatal_exit("error in mutex_trylock: %s", strerror(err));
118	}
119	if(!err)
120		return; /* immediate success */
121	to.tv_sec = time(NULL) + CHECK_LOCK_TIMEOUT;
122	to.tv_nsec = 0;
123	err = pthread_mutex_timedlock(&lock->lock, &to);
124	if(err) {
125		log_err("in acquiring locklock: %s", strerror(err));
126		lock_error(lock, func, file, line, "acquire locklock");
127	}
128	/* since we hold the lock, we can edit the contention_count */
129	lock->contention_count += contend;
130}
131
132/** add protected region */
133void
134lock_protect(void *p, void* area, size_t size)
135{
136	struct checked_lock* lock = *(struct checked_lock**)p;
137	struct protected_area* e = (struct protected_area*)malloc(
138		sizeof(struct protected_area));
139	if(!e)
140		fatal_exit("lock_protect: out of memory");
141	e->region = area;
142	e->size = size;
143	e->hold = malloc(size);
144	if(!e->hold)
145		fatal_exit("lock_protect: out of memory");
146	memcpy(e->hold, e->region, e->size);
147
148	acquire_locklock(lock, __func__, __FILE__, __LINE__);
149	e->next = lock->prot;
150	lock->prot = e;
151	LOCKRET(pthread_mutex_unlock(&lock->lock));
152}
153
154/** remove protected region */
155void
156lock_unprotect(void* mangled, void* area)
157{
158	struct checked_lock* lock = *(struct checked_lock**)mangled;
159	struct protected_area* p, **prevp;
160	if(!lock)
161		return;
162	acquire_locklock(lock, __func__, __FILE__, __LINE__);
163	p = lock->prot;
164	prevp = &lock->prot;
165	while(p) {
166		if(p->region == area) {
167			*prevp = p->next;
168			free(p->hold);
169			free(p);
170			LOCKRET(pthread_mutex_unlock(&lock->lock));
171			return;
172		}
173		prevp = &p->next;
174		p = p->next;
175	}
176	LOCKRET(pthread_mutex_unlock(&lock->lock));
177}
178
179/**
180 * Check protected memory region. Memory compare. Exit on error.
181 * @param lock: which lock to check.
182 * @param func: location we are now (when failure is detected).
183 * @param file: location we are now (when failure is detected).
184 * @param line: location we are now (when failure is detected).
185 */
186static void
187prot_check(struct checked_lock* lock,
188	const char* func, const char* file, int line)
189{
190	struct protected_area* p = lock->prot;
191	while(p) {
192		if(memcmp(p->hold, p->region, p->size) != 0) {
193			log_hex("memory prev", p->hold, p->size);
194			log_hex("memory here", p->region, p->size);
195			lock_error(lock, func, file, line,
196				"protected area modified");
197		}
198		p = p->next;
199	}
200}
201
202/** Copy protected memory region */
203static void
204prot_store(struct checked_lock* lock)
205{
206	struct protected_area* p = lock->prot;
207	while(p) {
208		memcpy(p->hold, p->region, p->size);
209		p = p->next;
210	}
211}
212
213/** get memory held by lock */
214size_t
215lock_get_mem(void* pp)
216{
217	size_t s;
218	struct checked_lock* lock = *(struct checked_lock**)pp;
219	struct protected_area* p;
220	s = sizeof(struct checked_lock);
221	acquire_locklock(lock, __func__, __FILE__, __LINE__);
222	for(p = lock->prot; p; p = p->next) {
223		s += sizeof(struct protected_area);
224		s += p->size;
225	}
226	LOCKRET(pthread_mutex_unlock(&lock->lock));
227	return s;
228}
229
230/** write lock trace info to file, while you hold those locks */
231static void
232ordercheck_locklock(struct thr_check* thr, struct checked_lock* lock)
233{
234	int info[4];
235	if(!check_locking_order) return;
236	if(!thr->holding_first) return; /* no older lock, no info */
237	/* write: <lock id held> <lock id new> <file> <line> */
238	info[0] = thr->holding_first->create_thread;
239	info[1] = thr->holding_first->create_instance;
240	info[2] = lock->create_thread;
241	info[3] = lock->create_instance;
242	if(fwrite(info, 4*sizeof(int), 1, thr->order_info) != 1 ||
243		fwrite(lock->holder_file, strlen(lock->holder_file)+1, 1,
244		thr->order_info) != 1 ||
245		fwrite(&lock->holder_line, sizeof(int), 1,
246		thr->order_info) != 1)
247		log_err("fwrite: %s", strerror(errno));
248}
249
250/** write ordercheck lock creation details to file */
251static void
252ordercheck_lockcreate(struct thr_check* thr, struct checked_lock* lock)
253{
254	/* write: <ffff = create> <lock id> <file> <line> */
255	int cmd = -1;
256	if(!check_locking_order) return;
257
258	if( fwrite(&cmd, sizeof(int), 1, thr->order_info) != 1 ||
259		fwrite(&lock->create_thread, sizeof(int), 1,
260			thr->order_info) != 1 ||
261		fwrite(&lock->create_instance, sizeof(int), 1,
262			thr->order_info) != 1 ||
263		fwrite(lock->create_file, strlen(lock->create_file)+1, 1,
264			thr->order_info) != 1 ||
265		fwrite(&lock->create_line, sizeof(int), 1,
266		thr->order_info) != 1)
267		log_err("fwrite: %s", strerror(errno));
268}
269
270/** alloc struct, init lock empty */
271void
272checklock_init(enum check_lock_type type, struct checked_lock** lock,
273        const char* func, const char* file, int line)
274{
275	struct checked_lock* e = (struct checked_lock*)calloc(1,
276		sizeof(struct checked_lock));
277	struct thr_check *thr = (struct thr_check*)pthread_getspecific(
278		thr_debug_key);
279	if(!e)
280		fatal_exit("%s %s %d: out of memory", func, file, line);
281	if(!thr) {
282		/* this is called when log_init() calls lock_init()
283		 * functions, and the test check code has not yet
284		 * been initialised.  But luckily, the checklock_start()
285		 * routine can be called multiple times without ill effect.
286		 */
287		checklock_start();
288		thr = (struct thr_check*)pthread_getspecific(thr_debug_key);
289	}
290	if(!thr)
291		fatal_exit("%s %s %d: lock_init no thread info", func, file,
292			line);
293	*lock = e;
294	e->type = type;
295	e->create_func = func;
296	e->create_file = file;
297	e->create_line = line;
298	e->create_thread = thr->num;
299	e->create_instance = thr->locks_created++;
300	ordercheck_lockcreate(thr, e);
301	LOCKRET(pthread_mutex_init(&e->lock, NULL));
302	switch(e->type) {
303		case check_lock_mutex:
304			LOCKRET(pthread_mutex_init(&e->u.mutex, NULL));
305			break;
306		case check_lock_spinlock:
307			LOCKRET(pthread_spin_init(&e->u.spinlock, PTHREAD_PROCESS_PRIVATE));
308			break;
309		case check_lock_rwlock:
310			LOCKRET(pthread_rwlock_init(&e->u.rwlock, NULL));
311			break;
312		default:
313			log_assert(0);
314	}
315}
316
317/** delete prot items */
318static void
319prot_clear(struct checked_lock* lock)
320{
321	struct protected_area* p=lock->prot, *np;
322	while(p) {
323		np = p->next;
324		free(p->hold);
325		free(p);
326		p = np;
327	}
328}
329
330/** check if type is OK for the lock given */
331static void
332checktype(enum check_lock_type type, struct checked_lock* lock,
333        const char* func, const char* file, int line)
334{
335	if(!lock)
336		fatal_exit("use of null/deleted lock at %s %s:%d",
337			func, file, line);
338	if(type != lock->type) {
339		lock_error(lock, func, file, line, "wrong lock type");
340	}
341}
342
343/** check if OK, free struct */
344void
345checklock_destroy(enum check_lock_type type, struct checked_lock** lock,
346        const char* func, const char* file, int line)
347{
348	const size_t contention_interest = 1; /* promille contented locks */
349	struct checked_lock* e;
350	if(!lock)
351		return;
352	e = *lock;
353	if(!e)
354		return;
355	checktype(type, e, func, file, line);
356
357	/* check if delete is OK */
358	acquire_locklock(e, func, file, line);
359	if(e->hold_count != 0)
360		lock_error(e, func, file, line, "delete while locked.");
361	if(e->wait_count != 0)
362		lock_error(e, func, file, line, "delete while waited on.");
363	prot_check(e, func, file, line);
364	*lock = NULL; /* use after free will fail */
365	LOCKRET(pthread_mutex_unlock(&e->lock));
366
367	/* contention, look at fraction in trouble. */
368	if(e->history_count > 1 &&
369	   1000*e->contention_count/e->history_count > contention_interest) {
370		log_info("lock created %s %s %d has contention %u of %u (%d%%)",
371			e->create_func, e->create_file, e->create_line,
372			(unsigned int)e->contention_count,
373			(unsigned int)e->history_count,
374			(int)(100*e->contention_count/e->history_count));
375	}
376
377	/* delete it */
378	LOCKRET(pthread_mutex_destroy(&e->lock));
379	prot_clear(e);
380	/* since nobody holds the lock - see check above, no need to unlink
381	 * from the thread-held locks list. */
382	switch(e->type) {
383		case check_lock_mutex:
384			LOCKRET(pthread_mutex_destroy(&e->u.mutex));
385			break;
386		case check_lock_spinlock:
387			LOCKRET(pthread_spin_destroy(&e->u.spinlock));
388			break;
389		case check_lock_rwlock:
390			LOCKRET(pthread_rwlock_destroy(&e->u.rwlock));
391			break;
392		default:
393			log_assert(0);
394	}
395	memset(e, 0, sizeof(struct checked_lock));
396	free(e);
397}
398
399/** finish acquiring lock, shared between _(rd|wr||)lock() routines */
400static void
401finish_acquire_lock(struct thr_check* thr, struct checked_lock* lock,
402        const char* func, const char* file, int line)
403{
404	thr->waiting = NULL;
405	lock->wait_count --;
406	lock->holder = thr;
407	lock->hold_count ++;
408	lock->holder_func = func;
409	lock->holder_file = file;
410	lock->holder_line = line;
411	ordercheck_locklock(thr, lock);
412
413	/* insert in thread lock list, as first */
414	lock->prev_held_lock[thr->num] = NULL;
415	lock->next_held_lock[thr->num] = thr->holding_first;
416	if(thr->holding_first)
417		/* no need to lock it, since this thread already holds the
418		 * lock (since it is on this list) and we only edit thr->num
419		 * member in array. So it is safe.  */
420		thr->holding_first->prev_held_lock[thr->num] = lock;
421	else	thr->holding_last = lock;
422	thr->holding_first = lock;
423}
424
425/**
426 * Locking routine.
427 * @param type: as passed by user.
428 * @param lock: as passed by user.
429 * @param func: caller location.
430 * @param file: caller location.
431 * @param line: caller location.
432 * @param tryfunc: the pthread_mutex_trylock or similar function.
433 * @param timedfunc: the pthread_mutex_timedlock or similar function.
434 *	Uses absolute timeout value.
435 * @param arg: what to pass to tryfunc and timedlock.
436 * @param exclusive: if lock must be exclusive (only one allowed).
437 * @param getwr: if attempts to get writelock (or readlock) for rwlocks.
438 */
439static void
440checklock_lockit(enum check_lock_type type, struct checked_lock* lock,
441        const char* func, const char* file, int line,
442	int (*tryfunc)(void*), int (*timedfunc)(void*, struct timespec*),
443	void* arg, int exclusive, int getwr)
444{
445	int err;
446	int contend = 0;
447	struct thr_check *thr = (struct thr_check*)pthread_getspecific(
448		thr_debug_key);
449	checktype(type, lock, func, file, line);
450	if(!thr) lock_error(lock, func, file, line, "no thread info");
451
452	acquire_locklock(lock, func, file, line);
453	lock->wait_count ++;
454	thr->waiting = lock;
455	if(exclusive && lock->hold_count > 0 && lock->holder == thr)
456		lock_error(lock, func, file, line, "thread already owns lock");
457	if(type==check_lock_rwlock && getwr && lock->writeholder == thr)
458		lock_error(lock, func, file, line, "thread already has wrlock");
459	LOCKRET(pthread_mutex_unlock(&lock->lock));
460
461	/* first try; if busy increase contention counter */
462	if((err=tryfunc(arg))) {
463		struct timespec to;
464		if(err != EBUSY) log_err("trylock: %s", strerror(err));
465		to.tv_sec = time(NULL) + CHECK_LOCK_TIMEOUT;
466		to.tv_nsec = 0;
467		if((err=timedfunc(arg, &to))) {
468			if(err == ETIMEDOUT)
469				lock_error(lock, func, file, line,
470					"timeout possible deadlock");
471			log_err("timedlock: %s", strerror(err));
472		}
473		contend ++;
474	}
475	/* got the lock */
476
477	acquire_locklock(lock, func, file, line);
478	lock->contention_count += contend;
479	lock->history_count++;
480	if(exclusive && lock->hold_count > 0)
481		lock_error(lock, func, file, line, "got nonexclusive lock");
482	if(type==check_lock_rwlock && getwr && lock->writeholder)
483		lock_error(lock, func, file, line, "got nonexclusive wrlock");
484	if(type==check_lock_rwlock && getwr)
485		lock->writeholder = thr;
486	/* check the memory areas for unauthorized changes,
487	 * between last unlock time and current lock time.
488	 * we check while holding the lock (threadsafe).
489	 */
490	if(getwr || exclusive)
491		prot_check(lock, func, file, line);
492	finish_acquire_lock(thr, lock, func, file, line);
493	LOCKRET(pthread_mutex_unlock(&lock->lock));
494}
495
496/** helper for rdlock: try */
497static int try_rd(void* arg)
498{ return pthread_rwlock_tryrdlock((pthread_rwlock_t*)arg); }
499/** helper for rdlock: timed */
500static int timed_rd(void* arg, struct timespec* to)
501{ return pthread_rwlock_timedrdlock((pthread_rwlock_t*)arg, to); }
502
503/** check if OK, lock */
504void
505checklock_rdlock(enum check_lock_type type, struct checked_lock* lock,
506        const char* func, const char* file, int line)
507{
508	if(key_deleted)
509		return;
510
511	log_assert(type == check_lock_rwlock);
512	checklock_lockit(type, lock, func, file, line,
513		try_rd, timed_rd, &lock->u.rwlock, 0, 0);
514}
515
516/** helper for wrlock: try */
517static int try_wr(void* arg)
518{ return pthread_rwlock_trywrlock((pthread_rwlock_t*)arg); }
519/** helper for wrlock: timed */
520static int timed_wr(void* arg, struct timespec* to)
521{ return pthread_rwlock_timedwrlock((pthread_rwlock_t*)arg, to); }
522
523/** check if OK, lock */
524void
525checklock_wrlock(enum check_lock_type type, struct checked_lock* lock,
526        const char* func, const char* file, int line)
527{
528	if(key_deleted)
529		return;
530	log_assert(type == check_lock_rwlock);
531	checklock_lockit(type, lock, func, file, line,
532		try_wr, timed_wr, &lock->u.rwlock, 0, 1);
533}
534
535/** helper for lock mutex: try */
536static int try_mutex(void* arg)
537{ return pthread_mutex_trylock((pthread_mutex_t*)arg); }
538/** helper for lock mutex: timed */
539static int timed_mutex(void* arg, struct timespec* to)
540{ return pthread_mutex_timedlock((pthread_mutex_t*)arg, to); }
541
542/** helper for lock spinlock: try */
543static int try_spinlock(void* arg)
544{ return pthread_spin_trylock((pthread_spinlock_t*)arg); }
545/** helper for lock spinlock: timed */
546static int timed_spinlock(void* arg, struct timespec* to)
547{
548	int err;
549	/* spin for 5 seconds. (ouch for the CPU, but it beats forever) */
550	while( (err=try_spinlock(arg)) == EBUSY) {
551#ifndef S_SPLINT_S
552		if(time(NULL) >= to->tv_sec)
553			return ETIMEDOUT;
554		usleep(1000); /* in 1/1000000s of a second */
555#endif
556	}
557	return err;
558}
559
560/** check if OK, lock */
561void
562checklock_lock(enum check_lock_type type, struct checked_lock* lock,
563        const char* func, const char* file, int line)
564{
565	if(key_deleted)
566		return;
567	log_assert(type != check_lock_rwlock);
568	switch(type) {
569		case check_lock_mutex:
570			checklock_lockit(type, lock, func, file, line,
571				try_mutex, timed_mutex, &lock->u.mutex, 1, 0);
572			break;
573		case check_lock_spinlock:
574			/* void* cast needed because 'volatile' on some OS */
575			checklock_lockit(type, lock, func, file, line,
576				try_spinlock, timed_spinlock,
577				(void*)&lock->u.spinlock, 1, 0);
578			break;
579		default:
580			log_assert(0);
581	}
582}
583
584/** check if OK, unlock */
585void
586checklock_unlock(enum check_lock_type type, struct checked_lock* lock,
587        const char* func, const char* file, int line)
588{
589	struct thr_check *thr;
590	if(key_deleted)
591		return;
592	thr = (struct thr_check*)pthread_getspecific(thr_debug_key);
593	checktype(type, lock, func, file, line);
594	if(!thr) lock_error(lock, func, file, line, "no thread info");
595
596	acquire_locklock(lock, func, file, line);
597	/* was this thread even holding this lock? */
598	if(thr->holding_first != lock &&
599		lock->prev_held_lock[thr->num] == NULL) {
600		lock_error(lock, func, file, line, "unlock nonlocked lock");
601	}
602	if(lock->hold_count <= 0)
603		lock_error(lock, func, file, line, "too many unlocks");
604
605	/* store this point as last touched by */
606	lock->holder = thr;
607	lock->hold_count --;
608	lock->holder_func = func;
609	lock->holder_file = file;
610	lock->holder_line = line;
611
612	/* delete from thread holder list */
613	/* no need to lock other lockstructs, because they are all on the
614	 * held-locks list, and this thread holds their locks.
615	 * we only touch the thr->num members, so it is safe.  */
616	if(thr->holding_first == lock)
617		thr->holding_first = lock->next_held_lock[thr->num];
618	if(thr->holding_last == lock)
619		thr->holding_last = lock->prev_held_lock[thr->num];
620	if(lock->next_held_lock[thr->num])
621		lock->next_held_lock[thr->num]->prev_held_lock[thr->num] =
622			lock->prev_held_lock[thr->num];
623	if(lock->prev_held_lock[thr->num])
624		lock->prev_held_lock[thr->num]->next_held_lock[thr->num] =
625			lock->next_held_lock[thr->num];
626	lock->next_held_lock[thr->num] = NULL;
627	lock->prev_held_lock[thr->num] = NULL;
628
629	if(type==check_lock_rwlock && lock->writeholder == thr) {
630		lock->writeholder = NULL;
631		prot_store(lock);
632	} else if(type != check_lock_rwlock) {
633		/* store memory areas that are protected, for later checks */
634		prot_store(lock);
635	}
636	LOCKRET(pthread_mutex_unlock(&lock->lock));
637
638	/* unlock it */
639	switch(type) {
640		case check_lock_mutex:
641			LOCKRET(pthread_mutex_unlock(&lock->u.mutex));
642			break;
643		case check_lock_spinlock:
644			LOCKRET(pthread_spin_unlock(&lock->u.spinlock));
645			break;
646		case check_lock_rwlock:
647			LOCKRET(pthread_rwlock_unlock(&lock->u.rwlock));
648			break;
649		default:
650			log_assert(0);
651	}
652}
653
654/** open order info debug file, thr->num must be valid */
655static void
656open_lockorder(struct thr_check* thr)
657{
658	char buf[24];
659	time_t t;
660	snprintf(buf, sizeof(buf), "ublocktrace.%d", thr->num);
661	thr->order_info = fopen(buf, "w");
662	if(!thr->order_info)
663		fatal_exit("could not open %s: %s", buf, strerror(errno));
664	thr->locks_created = 0;
665	t = time(NULL);
666	/* write: <time_stamp> <runpid> <thread_num> */
667	if(fwrite(&t, sizeof(t), 1, thr->order_info) != 1 ||
668		fwrite(&thr->num, sizeof(thr->num), 1, thr->order_info) != 1 ||
669		fwrite(&check_lock_pid, sizeof(check_lock_pid), 1,
670		thr->order_info) != 1)
671		log_err("fwrite: %s", strerror(errno));
672}
673
674/** checklock thread main, Inits thread structure */
675static void* checklock_main(void* arg)
676{
677	struct thr_check* thr = (struct thr_check*)arg;
678	void* ret;
679	thr->id = pthread_self();
680	/* Hack to get same numbers as in log file */
681	thr->num = *(int*)(thr->arg);
682	log_assert(thr->num < THRDEBUG_MAX_THREADS);
683	/* as an aside, due to this, won't work for libunbound bg thread */
684	if(thread_infos[thr->num] != NULL)
685		log_warn("thread warning, thr->num %d not NULL", thr->num);
686	thread_infos[thr->num] = thr;
687	LOCKRET(pthread_setspecific(thr_debug_key, thr));
688	if(check_locking_order)
689		open_lockorder(thr);
690	ret = thr->func(thr->arg);
691	thread_infos[thr->num] = NULL;
692	if(check_locking_order)
693		fclose(thr->order_info);
694	free(thr);
695	return ret;
696}
697
698/** init the main thread */
699void checklock_start(void)
700{
701	if(key_deleted)
702		return;
703	if(!key_created) {
704		struct thr_check* thisthr = (struct thr_check*)calloc(1,
705			sizeof(struct thr_check));
706		if(!thisthr)
707			fatal_exit("thrcreate: out of memory");
708		key_created = 1;
709		check_lock_pid = getpid();
710		LOCKRET(pthread_key_create(&thr_debug_key, NULL));
711		LOCKRET(pthread_setspecific(thr_debug_key, thisthr));
712		thread_infos[0] = thisthr;
713		if(check_locking_order)
714			open_lockorder(thisthr);
715	}
716}
717
718/** stop checklocks */
719void checklock_stop(void)
720{
721	if(key_created) {
722		int i;
723		key_deleted = 1;
724		if(check_locking_order)
725			fclose(thread_infos[0]->order_info);
726		free(thread_infos[0]);
727		thread_infos[0] = NULL;
728		for(i = 0; i < THRDEBUG_MAX_THREADS; i++)
729			log_assert(thread_infos[i] == NULL);
730			/* should have been cleaned up. */
731		LOCKRET(pthread_key_delete(thr_debug_key));
732		key_created = 0;
733	}
734}
735
736/** allocate debug info and create thread */
737void
738checklock_thrcreate(pthread_t* id, void* (*func)(void*), void* arg)
739{
740	struct thr_check* thr = (struct thr_check*)calloc(1,
741		sizeof(struct thr_check));
742	if(!thr)
743		fatal_exit("thrcreate: out of memory");
744	if(!key_created) {
745		checklock_start();
746	}
747	thr->func = func;
748	thr->arg = arg;
749	LOCKRET(pthread_create(id, NULL, checklock_main, thr));
750}
751
752/** count number of thread infos */
753static int
754count_thread_infos(void)
755{
756	int cnt = 0;
757	int i;
758	for(i=0; i<THRDEBUG_MAX_THREADS; i++)
759		if(thread_infos[i])
760			cnt++;
761	return cnt;
762}
763
764/** print lots of info on a lock */
765static void
766lock_debug_info(struct checked_lock* lock)
767{
768	if(!lock) return;
769	log_info("+++ Lock %llx, %d %d create %s %s %d",
770		(unsigned long long)(size_t)lock,
771		lock->create_thread, lock->create_instance,
772		lock->create_func, lock->create_file, lock->create_line);
773	log_info("lock type: %s",
774		(lock->type==check_lock_mutex)?"mutex": (
775		(lock->type==check_lock_spinlock)?"spinlock": (
776		(lock->type==check_lock_rwlock)?"rwlock": "badtype")));
777	log_info("lock contention %u, history:%u, hold:%d, wait:%d",
778		(unsigned)lock->contention_count, (unsigned)lock->history_count,
779		lock->hold_count, lock->wait_count);
780	log_info("last touch %s %s %d", lock->holder_func, lock->holder_file,
781		lock->holder_line);
782	log_info("holder thread %d, writeholder thread %d",
783		lock->holder?lock->holder->num:-1,
784		lock->writeholder?lock->writeholder->num:-1);
785}
786
787/** print debug locks held by a thread */
788static void
789held_debug_info(struct thr_check* thr, struct checked_lock* lock)
790{
791	if(!lock) return;
792	lock_debug_info(lock);
793	held_debug_info(thr, lock->next_held_lock[thr->num]);
794}
795
796/** print debug info for a thread */
797static void
798thread_debug_info(struct thr_check* thr)
799{
800	struct checked_lock* w = NULL;
801	struct checked_lock* f = NULL;
802	struct checked_lock* l = NULL;
803	if(!thr) return;
804	log_info("pthread id is %x", (int)thr->id);
805	log_info("thread func is %llx", (unsigned long long)(size_t)thr->func);
806	log_info("thread arg is %llx (%d)",
807		(unsigned long long)(size_t)thr->arg,
808		(thr->arg?*(int*)thr->arg:0));
809	log_info("thread num is %d", thr->num);
810	log_info("locks created %d", thr->locks_created);
811	log_info("open file for lockinfo: %s",
812		thr->order_info?"yes, flushing":"no");
813	fflush(thr->order_info);
814	w = thr->waiting;
815	f = thr->holding_first;
816	l = thr->holding_last;
817	log_info("thread waiting for a lock: %s %llx", w?"yes":"no",
818		(unsigned long long)(size_t)w);
819	lock_debug_info(w);
820	log_info("thread holding first: %s, last: %s", f?"yes":"no",
821		l?"yes":"no");
822	held_debug_info(thr, f);
823}
824
825static void
826total_debug_info(void)
827{
828	int i;
829	log_info("checklocks: supervising %d threads.",
830		count_thread_infos());
831	if(!key_created) {
832		log_info("No thread debug key created yet");
833	}
834	for(i=0; i<THRDEBUG_MAX_THREADS; i++) {
835		if(thread_infos[i]) {
836			log_info("*** Thread %d information: ***", i);
837			thread_debug_info(thread_infos[i]);
838		}
839	}
840}
841
842/** signal handler for join timeout, Exits */
843static RETSIGTYPE joinalarm(int ATTR_UNUSED(sig))
844{
845	log_err("join thread timeout. hangup or deadlock. Info follows.");
846	total_debug_info();
847	fatal_exit("join thread timeout. hangup or deadlock.");
848}
849
850/** wait for thread with a timeout */
851void
852checklock_thrjoin(pthread_t thread)
853{
854	/* wait with a timeout */
855	if(signal(SIGALRM, joinalarm) == SIG_ERR)
856		fatal_exit("signal(): %s", strerror(errno));
857	(void)alarm(CHECK_JOIN_TIMEOUT);
858	LOCKRET(pthread_join(thread, NULL));
859	(void)alarm(0);
860}
861
862#endif /* USE_THREAD_DEBUG */
863