1/**
2 * testcode/checklocks.c - wrapper on locks that checks access.
3 *
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5 *
6 * This software is open source.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36#include "config.h"
37#include <signal.h>
38#include "util/locks.h"   /* include before checklocks.h */
39#include "testcode/checklocks.h"
40
41/**
42 * \file
43 * Locks that are checked.
44 *
45 * Ugly hack: uses the fact that workers start with an int thread_num, and
46 * are passed to thread_create to make the thread numbers here the same as
47 * those used for logging which is nice.
48 *
49 * Todo:
50 *	 - debug status print, of thread lock stacks, and current waiting.
51 */
52#ifdef USE_THREAD_DEBUG
53
54/** How long to wait before lock attempt is a failure. */
55#define CHECK_LOCK_TIMEOUT 120 /* seconds */
56/** How long to wait before join attempt is a failure. */
57#define CHECK_JOIN_TIMEOUT 120 /* seconds */
58
59/** if key has been created */
60static int key_created = 0;
61/** if the key was deleted, i.e. we have quit */
62static int key_deleted = 0;
63/** we hide the thread debug info with this key. */
64static ub_thread_key_type thr_debug_key;
65/** the list of threads, so all threads can be examined. NULL if unused. */
66static struct thr_check* thread_infos[THRDEBUG_MAX_THREADS];
67/** do we check locking order */
68int check_locking_order = 1;
69/** the pid of this runset, reasonably unique. */
70static pid_t check_lock_pid;
71/**
72 * Should checklocks print a trace of the lock and unlock calls.
73 * It uses fprintf for that because the log function uses a lock and that
74 * would loop otherwise.
75 */
76static int verbose_locking = 0;
77/**
78 * Assume lock 0 0 (create_thread, create_instance), is the log lock and
79 * do not print for that. Otherwise the output is full of log lock accesses.
80 */
81static int verbose_locking_not_loglock = 1;
82
83/** print all possible debug info on the state of the system */
84static void total_debug_info(void);
85/** print pretty lock error and exit (decl for NORETURN attribute) */
86static void lock_error(struct checked_lock* lock, const char* func,
87	const char* file, int line, const char* err) ATTR_NORETURN;
88
89/** print pretty lock error and exit */
90static void lock_error(struct checked_lock* lock,
91	const char* func, const char* file, int line, const char* err)
92{
93	log_err("lock error (description follows)");
94	log_err("Created at %s %s:%d", lock->create_func,
95		lock->create_file, lock->create_line);
96	if(lock->holder_func && lock->holder_file)
97		log_err("Previously %s %s:%d", lock->holder_func,
98			lock->holder_file, lock->holder_line);
99	log_err("At %s %s:%d", func, file, line);
100	log_err("Error for %s lock: %s",
101		(lock->type==check_lock_mutex)?"mutex": (
102		(lock->type==check_lock_spinlock)?"spinlock": (
103		(lock->type==check_lock_rwlock)?"rwlock": "badtype")), err);
104	log_err("complete status display:");
105	total_debug_info();
106	fatal_exit("bailing out");
107}
108
109/**
110 * Obtain lock on debug lock structure. This could be a deadlock by the caller.
111 * The debug code itself does not deadlock. Anyway, check with timeouts.
112 * @param lock: on what to acquire lock.
113 * @param func: user level caller identification.
114 * @param file: user level caller identification.
115 * @param line: user level caller identification.
116 */
117static void
118acquire_locklock(struct checked_lock* lock,
119	const char* func, const char* file, int line)
120{
121	struct timespec to;
122	int err;
123	int contend = 0;
124	/* first try; inc contention counter if not immediately */
125	if((err = pthread_mutex_trylock(&lock->lock))) {
126		if(err==EBUSY)
127			contend++;
128		else fatal_exit("error in mutex_trylock: %s", strerror(err));
129	}
130	if(!err)
131		return; /* immediate success */
132	to.tv_sec = time(NULL) + CHECK_LOCK_TIMEOUT;
133	to.tv_nsec = 0;
134	err = pthread_mutex_timedlock(&lock->lock, &to);
135	if(err) {
136		log_err("in acquiring locklock: %s", strerror(err));
137		lock_error(lock, func, file, line, "acquire locklock");
138	}
139	/* since we hold the lock, we can edit the contention_count */
140	lock->contention_count += contend;
141}
142
143/** add protected region */
144void
145lock_protect(void *p, void* area, size_t size)
146{
147	struct checked_lock* lock = *(struct checked_lock**)p;
148	struct protected_area* e = (struct protected_area*)malloc(
149		sizeof(struct protected_area));
150	if(!e)
151		fatal_exit("lock_protect: out of memory");
152	e->region = area;
153	e->size = size;
154	e->hold = malloc(size);
155	if(!e->hold)
156		fatal_exit("lock_protect: out of memory");
157	memcpy(e->hold, e->region, e->size);
158
159	acquire_locklock(lock, __func__, __FILE__, __LINE__);
160	e->next = lock->prot;
161	lock->prot = e;
162	LOCKRET(pthread_mutex_unlock(&lock->lock));
163}
164
165/** remove protected region */
166void
167lock_unprotect(void* mangled, void* area)
168{
169	struct checked_lock* lock = *(struct checked_lock**)mangled;
170	struct protected_area* p, **prevp;
171	if(!lock)
172		return;
173	acquire_locklock(lock, __func__, __FILE__, __LINE__);
174	p = lock->prot;
175	prevp = &lock->prot;
176	while(p) {
177		if(p->region == area) {
178			*prevp = p->next;
179			free(p->hold);
180			free(p);
181			LOCKRET(pthread_mutex_unlock(&lock->lock));
182			return;
183		}
184		prevp = &p->next;
185		p = p->next;
186	}
187	LOCKRET(pthread_mutex_unlock(&lock->lock));
188}
189
190/**
191 * Check protected memory region. Memory compare. Exit on error.
192 * @param lock: which lock to check.
193 * @param func: location we are now (when failure is detected).
194 * @param file: location we are now (when failure is detected).
195 * @param line: location we are now (when failure is detected).
196 */
197static void
198prot_check(struct checked_lock* lock,
199	const char* func, const char* file, int line)
200{
201	struct protected_area* p = lock->prot;
202	while(p) {
203		if(memcmp(p->hold, p->region, p->size) != 0) {
204			log_hex("memory prev", p->hold, p->size);
205			log_hex("memory here", p->region, p->size);
206			lock_error(lock, func, file, line,
207				"protected area modified");
208		}
209		p = p->next;
210	}
211}
212
213/** Copy protected memory region */
214static void
215prot_store(struct checked_lock* lock)
216{
217	struct protected_area* p = lock->prot;
218	while(p) {
219		memcpy(p->hold, p->region, p->size);
220		p = p->next;
221	}
222}
223
224/** get memory held by lock */
225size_t
226lock_get_mem(void* pp)
227{
228	size_t s;
229	struct checked_lock* lock = *(struct checked_lock**)pp;
230	struct protected_area* p;
231	s = sizeof(struct checked_lock);
232	acquire_locklock(lock, __func__, __FILE__, __LINE__);
233	for(p = lock->prot; p; p = p->next) {
234		s += sizeof(struct protected_area);
235		s += p->size;
236	}
237	LOCKRET(pthread_mutex_unlock(&lock->lock));
238	return s;
239}
240
241/** write lock trace info to file, while you hold those locks */
242static void
243ordercheck_locklock(struct thr_check* thr, struct checked_lock* lock)
244{
245	int info[4];
246	if(!check_locking_order) return;
247	if(!thr->holding_first) return; /* no older lock, no info */
248	/* write: <lock id held> <lock id new> <file> <line> */
249	info[0] = thr->holding_first->create_thread;
250	info[1] = thr->holding_first->create_instance;
251	info[2] = lock->create_thread;
252	info[3] = lock->create_instance;
253	if(fwrite(info, 4*sizeof(int), 1, thr->order_info) != 1 ||
254		fwrite(lock->holder_file, strlen(lock->holder_file)+1, 1,
255		thr->order_info) != 1 ||
256		fwrite(&lock->holder_line, sizeof(int), 1,
257		thr->order_info) != 1)
258		log_err("fwrite: %s", strerror(errno));
259}
260
261/** write ordercheck lock creation details to file */
262static void
263ordercheck_lockcreate(struct thr_check* thr, struct checked_lock* lock)
264{
265	/* write: <ffff = create> <lock id> <file> <line> */
266	int cmd = -1;
267	if(!check_locking_order) return;
268
269	if( fwrite(&cmd, sizeof(int), 1, thr->order_info) != 1 ||
270		fwrite(&lock->create_thread, sizeof(int), 1,
271			thr->order_info) != 1 ||
272		fwrite(&lock->create_instance, sizeof(int), 1,
273			thr->order_info) != 1 ||
274		fwrite(lock->create_file, strlen(lock->create_file)+1, 1,
275			thr->order_info) != 1 ||
276		fwrite(&lock->create_line, sizeof(int), 1,
277		thr->order_info) != 1)
278		log_err("fwrite: %s", strerror(errno));
279}
280
281/** alloc struct, init lock empty */
282void
283checklock_init(enum check_lock_type type, struct checked_lock** lock,
284        const char* func, const char* file, int line)
285{
286	struct checked_lock* e = (struct checked_lock*)calloc(1,
287		sizeof(struct checked_lock));
288	struct thr_check *thr = (struct thr_check*)pthread_getspecific(
289		thr_debug_key);
290	if(!e)
291		fatal_exit("%s %s %d: out of memory", func, file, line);
292	if(!thr) {
293		/* this is called when log_init() calls lock_init()
294		 * functions, and the test check code has not yet
295		 * been initialised.  But luckily, the checklock_start()
296		 * routine can be called multiple times without ill effect.
297		 */
298		checklock_start();
299		thr = (struct thr_check*)pthread_getspecific(thr_debug_key);
300	}
301	if(!thr)
302		fatal_exit("%s %s %d: lock_init no thread info", func, file,
303			line);
304	*lock = e;
305	e->type = type;
306	e->create_func = func;
307	e->create_file = file;
308	e->create_line = line;
309	e->create_thread = thr->num;
310	e->create_instance = thr->locks_created++;
311	ordercheck_lockcreate(thr, e);
312	LOCKRET(pthread_mutex_init(&e->lock, NULL));
313	switch(e->type) {
314		case check_lock_mutex:
315			LOCKRET(pthread_mutex_init(&e->u.mutex, NULL));
316			break;
317		case check_lock_spinlock:
318			LOCKRET(pthread_spin_init(&e->u.spinlock, PTHREAD_PROCESS_PRIVATE));
319			break;
320		case check_lock_rwlock:
321			LOCKRET(pthread_rwlock_init(&e->u.rwlock, NULL));
322			break;
323		default:
324			log_assert(0);
325	}
326}
327
328/** delete prot items */
329static void
330prot_clear(struct checked_lock* lock)
331{
332	struct protected_area* p=lock->prot, *np;
333	while(p) {
334		np = p->next;
335		free(p->hold);
336		free(p);
337		p = np;
338	}
339}
340
341/** check if type is OK for the lock given */
342static void
343checktype(enum check_lock_type type, struct checked_lock* lock,
344        const char* func, const char* file, int line)
345{
346	if(!lock)
347		fatal_exit("use of null/deleted lock at %s %s:%d",
348			func, file, line);
349	if(type != lock->type) {
350		lock_error(lock, func, file, line, "wrong lock type");
351	}
352}
353
354/** check if OK, free struct */
355void
356checklock_destroy(enum check_lock_type type, struct checked_lock** lock,
357        const char* func, const char* file, int line)
358{
359	const size_t contention_interest = 1; /* promille contented locks */
360	struct checked_lock* e;
361	if(!lock)
362		return;
363	e = *lock;
364	if(!e)
365		return;
366	checktype(type, e, func, file, line);
367
368	/* check if delete is OK */
369	acquire_locklock(e, func, file, line);
370	if(e->hold_count != 0)
371		lock_error(e, func, file, line, "delete while locked.");
372	if(e->wait_count != 0)
373		lock_error(e, func, file, line, "delete while waited on.");
374	prot_check(e, func, file, line);
375	*lock = NULL; /* use after free will fail */
376	LOCKRET(pthread_mutex_unlock(&e->lock));
377
378	/* contention, look at fraction in trouble. */
379	if(e->history_count > 1 &&
380	   1000*e->contention_count/e->history_count > contention_interest) {
381		log_info("lock created %s %s %d has contention %u of %u (%d%%)",
382			e->create_func, e->create_file, e->create_line,
383			(unsigned int)e->contention_count,
384			(unsigned int)e->history_count,
385			(int)(100*e->contention_count/e->history_count));
386	}
387
388	/* delete it */
389	LOCKRET(pthread_mutex_destroy(&e->lock));
390	prot_clear(e);
391	/* since nobody holds the lock - see check above, no need to unlink
392	 * from the thread-held locks list. */
393	switch(e->type) {
394		case check_lock_mutex:
395			LOCKRET(pthread_mutex_destroy(&e->u.mutex));
396			break;
397		case check_lock_spinlock:
398			LOCKRET(pthread_spin_destroy(&e->u.spinlock));
399			break;
400		case check_lock_rwlock:
401			LOCKRET(pthread_rwlock_destroy(&e->u.rwlock));
402			break;
403		default:
404			log_assert(0);
405	}
406	memset(e, 0, sizeof(struct checked_lock));
407	free(e);
408}
409
410/** finish acquiring lock, shared between _(rd|wr||)lock() routines */
411static void
412finish_acquire_lock(struct thr_check* thr, struct checked_lock* lock,
413        const char* func, const char* file, int line)
414{
415	thr->waiting = NULL;
416	lock->wait_count --;
417	lock->holder = thr;
418	lock->hold_count ++;
419	lock->holder_func = func;
420	lock->holder_file = file;
421	lock->holder_line = line;
422	ordercheck_locklock(thr, lock);
423
424	/* insert in thread lock list, as first */
425	lock->prev_held_lock[thr->num] = NULL;
426	lock->next_held_lock[thr->num] = thr->holding_first;
427	if(thr->holding_first)
428		/* no need to lock it, since this thread already holds the
429		 * lock (since it is on this list) and we only edit thr->num
430		 * member in array. So it is safe.  */
431		thr->holding_first->prev_held_lock[thr->num] = lock;
432	else	thr->holding_last = lock;
433	thr->holding_first = lock;
434}
435
436/**
437 * Locking routine.
438 * @param type: as passed by user.
439 * @param lock: as passed by user.
440 * @param func: caller location.
441 * @param file: caller location.
442 * @param line: caller location.
443 * @param tryfunc: the pthread_mutex_trylock or similar function.
444 * @param timedfunc: the pthread_mutex_timedlock or similar function.
445 *	Uses absolute timeout value.
446 * @param arg: what to pass to tryfunc and timedlock.
447 * @param exclusive: if lock must be exclusive (only one allowed).
448 * @param getwr: if attempts to get writelock (or readlock) for rwlocks.
449 */
450static void
451checklock_lockit(enum check_lock_type type, struct checked_lock* lock,
452        const char* func, const char* file, int line,
453	int (*tryfunc)(void*), int (*timedfunc)(void*, struct timespec*),
454	void* arg, int exclusive, int getwr)
455{
456	int err;
457	int contend = 0;
458	struct thr_check *thr = (struct thr_check*)pthread_getspecific(
459		thr_debug_key);
460	checktype(type, lock, func, file, line);
461	if(!thr) lock_error(lock, func, file, line, "no thread info");
462
463	acquire_locklock(lock, func, file, line);
464	lock->wait_count ++;
465	thr->waiting = lock;
466	if(exclusive && lock->hold_count > 0 && lock->holder == thr)
467		lock_error(lock, func, file, line, "thread already owns lock");
468	if(type==check_lock_rwlock && getwr && lock->writeholder == thr)
469		lock_error(lock, func, file, line, "thread already has wrlock");
470	LOCKRET(pthread_mutex_unlock(&lock->lock));
471
472	/* first try; if busy increase contention counter */
473	if((err=tryfunc(arg))) {
474		struct timespec to;
475		if(err != EBUSY) log_err("trylock: %s", strerror(err));
476		to.tv_sec = time(NULL) + CHECK_LOCK_TIMEOUT;
477		to.tv_nsec = 0;
478		if((err=timedfunc(arg, &to))) {
479			if(err == ETIMEDOUT)
480				lock_error(lock, func, file, line,
481					"timeout possible deadlock");
482			log_err("timedlock: %s", strerror(err));
483		}
484		contend ++;
485	}
486	/* got the lock */
487
488	acquire_locklock(lock, func, file, line);
489	lock->contention_count += contend;
490	lock->history_count++;
491	if(exclusive && lock->hold_count > 0)
492		lock_error(lock, func, file, line, "got nonexclusive lock");
493	if(type==check_lock_rwlock && getwr && lock->writeholder)
494		lock_error(lock, func, file, line, "got nonexclusive wrlock");
495	if(type==check_lock_rwlock && getwr)
496		lock->writeholder = thr;
497	/* check the memory areas for unauthorized changes,
498	 * between last unlock time and current lock time.
499	 * we check while holding the lock (threadsafe).
500	 */
501	if(getwr || exclusive)
502		prot_check(lock, func, file, line);
503	finish_acquire_lock(thr, lock, func, file, line);
504	LOCKRET(pthread_mutex_unlock(&lock->lock));
505}
506
507/** helper for rdlock: try */
508static int try_rd(void* arg)
509{ return pthread_rwlock_tryrdlock((pthread_rwlock_t*)arg); }
510/** helper for rdlock: timed */
511static int timed_rd(void* arg, struct timespec* to)
512{ return pthread_rwlock_timedrdlock((pthread_rwlock_t*)arg, to); }
513
514/** check if OK, lock */
515void
516checklock_rdlock(enum check_lock_type type, struct checked_lock* lock,
517        const char* func, const char* file, int line)
518{
519	if(key_deleted)
520		return;
521
522	if(verbose_locking && !(verbose_locking_not_loglock &&
523		lock->create_thread == 0 && lock->create_instance == 0))
524		fprintf(stderr, "checklock_rdlock lock %d %d %s:%d at %s:%d\n", lock->create_thread, lock->create_instance, lock->create_file, lock->create_line, file, line);
525	log_assert(type == check_lock_rwlock);
526	checklock_lockit(type, lock, func, file, line,
527		try_rd, timed_rd, &lock->u.rwlock, 0, 0);
528}
529
530/** helper for wrlock: try */
531static int try_wr(void* arg)
532{ return pthread_rwlock_trywrlock((pthread_rwlock_t*)arg); }
533/** helper for wrlock: timed */
534static int timed_wr(void* arg, struct timespec* to)
535{ return pthread_rwlock_timedwrlock((pthread_rwlock_t*)arg, to); }
536
537/** check if OK, lock */
538void
539checklock_wrlock(enum check_lock_type type, struct checked_lock* lock,
540        const char* func, const char* file, int line)
541{
542	if(key_deleted)
543		return;
544	log_assert(type == check_lock_rwlock);
545	if(verbose_locking && !(verbose_locking_not_loglock &&
546		lock->create_thread == 0 && lock->create_instance == 0))
547		fprintf(stderr, "checklock_wrlock lock %d %d %s:%d at %s:%d\n", lock->create_thread, lock->create_instance, lock->create_file, lock->create_line, file, line);
548	checklock_lockit(type, lock, func, file, line,
549		try_wr, timed_wr, &lock->u.rwlock, 0, 1);
550}
551
552/** helper for lock mutex: try */
553static int try_mutex(void* arg)
554{ return pthread_mutex_trylock((pthread_mutex_t*)arg); }
555/** helper for lock mutex: timed */
556static int timed_mutex(void* arg, struct timespec* to)
557{ return pthread_mutex_timedlock((pthread_mutex_t*)arg, to); }
558
559/** helper for lock spinlock: try */
560static int try_spinlock(void* arg)
561{ return pthread_spin_trylock((pthread_spinlock_t*)arg); }
562/** helper for lock spinlock: timed */
563static int timed_spinlock(void* arg, struct timespec* to)
564{
565	int err;
566	/* spin for 5 seconds. (ouch for the CPU, but it beats forever) */
567	while( (err=try_spinlock(arg)) == EBUSY) {
568#ifndef S_SPLINT_S
569		if(time(NULL) >= to->tv_sec)
570			return ETIMEDOUT;
571		usleep(1000); /* in 1/1000000s of a second */
572#endif
573	}
574	return err;
575}
576
577/** check if OK, lock */
578void
579checklock_lock(enum check_lock_type type, struct checked_lock* lock,
580        const char* func, const char* file, int line)
581{
582	if(key_deleted)
583		return;
584	log_assert(type != check_lock_rwlock);
585	if(verbose_locking && !(verbose_locking_not_loglock &&
586		lock->create_thread == 0 && lock->create_instance == 0))
587		fprintf(stderr, "checklock_lock lock %d %d %s:%d at %s:%d\n", lock->create_thread, lock->create_instance, lock->create_file, lock->create_line, file, line);
588	switch(type) {
589		case check_lock_mutex:
590			checklock_lockit(type, lock, func, file, line,
591				try_mutex, timed_mutex, &lock->u.mutex, 1, 0);
592			break;
593		case check_lock_spinlock:
594			/* void* cast needed because 'volatile' on some OS */
595			checklock_lockit(type, lock, func, file, line,
596				try_spinlock, timed_spinlock,
597				(void*)&lock->u.spinlock, 1, 0);
598			break;
599		default:
600			log_assert(0);
601	}
602}
603
604/** check if OK, unlock */
605void
606checklock_unlock(enum check_lock_type type, struct checked_lock* lock,
607        const char* func, const char* file, int line)
608{
609	struct thr_check *thr;
610	if(key_deleted)
611		return;
612	thr = (struct thr_check*)pthread_getspecific(thr_debug_key);
613	checktype(type, lock, func, file, line);
614	if(!thr) lock_error(lock, func, file, line, "no thread info");
615
616	acquire_locklock(lock, func, file, line);
617	/* was this thread even holding this lock? */
618	if(thr->holding_first != lock &&
619		lock->prev_held_lock[thr->num] == NULL) {
620		lock_error(lock, func, file, line, "unlock nonlocked lock");
621	}
622	if(lock->hold_count <= 0)
623		lock_error(lock, func, file, line, "too many unlocks");
624
625	if(verbose_locking && !(verbose_locking_not_loglock &&
626		lock->create_thread == 0 && lock->create_instance == 0))
627		fprintf(stderr, "checklock_unlock lock %d %d %s:%d at %s:%d\n", lock->create_thread, lock->create_instance, lock->create_file, lock->create_line, file, line);
628
629	/* store this point as last touched by */
630	lock->holder = thr;
631	lock->hold_count --;
632	lock->holder_func = func;
633	lock->holder_file = file;
634	lock->holder_line = line;
635
636	/* delete from thread holder list */
637	/* no need to lock other lockstructs, because they are all on the
638	 * held-locks list, and this thread holds their locks.
639	 * we only touch the thr->num members, so it is safe.  */
640	if(thr->holding_first == lock)
641		thr->holding_first = lock->next_held_lock[thr->num];
642	if(thr->holding_last == lock)
643		thr->holding_last = lock->prev_held_lock[thr->num];
644	if(lock->next_held_lock[thr->num])
645		lock->next_held_lock[thr->num]->prev_held_lock[thr->num] =
646			lock->prev_held_lock[thr->num];
647	if(lock->prev_held_lock[thr->num])
648		lock->prev_held_lock[thr->num]->next_held_lock[thr->num] =
649			lock->next_held_lock[thr->num];
650	lock->next_held_lock[thr->num] = NULL;
651	lock->prev_held_lock[thr->num] = NULL;
652
653	if(type==check_lock_rwlock && lock->writeholder == thr) {
654		lock->writeholder = NULL;
655		prot_store(lock);
656	} else if(type != check_lock_rwlock) {
657		/* store memory areas that are protected, for later checks */
658		prot_store(lock);
659	}
660	LOCKRET(pthread_mutex_unlock(&lock->lock));
661
662	/* unlock it */
663	switch(type) {
664		case check_lock_mutex:
665			LOCKRET(pthread_mutex_unlock(&lock->u.mutex));
666			break;
667		case check_lock_spinlock:
668			LOCKRET(pthread_spin_unlock(&lock->u.spinlock));
669			break;
670		case check_lock_rwlock:
671			LOCKRET(pthread_rwlock_unlock(&lock->u.rwlock));
672			break;
673		default:
674			log_assert(0);
675	}
676}
677
678/** open order info debug file, thr->num must be valid */
679static void
680open_lockorder(struct thr_check* thr)
681{
682	char buf[24];
683	time_t t;
684	snprintf(buf, sizeof(buf), "ublocktrace.%d", thr->num);
685	thr->order_info = fopen(buf, "w");
686	if(!thr->order_info)
687		fatal_exit("could not open %s: %s", buf, strerror(errno));
688	thr->locks_created = 0;
689	t = time(NULL);
690	/* write: <time_stamp> <runpid> <thread_num> */
691	if(fwrite(&t, sizeof(t), 1, thr->order_info) != 1 ||
692		fwrite(&thr->num, sizeof(thr->num), 1, thr->order_info) != 1 ||
693		fwrite(&check_lock_pid, sizeof(check_lock_pid), 1,
694		thr->order_info) != 1)
695		log_err("fwrite: %s", strerror(errno));
696}
697
698/** checklock thread main, Inits thread structure */
699static void* checklock_main(void* arg)
700{
701	struct thr_check* thr = (struct thr_check*)arg;
702	void* ret;
703	thr->id = pthread_self();
704	/* Hack to get same numbers as in log file */
705	thr->num = *(int*)(thr->arg);
706	log_assert(thr->num < THRDEBUG_MAX_THREADS);
707	/* as an aside, due to this, won't work for libunbound bg thread */
708	if(thread_infos[thr->num] != NULL)
709		log_warn("thread warning, thr->num %d not NULL", thr->num);
710	thread_infos[thr->num] = thr;
711	LOCKRET(pthread_setspecific(thr_debug_key, thr));
712	if(check_locking_order)
713		open_lockorder(thr);
714	ret = thr->func(thr->arg);
715	thread_infos[thr->num] = NULL;
716	if(check_locking_order)
717		fclose(thr->order_info);
718	free(thr);
719	return ret;
720}
721
722/** init the main thread */
723void checklock_start(void)
724{
725	if(key_deleted)
726		return;
727	if(!key_created) {
728		struct thr_check* thisthr = (struct thr_check*)calloc(1,
729			sizeof(struct thr_check));
730		if(!thisthr)
731			fatal_exit("thrcreate: out of memory");
732		key_created = 1;
733		check_lock_pid = getpid();
734		LOCKRET(pthread_key_create(&thr_debug_key, NULL));
735		LOCKRET(pthread_setspecific(thr_debug_key, thisthr));
736		thread_infos[0] = thisthr;
737		if(check_locking_order)
738			open_lockorder(thisthr);
739	}
740}
741
742/** stop checklocks */
743void checklock_stop(void)
744{
745	if(key_created) {
746		int i;
747		key_deleted = 1;
748		if(check_locking_order)
749			fclose(thread_infos[0]->order_info);
750		free(thread_infos[0]);
751		thread_infos[0] = NULL;
752		for(i = 0; i < THRDEBUG_MAX_THREADS; i++)
753			log_assert(thread_infos[i] == NULL);
754			/* should have been cleaned up. */
755		LOCKRET(pthread_key_delete(thr_debug_key));
756		key_created = 0;
757	}
758}
759
760/** allocate debug info and create thread */
761void
762checklock_thrcreate(pthread_t* id, void* (*func)(void*), void* arg)
763{
764	struct thr_check* thr = (struct thr_check*)calloc(1,
765		sizeof(struct thr_check));
766	if(!thr)
767		fatal_exit("thrcreate: out of memory");
768	if(!key_created) {
769		checklock_start();
770	}
771	thr->func = func;
772	thr->arg = arg;
773	LOCKRET(pthread_create(id, NULL, checklock_main, thr));
774}
775
776/** count number of thread infos */
777static int
778count_thread_infos(void)
779{
780	int cnt = 0;
781	int i;
782	for(i=0; i<THRDEBUG_MAX_THREADS; i++)
783		if(thread_infos[i])
784			cnt++;
785	return cnt;
786}
787
788/** print lots of info on a lock */
789static void
790lock_debug_info(struct checked_lock* lock)
791{
792	if(!lock) return;
793	log_info("+++ Lock %llx, %d %d create %s %s %d",
794		(unsigned long long)(size_t)lock,
795		lock->create_thread, lock->create_instance,
796		lock->create_func, lock->create_file, lock->create_line);
797	log_info("lock type: %s",
798		(lock->type==check_lock_mutex)?"mutex": (
799		(lock->type==check_lock_spinlock)?"spinlock": (
800		(lock->type==check_lock_rwlock)?"rwlock": "badtype")));
801	log_info("lock contention %u, history:%u, hold:%d, wait:%d",
802		(unsigned)lock->contention_count, (unsigned)lock->history_count,
803		lock->hold_count, lock->wait_count);
804	log_info("last touch %s %s %d", lock->holder_func, lock->holder_file,
805		lock->holder_line);
806	log_info("holder thread %d, writeholder thread %d",
807		lock->holder?lock->holder->num:-1,
808		lock->writeholder?lock->writeholder->num:-1);
809}
810
811/** print debug locks held by a thread */
812static void
813held_debug_info(struct thr_check* thr, struct checked_lock* lock)
814{
815	if(!lock) return;
816	lock_debug_info(lock);
817	held_debug_info(thr, lock->next_held_lock[thr->num]);
818}
819
820/** print debug info for a thread */
821static void
822thread_debug_info(struct thr_check* thr)
823{
824	struct checked_lock* w = NULL;
825	struct checked_lock* f = NULL;
826	struct checked_lock* l = NULL;
827	if(!thr) return;
828	log_info("pthread id is %x", (int)thr->id);
829	log_info("thread func is %llx", (unsigned long long)(size_t)thr->func);
830	log_info("thread arg is %llx (%d)",
831		(unsigned long long)(size_t)thr->arg,
832		(thr->arg?*(int*)thr->arg:0));
833	log_info("thread num is %d", thr->num);
834	log_info("locks created %d", thr->locks_created);
835	log_info("open file for lockinfo: %s",
836		thr->order_info?"yes, flushing":"no");
837	fflush(thr->order_info);
838	w = thr->waiting;
839	f = thr->holding_first;
840	l = thr->holding_last;
841	log_info("thread waiting for a lock: %s %llx", w?"yes":"no",
842		(unsigned long long)(size_t)w);
843	lock_debug_info(w);
844	log_info("thread holding first: %s, last: %s", f?"yes":"no",
845		l?"yes":"no");
846	held_debug_info(thr, f);
847}
848
849static void
850total_debug_info(void)
851{
852	int i;
853	log_info("checklocks: supervising %d threads.",
854		count_thread_infos());
855	if(!key_created) {
856		log_info("No thread debug key created yet");
857	}
858	for(i=0; i<THRDEBUG_MAX_THREADS; i++) {
859		if(thread_infos[i]) {
860			log_info("*** Thread %d information: ***", i);
861			thread_debug_info(thread_infos[i]);
862		}
863	}
864}
865
866/** signal handler for join timeout, Exits */
867static RETSIGTYPE joinalarm(int ATTR_UNUSED(sig))
868{
869	log_err("join thread timeout. hangup or deadlock. Info follows.");
870	total_debug_info();
871	fatal_exit("join thread timeout. hangup or deadlock.");
872}
873
874/** wait for thread with a timeout */
875void
876checklock_thrjoin(pthread_t thread)
877{
878	/* wait with a timeout */
879	if(signal(SIGALRM, joinalarm) == SIG_ERR)
880		fatal_exit("signal(): %s", strerror(errno));
881	(void)alarm(CHECK_JOIN_TIMEOUT);
882	LOCKRET(pthread_join(thread, NULL));
883	(void)alarm(0);
884}
885
886#endif /* USE_THREAD_DEBUG */
887