/*	$NetBSD: pthread.c,v 1.185 2024/06/08 08:01:49 hannken Exp $	*/

/*-
 * Copyright (c) 2001, 2002, 2003, 2006, 2007, 2008, 2020
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Nathan J. Williams and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__RCSID("$NetBSD: pthread.c,v 1.185 2024/06/08 08:01:49 hannken Exp $");

#define	__EXPOSE_STACK	1

/* Need to use libc-private names for atomic operations. */
#include "../../common/lib/libc/atomic/atomic_op_namespace.h"

#include <sys/param.h>
#include <sys/exec_elf.h>
#include <sys/mman.h>
#include <sys/lwp.h>
#include <sys/lwpctl.h>
#include <sys/resource.h>
#include <sys/sysctl.h>
#include <sys/tls.h>
#include <uvm/uvm_param.h>

#include <assert.h>
#include <dlfcn.h>
#include <err.h>
#include <errno.h>
#include <lwp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <syslog.h>
#include <ucontext.h>
#include <unistd.h>
#include <sched.h>

#include "atexit.h"
#include "pthread.h"
#include "pthread_int.h"
#include "pthread_makelwp.h"
#include "reentrant.h"

__BEGIN_DECLS
void _malloc_thread_cleanup(void) __weak;
__END_DECLS

pthread_rwlock_t pthread__alltree_lock = PTHREAD_RWLOCK_INITIALIZER;
static rb_tree_t	pthread__alltree;

static signed int	pthread__cmp(void *, const void *, const void *);

static const rb_tree_ops_t pthread__alltree_ops = {
	.rbto_compare_nodes = pthread__cmp,
	.rbto_compare_key = pthread__cmp,
	.rbto_node_offset = offsetof(struct __pthread_st, pt_alltree),
	.rbto_context = NULL
};

static void	pthread__create_tramp(void *);
static void	pthread__initthread(pthread_t);
static void	pthread__scrubthread(pthread_t, char *, int);
static void	pthread__initmain(pthread_t *);
static void	pthread__reap(pthread_t);

void	pthread__init(void);

int pthread__started;
int __uselibcstub = 1;
pthread_mutex_t pthread__deadqueue_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_queue_t pthread__deadqueue;
pthread_queue_t pthread__allqueue;

static pthread_attr_t pthread_default_attr;
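/*
 * Until the kernel hands a thread a real lwpctl block (obtained with
 * _lwp_ctl() in pthread__init(), pthread__fork_child() and
 * pthread__create_tramp()), its pt_lwpctl points at this dummy, so
 * readers of lc_curcpu always see a valid "no CPU" value.
 */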
static lwpctl_t pthread__dummy_lwpctl = { .lc_curcpu = LWPCTL_CPU_NONE };

enum {
	DIAGASSERT_ABORT =	1<<0,
	DIAGASSERT_STDERR =	1<<1,
	DIAGASSERT_SYSLOG =	1<<2
};

static int pthread__diagassert;

int pthread__concurrency;
int pthread__nspins;
size_t pthread__unpark_max = PTHREAD__UNPARK_MAX;
int pthread__dbg;	/* set by libpthread_dbg if active */

/*
 * We have to initialize the pthread_stack* variables here because
 * mutexes are used before pthread__init() (and thus pthread__initmain())
 * is called.  Since mutexes only save the stack pointer and not a
 * pointer to the thread data, it is safe to change the mapping from
 * stack pointer to thread data afterwards.
 */
size_t	pthread__stacksize;
size_t	pthread__guardsize;
size_t	pthread__pagesize;
static struct __pthread_st *pthread__main;
static size_t __pthread_st_size;

int _sys___sigprocmask14(int, const sigset_t *, sigset_t *);

__strong_alias(__libc_thr_self,pthread_self)
__strong_alias(__libc_thr_create,pthread_create)
__strong_alias(__libc_thr_exit,pthread_exit)
__strong_alias(__libc_thr_errno,pthread__errno)
__strong_alias(__libc_thr_setcancelstate,pthread_setcancelstate)
__strong_alias(__libc_thr_equal,pthread_equal)
__strong_alias(__libc_thr_init,pthread__init)

/*
 * Static library kludge.  Place a reference here to a symbol from each
 * library file which does not already have one, so that the static
 * linker pulls the file in.
 */
extern int pthread__cancel_stub_binder;

void *pthread__static_lib_binder[] = {
	&pthread__cancel_stub_binder,
	pthread_cond_init,
	pthread_mutex_init,
	pthread_rwlock_init,
	pthread_barrier_init,
	pthread_key_create,
	pthread_setspecific,
};
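
/*
 * A fixed bank of locks, indexed by a hash of an object's address
 * (see pthread__hashlock() below).  Each entry is padded and aligned
 * to 64 bytes, presumably the cache line size, so that two locks
 * never share a cache line and false sharing between CPUs is avoided.
 */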
#define	NHASHLOCK	64

static union hashlock {
	pthread_mutex_t	mutex;
	char		pad[64];
} hashlocks[NHASHLOCK] __aligned(64);

static void
pthread__prefork(void)
{
	pthread_mutex_lock(&pthread__deadqueue_lock);
}

static void
pthread__fork_parent(void)
{
	pthread_mutex_unlock(&pthread__deadqueue_lock);
}

static void
pthread__fork_child(void)
{
	struct __pthread_st *self = pthread__self();

	pthread_mutex_init(&pthread__deadqueue_lock, NULL);

	/* lwpctl state is not copied across fork. */
	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &self->pt_lwpctl)) {
		err(EXIT_FAILURE, "_lwp_ctl");
	}
	self->pt_lid = _lwp_self();
}

/*
 * This needs to be started by the library loading code, before main()
 * gets to run, for various things that use the state of the initial thread
 * to work properly (thread-specific data is an application-visible example;
 * spinlock counts for mutexes are an internal example).
 */
void
pthread__init(void)
{
	pthread_t first;
	char *p;
	int mib[2];
	unsigned int value;
	ssize_t slen;
	size_t len;
	extern int __isthreaded;

	/*
	 * Allocate pthread key descriptors before resetting
	 * __uselibcstub, because otherwise malloc() would call
	 * pthread_key_create() while the descriptors are not yet
	 * allocated.
	 */
	pthread__main = pthread_tsd_init(&__pthread_st_size);
	if (pthread__main == NULL)
		err(EXIT_FAILURE, "Cannot allocate pthread storage");

	__uselibcstub = 0;

	pthread__pagesize = (size_t)sysconf(_SC_PAGESIZE);
	pthread__concurrency = (int)sysconf(_SC_NPROCESSORS_CONF);

	mib[0] = CTL_VM;
	mib[1] = VM_THREAD_GUARD_SIZE;
	len = sizeof(value);
	if (sysctl(mib, __arraycount(mib), &value, &len, NULL, 0) == 0)
		pthread__guardsize = value;
	else
		pthread__guardsize = pthread__pagesize;

	/* Initialize locks first; they're needed elsewhere. */
	pthread__lockprim_init();
	for (int i = 0; i < NHASHLOCK; i++) {
		pthread_mutex_init(&hashlocks[i].mutex, NULL);
	}

	/* Fetch parameters. */
	slen = _lwp_unpark_all(NULL, 0, NULL);
	if (slen < 0)
		err(EXIT_FAILURE, "_lwp_unpark_all");
	if ((size_t)slen < pthread__unpark_max)
		pthread__unpark_max = slen;

	/* Basic data structure setup */
	pthread_attr_init(&pthread_default_attr);
	PTQ_INIT(&pthread__allqueue);
	PTQ_INIT(&pthread__deadqueue);

	rb_tree_init(&pthread__alltree, &pthread__alltree_ops);

	/* Create the thread structure corresponding to main() */
	pthread__initmain(&first);
	pthread__initthread(first);
	pthread__scrubthread(first, NULL, 0);

	first->pt_lid = _lwp_self();
	PTQ_INSERT_HEAD(&pthread__allqueue, first, pt_allq);
	(void)rb_tree_insert_node(&pthread__alltree, first);

	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &first->pt_lwpctl) != 0) {
		err(EXIT_FAILURE, "_lwp_ctl");
	}

	/* Start subsystems */
	PTHREAD_MD_INIT
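
	/*
	 * Parse PTHREAD_DIAGASSERT: a lower-case letter enables a
	 * reporting mode, the matching upper-case letter disables it
	 * ('a'/'A' abort, 'e'/'E' stderr, 'l'/'L' syslog).  For
	 * example, PTHREAD_DIAGASSERT=el reports errors to stderr and
	 * syslog without aborting.
	 */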
	for (p = pthread__getenv("PTHREAD_DIAGASSERT"); p && *p; p++) {
		switch (*p) {
		case 'a':
			pthread__diagassert |= DIAGASSERT_ABORT;
			break;
		case 'A':
			pthread__diagassert &= ~DIAGASSERT_ABORT;
			break;
		case 'e':
			pthread__diagassert |= DIAGASSERT_STDERR;
			break;
		case 'E':
			pthread__diagassert &= ~DIAGASSERT_STDERR;
			break;
		case 'l':
			pthread__diagassert |= DIAGASSERT_SYSLOG;
			break;
		case 'L':
			pthread__diagassert &= ~DIAGASSERT_SYSLOG;
			break;
		}
	}

	/* Tell libc that we're here and it should role-play accordingly. */
	pthread_atfork(pthread__prefork, pthread__fork_parent,
	    pthread__fork_child);
	__isthreaded = 1;
}

/* General-purpose thread data structure sanitization. */
/* ARGSUSED */
static void
pthread__initthread(pthread_t t)
{

	t->pt_self = t;
	t->pt_magic = PT_MAGIC;
	t->pt_sleepobj = NULL;
	t->pt_havespecific = 0;
	t->pt_lwpctl = &pthread__dummy_lwpctl;

	memcpy(&t->pt_lockops, pthread__lock_ops, sizeof(t->pt_lockops));
	pthread_mutex_init(&t->pt_lock, NULL);
	PTQ_INIT(&t->pt_cleanup_stack);
}

static void
pthread__scrubthread(pthread_t t, char *name, int flags)
{

	t->pt_state = PT_STATE_RUNNING;
	t->pt_exitval = NULL;
	t->pt_flags = flags;
	t->pt_cancel = 0;
	t->pt_errno = 0;
	t->pt_name = name;
	t->pt_lid = 0;
}

static int
pthread__getstack(pthread_t newthread, const pthread_attr_t *attr)
{
	void *stackbase, *stackbase2, *redzone;
	size_t stacksize, guardsize;
	bool allocated;

	if (attr != NULL) {
		pthread_attr_getstack(attr, &stackbase, &stacksize);
		if (stackbase == NULL)
			pthread_attr_getguardsize(attr, &guardsize);
		else
			guardsize = 0;
	} else {
		stackbase = NULL;
		stacksize = 0;
		guardsize = pthread__guardsize;
	}
	if (stacksize == 0)
		stacksize = pthread__stacksize;

	if (newthread->pt_stack_allocated) {
		if (stackbase == NULL &&
		    newthread->pt_stack.ss_size == stacksize &&
		    newthread->pt_guardsize == guardsize)
			return 0;
		stackbase2 = newthread->pt_stack.ss_sp;
#ifndef __MACHINE_STACK_GROWS_UP
		stackbase2 = (char *)stackbase2 - newthread->pt_guardsize;
#endif
		munmap(stackbase2,
		    newthread->pt_stack.ss_size + newthread->pt_guardsize);
		newthread->pt_stack.ss_sp = NULL;
		newthread->pt_stack.ss_size = 0;
		newthread->pt_guardsize = 0;
		newthread->pt_stack_allocated = false;
	}

	newthread->pt_stack_allocated = false;
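
	/*
	 * No stack was supplied, so allocate one, rounding the stack
	 * and guard sizes up to a multiple of the page size.  The
	 * OR-with-(pagesize - 1) idiom below relies on the page size
	 * being a power of two.
	 */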
	if (stackbase == NULL) {
		stacksize = ((stacksize - 1) | (pthread__pagesize - 1)) + 1;
		guardsize = ((guardsize - 1) | (pthread__pagesize - 1)) + 1;
		stackbase = mmap(NULL, stacksize + guardsize,
		    PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, (off_t)0);
		if (stackbase == MAP_FAILED)
			return ENOMEM;
		allocated = true;
	} else {
		allocated = false;
	}
#ifdef __MACHINE_STACK_GROWS_UP
	redzone = (char *)stackbase + stacksize;
	stackbase2 = (char *)stackbase;
#else
	redzone = (char *)stackbase;
	stackbase2 = (char *)stackbase + guardsize;
#endif
	if (allocated && guardsize &&
	    mprotect(redzone, guardsize, PROT_NONE) == -1) {
		munmap(stackbase, stacksize + guardsize);
		return EPERM;
	}
	newthread->pt_stack.ss_size = stacksize;
	newthread->pt_stack.ss_sp = stackbase2;
	newthread->pt_guardsize = guardsize;
	newthread->pt_stack_allocated = allocated;
	return 0;
}

int
pthread_create(pthread_t *thread, const pthread_attr_t *attr,
	    void *(*startfunc)(void *), void *arg)
{
	pthread_t newthread;
	pthread_attr_t nattr;
	struct pthread_attr_private *p;
	char * volatile name;
	unsigned long flag;
	void *private_area;
	int ret;

	if (__predict_false(__uselibcstub)) {
		pthread__errorfunc(__FILE__, __LINE__, __func__,
		    "pthread_create() requires linking with -lpthread");
		return __libc_thr_create_stub(thread, attr, startfunc, arg);
	}

	if (attr == NULL)
		nattr = pthread_default_attr;
	else if (attr->pta_magic == PT_ATTR_MAGIC)
		nattr = *attr;
	else
		return EINVAL;

	if (!pthread__started) {
		/*
		 * Force the _lwp_park symbol to be resolved before we
		 * begin any activity that might rely on concurrent
		 * wakeups.
		 *
		 * This is necessary because rtld itself uses _lwp_park
		 * and _lwp_unpark internally for its own locking: If
		 * we wait to resolve _lwp_park until there is an
		 * _lwp_unpark from another thread pending in the
		 * current lwp (for example, pthread_mutex_unlock or
		 * pthread_cond_signal), rtld's internal use of
		 * _lwp_park might consume the pending unpark.  The
		 * result is a deadlock where libpthread and rtld have
		 * both correctly used _lwp_park and _lwp_unpark for
		 * themselves, but rtld has consumed the wakeup meant
		 * for libpthread so it is lost to libpthread.
		 *
		 * For the very first thread, before pthread__started
		 * is set to true, pthread__self()->pt_lid should have
		 * been initialized in pthread__init by the time we get
		 * here to the correct lid so we go to sleep and wake
		 * ourselves at the same time as a no-op.
		 */
		_lwp_park(CLOCK_REALTIME, 0, NULL, pthread__self()->pt_lid,
		    NULL, NULL);
	}

	pthread__started = 1;

	/* Fetch misc. attributes from the attr structure. */
	name = NULL;
	if ((p = nattr.pta_private) != NULL)
		if (p->ptap_name[0] != '\0')
			if ((name = strdup(p->ptap_name)) == NULL)
				return ENOMEM;

	newthread = NULL;

	/*
	 * Try to reclaim a dead thread.
	 */
	if (!PTQ_EMPTY(&pthread__deadqueue)) {
		pthread_mutex_lock(&pthread__deadqueue_lock);
		PTQ_FOREACH(newthread, &pthread__deadqueue, pt_deadq) {
			/*
			 * Still running?  Probe with signal 0: ESRCH
			 * means the kernel has reaped the LWP, so its
			 * stack and pthread_t can safely be recycled.
			 */
			if (_lwp_kill(newthread->pt_lid, 0) == -1 &&
			    errno == ESRCH)
				break;
		}
		if (newthread)
			PTQ_REMOVE(&pthread__deadqueue, newthread, pt_deadq);
		pthread_mutex_unlock(&pthread__deadqueue_lock);
#if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
		if (newthread && newthread->pt_tls) {
			_rtld_tls_free(newthread->pt_tls);
			newthread->pt_tls = NULL;
		}
#endif
	}

	/*
	 * If necessary set up a stack, allocate space for a pthread_st,
	 * and initialize it.
	 */
	if (newthread == NULL) {
		newthread = calloc(1, __pthread_st_size);
		if (newthread == NULL) {
			free(name);
			return ENOMEM;
		}
		newthread->pt_stack_allocated = false;

		if (pthread__getstack(newthread, attr)) {
			free(newthread);
			free(name);
			return ENOMEM;
		}

#if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
		newthread->pt_tls = NULL;
#endif

		/* Add to list of all threads. */
		pthread_rwlock_wrlock(&pthread__alltree_lock);
		PTQ_INSERT_TAIL(&pthread__allqueue, newthread, pt_allq);
		(void)rb_tree_insert_node(&pthread__alltree, newthread);
		pthread_rwlock_unlock(&pthread__alltree_lock);

		/* Will be reset by the thread upon exit. */
		pthread__initthread(newthread);
	} else {
		if (pthread__getstack(newthread, attr)) {
			pthread_mutex_lock(&pthread__deadqueue_lock);
			PTQ_INSERT_TAIL(&pthread__deadqueue, newthread,
			    pt_deadq);
			pthread_mutex_unlock(&pthread__deadqueue_lock);
			/* The name was never attached to the thread. */
			free(name);
			return ENOMEM;
		}
	}

	/*
	 * Create the new LWP.
	 */
	pthread__scrubthread(newthread, name, nattr.pta_flags);
	newthread->pt_func = startfunc;
	newthread->pt_arg = arg;
#if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
	private_area = newthread->pt_tls = _rtld_tls_allocate();
	newthread->pt_tls->tcb_pthread = newthread;
#else
	private_area = newthread;
#endif

	flag = 0;
	if ((newthread->pt_flags & PT_FLAG_SUSPENDED) != 0 ||
	    (nattr.pta_flags & PT_FLAG_EXPLICIT_SCHED) != 0)
		flag |= LWP_SUSPENDED;
	if ((newthread->pt_flags & PT_FLAG_DETACHED) != 0)
		flag |= LWP_DETACHED;

	ret = pthread__makelwp(pthread__create_tramp, newthread, private_area,
	    newthread->pt_stack.ss_sp, newthread->pt_stack.ss_size,
	    flag, &newthread->pt_lid);
	if (ret != 0) {
		ret = errno;
		pthread_mutex_lock(&newthread->pt_lock);
		/* Will unlock and free name. */
		pthread__reap(newthread);
		return ret;
	}

	if ((nattr.pta_flags & PT_FLAG_EXPLICIT_SCHED) != 0) {
		if (p != NULL) {
			(void)pthread_setschedparam(newthread, p->ptap_policy,
			    &p->ptap_sp);
		}
		if ((newthread->pt_flags & PT_FLAG_SUSPENDED) == 0) {
			(void)_lwp_continue(newthread->pt_lid);
		}
	}

	*thread = newthread;

	return 0;
}


__dead static void
pthread__create_tramp(void *cookie)
{
	pthread_t self;
	void *retval;
	void *junk __unused;

	self = cookie;

	/*
	 * Throw away some stack in a feeble attempt to reduce cache
	 * thrash.  May help for SMT processors.  XXX We should not
	 * be allocating stacks on fixed 2MB boundaries.  Needs a
	 * thread register or decent thread local storage.
	 */
	junk = alloca(((unsigned)self->pt_lid & 7) << 8);

	if (self->pt_name != NULL) {
		pthread_mutex_lock(&self->pt_lock);
		if (self->pt_name != NULL)
			(void)_lwp_setname(0, self->pt_name);
		pthread_mutex_unlock(&self->pt_lock);
	}

	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &self->pt_lwpctl)) {
		err(EXIT_FAILURE, "_lwp_ctl");
	}

	retval = (*self->pt_func)(self->pt_arg);

	pthread_exit(retval);

	/*NOTREACHED*/
	pthread__abort();
}

int
pthread_suspend_np(pthread_t thread)
{
	pthread_t self;

	pthread__error(EINVAL, "Invalid thread",
	    thread->pt_magic == PT_MAGIC);

	self = pthread__self();
	if (self == thread) {
		return EDEADLK;
	}
	if (pthread__find(thread) != 0)
		return ESRCH;
	if (_lwp_suspend(thread->pt_lid) == 0)
		return 0;
	return errno;
}

int
pthread_resume_np(pthread_t thread)
{

	pthread__error(EINVAL, "Invalid thread",
	    thread->pt_magic == PT_MAGIC);

	if (pthread__find(thread) != 0)
		return ESRCH;
	if (_lwp_continue(thread->pt_lid) == 0)
		return 0;
	return errno;
}

void
pthread_exit(void *retval)
{
	pthread_t self;
	struct pt_clean_t *cleanup;

	if (__predict_false(__uselibcstub)) {
		__libc_thr_exit_stub(retval);
		goto out;
	}

	self = pthread__self();

	/* Disable cancellability. */
	pthread_mutex_lock(&self->pt_lock);
	self->pt_flags |= PT_FLAG_CS_DISABLED;
	self->pt_cancel = 0;
	pthread_mutex_unlock(&self->pt_lock);

	/* Call any cancellation cleanup handlers */
	if (!PTQ_EMPTY(&self->pt_cleanup_stack)) {
		while (!PTQ_EMPTY(&self->pt_cleanup_stack)) {
			cleanup = PTQ_FIRST(&self->pt_cleanup_stack);
			PTQ_REMOVE(&self->pt_cleanup_stack, cleanup, ptc_next);
			(*cleanup->ptc_cleanup)(cleanup->ptc_arg);
		}
	}

	__cxa_thread_run_atexit();

	/* Perform cleanup of thread-specific data */
	pthread__destroy_tsd(self);

	if (_malloc_thread_cleanup)
		_malloc_thread_cleanup();

	/*
	 * Signal our exit.  Our stack and pthread_t won't be reused until
	 * pthread_create() can see from kernel info that this LWP is gone.
	 */
	pthread_mutex_lock(&self->pt_lock);
	self->pt_exitval = retval;
	if (self->pt_flags & PT_FLAG_DETACHED) {
		/* pthread__reap() will drop the lock. */
		pthread__reap(self);
		_lwp_exit();
	} else {
		self->pt_state = PT_STATE_ZOMBIE;
		pthread_mutex_unlock(&self->pt_lock);
		/* Note: name will be freed by the joiner. */
		_lwp_exit();
	}

out:
	/*NOTREACHED*/
	pthread__abort();
	exit(1);
}


int
pthread_join(pthread_t thread, void **valptr)
{
	pthread_t self;

	pthread__error(EINVAL, "Invalid thread",
	    thread->pt_magic == PT_MAGIC);

	self = pthread__self();

	if (pthread__find(thread) != 0)
		return ESRCH;

	if (thread == self)
		return EDEADLK;

	/* IEEE Std 1003.1 says pthread_join() never returns EINTR. */
	for (;;) {
		pthread__testcancel(self);
		if (_lwp_wait(thread->pt_lid, NULL) == 0)
			break;
		if (errno != EINTR)
			return errno;
	}

	/*
	 * Don't test for cancellation again.  The spec is that if
	 * cancelled, pthread_join() must not have succeeded.
	 */
	pthread_mutex_lock(&thread->pt_lock);
	if (thread->pt_state != PT_STATE_ZOMBIE) {
		pthread__errorfunc(__FILE__, __LINE__, __func__,
		    "not a zombie");
	}
	if (valptr != NULL)
		*valptr = thread->pt_exitval;

	/* pthread__reap() will drop the lock. */
	pthread__reap(thread);
	return 0;
}

static void
pthread__reap(pthread_t thread)
{
	char *name;

	name = thread->pt_name;
	thread->pt_name = NULL;
	thread->pt_state = PT_STATE_DEAD;
	pthread_mutex_unlock(&thread->pt_lock);

	pthread_mutex_lock(&pthread__deadqueue_lock);
	PTQ_INSERT_HEAD(&pthread__deadqueue, thread, pt_deadq);
	pthread_mutex_unlock(&pthread__deadqueue_lock);

	if (name != NULL)
		free(name);
}

int
pthread_equal(pthread_t t1, pthread_t t2)
{

	if (__predict_false(__uselibcstub))
		return __libc_thr_equal_stub(t1, t2);

	pthread__error(0, "Invalid thread",
	    (t1 != NULL) && (t1->pt_magic == PT_MAGIC));

	pthread__error(0, "Invalid thread",
	    (t2 != NULL) && (t2->pt_magic == PT_MAGIC));

	/* Nothing special here. */
	return (t1 == t2);
}


int
pthread_detach(pthread_t thread)
{
	int error;

	pthread__error(EINVAL, "Invalid thread",
	    thread->pt_magic == PT_MAGIC);

	if (pthread__find(thread) != 0)
		return ESRCH;

	pthread_mutex_lock(&thread->pt_lock);
	if ((thread->pt_flags & PT_FLAG_DETACHED) != 0) {
		error = EINVAL;
	} else {
		error = _lwp_detach(thread->pt_lid);
		if (error == 0)
			thread->pt_flags |= PT_FLAG_DETACHED;
		else
			error = errno;
	}
	if (thread->pt_state == PT_STATE_ZOMBIE) {
		/* pthread__reap() will drop the lock. */
		pthread__reap(thread);
	} else
		pthread_mutex_unlock(&thread->pt_lock);
	return error;
}


int
pthread_getname_np(pthread_t thread, char *name, size_t len)
{

	pthread__error(EINVAL, "Invalid thread",
	    thread->pt_magic == PT_MAGIC);

	if (pthread__find(thread) != 0)
		return ESRCH;

	pthread_mutex_lock(&thread->pt_lock);
	if (thread->pt_name == NULL)
		name[0] = '\0';
	else
		strlcpy(name, thread->pt_name, len);
	pthread_mutex_unlock(&thread->pt_lock);

	return 0;
}


int
pthread_setname_np(pthread_t thread, const char *name, void *arg)
{
	char *oldname, *cp, newname[PTHREAD_MAX_NAMELEN_NP];
	int namelen;

	pthread__error(EINVAL, "Invalid thread",
	    thread->pt_magic == PT_MAGIC);

	if (pthread__find(thread) != 0)
		return ESRCH;

	namelen = snprintf(newname, sizeof(newname), name, arg);
	if (namelen >= PTHREAD_MAX_NAMELEN_NP)
		return EINVAL;

	cp = strdup(newname);
	if (cp == NULL)
		return ENOMEM;

	pthread_mutex_lock(&thread->pt_lock);
	oldname = thread->pt_name;
	thread->pt_name = cp;
	(void)_lwp_setname(thread->pt_lid, cp);
	pthread_mutex_unlock(&thread->pt_lock);

	if (oldname != NULL)
		free(oldname);

	return 0;
}


pthread_t
pthread_self(void)
{
	if (__predict_false(__uselibcstub))
		return (pthread_t)__libc_thr_self_stub();

	return pthread__self();
}


int
pthread_cancel(pthread_t thread)
{

	pthread__error(EINVAL, "Invalid thread",
	    thread->pt_magic == PT_MAGIC);

	if (pthread__find(thread) != 0)
		return ESRCH;
	pthread_mutex_lock(&thread->pt_lock);
	thread->pt_flags |= PT_FLAG_CS_PENDING;
	if ((thread->pt_flags & PT_FLAG_CS_DISABLED) == 0) {
		thread->pt_cancel = 1;
		pthread_mutex_unlock(&thread->pt_lock);
		_lwp_wakeup(thread->pt_lid);
	} else
		pthread_mutex_unlock(&thread->pt_lock);

	return 0;
}


int
pthread_setcancelstate(int state, int *oldstate)
{
	pthread_t self;
	int retval;

	if (__predict_false(__uselibcstub))
		return __libc_thr_setcancelstate_stub(state, oldstate);

	self = pthread__self();
	retval = 0;

	pthread_mutex_lock(&self->pt_lock);

	if (oldstate != NULL) {
		if (self->pt_flags & PT_FLAG_CS_DISABLED)
			*oldstate = PTHREAD_CANCEL_DISABLE;
		else
			*oldstate = PTHREAD_CANCEL_ENABLE;
	}

	if (state == PTHREAD_CANCEL_DISABLE) {
		self->pt_flags |= PT_FLAG_CS_DISABLED;
		if (self->pt_cancel) {
			self->pt_flags |= PT_FLAG_CS_PENDING;
			self->pt_cancel = 0;
		}
	} else if (state == PTHREAD_CANCEL_ENABLE) {
		self->pt_flags &= ~PT_FLAG_CS_DISABLED;
		/*
		 * If a cancellation was requested while cancellation
		 * was disabled, note that fact for future
		 * cancellation tests.
		 */
		if (self->pt_flags & PT_FLAG_CS_PENDING) {
			self->pt_cancel = 1;
			/* This is not a deferred cancellation point. */
			if (self->pt_flags & PT_FLAG_CS_ASYNC) {
				pthread_mutex_unlock(&self->pt_lock);
				pthread__cancelled();
			}
		}
	} else
		retval = EINVAL;

	pthread_mutex_unlock(&self->pt_lock);

	return retval;
}


int
pthread_setcanceltype(int type, int *oldtype)
{
	pthread_t self;
	int retval;

	self = pthread__self();
	retval = 0;

	pthread_mutex_lock(&self->pt_lock);

	if (oldtype != NULL) {
		if (self->pt_flags & PT_FLAG_CS_ASYNC)
			*oldtype = PTHREAD_CANCEL_ASYNCHRONOUS;
		else
			*oldtype = PTHREAD_CANCEL_DEFERRED;
	}

	if (type == PTHREAD_CANCEL_ASYNCHRONOUS) {
		self->pt_flags |= PT_FLAG_CS_ASYNC;
		if (self->pt_cancel) {
			pthread_mutex_unlock(&self->pt_lock);
			pthread__cancelled();
		}
	} else if (type == PTHREAD_CANCEL_DEFERRED)
		self->pt_flags &= ~PT_FLAG_CS_ASYNC;
	else
		retval = EINVAL;

	pthread_mutex_unlock(&self->pt_lock);

	return retval;
}


void
pthread_testcancel(void)
{
	pthread_t self;

	self = pthread__self();
	if (self->pt_cancel)
		pthread__cancelled();
}


/*
 * POSIX requires that certain functions return an error rather than
 * invoking undefined behavior even when handed completely bogus
 * pthread_t values, e.g. stack garbage.
 */
int
pthread__find(pthread_t id)
{
	pthread_t target;
	int error;

	pthread_rwlock_rdlock(&pthread__alltree_lock);
	target = rb_tree_find_node(&pthread__alltree, id);
	error = (target && target->pt_state != PT_STATE_DEAD) ? 0 : ESRCH;
	pthread_rwlock_unlock(&pthread__alltree_lock);

	return error;
}


void
pthread__testcancel(pthread_t self)
{

	if (self->pt_cancel)
		pthread__cancelled();
}


void
pthread__cancelled(void)
{

	pthread_exit(PTHREAD_CANCELED);
}


void
pthread__cleanup_push(void (*cleanup)(void *), void *arg, void *store)
{
	pthread_t self;
	struct pt_clean_t *entry;

	self = pthread__self();
	entry = store;
	entry->ptc_cleanup = cleanup;
	entry->ptc_arg = arg;
	PTQ_INSERT_HEAD(&self->pt_cleanup_stack, entry, ptc_next);
}


void
pthread__cleanup_pop(int ex, void *store)
{
	pthread_t self;
	struct pt_clean_t *entry;

	self = pthread__self();
	entry = store;

	PTQ_REMOVE(&self->pt_cleanup_stack, entry, ptc_next);
	if (ex)
		(*entry->ptc_cleanup)(entry->ptc_arg);
}


int *
pthread__errno(void)
{
	pthread_t self;

	if (__predict_false(__uselibcstub)) {
		pthread__errorfunc(__FILE__, __LINE__, __func__,
		    "pthread__errno() requires linking with -lpthread");
		return __libc_thr_errno_stub();
	}

	self = pthread__self();

	return &(self->pt_errno);
}

ssize_t	_sys_write(int, const void *, size_t);

void
pthread__assertfunc(const char *file, int line, const char *function,
		    const char *expr)
{
	char buf[1024];
	int len;

	/*
	 * snprintf_ss should not acquire any locks, or we could
	 * end up deadlocked if the assert caller held locks.
	 */
	len = snprintf_ss(buf, 1024,
	    "assertion \"%s\" failed: file \"%s\", line %d%s%s%s\n",
	    expr, file, line,
	    function ? ", function \"" : "",
	    function ? function : "",
	    function ? "\"" : "");

	_sys_write(STDERR_FILENO, buf, (size_t)len);
	(void)raise(SIGABRT);
	_exit(1);
}


void
pthread__errorfunc(const char *file, int line, const char *function,
		   const char *msg, ...)
{
	char buf[1024];
	char buf2[1024];
	size_t len;
	va_list ap;

	if (pthread__diagassert == 0)
		return;

	va_start(ap, msg);
	vsnprintf_ss(buf2, sizeof(buf2), msg, ap);
	va_end(ap);

	/*
	 * snprintf_ss should not acquire any locks, or we could
	 * end up deadlocked if the assert caller held locks.
	 */
	len = snprintf_ss(buf, sizeof(buf),
	    "%s: Error detected by libpthread: %s.\n"
	    "Detected by file \"%s\", line %d%s%s%s.\n"
	    "See pthread(3) for information.\n",
	    getprogname(), buf2, file, line,
	    function ? ", function \"" : "",
	    function ? function : "",
	    function ? "\"" : "");

	if (pthread__diagassert & DIAGASSERT_STDERR)
		_sys_write(STDERR_FILENO, buf, len);

	if (pthread__diagassert & DIAGASSERT_SYSLOG)
		syslog(LOG_DEBUG | LOG_USER, "%s", buf);

	if (pthread__diagassert & DIAGASSERT_ABORT) {
		(void)raise(SIGABRT);
		_exit(1);
	}
}

/*
 * Thread park/unpark operations.  The kernel operations are
 * modelled after a brief description from "Multithreading in
 * the Solaris Operating Environment":
 *
 * http://www.sun.com/software/whitepapers/solaris9/multithread.pdf
 */
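/*
 * In outline: a waiter queues itself, records the sleep object in
 * pt_sleepobj and calls _lwp_park() to block; a waker removes it from
 * the queue, clears pt_sleepobj and calls _lwp_unpark().  An unpark
 * that arrives before the corresponding park is remembered by the
 * kernel, so the race is benign; the early wakeup surfaces as a park
 * that returns at once (EALREADY), which is handled below.
 */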

int
pthread__park(pthread_t self, pthread_mutex_t *lock,
	      pthread_queue_t *queue, const struct timespec *abstime,
	      int cancelpt)
{
	int rv, error;

	pthread_mutex_unlock(lock);

	/*
	 * Wait until we are awoken by a pending unpark operation,
	 * a signal, an unpark posted after we have gone to sleep,
	 * or an expired timeout.
	 *
	 * It is fine to test the value of pt_sleepobj without
	 * holding any locks, because:
	 *
	 * o Only the blocking thread (this thread) ever sets it
	 *   to a non-NULL value.
	 *
	 * o Other threads may set it to NULL, but if they do so they
	 *   must also make this thread return from _lwp_park.
	 *
	 * o _lwp_park, _lwp_unpark and _lwp_unpark_all are system
	 *   calls and all make use of spinlocks in the kernel.  So
	 *   these system calls act as full memory barriers.
	 */
	rv = 0;
	do {
		/*
		 * If we deferred unparking a thread, arrange to
		 * have _lwp_park() restart it before blocking.
		 */
		error = _lwp_park(CLOCK_REALTIME, TIMER_ABSTIME,
		    __UNCONST(abstime), 0, NULL, NULL);
		if (error != 0) {
			switch (rv = errno) {
			case EINTR:
			case EALREADY:
				rv = 0;
				break;
			case ETIMEDOUT:
				break;
			default:
				pthread__errorfunc(__FILE__, __LINE__,
				    __func__, "_lwp_park failed: %d", errno);
				break;
			}
		}
		/* Check for cancellation. */
		if (cancelpt && self->pt_cancel)
			rv = EINTR;
	} while (self->pt_sleepobj != NULL && rv == 0);
	return rv;
}

void
pthread__unpark(pthread_queue_t *queue, pthread_t self,
		pthread_mutex_t *interlock)
{
	pthread_t target;

	target = PTQ_FIRST(queue);
	target->pt_sleepobj = NULL;
	PTQ_REMOVE(queue, target, pt_sleep);
	(void)_lwp_unpark(target->pt_lid, NULL);
}

void
pthread__unpark_all(pthread_queue_t *queue, pthread_t self,
		    pthread_mutex_t *interlock)
{
	lwpid_t lids[PTHREAD__UNPARK_MAX];
	const size_t mlid = pthread__unpark_max;
	pthread_t target;
	size_t nlid = 0;
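
	/*
	 * Collect the LIDs of all sleeping threads and wake them in
	 * batches of at most pthread__unpark_max, flushing with
	 * _lwp_unpark_all() whenever the buffer fills.  A single
	 * survivor takes the cheaper _lwp_unpark() path below.
	 */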
	PTQ_FOREACH(target, queue, pt_sleep) {
		if (nlid == mlid) {
			(void)_lwp_unpark_all(lids, nlid, NULL);
			nlid = 0;
		}
		target->pt_sleepobj = NULL;
		lids[nlid++] = target->pt_lid;
	}
	PTQ_INIT(queue);
	if (nlid == 1) {
		(void)_lwp_unpark(lids[0], NULL);
	} else if (nlid > 1) {
		(void)_lwp_unpark_all(lids, nlid, NULL);
	}
}

#undef	OOPS

static void
pthread__initmainstack(void)
{
	struct rlimit slimit;
	const AuxInfo *aux;
	size_t size, len;
	int mib[2];
	unsigned int value;

	_DIAGASSERT(_dlauxinfo() != NULL);

	if (getrlimit(RLIMIT_STACK, &slimit) == -1)
		err(EXIT_FAILURE,
		    "Couldn't get stack resource consumption limits");
	size = slimit.rlim_cur;
	pthread__main->pt_stack.ss_size = size;
	pthread__main->pt_guardsize = pthread__pagesize;

	mib[0] = CTL_VM;
	mib[1] = VM_GUARD_SIZE;
	len = sizeof(value);
	if (sysctl(mib, __arraycount(mib), &value, &len, NULL, 0) == 0)
		pthread__main->pt_guardsize = value;

	for (aux = _dlauxinfo(); aux->a_type != AT_NULL; ++aux) {
		if (aux->a_type == AT_STACKBASE) {
#ifdef __MACHINE_STACK_GROWS_UP
			pthread__main->pt_stack.ss_sp = (void *)aux->a_v;
#else
			pthread__main->pt_stack.ss_sp = (char *)aux->a_v - size;
#endif
			break;
		}
	}
	pthread__copy_tsd(pthread__main);
}

/*
 * Set up the slightly special stack for the "initial" thread, which
 * runs on the normal system stack, and thus gets slightly different
 * treatment.
 */
static void
pthread__initmain(pthread_t *newt)
{
	char *value;

	pthread__initmainstack();

	value = pthread__getenv("PTHREAD_STACKSIZE");
	if (value != NULL) {
		pthread__stacksize = atoi(value) * 1024;
		if (pthread__stacksize > pthread__main->pt_stack.ss_size)
			pthread__stacksize = pthread__main->pt_stack.ss_size;
	}
	if (pthread__stacksize == 0)
		pthread__stacksize = pthread__main->pt_stack.ss_size;
	pthread__stacksize += pthread__pagesize - 1;
	pthread__stacksize &= ~(pthread__pagesize - 1);
	if (pthread__stacksize < 4 * pthread__pagesize)
		errx(1, "Stacksize limit is too low, minimum %zu kbyte.",
		    4 * pthread__pagesize / 1024);

	*newt = pthread__main;
#if defined(_PTHREAD_GETTCB_EXT)
	pthread__main->pt_tls = _PTHREAD_GETTCB_EXT();
#elif defined(__HAVE___LWP_GETTCB_FAST)
	pthread__main->pt_tls = __lwp_gettcb_fast();
#else
	pthread__main->pt_tls = _lwp_getprivate();
#endif
	pthread__main->pt_tls->tcb_pthread = pthread__main;
}
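
/*
 * Comparison function for the red-black tree of all threads.  Threads
 * are keyed simply by the address of their pthread_t structure,
 * compared as integers; this is what lets pthread__find() validate a
 * possibly-bogus handle by lookup.
 */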
static signed int
/*ARGSUSED*/
pthread__cmp(void *ctx, const void *n1, const void *n2)
{
	const uintptr_t p1 = (const uintptr_t)n1;
	const uintptr_t p2 = (const uintptr_t)n2;

	if (p1 < p2)
		return -1;
	if (p1 > p2)
		return 1;
	return 0;
}

/* Because getenv() wants to use locks. */
char *
pthread__getenv(const char *name)
{
	extern char **environ;
	size_t l_name, offset;

	if (issetugid())
		return (NULL);

	l_name = strlen(name);
	for (offset = 0; environ[offset] != NULL; offset++) {
		if (strncmp(name, environ[offset], l_name) == 0 &&
		    environ[offset][l_name] == '=') {
			return environ[offset] + l_name + 1;
		}
	}

	return NULL;
}
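
/*
 * Map an object address to one of the NHASHLOCK hash locks.  XORing
 * two shifted copies of the address mixes in higher-order bits; the
 * low three bits are discarded first, since for aligned objects they
 * carry no information.
 */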
pthread_mutex_t *
pthread__hashlock(volatile const void *p)
{
	uintptr_t v;

	v = (uintptr_t)p;
	return &hashlocks[((v >> 9) ^ (v >> 3)) & (NHASHLOCK - 1)].mutex;
}

int
pthread__checkpri(int pri)
{
	static int havepri;
	static long min, max;

	if (!havepri) {
		min = sysconf(_SC_SCHED_PRI_MIN);
		max = sysconf(_SC_SCHED_PRI_MAX);
		havepri = 1;
	}
	return (pri < min || pri > max) ? EINVAL : 0;
}