thr.c revision 11913:283e725df792
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include "lint.h"
28#include "thr_uberdata.h"
29#include <pthread.h>
30#include <procfs.h>
31#include <sys/uio.h>
32#include <ctype.h>
33#include "libc.h"
34
35/*
36 * These symbols should not be exported from libc, but
37 * /lib/libm.so.2 references _thr_main.  libm needs to be fixed.
38 * Also, some older versions of the Studio compiler/debugger
39 * components reference them.  These need to be fixed, too.
40 */
41#pragma weak _thr_main = thr_main
42#pragma weak _thr_create = thr_create
43#pragma weak _thr_join = thr_join
44#pragma weak _thr_self = thr_self
45
46#undef errno
47extern int errno;
48
49/*
50 * Between Solaris 2.5 and Solaris 9, __threaded was used to indicate
51 * "we are linked with libthread".  The Sun Workshop 6 update 1 compilation
52 * system used it illegally (it is a consolidation private symbol).
53 * To accommodate this and possibly other abusers of the symbol,
54 * we make it always equal to 1 now that libthread has been folded
55 * into libc.  The new __libc_threaded symbol is used to indicate
56 * the new meaning, "more than one thread exists".
57 */
58int __threaded = 1;		/* always equal to 1 */
59int __libc_threaded = 0;	/* zero until first thr_create() */
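
/*
 * Illustrative sketch (an assumption about typical consumers, not code
 * taken from this file): single-threaded fast paths elsewhere in libc
 * can test __libc_threaded, which _thrp_create() below sets to 1 to
 * "inform stdio", e.g.
 *
 *	if (__libc_threaded)
 *		lmutex_lock(&some_lock);	(some_lock is hypothetical)
 *	...
 *	if (__libc_threaded)
 *		lmutex_unlock(&some_lock);
 */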
60
61/*
62 * thr_concurrency and pthread_concurrency are not used by the library.
63 * They exist solely to hold and return the values set by calls to
64 * thr_setconcurrency() and pthread_setconcurrency().
65 * Because thr_concurrency is affected by the THR_NEW_LWP flag
66 * to thr_create(), thr_concurrency is protected by link_lock.
67 */
68static	int	thr_concurrency = 1;
69static	int	pthread_concurrency;
70
71#define	HASHTBLSZ	1024	/* must be a power of two */
72#define	TIDHASH(tid, udp)	(tid & (udp)->hash_mask)
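
/*
 * Worked example (illustrative, not part of the original definitions):
 * after finish_init() installs the full table, hash_size is HASHTBLSZ
 * (1024) and hash_mask is 1023, so a thread id hashes to its low-order
 * bits, e.g. TIDHASH(2049, udp) == (2049 & 1023) == 1.  Until then,
 * hash_mask is 0 and every tid maps to bucket 0 of init_hash_table.
 */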
73
74/* initial allocation, just enough for one lwp */
75#pragma align 64(init_hash_table)
76thr_hash_table_t init_hash_table[1] = {
77	{ DEFAULTMUTEX, DEFAULTCV, NULL },
78};
79
80extern const Lc_interface rtld_funcs[];
81
82/*
83 * The weak version is known to libc_db and mdb.
84 */
85#pragma weak _uberdata = __uberdata
86uberdata_t __uberdata = {
87	{ DEFAULTMUTEX, NULL, 0 },	/* link_lock */
88	{ RECURSIVEMUTEX, NULL, 0 },	/* ld_lock */
89	{ RECURSIVEMUTEX, NULL, 0 },	/* fork_lock */
90	{ RECURSIVEMUTEX, NULL, 0 },	/* atfork_lock */
91	{ RECURSIVEMUTEX, NULL, 0 },	/* callout_lock */
92	{ DEFAULTMUTEX, NULL, 0 },	/* tdb_hash_lock */
93	{ 0, },				/* tdb_hash_lock_stats */
94	{ { 0 }, },			/* siguaction[NSIG] */
95	{{ DEFAULTMUTEX, NULL, 0 },		/* bucket[NBUCKETS] */
96	{ DEFAULTMUTEX, NULL, 0 },
97	{ DEFAULTMUTEX, NULL, 0 },
98	{ DEFAULTMUTEX, NULL, 0 },
99	{ DEFAULTMUTEX, NULL, 0 },
100	{ DEFAULTMUTEX, NULL, 0 },
101	{ DEFAULTMUTEX, NULL, 0 },
102	{ DEFAULTMUTEX, NULL, 0 },
103	{ DEFAULTMUTEX, NULL, 0 },
104	{ DEFAULTMUTEX, NULL, 0 }},
105	{ RECURSIVEMUTEX, NULL, NULL },		/* atexit_root */
106	{ DEFAULTMUTEX, 0, 0, NULL },		/* tsd_metadata */
107	{ DEFAULTMUTEX, {0, 0}, {0, 0} },	/* tls_metadata */
108	0,			/* primary_map */
109	0,			/* bucket_init */
110	0,			/* pad[0] */
111	0,			/* pad[1] */
112	{ 0 },			/* uberflags */
113	NULL,			/* queue_head */
114	init_hash_table,	/* thr_hash_table */
115	1,			/* hash_size: size of the hash table */
116	0,			/* hash_mask: hash_size - 1 */
117	NULL,			/* ulwp_one */
118	NULL,			/* all_lwps */
119	NULL,			/* all_zombies */
120	0,			/* nthreads */
121	0,			/* nzombies */
122	0,			/* ndaemons */
123	0,			/* pid */
124	sigacthandler,		/* sigacthandler */
125	NULL,			/* lwp_stacks */
126	NULL,			/* lwp_laststack */
127	0,			/* nfreestack */
128	10,			/* thread_stack_cache */
129	NULL,			/* ulwp_freelist */
130	NULL,			/* ulwp_lastfree */
131	NULL,			/* ulwp_replace_free */
132	NULL,			/* ulwp_replace_last */
133	NULL,			/* atforklist */
134	NULL,			/* robustlocks */
135	NULL,			/* robustlist */
136	NULL,			/* __tdb_bootstrap */
137	{			/* tdb */
138		NULL,		/* tdb_sync_addr_hash */
139		0,		/* tdb_register_count */
140		0,		/* tdb_hash_alloc_failed */
141		NULL,		/* tdb_sync_addr_free */
142		NULL,		/* tdb_sync_addr_last */
143		0,		/* tdb_sync_alloc */
144		{ 0, 0 },	/* tdb_ev_global_mask */
145		tdb_events,	/* tdb_events array */
146	},
147};
148
149/*
150 * The weak version is known to libc_db and mdb.
151 */
152#pragma weak _tdb_bootstrap = __tdb_bootstrap
153uberdata_t **__tdb_bootstrap = NULL;
154
155int	thread_queue_fifo = 4;
156int	thread_queue_dump = 0;
157int	thread_cond_wait_defer = 0;
158int	thread_error_detection = 0;
159int	thread_async_safe = 0;
160int	thread_stack_cache = 10;
161int	thread_door_noreserve = 0;
162int	thread_locks_misaligned = 0;
163
164static	ulwp_t	*ulwp_alloc(void);
165static	void	ulwp_free(ulwp_t *);
166
167/*
168 * Insert the lwp into the hash table.
169 */
170void
171hash_in_unlocked(ulwp_t *ulwp, int ix, uberdata_t *udp)
172{
173	ulwp->ul_hash = udp->thr_hash_table[ix].hash_bucket;
174	udp->thr_hash_table[ix].hash_bucket = ulwp;
175	ulwp->ul_ix = ix;
176}
177
178void
179hash_in(ulwp_t *ulwp, uberdata_t *udp)
180{
181	int ix = TIDHASH(ulwp->ul_lwpid, udp);
182	mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
183
184	lmutex_lock(mp);
185	hash_in_unlocked(ulwp, ix, udp);
186	lmutex_unlock(mp);
187}
188
189/*
190 * Delete the lwp from the hash table.
191 */
192void
193hash_out_unlocked(ulwp_t *ulwp, int ix, uberdata_t *udp)
194{
195	ulwp_t **ulwpp;
196
197	for (ulwpp = &udp->thr_hash_table[ix].hash_bucket;
198	    ulwp != *ulwpp;
199	    ulwpp = &(*ulwpp)->ul_hash)
200		;
201	*ulwpp = ulwp->ul_hash;
202	ulwp->ul_hash = NULL;
203	ulwp->ul_ix = -1;
204}
205
206void
207hash_out(ulwp_t *ulwp, uberdata_t *udp)
208{
209	int ix;
210
211	if ((ix = ulwp->ul_ix) >= 0) {
212		mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
213
214		lmutex_lock(mp);
215		hash_out_unlocked(ulwp, ix, udp);
216		lmutex_unlock(mp);
217	}
218}
219
220/*
221 * Retain stack information for thread structures that are being recycled for
222 * new threads.  All other members of the thread structure should be zeroed.
223 */
224static void
225ulwp_clean(ulwp_t *ulwp)
226{
227	caddr_t stk = ulwp->ul_stk;
228	size_t mapsiz = ulwp->ul_mapsiz;
229	size_t guardsize = ulwp->ul_guardsize;
230	uintptr_t stktop = ulwp->ul_stktop;
231	size_t stksiz = ulwp->ul_stksiz;
232
233	(void) memset(ulwp, 0, sizeof (*ulwp));
234
235	ulwp->ul_stk = stk;
236	ulwp->ul_mapsiz = mapsiz;
237	ulwp->ul_guardsize = guardsize;
238	ulwp->ul_stktop = stktop;
239	ulwp->ul_stksiz = stksiz;
240}
241
242static int stackprot;
243
244/*
245 * Answer the question, "Is the lwp in question really dead?"
246 * We must inquire of the operating system to be really sure
247 * because the lwp may have called lwp_exit() but it has not
248 * yet completed the exit.
249 */
250static int
251dead_and_buried(ulwp_t *ulwp)
252{
253	if (ulwp->ul_lwpid == (lwpid_t)(-1))
254		return (1);
255	if (ulwp->ul_dead && ulwp->ul_detached &&
256	    _lwp_kill(ulwp->ul_lwpid, 0) == ESRCH) {
257		ulwp->ul_lwpid = (lwpid_t)(-1);
258		return (1);
259	}
260	return (0);
261}
262
263/*
264 * Attempt to keep the stack cache within the specified cache limit.
265 */
266static void
267trim_stack_cache(int cache_limit)
268{
269	ulwp_t *self = curthread;
270	uberdata_t *udp = self->ul_uberdata;
271	ulwp_t *prev = NULL;
272	ulwp_t **ulwpp = &udp->lwp_stacks;
273	ulwp_t *ulwp;
274
275	ASSERT(udp->nthreads <= 1 || MUTEX_OWNED(&udp->link_lock, self));
276
277	while (udp->nfreestack > cache_limit && (ulwp = *ulwpp) != NULL) {
278		if (dead_and_buried(ulwp)) {
279			*ulwpp = ulwp->ul_next;
280			if (ulwp == udp->lwp_laststack)
281				udp->lwp_laststack = prev;
282			hash_out(ulwp, udp);
283			udp->nfreestack--;
284			(void) munmap(ulwp->ul_stk, ulwp->ul_mapsiz);
285			/*
286			 * Now put the free ulwp on the ulwp freelist.
287			 */
288			ulwp->ul_mapsiz = 0;
289			ulwp->ul_next = NULL;
290			if (udp->ulwp_freelist == NULL)
291				udp->ulwp_freelist = udp->ulwp_lastfree = ulwp;
292			else {
293				udp->ulwp_lastfree->ul_next = ulwp;
294				udp->ulwp_lastfree = ulwp;
295			}
296		} else {
297			prev = ulwp;
298			ulwpp = &ulwp->ul_next;
299		}
300	}
301}
302
303/*
304 * Find an unused stack of the requested size
305 * or create a new stack of the requested size.
306 * Return a pointer to the ulwp_t structure referring to the stack, or NULL.
307 * thr_exit() stores 1 in the ul_dead member.
308 * thr_join() stores -1 in the ul_lwpid member.
309 */
310static ulwp_t *
311find_stack(size_t stksize, size_t guardsize)
312{
313	static size_t pagesize = 0;
314
315	uberdata_t *udp = curthread->ul_uberdata;
316	size_t mapsize;
317	ulwp_t *prev;
318	ulwp_t *ulwp;
319	ulwp_t **ulwpp;
320	void *stk;
321
322	/*
323	 * The stack is allocated PROT_READ|PROT_WRITE|PROT_EXEC
324	 * unless overridden by the system's configuration.
325	 */
326	if (stackprot == 0) {	/* do this once */
327		long lprot = _sysconf(_SC_STACK_PROT);
328		if (lprot <= 0)
329			lprot = (PROT_READ|PROT_WRITE|PROT_EXEC);
330		stackprot = (int)lprot;
331	}
332	if (pagesize == 0)	/* do this once */
333		pagesize = _sysconf(_SC_PAGESIZE);
334
335	/*
336	 * One megabyte stacks by default, but subtract off
337	 * two pages for the system-created red zones.
338	 * Round up a non-zero stack size to a pagesize multiple.
339	 */
340	if (stksize == 0)
341		stksize = DEFAULTSTACK - 2 * pagesize;
342	else
343		stksize = ((stksize + pagesize - 1) & -pagesize);
344
345	/*
346	 * Round up the mapping size to a multiple of pagesize.
347	 * Note: mmap() provides at least one page of red zone
348	 * so we deduct that from the value of guardsize.
349	 */
350	if (guardsize != 0)
351		guardsize = ((guardsize + pagesize - 1) & -pagesize) - pagesize;
352	mapsize = stksize + guardsize;
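
	/*
	 * Worked example (illustrative only; assumes a 4K pagesize):
	 * a requested stksize of 5000 rounds up to 8192; a requested
	 * guardsize of 5000 rounds up to 8192 and then loses one page
	 * of mmap() red zone, leaving 4096; mapsize = 8192 + 4096 = 12288.
	 */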
353
354	lmutex_lock(&udp->link_lock);
355	for (prev = NULL, ulwpp = &udp->lwp_stacks;
356	    (ulwp = *ulwpp) != NULL;
357	    prev = ulwp, ulwpp = &ulwp->ul_next) {
358		if (ulwp->ul_mapsiz == mapsize &&
359		    ulwp->ul_guardsize == guardsize &&
360		    dead_and_buried(ulwp)) {
361			/*
362			 * The previous lwp is gone; reuse the stack.
363			 * Remove the ulwp from the stack list.
364			 */
365			*ulwpp = ulwp->ul_next;
366			ulwp->ul_next = NULL;
367			if (ulwp == udp->lwp_laststack)
368				udp->lwp_laststack = prev;
369			hash_out(ulwp, udp);
370			udp->nfreestack--;
371			lmutex_unlock(&udp->link_lock);
372			ulwp_clean(ulwp);
373			return (ulwp);
374		}
375	}
376
377	/*
378	 * None of the cached stacks matched our mapping size.
379	 * Reduce the stack cache to get rid of possibly
380	 * very old stacks that will never be reused.
381	 */
382	if (udp->nfreestack > udp->thread_stack_cache)
383		trim_stack_cache(udp->thread_stack_cache);
384	else if (udp->nfreestack > 0)
385		trim_stack_cache(udp->nfreestack - 1);
386	lmutex_unlock(&udp->link_lock);
387
388	/*
389	 * Create a new stack.
390	 */
391	if ((stk = mmap(NULL, mapsize, stackprot,
392	    MAP_PRIVATE|MAP_NORESERVE|MAP_ANON, -1, (off_t)0)) != MAP_FAILED) {
393		/*
394		 * We have allocated our stack.  Now allocate the ulwp.
395		 */
396		ulwp = ulwp_alloc();
397		if (ulwp == NULL)
398			(void) munmap(stk, mapsize);
399		else {
400			ulwp->ul_stk = stk;
401			ulwp->ul_mapsiz = mapsize;
402			ulwp->ul_guardsize = guardsize;
403			ulwp->ul_stktop = (uintptr_t)stk + mapsize;
404			ulwp->ul_stksiz = stksize;
405			if (guardsize)	/* protect the extra red zone */
406				(void) mprotect(stk, guardsize, PROT_NONE);
407		}
408	}
409	return (ulwp);
410}
411
412/*
413 * Get a ulwp_t structure from the free list or allocate a new one.
414 * Such ulwp_t's do not have a stack allocated by the library.
415 */
416static ulwp_t *
417ulwp_alloc(void)
418{
419	ulwp_t *self = curthread;
420	uberdata_t *udp = self->ul_uberdata;
421	size_t tls_size;
422	ulwp_t *prev;
423	ulwp_t *ulwp;
424	ulwp_t **ulwpp;
425	caddr_t data;
426
427	lmutex_lock(&udp->link_lock);
428	for (prev = NULL, ulwpp = &udp->ulwp_freelist;
429	    (ulwp = *ulwpp) != NULL;
430	    prev = ulwp, ulwpp = &ulwp->ul_next) {
431		if (dead_and_buried(ulwp)) {
432			*ulwpp = ulwp->ul_next;
433			ulwp->ul_next = NULL;
434			if (ulwp == udp->ulwp_lastfree)
435				udp->ulwp_lastfree = prev;
436			hash_out(ulwp, udp);
437			lmutex_unlock(&udp->link_lock);
438			ulwp_clean(ulwp);
439			return (ulwp);
440		}
441	}
442	lmutex_unlock(&udp->link_lock);
443
444	tls_size = roundup64(udp->tls_metadata.static_tls.tls_size);
445	data = lmalloc(sizeof (*ulwp) + tls_size);
446	if (data != NULL) {
447		/* LINTED pointer cast may result in improper alignment */
448		ulwp = (ulwp_t *)(data + tls_size);
449	}
450	return (ulwp);
451}
452
453/*
454 * Free a ulwp structure.
455 * If there is an associated stack, put it on the stack list and
456 * munmap() previously freed stacks up to the residual cache limit.
457 * Else put it on the ulwp free list and never call lfree() on it.
458 */
459static void
460ulwp_free(ulwp_t *ulwp)
461{
462	uberdata_t *udp = curthread->ul_uberdata;
463
464	ASSERT(udp->nthreads <= 1 || MUTEX_OWNED(&udp->link_lock, curthread));
465	ulwp->ul_next = NULL;
466	if (ulwp == udp->ulwp_one)	/* don't reuse the primordial stack */
467		/*EMPTY*/;
468	else if (ulwp->ul_mapsiz != 0) {
469		if (udp->lwp_stacks == NULL)
470			udp->lwp_stacks = udp->lwp_laststack = ulwp;
471		else {
472			udp->lwp_laststack->ul_next = ulwp;
473			udp->lwp_laststack = ulwp;
474		}
475		if (++udp->nfreestack > udp->thread_stack_cache)
476			trim_stack_cache(udp->thread_stack_cache);
477	} else {
478		if (udp->ulwp_freelist == NULL)
479			udp->ulwp_freelist = udp->ulwp_lastfree = ulwp;
480		else {
481			udp->ulwp_lastfree->ul_next = ulwp;
482			udp->ulwp_lastfree = ulwp;
483		}
484	}
485}
486
487/*
488 * Find a named lwp and return a pointer to its hash list location.
489 * On success, returns with the hash lock held.
490 */
491ulwp_t **
492find_lwpp(thread_t tid)
493{
494	uberdata_t *udp = curthread->ul_uberdata;
495	int ix = TIDHASH(tid, udp);
496	mutex_t *mp = &udp->thr_hash_table[ix].hash_lock;
497	ulwp_t *ulwp;
498	ulwp_t **ulwpp;
499
500	if (tid == 0)
501		return (NULL);
502
503	lmutex_lock(mp);
504	for (ulwpp = &udp->thr_hash_table[ix].hash_bucket;
505	    (ulwp = *ulwpp) != NULL;
506	    ulwpp = &ulwp->ul_hash) {
507		if (ulwp->ul_lwpid == tid)
508			return (ulwpp);
509	}
510	lmutex_unlock(mp);
511	return (NULL);
512}
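
/*
 * Usage sketch (hypothetical caller, not code from this file): on
 * success find_lwpp() returns with the hash lock held, so the caller
 * must drop it, e.g. with ulwp_unlock() as pthread_detach() does below:
 *
 *	ulwp_t **ulwpp;
 *
 *	if ((ulwpp = find_lwpp(tid)) != NULL) {
 *		ulwp_t *ulwp = *ulwpp;
 *		...				(inspect or modify ulwp)
 *		ulwp_unlock(ulwp, udp);		(drops the hash lock)
 *	}
 */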
513
514/*
515 * Wake up all lwps waiting on this lwp for some reason.
516 */
517void
518ulwp_broadcast(ulwp_t *ulwp)
519{
520	ulwp_t *self = curthread;
521	uberdata_t *udp = self->ul_uberdata;
522
523	ASSERT(MUTEX_OWNED(ulwp_mutex(ulwp, udp), self));
524	(void) cond_broadcast(ulwp_condvar(ulwp, udp));
525}
526
527/*
528 * Find a named lwp and return a pointer to it.
529 * Returns with the hash lock held.
530 */
531ulwp_t *
532find_lwp(thread_t tid)
533{
534	ulwp_t *self = curthread;
535	uberdata_t *udp = self->ul_uberdata;
536	ulwp_t *ulwp = NULL;
537	ulwp_t **ulwpp;
538
539	if (self->ul_lwpid == tid) {
540		ulwp = self;
541		ulwp_lock(ulwp, udp);
542	} else if ((ulwpp = find_lwpp(tid)) != NULL) {
543		ulwp = *ulwpp;
544	}
545
546	if (ulwp && ulwp->ul_dead) {
547		ulwp_unlock(ulwp, udp);
548		ulwp = NULL;
549	}
550
551	return (ulwp);
552}
553
554int
555_thrp_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
556	long flags, thread_t *new_thread, size_t guardsize)
557{
558	ulwp_t *self = curthread;
559	uberdata_t *udp = self->ul_uberdata;
560	ucontext_t uc;
561	uint_t lwp_flags;
562	thread_t tid;
563	int error;
564	ulwp_t *ulwp;
565
566	/*
567	 * Enforce the restriction of not creating any threads
568	 * until the primary link map has been initialized.
569	 * Also, disallow thread creation to a child of vfork().
570	 */
571	if (!self->ul_primarymap || self->ul_vfork)
572		return (ENOTSUP);
573
574	if (udp->hash_size == 1)
575		finish_init();
576
577	if ((stk || stksize) && stksize < MINSTACK)
578		return (EINVAL);
579
580	if (stk == NULL) {
581		if ((ulwp = find_stack(stksize, guardsize)) == NULL)
582			return (ENOMEM);
583		stksize = ulwp->ul_mapsiz - ulwp->ul_guardsize;
584	} else {
585		/* initialize the private stack */
586		if ((ulwp = ulwp_alloc()) == NULL)
587			return (ENOMEM);
588		ulwp->ul_stk = stk;
589		ulwp->ul_stktop = (uintptr_t)stk + stksize;
590		ulwp->ul_stksiz = stksize;
591	}
592	/* ulwp is not in the hash table; make sure hash_out() doesn't fail */
593	ulwp->ul_ix = -1;
594	ulwp->ul_errnop = &ulwp->ul_errno;
595
596	lwp_flags = LWP_SUSPENDED;
597	if (flags & (THR_DETACHED|THR_DAEMON)) {
598		flags |= THR_DETACHED;
599		lwp_flags |= LWP_DETACHED;
600	}
601	if (flags & THR_DAEMON)
602		lwp_flags |= LWP_DAEMON;
603
604	/* creating a thread: enforce mt-correctness in mutex_lock() */
605	self->ul_async_safe = 1;
606
607	/* per-thread copies of global variables, for speed */
608	ulwp->ul_queue_fifo = self->ul_queue_fifo;
609	ulwp->ul_cond_wait_defer = self->ul_cond_wait_defer;
610	ulwp->ul_error_detection = self->ul_error_detection;
611	ulwp->ul_async_safe = self->ul_async_safe;
612	ulwp->ul_max_spinners = self->ul_max_spinners;
613	ulwp->ul_adaptive_spin = self->ul_adaptive_spin;
614	ulwp->ul_queue_spin = self->ul_queue_spin;
615	ulwp->ul_door_noreserve = self->ul_door_noreserve;
616	ulwp->ul_misaligned = self->ul_misaligned;
617
618	/* new thread inherits creating thread's scheduling parameters */
619	ulwp->ul_policy = self->ul_policy;
620	ulwp->ul_pri = (self->ul_epri? self->ul_epri : self->ul_pri);
621	ulwp->ul_cid = self->ul_cid;
622	ulwp->ul_rtclassid = self->ul_rtclassid;
623
624	ulwp->ul_primarymap = self->ul_primarymap;
625	ulwp->ul_self = ulwp;
626	ulwp->ul_uberdata = udp;
627
628	/* debugger support */
629	ulwp->ul_usropts = flags;
630
631#ifdef __sparc
632	/*
633	 * We cache several instructions in the thread structure for use
634	 * by the fasttrap DTrace provider. When changing this, read the
635	 * comment in fasttrap.h for all the other places that must
636	 * be changed.
637	 */
638	ulwp->ul_dsave = 0x9de04000;	/* save %g1, %g0, %sp */
639	ulwp->ul_drestore = 0x81e80000;	/* restore %g0, %g0, %g0 */
640	ulwp->ul_dftret = 0x91d0203a;	/* ta 0x3a */
641	ulwp->ul_dreturn = 0x81ca0000;	/* return %o0 */
642#endif
643
644	ulwp->ul_startpc = func;
645	ulwp->ul_startarg = arg;
646	_fpinherit(ulwp);
647	/*
648	 * Defer signals on the new thread until its TLS constructors
649	 * have been called.  _thrp_setup() will call sigon() after
650	 * it has called tls_setup().
651	 */
652	ulwp->ul_sigdefer = 1;
653
654	error = setup_context(&uc, _thrp_setup, ulwp,
655	    (caddr_t)ulwp->ul_stk + ulwp->ul_guardsize, stksize);
656	if (error != 0 && stk != NULL)	/* inaccessible stack */
657		error = EFAULT;
658
659	/*
660	 * Call enter_critical() to avoid being suspended until we
661	 * have linked the new thread into the proper lists.
662	 * This is necessary because forkall() and fork1() must
663	 * suspend all threads and they must see a complete list.
664	 */
665	enter_critical(self);
666	uc.uc_sigmask = ulwp->ul_sigmask = self->ul_sigmask;
667	if (error != 0 ||
668	    (error = __lwp_create(&uc, lwp_flags, &tid)) != 0) {
669		exit_critical(self);
670		ulwp->ul_lwpid = (lwpid_t)(-1);
671		ulwp->ul_dead = 1;
672		ulwp->ul_detached = 1;
673		lmutex_lock(&udp->link_lock);
674		ulwp_free(ulwp);
675		lmutex_unlock(&udp->link_lock);
676		return (error);
677	}
678	self->ul_nocancel = 0;	/* cancellation is now possible */
679	udp->uberflags.uf_mt = 1;
680	if (new_thread)
681		*new_thread = tid;
682	if (flags & THR_DETACHED)
683		ulwp->ul_detached = 1;
684	ulwp->ul_lwpid = tid;
685	ulwp->ul_stop = TSTP_REGULAR;
686	if (flags & THR_SUSPENDED)
687		ulwp->ul_created = 1;
688
689	lmutex_lock(&udp->link_lock);
690	ulwp->ul_forw = udp->all_lwps;
691	ulwp->ul_back = udp->all_lwps->ul_back;
692	ulwp->ul_back->ul_forw = ulwp;
693	ulwp->ul_forw->ul_back = ulwp;
694	hash_in(ulwp, udp);
695	udp->nthreads++;
696	if (flags & THR_DAEMON)
697		udp->ndaemons++;
698	if (flags & THR_NEW_LWP)
699		thr_concurrency++;
700	__libc_threaded = 1;		/* inform stdio */
701	lmutex_unlock(&udp->link_lock);
702
703	if (__td_event_report(self, TD_CREATE, udp)) {
704		self->ul_td_evbuf.eventnum = TD_CREATE;
705		self->ul_td_evbuf.eventdata = (void *)(uintptr_t)tid;
706		tdb_event(TD_CREATE, udp);
707	}
708
709	exit_critical(self);
710
711	if (!(flags & THR_SUSPENDED))
712		(void) _thrp_continue(tid, TSTP_REGULAR);
713
714	return (0);
715}
716
717int
718thr_create(void *stk, size_t stksize, void *(*func)(void *), void *arg,
719	long flags, thread_t *new_thread)
720{
721	return (_thrp_create(stk, stksize, func, arg, flags, new_thread, 0));
722}
723
724/*
725 * A special cancellation cleanup hook for DCE.
726 * cleanuphndlr, when it is not NULL, will contain a callback
727 * function to be called before a thread is terminated in
728 * thr_exit() as a result of being cancelled.
729 */
730static void (*cleanuphndlr)(void) = NULL;
731
732/*
733 * _pthread_setcleanupinit: sets the cleanup hook.
734 */
735int
736_pthread_setcleanupinit(void (*func)(void))
737{
738	cleanuphndlr = func;
739	return (0);
740}
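
/*
 * A minimal usage sketch (hypothetical DCE-style consumer, not code from
 * this file).  The hook registered here is invoked in _thrp_exit_common()
 * below, only when the thread terminates because it was cancelled:
 *
 *	static void
 *	dce_cleanup(void)			(hypothetical callback)
 *	{
 *		release per-thread DCE state here
 *	}
 *	...
 *	(void) _pthread_setcleanupinit(dce_cleanup);
 */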
741
742void
743_thrp_exit()
744{
745	ulwp_t *self = curthread;
746	uberdata_t *udp = self->ul_uberdata;
747	ulwp_t *replace = NULL;
748
749	if (__td_event_report(self, TD_DEATH, udp)) {
750		self->ul_td_evbuf.eventnum = TD_DEATH;
751		tdb_event(TD_DEATH, udp);
752	}
753
754	ASSERT(self->ul_sigdefer != 0);
755
756	lmutex_lock(&udp->link_lock);
757	udp->nthreads--;
758	if (self->ul_usropts & THR_NEW_LWP)
759		thr_concurrency--;
760	if (self->ul_usropts & THR_DAEMON)
761		udp->ndaemons--;
762	else if (udp->nthreads == udp->ndaemons) {
763		/*
764		 * We are the last non-daemon thread exiting.
765		 * Exit the process.  We retain our TSD and TLS so
766		 * that atexit() application functions can use them.
767		 */
768		lmutex_unlock(&udp->link_lock);
769		exit(0);
770		thr_panic("_thrp_exit(): exit(0) returned");
771	}
772	lmutex_unlock(&udp->link_lock);
773
774	tsd_exit();		/* deallocate thread-specific data */
775	tls_exit();		/* deallocate thread-local storage */
776	heldlock_exit();	/* deal with left-over held locks */
777
778	/* block all signals to finish exiting */
779	block_all_signals(self);
780	/* also prevent ourself from being suspended */
781	enter_critical(self);
782	rwl_free(self);
783	lmutex_lock(&udp->link_lock);
784	ulwp_free(self);
785	(void) ulwp_lock(self, udp);
786
787	if (self->ul_mapsiz && !self->ul_detached) {
788		/*
789		 * We want to free the stack for reuse but must keep
790		 * the ulwp_t struct for the benefit of thr_join().
791		 * For this purpose we allocate a replacement ulwp_t.
792		 */
793		if ((replace = udp->ulwp_replace_free) == NULL)
794			replace = lmalloc(REPLACEMENT_SIZE);
795		else if ((udp->ulwp_replace_free = replace->ul_next) == NULL)
796			udp->ulwp_replace_last = NULL;
797	}
798
799	if (udp->all_lwps == self)
800		udp->all_lwps = self->ul_forw;
801	if (udp->all_lwps == self)
802		udp->all_lwps = NULL;
803	else {
804		self->ul_forw->ul_back = self->ul_back;
805		self->ul_back->ul_forw = self->ul_forw;
806	}
807	self->ul_forw = self->ul_back = NULL;
808#if defined(THREAD_DEBUG)
809	/* collect queue lock statistics before marking ourself dead */
810	record_spin_locks(self);
811#endif
812	self->ul_dead = 1;
813	self->ul_pleasestop = 0;
814	if (replace != NULL) {
815		int ix = self->ul_ix;		/* the hash index */
816		(void) memcpy(replace, self, REPLACEMENT_SIZE);
817		replace->ul_self = replace;
818		replace->ul_next = NULL;	/* clone not on stack list */
819		replace->ul_mapsiz = 0;		/* allows clone to be freed */
820		replace->ul_replace = 1;	/* requires clone to be freed */
821		hash_out_unlocked(self, ix, udp);
822		hash_in_unlocked(replace, ix, udp);
823		ASSERT(!(self->ul_detached));
824		self->ul_detached = 1;		/* this frees the stack */
825		self->ul_schedctl = NULL;
826		self->ul_schedctl_called = &udp->uberflags;
827		set_curthread(self = replace);
828		/*
829		 * Having just changed the address of curthread, we
830		 * must reset the ownership of the locks we hold so
831		 * that assertions will not fire when we release them.
832		 */
833		udp->link_lock.mutex_owner = (uintptr_t)self;
834		ulwp_mutex(self, udp)->mutex_owner = (uintptr_t)self;
835		/*
836		 * NOTE:
837		 * On i386, %gs still references the original, not the
838		 * replacement, ulwp structure.  Fetching the replacement
839		 * curthread pointer via %gs:0 works correctly since the
840		 * original ulwp structure will not be reallocated until
841		 * this lwp has completed its lwp_exit() system call (see
842		 * dead_and_buried()), but from here on out, we must make
843		 * no references to %gs:<offset> other than %gs:0.
844		 */
845	}
846	/*
847	 * Put non-detached terminated threads in the all_zombies list.
848	 */
849	if (!self->ul_detached) {
850		udp->nzombies++;
851		if (udp->all_zombies == NULL) {
852			ASSERT(udp->nzombies == 1);
853			udp->all_zombies = self->ul_forw = self->ul_back = self;
854		} else {
855			self->ul_forw = udp->all_zombies;
856			self->ul_back = udp->all_zombies->ul_back;
857			self->ul_back->ul_forw = self;
858			self->ul_forw->ul_back = self;
859		}
860	}
861	/*
862	 * Notify everyone waiting for this thread.
863	 */
864	ulwp_broadcast(self);
865	(void) ulwp_unlock(self, udp);
866	/*
867	 * Prevent any more references to the schedctl data.
868	 * We are exiting and continue_fork() may not find us.
869	 * Do this just before dropping link_lock, since fork
870	 * serializes on link_lock.
871	 */
872	self->ul_schedctl = NULL;
873	self->ul_schedctl_called = &udp->uberflags;
874	lmutex_unlock(&udp->link_lock);
875
876	ASSERT(self->ul_critical == 1);
877	ASSERT(self->ul_preempt == 0);
878	_lwp_terminate();	/* never returns */
879	thr_panic("_thrp_exit(): _lwp_terminate() returned");
880}
881
882#if defined(THREAD_DEBUG)
883void
884collect_queue_statistics()
885{
886	uberdata_t *udp = curthread->ul_uberdata;
887	ulwp_t *ulwp;
888
889	if (thread_queue_dump) {
890		lmutex_lock(&udp->link_lock);
891		if ((ulwp = udp->all_lwps) != NULL) {
892			do {
893				record_spin_locks(ulwp);
894			} while ((ulwp = ulwp->ul_forw) != udp->all_lwps);
895		}
896		lmutex_unlock(&udp->link_lock);
897	}
898}
899#endif
900
901static void __NORETURN
902_thrp_exit_common(void *status, int unwind)
903{
904	ulwp_t *self = curthread;
905	int cancelled = (self->ul_cancel_pending && status == PTHREAD_CANCELED);
906
907	ASSERT(self->ul_critical == 0 && self->ul_preempt == 0);
908
909	/*
910	 * Disable cancellation and call the special DCE cancellation
911	 * cleanup hook if it is enabled.  Do nothing else before calling
912	 * the DCE cancellation cleanup hook; it may call longjmp() and
913	 * never return here.
914	 */
915	self->ul_cancel_disabled = 1;
916	self->ul_cancel_async = 0;
917	self->ul_save_async = 0;
918	self->ul_cancelable = 0;
919	self->ul_cancel_pending = 0;
920	set_cancel_pending_flag(self, 1);
921	if (cancelled && cleanuphndlr != NULL)
922		(*cleanuphndlr)();
923
924	/*
925	 * Block application signals while we are exiting.
926	 * We call out to C++, TSD, and TLS destructors while exiting
927	 * and these are application-defined, so we cannot be assured
928	 * that they won't reset the signal mask.  We use sigoff() to
929	 * defer any signals that may be received as a result of this
930	 * bad behavior.  Such signals will be lost to the process
931	 * when the thread finishes exiting.
932	 */
933	(void) thr_sigsetmask(SIG_SETMASK, &maskset, NULL);
934	sigoff(self);
935
936	self->ul_rval = status;
937
938	/*
939	 * If thr_exit is being called from the places where
940	 * C++ destructors are to be called, such as cancellation
941	 * points, then set this flag. It is checked in _t_cancel()
942	 * to decide whether _ex_unwind() is to be called or not.
943	 */
944	if (unwind)
945		self->ul_unwind = 1;
946
947	/*
948	 * _thrp_unwind() will eventually call _thrp_exit().
949	 * It never returns.
950	 */
951	_thrp_unwind(NULL);
952	thr_panic("_thrp_exit_common(): _thrp_unwind() returned");
953
954	for (;;)	/* to shut the compiler up about __NORETURN */
955		continue;
956}
957
958/*
959 * Called when a thread returns from its start function.
960 * We are at the top of the stack; no unwinding is necessary.
961 */
962void
963_thrp_terminate(void *status)
964{
965	_thrp_exit_common(status, 0);
966}
967
968#pragma weak pthread_exit = thr_exit
969#pragma weak _thr_exit = thr_exit
970void
971thr_exit(void *status)
972{
973	_thrp_exit_common(status, 1);
974}
975
976int
977_thrp_join(thread_t tid, thread_t *departed, void **status, int do_cancel)
978{
979	uberdata_t *udp = curthread->ul_uberdata;
980	mutex_t *mp;
981	void *rval;
982	thread_t found;
983	ulwp_t *ulwp;
984	ulwp_t **ulwpp;
985	int replace;
986	int error;
987
988	if (do_cancel)
989		error = lwp_wait(tid, &found);
990	else {
991		while ((error = __lwp_wait(tid, &found)) == EINTR)
992			;
993	}
994	if (error)
995		return (error);
996
997	/*
998	 * We must hold link_lock to avoid a race condition with find_stack().
999	 */
1000	lmutex_lock(&udp->link_lock);
1001	if ((ulwpp = find_lwpp(found)) == NULL) {
1002		/*
1003		 * lwp_wait() found an lwp that the library doesn't know
1004		 * about.  It must have been created with _lwp_create().
1005		 * Just return its lwpid; we can't know its status.
1006		 */
1007		lmutex_unlock(&udp->link_lock);
1008		rval = NULL;
1009	} else {
1010		/*
1011		 * Remove ulwp from the hash table.
1012		 */
1013		ulwp = *ulwpp;
1014		*ulwpp = ulwp->ul_hash;
1015		ulwp->ul_hash = NULL;
1016		/*
1017		 * Remove ulwp from all_zombies list.
1018		 */
1019		ASSERT(udp->nzombies >= 1);
1020		if (udp->all_zombies == ulwp)
1021			udp->all_zombies = ulwp->ul_forw;
1022		if (udp->all_zombies == ulwp)
1023			udp->all_zombies = NULL;
1024		else {
1025			ulwp->ul_forw->ul_back = ulwp->ul_back;
1026			ulwp->ul_back->ul_forw = ulwp->ul_forw;
1027		}
1028		ulwp->ul_forw = ulwp->ul_back = NULL;
1029		udp->nzombies--;
1030		ASSERT(ulwp->ul_dead && !ulwp->ul_detached &&
1031		    !(ulwp->ul_usropts & (THR_DETACHED|THR_DAEMON)));
1032		/*
1033		 * We can't call ulwp_unlock(ulwp) after we set
1034		 * ulwp->ul_ix = -1 so we have to get a pointer to the
1035		 * ulwp's hash table mutex now in order to unlock it below.
1036		 */
1037		mp = ulwp_mutex(ulwp, udp);
1038		ulwp->ul_lwpid = (lwpid_t)(-1);
1039		ulwp->ul_ix = -1;
1040		rval = ulwp->ul_rval;
1041		replace = ulwp->ul_replace;
1042		lmutex_unlock(mp);
1043		if (replace) {
1044			ulwp->ul_next = NULL;
1045			if (udp->ulwp_replace_free == NULL)
1046				udp->ulwp_replace_free =
1047				    udp->ulwp_replace_last = ulwp;
1048			else {
1049				udp->ulwp_replace_last->ul_next = ulwp;
1050				udp->ulwp_replace_last = ulwp;
1051			}
1052		}
1053		lmutex_unlock(&udp->link_lock);
1054	}
1055
1056	if (departed != NULL)
1057		*departed = found;
1058	if (status != NULL)
1059		*status = rval;
1060	return (0);
1061}
1062
1063int
1064thr_join(thread_t tid, thread_t *departed, void **status)
1065{
1066	int error = _thrp_join(tid, departed, status, 1);
1067	return ((error == EINVAL)? ESRCH : error);
1068}
1069
1070/*
1071 * pthread_join() differs from Solaris thr_join():
1072 * It does not return the departed thread's id
1073 * and hence does not have a "departed" argument.
1074 * It returns EINVAL if tid refers to a detached thread.
1075 */
1076#pragma weak _pthread_join = pthread_join
1077int
1078pthread_join(pthread_t tid, void **status)
1079{
1080	return ((tid == 0)? ESRCH : _thrp_join(tid, NULL, status, 1));
1081}
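
/*
 * A brief usage sketch (hypothetical caller) of the difference described
 * above: thr_join() can report which thread departed, pthread_join()
 * cannot.
 *
 *	thread_t departed;
 *	void *rv;
 *
 *	(void) thr_join(tid, &departed, &rv);	(reports who departed)
 *	(void) pthread_join(tid, &rv);		(no "departed" argument)
 */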
1082
1083int
1084pthread_detach(pthread_t tid)
1085{
1086	uberdata_t *udp = curthread->ul_uberdata;
1087	ulwp_t *ulwp;
1088	ulwp_t **ulwpp;
1089	int error = 0;
1090
1091	if ((ulwpp = find_lwpp(tid)) == NULL)
1092		return (ESRCH);
1093	ulwp = *ulwpp;
1094
1095	if (ulwp->ul_dead) {
1096		ulwp_unlock(ulwp, udp);
1097		error = _thrp_join(tid, NULL, NULL, 0);
1098	} else {
1099		error = __lwp_detach(tid);
1100		ulwp->ul_detached = 1;
1101		ulwp->ul_usropts |= THR_DETACHED;
1102		ulwp_unlock(ulwp, udp);
1103	}
1104	return (error);
1105}
1106
1107static const char *
1108ematch(const char *ev, const char *match)
1109{
1110	int c;
1111
1112	while ((c = *match++) != '\0') {
1113		if (*ev++ != c)
1114			return (NULL);
1115	}
1116	if (*ev++ != '=')
1117		return (NULL);
1118	return (ev);
1119}
1120
1121static int
1122envvar(const char *ev, const char *match, int limit)
1123{
1124	int val = -1;
1125	const char *ename;
1126
1127	if ((ename = ematch(ev, match)) != NULL) {
1128		int c;
1129		for (val = 0; (c = *ename) != '\0'; ename++) {
1130			if (!isdigit(c)) {
1131				val = -1;
1132				break;
1133			}
1134			val = val * 10 + (c - '0');
1135			if (val > limit) {
1136				val = limit;
1137				break;
1138			}
1139		}
1140	}
1141	return (val);
1142}
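
/*
 * Worked example (illustrative only): with ev = "STACK_CACHE=20" and
 * match = "STACK_CACHE", ematch() returns a pointer to "20" and envvar()
 * yields 20.  Values above the limit are clamped, so "QUEUE_FIFO=99"
 * with limit 8 yields 8, and a non-numeric value yields -1 (ignored).
 */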
1143
1144static void
1145etest(const char *ev)
1146{
1147	int value;
1148
1149	if ((value = envvar(ev, "QUEUE_SPIN", 1000000)) >= 0)
1150		thread_queue_spin = value;
1151	if ((value = envvar(ev, "ADAPTIVE_SPIN", 1000000)) >= 0)
1152		thread_adaptive_spin = value;
1153	if ((value = envvar(ev, "MAX_SPINNERS", 255)) >= 0)
1154		thread_max_spinners = value;
1155	if ((value = envvar(ev, "QUEUE_FIFO", 8)) >= 0)
1156		thread_queue_fifo = value;
1157#if defined(THREAD_DEBUG)
1158	if ((value = envvar(ev, "QUEUE_VERIFY", 1)) >= 0)
1159		thread_queue_verify = value;
1160	if ((value = envvar(ev, "QUEUE_DUMP", 1)) >= 0)
1161		thread_queue_dump = value;
1162#endif
1163	if ((value = envvar(ev, "STACK_CACHE", 10000)) >= 0)
1164		thread_stack_cache = value;
1165	if ((value = envvar(ev, "COND_WAIT_DEFER", 1)) >= 0)
1166		thread_cond_wait_defer = value;
1167	if ((value = envvar(ev, "ERROR_DETECTION", 2)) >= 0)
1168		thread_error_detection = value;
1169	if ((value = envvar(ev, "ASYNC_SAFE", 1)) >= 0)
1170		thread_async_safe = value;
1171	if ((value = envvar(ev, "DOOR_NORESERVE", 1)) >= 0)
1172		thread_door_noreserve = value;
1173	if ((value = envvar(ev, "LOCKS_MISALIGNED", 1)) >= 0)
1174		thread_locks_misaligned = value;
1175}
1176
1177/*
1178 * Look for and evaluate environment variables of the form "_THREAD_*".
1179 * For compatibility with the past, we also look for environment
1180 * names of the form "LIBTHREAD_*".
1181 */
1182static void
1183set_thread_vars()
1184{
1185	extern const char **_environ;
1186	const char **pev;
1187	const char *ev;
1188	char c;
1189
1190	if ((pev = _environ) == NULL)
1191		return;
1192	while ((ev = *pev++) != NULL) {
1193		c = *ev;
1194		if (c == '_' && strncmp(ev, "_THREAD_", 8) == 0)
1195			etest(ev + 8);
1196		if (c == 'L' && strncmp(ev, "LIBTHREAD_", 10) == 0)
1197			etest(ev + 10);
1198	}
1199}
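
/*
 * Usage sketch (illustrative): these tunables are read once from the
 * environment in libc_init(), so they must be set before the process
 * starts, e.g.
 *
 *	_THREAD_STACK_CACHE=20 _THREAD_QUEUE_SPIN=5000 ./a.out
 *
 * The older LIBTHREAD_STACK_CACHE=20 spelling is also accepted.
 */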
1200
1201/* PROBE_SUPPORT begin */
1202#pragma weak __tnf_probe_notify
1203extern void __tnf_probe_notify(void);
1204/* PROBE_SUPPORT end */
1205
1206/* same as atexit() but private to the library */
1207extern int _atexit(void (*)(void));
1208
1209/* same as _cleanup() but private to the library */
1210extern void __cleanup(void);
1211
1212extern void atfork_init(void);
1213
1214#ifdef __amd64
1215extern void __proc64id(void);
1216#endif
1217
1218/*
1219 * libc_init() is called by ld.so.1 for library initialization.
1220 * We perform minimal initialization; enough to work with the main thread.
1221 */
1222void
1223libc_init(void)
1224{
1225	uberdata_t *udp = &__uberdata;
1226	ulwp_t *oldself = __curthread();
1227	ucontext_t uc;
1228	ulwp_t *self;
1229	struct rlimit rl;
1230	caddr_t data;
1231	size_t tls_size;
1232	int setmask;
1233
1234	/*
1235	 * For the initial stage of initialization, we must be careful
1236	 * not to call any function that could possibly call _cerror().
1237	 * For this purpose, we call only the raw system call wrappers.
1238	 */
1239
1240#ifdef __amd64
1241	/*
1242	 * Gather information about cache layouts for optimized
1243	 * AMD and Intel assembler strfoo() and memfoo() functions.
1244	 */
1245	__proc64id();
1246#endif
1247
1248	/*
1249	 * Every libc, regardless of which link map, must register __cleanup().
1250	 */
1251	(void) _atexit(__cleanup);
1252
1253	/*
1254	 * We keep our uberdata on one of (a) the first alternate link map
1255	 * or (b) the primary link map.  We switch to the primary link map
1256	 * and stay there once we see it.  All intermediate link maps are
1257	 * subject to being unloaded at any time.
1258	 */
1259	if (oldself != NULL && (oldself->ul_primarymap || !primary_link_map)) {
1260		__tdb_bootstrap = oldself->ul_uberdata->tdb_bootstrap;
1261		mutex_setup();
1262		atfork_init();	/* every link map needs atfork() processing */
1263		return;
1264	}
1265
1266	/*
1267	 * To establish the main stack information, we have to get our context.
1268	 * This is also convenient to use for getting our signal mask.
1269	 */
1270	uc.uc_flags = UC_ALL;
1271	(void) __getcontext(&uc);
1272	ASSERT(uc.uc_link == NULL);
1273
1274	tls_size = roundup64(udp->tls_metadata.static_tls.tls_size);
1275	ASSERT(primary_link_map || tls_size == 0);
1276	data = lmalloc(sizeof (ulwp_t) + tls_size);
1277	if (data == NULL)
1278		thr_panic("cannot allocate thread structure for main thread");
1279	/* LINTED pointer cast may result in improper alignment */
1280	self = (ulwp_t *)(data + tls_size);
1281	init_hash_table[0].hash_bucket = self;
1282
1283	self->ul_sigmask = uc.uc_sigmask;
1284	delete_reserved_signals(&self->ul_sigmask);
1285	/*
1286	 * Are the old and new sets different?
1287	 * (This can happen if we are currently blocking SIGCANCEL.)
1288	 * If so, we must explicitly set our signal mask, below.
1289	 */
1290	setmask =
1291	    ((self->ul_sigmask.__sigbits[0] ^ uc.uc_sigmask.__sigbits[0]) |
1292	    (self->ul_sigmask.__sigbits[1] ^ uc.uc_sigmask.__sigbits[1]) |
1293	    (self->ul_sigmask.__sigbits[2] ^ uc.uc_sigmask.__sigbits[2]) |
1294	    (self->ul_sigmask.__sigbits[3] ^ uc.uc_sigmask.__sigbits[3]));
1295
1296#ifdef __sparc
1297	/*
1298	 * We cache several instructions in the thread structure for use
1299	 * by the fasttrap DTrace provider. When changing this, read the
1300	 * comment in fasttrap.h for all the other places that must
1301	 * be changed.
1302	 */
1303	self->ul_dsave = 0x9de04000;	/* save %g1, %g0, %sp */
1304	self->ul_drestore = 0x81e80000;	/* restore %g0, %g0, %g0 */
1305	self->ul_dftret = 0x91d0203a;	/* ta 0x3a */
1306	self->ul_dreturn = 0x81ca0000;	/* return %o0 */
1307#endif
1308
1309	self->ul_stktop = (uintptr_t)uc.uc_stack.ss_sp + uc.uc_stack.ss_size;
1310	(void) getrlimit(RLIMIT_STACK, &rl);
1311	self->ul_stksiz = rl.rlim_cur;
1312	self->ul_stk = (caddr_t)(self->ul_stktop - self->ul_stksiz);
1313
1314	self->ul_forw = self->ul_back = self;
1315	self->ul_hash = NULL;
1316	self->ul_ix = 0;
1317	self->ul_lwpid = 1; /* _lwp_self() */
1318	self->ul_main = 1;
1319	self->ul_self = self;
1320	self->ul_policy = -1;		/* initialize only when needed */
1321	self->ul_pri = 0;
1322	self->ul_cid = 0;
1323	self->ul_rtclassid = -1;
1324	self->ul_uberdata = udp;
1325	if (oldself != NULL) {
1326		int i;
1327
1328		ASSERT(primary_link_map);
1329		ASSERT(oldself->ul_main == 1);
1330		self->ul_stsd = oldself->ul_stsd;
1331		for (i = 0; i < TSD_NFAST; i++)
1332			self->ul_ftsd[i] = oldself->ul_ftsd[i];
1333		self->ul_tls = oldself->ul_tls;
1334		/*
1335		 * Retrieve all pointers to uberdata allocated
1336		 * while running on previous link maps.
1337		 * We would like to do a structure assignment here, but
1338		 * gcc turns structure assignments into calls to memcpy(),
1339		 * a function exported from libc.  We can't call any such
1340		 * external functions until we establish curthread, below,
1341		 * so we just call our private version of memcpy().
1342		 */
1343		(void) memcpy(udp, oldself->ul_uberdata, sizeof (*udp));
1344		/*
1345		 * These items point to global data on the primary link map.
1346		 */
1347		udp->thr_hash_table = init_hash_table;
1348		udp->sigacthandler = sigacthandler;
1349		udp->tdb.tdb_events = tdb_events;
1350		ASSERT(udp->nthreads == 1 && !udp->uberflags.uf_mt);
1351		ASSERT(udp->lwp_stacks == NULL);
1352		ASSERT(udp->ulwp_freelist == NULL);
1353		ASSERT(udp->ulwp_replace_free == NULL);
1354		ASSERT(udp->hash_size == 1);
1355	}
1356	udp->all_lwps = self;
1357	udp->ulwp_one = self;
1358	udp->pid = getpid();
1359	udp->nthreads = 1;
1360	/*
1361	 * In every link map, tdb_bootstrap points to the same piece of
1362	 * allocated memory.  When the primary link map is initialized,
1363	 * the allocated memory is assigned a pointer to the one true
1364	 * uberdata.  This allows libc_db to initialize itself regardless
1365	 * of which instance of libc it finds in the address space.
1366	 */
1367	if (udp->tdb_bootstrap == NULL)
1368		udp->tdb_bootstrap = lmalloc(sizeof (uberdata_t *));
1369	__tdb_bootstrap = udp->tdb_bootstrap;
1370	if (primary_link_map) {
1371		self->ul_primarymap = 1;
1372		udp->primary_map = 1;
1373		*udp->tdb_bootstrap = udp;
1374	}
1375	/*
1376	 * Cancellation can't happen until:
1377	 *	pthread_cancel() is called
1378	 * or:
1379	 *	another thread is created
1380	 * For now, as a single-threaded process, set the flag that tells
1381	 * PROLOGUE/EPILOGUE (in scalls.c) that cancellation can't happen.
1382	 */
1383	self->ul_nocancel = 1;
1384
1385#if defined(__amd64)
1386	(void) ___lwp_private(_LWP_SETPRIVATE, _LWP_FSBASE, self);
1387#elif defined(__i386)
1388	(void) ___lwp_private(_LWP_SETPRIVATE, _LWP_GSBASE, self);
1389#endif	/* __i386 || __amd64 */
1390	set_curthread(self);		/* redundant on i386 */
1391	/*
1392	 * Now curthread is established and it is safe to call any
1393	 * function in libc except one that uses thread-local storage.
1394	 */
1395	self->ul_errnop = &errno;
1396	if (oldself != NULL) {
1397		/* tls_size was zero when oldself was allocated */
1398		lfree(oldself, sizeof (ulwp_t));
1399	}
1400	mutex_setup();
1401	atfork_init();
1402	signal_init();
1403
1404	/*
1405	 * If the stack is unlimited, we set the size to zero to disable
1406	 * stack checking.
1407	 * XXX: Work harder here.  Get the stack size from /proc/self/rmap
1408	 */
1409	if (self->ul_stksiz == RLIM_INFINITY) {
1410		self->ul_ustack.ss_sp = (void *)self->ul_stktop;
1411		self->ul_ustack.ss_size = 0;
1412	} else {
1413		self->ul_ustack.ss_sp = self->ul_stk;
1414		self->ul_ustack.ss_size = self->ul_stksiz;
1415	}
1416	self->ul_ustack.ss_flags = 0;
1417	(void) setustack(&self->ul_ustack);
1418
1419	/*
1420	 * Get the variables that affect thread behavior from the environment.
1421	 */
1422	set_thread_vars();
1423	udp->uberflags.uf_thread_error_detection = (char)thread_error_detection;
1424	udp->thread_stack_cache = thread_stack_cache;
1425
1426	/*
1427	 * Make per-thread copies of global variables, for speed.
1428	 */
1429	self->ul_queue_fifo = (char)thread_queue_fifo;
1430	self->ul_cond_wait_defer = (char)thread_cond_wait_defer;
1431	self->ul_error_detection = (char)thread_error_detection;
1432	self->ul_async_safe = (char)thread_async_safe;
1433	self->ul_door_noreserve = (char)thread_door_noreserve;
1434	self->ul_misaligned = (char)thread_locks_misaligned;
1435	self->ul_max_spinners = (uint8_t)thread_max_spinners;
1436	self->ul_adaptive_spin = thread_adaptive_spin;
1437	self->ul_queue_spin = thread_queue_spin;
1438
1439#if defined(__sparc) && !defined(_LP64)
1440	if (self->ul_misaligned) {
1441		/*
1442		 * Tell the kernel to fix up ldx/stx instructions that
1443		 * refer to non-8-byte aligned data instead of giving
1444		 * the process an alignment trap and generating SIGBUS.
1445		 *
1446		 * Programs compiled for 32-bit sparc with the Studio SS12
1447		 * compiler get this done for them automatically (in _init()).
1448		 * We do it here for the benefit of programs compiled with
1449		 * other compilers, like gcc.
1450		 *
1451		 * This is necessary for the _THREAD_LOCKS_MISALIGNED=1
1452		 * environment variable horrible hack to work.
1453		 */
1454		extern void _do_fix_align(void);
1455		_do_fix_align();
1456	}
1457#endif
1458
1459	/*
1460	 * When we have initialized the primary link map, inform
1461	 * the dynamic linker about our interface functions.
1462	 */
1463	if (self->ul_primarymap)
1464		_ld_libc((void *)rtld_funcs);
1465
1466	/*
1467	 * Defer signals until TLS constructors have been called.
1468	 */
1469	sigoff(self);
1470	tls_setup();
1471	sigon(self);
1472	if (setmask)
1473		(void) restore_signals(self);
1474
1475	/*
1476	 * Make private copies of __xpg4 and __xpg6 so libc can test
1477	 * them after this point without invoking the dynamic linker.
1478	 */
1479	libc__xpg4 = __xpg4;
1480	libc__xpg6 = __xpg6;
1481
1482	/* PROBE_SUPPORT begin */
1483	if (self->ul_primarymap && __tnf_probe_notify != NULL)
1484		__tnf_probe_notify();
1485	/* PROBE_SUPPORT end */
1486
1487	init_sigev_thread();
1488	init_aio();
1489
1490	/*
1491	 * We need to reset __threaded dynamically at runtime because it may
1492	 * be bound to a definition of __threaded outside libc, one that may
1493	 * not have an initial value of 1 (absent a copy relocation in a.out).
1494	 */
1495	__threaded = 1;
1496}
1497
1498#pragma fini(libc_fini)
1499void
1500libc_fini()
1501{
1502	/*
1503	 * If we are doing fini processing for the instance of libc
1504	 * on the first alternate link map (this happens only when
1505	 * the dynamic linker rejects a bad audit library), then clear
1506	 * __curthread().  We abandon whatever memory was allocated by
1507	 * lmalloc() while running on this alternate link-map but we
1508	 * don't care (and can't find the memory in any case); we just
1509	 * want to protect the application from this bad audit library.
1510	 * No fini processing is done by libc in the normal case.
1511	 */
1512
1513	uberdata_t *udp = curthread->ul_uberdata;
1514
1515	if (udp->primary_map == 0 && udp == &__uberdata)
1516		set_curthread(NULL);
1517}
1518
1519/*
1520 * finish_init is called when we are about to become multi-threaded,
1521 * that is, on the first call to thr_create().
1522 */
1523void
1524finish_init()
1525{
1526	ulwp_t *self = curthread;
1527	uberdata_t *udp = self->ul_uberdata;
1528	thr_hash_table_t *htp;
1529	void *data;
1530	int i;
1531
1532	/*
1533	 * No locks needed here; we are single-threaded on the first call.
1534	 * We can be called only after the primary link map has been set up.
1535	 */
1536	ASSERT(self->ul_primarymap);
1537	ASSERT(self == udp->ulwp_one);
1538	ASSERT(!udp->uberflags.uf_mt);
1539	ASSERT(udp->hash_size == 1);
1540
1541	/*
1542	 * Initialize self->ul_policy, self->ul_cid, and self->ul_pri.
1543	 */
1544	update_sched(self);
1545
1546	/*
1547	 * Allocate the queue_head array if not already allocated.
1548	 */
1549	if (udp->queue_head == NULL)
1550		queue_alloc();
1551
1552	/*
1553	 * Now allocate the thread hash table.
1554	 */
1555	if ((data = mmap(NULL, HASHTBLSZ * sizeof (thr_hash_table_t),
1556	    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0))
1557	    == MAP_FAILED)
1558		thr_panic("cannot allocate thread hash table");
1559
1560	udp->thr_hash_table = htp = (thr_hash_table_t *)data;
1561	udp->hash_size = HASHTBLSZ;
1562	udp->hash_mask = HASHTBLSZ - 1;
1563
1564	for (i = 0; i < HASHTBLSZ; i++, htp++) {
1565		htp->hash_lock.mutex_flag = LOCK_INITED;
1566		htp->hash_lock.mutex_magic = MUTEX_MAGIC;
1567		htp->hash_cond.cond_magic = COND_MAGIC;
1568	}
1569	hash_in_unlocked(self, TIDHASH(self->ul_lwpid, udp), udp);
1570
1571	/*
1572	 * Set up the SIGCANCEL handler for threads cancellation.
1573	 * Set up the SIGCANCEL handler for thread cancellation.
1574	setup_cancelsig(SIGCANCEL);
1575
1576	/*
1577	 * Arrange to do special things on exit --
1578	 * - collect queue statistics from all remaining active threads.
1579	 * - dump queue statistics to stderr if _THREAD_QUEUE_DUMP is set.
1580	 * - grab assert_lock to ensure that assertion failures
1581	 *   and a core dump take precedence over _exit().
1582	 * (Functions are called in the reverse order of their registration.)
1583	 */
1584	(void) _atexit(grab_assert_lock);
1585#if defined(THREAD_DEBUG)
1586	(void) _atexit(dump_queue_statistics);
1587	(void) _atexit(collect_queue_statistics);
1588#endif
1589}
1590
1591/*
1592 * Used only by postfork1_child(), below.
1593 */
1594static void
1595mark_dead_and_buried(ulwp_t *ulwp)
1596{
1597	ulwp->ul_dead = 1;
1598	ulwp->ul_lwpid = (lwpid_t)(-1);
1599	ulwp->ul_hash = NULL;
1600	ulwp->ul_ix = -1;
1601	ulwp->ul_schedctl = NULL;
1602	ulwp->ul_schedctl_called = NULL;
1603}
1604
1605/*
1606 * This is called from fork1() in the child.
1607 * Reset our data structures to reflect one lwp.
1608 */
1609void
1610postfork1_child()
1611{
1612	ulwp_t *self = curthread;
1613	uberdata_t *udp = self->ul_uberdata;
1614	queue_head_t *qp;
1615	ulwp_t *next;
1616	ulwp_t *ulwp;
1617	int i;
1618
1619	/* daemon threads shouldn't call fork1(), but oh well... */
1620	self->ul_usropts &= ~THR_DAEMON;
1621	udp->nthreads = 1;
1622	udp->ndaemons = 0;
1623	udp->uberflags.uf_mt = 0;
1624	__libc_threaded = 0;
1625	for (i = 0; i < udp->hash_size; i++)
1626		udp->thr_hash_table[i].hash_bucket = NULL;
1627	self->ul_lwpid = _lwp_self();
1628	hash_in_unlocked(self, TIDHASH(self->ul_lwpid, udp), udp);
1629
1630	/*
1631	 * Some thread in the parent might have been suspended
1632	 * while holding udp->callout_lock or udp->ld_lock.
1633	 * Reinitialize the child's copies.
1634	 */
1635	(void) mutex_init(&udp->callout_lock,
1636	    USYNC_THREAD | LOCK_RECURSIVE, NULL);
1637	(void) mutex_init(&udp->ld_lock,
1638	    USYNC_THREAD | LOCK_RECURSIVE, NULL);
1639
1640	/* no one in the child is on a sleep queue; reinitialize */
1641	if ((qp = udp->queue_head) != NULL) {
1642		(void) memset(qp, 0, 2 * QHASHSIZE * sizeof (queue_head_t));
1643		for (i = 0; i < 2 * QHASHSIZE; qp++, i++) {
1644			qp->qh_type = (i < QHASHSIZE)? MX : CV;
1645			qp->qh_lock.mutex_flag = LOCK_INITED;
1646			qp->qh_lock.mutex_magic = MUTEX_MAGIC;
1647			qp->qh_hlist = &qp->qh_def_root;
1648#if defined(THREAD_DEBUG)
1649			qp->qh_hlen = 1;
1650			qp->qh_hmax = 1;
1651#endif
1652		}
1653	}
1654
1655	/*
1656	 * All lwps except ourself are gone.  Mark them so.
1657	 * First mark all of the lwps that have already been freed.
1658	 * Then mark and free all of the active lwps except ourself.
1659	 * Since we are single-threaded, no locks are required here.
1660	 */
1661	for (ulwp = udp->lwp_stacks; ulwp != NULL; ulwp = ulwp->ul_next)
1662		mark_dead_and_buried(ulwp);
1663	for (ulwp = udp->ulwp_freelist; ulwp != NULL; ulwp = ulwp->ul_next)
1664		mark_dead_and_buried(ulwp);
1665	for (ulwp = self->ul_forw; ulwp != self; ulwp = next) {
1666		next = ulwp->ul_forw;
1667		ulwp->ul_forw = ulwp->ul_back = NULL;
1668		mark_dead_and_buried(ulwp);
1669		tsd_free(ulwp);
1670		tls_free(ulwp);
1671		rwl_free(ulwp);
1672		heldlock_free(ulwp);
1673		ulwp_free(ulwp);
1674	}
1675	self->ul_forw = self->ul_back = udp->all_lwps = self;
1676	if (self != udp->ulwp_one)
1677		mark_dead_and_buried(udp->ulwp_one);
1678	if ((ulwp = udp->all_zombies) != NULL) {
1679		ASSERT(udp->nzombies != 0);
1680		do {
1681			next = ulwp->ul_forw;
1682			ulwp->ul_forw = ulwp->ul_back = NULL;
1683			mark_dead_and_buried(ulwp);
1684			udp->nzombies--;
1685			if (ulwp->ul_replace) {
1686				ulwp->ul_next = NULL;
1687				if (udp->ulwp_replace_free == NULL) {
1688					udp->ulwp_replace_free =
1689					    udp->ulwp_replace_last = ulwp;
1690				} else {
1691					udp->ulwp_replace_last->ul_next = ulwp;
1692					udp->ulwp_replace_last = ulwp;
1693				}
1694			}
1695		} while ((ulwp = next) != udp->all_zombies);
1696		ASSERT(udp->nzombies == 0);
1697		udp->all_zombies = NULL;
1698		udp->nzombies = 0;
1699	}
1700	trim_stack_cache(0);
1701
1702	/*
1703	 * Do post-fork1 processing for subsystems that need it.
1704	 */
1705	postfork1_child_tpool();
1706	postfork1_child_sigev_aio();
1707	postfork1_child_sigev_mq();
1708	postfork1_child_sigev_timer();
1709	postfork1_child_aio();
1710}
1711
1712lwpid_t
1713lwp_self(void)
1714{
1715	return (curthread->ul_lwpid);
1716}
1717
1718#pragma weak _ti_thr_self = thr_self
1719#pragma weak pthread_self = thr_self
1720thread_t
1721thr_self()
1722{
1723	return (curthread->ul_lwpid);
1724}
1725
1726int
1727thr_main()
1728{
1729	ulwp_t *self = __curthread();
1730
1731	return ((self == NULL)? -1 : self->ul_main);
1732}
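
/*
 * Illustrative note: per the code above, thr_main() returns -1 before
 * curthread has been established, 1 in the main (primordial) thread,
 * and 0 in any other thread, e.g.
 *
 *	if (thr_main() == 1) {
 *		...		(we are the primordial thread)
 *	}
 */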
1733
1734int
1735_thrp_cancelled(void)
1736{
1737	return (curthread->ul_rval == PTHREAD_CANCELED);
1738}
1739
1740int
1741_thrp_stksegment(ulwp_t *ulwp, stack_t *stk)
1742{
1743	stk->ss_sp = (void *)ulwp->ul_stktop;
1744	stk->ss_size = ulwp->ul_stksiz;
1745	stk->ss_flags = 0;
1746	return (0);
1747}
1748
1749#pragma weak _thr_stksegment = thr_stksegment
1750int
1751thr_stksegment(stack_t *stk)
1752{
1753	return (_thrp_stksegment(curthread, stk));
1754}
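
/*
 * Usage sketch (hypothetical caller): ss_sp is the stack top, taken from
 * ul_stktop above, so the stack base is ss_sp minus ss_size:
 *
 *	stack_t s;
 *
 *	if (thr_stksegment(&s) == 0) {
 *		caddr_t base = (caddr_t)s.ss_sp - s.ss_size;
 *		...
 *	}
 */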
1755
1756void
1757force_continue(ulwp_t *ulwp)
1758{
1759#if defined(THREAD_DEBUG)
1760	ulwp_t *self = curthread;
1761	uberdata_t *udp = self->ul_uberdata;
1762#endif
1763	int error;
1764	timespec_t ts;
1765
1766	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
1767	ASSERT(MUTEX_OWNED(ulwp_mutex(ulwp, udp), self));
1768
1769	for (;;) {
1770		error = _lwp_continue(ulwp->ul_lwpid);
1771		if (error != 0 && error != EINTR)
1772			break;
1773		error = 0;
1774		if (ulwp->ul_stopping) {	/* he is stopping himself */
1775			ts.tv_sec = 0;		/* give him a chance to run */
1776			ts.tv_nsec = 100000;	/* 100 usecs or clock tick */
1777			(void) __nanosleep(&ts, NULL);
1778		}
1779		if (!ulwp->ul_stopping)		/* he is running now */
1780			break;			/* so we are done */
1781		/*
1782		 * He is marked as being in the process of stopping
1783		 * himself.  Loop around and continue him again.
1784		 * He may not have been stopped the first time.
1785		 */
1786	}
1787}
1788
1789/*
1790 * Suspend an lwp with lwp_suspend(), then move it to a safe point,
1791 * that is, to a point where ul_critical and ul_rtld are both zero.
1792 * On return, the ulwp_lock() is dropped as with ulwp_unlock().
1793 * If 'link_dropped' is non-NULL, then 'link_lock' is held on entry.
1794 * If we have to drop link_lock, we store 1 through link_dropped.
1795 * If the lwp exits before it can be suspended, we return ESRCH.
1796 */
1797int
1798safe_suspend(ulwp_t *ulwp, uchar_t whystopped, int *link_dropped)
1799{
1800	ulwp_t *self = curthread;
1801	uberdata_t *udp = self->ul_uberdata;
1802	cond_t *cvp = ulwp_condvar(ulwp, udp);
1803	mutex_t *mp = ulwp_mutex(ulwp, udp);
1804	thread_t tid = ulwp->ul_lwpid;
1805	int ix = ulwp->ul_ix;
1806	int error = 0;
1807
1808	ASSERT(whystopped == TSTP_REGULAR ||
1809	    whystopped == TSTP_MUTATOR ||
1810	    whystopped == TSTP_FORK);
1811	ASSERT(ulwp != self);
1812	ASSERT(!ulwp->ul_stop);
1813	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
1814	ASSERT(MUTEX_OWNED(mp, self));
1815
1816	if (link_dropped != NULL)
1817		*link_dropped = 0;
1818
1819	/*
1820	 * We must grab the target's spin lock before suspending it.
1821	 * See the comments below and in _thrp_suspend() for why.
1822	 */
1823	spin_lock_set(&ulwp->ul_spinlock);
1824	(void) ___lwp_suspend(tid);
1825	spin_lock_clear(&ulwp->ul_spinlock);
1826
1827top:
1828	if ((ulwp->ul_critical == 0 && ulwp->ul_rtld == 0) ||
1829	    ulwp->ul_stopping) {
1830		/* thread is already safe */
1831		ulwp->ul_stop |= whystopped;
1832	} else {
1833		/*
1834		 * Setting ul_pleasestop causes the target thread to stop
1835		 * itself in _thrp_suspend(), below, after we drop its lock.
1836		 * We must continue the critical thread before dropping
1837		 * link_lock because the critical thread may be holding
1838		 * the queue lock for link_lock.  This is delicate.
1839		 */
1840		ulwp->ul_pleasestop |= whystopped;
1841		force_continue(ulwp);
1842		if (link_dropped != NULL) {
1843			*link_dropped = 1;
1844			lmutex_unlock(&udp->link_lock);
1845			/* be sure to drop link_lock only once */
1846			link_dropped = NULL;
1847		}
1848
1849		/*
1850		 * The thread may disappear by calling thr_exit() so we
1851		 * cannot rely on the ulwp pointer after dropping the lock.
1852		 * Instead, we search the hash table to find it again.
1853		 * When we return, we may find that the thread has been
1854		 * continued by some other thread.  The suspend/continue
1855		 * interfaces are prone to such race conditions by design.
1856		 */
1857		while (ulwp && !ulwp->ul_dead && !ulwp->ul_stop &&
1858		    (ulwp->ul_pleasestop & whystopped)) {
1859			(void) __cond_wait(cvp, mp);
1860			for (ulwp = udp->thr_hash_table[ix].hash_bucket;
1861			    ulwp != NULL; ulwp = ulwp->ul_hash) {
1862				if (ulwp->ul_lwpid == tid)
1863					break;
1864			}
1865		}
1866
1867		if (ulwp == NULL || ulwp->ul_dead)
1868			error = ESRCH;
1869		else {
1870			/*
1871			 * Do another lwp_suspend() to make sure we don't
1872			 * return until the target thread is fully stopped
1873			 * in the kernel.  Don't apply lwp_suspend() until
1874			 * we know that the target is not holding any
1875			 * queue locks, that is, that it has completed
1876			 * ulwp_unlock(self) and has, or at least is
1877			 * about to, call lwp_suspend() on itself.  We do
1878			 * this by grabbing the target's spin lock.
1879			 */
1880			ASSERT(ulwp->ul_lwpid == tid);
1881			spin_lock_set(&ulwp->ul_spinlock);
1882			(void) ___lwp_suspend(tid);
1883			spin_lock_clear(&ulwp->ul_spinlock);
1884			/*
1885			 * If some other thread did a thr_continue()
1886			 * on the target thread we have to start over.
1887			 */
1888			if (!ulwp->ul_stopping || !(ulwp->ul_stop & whystopped))
1889				goto top;
1890		}
1891	}
1892
1893	(void) cond_broadcast(cvp);
1894	lmutex_unlock(mp);
1895	return (error);
1896}
1897
1898int
1899_thrp_suspend(thread_t tid, uchar_t whystopped)
1900{
1901	ulwp_t *self = curthread;
1902	uberdata_t *udp = self->ul_uberdata;
1903	ulwp_t *ulwp;
1904	int error = 0;
1905
1906	ASSERT((whystopped & (TSTP_REGULAR|TSTP_MUTATOR|TSTP_FORK)) != 0);
1907	ASSERT((whystopped & ~(TSTP_REGULAR|TSTP_MUTATOR|TSTP_FORK)) == 0);
1908
1909	/*
1910	 * We can't suspend anyone except ourself while
1911	 * some other thread is performing a fork.
1912	 * This also allows only one suspension at a time.
1913	 */
1914	if (tid != self->ul_lwpid)
1915		fork_lock_enter();
1916
1917	if ((ulwp = find_lwp(tid)) == NULL)
1918		error = ESRCH;
1919	else if (whystopped == TSTP_MUTATOR && !ulwp->ul_mutator) {
1920		ulwp_unlock(ulwp, udp);
1921		error = EINVAL;
1922	} else if (ulwp->ul_stop) {	/* already stopped */
1923		ulwp->ul_stop |= whystopped;
1924		ulwp_broadcast(ulwp);
1925		ulwp_unlock(ulwp, udp);
1926	} else if (ulwp != self) {
1927		/*
1928		 * After suspending the other thread, move it out of a
1929		 * critical section and deal with the schedctl mappings.
1930		 * safe_suspend() suspends the other thread, calls
1931		 * ulwp_broadcast(ulwp) and drops the ulwp lock.
1932		 */
1933		error = safe_suspend(ulwp, whystopped, NULL);
1934	} else {
1935		int schedctl_after_fork = 0;
1936
1937		/*
1938		 * We are suspending ourself.  We must not take a signal
1939		 * until we return from lwp_suspend() and clear ul_stopping.
1940		 * This is to guard against siglongjmp().
1941		 */
1942		enter_critical(self);
1943		self->ul_sp = stkptr();
1944		_flush_windows();	/* sparc */
1945		self->ul_pleasestop = 0;
1946		self->ul_stop |= whystopped;
1947		/*
1948		 * Grab our spin lock before dropping ulwp_mutex(self).
1949		 * This prevents the suspending thread from applying
1950		 * lwp_suspend() to us before we emerge from
1951		 * lmutex_unlock(mp) and have dropped mp's queue lock.
1952		 */
1953		spin_lock_set(&self->ul_spinlock);
1954		self->ul_stopping = 1;
1955		ulwp_broadcast(self);
1956		ulwp_unlock(self, udp);
1957		/*
1958		 * From this point until we return from lwp_suspend(),
1959		 * we must not call any function that might invoke the
1960		 * dynamic linker, that is, we can only call functions
1961		 * private to the library.
1962		 *
1963		 * Also, this is a nasty race condition for a process
1964		 * that is undergoing a forkall() operation:
1965		 * Once we clear our spinlock (below), we are vulnerable
1966		 * to being suspended by the forkall() thread before
1967		 * we manage to suspend ourself in ___lwp_suspend().
1968		 * See safe_suspend() and force_continue().
1969		 *
1970		 * To avoid a SIGSEGV due to the disappearance
1971		 * of the schedctl mappings in the child process,
1972		 * which can happen in spin_lock_clear() if we
1973		 * are suspended while we are in the middle of
1974		 * its call to preempt(), we preemptively clear
1975		 * our own schedctl pointer before dropping our
1976		 * spinlock.  We reinstate it, in both the parent
1977		 * and (if this really is a forkall()) the child.
1978		 */
1979		if (whystopped & TSTP_FORK) {
1980			schedctl_after_fork = 1;
1981			self->ul_schedctl = NULL;
1982			self->ul_schedctl_called = &udp->uberflags;
1983		}
1984		spin_lock_clear(&self->ul_spinlock);
1985		(void) ___lwp_suspend(tid);
1986		/*
1987		 * Somebody else continued us.
1988		 * We can't grab ulwp_lock(self)
1989		 * until after clearing ul_stopping.
1990		 * force_continue() relies on this.
1991		 */
1992		self->ul_stopping = 0;
1993		self->ul_sp = 0;
1994		if (schedctl_after_fork) {
1995			self->ul_schedctl_called = NULL;
1996			self->ul_schedctl = NULL;
1997			(void) setup_schedctl();
1998		}
1999		ulwp_lock(self, udp);
2000		ulwp_broadcast(self);
2001		ulwp_unlock(self, udp);
2002		exit_critical(self);
2003	}
2004
2005	if (tid != self->ul_lwpid)
2006		fork_lock_exit();
2007
2008	return (error);
2009}
2010
2011/*
2012 * Suspend all lwps other than ourself in preparation for fork.
2013 */
2014void
2015suspend_fork()
2016{
2017	ulwp_t *self = curthread;
2018	uberdata_t *udp = self->ul_uberdata;
2019	ulwp_t *ulwp;
2020	int link_dropped;
2021
2022	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
2023top:
2024	lmutex_lock(&udp->link_lock);
2025
2026	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2027		ulwp_lock(ulwp, udp);
2028		if (ulwp->ul_stop) {	/* already stopped */
2029			ulwp->ul_stop |= TSTP_FORK;
2030			ulwp_broadcast(ulwp);
2031			ulwp_unlock(ulwp, udp);
2032		} else {
2033			/*
2034			 * Suspend the lwp and move it to a safe point.
2035			 */
2036			if (safe_suspend(ulwp, TSTP_FORK, &link_dropped) ||
2037			    link_dropped)
2038				goto top;
2039		}
2040	}
2041
2042	lmutex_unlock(&udp->link_lock);
2043}
2044
2045void
2046continue_fork(int child)
2047{
2048	ulwp_t *self = curthread;
2049	uberdata_t *udp = self->ul_uberdata;
2050	ulwp_t *ulwp;
2051
2052	ASSERT(MUTEX_OWNED(&udp->fork_lock, self));
2053
2054	/*
2055	 * Clear the schedctl pointers in the child of forkall().
2056	 */
2057	if (child) {
2058		for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2059			ulwp->ul_schedctl_called =
2060			    ulwp->ul_dead? &udp->uberflags : NULL;
2061			ulwp->ul_schedctl = NULL;
2062		}
2063	}
2064
2065	/*
2066	 * Set all lwps that were stopped for fork() running again.
2067	 */
2068	lmutex_lock(&udp->link_lock);
2069	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2070		mutex_t *mp = ulwp_mutex(ulwp, udp);
2071		lmutex_lock(mp);
2072		ASSERT(ulwp->ul_stop & TSTP_FORK);
2073		ulwp->ul_stop &= ~TSTP_FORK;
2074		ulwp_broadcast(ulwp);
2075		if (!ulwp->ul_stop)
2076			force_continue(ulwp);
2077		lmutex_unlock(mp);
2078	}
2079	lmutex_unlock(&udp->link_lock);
2080}
2081
2082int
2083_thrp_continue(thread_t tid, uchar_t whystopped)
2084{
2085	uberdata_t *udp = curthread->ul_uberdata;
2086	ulwp_t *ulwp;
2087	mutex_t *mp;
2088	int error = 0;
2089
2090	ASSERT(whystopped == TSTP_REGULAR ||
2091	    whystopped == TSTP_MUTATOR);
2092
2093	/*
2094	 * We single-thread the entire thread suspend/continue mechanism.
2095	 */
2096	fork_lock_enter();
2097
2098	if ((ulwp = find_lwp(tid)) == NULL) {
2099		fork_lock_exit();
2100		return (ESRCH);
2101	}
2102
2103	mp = ulwp_mutex(ulwp, udp);
2104	if (whystopped == TSTP_MUTATOR && !ulwp->ul_mutator) {
2105		error = EINVAL;
2106	} else if (ulwp->ul_stop & whystopped) {
2107		ulwp->ul_stop &= ~whystopped;
2108		ulwp_broadcast(ulwp);
2109		if (!ulwp->ul_stop) {
2110			if (whystopped == TSTP_REGULAR && ulwp->ul_created) {
2111				ulwp->ul_sp = 0;
2112				ulwp->ul_created = 0;
2113			}
2114			force_continue(ulwp);
2115		}
2116	}
2117	lmutex_unlock(mp);
2118
2119	fork_lock_exit();
2120	return (error);
2121}
2122
2123int
2124thr_suspend(thread_t tid)
2125{
2126	return (_thrp_suspend(tid, TSTP_REGULAR));
2127}
2128
2129int
2130thr_continue(thread_t tid)
2131{
2132	return (_thrp_continue(tid, TSTP_REGULAR));
2133}
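
/*
 * Illustrative sketch (not part of libc): a typical caller pairs the
 * public thr_suspend()/thr_continue() interfaces from <thread.h>.
 * The worker() start routine and examine_stopped_thread() below are
 * hypothetical.
 *
 *	#include <thread.h>
 *
 *	thread_t tid;
 *
 *	if (thr_create(NULL, 0, worker, NULL, 0, &tid) == 0 &&
 *	    thr_suspend(tid) == 0) {
 *		examine_stopped_thread(tid);
 *		(void) thr_continue(tid);
 *	}
 *
 * As noted in safe_suspend() above, the suspend/continue interfaces
 * are prone to races by design: some other thread may continue the
 * target at any time.
 */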
2134
2135void
2136thr_yield()
2137{
2138	yield();
2139}
2140
2141#pragma weak pthread_kill = thr_kill
2142#pragma weak _thr_kill = thr_kill
2143int
2144thr_kill(thread_t tid, int sig)
2145{
2146	if (sig == SIGCANCEL)
2147		return (EINVAL);
2148	return (_lwp_kill(tid, sig));
2149}
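
/*
 * Illustrative sketch (not part of libc): thr_kill() directs a signal
 * at a single thread rather than at the whole process, and it returns
 * an error number directly instead of setting errno.  The tid and
 * handle_thr_error() below are hypothetical.
 *
 *	#include <signal.h>
 *	#include <thread.h>
 *
 *	int err;
 *
 *	if ((err = thr_kill(tid, SIGUSR1)) != 0)
 *		handle_thr_error(err);	(e.g. ESRCH: no such thread)
 */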
2150
2151/*
2152 * Exit a critical section and take deferred actions if necessary.
2153 * Called from exit_critical() and from sigon().
2154 */
2155void
2156do_exit_critical()
2157{
2158	ulwp_t *self = curthread;
2159	int sig;
2160
2161	ASSERT(self->ul_critical == 0);
2162
2163	/*
2164	 * Don't suspend ourself or take a deferred signal while dying
2165	 * or while executing inside the dynamic linker (ld.so.1).
2166	 */
2167	if (self->ul_dead || self->ul_rtld)
2168		return;
2169
2170	while (self->ul_pleasestop ||
2171	    (self->ul_cursig != 0 && self->ul_sigdefer == 0)) {
2172		/*
2173		 * Avoid a recursive call to exit_critical() in _thrp_suspend()
2174		 * by keeping self->ul_critical == 1 here.
2175		 */
2176		self->ul_critical++;
2177		while (self->ul_pleasestop) {
2178			/*
2179			 * Guard against suspending ourself while on a sleep
2180			 * queue.  See the comments in call_user_handler().
2181			 */
2182			unsleep_self();
2183			set_parking_flag(self, 0);
2184			(void) _thrp_suspend(self->ul_lwpid,
2185			    self->ul_pleasestop);
2186		}
2187		self->ul_critical--;
2188
2189		if ((sig = self->ul_cursig) != 0 && self->ul_sigdefer == 0) {
2190			/*
2191			 * Clear ul_cursig before proceeding.
2192			 * This protects us from the dynamic linker's
2193			 * calls to bind_guard()/bind_clear() in the
2194			 * event that it is invoked to resolve a symbol
2195			 * like take_deferred_signal() below.
2196			 */
2197			self->ul_cursig = 0;
2198			take_deferred_signal(sig);
2199			ASSERT(self->ul_cursig == 0);
2200		}
2201	}
2202	ASSERT(self->ul_critical == 0);
2203}
2204
2205/*
2206 * _ti_bind_guard() and _ti_bind_clear() are called by the dynamic linker
2207 * (ld.so.1) when it has to do something, like resolve a symbol to be called
2208 * by the application or one of its libraries.  _ti_bind_guard() is called
2209 * on entry to ld.so.1, _ti_bind_clear() on exit from ld.so.1 back to the
2210 * application.  The dynamic linker gets special dispensation from libc to
2211 * run in a critical region (all signals deferred and no thread suspension
2212 * or forking allowed), and to be immune from cancellation for the duration.
2213 */
2214int
2215_ti_bind_guard(int flags)
2216{
2217	ulwp_t *self = curthread;
2218	uberdata_t *udp = self->ul_uberdata;
2219	int bindflag = (flags & THR_FLG_RTLD);
2220
2221	if ((self->ul_bindflags & bindflag) == bindflag)
2222		return (0);
2223	self->ul_bindflags |= bindflag;
2224	if ((flags & (THR_FLG_NOLOCK | THR_FLG_REENTER)) == THR_FLG_NOLOCK) {
2225		sigoff(self);	/* see no signals while holding ld_lock */
2226		self->ul_rtld++;	/* don't suspend while in ld.so.1 */
2227		(void) mutex_lock(&udp->ld_lock);
2228	}
2229	enter_critical(self);
2230	self->ul_save_state = self->ul_cancel_disabled;
2231	self->ul_cancel_disabled = 1;
2232	set_cancel_pending_flag(self, 0);
2233	return (1);
2234}
2235
2236int
2237_ti_bind_clear(int flags)
2238{
2239	ulwp_t *self = curthread;
2240	uberdata_t *udp = self->ul_uberdata;
2241	int bindflag = (flags & THR_FLG_RTLD);
2242
2243	if ((self->ul_bindflags & bindflag) == 0)
2244		return (self->ul_bindflags);
2245	self->ul_bindflags &= ~bindflag;
2246	self->ul_cancel_disabled = self->ul_save_state;
2247	set_cancel_pending_flag(self, 0);
2248	exit_critical(self);
2249	if ((flags & (THR_FLG_NOLOCK | THR_FLG_REENTER)) == THR_FLG_NOLOCK) {
2250		if (MUTEX_OWNED(&udp->ld_lock, self)) {
2251			(void) mutex_unlock(&udp->ld_lock);
2252			self->ul_rtld--;
2253			sigon(self);	/* reenable signals */
2254		}
2255	}
2256	return (self->ul_bindflags);
2257}
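
/*
 * Illustrative sketch (not how ld.so.1 is actually coded): libc expects
 * the dynamic linker to bracket its work with the guard/clear pair and
 * to call _ti_bind_clear() only when the matching _ti_bind_guard()
 * reported that it actually entered the guarded state.  The flags value
 * and do_binding_work() below are hypothetical.
 *
 *	int entered = _ti_bind_guard(flags);
 *
 *	do_binding_work();
 *	if (entered)
 *		(void) _ti_bind_clear(flags);
 */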
2258
2259/*
2260 * Tell the dynamic linker (ld.so.1) whether or not it was entered from
2261 * a critical region in libc.  Return zero if not, else return non-zero.
2262 */
2263int
2264_ti_critical(void)
2265{
2266	ulwp_t *self = curthread;
2267	int level = self->ul_critical;
2268
2269	if ((self->ul_bindflags & THR_FLG_RTLD) == 0 || level == 0)
2270		return (level);	/* ld.so.1 hasn't (yet) called enter() */
2271	return (level - 1);
2272}
2273
2274/*
2275 * sigoff() and sigon() enable cond_wait() to behave (optionally) like
2276 * it does in the old libthread (see the comments in cond_wait_queue()).
2277 * Also, signals are deferred at thread startup until TLS constructors
2278 * have all been called, at which time _thrp_setup() calls sigon().
2279 *
2280 * _sigoff() and _sigon() are external consolidation-private interfaces to
2281 * sigoff() and sigon(), respectively, in libc.  These are used in libnsl.
2282 * Also, _sigoff() and _sigon() are called from dbx's run-time checking
2283 * (librtc.so) to defer signals during its critical sections (not to be
2284 * confused with libc critical sections [see exit_critical() above]).
2285 */
2286void
2287_sigoff(void)
2288{
2289	ulwp_t *self = curthread;
2290
2291	sigoff(self);
2292}
2293
2294void
2295_sigon(void)
2296{
2297	ulwp_t *self = curthread;
2298
2299	ASSERT(self->ul_sigdefer > 0);
2300	sigon(self);
2301}
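
/*
 * Illustrative sketch (not taken from librtc or libnsl): a
 * consolidation-private consumer can defer signal delivery across its
 * own critical section by bracketing it with _sigoff()/_sigon().
 * Signals that arrive in between are not lost; they are taken once the
 * deferral count drops back to zero.  do_protected_work() is
 * hypothetical.
 *
 *	extern void _sigoff(void);
 *	extern void _sigon(void);
 *
 *	_sigoff();
 *	do_protected_work();
 *	_sigon();
 */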
2302
2303int
2304thr_getconcurrency()
2305{
2306	return (thr_concurrency);
2307}
2308
2309int
2310pthread_getconcurrency()
2311{
2312	return (pthread_concurrency);
2313}
2314
2315int
2316thr_setconcurrency(int new_level)
2317{
2318	uberdata_t *udp = curthread->ul_uberdata;
2319
2320	if (new_level < 0)
2321		return (EINVAL);
2322	if (new_level > 65536)		/* 65536 is totally arbitrary */
2323		return (EAGAIN);
2324	lmutex_lock(&udp->link_lock);
2325	if (new_level > thr_concurrency)
2326		thr_concurrency = new_level;
2327	lmutex_unlock(&udp->link_lock);
2328	return (0);
2329}
2330
2331int
2332pthread_setconcurrency(int new_level)
2333{
2334	if (new_level < 0)
2335		return (EINVAL);
2336	if (new_level > 65536)		/* 65536 is totally arbitrary */
2337		return (EAGAIN);
2338	pthread_concurrency = new_level;
2339	return (0);
2340}
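
/*
 * Illustrative sketch (not part of libc): as the functions above show,
 * the requested concurrency level is simply recorded and reported back;
 * thr_setconcurrency() never lowers the recorded value.
 *
 *	#include <thread.h>
 *
 *	int level;
 *
 *	(void) thr_setconcurrency(8);
 *	level = thr_getconcurrency();	(reports the recorded hint)
 */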
2341
2342size_t
2343thr_min_stack(void)
2344{
2345	return (MINSTACK);
2346}
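
/*
 * Illustrative sketch (not part of libc): a caller supplying its own
 * stack size should build on thr_min_stack() rather than guess an
 * absolute number.  worker() and the extra 64K are hypothetical.
 *
 *	#include <thread.h>
 *
 *	thread_t tid;
 *	size_t size = thr_min_stack() + 64 * 1024;
 *
 *	(void) thr_create(NULL, size, worker, NULL, 0, &tid);
 */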
2347
2348int
2349__nthreads(void)
2350{
2351	return (curthread->ul_uberdata->nthreads);
2352}
2353
2354/*
2355 * XXX
2356 * The remainder of this file implements the private interfaces to java
2357 * for garbage collection.  They are no longer used, at least not by
2358 * java 1.2, and can all go away once all old JVMs have disappeared.
2359 */
2360
2361int	suspendingallmutators;	/* when non-zero, suspending all mutators. */
2362int	suspendedallmutators;	/* when non-zero, all mutators suspended. */
2363int	mutatorsbarrier;	/* when non-zero, mutators barrier imposed. */
2364mutex_t	mutatorslock = DEFAULTMUTEX;	/* used to enforce mutators barrier. */
2365cond_t	mutatorscv = DEFAULTCV;		/* where non-mutators sleep. */
2366
2367/*
2368 * Get the available register state for the target thread.
2369 * Return non-volatile registers: TRS_NONVOLATILE
2370 */
2371#pragma weak _thr_getstate = thr_getstate
2372int
2373thr_getstate(thread_t tid, int *flag, lwpid_t *lwp, stack_t *ss, gregset_t rs)
2374{
2375	ulwp_t *self = curthread;
2376	uberdata_t *udp = self->ul_uberdata;
2377	ulwp_t **ulwpp;
2378	ulwp_t *ulwp;
2379	int error = 0;
2380	int trs_flag = TRS_LWPID;
2381
2382	if (tid == 0 || self->ul_lwpid == tid) {
2383		ulwp = self;
2384		ulwp_lock(ulwp, udp);
2385	} else if ((ulwpp = find_lwpp(tid)) != NULL) {
2386		ulwp = *ulwpp;
2387	} else {
2388		if (flag)
2389			*flag = TRS_INVALID;
2390		return (ESRCH);
2391	}
2392
2393	if (ulwp->ul_dead) {
2394		trs_flag = TRS_INVALID;
2395	} else if (!ulwp->ul_stop && !suspendedallmutators) {
2396		error = EINVAL;
2397		trs_flag = TRS_INVALID;
2398	} else if (ulwp->ul_stop) {
2399		trs_flag = TRS_NONVOLATILE;
2400		getgregs(ulwp, rs);
2401	}
2402
2403	if (flag)
2404		*flag = trs_flag;
2405	if (lwp)
2406		*lwp = tid;
2407	if (ss != NULL)
2408		(void) _thrp_stksegment(ulwp, ss);
2409
2410	ulwp_unlock(ulwp, udp);
2411	return (error);
2412}
2413
2414/*
2415 * Set the appropriate register state for the target thread.
2416 * This is not used by java.  It exists solely for the MSTC test suite.
2417 */
2418#pragma weak _thr_setstate = thr_setstate
2419int
2420thr_setstate(thread_t tid, int flag, gregset_t rs)
2421{
2422	uberdata_t *udp = curthread->ul_uberdata;
2423	ulwp_t *ulwp;
2424	int error = 0;
2425
2426	if ((ulwp = find_lwp(tid)) == NULL)
2427		return (ESRCH);
2428
2429	if (!ulwp->ul_stop && !suspendedallmutators)
2430		error = EINVAL;
2431	else if (rs != NULL) {
2432		switch (flag) {
2433		case TRS_NONVOLATILE:
2434			/* do /proc stuff here? */
2435			if (ulwp->ul_stop)
2436				setgregs(ulwp, rs);
2437			else
2438				error = EINVAL;
2439			break;
2440		case TRS_LWPID:		/* do /proc stuff here? */
2441		default:
2442			error = EINVAL;
2443			break;
2444		}
2445	}
2446
2447	ulwp_unlock(ulwp, udp);
2448	return (error);
2449}
2450
2451int
2452getlwpstatus(thread_t tid, struct lwpstatus *sp)
2453{
2454	extern ssize_t __pread(int, void *, size_t, off_t);
2455	char buf[100];
2456	int fd;
2457
2458	/* "/proc/self/lwp/%u/lwpstatus" w/o stdio */
2459	(void) strcpy(buf, "/proc/self/lwp/");
2460	ultos((uint64_t)tid, 10, buf + strlen(buf));
2461	(void) strcat(buf, "/lwpstatus");
2462	if ((fd = __open(buf, O_RDONLY, 0)) >= 0) {
2463		while (__pread(fd, sp, sizeof (*sp), 0) == sizeof (*sp)) {
2464			if (sp->pr_flags & PR_STOPPED) {
2465				(void) __close(fd);
2466				return (0);
2467			}
2468			yield();	/* give it a chance to stop */
2469		}
2470		(void) __close(fd);
2471	}
2472	return (-1);
2473}
2474
2475int
2476putlwpregs(thread_t tid, prgregset_t prp)
2477{
2478	extern ssize_t __writev(int, const struct iovec *, int);
2479	char buf[100];
2480	int fd;
2481	long dstop_sreg[2];
2482	long run_null[2];
2483	iovec_t iov[3];
2484
2485	/* "/proc/self/lwp/%u/lwpctl" w/o stdio */
2486	(void) strcpy(buf, "/proc/self/lwp/");
2487	ultos((uint64_t)tid, 10, buf + strlen(buf));
2488	(void) strcat(buf, "/lwpctl");
2489	if ((fd = __open(buf, O_WRONLY, 0)) >= 0) {
2490		dstop_sreg[0] = PCDSTOP;	/* direct it to stop */
2491		dstop_sreg[1] = PCSREG;		/* set the registers */
2492		iov[0].iov_base = (caddr_t)dstop_sreg;
2493		iov[0].iov_len = sizeof (dstop_sreg);
2494		iov[1].iov_base = (caddr_t)prp;	/* from the register set */
2495		iov[1].iov_len = sizeof (prgregset_t);
2496		run_null[0] = PCRUN;		/* make it runnable again */
2497		run_null[1] = 0;
2498		iov[2].iov_base = (caddr_t)run_null;
2499		iov[2].iov_len = sizeof (run_null);
2500		if (__writev(fd, iov, 3) >= 0) {
2501			(void) __close(fd);
2502			return (0);
2503		}
2504		(void) __close(fd);
2505	}
2506	return (-1);
2507}
2508
2509static ulong_t
2510gettsp_slow(thread_t tid)
2511{
2512	char buf[100];
2513	struct lwpstatus status;
2514
2515	if (getlwpstatus(tid, &status) != 0) {
2516		/* "__gettsp(%u): can't read lwpstatus" w/o stdio */
2517		(void) strcpy(buf, "__gettsp(");
2518		ultos((uint64_t)tid, 10, buf + strlen(buf));
2519		(void) strcat(buf, "): can't read lwpstatus");
2520		thr_panic(buf);
2521	}
2522	return (status.pr_reg[R_SP]);
2523}
2524
2525ulong_t
2526__gettsp(thread_t tid)
2527{
2528	uberdata_t *udp = curthread->ul_uberdata;
2529	ulwp_t *ulwp;
2530	ulong_t result;
2531
2532	if ((ulwp = find_lwp(tid)) == NULL)
2533		return (0);
2534
2535	if (ulwp->ul_stop && (result = ulwp->ul_sp) != 0) {
2536		ulwp_unlock(ulwp, udp);
2537		return (result);
2538	}
2539
2540	result = gettsp_slow(tid);
2541	ulwp_unlock(ulwp, udp);
2542	return (result);
2543}
2544
2545/*
2546 * This tells java stack walkers how to find the ucontext
2547 * structure passed to signal handlers.
2548 */
2549#pragma weak _thr_sighndlrinfo = thr_sighndlrinfo
2550void
2551thr_sighndlrinfo(void (**func)(), int *funcsize)
2552{
2553	*func = &__sighndlr;
2554	*funcsize = (char *)&__sighndlrend - (char *)&__sighndlr;
2555}
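
/*
 * Illustrative sketch (not part of libc): a stack walker that knows the
 * pc of a frame can compare it against the range reported here to
 * recognize the __sighndlr frame and hence know that a ucontext
 * structure was passed on that frame.  The pc value is assumed to have
 * been filled in by the walker itself.
 *
 *	char *pc;		(frame pc, supplied by the walker)
 *	void (*func)();
 *	int funcsize;
 *	int is_sigframe;
 *
 *	thr_sighndlrinfo(&func, &funcsize);
 *	is_sigframe = (pc >= (char *)func &&
 *	    pc < (char *)func + funcsize);
 */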
2556
2557/*
2558 * Mark a thread as a mutator, or reset a mutator back to being a
2559 * default, non-mutator thread.
2560 */
2561#pragma weak _thr_setmutator = thr_setmutator
2562int
2563thr_setmutator(thread_t tid, int enabled)
2564{
2565	ulwp_t *self = curthread;
2566	uberdata_t *udp = self->ul_uberdata;
2567	ulwp_t *ulwp;
2568	int error;
2569	int cancel_state;
2570
2571	enabled = enabled? 1 : 0;
2572top:
2573	if (tid == 0) {
2574		ulwp = self;
2575		ulwp_lock(ulwp, udp);
2576	} else if ((ulwp = find_lwp(tid)) == NULL) {
2577		return (ESRCH);
2578	}
2579
2580	/*
2581	 * The target thread should be the caller itself or a suspended thread.
2582	 * This prevents the target from also changing its ul_mutator field.
2583	 */
2584	error = 0;
2585	if (ulwp != self && !ulwp->ul_stop && enabled)
2586		error = EINVAL;
2587	else if (ulwp->ul_mutator != enabled) {
2588		lmutex_lock(&mutatorslock);
2589		if (mutatorsbarrier) {
2590			ulwp_unlock(ulwp, udp);
2591			(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE,
2592			    &cancel_state);
2593			while (mutatorsbarrier)
2594				(void) cond_wait(&mutatorscv, &mutatorslock);
2595			(void) pthread_setcancelstate(cancel_state, NULL);
2596			lmutex_unlock(&mutatorslock);
2597			goto top;
2598		}
2599		ulwp->ul_mutator = enabled;
2600		lmutex_unlock(&mutatorslock);
2601	}
2602
2603	ulwp_unlock(ulwp, udp);
2604	return (error);
2605}
2606
2607/*
2608 * Establish a barrier against new mutators.  Any non-mutator trying
2609 * to become a mutator is suspended until the barrier is removed.
2610 */
2611#pragma weak _thr_mutators_barrier = thr_mutators_barrier
2612void
2613thr_mutators_barrier(int enabled)
2614{
2615	int oldvalue;
2616	int cancel_state;
2617
2618	lmutex_lock(&mutatorslock);
2619
2620	/*
2621	 * Wait if trying to set the barrier while it is already set.
2622	 */
2623	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_state);
2624	while (mutatorsbarrier && enabled)
2625		(void) cond_wait(&mutatorscv, &mutatorslock);
2626	(void) pthread_setcancelstate(cancel_state, NULL);
2627
2628	oldvalue = mutatorsbarrier;
2629	mutatorsbarrier = enabled;
2630	/*
2631	 * Wake up any blocked non-mutators when the barrier is removed.
2632	 */
2633	if (oldvalue && !enabled)
2634		(void) cond_broadcast(&mutatorscv);
2635	lmutex_unlock(&mutatorslock);
2636}
2637
2638/*
2639 * Suspend the set of all mutators except for the caller.  The list
2640 * of all known threads is searched and only the mutators found in
2641 * this list are suspended; non-mutators are left running.  Mutators
2642 * that are already stopped simply gain the TSTP_MUTATOR stop flag.
2643 */
2644#pragma weak _thr_suspend_allmutators = thr_suspend_allmutators
2645int
2646thr_suspend_allmutators(void)
2647{
2648	ulwp_t *self = curthread;
2649	uberdata_t *udp = self->ul_uberdata;
2650	ulwp_t *ulwp;
2651	int link_dropped;
2652
2653	/*
2654	 * We single-thread the entire thread suspend/continue mechanism.
2655	 */
2656	fork_lock_enter();
2657
2658top:
2659	lmutex_lock(&udp->link_lock);
2660
2661	if (suspendingallmutators || suspendedallmutators) {
2662		lmutex_unlock(&udp->link_lock);
2663		fork_lock_exit();
2664		return (EINVAL);
2665	}
2666	suspendingallmutators = 1;
2667
2668	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2669		ulwp_lock(ulwp, udp);
2670		if (!ulwp->ul_mutator) {
2671			ulwp_unlock(ulwp, udp);
2672		} else if (ulwp->ul_stop) {	/* already stopped */
2673			ulwp->ul_stop |= TSTP_MUTATOR;
2674			ulwp_broadcast(ulwp);
2675			ulwp_unlock(ulwp, udp);
2676		} else {
2677			/*
2678			 * Suspend the mutator and move it to a safe point.
2679			 */
2680			if (safe_suspend(ulwp, TSTP_MUTATOR, &link_dropped) ||
2681			    link_dropped) {
2682				suspendingallmutators = 0;
2683				goto top;
2684			}
2685		}
2686	}
2687
2688	suspendedallmutators = 1;
2689	suspendingallmutators = 0;
2690	lmutex_unlock(&udp->link_lock);
2691	fork_lock_exit();
2692	return (0);
2693}
2694
2695/*
2696 * Suspend the target mutator.  The caller is permitted to suspend
2697 * itself.  If a mutator barrier is enabled, the caller will suspend
2698 * itself as though it had been suspended by thr_suspend_allmutators().
2699 * When the barrier is removed, this thread will be resumed.  Any
2700 * suspended mutator, whether suspended by thr_suspend_mutator(), or by
2701 * thr_suspend_allmutators(), can be resumed by thr_continue_mutator().
2702 */
2703#pragma weak _thr_suspend_mutator = thr_suspend_mutator
2704int
2705thr_suspend_mutator(thread_t tid)
2706{
2707	if (tid == 0)
2708		tid = curthread->ul_lwpid;
2709	return (_thrp_suspend(tid, TSTP_MUTATOR));
2710}
2711
2712/*
2713 * Resume the set of all suspended mutators.
2714 */
2715#pragma weak _thr_continue_allmutators = thr_continue_allmutators
2716int
2717thr_continue_allmutators()
2718{
2719	ulwp_t *self = curthread;
2720	uberdata_t *udp = self->ul_uberdata;
2721	ulwp_t *ulwp;
2722
2723	/*
2724	 * We single-thread the entire thread suspend/continue mechanism.
2725	 */
2726	fork_lock_enter();
2727
2728	lmutex_lock(&udp->link_lock);
2729	if (!suspendedallmutators) {
2730		lmutex_unlock(&udp->link_lock);
2731		fork_lock_exit();
2732		return (EINVAL);
2733	}
2734	suspendedallmutators = 0;
2735
2736	for (ulwp = self->ul_forw; ulwp != self; ulwp = ulwp->ul_forw) {
2737		mutex_t *mp = ulwp_mutex(ulwp, udp);
2738		lmutex_lock(mp);
2739		if (ulwp->ul_stop & TSTP_MUTATOR) {
2740			ulwp->ul_stop &= ~TSTP_MUTATOR;
2741			ulwp_broadcast(ulwp);
2742			if (!ulwp->ul_stop)
2743				force_continue(ulwp);
2744		}
2745		lmutex_unlock(mp);
2746	}
2747
2748	lmutex_unlock(&udp->link_lock);
2749	fork_lock_exit();
2750	return (0);
2751}
2752
2753/*
2754 * Resume a suspended mutator.
2755 */
2756#pragma weak _thr_continue_mutator = thr_continue_mutator
2757int
2758thr_continue_mutator(thread_t tid)
2759{
2760	return (_thrp_continue(tid, TSTP_MUTATOR));
2761}
2762
2763#pragma weak _thr_wait_mutator = thr_wait_mutator
2764int
2765thr_wait_mutator(thread_t tid, int dontwait)
2766{
2767	uberdata_t *udp = curthread->ul_uberdata;
2768	ulwp_t *ulwp;
2769	int cancel_state;
2770	int error = 0;
2771
2772	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_state);
2773top:
2774	if ((ulwp = find_lwp(tid)) == NULL) {
2775		(void) pthread_setcancelstate(cancel_state, NULL);
2776		return (ESRCH);
2777	}
2778
2779	if (!ulwp->ul_mutator)
2780		error = EINVAL;
2781	else if (dontwait) {
2782		if (!(ulwp->ul_stop & TSTP_MUTATOR))
2783			error = EWOULDBLOCK;
2784	} else if (!(ulwp->ul_stop & TSTP_MUTATOR)) {
2785		cond_t *cvp = ulwp_condvar(ulwp, udp);
2786		mutex_t *mp = ulwp_mutex(ulwp, udp);
2787
2788		(void) cond_wait(cvp, mp);
2789		(void) lmutex_unlock(mp);
2790		goto top;
2791	}
2792
2793	ulwp_unlock(ulwp, udp);
2794	(void) pthread_setcancelstate(cancel_state, NULL);
2795	return (error);
2796}
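
/*
 * Illustrative sketch (not part of libc, and not the exact protocol any
 * particular JVM used): taken together, the mutator interfaces above
 * were meant to let a garbage collector stop the world, examine the
 * stopped threads, and resume them.  scan_stopped_mutators() is
 * hypothetical.
 *
 *	each thread that can touch the heap registers itself first:
 *
 *	(void) thr_setmutator(0, 1);
 *
 *	the collector then runs a stop-the-world cycle:
 *
 *	thr_mutators_barrier(1);
 *	if (thr_suspend_allmutators() == 0) {
 *		scan_stopped_mutators();	(may use thr_getstate())
 *		(void) thr_continue_allmutators();
 *	}
 *	thr_mutators_barrier(0);
 */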
2797
2798/* PROBE_SUPPORT begin */
2799
2800void
2801thr_probe_setup(void *data)
2802{
2803	curthread->ul_tpdp = data;
2804}
2805
2806static void *
2807_thread_probe_getfunc()
2808{
2809	return (curthread->ul_tpdp);
2810}
2811
2812void * (*thr_probe_getfunc_addr)(void) = _thread_probe_getfunc;
2813
2814/* ARGSUSED */
2815void
2816_resume(ulwp_t *ulwp, caddr_t sp, int dontsave)
2817{
2818	/* never called */
2819}
2820
2821/* ARGSUSED */
2822void
2823_resume_ret(ulwp_t *oldlwp)
2824{
2825	/* never called */
2826}
2827
2828/* PROBE_SUPPORT end */
2829