thr_kern.c revision 153989
1/*
2 * Copyright (C) 2003 Daniel M. Eischen <deischen@freebsd.org>
3 * Copyright (C) 2002 Jonathon Mini <mini@freebsd.org>
4 * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 *    must display the following acknowledgement:
17 *	This product includes software developed by John Birrell.
18 * 4. Neither the name of the author nor the names of any co-contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 */
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/lib/libkse/thread/thr_kern.c 153989 2006-01-03 15:34:27Z brian $");
37
38#include <sys/types.h>
39#include <sys/kse.h>
40#include <sys/ptrace.h>
41#include <sys/signalvar.h>
42#include <sys/queue.h>
43#include <machine/atomic.h>
44#include <machine/sigframe.h>
45
46#include <assert.h>
47#include <errno.h>
48#include <signal.h>
49#include <stdlib.h>
50#include <string.h>
51#include <time.h>
52#include <ucontext.h>
53#include <unistd.h>
54
55#include "atomic_ops.h"
56#include "thr_private.h"
57#include "libc_private.h"
58#ifdef NOTYET
59#include "spinlock.h"
60#endif
61
62/* #define DEBUG_THREAD_KERN */
63#ifdef DEBUG_THREAD_KERN
64#define DBG_MSG		stdout_debug
65#else
66#define DBG_MSG(x...)
67#endif
68
69/*
70 * Define a high water mark for the maximum number of threads that
71 * will be cached.  Once this level is reached, any extra threads
72 * will be free()'d.
73 */
74#define	MAX_CACHED_THREADS	100
75/*
76 * Define high water marks for the maximum number of KSEs and KSE groups
77 * that will be cached. Because we support 1:1 threading, there could have
78 * same number of KSEs and KSE groups as threads. Once these levels are
79 * reached, any extra KSE and KSE groups will be free()'d.
80 */
81#define	MAX_CACHED_KSES		((_thread_scope_system <= 0) ? 50 : 100)
82#define	MAX_CACHED_KSEGS	((_thread_scope_system <= 0) ? 50 : 100)
83
84#define	KSE_SET_MBOX(kse, thrd) \
85	(kse)->k_kcb->kcb_kmbx.km_curthread = &(thrd)->tcb->tcb_tmbx
86
87#define	KSE_SET_EXITED(kse)	(kse)->k_flags |= KF_EXITED
88
89/*
90 * Macros for manipulating the run queues.  The priority queue
91 * routines use the thread's pqe link and also handle the setting
92 * and clearing of the thread's THR_FLAGS_IN_RUNQ flag.
93 */
94#define	KSE_RUNQ_INSERT_HEAD(kse, thrd)			\
95	_pq_insert_head(&(kse)->k_schedq->sq_runq, thrd)
96#define	KSE_RUNQ_INSERT_TAIL(kse, thrd)			\
97	_pq_insert_tail(&(kse)->k_schedq->sq_runq, thrd)
98#define	KSE_RUNQ_REMOVE(kse, thrd)			\
99	_pq_remove(&(kse)->k_schedq->sq_runq, thrd)
100#define	KSE_RUNQ_FIRST(kse)				\
101	((_libkse_debug == 0) ?				\
102	 _pq_first(&(kse)->k_schedq->sq_runq) :		\
103	 _pq_first_debug(&(kse)->k_schedq->sq_runq))
104
105#define KSE_RUNQ_THREADS(kse)	((kse)->k_schedq->sq_runq.pq_threads)
106
107#define THR_NEED_CANCEL(thrd)						\
108	 (((thrd)->cancelflags & THR_CANCELLING) != 0 &&		\
109	  ((thrd)->cancelflags & PTHREAD_CANCEL_DISABLE) == 0 &&	\
110	  (((thrd)->cancelflags & THR_AT_CANCEL_POINT) != 0 ||		\
111	   ((thrd)->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
112
113#define THR_NEED_ASYNC_CANCEL(thrd)					\
114	 (((thrd)->cancelflags & THR_CANCELLING) != 0 &&		\
115	  ((thrd)->cancelflags & PTHREAD_CANCEL_DISABLE) == 0 &&	\
116	  (((thrd)->cancelflags & THR_AT_CANCEL_POINT) == 0 &&		\
117	   ((thrd)->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
118
119/*
120 * We've got to keep track of everything that is allocated, not only
121 * to have a speedy free list, but also so they can be deallocated
122 * after a fork().
123 */
124static TAILQ_HEAD(, kse)	active_kseq;
125static TAILQ_HEAD(, kse)	free_kseq;
126static TAILQ_HEAD(, kse_group)	free_kse_groupq;
127static TAILQ_HEAD(, kse_group)	active_kse_groupq;
128static TAILQ_HEAD(, kse_group)	gc_ksegq;
129static struct lock		kse_lock;	/* also used for kseg queue */
130static int			free_kse_count = 0;
131static int			free_kseg_count = 0;
132static TAILQ_HEAD(, pthread)	free_threadq;
133static struct lock		thread_lock;
134static int			free_thread_count = 0;
135static int			inited = 0;
136static int			active_kse_count = 0;
137static int			active_kseg_count = 0;
138static u_int64_t		next_uniqueid = 1;
139
140LIST_HEAD(thread_hash_head, pthread);
141#define THREAD_HASH_QUEUES	127
142static struct thread_hash_head	thr_hashtable[THREAD_HASH_QUEUES];
143#define	THREAD_HASH(thrd)	((unsigned long)thrd % THREAD_HASH_QUEUES)
144
145/* Lock for thread tcb constructor/destructor */
146static pthread_mutex_t		_tcb_mutex;
147
148#ifdef DEBUG_THREAD_KERN
149static void	dump_queues(struct kse *curkse);
150#endif
151static void	kse_check_completed(struct kse *kse);
152static void	kse_check_waitq(struct kse *kse);
153static void	kse_fini(struct kse *curkse);
154static void	kse_reinit(struct kse *kse, int sys_scope);
155static void	kse_sched_multi(struct kse_mailbox *kmbx);
156static void	kse_sched_single(struct kse_mailbox *kmbx);
157static void	kse_switchout_thread(struct kse *kse, struct pthread *thread);
158static void	kse_wait(struct kse *kse, struct pthread *td_wait, int sigseq);
159static void	kse_free_unlocked(struct kse *kse);
160static void	kse_destroy(struct kse *kse);
161static void	kseg_free_unlocked(struct kse_group *kseg);
162static void	kseg_init(struct kse_group *kseg);
163static void	kseg_reinit(struct kse_group *kseg);
164static void	kseg_destroy(struct kse_group *kseg);
165static void	kse_waitq_insert(struct pthread *thread);
166static void	kse_wakeup_multi(struct kse *curkse);
167static struct kse_mailbox *kse_wakeup_one(struct pthread *thread);
168static void	thr_cleanup(struct kse *kse, struct pthread *curthread);
169static void	thr_link(struct pthread *thread);
170static void	thr_resume_wrapper(int sig, siginfo_t *, ucontext_t *);
171static void	thr_resume_check(struct pthread *curthread, ucontext_t *ucp);
172static int	thr_timedout(struct pthread *thread, struct timespec *curtime);
173static void	thr_unlink(struct pthread *thread);
174static void	thr_destroy(struct pthread *curthread, struct pthread *thread);
175static void	thread_gc(struct pthread *thread);
176static void	kse_gc(struct pthread *thread);
177static void	kseg_gc(struct pthread *thread);
178
179static void __inline
180thr_accounting(struct pthread *thread)
181{
182	if ((thread->slice_usec != -1) &&
183	    (thread->slice_usec <= TIMESLICE_USEC) &&
184	    (thread->attr.sched_policy != SCHED_FIFO)) {
185		thread->slice_usec += (thread->tcb->tcb_tmbx.tm_uticks
186		    + thread->tcb->tcb_tmbx.tm_sticks) * _clock_res_usec;
187		/* Check for time quantum exceeded: */
188		if (thread->slice_usec > TIMESLICE_USEC)
189			thread->slice_usec = -1;
190	}
191	thread->tcb->tcb_tmbx.tm_uticks = 0;
192	thread->tcb->tcb_tmbx.tm_sticks = 0;
193}
194
195/*
196 * This is called after a fork().
197 * No locks need to be taken here since we are guaranteed to be
198 * single threaded.
199 *
200 * XXX
201 * POSIX says for threaded process, fork() function is used
202 * only to run new programs, and the effects of calling functions
203 * that require certain resources between the call to fork() and
204 * the call to an exec function are undefined.
205 *
206 * It is not safe to free memory after fork(), because these data
207 * structures may be in inconsistent state.
208 */
209void
210_kse_single_thread(struct pthread *curthread)
211{
212#ifdef NOTYET
213	struct kse *kse;
214	struct kse_group *kseg;
215	struct pthread *thread;
216
217	_thr_spinlock_init();
218	*__malloc_lock = (spinlock_t)_SPINLOCK_INITIALIZER;
219	if (__isthreaded) {
220		_thr_rtld_fini();
221		_thr_signal_deinit();
222	}
223	__isthreaded = 0;
224	/*
225	 * Restore signal mask early, so any memory problems could
226	 * dump core.
227	 */
228	__sys_sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
229	_thread_active_threads = 1;
230
231	/*
232	 * Enter a loop to remove and free all threads other than
233	 * the running thread from the active thread list:
234	 */
235	while ((thread = TAILQ_FIRST(&_thread_list)) != NULL) {
236		THR_GCLIST_REMOVE(thread);
237		/*
238		 * Remove this thread from the list (the current
239		 * thread will be removed but re-added by libpthread
240		 * initialization.
241		 */
242		TAILQ_REMOVE(&_thread_list, thread, tle);
243		/* Make sure this isn't the running thread: */
244		if (thread != curthread) {
245			_thr_stack_free(&thread->attr);
246			if (thread->specific != NULL)
247				free(thread->specific);
248			thr_destroy(curthread, thread);
249		}
250	}
251
252	TAILQ_INIT(&curthread->mutexq);		/* initialize mutex queue */
253	curthread->joiner = NULL;		/* no joining threads yet */
254	curthread->refcount = 0;
255	SIGEMPTYSET(curthread->sigpend);	/* clear pending signals */
256
257	/* Don't free thread-specific data as the caller may require it */
258
259	/* Free the free KSEs: */
260	while ((kse = TAILQ_FIRST(&free_kseq)) != NULL) {
261		TAILQ_REMOVE(&free_kseq, kse, k_qe);
262		kse_destroy(kse);
263	}
264	free_kse_count = 0;
265
266	/* Free the active KSEs: */
267	while ((kse = TAILQ_FIRST(&active_kseq)) != NULL) {
268		TAILQ_REMOVE(&active_kseq, kse, k_qe);
269		kse_destroy(kse);
270	}
271	active_kse_count = 0;
272
273	/* Free the free KSEGs: */
274	while ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) {
275		TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
276		kseg_destroy(kseg);
277	}
278	free_kseg_count = 0;
279
280	/* Free the active KSEGs: */
281	while ((kseg = TAILQ_FIRST(&active_kse_groupq)) != NULL) {
282		TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe);
283		kseg_destroy(kseg);
284	}
285	active_kseg_count = 0;
286
287	/* Free the free threads. */
288	while ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
289		TAILQ_REMOVE(&free_threadq, thread, tle);
290		thr_destroy(curthread, thread);
291	}
292	free_thread_count = 0;
293
294	/* Free the to-be-gc'd threads. */
295	while ((thread = TAILQ_FIRST(&_thread_gc_list)) != NULL) {
296		TAILQ_REMOVE(&_thread_gc_list, thread, gcle);
297		thr_destroy(curthread, thread);
298	}
299	TAILQ_INIT(&gc_ksegq);
300	_gc_count = 0;
301
302	if (inited != 0) {
303		/*
304		 * Destroy these locks; they'll be recreated to assure they
305		 * are in the unlocked state.
306		 */
307		_lock_destroy(&kse_lock);
308		_lock_destroy(&thread_lock);
309		_lock_destroy(&_thread_list_lock);
310		inited = 0;
311	}
312
313	/*
314	 * After a fork(), the leftover thread goes back to being
315	 * scope process.
316	 */
317	curthread->attr.flags &= ~PTHREAD_SCOPE_SYSTEM;
318	curthread->attr.flags |= PTHREAD_SCOPE_PROCESS;
319
320	/* We're no longer part of any lists */
321	curthread->tlflags = 0;
322
323	/*
324	 * After a fork, we are still operating on the thread's original
325	 * stack.  Don't clear the THR_FLAGS_USER from the thread's
326	 * attribute flags.
327	 */
328
329	/* Initialize the threads library. */
330	curthread->kse = NULL;
331	curthread->kseg = NULL;
332	_kse_initial = NULL;
333	_libpthread_init(curthread);
334#else
335	int i;
336
337	/* Reset the current thread and KSE lock data. */
338	for (i = 0; i < curthread->locklevel; i++) {
339		_lockuser_reinit(&curthread->lockusers[i], (void *)curthread);
340	}
341	curthread->locklevel = 0;
342	for (i = 0; i < curthread->kse->k_locklevel; i++) {
343		_lockuser_reinit(&curthread->kse->k_lockusers[i],
344		    (void *)curthread->kse);
345		_LCK_SET_PRIVATE2(&curthread->kse->k_lockusers[i], NULL);
346	}
347	curthread->kse->k_locklevel = 0;
348	_thr_spinlock_init();
349	if (__isthreaded) {
350		_thr_rtld_fini();
351		_thr_signal_deinit();
352	}
353	__isthreaded = 0;
354	curthread->kse->k_kcb->kcb_kmbx.km_curthread = NULL;
355	curthread->attr.flags |= PTHREAD_SCOPE_SYSTEM;
356
357	/* After a fork(), there child should have no pending signals. */
358	sigemptyset(&curthread->sigpend);
359
360	/*
361	 * Restore signal mask early, so any memory problems could
362	 * dump core.
363	 */
364	sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
365	_thread_active_threads = 1;
366#endif
367}
368
369/*
370 * This is used to initialize housekeeping and to initialize the
371 * KSD for the KSE.
372 */
373void
374_kse_init(void)
375{
376	if (inited == 0) {
377		TAILQ_INIT(&active_kseq);
378		TAILQ_INIT(&active_kse_groupq);
379		TAILQ_INIT(&free_kseq);
380		TAILQ_INIT(&free_kse_groupq);
381		TAILQ_INIT(&free_threadq);
382		TAILQ_INIT(&gc_ksegq);
383		if (_lock_init(&kse_lock, LCK_ADAPTIVE,
384		    _kse_lock_wait, _kse_lock_wakeup) != 0)
385			PANIC("Unable to initialize free KSE queue lock");
386		if (_lock_init(&thread_lock, LCK_ADAPTIVE,
387		    _kse_lock_wait, _kse_lock_wakeup) != 0)
388			PANIC("Unable to initialize free thread queue lock");
389		if (_lock_init(&_thread_list_lock, LCK_ADAPTIVE,
390		    _kse_lock_wait, _kse_lock_wakeup) != 0)
391			PANIC("Unable to initialize thread list lock");
392		_pthread_mutex_init(&_tcb_mutex, NULL);
393		active_kse_count = 0;
394		active_kseg_count = 0;
395		_gc_count = 0;
396		inited = 1;
397	}
398}
399
400/*
401 * This is called when the first thread (other than the initial
402 * thread) is created.
403 */
404int
405_kse_setthreaded(int threaded)
406{
407	sigset_t sigset;
408
409	if ((threaded != 0) && (__isthreaded == 0)) {
410		SIGFILLSET(sigset);
411		__sys_sigprocmask(SIG_SETMASK, &sigset, &_thr_initial->sigmask);
412
413		/*
414		 * Tell the kernel to create a KSE for the initial thread
415		 * and enable upcalls in it.
416		 */
417		_kse_initial->k_flags |= KF_STARTED;
418
419		if (_thread_scope_system <= 0) {
420			_thr_initial->attr.flags &= ~PTHREAD_SCOPE_SYSTEM;
421			_kse_initial->k_kseg->kg_flags &= ~KGF_SINGLE_THREAD;
422			_kse_initial->k_kcb->kcb_kmbx.km_curthread = NULL;
423		}
424		else {
425			/*
426			 * For bound thread, kernel reads mailbox pointer
427			 * once, we'd set it here before calling kse_create.
428			 */
429			_tcb_set(_kse_initial->k_kcb, _thr_initial->tcb);
430			KSE_SET_MBOX(_kse_initial, _thr_initial);
431			_kse_initial->k_kcb->kcb_kmbx.km_flags |= KMF_BOUND;
432		}
433
434		/*
435		 * Locking functions in libc are required when there are
436		 * threads other than the initial thread.
437		 */
438		_thr_rtld_init();
439
440		__isthreaded = 1;
441		if (kse_create(&_kse_initial->k_kcb->kcb_kmbx, 0) != 0) {
442			_kse_initial->k_flags &= ~KF_STARTED;
443			__isthreaded = 0;
444			PANIC("kse_create() failed\n");
445			return (-1);
446		}
447		_thr_initial->tcb->tcb_tmbx.tm_lwp =
448			_kse_initial->k_kcb->kcb_kmbx.km_lwp;
449		_thread_activated = 1;
450
451#ifndef SYSTEM_SCOPE_ONLY
452		if (_thread_scope_system <= 0) {
453			/* Set current thread to initial thread */
454			_tcb_set(_kse_initial->k_kcb, _thr_initial->tcb);
455			KSE_SET_MBOX(_kse_initial, _thr_initial);
456			_thr_start_sig_daemon();
457			_thr_setmaxconcurrency();
458		}
459		else
460#endif
461			__sys_sigprocmask(SIG_SETMASK, &_thr_initial->sigmask,
462			    NULL);
463	}
464	return (0);
465}
466
467/*
468 * Lock wait and wakeup handlers for KSE locks.  These are only used by
469 * KSEs, and should never be used by threads.  KSE locks include the
470 * KSE group lock (used for locking the scheduling queue) and the
471 * kse_lock defined above.
472 *
473 * When a KSE lock attempt blocks, the entire KSE blocks allowing another
474 * KSE to run.  For the most part, it doesn't make much sense to try and
475 * schedule another thread because you need to lock the scheduling queue
476 * in order to do that.  And since the KSE lock is used to lock the scheduling
477 * queue, you would just end up blocking again.
478 */
479void
480_kse_lock_wait(struct lock *lock, struct lockuser *lu)
481{
482	struct kse *curkse = (struct kse *)_LCK_GET_PRIVATE(lu);
483	struct timespec ts;
484	int saved_flags;
485
486	if (curkse->k_kcb->kcb_kmbx.km_curthread != NULL)
487		PANIC("kse_lock_wait does not disable upcall.\n");
488	/*
489	 * Enter a loop to wait until we get the lock.
490	 */
491	ts.tv_sec = 0;
492	ts.tv_nsec = 1000000;  /* 1 sec */
493	while (!_LCK_GRANTED(lu)) {
494		/*
495		 * Yield the kse and wait to be notified when the lock
496		 * is granted.
497		 */
498		saved_flags = curkse->k_kcb->kcb_kmbx.km_flags;
499		curkse->k_kcb->kcb_kmbx.km_flags |= KMF_NOUPCALL |
500		    KMF_NOCOMPLETED;
501		kse_release(&ts);
502		curkse->k_kcb->kcb_kmbx.km_flags = saved_flags;
503	}
504}
505
506void
507_kse_lock_wakeup(struct lock *lock, struct lockuser *lu)
508{
509	struct kse *curkse;
510	struct kse *kse;
511	struct kse_mailbox *mbx;
512
513	curkse = _get_curkse();
514	kse = (struct kse *)_LCK_GET_PRIVATE(lu);
515
516	if (kse == curkse)
517		PANIC("KSE trying to wake itself up in lock");
518	else {
519		mbx = &kse->k_kcb->kcb_kmbx;
520		_lock_grant(lock, lu);
521		/*
522		 * Notify the owning kse that it has the lock.
523		 * It is safe to pass invalid address to kse_wakeup
524		 * even if the mailbox is not in kernel at all,
525		 * and waking up a wrong kse is also harmless.
526		 */
527		kse_wakeup(mbx);
528	}
529}
530
531/*
532 * Thread wait and wakeup handlers for thread locks.  These are only used
533 * by threads, never by KSEs.  Thread locks include the per-thread lock
534 * (defined in its structure), and condition variable and mutex locks.
535 */
536void
537_thr_lock_wait(struct lock *lock, struct lockuser *lu)
538{
539	struct pthread *curthread = (struct pthread *)lu->lu_private;
540
541	do {
542		THR_LOCK_SWITCH(curthread);
543		THR_SET_STATE(curthread, PS_LOCKWAIT);
544		_thr_sched_switch_unlocked(curthread);
545	} while (!_LCK_GRANTED(lu));
546}
547
548void
549_thr_lock_wakeup(struct lock *lock, struct lockuser *lu)
550{
551	struct pthread *thread;
552	struct pthread *curthread;
553	struct kse_mailbox *kmbx;
554
555	curthread = _get_curthread();
556	thread = (struct pthread *)_LCK_GET_PRIVATE(lu);
557
558	THR_SCHED_LOCK(curthread, thread);
559	_lock_grant(lock, lu);
560	kmbx = _thr_setrunnable_unlocked(thread);
561	THR_SCHED_UNLOCK(curthread, thread);
562	if (kmbx != NULL)
563		kse_wakeup(kmbx);
564}
565
566kse_critical_t
567_kse_critical_enter(void)
568{
569	kse_critical_t crit;
570
571	crit = (kse_critical_t)_kcb_critical_enter();
572	return (crit);
573}
574
575void
576_kse_critical_leave(kse_critical_t crit)
577{
578	struct pthread *curthread;
579
580	_kcb_critical_leave((struct kse_thr_mailbox *)crit);
581	if ((crit != NULL) && ((curthread = _get_curthread()) != NULL))
582		THR_YIELD_CHECK(curthread);
583}
584
585int
586_kse_in_critical(void)
587{
588	return (_kcb_in_critical());
589}
590
591void
592_thr_critical_enter(struct pthread *thread)
593{
594	thread->critical_count++;
595}
596
597void
598_thr_critical_leave(struct pthread *thread)
599{
600	thread->critical_count--;
601	THR_YIELD_CHECK(thread);
602}
603
604void
605_thr_sched_switch(struct pthread *curthread)
606{
607	struct kse *curkse;
608
609	(void)_kse_critical_enter();
610	curkse = _get_curkse();
611	KSE_SCHED_LOCK(curkse, curkse->k_kseg);
612	_thr_sched_switch_unlocked(curthread);
613}
614
615/*
616 * XXX - We may need to take the scheduling lock before calling
617 *       this, or perhaps take the lock within here before
618 *       doing anything else.
619 */
620void
621_thr_sched_switch_unlocked(struct pthread *curthread)
622{
623	struct kse *curkse;
624	volatile int resume_once = 0;
625	ucontext_t *uc;
626
627	/* We're in the scheduler, 5 by 5: */
628	curkse = curthread->kse;
629
630	curthread->need_switchout = 1;	/* The thread yielded on its own. */
631	curthread->critical_yield = 0;	/* No need to yield anymore. */
632
633	/* Thread can unlock the scheduler lock. */
634	curthread->lock_switch = 1;
635
636	if (curthread->attr.flags & PTHREAD_SCOPE_SYSTEM)
637		kse_sched_single(&curkse->k_kcb->kcb_kmbx);
638	else {
639		if (__predict_false(_libkse_debug != 0)) {
640			/*
641			 * Because debugger saves single step status in thread
642			 * mailbox's tm_dflags, we can safely clear single
643			 * step status here. the single step status will be
644			 * restored by kse_switchin when the thread is
645			 * switched in again. This also lets uts run in full
646			 * speed.
647			 */
648			 ptrace(PT_CLEARSTEP, curkse->k_kcb->kcb_kmbx.km_lwp,
649				(caddr_t) 1, 0);
650		}
651
652		KSE_SET_SWITCH(curkse);
653		_thread_enter_uts(curthread->tcb, curkse->k_kcb);
654	}
655
656	/*
657	 * Unlock the scheduling queue and leave the
658	 * critical region.
659	 */
660	/* Don't trust this after a switch! */
661	curkse = curthread->kse;
662
663	curthread->lock_switch = 0;
664	KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
665	_kse_critical_leave(&curthread->tcb->tcb_tmbx);
666
667	/*
668	 * This thread is being resumed; check for cancellations.
669	 */
670	if (THR_NEED_ASYNC_CANCEL(curthread) && !THR_IN_CRITICAL(curthread)) {
671		uc = alloca(sizeof(ucontext_t));
672		resume_once = 0;
673		THR_GETCONTEXT(uc);
674		if (resume_once == 0) {
675			resume_once = 1;
676			curthread->check_pending = 0;
677			thr_resume_check(curthread, uc);
678		}
679	}
680	THR_ACTIVATE_LAST_LOCK(curthread);
681}
682
683/*
684 * This is the scheduler for a KSE which runs a scope system thread.
685 * The multi-thread KSE scheduler should also work for a single threaded
686 * KSE, but we use a separate scheduler so that it can be fine-tuned
687 * to be more efficient (and perhaps not need a separate stack for
688 * the KSE, allowing it to use the thread's stack).
689 */
690
691static void
692kse_sched_single(struct kse_mailbox *kmbx)
693{
694	struct kse *curkse;
695	struct pthread *curthread;
696	struct timespec ts;
697	sigset_t sigmask;
698	int i, sigseqno, level, first = 0;
699
700	curkse = (struct kse *)kmbx->km_udata;
701	curthread = curkse->k_curthread;
702
703	if (__predict_false((curkse->k_flags & KF_INITIALIZED) == 0)) {
704		/* Setup this KSEs specific data. */
705		_kcb_set(curkse->k_kcb);
706		_tcb_set(curkse->k_kcb, curthread->tcb);
707		curkse->k_flags |= KF_INITIALIZED;
708		first = 1;
709		curthread->active = 1;
710
711		/* Setup kernel signal masks for new thread. */
712		__sys_sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
713		/*
714		 * Enter critical region, this is meanless for bound thread,
715		 * It is used to let other code work, those code want mailbox
716		 * to be cleared.
717		 */
718		(void)_kse_critical_enter();
719 	} else {
720		/*
721		 * Bound thread always has tcb set, this prevent some
722		 * code from blindly setting bound thread tcb to NULL,
723		 * buggy code ?
724		 */
725		_tcb_set(curkse->k_kcb, curthread->tcb);
726	}
727
728	curthread->critical_yield = 0;
729	curthread->need_switchout = 0;
730
731	/*
732	 * Lock the scheduling queue.
733	 *
734	 * There is no scheduling queue for single threaded KSEs,
735	 * but we need a lock for protection regardless.
736	 */
737	if (curthread->lock_switch == 0)
738		KSE_SCHED_LOCK(curkse, curkse->k_kseg);
739
740	/*
741	 * This has to do the job of kse_switchout_thread(), only
742	 * for a single threaded KSE/KSEG.
743	 */
744
745	switch (curthread->state) {
746	case PS_MUTEX_WAIT:
747	case PS_COND_WAIT:
748		if (THR_NEED_CANCEL(curthread)) {
749			curthread->interrupted = 1;
750			curthread->continuation = _thr_finish_cancellation;
751			THR_SET_STATE(curthread, PS_RUNNING);
752		}
753		break;
754
755	case PS_LOCKWAIT:
756		/*
757		 * This state doesn't timeout.
758		 */
759		curthread->wakeup_time.tv_sec = -1;
760		curthread->wakeup_time.tv_nsec = -1;
761		level = curthread->locklevel - 1;
762		if (_LCK_GRANTED(&curthread->lockusers[level]))
763			THR_SET_STATE(curthread, PS_RUNNING);
764		break;
765
766	case PS_DEAD:
767		curthread->check_pending = 0;
768		/* Unlock the scheduling queue and exit the KSE and thread. */
769		thr_cleanup(curkse, curthread);
770		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
771		PANIC("bound thread shouldn't get here\n");
772		break;
773
774	case PS_JOIN:
775		if (THR_NEED_CANCEL(curthread)) {
776			curthread->join_status.thread = NULL;
777			THR_SET_STATE(curthread, PS_RUNNING);
778		} else {
779			/*
780			 * This state doesn't timeout.
781			 */
782			curthread->wakeup_time.tv_sec = -1;
783			curthread->wakeup_time.tv_nsec = -1;
784		}
785		break;
786
787	case PS_SUSPENDED:
788		if (THR_NEED_CANCEL(curthread)) {
789			curthread->interrupted = 1;
790			THR_SET_STATE(curthread, PS_RUNNING);
791		} else {
792			/*
793			 * These states don't timeout.
794			 */
795			curthread->wakeup_time.tv_sec = -1;
796			curthread->wakeup_time.tv_nsec = -1;
797		}
798		break;
799
800	case PS_RUNNING:
801		if ((curthread->flags & THR_FLAGS_SUSPENDED) != 0 &&
802		    !THR_NEED_CANCEL(curthread)) {
803			THR_SET_STATE(curthread, PS_SUSPENDED);
804			/*
805			 * These states don't timeout.
806			 */
807			curthread->wakeup_time.tv_sec = -1;
808			curthread->wakeup_time.tv_nsec = -1;
809		}
810		break;
811
812	case PS_SIGWAIT:
813		PANIC("bound thread does not have SIGWAIT state\n");
814
815	case PS_SLEEP_WAIT:
816		PANIC("bound thread does not have SLEEP_WAIT state\n");
817
818	case PS_SIGSUSPEND:
819		PANIC("bound thread does not have SIGSUSPEND state\n");
820
821	case PS_DEADLOCK:
822		/*
823		 * These states don't timeout and don't need
824		 * to be in the waiting queue.
825		 */
826		curthread->wakeup_time.tv_sec = -1;
827		curthread->wakeup_time.tv_nsec = -1;
828		break;
829
830	default:
831		PANIC("Unknown state\n");
832		break;
833	}
834
835	while (curthread->state != PS_RUNNING) {
836		sigseqno = curkse->k_sigseqno;
837		if (curthread->check_pending != 0) {
838			/*
839			 * Install pending signals into the frame, possible
840			 * cause mutex or condvar backout.
841			 */
842			curthread->check_pending = 0;
843			SIGFILLSET(sigmask);
844
845			/*
846			 * Lock out kernel signal code when we are processing
847			 * signals, and get a fresh copy of signal mask.
848			 */
849			__sys_sigprocmask(SIG_SETMASK, &sigmask,
850					  &curthread->sigmask);
851			for (i = 1; i <= _SIG_MAXSIG; i++) {
852				if (SIGISMEMBER(curthread->sigmask, i))
853					continue;
854				if (SIGISMEMBER(curthread->sigpend, i))
855					(void)_thr_sig_add(curthread, i,
856					    &curthread->siginfo[i-1]);
857			}
858			__sys_sigprocmask(SIG_SETMASK, &curthread->sigmask,
859				NULL);
860			/* The above code might make thread runnable */
861			if (curthread->state == PS_RUNNING)
862				break;
863		}
864		THR_DEACTIVATE_LAST_LOCK(curthread);
865		kse_wait(curkse, curthread, sigseqno);
866		THR_ACTIVATE_LAST_LOCK(curthread);
867		if (curthread->wakeup_time.tv_sec >= 0) {
868			KSE_GET_TOD(curkse, &ts);
869			if (thr_timedout(curthread, &ts)) {
870				/* Indicate the thread timedout: */
871				curthread->timeout = 1;
872				/* Make the thread runnable. */
873				THR_SET_STATE(curthread, PS_RUNNING);
874			}
875		}
876	}
877
878	if (curthread->lock_switch == 0) {
879		/* Unlock the scheduling queue. */
880		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
881	}
882
883	DBG_MSG("Continuing bound thread %p\n", curthread);
884	if (first) {
885		_kse_critical_leave(&curthread->tcb->tcb_tmbx);
886		pthread_exit(curthread->start_routine(curthread->arg));
887	}
888}
889
890#ifdef DEBUG_THREAD_KERN
891static void
892dump_queues(struct kse *curkse)
893{
894	struct pthread *thread;
895
896	DBG_MSG("Threads in waiting queue:\n");
897	TAILQ_FOREACH(thread, &curkse->k_kseg->kg_schedq.sq_waitq, pqe) {
898		DBG_MSG("  thread %p, state %d, blocked %d\n",
899		    thread, thread->state, thread->blocked);
900	}
901}
902#endif
903
904/*
905 * This is the scheduler for a KSE which runs multiple threads.
906 */
907static void
908kse_sched_multi(struct kse_mailbox *kmbx)
909{
910	struct kse *curkse;
911	struct pthread *curthread, *td_wait;
912	int ret;
913
914	curkse = (struct kse *)kmbx->km_udata;
915	THR_ASSERT(curkse->k_kcb->kcb_kmbx.km_curthread == NULL,
916	    "Mailbox not null in kse_sched_multi");
917
918	/* Check for first time initialization: */
919	if (__predict_false((curkse->k_flags & KF_INITIALIZED) == 0)) {
920		/* Setup this KSEs specific data. */
921		_kcb_set(curkse->k_kcb);
922
923		/* Set this before grabbing the context. */
924		curkse->k_flags |= KF_INITIALIZED;
925	}
926
927	/*
928	 * No current thread anymore, calling _get_curthread in UTS
929	 * should dump core
930	 */
931	_tcb_set(curkse->k_kcb, NULL);
932
933	/* If this is an upcall; take the scheduler lock. */
934	if (!KSE_IS_SWITCH(curkse))
935		KSE_SCHED_LOCK(curkse, curkse->k_kseg);
936	else
937		KSE_CLEAR_SWITCH(curkse);
938
939	if (KSE_IS_IDLE(curkse)) {
940		KSE_CLEAR_IDLE(curkse);
941		curkse->k_kseg->kg_idle_kses--;
942	}
943
944	/*
945	 * Now that the scheduler lock is held, get the current
946	 * thread.  The KSE's current thread cannot be safely
947	 * examined without the lock because it could have returned
948	 * as completed on another KSE.  See kse_check_completed().
949	 */
950	curthread = curkse->k_curthread;
951
952	/*
953	 * If the current thread was completed in another KSE, then
954	 * it will be in the run queue.  Don't mark it as being blocked.
955	 */
956	if ((curthread != NULL) &&
957	    ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0) &&
958	    (curthread->need_switchout == 0)) {
959		/*
960		 * Assume the current thread is blocked; when the
961		 * completed threads are checked and if the current
962		 * thread is among the completed, the blocked flag
963		 * will be cleared.
964		 */
965		curthread->blocked = 1;
966		DBG_MSG("Running thread %p is now blocked in kernel.\n",
967		    curthread);
968	}
969
970	/* Check for any unblocked threads in the kernel. */
971	kse_check_completed(curkse);
972
973	/*
974	 * Check for threads that have timed-out.
975	 */
976	kse_check_waitq(curkse);
977
978	/*
979	 * Switchout the current thread, if necessary, as the last step
980	 * so that it is inserted into the run queue (if it's runnable)
981	 * _after_ any other threads that were added to it above.
982	 */
983	if (curthread == NULL)
984		;  /* Nothing to do here. */
985	else if ((curthread->need_switchout == 0) && DBG_CAN_RUN(curthread) &&
986	    (curthread->blocked == 0) && (THR_IN_CRITICAL(curthread))) {
987		/*
988		 * Resume the thread and tell it to yield when
989		 * it leaves the critical region.
990		 */
991		curthread->critical_yield = 1;
992		curthread->active = 1;
993		if ((curthread->flags & THR_FLAGS_IN_RUNQ) != 0)
994			KSE_RUNQ_REMOVE(curkse, curthread);
995		curkse->k_curthread = curthread;
996		curthread->kse = curkse;
997		DBG_MSG("Continuing thread %p in critical region\n",
998		    curthread);
999		kse_wakeup_multi(curkse);
1000		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1001		ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
1002		if (ret != 0)
1003			PANIC("Can't resume thread in critical region\n");
1004	}
1005	else if ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0) {
1006		curthread->tcb->tcb_tmbx.tm_lwp = 0;
1007		kse_switchout_thread(curkse, curthread);
1008	}
1009	curkse->k_curthread = NULL;
1010
1011#ifdef DEBUG_THREAD_KERN
1012	dump_queues(curkse);
1013#endif
1014
1015	/* Check if there are no threads ready to run: */
1016	while (((curthread = KSE_RUNQ_FIRST(curkse)) == NULL) &&
1017	    (curkse->k_kseg->kg_threadcount != 0) &&
1018	    ((curkse->k_flags & KF_TERMINATED) == 0)) {
1019		/*
1020		 * Wait for a thread to become active or until there are
1021		 * no more threads.
1022		 */
1023		td_wait = KSE_WAITQ_FIRST(curkse);
1024		kse_wait(curkse, td_wait, 0);
1025		kse_check_completed(curkse);
1026		kse_check_waitq(curkse);
1027	}
1028
1029	/* Check for no more threads: */
1030	if ((curkse->k_kseg->kg_threadcount == 0) ||
1031	    ((curkse->k_flags & KF_TERMINATED) != 0)) {
1032		/*
1033		 * Normally this shouldn't return, but it will if there
1034		 * are other KSEs running that create new threads that
1035		 * are assigned to this KSE[G].  For instance, if a scope
1036		 * system thread were to create a scope process thread
1037		 * and this kse[g] is the initial kse[g], then that newly
1038		 * created thread would be assigned to us (the initial
1039		 * kse[g]).
1040		 */
1041		kse_wakeup_multi(curkse);
1042		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1043		kse_fini(curkse);
1044		/* never returns */
1045	}
1046
1047	THR_ASSERT(curthread != NULL,
1048	    "Return from kse_wait/fini without thread.");
1049	THR_ASSERT(curthread->state != PS_DEAD,
1050	    "Trying to resume dead thread!");
1051	KSE_RUNQ_REMOVE(curkse, curthread);
1052
1053	/*
1054	 * Make the selected thread the current thread.
1055	 */
1056	curkse->k_curthread = curthread;
1057
1058	/*
1059	 * Make sure the current thread's kse points to this kse.
1060	 */
1061	curthread->kse = curkse;
1062
1063	/*
1064	 * Reset the time slice if this thread is running for the first
1065	 * time or running again after using its full time slice allocation.
1066	 */
1067	if (curthread->slice_usec == -1)
1068		curthread->slice_usec = 0;
1069
1070	/* Mark the thread active. */
1071	curthread->active = 1;
1072
1073	/*
1074	 * The thread's current signal frame will only be NULL if it
1075	 * is being resumed after being blocked in the kernel.  In
1076	 * this case, and if the thread needs to run down pending
1077	 * signals or needs a cancellation check, we need to add a
1078	 * signal frame to the thread's context.
1079	 */
1080	if (curthread->lock_switch == 0 && curthread->state == PS_RUNNING &&
1081	    (curthread->check_pending != 0 ||
1082	     THR_NEED_ASYNC_CANCEL(curthread)) &&
1083	    !THR_IN_CRITICAL(curthread)) {
1084		curthread->check_pending = 0;
1085		signalcontext(&curthread->tcb->tcb_tmbx.tm_context, 0,
1086		    (__sighandler_t *)thr_resume_wrapper);
1087	}
1088	kse_wakeup_multi(curkse);
1089	/*
1090	 * Continue the thread at its current frame:
1091	 */
1092	if (curthread->lock_switch != 0) {
1093		/*
1094		 * This thread came from a scheduler switch; it will
1095		 * unlock the scheduler lock and set the mailbox.
1096		 */
1097		ret = _thread_switch(curkse->k_kcb, curthread->tcb, 0);
1098	} else {
1099		/* This thread won't unlock the scheduler lock. */
1100		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1101		ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
1102	}
1103	if (ret != 0)
1104		PANIC("Thread has returned from _thread_switch");
1105
1106	/* This point should not be reached. */
1107	PANIC("Thread has returned from _thread_switch");
1108}
1109
1110static void
1111thr_resume_wrapper(int sig, siginfo_t *siginfo, ucontext_t *ucp)
1112{
1113	struct pthread *curthread = _get_curthread();
1114	struct kse *curkse;
1115	int ret, err_save = errno;
1116
1117	DBG_MSG(">>> sig wrapper\n");
1118	if (curthread->lock_switch)
1119		PANIC("thr_resume_wrapper, lock_switch != 0\n");
1120	thr_resume_check(curthread, ucp);
1121	errno = err_save;
1122	_kse_critical_enter();
1123	curkse = curthread->kse;
1124	curthread->tcb->tcb_tmbx.tm_context = *ucp;
1125	ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
1126	if (ret != 0)
1127		PANIC("thr_resume_wrapper: thread has returned "
1128		      "from _thread_switch");
1129	/* THR_SETCONTEXT(ucp); */ /* not work, why ? */
1130}
1131
1132static void
1133thr_resume_check(struct pthread *curthread, ucontext_t *ucp)
1134{
1135	_thr_sig_rundown(curthread, ucp);
1136
1137	if (THR_NEED_ASYNC_CANCEL(curthread))
1138		pthread_testcancel();
1139}
1140
1141/*
1142 * Clean up a thread.  This must be called with the thread's KSE
1143 * scheduling lock held.  The thread must be a thread from the
1144 * KSE's group.
1145 */
1146static void
1147thr_cleanup(struct kse *curkse, struct pthread *thread)
1148{
1149	struct pthread *joiner;
1150	struct kse_mailbox *kmbx = NULL;
1151	int sys_scope;
1152
1153	if ((joiner = thread->joiner) != NULL) {
1154		/* Joinee scheduler lock held; joiner won't leave. */
1155		if (joiner->kseg == curkse->k_kseg) {
1156			if (joiner->join_status.thread == thread) {
1157				joiner->join_status.thread = NULL;
1158				joiner->join_status.ret = thread->ret;
1159				(void)_thr_setrunnable_unlocked(joiner);
1160			}
1161		} else {
1162			KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1163			/* The joiner may have removed itself and exited. */
1164			if (_thr_ref_add(thread, joiner, 0) == 0) {
1165				KSE_SCHED_LOCK(curkse, joiner->kseg);
1166				if (joiner->join_status.thread == thread) {
1167					joiner->join_status.thread = NULL;
1168					joiner->join_status.ret = thread->ret;
1169					kmbx = _thr_setrunnable_unlocked(joiner);
1170				}
1171				KSE_SCHED_UNLOCK(curkse, joiner->kseg);
1172				_thr_ref_delete(thread, joiner);
1173				if (kmbx != NULL)
1174					kse_wakeup(kmbx);
1175			}
1176			KSE_SCHED_LOCK(curkse, curkse->k_kseg);
1177		}
1178		thread->attr.flags |= PTHREAD_DETACHED;
1179	}
1180
1181	if (!(sys_scope = (thread->attr.flags & PTHREAD_SCOPE_SYSTEM))) {
1182		/*
1183		 * Remove the thread from the KSEG's list of threads.
1184	 	 */
1185		KSEG_THRQ_REMOVE(thread->kseg, thread);
1186		/*
1187		 * Migrate the thread to the main KSE so that this
1188		 * KSE and KSEG can be cleaned when their last thread
1189		 * exits.
1190		 */
1191		thread->kseg = _kse_initial->k_kseg;
1192		thread->kse = _kse_initial;
1193	}
1194
1195	/*
1196	 * We can't hold the thread list lock while holding the
1197	 * scheduler lock.
1198	 */
1199	KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1200	DBG_MSG("Adding thread %p to GC list\n", thread);
1201	KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
1202	thread->tlflags |= TLFLAGS_GC_SAFE;
1203	THR_GCLIST_ADD(thread);
1204	KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
1205	if (sys_scope) {
1206		/*
1207		 * System scope thread is single thread group,
1208		 * when thread is exited, its kse and ksegrp should
1209		 * be recycled as well.
1210		 * kse upcall stack belongs to thread, clear it here.
1211		 */
1212		curkse->k_stack.ss_sp = 0;
1213		curkse->k_stack.ss_size = 0;
1214		kse_exit();
1215		PANIC("kse_exit() failed for system scope thread");
1216	}
1217	KSE_SCHED_LOCK(curkse, curkse->k_kseg);
1218}
1219
1220void
1221_thr_gc(struct pthread *curthread)
1222{
1223	thread_gc(curthread);
1224	kse_gc(curthread);
1225	kseg_gc(curthread);
1226}
1227
1228static void
1229thread_gc(struct pthread *curthread)
1230{
1231	struct pthread *td, *td_next;
1232	kse_critical_t crit;
1233	TAILQ_HEAD(, pthread) worklist;
1234
1235	TAILQ_INIT(&worklist);
1236	crit = _kse_critical_enter();
1237	KSE_LOCK_ACQUIRE(curthread->kse, &_thread_list_lock);
1238
1239	/* Check the threads waiting for GC. */
1240	for (td = TAILQ_FIRST(&_thread_gc_list); td != NULL; td = td_next) {
1241		td_next = TAILQ_NEXT(td, gcle);
1242		if ((td->tlflags & TLFLAGS_GC_SAFE) == 0)
1243			continue;
1244		else if (((td->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) &&
1245		    ((td->kse->k_kcb->kcb_kmbx.km_flags & KMF_DONE) == 0)) {
1246			/*
1247			 * The thread and KSE are operating on the same
1248			 * stack.  Wait for the KSE to exit before freeing
1249			 * the thread's stack as well as everything else.
1250			 */
1251			continue;
1252		}
1253		/*
1254		 * Remove the thread from the GC list.  If the thread
1255		 * isn't yet detached, it will get added back to the
1256		 * GC list at a later time.
1257		 */
1258		THR_GCLIST_REMOVE(td);
1259		DBG_MSG("Freeing thread %p stack\n", td);
1260		/*
1261		 * We can free the thread stack since it's no longer
1262		 * in use.
1263		 */
1264		_thr_stack_free(&td->attr);
1265		if (((td->attr.flags & PTHREAD_DETACHED) != 0) &&
1266		    (td->refcount == 0)) {
1267			/*
1268			 * The thread has detached and is no longer
1269			 * referenced.  It is safe to remove all
1270			 * remnants of the thread.
1271			 */
1272			THR_LIST_REMOVE(td);
1273			TAILQ_INSERT_HEAD(&worklist, td, gcle);
1274		}
1275	}
1276	KSE_LOCK_RELEASE(curthread->kse, &_thread_list_lock);
1277	_kse_critical_leave(crit);
1278
1279	while ((td = TAILQ_FIRST(&worklist)) != NULL) {
1280		TAILQ_REMOVE(&worklist, td, gcle);
1281		/*
1282		 * XXX we don't free initial thread and its kse
1283		 * (if thread is a bound thread), because there might
1284		 * have some code referencing initial thread and kse.
1285		 */
1286		if (td == _thr_initial) {
1287			DBG_MSG("Initial thread won't be freed\n");
1288			continue;
1289		}
1290
1291		if ((td->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) {
1292			crit = _kse_critical_enter();
1293			KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1294			kse_free_unlocked(td->kse);
1295			kseg_free_unlocked(td->kseg);
1296			KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1297			_kse_critical_leave(crit);
1298		}
1299		DBG_MSG("Freeing thread %p\n", td);
1300		_thr_free(curthread, td);
1301	}
1302}
1303
1304static void
1305kse_gc(struct pthread *curthread)
1306{
1307	kse_critical_t crit;
1308	TAILQ_HEAD(, kse) worklist;
1309	struct kse *kse;
1310
1311	if (free_kse_count <= MAX_CACHED_KSES)
1312		return;
1313	TAILQ_INIT(&worklist);
1314	crit = _kse_critical_enter();
1315	KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1316	while (free_kse_count > MAX_CACHED_KSES) {
1317		kse = TAILQ_FIRST(&free_kseq);
1318		TAILQ_REMOVE(&free_kseq, kse, k_qe);
1319		TAILQ_INSERT_HEAD(&worklist, kse, k_qe);
1320		free_kse_count--;
1321	}
1322	KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1323	_kse_critical_leave(crit);
1324
1325	while ((kse = TAILQ_FIRST(&worklist))) {
1326		TAILQ_REMOVE(&worklist, kse, k_qe);
1327		kse_destroy(kse);
1328	}
1329}
1330
1331static void
1332kseg_gc(struct pthread *curthread)
1333{
1334	kse_critical_t crit;
1335	TAILQ_HEAD(, kse_group) worklist;
1336	struct kse_group *kseg;
1337
1338	if (free_kseg_count <= MAX_CACHED_KSEGS)
1339		return;
1340	crit = _kse_critical_enter();
1341	KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1342	while (free_kseg_count > MAX_CACHED_KSEGS) {
1343		kseg = TAILQ_FIRST(&free_kse_groupq);
1344		TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
1345		free_kseg_count--;
1346		TAILQ_INSERT_HEAD(&worklist, kseg, kg_qe);
1347	}
1348	KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1349	_kse_critical_leave(crit);
1350
1351	while ((kseg = TAILQ_FIRST(&worklist))) {
1352		TAILQ_REMOVE(&worklist, kseg, kg_qe);
1353		kseg_destroy(kseg);
1354	}
1355}
1356
1357/*
1358 * Only new threads that are running or suspended may be scheduled.
1359 */
1360int
1361_thr_schedule_add(struct pthread *curthread, struct pthread *newthread)
1362{
1363	kse_critical_t crit;
1364	int ret;
1365
1366	/* Add the new thread. */
1367	thr_link(newthread);
1368
1369	/*
1370	 * If this is the first time creating a thread, make sure
1371	 * the mailbox is set for the current thread.
1372	 */
1373	if ((newthread->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) {
1374		/* We use the thread's stack as the KSE's stack. */
1375		newthread->kse->k_kcb->kcb_kmbx.km_stack.ss_sp =
1376		    newthread->attr.stackaddr_attr;
1377		newthread->kse->k_kcb->kcb_kmbx.km_stack.ss_size =
1378		    newthread->attr.stacksize_attr;
1379
1380		/*
1381		 * No need to lock the scheduling queue since the
1382		 * KSE/KSEG pair have not yet been started.
1383		 */
1384		KSEG_THRQ_ADD(newthread->kseg, newthread);
1385		/* this thread never gives up kse */
1386		newthread->active = 1;
1387		newthread->kse->k_curthread = newthread;
1388		newthread->kse->k_kcb->kcb_kmbx.km_flags = KMF_BOUND;
1389		newthread->kse->k_kcb->kcb_kmbx.km_func =
1390		    (kse_func_t *)kse_sched_single;
1391		newthread->kse->k_kcb->kcb_kmbx.km_quantum = 0;
1392		KSE_SET_MBOX(newthread->kse, newthread);
1393		/*
1394		 * This thread needs a new KSE and KSEG.
1395		 */
1396		newthread->kse->k_flags &= ~KF_INITIALIZED;
1397		newthread->kse->k_flags |= KF_STARTED;
1398		/* Fire up! */
1399		ret = kse_create(&newthread->kse->k_kcb->kcb_kmbx, 1);
1400		if (ret != 0)
1401			ret = errno;
1402	}
1403	else {
1404		/*
1405		 * Lock the KSE and add the new thread to its list of
1406		 * assigned threads.  If the new thread is runnable, also
1407		 * add it to the KSE's run queue.
1408		 */
1409		crit = _kse_critical_enter();
1410		KSE_SCHED_LOCK(curthread->kse, newthread->kseg);
1411		KSEG_THRQ_ADD(newthread->kseg, newthread);
1412		if (newthread->state == PS_RUNNING)
1413			THR_RUNQ_INSERT_TAIL(newthread);
1414		if ((newthread->kse->k_flags & KF_STARTED) == 0) {
1415			/*
1416			 * This KSE hasn't been started yet.  Start it
1417			 * outside of holding the lock.
1418			 */
1419			newthread->kse->k_flags |= KF_STARTED;
1420			newthread->kse->k_kcb->kcb_kmbx.km_func =
1421			    (kse_func_t *)kse_sched_multi;
1422			newthread->kse->k_kcb->kcb_kmbx.km_flags = 0;
1423			kse_create(&newthread->kse->k_kcb->kcb_kmbx, 0);
1424		 } else if ((newthread->state == PS_RUNNING) &&
1425		     KSE_IS_IDLE(newthread->kse)) {
1426			/*
1427			 * The thread is being scheduled on another KSEG.
1428			 */
1429			kse_wakeup_one(newthread);
1430		}
1431		KSE_SCHED_UNLOCK(curthread->kse, newthread->kseg);
1432		_kse_critical_leave(crit);
1433		ret = 0;
1434	}
1435	if (ret != 0)
1436		thr_unlink(newthread);
1437
1438	return (ret);
1439}
1440
1441void
1442kse_waitq_insert(struct pthread *thread)
1443{
1444	struct pthread *td;
1445
1446	if (thread->wakeup_time.tv_sec == -1)
1447		TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq, thread,
1448		    pqe);
1449	else {
1450		td = TAILQ_FIRST(&thread->kse->k_schedq->sq_waitq);
1451		while ((td != NULL) && (td->wakeup_time.tv_sec != -1) &&
1452		    ((td->wakeup_time.tv_sec < thread->wakeup_time.tv_sec) ||
1453		    ((td->wakeup_time.tv_sec == thread->wakeup_time.tv_sec) &&
1454		    (td->wakeup_time.tv_nsec <= thread->wakeup_time.tv_nsec))))
1455			td = TAILQ_NEXT(td, pqe);
1456		if (td == NULL)
1457			TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq,
1458			    thread, pqe);
1459		else
1460			TAILQ_INSERT_BEFORE(td, thread, pqe);
1461	}
1462	thread->flags |= THR_FLAGS_IN_WAITQ;
1463}
1464
1465/*
1466 * This must be called with the scheduling lock held.
1467 */
1468static void
1469kse_check_completed(struct kse *kse)
1470{
1471	struct pthread *thread;
1472	struct kse_thr_mailbox *completed;
1473	int sig;
1474
1475	if ((completed = kse->k_kcb->kcb_kmbx.km_completed) != NULL) {
1476		kse->k_kcb->kcb_kmbx.km_completed = NULL;
1477		while (completed != NULL) {
1478			thread = completed->tm_udata;
1479			DBG_MSG("Found completed thread %p, name %s\n",
1480			    thread,
1481			    (thread->name == NULL) ? "none" : thread->name);
1482			thread->blocked = 0;
1483			if (thread != kse->k_curthread) {
1484				thr_accounting(thread);
1485				if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
1486					THR_SET_STATE(thread, PS_SUSPENDED);
1487				else
1488					KSE_RUNQ_INSERT_TAIL(kse, thread);
1489				if ((thread->kse != kse) &&
1490				    (thread->kse->k_curthread == thread)) {
1491					/*
1492					 * Remove this thread from its
1493					 * previous KSE so that it (the KSE)
1494					 * doesn't think it is still active.
1495					 */
1496					thread->kse->k_curthread = NULL;
1497					thread->active = 0;
1498				}
1499			}
1500			if ((sig = thread->tcb->tcb_tmbx.tm_syncsig.si_signo)
1501			    != 0) {
1502				if (SIGISMEMBER(thread->sigmask, sig))
1503					SIGADDSET(thread->sigpend, sig);
1504				else if (THR_IN_CRITICAL(thread))
1505					kse_thr_interrupt(NULL, KSE_INTR_SIGEXIT, sig);
1506				else
1507					(void)_thr_sig_add(thread, sig,
1508					    &thread->tcb->tcb_tmbx.tm_syncsig);
1509				thread->tcb->tcb_tmbx.tm_syncsig.si_signo = 0;
1510			}
1511			completed = completed->tm_next;
1512		}
1513	}
1514}
1515
1516/*
1517 * This must be called with the scheduling lock held.
1518 */
1519static void
1520kse_check_waitq(struct kse *kse)
1521{
1522	struct pthread	*pthread;
1523	struct timespec ts;
1524
1525	KSE_GET_TOD(kse, &ts);
1526
1527	/*
1528	 * Wake up threads that have timedout.  This has to be
1529	 * done before adding the current thread to the run queue
1530	 * so that a CPU intensive thread doesn't get preference
1531	 * over waiting threads.
1532	 */
1533	while (((pthread = KSE_WAITQ_FIRST(kse)) != NULL) &&
1534	    thr_timedout(pthread, &ts)) {
1535		/* Remove the thread from the wait queue: */
1536		KSE_WAITQ_REMOVE(kse, pthread);
1537		DBG_MSG("Found timedout thread %p in waitq\n", pthread);
1538
1539		/* Indicate the thread timedout: */
1540		pthread->timeout = 1;
1541
1542		/* Add the thread to the priority queue: */
1543		if ((pthread->flags & THR_FLAGS_SUSPENDED) != 0)
1544			THR_SET_STATE(pthread, PS_SUSPENDED);
1545		else {
1546			THR_SET_STATE(pthread, PS_RUNNING);
1547			KSE_RUNQ_INSERT_TAIL(kse, pthread);
1548		}
1549	}
1550}
1551
1552static int
1553thr_timedout(struct pthread *thread, struct timespec *curtime)
1554{
1555	if (thread->wakeup_time.tv_sec < 0)
1556		return (0);
1557	else if (thread->wakeup_time.tv_sec > curtime->tv_sec)
1558		return (0);
1559	else if ((thread->wakeup_time.tv_sec == curtime->tv_sec) &&
1560	    (thread->wakeup_time.tv_nsec > curtime->tv_nsec))
1561		return (0);
1562	else
1563		return (1);
1564}
1565
1566/*
1567 * This must be called with the scheduling lock held.
1568 *
1569 * Each thread has a time slice, a wakeup time (used when it wants
1570 * to wait for a specified amount of time), a run state, and an
1571 * active flag.
1572 *
1573 * When a thread gets run by the scheduler, the active flag is
1574 * set to non-zero (1).  When a thread performs an explicit yield
1575 * or schedules a state change, it enters the scheduler and the
1576 * active flag is cleared.  When the active flag is still seen
1577 * set in the scheduler, that means that the thread is blocked in
1578 * the kernel (because it is cleared before entering the scheduler
1579 * in all other instances).
1580 *
1581 * The wakeup time is only set for those states that can timeout.
1582 * It is set to (-1, -1) for all other instances.
1583 *
1584 * The thread's run state, aside from being useful when debugging,
1585 * is used to place the thread in an appropriate queue.  There
1586 * are 2 basic queues:
1587 *
1588 *   o run queue - queue ordered by priority for all threads
1589 *                 that are runnable
1590 *   o waiting queue - queue sorted by wakeup time for all threads
1591 *                     that are not otherwise runnable (not blocked
1592 *                     in kernel, not waiting for locks)
1593 *
1594 * The thread's time slice is used for round-robin scheduling
1595 * (the default scheduling policy).  While a SCHED_RR thread
1596 * is runnable it's time slice accumulates.  When it reaches
1597 * the time slice interval, it gets reset and added to the end
1598 * of the queue of threads at its priority.  When a thread no
1599 * longer becomes runnable (blocks in kernel, waits, etc), its
1600 * time slice is reset.
1601 *
1602 * The job of kse_switchout_thread() is to handle all of the above.
1603 */
1604static void
1605kse_switchout_thread(struct kse *kse, struct pthread *thread)
1606{
1607	int level;
1608	int i;
1609	int restart;
1610	siginfo_t siginfo;
1611
1612	/*
1613	 * Place the currently running thread into the
1614	 * appropriate queue(s).
1615	 */
1616	DBG_MSG("Switching out thread %p, state %d\n", thread, thread->state);
1617
1618	THR_DEACTIVATE_LAST_LOCK(thread);
1619	if (thread->blocked != 0) {
1620		thread->active = 0;
1621		thread->need_switchout = 0;
1622		/* This thread must have blocked in the kernel. */
1623		/*
1624		 * Check for pending signals and cancellation for
1625		 * this thread to see if we need to interrupt it
1626		 * in the kernel.
1627		 */
1628		if (THR_NEED_CANCEL(thread)) {
1629			kse_thr_interrupt(&thread->tcb->tcb_tmbx,
1630					  KSE_INTR_INTERRUPT, 0);
1631		} else if (thread->check_pending != 0) {
1632			for (i = 1; i <= _SIG_MAXSIG; ++i) {
1633				if (SIGISMEMBER(thread->sigpend, i) &&
1634				    !SIGISMEMBER(thread->sigmask, i)) {
1635					restart = _thread_sigact[i - 1].sa_flags & SA_RESTART;
1636					kse_thr_interrupt(&thread->tcb->tcb_tmbx,
1637					    restart ? KSE_INTR_RESTART : KSE_INTR_INTERRUPT, 0);
1638					break;
1639				}
1640			}
1641		}
1642	}
1643	else {
1644		switch (thread->state) {
1645		case PS_MUTEX_WAIT:
1646		case PS_COND_WAIT:
1647			if (THR_NEED_CANCEL(thread)) {
1648				thread->interrupted = 1;
1649				thread->continuation = _thr_finish_cancellation;
1650				THR_SET_STATE(thread, PS_RUNNING);
1651			} else {
1652				/* Insert into the waiting queue: */
1653				KSE_WAITQ_INSERT(kse, thread);
1654			}
1655			break;
1656
1657		case PS_LOCKWAIT:
1658			/*
1659			 * This state doesn't timeout.
1660			 */
1661			thread->wakeup_time.tv_sec = -1;
1662			thread->wakeup_time.tv_nsec = -1;
1663			level = thread->locklevel - 1;
1664			if (!_LCK_GRANTED(&thread->lockusers[level]))
1665				KSE_WAITQ_INSERT(kse, thread);
1666			else
1667				THR_SET_STATE(thread, PS_RUNNING);
1668			break;
1669
1670		case PS_SLEEP_WAIT:
1671		case PS_SIGWAIT:
1672			if (THR_NEED_CANCEL(thread)) {
1673				thread->interrupted = 1;
1674				THR_SET_STATE(thread, PS_RUNNING);
1675			} else {
1676				KSE_WAITQ_INSERT(kse, thread);
1677			}
1678			break;
1679
1680		case PS_JOIN:
1681			if (THR_NEED_CANCEL(thread)) {
1682				thread->join_status.thread = NULL;
1683				THR_SET_STATE(thread, PS_RUNNING);
1684			} else {
1685				/*
1686				 * This state doesn't timeout.
1687				 */
1688				thread->wakeup_time.tv_sec = -1;
1689				thread->wakeup_time.tv_nsec = -1;
1690
1691				/* Insert into the waiting queue: */
1692				KSE_WAITQ_INSERT(kse, thread);
1693			}
1694			break;
1695
1696		case PS_SIGSUSPEND:
1697		case PS_SUSPENDED:
1698			if (THR_NEED_CANCEL(thread)) {
1699				thread->interrupted = 1;
1700				THR_SET_STATE(thread, PS_RUNNING);
1701			} else {
1702				/*
1703				 * These states don't timeout.
1704				 */
1705				thread->wakeup_time.tv_sec = -1;
1706				thread->wakeup_time.tv_nsec = -1;
1707
1708				/* Insert into the waiting queue: */
1709				KSE_WAITQ_INSERT(kse, thread);
1710			}
1711			break;
1712
1713		case PS_DEAD:
1714			/*
1715			 * The scheduler is operating on a different
1716			 * stack.  It is safe to do garbage collecting
1717			 * here.
1718			 */
1719			thread->active = 0;
1720			thread->need_switchout = 0;
1721			thread->lock_switch = 0;
1722			thr_cleanup(kse, thread);
1723			return;
1724			break;
1725
1726		case PS_RUNNING:
1727			if ((thread->flags & THR_FLAGS_SUSPENDED) != 0 &&
1728			    !THR_NEED_CANCEL(thread))
1729				THR_SET_STATE(thread, PS_SUSPENDED);
1730			break;
1731
1732		case PS_DEADLOCK:
1733			/*
1734			 * These states don't timeout.
1735			 */
1736			thread->wakeup_time.tv_sec = -1;
1737			thread->wakeup_time.tv_nsec = -1;
1738
1739			/* Insert into the waiting queue: */
1740			KSE_WAITQ_INSERT(kse, thread);
1741			break;
1742
1743		default:
1744			PANIC("Unknown state\n");
1745			break;
1746		}
1747
1748		thr_accounting(thread);
1749		if (thread->state == PS_RUNNING) {
1750			if (thread->slice_usec == -1) {
1751				/*
1752				 * The thread exceeded its time quantum or
1753				 * it yielded the CPU; place it at the tail
1754				 * of the queue for its priority.
1755				 */
1756				KSE_RUNQ_INSERT_TAIL(kse, thread);
1757			} else {
1758				/*
1759				 * The thread hasn't exceeded its interval
1760				 * Place it at the head of the queue for its
1761				 * priority.
1762				 */
1763				KSE_RUNQ_INSERT_HEAD(kse, thread);
1764			}
1765		}
1766	}
1767	thread->active = 0;
1768	thread->need_switchout = 0;
1769	if (thread->check_pending != 0) {
1770		/* Install pending signals into the frame. */
1771		thread->check_pending = 0;
1772		KSE_LOCK_ACQUIRE(kse, &_thread_signal_lock);
1773		for (i = 1; i <= _SIG_MAXSIG; i++) {
1774			if (SIGISMEMBER(thread->sigmask, i))
1775				continue;
1776			if (SIGISMEMBER(thread->sigpend, i))
1777				(void)_thr_sig_add(thread, i,
1778				    &thread->siginfo[i-1]);
1779			else if (SIGISMEMBER(_thr_proc_sigpending, i) &&
1780				_thr_getprocsig_unlocked(i, &siginfo)) {
1781				(void)_thr_sig_add(thread, i, &siginfo);
1782			}
1783		}
1784		KSE_LOCK_RELEASE(kse, &_thread_signal_lock);
1785	}
1786}
1787
1788/*
1789 * This function waits for the smallest timeout value of any waiting
1790 * thread, or until it receives a message from another KSE.
1791 *
1792 * This must be called with the scheduling lock held.
1793 */
1794static void
1795kse_wait(struct kse *kse, struct pthread *td_wait, int sigseqno)
1796{
1797	struct timespec ts, ts_sleep;
1798	int saved_flags;
1799
1800	if ((td_wait == NULL) || (td_wait->wakeup_time.tv_sec < 0)) {
1801		/* Limit sleep to no more than 1 minute. */
1802		ts_sleep.tv_sec = 60;
1803		ts_sleep.tv_nsec = 0;
1804	} else {
1805		KSE_GET_TOD(kse, &ts);
1806		TIMESPEC_SUB(&ts_sleep, &td_wait->wakeup_time, &ts);
1807		if (ts_sleep.tv_sec > 60) {
1808			ts_sleep.tv_sec = 60;
1809			ts_sleep.tv_nsec = 0;
1810		}
1811	}
1812	/* Don't sleep for negative times. */
1813	if ((ts_sleep.tv_sec >= 0) && (ts_sleep.tv_nsec >= 0)) {
1814		KSE_SET_IDLE(kse);
1815		kse->k_kseg->kg_idle_kses++;
1816		KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1817		if ((kse->k_kseg->kg_flags & KGF_SINGLE_THREAD) &&
1818		    (kse->k_sigseqno != sigseqno))
1819			; /* don't sleep */
1820		else {
1821			saved_flags = kse->k_kcb->kcb_kmbx.km_flags;
1822			kse->k_kcb->kcb_kmbx.km_flags |= KMF_NOUPCALL;
1823			kse_release(&ts_sleep);
1824			kse->k_kcb->kcb_kmbx.km_flags = saved_flags;
1825		}
1826		KSE_SCHED_LOCK(kse, kse->k_kseg);
1827		if (KSE_IS_IDLE(kse)) {
1828			KSE_CLEAR_IDLE(kse);
1829			kse->k_kseg->kg_idle_kses--;
1830		}
1831	}
1832}
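/*
 * Illustration only (not compiled): the bounded-sleep computation that
 * kse_wait() performs, written out by hand.  This assumes TIMESPEC_SUB()
 * behaves like the normalized subtraction shown here; the helper name
 * below is made up for the sketch.
 */
#if 0
static void
bounded_sleep_interval(const struct timespec *wakeup,
    const struct timespec *now, struct timespec *ts_sleep)
{
	/* Remaining time until the earliest wakeup. */
	ts_sleep->tv_sec = wakeup->tv_sec - now->tv_sec;
	ts_sleep->tv_nsec = wakeup->tv_nsec - now->tv_nsec;
	if (ts_sleep->tv_nsec < 0) {
		ts_sleep->tv_sec--;
		ts_sleep->tv_nsec += 1000000000;
	}
	/* Never ask the kernel to sleep for more than one minute. */
	if (ts_sleep->tv_sec > 60) {
		ts_sleep->tv_sec = 60;
		ts_sleep->tv_nsec = 0;
	}
}
#endif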
1833
1834/*
1835 * This is not named kse_exit() so as not to confuse it with the
1836 * system call of the same name.
1837 */
1838static void
1839kse_fini(struct kse *kse)
1840{
1841	/* struct kse_group *free_kseg = NULL; */
1842	struct timespec ts;
1843	struct pthread *td;
1844
1845	/*
1846	 * Check to see if this is one of the main kses.
1847	 */
1848	if (kse->k_kseg != _kse_initial->k_kseg) {
1849		PANIC("shouldn't get here");
1850		/* This is for supporting thread groups. */
1851#ifdef NOT_YET
1852		/* Remove this KSE from the KSEG's list of KSEs. */
1853		KSE_SCHED_LOCK(kse, kse->k_kseg);
1854		TAILQ_REMOVE(&kse->k_kseg->kg_kseq, kse, k_kgqe);
1855		kse->k_kseg->kg_ksecount--;
1856		if (TAILQ_EMPTY(&kse->k_kseg->kg_kseq))
1857			free_kseg = kse->k_kseg;
1858		KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1859
1860		/*
1861		 * Add this KSE to the list of free KSEs along with
1862		 * the KSEG if it is now orphaned.
1863		 */
1864		KSE_LOCK_ACQUIRE(kse, &kse_lock);
1865		if (free_kseg != NULL)
1866			kseg_free_unlocked(free_kseg);
1867		kse_free_unlocked(kse);
1868		KSE_LOCK_RELEASE(kse, &kse_lock);
1869		kse_exit();
1870		/* Never returns. */
1871		PANIC("kse_exit()");
1872#endif
1873	} else {
1874		/*
1875		 * We allow the program to kill KSEs in the initial group
1876		 * (by lowering the concurrency).
1877		 */
1878		if ((kse != _kse_initial) &&
1879		    ((kse->k_flags & KF_TERMINATED) != 0)) {
1880			KSE_SCHED_LOCK(kse, kse->k_kseg);
1881			TAILQ_REMOVE(&kse->k_kseg->kg_kseq, kse, k_kgqe);
1882			kse->k_kseg->kg_ksecount--;
1883			/*
1884			 * Migrate a thread to _kse_initial if the last
1885			 * KSE it ran on is this KSE.
1886			 */
1887			td = TAILQ_FIRST(&kse->k_kseg->kg_threadq);
1888			while (td != NULL) {
1889				if (td->kse == kse)
1890					td->kse = _kse_initial;
1891				td = TAILQ_NEXT(td, kle);
1892			}
1893			KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1894			KSE_LOCK_ACQUIRE(kse, &kse_lock);
1895			kse_free_unlocked(kse);
1896			KSE_LOCK_RELEASE(kse, &kse_lock);
1897			/* Make sure there is always at least one KSE awake. */
1898			KSE_WAKEUP(_kse_initial);
1899			kse_exit();
1900			/* Never returns. */
1901			PANIC("kse_exit() failed for initial kseg");
1902		}
1903		KSE_SCHED_LOCK(kse, kse->k_kseg);
1904		KSE_SET_IDLE(kse);
1905		kse->k_kseg->kg_idle_kses++;
1906		KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1907		ts.tv_sec = 120;
1908		ts.tv_nsec = 0;
1909		kse->k_kcb->kcb_kmbx.km_flags = 0;
1910		kse_release(&ts);
1911		/* Never returns. */
1912	}
1913}
1914
1915void
1916_thr_set_timeout(const struct timespec *timeout)
1917{
1918	struct pthread	*curthread = _get_curthread();
1919	struct timespec ts;
1920
1921	/* Reset the timeout flag for the running thread: */
1922	curthread->timeout = 0;
1923
1924	/* Check if the thread is to wait forever: */
1925	if (timeout == NULL) {
1926		/*
1927		 * Set the wakeup time to something that can be recognised as
1928		 * different from an actual time of day:
1929		 */
1930		curthread->wakeup_time.tv_sec = -1;
1931		curthread->wakeup_time.tv_nsec = -1;
1932	}
1933	/* Check if no waiting is required: */
1934	else if ((timeout->tv_sec == 0) && (timeout->tv_nsec == 0)) {
1935		/* Set the wake up time to 'immediately': */
1936		curthread->wakeup_time.tv_sec = 0;
1937		curthread->wakeup_time.tv_nsec = 0;
1938	} else {
1939		/* Calculate the time for the current thread to wakeup: */
1940		KSE_GET_TOD(curthread->kse, &ts);
1941		TIMESPEC_ADD(&curthread->wakeup_time, &ts, timeout);
1942	}
1943}
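/*
 * Usage sketch (illustration only, not part of this file): a blocking
 * primitive converts its relative timeout into an absolute wakeup time
 * before switching out.  The timespec values below are arbitrary.
 */
#if 0
	struct timespec rel;

	rel.tv_sec = 1;			/* wait at most one second */
	rel.tv_nsec = 0;
	_thr_set_timeout(&rel);		/* wakeup_time = now + 1s */

	_thr_set_timeout(NULL);		/* wait forever (-1 sentinel) */
#endif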
1944
1945void
1946_thr_panic_exit(char *file, int line, char *msg)
1947{
1948	char buf[256];
1949
1950	snprintf(buf, sizeof(buf), "(%s:%d) %s\n", file, line, msg);
1951	__sys_write(2, buf, strlen(buf));
1952	abort();
1953}
1954
1955void
1956_thr_setrunnable(struct pthread *curthread, struct pthread *thread)
1957{
1958	kse_critical_t crit;
1959	struct kse_mailbox *kmbx;
1960
1961	crit = _kse_critical_enter();
1962	KSE_SCHED_LOCK(curthread->kse, thread->kseg);
1963	kmbx = _thr_setrunnable_unlocked(thread);
1964	KSE_SCHED_UNLOCK(curthread->kse, thread->kseg);
1965	_kse_critical_leave(crit);
1966	if ((kmbx != NULL) && (__isthreaded != 0))
1967		kse_wakeup(kmbx);
1968}
1969
1970struct kse_mailbox *
1971_thr_setrunnable_unlocked(struct pthread *thread)
1972{
1973	struct kse_mailbox *kmbx = NULL;
1974
1975	if ((thread->kseg->kg_flags & KGF_SINGLE_THREAD) != 0) {
1976		/* No silly queues for these threads. */
1977		if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
1978			THR_SET_STATE(thread, PS_SUSPENDED);
1979		else {
1980			THR_SET_STATE(thread, PS_RUNNING);
1981			kmbx = kse_wakeup_one(thread);
1982		}
1983
1984	} else if (thread->state != PS_RUNNING) {
1985		if ((thread->flags & THR_FLAGS_IN_WAITQ) != 0)
1986			KSE_WAITQ_REMOVE(thread->kse, thread);
1987		if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
1988			THR_SET_STATE(thread, PS_SUSPENDED);
1989		else {
1990			THR_SET_STATE(thread, PS_RUNNING);
1991			if ((thread->blocked == 0) && (thread->active == 0) &&
1992			    (thread->flags & THR_FLAGS_IN_RUNQ) == 0)
1993				THR_RUNQ_INSERT_TAIL(thread);
1994			/*
1995			 * XXX - Threads are not yet assigned to specific
1996			 *       KSEs; they are assigned to the KSEG.  So
1997			 *       the fact that a thread's KSE is waiting
1998			 *       doesn't necessarily mean that it will be
1999			 *       the KSE that runs the thread after the
2000			 *       lock is granted.  But we don't know if the
2001			 *       other KSEs within the same KSEG are also
2002			 *       in a waiting state or not so we err on the
2003			 *       side of caution and wake up the thread's
2004			 *       last known KSE.  We ensure that the
2005			 *       thread's KSE doesn't change while its
2006			 *       scheduling lock is held so it is safe to
2007			 *       reference it (the KSE).  If the KSE wakes
2008			 *       up and doesn't find any more work it will
2009			 *       again go back to waiting so no harm is
2010			 *       done.
2011			 */
2012			kmbx = kse_wakeup_one(thread);
2013		}
2014	}
2015	return (kmbx);
2016}
2017
2018static struct kse_mailbox *
2019kse_wakeup_one(struct pthread *thread)
2020{
2021	struct kse *ke;
2022
2023	if (KSE_IS_IDLE(thread->kse)) {
2024		KSE_CLEAR_IDLE(thread->kse);
2025		thread->kseg->kg_idle_kses--;
2026		return (&thread->kse->k_kcb->kcb_kmbx);
2027	} else {
2028		TAILQ_FOREACH(ke, &thread->kseg->kg_kseq, k_kgqe) {
2029			if (KSE_IS_IDLE(ke)) {
2030				KSE_CLEAR_IDLE(ke);
2031				ke->k_kseg->kg_idle_kses--;
2032				return (&ke->k_kcb->kcb_kmbx);
2033			}
2034		}
2035	}
2036	return (NULL);
2037}
2038
2039static void
2040kse_wakeup_multi(struct kse *curkse)
2041{
2042	struct kse *ke;
2043	int tmp;
2044
2045	if ((tmp = KSE_RUNQ_THREADS(curkse)) && curkse->k_kseg->kg_idle_kses) {
2046		TAILQ_FOREACH(ke, &curkse->k_kseg->kg_kseq, k_kgqe) {
2047			if (KSE_IS_IDLE(ke)) {
2048				KSE_CLEAR_IDLE(ke);
2049				ke->k_kseg->kg_idle_kses--;
2050				KSE_WAKEUP(ke);
2051				if (--tmp == 0)
2052					break;
2053			}
2054		}
2055	}
2056}
2057
2058/*
2059 * Allocate a new KSEG.
2060 *
2061 * We allow the current thread to be NULL in the case that this
2062 * is the first time a KSEG is being created (library initialization).
2063 * In this case, we don't need to (and can't) take any locks.
2064 */
2065struct kse_group *
2066_kseg_alloc(struct pthread *curthread)
2067{
2068	struct kse_group *kseg = NULL;
2069	kse_critical_t crit;
2070
2071	if ((curthread != NULL) && (free_kseg_count > 0)) {
2072		/* Use the kse lock for the kseg queue. */
2073		crit = _kse_critical_enter();
2074		KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2075		if ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) {
2076			TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
2077			free_kseg_count--;
2078			active_kseg_count++;
2079			TAILQ_INSERT_TAIL(&active_kse_groupq, kseg, kg_qe);
2080		}
2081		KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2082		_kse_critical_leave(crit);
2083		if (kseg)
2084			kseg_reinit(kseg);
2085	}
2086
2087	/*
2088	 * If a KSE group wasn't found in the free list, attempt to
2089	 * allocate a new one, initialize its run queue, and add it to
2090	 * the list of active KSEGs.
2091	 */
2092	if ((kseg == NULL) &&
2093	    ((kseg = (struct kse_group *)malloc(sizeof(*kseg))) != NULL)) {
2094		if (_pq_alloc(&kseg->kg_schedq.sq_runq,
2095		    THR_MIN_PRIORITY, THR_LAST_PRIORITY) != 0) {
2096			free(kseg);
2097			kseg = NULL;
2098		} else {
2099			kseg_init(kseg);
2100			/* Add the KSEG to the list of active KSEGs. */
2101			if (curthread != NULL) {
2102				crit = _kse_critical_enter();
2103				KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2104				active_kseg_count++;
2105				TAILQ_INSERT_TAIL(&active_kse_groupq,
2106				    kseg, kg_qe);
2107				KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2108				_kse_critical_leave(crit);
2109			} else {
2110				active_kseg_count++;
2111				TAILQ_INSERT_TAIL(&active_kse_groupq,
2112				    kseg, kg_qe);
2113			}
2114		}
2115	}
2116	return (kseg);
2117}
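/*
 * Sketch of the expected pairing (illustration only; curthread is
 * assumed to be the caller's thread): KSE groups obtained from
 * _kseg_alloc() are returned with _kseg_free(), which puts them back
 * on the free list.
 */
#if 0
	struct kse_group *kseg;

	if ((kseg = _kseg_alloc(curthread)) == NULL)
		return (EAGAIN);		/* out of memory */
	/* ... attach KSEs and threads to the group ... */
	_kseg_free(kseg);
#endif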
2118
2119static void
2120kseg_init(struct kse_group *kseg)
2121{
2122	kseg_reinit(kseg);
2123	_lock_init(&kseg->kg_lock, LCK_ADAPTIVE, _kse_lock_wait,
2124	    _kse_lock_wakeup);
2125}
2126
2127static void
2128kseg_reinit(struct kse_group *kseg)
2129{
2130	TAILQ_INIT(&kseg->kg_kseq);
2131	TAILQ_INIT(&kseg->kg_threadq);
2132	TAILQ_INIT(&kseg->kg_schedq.sq_waitq);
2133	kseg->kg_threadcount = 0;
2134	kseg->kg_ksecount = 0;
2135	kseg->kg_idle_kses = 0;
2136	kseg->kg_flags = 0;
2137}
2138
2139/*
2140 * This must be called with the kse lock held and when there are
2141 * no more threads that reference it.
2142 */
2143static void
2144kseg_free_unlocked(struct kse_group *kseg)
2145{
2146	TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe);
2147	TAILQ_INSERT_HEAD(&free_kse_groupq, kseg, kg_qe);
2148	free_kseg_count++;
2149	active_kseg_count--;
2150}
2151
2152void
2153_kseg_free(struct kse_group *kseg)
2154{
2155	struct kse *curkse;
2156	kse_critical_t crit;
2157
2158	crit = _kse_critical_enter();
2159	curkse = _get_curkse();
2160	KSE_LOCK_ACQUIRE(curkse, &kse_lock);
2161	kseg_free_unlocked(kseg);
2162	KSE_LOCK_RELEASE(curkse, &kse_lock);
2163	_kse_critical_leave(crit);
2164}
2165
2166static void
2167kseg_destroy(struct kse_group *kseg)
2168{
2169	_lock_destroy(&kseg->kg_lock);
2170	_pq_free(&kseg->kg_schedq.sq_runq);
2171	free(kseg);
2172}
2173
2174/*
2175 * Allocate a new KSE.
2176 *
2177 * We allow the current thread to be NULL in the case that this
2178 * is the first time a KSE is being created (library initialization).
2179 * In this case, we don't need to (and can't) take any locks.
2180 */
2181struct kse *
2182_kse_alloc(struct pthread *curthread, int sys_scope)
2183{
2184	struct kse *kse = NULL;
2185	char *stack;
2186	kse_critical_t crit;
2187	int i;
2188
2189	if ((curthread != NULL) && (free_kse_count > 0)) {
2190		crit = _kse_critical_enter();
2191		KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2192		/* Search for a finished KSE. */
2193		kse = TAILQ_FIRST(&free_kseq);
2194		while ((kse != NULL) &&
2195		    ((kse->k_kcb->kcb_kmbx.km_flags & KMF_DONE) == 0)) {
2196			kse = TAILQ_NEXT(kse, k_qe);
2197		}
2198		if (kse != NULL) {
2199			DBG_MSG("found an unused kse.\n");
2200			TAILQ_REMOVE(&free_kseq, kse, k_qe);
2201			free_kse_count--;
2202			TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe);
2203			active_kse_count++;
2204		}
2205		KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2206		_kse_critical_leave(crit);
2207		if (kse != NULL)
2208			kse_reinit(kse, sys_scope);
2209	}
2210	if ((kse == NULL) &&
2211	    ((kse = (struct kse *)malloc(sizeof(*kse))) != NULL)) {
2212		if (sys_scope != 0)
2213			stack = NULL;
2214		else if ((stack = malloc(KSE_STACKSIZE)) == NULL) {
2215			free(kse);
2216			return (NULL);
2217		}
2218		bzero(kse, sizeof(*kse));
2219
2220		/* Initialize KCB without the lock. */
2221		if ((kse->k_kcb = _kcb_ctor(kse)) == NULL) {
2222			if (stack != NULL)
2223				free(stack);
2224			free(kse);
2225			return (NULL);
2226		}
2227
2228		/* Initialize the lockusers. */
2229		for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) {
2230			_lockuser_init(&kse->k_lockusers[i], (void *)kse);
2231			_LCK_SET_PRIVATE2(&kse->k_lockusers[i], NULL);
2232		}
2233		/* _lock_init(kse->k_lock, ...) */
2234
2235		if (curthread != NULL) {
2236			crit = _kse_critical_enter();
2237			KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2238		}
2239		kse->k_flags = 0;
2240		TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe);
2241		active_kse_count++;
2242		if (curthread != NULL) {
2243			KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2244			_kse_critical_leave(crit);
2245		}
2246		/*
2247		 * Create the KSE context.
2248		 * Scope system threads (one thread per KSE) do not need
2249		 * a stack for the KSE upcall, which they never use.
2250		 */
2251		if (!sys_scope) {
2252			kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_multi;
2253			kse->k_stack.ss_sp = stack;
2254			kse->k_stack.ss_size = KSE_STACKSIZE;
2255		} else {
2256			kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_single;
2257			kse->k_stack.ss_sp = NULL;
2258			kse->k_stack.ss_size = 0;
2259		}
2260		kse->k_kcb->kcb_kmbx.km_udata = (void *)kse;
2261		kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2262		/*
2263		 * We need to keep a copy of the stack in case it
2264		 * doesn't get used; a KSE running a scope system
2265		 * thread will use that thread's stack.
2266		 */
2267		kse->k_kcb->kcb_kmbx.km_stack = kse->k_stack;
2268	}
2269	return (kse);
2270}
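/*
 * Sketch (illustration only): the sys_scope argument selects between a
 * 1:1 KSE, which needs no upcall stack and runs kse_sched_single, and
 * an M:N KSE, which gets a KSE_STACKSIZE upcall stack and runs
 * kse_sched_multi.  The scope_system variable is assumed for the sketch.
 */
#if 0
	struct kse *kse;

	if ((kse = _kse_alloc(curthread, scope_system ? 1 : 0)) == NULL)
		return (EAGAIN);
	/* ... pair the KSE with a thread and a KSE group ... */
#endif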
2271
2272static void
2273kse_reinit(struct kse *kse, int sys_scope)
2274{
2275	if (!sys_scope) {
2276		kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_multi;
2277		if (kse->k_stack.ss_sp == NULL) {
2278			/* XXX check allocation failure */
2279			kse->k_stack.ss_sp = (char *) malloc(KSE_STACKSIZE);
2280			kse->k_stack.ss_size = KSE_STACKSIZE;
2281		}
2282		kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2283	} else {
2284		kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_single;
2285		if (kse->k_stack.ss_sp)
2286			free(kse->k_stack.ss_sp);
2287		kse->k_stack.ss_sp = NULL;
2288		kse->k_stack.ss_size = 0;
2289		kse->k_kcb->kcb_kmbx.km_quantum = 0;
2290	}
2291	kse->k_kcb->kcb_kmbx.km_stack = kse->k_stack;
2292	kse->k_kcb->kcb_kmbx.km_udata = (void *)kse;
2293	kse->k_kcb->kcb_kmbx.km_curthread = NULL;
2294	kse->k_kcb->kcb_kmbx.km_flags = 0;
2295	kse->k_curthread = NULL;
2296	kse->k_kseg = 0;
2297	kse->k_schedq = 0;
2298	kse->k_locklevel = 0;
2299	kse->k_flags = 0;
2300	kse->k_error = 0;
2301	kse->k_cpu = 0;
2302	kse->k_sigseqno = 0;
2303}
2304
2305void
2306kse_free_unlocked(struct kse *kse)
2307{
2308	TAILQ_REMOVE(&active_kseq, kse, k_qe);
2309	active_kse_count--;
2310	kse->k_kseg = NULL;
2311	kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2312	kse->k_flags = 0;
2313	TAILQ_INSERT_HEAD(&free_kseq, kse, k_qe);
2314	free_kse_count++;
2315}
2316
2317void
2318_kse_free(struct pthread *curthread, struct kse *kse)
2319{
2320	kse_critical_t crit;
2321
2322	if (curthread == NULL)
2323		kse_free_unlocked(kse);
2324	else {
2325		crit = _kse_critical_enter();
2326		KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2327		kse_free_unlocked(kse);
2328		KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2329		_kse_critical_leave(crit);
2330	}
2331}
2332
2333static void
2334kse_destroy(struct kse *kse)
2335{
2336	int i;
2337
2338	if (kse->k_stack.ss_sp != NULL)
2339		free(kse->k_stack.ss_sp);
2340	_kcb_dtor(kse->k_kcb);
2341	for (i = 0; i < MAX_KSE_LOCKLEVEL; ++i)
2342		_lockuser_destroy(&kse->k_lockusers[i]);
2343	_lock_destroy(&kse->k_lock);
2344	free(kse);
2345}
2346
2347struct pthread *
2348_thr_alloc(struct pthread *curthread)
2349{
2350	kse_critical_t	crit;
2351	struct pthread	*thread = NULL;
2352	int i;
2353
2354	if (curthread != NULL) {
2355		if (GC_NEEDED())
2356			_thr_gc(curthread);
2357		if (free_thread_count > 0) {
2358			crit = _kse_critical_enter();
2359			KSE_LOCK_ACQUIRE(curthread->kse, &thread_lock);
2360			if ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
2361				TAILQ_REMOVE(&free_threadq, thread, tle);
2362				free_thread_count--;
2363			}
2364			KSE_LOCK_RELEASE(curthread->kse, &thread_lock);
2365			_kse_critical_leave(crit);
2366		}
2367	}
2368	if ((thread == NULL) &&
2369	    ((thread = malloc(sizeof(struct pthread))) != NULL)) {
2370		bzero(thread, sizeof(struct pthread));
2371		thread->siginfo = calloc(_SIG_MAXSIG, sizeof(siginfo_t));
2372		if (thread->siginfo == NULL) {
2373			free(thread);
2374			return (NULL);
2375		}
2376		if (curthread) {
2377			_pthread_mutex_lock(&_tcb_mutex);
2378			thread->tcb = _tcb_ctor(thread, 0 /* not initial tls */);
2379			_pthread_mutex_unlock(&_tcb_mutex);
2380		} else {
2381			thread->tcb = _tcb_ctor(thread, 1 /* initial tls */);
2382		}
2383		if (thread->tcb == NULL) {
2384			free(thread->siginfo);
2385			free(thread);
2386			return (NULL);
2387		}
2388		/*
2389		 * Initialize thread locking.
2390		 * Lock initialization needs malloc, so don't
2391		 * enter a critical region before doing this!
2392		 */
2393		if (_lock_init(&thread->lock, LCK_ADAPTIVE,
2394		    _thr_lock_wait, _thr_lock_wakeup) != 0)
2395			PANIC("Cannot initialize thread lock");
2396		for (i = 0; i < MAX_THR_LOCKLEVEL; i++) {
2397			_lockuser_init(&thread->lockusers[i], (void *)thread);
2398			_LCK_SET_PRIVATE2(&thread->lockusers[i],
2399			    (void *)thread);
2400		}
2401	}
2402	return (thread);
2403}
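/*
 * Sketch of the expected pairing (illustration only; curthread is
 * assumed to be the caller's thread): threads obtained from
 * _thr_alloc() are eventually returned through _thr_free(), which
 * either caches the thread structure or destroys it.
 */
#if 0
	struct pthread *new_thread;

	if ((new_thread = _thr_alloc(curthread)) == NULL)
		return (EAGAIN);
	/* ... set up attributes, stack, and context, then start it ... */
	_thr_free(curthread, new_thread);	/* on failure or after exit */
#endif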
2404
2405void
2406_thr_free(struct pthread *curthread, struct pthread *thread)
2407{
2408	kse_critical_t crit;
2409
2410	DBG_MSG("Freeing thread %p\n", thread);
2411	if (thread->name) {
2412		free(thread->name);
2413		thread->name = NULL;
2414	}
2415	if ((curthread == NULL) || (free_thread_count >= MAX_CACHED_THREADS)) {
2416		thr_destroy(curthread, thread);
2417	} else {
2418		/* Add the thread to the free thread list. */
2419		crit = _kse_critical_enter();
2420		KSE_LOCK_ACQUIRE(curthread->kse, &thread_lock);
2421		TAILQ_INSERT_TAIL(&free_threadq, thread, tle);
2422		free_thread_count++;
2423		KSE_LOCK_RELEASE(curthread->kse, &thread_lock);
2424		_kse_critical_leave(crit);
2425	}
2426}
2427
2428static void
2429thr_destroy(struct pthread *curthread, struct pthread *thread)
2430{
2431	int i;
2432
2433	for (i = 0; i < MAX_THR_LOCKLEVEL; i++)
2434		_lockuser_destroy(&thread->lockusers[i]);
2435	_lock_destroy(&thread->lock);
2436	if (curthread) {
2437		_pthread_mutex_lock(&_tcb_mutex);
2438		_tcb_dtor(thread->tcb);
2439		_pthread_mutex_unlock(&_tcb_mutex);
2440	} else {
2441		_tcb_dtor(thread->tcb);
2442	}
2443	free(thread->siginfo);
2444	free(thread);
2445}
2446
2447/*
2448 * Add an active thread:
2449 *
2450 *   o Assign the thread a unique id (which GDB uses to track
2451 *     threads).
2452 *   o Add the thread to the list of all threads and increment
2453 *     the number of active threads.
2454 */
2455static void
2456thr_link(struct pthread *thread)
2457{
2458	kse_critical_t crit;
2459	struct kse *curkse;
2460
2461	crit = _kse_critical_enter();
2462	curkse = _get_curkse();
2463	KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
2464	/*
2465	 * Initialize the unique id (which GDB uses to track
2466	 * threads), add the thread to the list of all threads,
2467	 * and increment the number of active threads.
2468	 */
2469	thread->uniqueid = next_uniqueid++;
2470	THR_LIST_ADD(thread);
2471	_thread_active_threads++;
2472	KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
2473	_kse_critical_leave(crit);
2474}
2475
2476/*
2477 * Remove an active thread.
2478 */
2479static void
2480thr_unlink(struct pthread *thread)
2481{
2482	kse_critical_t crit;
2483	struct kse *curkse;
2484
2485	crit = _kse_critical_enter();
2486	curkse = _get_curkse();
2487	KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
2488	THR_LIST_REMOVE(thread);
2489	_thread_active_threads--;
2490	KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
2491	_kse_critical_leave(crit);
2492}
2493
2494void
2495_thr_hash_add(struct pthread *thread)
2496{
2497	struct thread_hash_head *head;
2498
2499	head = &thr_hashtable[THREAD_HASH(thread)];
2500	LIST_INSERT_HEAD(head, thread, hle);
2501}
2502
2503void
2504_thr_hash_remove(struct pthread *thread)
2505{
2506	LIST_REMOVE(thread, hle);
2507}
2508
2509struct pthread *
2510_thr_hash_find(struct pthread *thread)
2511{
2512	struct pthread *td;
2513	struct thread_hash_head *head;
2514
2515	head = &thr_hashtable[THREAD_HASH(thread)];
2516	LIST_FOREACH(td, head, hle) {
2517		if (td == thread)
2518			return (thread);
2519	}
2520	return (NULL);
2521}
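/*
 * Usage sketch (illustration only; the thread variables are assumed):
 * the hash table maps a thread pointer back to itself, so a caller
 * holding a possibly stale pointer can check whether it still names a
 * known thread.
 */
#if 0
	_thr_hash_add(new_thread);
	if (_thr_hash_find(some_thread) != NULL) {
		/* some_thread is still a known thread. */
	}
	_thr_hash_remove(new_thread);
#endif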
2522
2523void
2524_thr_debug_check_yield(struct pthread *curthread)
2525{
2526	/*
2527	 * Note that TMDF_SUSPEND is set after the process is suspended.
2528	 * While we are being debugged, every process suspension causes
2529	 * all KSEs to schedule an upcall in the kernel, unless a KSE is
2530	 * in a critical region.
2531	 * If this function is being called, the KSE is no longer in a
2532	 * critical region.  If the debugger set TMDF_SUSPEND before the
2533	 * KSE left its critical region, we catch it here.  If the flag
2534	 * changes while we are testing it, that is not a problem either,
2535	 * because the change only happens after a process suspension
2536	 * event.  A suspension event always causes the KSE to schedule
2537	 * an upcall; in that case, because we are not in a critical
2538	 * region, the upcall is scheduled successfully and the flag is
2539	 * checked again in kse_sched_multi.  We do not come back until
2540	 * the debugger clears the flag, which happens at the next
2541	 * suspension event.
2542	 */
2543	if (!DBG_CAN_RUN(curthread)) {
2544		if ((curthread->attr.flags & PTHREAD_SCOPE_SYSTEM) == 0)
2545			_thr_sched_switch(curthread);
2546		else
2547			kse_thr_interrupt(&curthread->tcb->tcb_tmbx,
2548				KSE_INTR_DBSUSPEND, 0);
2549	}
2550}
2551