/*
 * Copyright (C) 2003 Daniel M. Eischen <deischen@freebsd.org>
 * Copyright (C) 2002 Jonathon Mini <mini@freebsd.org>
 * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by John Birrell.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/kse.h>
#include <sys/ptrace.h>
#include <sys/signalvar.h>
#include <sys/queue.h>
#include <machine/atomic.h>
#include <machine/sigframe.h>

#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <ucontext.h>
#include <unistd.h>

#include "atomic_ops.h"
#include "thr_private.h"
#include "libc_private.h"
#ifdef NOTYET
#include "spinlock.h"
#endif

/* #define DEBUG_THREAD_KERN */
#ifdef DEBUG_THREAD_KERN
#define DBG_MSG		stdout_debug
#else
#define DBG_MSG(x...)
#endif

/*
 * Define a high water mark for the maximum number of threads that
 * will be cached.  Once this level is reached, any extra threads
 * will be free()'d.
 */
#define	MAX_CACHED_THREADS	100
/*
 * Define high water marks for the maximum number of KSEs and KSE groups
 * that will be cached.  Because we support 1:1 threading, there could be
 * as many KSEs and KSE groups as threads.  Once these levels are
 * reached, any extra KSEs and KSE groups will be free()'d.
 */
#define	MAX_CACHED_KSES		((_thread_scope_system <= 0) ? 50 : 100)
#define	MAX_CACHED_KSEGS	((_thread_scope_system <= 0) ? 50 : 100)

#define	KSE_SET_MBOX(kse, thrd) \
	(kse)->k_kcb->kcb_kmbx.km_curthread = &(thrd)->tcb->tcb_tmbx

#define	KSE_SET_EXITED(kse)	(kse)->k_flags |= KF_EXITED

/*
 * Macros for manipulating the run queues.  The priority queue
 * routines use the thread's pqe link and also handle the setting
 * and clearing of the thread's THR_FLAGS_IN_RUNQ flag.
 */
#define	KSE_RUNQ_INSERT_HEAD(kse, thrd)			\
	_pq_insert_head(&(kse)->k_schedq->sq_runq, thrd)
#define	KSE_RUNQ_INSERT_TAIL(kse, thrd)			\
	_pq_insert_tail(&(kse)->k_schedq->sq_runq, thrd)
#define	KSE_RUNQ_REMOVE(kse, thrd)			\
	_pq_remove(&(kse)->k_schedq->sq_runq, thrd)
#define	KSE_RUNQ_FIRST(kse)				\
	((_libkse_debug == 0) ?				\
	 _pq_first(&(kse)->k_schedq->sq_runq) :		\
	 _pq_first_debug(&(kse)->k_schedq->sq_runq))

#define KSE_RUNQ_THREADS(kse)	((kse)->k_schedq->sq_runq.pq_threads)

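/*
 * Cancellation tests.  THR_NEED_CANCEL() is true when a pending
 * cancellation request can be acted on at all: the thread is being
 * cancelled, cancellation is not disabled, and the thread is either
 * at a cancellation point or in asynchronous cancel mode.
 * THR_NEED_ASYNC_CANCEL() is the stricter test used to interrupt a
 * thread that is *not* at a cancellation point.
 */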
#define THR_NEED_CANCEL(thrd)						\
	 (((thrd)->cancelflags & THR_CANCELLING) != 0 &&		\
	  ((thrd)->cancelflags & PTHREAD_CANCEL_DISABLE) == 0 &&	\
	  (((thrd)->cancelflags & THR_AT_CANCEL_POINT) != 0 ||		\
	   ((thrd)->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))

#define THR_NEED_ASYNC_CANCEL(thrd)					\
	 (((thrd)->cancelflags & THR_CANCELLING) != 0 &&		\
	  ((thrd)->cancelflags & PTHREAD_CANCEL_DISABLE) == 0 &&	\
	  (((thrd)->cancelflags & THR_AT_CANCEL_POINT) == 0 &&		\
	   ((thrd)->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))

/*
 * We've got to keep track of everything that is allocated, not only
 * to have a speedy free list, but also so they can be deallocated
 * after a fork().
 */
static TAILQ_HEAD(, kse)	active_kseq;
static TAILQ_HEAD(, kse)	free_kseq;
static TAILQ_HEAD(, kse_group)	free_kse_groupq;
static TAILQ_HEAD(, kse_group)	active_kse_groupq;
static TAILQ_HEAD(, kse_group)	gc_ksegq;
static struct lock		kse_lock;	/* also used for kseg queue */
static int			free_kse_count = 0;
static int			free_kseg_count = 0;
static TAILQ_HEAD(, pthread)	free_threadq;
static struct lock		thread_lock;
static int			free_thread_count = 0;
static int			inited = 0;
static int			active_kse_count = 0;
static int			active_kseg_count = 0;
static u_int64_t		next_uniqueid = 1;

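/*
 * Hash table of threads, keyed by the address of the thread
 * structure modulo a prime number of buckets.
 */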
LIST_HEAD(thread_hash_head, pthread);
#define THREAD_HASH_QUEUES	127
static struct thread_hash_head	thr_hashtable[THREAD_HASH_QUEUES];
#define	THREAD_HASH(thrd)	((unsigned long)thrd % THREAD_HASH_QUEUES)

/* Lock for thread tcb constructor/destructor */
static pthread_mutex_t		_tcb_mutex;

#ifdef DEBUG_THREAD_KERN
static void	dump_queues(struct kse *curkse);
#endif
static void	kse_check_completed(struct kse *kse);
static void	kse_check_waitq(struct kse *kse);
static void	kse_fini(struct kse *curkse);
static void	kse_reinit(struct kse *kse, int sys_scope);
static void	kse_sched_multi(struct kse_mailbox *kmbx);
static void	kse_sched_single(struct kse_mailbox *kmbx);
static void	kse_switchout_thread(struct kse *kse, struct pthread *thread);
static void	kse_wait(struct kse *kse, struct pthread *td_wait, int sigseq);
static void	kse_free_unlocked(struct kse *kse);
static void	kse_destroy(struct kse *kse);
static void	kseg_free_unlocked(struct kse_group *kseg);
static void	kseg_init(struct kse_group *kseg);
static void	kseg_reinit(struct kse_group *kseg);
static void	kseg_destroy(struct kse_group *kseg);
static void	kse_waitq_insert(struct pthread *thread);
static void	kse_wakeup_multi(struct kse *curkse);
static struct kse_mailbox *kse_wakeup_one(struct pthread *thread);
static void	thr_cleanup(struct kse *kse, struct pthread *curthread);
static void	thr_link(struct pthread *thread);
static void	thr_resume_wrapper(int sig, siginfo_t *, ucontext_t *);
static void	thr_resume_check(struct pthread *curthread, ucontext_t *ucp);
static int	thr_timedout(struct pthread *thread, struct timespec *curtime);
static void	thr_unlink(struct pthread *thread);
static void	thr_destroy(struct pthread *curthread, struct pthread *thread);
static void	thread_gc(struct pthread *thread);
static void	kse_gc(struct pthread *thread);
static void	kseg_gc(struct pthread *thread);

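/*
 * Charge the thread's accumulated user and system ticks against its
 * time slice; slice_usec is set to -1 once the quantum is exceeded
 * (SCHED_FIFO threads are exempt from time slicing).
 */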
static __inline void
thr_accounting(struct pthread *thread)
{
	if ((thread->slice_usec != -1) &&
	    (thread->slice_usec <= TIMESLICE_USEC) &&
	    (thread->attr.sched_policy != SCHED_FIFO)) {
		thread->slice_usec += (thread->tcb->tcb_tmbx.tm_uticks
		    + thread->tcb->tcb_tmbx.tm_sticks) * _clock_res_usec;
		/* Check for time quantum exceeded: */
		if (thread->slice_usec > TIMESLICE_USEC)
			thread->slice_usec = -1;
	}
	thread->tcb->tcb_tmbx.tm_uticks = 0;
	thread->tcb->tcb_tmbx.tm_sticks = 0;
}

/*
 * This is called after a fork().
 * No locks need to be taken here since we are guaranteed to be
 * single threaded.
 *
 * XXX
 * POSIX says that for a threaded process, fork() is to be used
 * only to run new programs, and that the effects of calling functions
 * that require certain resources between the call to fork() and
 * the call to an exec function are undefined.
 *
 * It is not safe to free memory after fork(), because these data
 * structures may be in an inconsistent state.
 */
void
_kse_single_thread(struct pthread *curthread)
{
#ifdef NOTYET
	struct kse *kse;
	struct kse_group *kseg;
	struct pthread *thread;

	_thr_spinlock_init();
	*__malloc_lock = (spinlock_t)_SPINLOCK_INITIALIZER;
	if (__isthreaded) {
		_thr_rtld_fini();
		_thr_signal_deinit();
	}
	__isthreaded = 0;
	/*
	 * Restore the signal mask early, so that any memory problems
	 * can dump core.
	 */
	__sys_sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
	_thread_active_threads = 1;

	curthread->kse->k_kcb->kcb_kmbx.km_curthread = NULL;
	curthread->attr.flags &= ~PTHREAD_SCOPE_PROCESS;
	curthread->attr.flags |= PTHREAD_SCOPE_SYSTEM;

	/*
	 * Enter a loop to remove and free all threads other than
	 * the running thread from the active thread list:
	 */
	while ((thread = TAILQ_FIRST(&_thread_list)) != NULL) {
		THR_GCLIST_REMOVE(thread);
		/*
		 * Remove this thread from the list (the current
		 * thread will be removed but re-added by libpthread
		 * initialization).
		 */
		TAILQ_REMOVE(&_thread_list, thread, tle);
		/* Make sure this isn't the running thread: */
		if (thread != curthread) {
			_thr_stack_free(&thread->attr);
			if (thread->specific != NULL)
				free(thread->specific);
			thr_destroy(curthread, thread);
		}
	}

	TAILQ_INIT(&curthread->mutexq);		/* initialize mutex queue */
	curthread->joiner = NULL;		/* no joining threads yet */
	curthread->refcount = 0;
	SIGEMPTYSET(curthread->sigpend);	/* clear pending signals */

	/* Don't free thread-specific data as the caller may require it */

	/* Free the free KSEs: */
	while ((kse = TAILQ_FIRST(&free_kseq)) != NULL) {
		TAILQ_REMOVE(&free_kseq, kse, k_qe);
		kse_destroy(kse);
	}
	free_kse_count = 0;

	/* Free the active KSEs: */
	while ((kse = TAILQ_FIRST(&active_kseq)) != NULL) {
		TAILQ_REMOVE(&active_kseq, kse, k_qe);
		kse_destroy(kse);
	}
	active_kse_count = 0;

	/* Free the free KSEGs: */
	while ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) {
		TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
		kseg_destroy(kseg);
	}
	free_kseg_count = 0;

	/* Free the active KSEGs: */
	while ((kseg = TAILQ_FIRST(&active_kse_groupq)) != NULL) {
		TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe);
		kseg_destroy(kseg);
	}
	active_kseg_count = 0;

	/* Free the free threads. */
	while ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
		TAILQ_REMOVE(&free_threadq, thread, tle);
		thr_destroy(curthread, thread);
	}
	free_thread_count = 0;

	/* Free the to-be-gc'd threads. */
	while ((thread = TAILQ_FIRST(&_thread_gc_list)) != NULL) {
		TAILQ_REMOVE(&_thread_gc_list, thread, gcle);
		thr_destroy(curthread, thread);
	}
	TAILQ_INIT(&gc_ksegq);
	_gc_count = 0;

	if (inited != 0) {
		/*
		 * Destroy these locks; they'll be recreated to ensure they
		 * are in the unlocked state.
		 */
		_lock_destroy(&kse_lock);
		_lock_destroy(&thread_lock);
		_lock_destroy(&_thread_list_lock);
		inited = 0;
	}

	/* We're no longer part of any lists. */
	curthread->tlflags = 0;

	/*
	 * After a fork, we are still operating on the thread's original
	 * stack.  Don't clear the THR_FLAGS_USER from the thread's
	 * attribute flags.
	 */

	/* Initialize the threads library. */
	curthread->kse = NULL;
	curthread->kseg = NULL;
	_kse_initial = NULL;
	_libpthread_init(curthread);
#else
	int i;

	/* Reset the current thread and KSE lock data. */
	for (i = 0; i < curthread->locklevel; i++) {
		_lockuser_reinit(&curthread->lockusers[i], (void *)curthread);
	}
	curthread->locklevel = 0;
	for (i = 0; i < curthread->kse->k_locklevel; i++) {
		_lockuser_reinit(&curthread->kse->k_lockusers[i],
		    (void *)curthread->kse);
		_LCK_SET_PRIVATE2(&curthread->kse->k_lockusers[i], NULL);
	}
	curthread->kse->k_locklevel = 0;

	/*
	 * Reinitialize the thread and signal locks so that
	 * sigaction() will work after a fork().
	 */
	_lock_reinit(&curthread->lock, LCK_ADAPTIVE, _thr_lock_wait,
	    _thr_lock_wakeup);
	_lock_reinit(&_thread_signal_lock, LCK_ADAPTIVE, _kse_lock_wait,
	    _kse_lock_wakeup);

	_thr_spinlock_init();
	if (__isthreaded) {
		_thr_rtld_fini();
		_thr_signal_deinit();
	}
	__isthreaded = 0;
	curthread->kse->k_kcb->kcb_kmbx.km_curthread = NULL;
	curthread->attr.flags |= PTHREAD_SCOPE_SYSTEM;

	/*
	 * After a fork, it is possible that an upcall occurs in
	 * the parent KSE that fork()'d before the child process
	 * is fully created and before its vm space is copied.
	 * During the upcall, the tcb is set to null or to another
	 * thread, and this is what gets copied in the child process
	 * when the vm space is cloned sometime after the upcall
	 * occurs.  Note that we shouldn't have to set the kcb, but
	 * we do it for completeness.
	 */
	_kcb_set(curthread->kse->k_kcb);
	_tcb_set(curthread->kse->k_kcb, curthread->tcb);

	/* After a fork(), the child should have no pending signals. */
	sigemptyset(&curthread->sigpend);

	/*
	 * Restore the signal mask early, so that any memory problems
	 * can dump core.
	 */
	sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
	_thread_active_threads = 1;
#endif
}

/*
 * This is used to initialize housekeeping and to initialize the
 * KSD for the KSE.
 */
void
_kse_init(void)
{
	if (inited == 0) {
		TAILQ_INIT(&active_kseq);
		TAILQ_INIT(&active_kse_groupq);
		TAILQ_INIT(&free_kseq);
		TAILQ_INIT(&free_kse_groupq);
		TAILQ_INIT(&free_threadq);
		TAILQ_INIT(&gc_ksegq);
		if (_lock_init(&kse_lock, LCK_ADAPTIVE,
		    _kse_lock_wait, _kse_lock_wakeup, calloc) != 0)
			PANIC("Unable to initialize free KSE queue lock");
		if (_lock_init(&thread_lock, LCK_ADAPTIVE,
		    _kse_lock_wait, _kse_lock_wakeup, calloc) != 0)
			PANIC("Unable to initialize free thread queue lock");
		if (_lock_init(&_thread_list_lock, LCK_ADAPTIVE,
		    _kse_lock_wait, _kse_lock_wakeup, calloc) != 0)
			PANIC("Unable to initialize thread list lock");
		_pthread_mutex_init(&_tcb_mutex, NULL);
		active_kse_count = 0;
		active_kseg_count = 0;
		_gc_count = 0;
		inited = 1;
	}
}

/*
 * This is called when the first thread (other than the initial
 * thread) is created.
 */
int
_kse_setthreaded(int threaded)
{
	sigset_t sigset;

	if ((threaded != 0) && (__isthreaded == 0)) {
		SIGFILLSET(sigset);
		__sys_sigprocmask(SIG_SETMASK, &sigset, &_thr_initial->sigmask);

		/*
		 * Tell the kernel to create a KSE for the initial thread
		 * and enable upcalls in it.
		 */
		_kse_initial->k_flags |= KF_STARTED;

		if (_thread_scope_system <= 0) {
			_thr_initial->attr.flags &= ~PTHREAD_SCOPE_SYSTEM;
			_kse_initial->k_kseg->kg_flags &= ~KGF_SINGLE_THREAD;
			_kse_initial->k_kcb->kcb_kmbx.km_curthread = NULL;
		}
		else {
			/*
			 * For a bound thread, the kernel reads the mailbox
			 * pointer only once, so set it here before calling
			 * kse_create.
			 */
			_tcb_set(_kse_initial->k_kcb, _thr_initial->tcb);
			KSE_SET_MBOX(_kse_initial, _thr_initial);
			_kse_initial->k_kcb->kcb_kmbx.km_flags |= KMF_BOUND;
		}

		/*
		 * Locking functions in libc are required when there are
		 * threads other than the initial thread.
		 */
		_thr_rtld_init();

		__isthreaded = 1;
		if (kse_create(&_kse_initial->k_kcb->kcb_kmbx, 0) != 0) {
			_kse_initial->k_flags &= ~KF_STARTED;
			__isthreaded = 0;
			PANIC("kse_create() failed\n");
			return (-1);
		}
		_thr_initial->tcb->tcb_tmbx.tm_lwp =
			_kse_initial->k_kcb->kcb_kmbx.km_lwp;
		_thread_activated = 1;

#ifndef SYSTEM_SCOPE_ONLY
		if (_thread_scope_system <= 0) {
			/* Set current thread to initial thread */
			_tcb_set(_kse_initial->k_kcb, _thr_initial->tcb);
			KSE_SET_MBOX(_kse_initial, _thr_initial);
			_thr_start_sig_daemon();
			_thr_setmaxconcurrency();
		}
		else
#endif
			__sys_sigprocmask(SIG_SETMASK, &_thr_initial->sigmask,
			    NULL);
	}
	return (0);
}

/*
 * Lock wait and wakeup handlers for KSE locks.  These are only used by
 * KSEs, and should never be used by threads.  KSE locks include the
 * KSE group lock (used for locking the scheduling queue) and the
 * kse_lock defined above.
 *
 * When a KSE lock attempt blocks, the entire KSE blocks, allowing another
 * KSE to run.  For the most part, it doesn't make much sense to try and
 * schedule another thread because you need to lock the scheduling queue
 * in order to do that.  And since the KSE lock is used to lock the scheduling
 * queue, you would just end up blocking again.
 */
void
_kse_lock_wait(struct lock *lock __unused, struct lockuser *lu)
{
	struct kse *curkse = (struct kse *)_LCK_GET_PRIVATE(lu);
	struct timespec ts;
	int saved_flags;

	if (curkse->k_kcb->kcb_kmbx.km_curthread != NULL)
		PANIC("kse_lock_wait does not disable upcall.\n");
	/*
	 * Enter a loop to wait until we get the lock.
	 */
	ts.tv_sec = 0;
	ts.tv_nsec = 1000000;  /* 1 msec */
	while (!_LCK_GRANTED(lu)) {
		/*
		 * Yield the kse and wait to be notified when the lock
		 * is granted.
		 */
		saved_flags = curkse->k_kcb->kcb_kmbx.km_flags;
		curkse->k_kcb->kcb_kmbx.km_flags |= KMF_NOUPCALL |
		    KMF_NOCOMPLETED;
		kse_release(&ts);
		curkse->k_kcb->kcb_kmbx.km_flags = saved_flags;
	}
}

void
_kse_lock_wakeup(struct lock *lock, struct lockuser *lu)
{
	struct kse *curkse;
	struct kse *kse;
	struct kse_mailbox *mbx;

	curkse = _get_curkse();
	kse = (struct kse *)_LCK_GET_PRIVATE(lu);

	if (kse == curkse)
		PANIC("KSE trying to wake itself up in lock");
	else {
		mbx = &kse->k_kcb->kcb_kmbx;
		_lock_grant(lock, lu);
		/*
		 * Notify the owning kse that it has the lock.
		 * It is safe to pass an invalid address to kse_wakeup
		 * even if the mailbox is not in the kernel at all,
		 * and waking up the wrong kse is also harmless.
		 */
		kse_wakeup(mbx);
	}
}

/*
 * Thread wait and wakeup handlers for thread locks.  These are only used
 * by threads, never by KSEs.  Thread locks include the per-thread lock
 * (defined in its structure), and condition variable and mutex locks.
 */
void
_thr_lock_wait(struct lock *lock __unused, struct lockuser *lu)
{
	struct pthread *curthread = (struct pthread *)lu->lu_private;

	do {
		THR_LOCK_SWITCH(curthread);
		THR_SET_STATE(curthread, PS_LOCKWAIT);
		_thr_sched_switch_unlocked(curthread);
	} while (!_LCK_GRANTED(lu));
}

void
_thr_lock_wakeup(struct lock *lock __unused, struct lockuser *lu)
{
	struct pthread *thread;
	struct pthread *curthread;
	struct kse_mailbox *kmbx;

	curthread = _get_curthread();
	thread = (struct pthread *)_LCK_GET_PRIVATE(lu);

	THR_SCHED_LOCK(curthread, thread);
	_lock_grant(lock, lu);
	kmbx = _thr_setrunnable_unlocked(thread);
	THR_SCHED_UNLOCK(curthread, thread);
	if (kmbx != NULL)
		kse_wakeup(kmbx);
}

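/*
 * KSE critical regions.  Entering a critical region saves and clears
 * the KSE mailbox's current-thread pointer (the saved pointer serves
 * as the token); while it is clear, the kernel treats the KSE as if
 * it were running the UTS and defers upcalls accordingly.
 */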
kse_critical_t
_kse_critical_enter(void)
{
	kse_critical_t crit;

	crit = (kse_critical_t)_kcb_critical_enter();
	return (crit);
}

void
_kse_critical_leave(kse_critical_t crit)
{
	struct pthread *curthread;

	_kcb_critical_leave((struct kse_thr_mailbox *)crit);
	if ((crit != NULL) && ((curthread = _get_curthread()) != NULL))
		THR_YIELD_CHECK(curthread);
}

int
_kse_in_critical(void)
{
	return (_kcb_in_critical());
}

void
_thr_critical_enter(struct pthread *thread)
{
	thread->critical_count++;
}

void
_thr_critical_leave(struct pthread *thread)
{
	thread->critical_count--;
	THR_YIELD_CHECK(thread);
}

void
_thr_sched_switch(struct pthread *curthread)
{
	struct kse *curkse;

	(void)_kse_critical_enter();
	curkse = _get_curkse();
	KSE_SCHED_LOCK(curkse, curkse->k_kseg);
	_thr_sched_switch_unlocked(curthread);
}

/*
 * XXX - We may need to take the scheduling lock before calling
 *       this, or perhaps take the lock within here before
 *       doing anything else.
 */
void
_thr_sched_switch_unlocked(struct pthread *curthread)
{
	struct kse *curkse;
	volatile int resume_once = 0;
	ucontext_t *uc;

	/* We're in the scheduler, 5 by 5: */
	curkse = curthread->kse;

	curthread->need_switchout = 1;	/* The thread yielded on its own. */
	curthread->critical_yield = 0;	/* No need to yield anymore. */

	/* Thread can unlock the scheduler lock. */
	curthread->lock_switch = 1;

	if (curthread->attr.flags & PTHREAD_SCOPE_SYSTEM)
		kse_sched_single(&curkse->k_kcb->kcb_kmbx);
	else {
		if (__predict_false(_libkse_debug != 0)) {
			/*
			 * Because the debugger saves single-step status in
			 * the thread mailbox's tm_dflags, we can safely
			 * clear the single-step status here.  It will be
			 * restored by kse_switchin when the thread is
			 * switched in again.  This also lets the UTS run
			 * at full speed.
			 */
			 ptrace(PT_CLEARSTEP, curkse->k_kcb->kcb_kmbx.km_lwp,
				(caddr_t) 1, 0);
		}

		KSE_SET_SWITCH(curkse);
		_thread_enter_uts(curthread->tcb, curkse->k_kcb);
	}

	/*
	 * Unlock the scheduling queue and leave the
	 * critical region.
	 */
	/* Don't trust this after a switch! */
	curkse = curthread->kse;

	curthread->lock_switch = 0;
	KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
	_kse_critical_leave(&curthread->tcb->tcb_tmbx);

	/*
	 * This thread is being resumed; check for cancellations.
	 */
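	/*
	 * resume_once guards this check: _thr_sig_rundown() may resume
	 * execution through the context captured by THR_GETCONTEXT
	 * below, landing here a second time with resume_once set.
	 */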
	if (THR_NEED_ASYNC_CANCEL(curthread) && !THR_IN_CRITICAL(curthread)) {
		uc = alloca(sizeof(ucontext_t));
		resume_once = 0;
		THR_GETCONTEXT(uc);
		if (resume_once == 0) {
			resume_once = 1;
			curthread->check_pending = 0;
			thr_resume_check(curthread, uc);
		}
	}
	THR_ACTIVATE_LAST_LOCK(curthread);
}

/*
 * This is the scheduler for a KSE which runs a scope system thread.
 * The multi-threaded KSE scheduler should also work for a single threaded
 * KSE, but we use a separate scheduler so that it can be fine-tuned
 * to be more efficient (and perhaps not need a separate stack for
 * the KSE, allowing it to use the thread's stack).
 */

static void
kse_sched_single(struct kse_mailbox *kmbx)
{
	struct kse *curkse;
	struct pthread *curthread;
	struct timespec ts;
	sigset_t sigmask;
	int i, sigseqno, level, first = 0;

	curkse = (struct kse *)kmbx->km_udata;
	curthread = curkse->k_curthread;

	if (__predict_false((curkse->k_flags & KF_INITIALIZED) == 0)) {
		/* Set up this KSE's specific data. */
		_kcb_set(curkse->k_kcb);
		_tcb_set(curkse->k_kcb, curthread->tcb);
		curkse->k_flags |= KF_INITIALIZED;
		first = 1;
		curthread->active = 1;

		/* Set up kernel signal masks for the new thread. */
		__sys_sigprocmask(SIG_SETMASK, &curthread->sigmask, NULL);
		/*
		 * Enter a critical region.  This is meaningless for a
		 * bound thread, but is done so that other code that
		 * expects the mailbox to be cleared will work.
		 */
		(void)_kse_critical_enter();
	} else {
		/*
		 * A bound thread always has its tcb set; this prevents
		 * some code from blindly setting a bound thread's tcb
		 * to NULL.  Buggy code?
		 */
		_tcb_set(curkse->k_kcb, curthread->tcb);
	}

	curthread->critical_yield = 0;
	curthread->need_switchout = 0;

	/*
	 * Lock the scheduling queue.
	 *
	 * There is no scheduling queue for single threaded KSEs,
	 * but we need a lock for protection regardless.
	 */
	if (curthread->lock_switch == 0)
		KSE_SCHED_LOCK(curkse, curkse->k_kseg);

	/*
	 * This has to do the job of kse_switchout_thread(), only
	 * for a single threaded KSE/KSEG.
	 */

	switch (curthread->state) {
	case PS_MUTEX_WAIT:
	case PS_COND_WAIT:
		if (THR_NEED_CANCEL(curthread)) {
			curthread->interrupted = 1;
			curthread->continuation = _thr_finish_cancellation;
			THR_SET_STATE(curthread, PS_RUNNING);
		}
		break;

	case PS_LOCKWAIT:
		/*
		 * This state doesn't timeout.
		 */
		curthread->wakeup_time.tv_sec = -1;
		curthread->wakeup_time.tv_nsec = -1;
		level = curthread->locklevel - 1;
		if (_LCK_GRANTED(&curthread->lockusers[level]))
			THR_SET_STATE(curthread, PS_RUNNING);
		break;

	case PS_DEAD:
		/* Unlock the scheduling queue and exit the KSE and thread. */
		thr_cleanup(curkse, curthread);
		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
		PANIC("bound thread shouldn't get here\n");
		break;

	case PS_JOIN:
		if (THR_NEED_CANCEL(curthread)) {
			curthread->join_status.thread = NULL;
			THR_SET_STATE(curthread, PS_RUNNING);
		} else {
			/*
			 * This state doesn't timeout.
			 */
			curthread->wakeup_time.tv_sec = -1;
			curthread->wakeup_time.tv_nsec = -1;
		}
		break;

	case PS_SUSPENDED:
		if (THR_NEED_CANCEL(curthread)) {
			curthread->interrupted = 1;
			THR_SET_STATE(curthread, PS_RUNNING);
		} else {
			/*
			 * These states don't timeout.
			 */
			curthread->wakeup_time.tv_sec = -1;
			curthread->wakeup_time.tv_nsec = -1;
		}
		break;

	case PS_RUNNING:
		if ((curthread->flags & THR_FLAGS_SUSPENDED) != 0 &&
		    !THR_NEED_CANCEL(curthread)) {
			THR_SET_STATE(curthread, PS_SUSPENDED);
			/*
			 * These states don't timeout.
			 */
			curthread->wakeup_time.tv_sec = -1;
			curthread->wakeup_time.tv_nsec = -1;
		}
		break;

	case PS_SIGWAIT:
		PANIC("bound thread does not have SIGWAIT state\n");

	case PS_SLEEP_WAIT:
		PANIC("bound thread does not have SLEEP_WAIT state\n");

	case PS_SIGSUSPEND:
		PANIC("bound thread does not have SIGSUSPEND state\n");

	case PS_DEADLOCK:
		/*
		 * These states don't timeout and don't need
		 * to be in the waiting queue.
		 */
		curthread->wakeup_time.tv_sec = -1;
		curthread->wakeup_time.tv_nsec = -1;
		break;

	default:
		PANIC("Unknown state\n");
		break;
	}

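	/*
	 * Loop until the thread becomes runnable: install any pending
	 * signals (which may cause a mutex or condvar backout), block
	 * in the kernel via kse_wait(), and honor wakeup timeouts.
	 */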
	while (curthread->state != PS_RUNNING) {
		sigseqno = curkse->k_sigseqno;
		if (curthread->check_pending != 0) {
			/*
			 * Install pending signals into the frame, possibly
			 * causing a mutex or condvar backout.
			 */
			curthread->check_pending = 0;
			SIGFILLSET(sigmask);

			/*
			 * Lock out kernel signal code when we are processing
			 * signals, and get a fresh copy of the signal mask.
			 */
			__sys_sigprocmask(SIG_SETMASK, &sigmask,
					  &curthread->sigmask);
			for (i = 1; i <= _SIG_MAXSIG; i++) {
				if (SIGISMEMBER(curthread->sigmask, i))
					continue;
				if (SIGISMEMBER(curthread->sigpend, i))
					(void)_thr_sig_add(curthread, i,
					    &curthread->siginfo[i-1]);
			}
			__sys_sigprocmask(SIG_SETMASK, &curthread->sigmask,
				NULL);
			/* The above code might make the thread runnable. */
			if (curthread->state == PS_RUNNING)
				break;
		}
		THR_DEACTIVATE_LAST_LOCK(curthread);
		kse_wait(curkse, curthread, sigseqno);
		THR_ACTIVATE_LAST_LOCK(curthread);
		if (curthread->wakeup_time.tv_sec >= 0) {
			KSE_GET_TOD(curkse, &ts);
			if (thr_timedout(curthread, &ts)) {
				/* Indicate the thread timed out: */
				curthread->timeout = 1;
				/* Make the thread runnable. */
				THR_SET_STATE(curthread, PS_RUNNING);
			}
		}
	}

	if (curthread->lock_switch == 0) {
		/* Unlock the scheduling queue. */
		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
	}

	DBG_MSG("Continuing bound thread %p\n", curthread);
	if (first) {
		_kse_critical_leave(&curthread->tcb->tcb_tmbx);
		pthread_exit(curthread->start_routine(curthread->arg));
	}
}

#ifdef DEBUG_THREAD_KERN
static void
dump_queues(struct kse *curkse)
{
	struct pthread *thread;

	DBG_MSG("Threads in waiting queue:\n");
	TAILQ_FOREACH(thread, &curkse->k_kseg->kg_schedq.sq_waitq, pqe) {
		DBG_MSG("  thread %p, state %d, blocked %d\n",
		    thread, thread->state, thread->blocked);
	}
}
#endif

/*
 * This is the scheduler for a KSE which runs multiple threads.
 */
static void
kse_sched_multi(struct kse_mailbox *kmbx)
{
	struct kse *curkse;
	struct pthread *curthread, *td_wait;
	int ret;

	curkse = (struct kse *)kmbx->km_udata;
	THR_ASSERT(curkse->k_kcb->kcb_kmbx.km_curthread == NULL,
	    "Mailbox not null in kse_sched_multi");

	/* Check for first time initialization: */
	if (__predict_false((curkse->k_flags & KF_INITIALIZED) == 0)) {
		/* Set up this KSE's specific data. */
		_kcb_set(curkse->k_kcb);

		/* Set this before grabbing the context. */
		curkse->k_flags |= KF_INITIALIZED;
	}

	/*
	 * There is no current thread anymore; calling _get_curthread
	 * in the UTS should dump core.
	 */
	_tcb_set(curkse->k_kcb, NULL);

	/* If this is an upcall, take the scheduler lock. */
	if (!KSE_IS_SWITCH(curkse))
		KSE_SCHED_LOCK(curkse, curkse->k_kseg);
	else
		KSE_CLEAR_SWITCH(curkse);

	if (KSE_IS_IDLE(curkse)) {
		KSE_CLEAR_IDLE(curkse);
		curkse->k_kseg->kg_idle_kses--;
	}

	/*
	 * Now that the scheduler lock is held, get the current
	 * thread.  The KSE's current thread cannot be safely
	 * examined without the lock because it could have returned
	 * as completed on another KSE.  See kse_check_completed().
	 */
	curthread = curkse->k_curthread;

	/*
	 * If the current thread was completed in another KSE, then
	 * it will be in the run queue.  Don't mark it as being blocked.
	 */
	if ((curthread != NULL) &&
	    ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0) &&
	    (curthread->need_switchout == 0)) {
		/*
		 * Assume the current thread is blocked; when the
		 * completed threads are checked and if the current
		 * thread is among the completed, the blocked flag
		 * will be cleared.
		 */
		curthread->blocked = 1;
		DBG_MSG("Running thread %p is now blocked in kernel.\n",
		    curthread);
	}

	/* Check for any unblocked threads in the kernel. */
	kse_check_completed(curkse);

	/*
	 * Check for threads that have timed out.
	 */
	kse_check_waitq(curkse);
99613546Sjulian
997113658Sdeischen	/*
998113658Sdeischen	 * Switchout the current thread, if necessary, as the last step
999113658Sdeischen	 * so that it is inserted into the run queue (if it's runnable)
1000113658Sdeischen	 * _after_ any other threads that were added to it above.
1001113658Sdeischen	 */
1002113658Sdeischen	if (curthread == NULL)
1003113658Sdeischen		;  /* Nothing to do here. */
1004132120Sdavidxu	else if ((curthread->need_switchout == 0) && DBG_CAN_RUN(curthread) &&
1005113658Sdeischen	    (curthread->blocked == 0) && (THR_IN_CRITICAL(curthread))) {
1006113658Sdeischen		/*
1007113658Sdeischen		 * Resume the thread and tell it to yield when
1008113658Sdeischen		 * it leaves the critical region.
1009113658Sdeischen		 */
1010114187Sdeischen		curthread->critical_yield = 1;
1011113658Sdeischen		curthread->active = 1;
1012113658Sdeischen		if ((curthread->flags & THR_FLAGS_IN_RUNQ) != 0)
1013113658Sdeischen			KSE_RUNQ_REMOVE(curkse, curthread);
1014118817Sdavidxu		curkse->k_curthread = curthread;
1015113658Sdeischen		curthread->kse = curkse;
1016113658Sdeischen		DBG_MSG("Continuing thread %p in critical region\n",
1017113658Sdeischen		    curthread);
1018114267Sdavidxu		kse_wakeup_multi(curkse);
1019115080Sdeischen		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1020118510Sdeischen		ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
1021113658Sdeischen		if (ret != 0)
1022113658Sdeischen			PANIC("Can't resume thread in critical region\n");
1023103419Smini	}
1024132120Sdavidxu	else if ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0) {
1025132120Sdavidxu		curthread->tcb->tcb_tmbx.tm_lwp = 0;
1026113658Sdeischen		kse_switchout_thread(curkse, curthread);
1027132120Sdavidxu	}
1028118817Sdavidxu	curkse->k_curthread = NULL;
102953812Salfred
1030114688Sdavidxu#ifdef DEBUG_THREAD_KERN
1031113658Sdeischen	dump_queues(curkse);
1032114688Sdavidxu#endif
1033113658Sdeischen
1034113658Sdeischen	/* Check if there are no threads ready to run: */
1035113658Sdeischen	while (((curthread = KSE_RUNQ_FIRST(curkse)) == NULL) &&
1036119577Sdeischen	    (curkse->k_kseg->kg_threadcount != 0) &&
1037119577Sdeischen	    ((curkse->k_flags & KF_TERMINATED) == 0)) {
1038113658Sdeischen		/*
1039113658Sdeischen		 * Wait for a thread to become active or until there are
1040113658Sdeischen		 * no more threads.
1041113658Sdeischen		 */
1042113786Sdeischen		td_wait = KSE_WAITQ_FIRST(curkse);
1043117706Sdavidxu		kse_wait(curkse, td_wait, 0);
1044113786Sdeischen		kse_check_completed(curkse);
1045113658Sdeischen		kse_check_waitq(curkse);
104667097Sdeischen	}
1047113658Sdeischen
1048113658Sdeischen	/* Check for no more threads: */
1049119577Sdeischen	if ((curkse->k_kseg->kg_threadcount == 0) ||
1050119577Sdeischen	    ((curkse->k_flags & KF_TERMINATED) != 0)) {
1051113658Sdeischen		/*
1052113658Sdeischen		 * Normally this shouldn't return, but it will if there
1053113658Sdeischen		 * are other KSEs running that create new threads that
1054113658Sdeischen		 * are assigned to this KSE[G].  For instance, if a scope
1055113658Sdeischen		 * system thread were to create a scope process thread
1056113658Sdeischen		 * and this kse[g] is the initial kse[g], then that newly
1057113658Sdeischen		 * created thread would be assigned to us (the initial
1058113658Sdeischen		 * kse[g]).
1059113658Sdeischen		 */
1060119700Sdavidxu		kse_wakeup_multi(curkse);
1061113658Sdeischen		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1062113658Sdeischen		kse_fini(curkse);
1063115278Sdeischen		/* never returns */
1064113658Sdeischen	}
1065113658Sdeischen
1066113658Sdeischen	THR_ASSERT(curthread != NULL,
1067113658Sdeischen	    "Return from kse_wait/fini without thread.");
1068113658Sdeischen	THR_ASSERT(curthread->state != PS_DEAD,
1069113658Sdeischen	    "Trying to resume dead thread!");
1070113658Sdeischen	KSE_RUNQ_REMOVE(curkse, curthread);
1071113658Sdeischen
1072113658Sdeischen	/*
1073113658Sdeischen	 * Make the selected thread the current thread.
1074113658Sdeischen	 */
1075118817Sdavidxu	curkse->k_curthread = curthread;
1076113658Sdeischen
1077113658Sdeischen	/*
1078113658Sdeischen	 * Make sure the current thread's kse points to this kse.
1079113658Sdeischen	 */
1080113658Sdeischen	curthread->kse = curkse;
1081113658Sdeischen
1082113658Sdeischen	/*
1083113658Sdeischen	 * Reset the time slice if this thread is running for the first
1084113658Sdeischen	 * time or running again after using its full time slice allocation.
1085113658Sdeischen	 */
1086113658Sdeischen	if (curthread->slice_usec == -1)
1087113658Sdeischen		curthread->slice_usec = 0;
1088113658Sdeischen
1089113658Sdeischen	/* Mark the thread active. */
1090113658Sdeischen	curthread->active = 1;
1091113658Sdeischen
1092113658Sdeischen	/*
1093113658Sdeischen	 * The thread's current signal frame will only be NULL if it
1094113658Sdeischen	 * is being resumed after being blocked in the kernel.  In
1095113658Sdeischen	 * this case, and if the thread needs to run down pending
1096113658Sdeischen	 * signals or needs a cancellation check, we need to add a
1097113658Sdeischen	 * signal frame to the thread's context.
1098113658Sdeischen	 */
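	/*
	 * signalcontext() below pushes a frame that runs
	 * thr_resume_wrapper before the thread resumes at its saved
	 * context, so the rundown happens on the thread's own stack.
	 */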
	if (curthread->lock_switch == 0 && curthread->state == PS_RUNNING &&
	    (curthread->check_pending != 0 ||
	     THR_NEED_ASYNC_CANCEL(curthread)) &&
	    !THR_IN_CRITICAL(curthread)) {
		curthread->check_pending = 0;
		signalcontext(&curthread->tcb->tcb_tmbx.tm_context, 0,
		    (__sighandler_t *)thr_resume_wrapper);
	}
	kse_wakeup_multi(curkse);
	/*
	 * Continue the thread at its current frame:
	 */
	if (curthread->lock_switch != 0) {
		/*
		 * This thread came from a scheduler switch; it will
		 * unlock the scheduler lock and set the mailbox.
		 */
		ret = _thread_switch(curkse->k_kcb, curthread->tcb, 0);
	} else {
		/* This thread won't unlock the scheduler lock. */
		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
		ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
	}
	if (ret != 0)
		PANIC("Thread has returned from _thread_switch");

	/* This point should not be reached. */
	PANIC("Thread has returned from _thread_switch");
}

static void
thr_resume_wrapper(int sig __unused, siginfo_t *siginfo __unused,
    ucontext_t *ucp)
{
	struct pthread *curthread = _get_curthread();
	struct kse *curkse;
	int ret, err_save = errno;

	DBG_MSG(">>> sig wrapper\n");
	if (curthread->lock_switch)
		PANIC("thr_resume_wrapper, lock_switch != 0\n");
	thr_resume_check(curthread, ucp);
	errno = err_save;
	_kse_critical_enter();
	curkse = curthread->kse;
	curthread->tcb->tcb_tmbx.tm_context = *ucp;
	ret = _thread_switch(curkse->k_kcb, curthread->tcb, 1);
	if (ret != 0)
		PANIC("thr_resume_wrapper: thread has returned "
		      "from _thread_switch");
	/* THR_SETCONTEXT(ucp); */ /* doesn't work; why? */
}

static void
thr_resume_check(struct pthread *curthread, ucontext_t *ucp)
{
	_thr_sig_rundown(curthread, ucp);

	if (THR_NEED_ASYNC_CANCEL(curthread))
		pthread_testcancel();
}

1161113658Sdeischen/*
1162113658Sdeischen * Clean up a thread.  This must be called with the thread's KSE
1163113658Sdeischen * scheduling lock held.  The thread must be a thread from the
1164113658Sdeischen * KSE's group.
1165113658Sdeischen */
1166113658Sdeischenstatic void
1167113658Sdeischenthr_cleanup(struct kse *curkse, struct pthread *thread)
1168113658Sdeischen{
1169113658Sdeischen	struct pthread *joiner;
1170117907Sdeischen	struct kse_mailbox *kmbx = NULL;
1171115278Sdeischen	int sys_scope;
1172113658Sdeischen
1173165334Speadar	thread->active = 0;
1174165334Speadar	thread->need_switchout = 0;
1175165334Speadar	thread->lock_switch = 0;
1176165334Speadar	thread->check_pending = 0;
1177165334Speadar
1178113658Sdeischen	if ((joiner = thread->joiner) != NULL) {
1179115278Sdeischen		/* Joinee scheduler lock held; joiner won't leave. */
1180115278Sdeischen		if (joiner->kseg == curkse->k_kseg) {
1181115278Sdeischen			if (joiner->join_status.thread == thread) {
1182115278Sdeischen				joiner->join_status.thread = NULL;
1183115278Sdeischen				joiner->join_status.ret = thread->ret;
1184117907Sdeischen				(void)_thr_setrunnable_unlocked(joiner);
1185115278Sdeischen			}
1186115278Sdeischen		} else {
1187115278Sdeischen			KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1188115278Sdeischen			/* The joiner may have removed itself and exited. */
1189115278Sdeischen			if (_thr_ref_add(thread, joiner, 0) == 0) {
1190113658Sdeischen				KSE_SCHED_LOCK(curkse, joiner->kseg);
1191115278Sdeischen				if (joiner->join_status.thread == thread) {
1192115278Sdeischen					joiner->join_status.thread = NULL;
1193115278Sdeischen					joiner->join_status.ret = thread->ret;
1194117907Sdeischen					kmbx = _thr_setrunnable_unlocked(joiner);
1195115278Sdeischen				}
1196113658Sdeischen				KSE_SCHED_UNLOCK(curkse, joiner->kseg);
1197115278Sdeischen				_thr_ref_delete(thread, joiner);
1198117907Sdeischen				if (kmbx != NULL)
1199117907Sdeischen					kse_wakeup(kmbx);
1200106786Smini			}
1201115278Sdeischen			KSE_SCHED_LOCK(curkse, curkse->k_kseg);
120213546Sjulian		}
1203113658Sdeischen		thread->attr.flags |= PTHREAD_DETACHED;
1204113658Sdeischen	}
120513546Sjulian
1206115278Sdeischen	if (!(sys_scope = (thread->attr.flags & PTHREAD_SCOPE_SYSTEM))) {
1207113661Sdeischen		/*
1208113661Sdeischen		 * Remove the thread from the KSEG's list of threads.
1209113661Sdeischen	 	 */
1210113661Sdeischen		KSEG_THRQ_REMOVE(thread->kseg, thread);
1211113661Sdeischen		/*
1212113661Sdeischen		 * Migrate the thread to the main KSE so that this
1213113661Sdeischen		 * KSE and KSEG can be cleaned when their last thread
1214113661Sdeischen		 * exits.
1215113661Sdeischen		 */
1216113661Sdeischen		thread->kseg = _kse_initial->k_kseg;
1217113661Sdeischen		thread->kse = _kse_initial;
1218113661Sdeischen	}
1219113661Sdeischen
1220113661Sdeischen	/*
1221113661Sdeischen	 * We can't hold the thread list lock while holding the
1222113661Sdeischen	 * scheduler lock.
1223113661Sdeischen	 */
1224113661Sdeischen	KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1225113661Sdeischen	DBG_MSG("Adding thread %p to GC list\n", thread);
1226113658Sdeischen	KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
1227136846Sdavidxu	thread->tlflags |= TLFLAGS_GC_SAFE;
1228119732Sdavidxu	THR_GCLIST_ADD(thread);
1229113658Sdeischen	KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
1230115278Sdeischen	if (sys_scope) {
1231115278Sdeischen		/*
1232115278Sdeischen		 * A system scope thread is in its own thread group;
1233115278Sdeischen		 * when the thread exits, its KSE and KSEG should be
1234115278Sdeischen		 * recycled as well.  The KSE upcall stack belongs to
1235117706Sdavidxu		 * the thread, so clear it here.
1236115278Sdeischen		 */
1237117706Sdavidxu		curkse->k_stack.ss_sp = NULL;
1238117706Sdavidxu		curkse->k_stack.ss_size = 0;
1239115278Sdeischen		kse_exit();
1240115278Sdeischen		PANIC("kse_exit() failed for system scope thread");
1241115278Sdeischen	}
1242113661Sdeischen	KSE_SCHED_LOCK(curkse, curkse->k_kseg);
1243113658Sdeischen}
124467097Sdeischen
1245113658Sdeischenvoid
1246113661Sdeischen_thr_gc(struct pthread *curthread)
1247113658Sdeischen{
1248118676Sdavidxu	thread_gc(curthread);
1249118676Sdavidxu	kse_gc(curthread);
1250118676Sdavidxu	kseg_gc(curthread);
1251118676Sdavidxu}
1252118676Sdavidxu
1253118676Sdavidxustatic void
1254118676Sdavidxuthread_gc(struct pthread *curthread)
1255118676Sdavidxu{
1256113661Sdeischen	struct pthread *td, *td_next;
1257113661Sdeischen	kse_critical_t crit;
1258113786Sdeischen	TAILQ_HEAD(, pthread) worklist;
1259106786Smini
1260113786Sdeischen	TAILQ_INIT(&worklist);
1261113661Sdeischen	crit = _kse_critical_enter();
1262113661Sdeischen	KSE_LOCK_ACQUIRE(curthread->kse, &_thread_list_lock);
1263113661Sdeischen
1264113661Sdeischen	/* Check the threads waiting for GC. */
1265113661Sdeischen	for (td = TAILQ_FIRST(&_thread_gc_list); td != NULL; td = td_next) {
1266113661Sdeischen		td_next = TAILQ_NEXT(td, gcle);
1267136846Sdavidxu		if ((td->tlflags & TLFLAGS_GC_SAFE) == 0)
1268113661Sdeischen			continue;
1269115278Sdeischen		else if (((td->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) &&
1270118510Sdeischen		    ((td->kse->k_kcb->kcb_kmbx.km_flags & KMF_DONE) == 0)) {
1271113661Sdeischen			/*
1272113661Sdeischen			 * The thread and KSE are operating on the same
1273113661Sdeischen			 * stack.  Wait for the KSE to exit before freeing
1274113661Sdeischen			 * the thread's stack as well as everything else.
1275113661Sdeischen			 */
1276113661Sdeischen			continue;
1277113661Sdeischen		}
1278113786Sdeischen		/*
1279113786Sdeischen		 * Remove the thread from the GC list.  If the thread
1280113786Sdeischen		 * isn't yet detached, it will get added back to the
1281113786Sdeischen		 * GC list at a later time.
1282113786Sdeischen		 */
1283113658Sdeischen		THR_GCLIST_REMOVE(td);
1284113786Sdeischen		DBG_MSG("Freeing thread %p stack\n", td);
1285113786Sdeischen		/*
1286113786Sdeischen		 * We can free the thread stack since it's no longer
1287113786Sdeischen		 * in use.
1288113786Sdeischen		 */
1289113661Sdeischen		_thr_stack_free(&td->attr);
1290113786Sdeischen		if (((td->attr.flags & PTHREAD_DETACHED) != 0) &&
1291113786Sdeischen		    (td->refcount == 0)) {
1292113786Sdeischen			/*
1293113786Sdeischen			 * The thread has detached and is no longer
1294113786Sdeischen			 * referenced.  It is safe to remove all
1295113786Sdeischen			 * remnants of the thread.
1296113786Sdeischen			 */
1297114187Sdeischen			THR_LIST_REMOVE(td);
1298113786Sdeischen			TAILQ_INSERT_HEAD(&worklist, td, gcle);
1299113786Sdeischen		}
1300113786Sdeischen	}
1301113786Sdeischen	KSE_LOCK_RELEASE(curthread->kse, &_thread_list_lock);
1302113786Sdeischen	_kse_critical_leave(crit);
1303113658Sdeischen
1304113786Sdeischen	while ((td = TAILQ_FIRST(&worklist)) != NULL) {
1305113786Sdeischen		TAILQ_REMOVE(&worklist, td, gcle);
1306118985Sdavidxu		/*
1307118985Sdavidxu		 * XXX we don't free the initial thread or its KSE
1308118985Sdavidxu		 * (if the thread is bound), because some code might
1309118985Sdavidxu		 * still reference the initial thread and KSE.
1310118985Sdavidxu		 */
1311118985Sdavidxu		if (td == _thr_initial) {
1312118985Sdavidxu			DBG_MSG("Initial thread won't be freed\n");
1313118985Sdavidxu			continue;
1314118985Sdavidxu		}
1315113786Sdeischen
1316115278Sdeischen		if ((td->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) {
1317113786Sdeischen			crit = _kse_critical_enter();
1318113661Sdeischen			KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1319113661Sdeischen			kse_free_unlocked(td->kse);
1320113870Sdeischen			kseg_free_unlocked(td->kseg);
1321113661Sdeischen			KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1322113661Sdeischen			_kse_critical_leave(crit);
1323113658Sdeischen		}
1324118985Sdavidxu		DBG_MSG("Freeing thread %p\n", td);
1325118985Sdavidxu		_thr_free(curthread, td);
1326113658Sdeischen	}
1327113658Sdeischen}
1328106786Smini
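/*
 * Trim the free KSE cache down to MAX_CACHED_KSES.  Excess KSEs are
 * unlinked onto a local work list while the kse lock is held and
 * destroyed only after the lock is released.
 */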
1329118676Sdavidxustatic void
1330118676Sdavidxukse_gc(struct pthread *curthread)
1331118676Sdavidxu{
1332118676Sdavidxu	kse_critical_t crit;
1333118676Sdavidxu	TAILQ_HEAD(, kse) worklist;
1334118676Sdavidxu	struct kse *kse;
1335113658Sdeischen
1336118676Sdavidxu	if (free_kse_count <= MAX_CACHED_KSES)
1337118676Sdavidxu		return;
1338118676Sdavidxu	TAILQ_INIT(&worklist);
1339118676Sdavidxu	crit = _kse_critical_enter();
1340118676Sdavidxu	KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1341118676Sdavidxu	while (free_kse_count > MAX_CACHED_KSES) {
1342118676Sdavidxu		kse = TAILQ_FIRST(&free_kseq);
1343118676Sdavidxu		TAILQ_REMOVE(&free_kseq, kse, k_qe);
1344118676Sdavidxu		TAILQ_INSERT_HEAD(&worklist, kse, k_qe);
1345118676Sdavidxu		free_kse_count--;
1346118676Sdavidxu	}
1347118676Sdavidxu	KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1348118676Sdavidxu	_kse_critical_leave(crit);
1349118676Sdavidxu
1350118676Sdavidxu	while ((kse = TAILQ_FIRST(&worklist))) {
1351118676Sdavidxu		TAILQ_REMOVE(&worklist, kse, k_qe);
1352118676Sdavidxu		kse_destroy(kse);
1353118676Sdavidxu	}
1354118676Sdavidxu}
1355118676Sdavidxu
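/*
 * Trim the free KSE group cache down to MAX_CACHED_KSEGS using the
 * same unlink-then-destroy approach as kse_gc().
 */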
1356118676Sdavidxustatic void
1357118676Sdavidxukseg_gc(struct pthread *curthread)
1358118676Sdavidxu{
1359118676Sdavidxu	kse_critical_t crit;
1360118676Sdavidxu	TAILQ_HEAD(, kse_group) worklist;
1361118676Sdavidxu	struct kse_group *kseg;
1362118676Sdavidxu
1363118676Sdavidxu	if (free_kseg_count <= MAX_CACHED_KSEGS)
1364118676Sdavidxu		return;
1365155745Sdeischen	TAILQ_INIT(&worklist);
1366118676Sdavidxu	crit = _kse_critical_enter();
1367118676Sdavidxu	KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1368118676Sdavidxu	while (free_kseg_count > MAX_CACHED_KSEGS) {
1369118676Sdavidxu		kseg = TAILQ_FIRST(&free_kse_groupq);
1370118676Sdavidxu		TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
1371118676Sdavidxu		free_kseg_count--;
1372118676Sdavidxu		TAILQ_INSERT_HEAD(&worklist, kseg, kg_qe);
1373118676Sdavidxu	}
1374118676Sdavidxu	KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1375118676Sdavidxu	_kse_critical_leave(crit);
1376118676Sdavidxu
1377118676Sdavidxu	while ((kseg = TAILQ_FIRST(&worklist))) {
1378118676Sdavidxu		TAILQ_REMOVE(&worklist, kseg, kg_qe);
1379118676Sdavidxu		kseg_destroy(kseg);
1380118676Sdavidxu	}
1381118676Sdavidxu}
1382118676Sdavidxu
1383113658Sdeischen/*
1384113658Sdeischen * Only new threads that are running or suspended may be scheduled.
1385113658Sdeischen */
1386113870Sdeischenint
1387113658Sdeischen_thr_schedule_add(struct pthread *curthread, struct pthread *newthread)
1388113658Sdeischen{
1389113658Sdeischen	kse_critical_t crit;
1390113870Sdeischen	int ret;
1391113658Sdeischen
1392115278Sdeischen	/* Add the new thread. */
1393115278Sdeischen	thr_link(newthread);
1394115278Sdeischen
1395113658Sdeischen	/*
1396113658Sdeischen	 * If this is the first time creating a thread, make sure
1397113658Sdeischen	 * the mailbox is set for the current thread.
1398113658Sdeischen	 */
1399113658Sdeischen	if ((newthread->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) {
1400113786Sdeischen		/* We use the thread's stack as the KSE's stack. */
1401118510Sdeischen		newthread->kse->k_kcb->kcb_kmbx.km_stack.ss_sp =
1402117706Sdavidxu		    newthread->attr.stackaddr_attr;
1403118510Sdeischen		newthread->kse->k_kcb->kcb_kmbx.km_stack.ss_size =
1404117706Sdavidxu		    newthread->attr.stacksize_attr;
1405117706Sdavidxu
1406113658Sdeischen		/*
1407113658Sdeischen		 * No need to lock the scheduling queue since the
1408113658Sdeischen		 * KSE/KSEG pair have not yet been started.
1409113658Sdeischen		 */
1410113658Sdeischen		KSEG_THRQ_ADD(newthread->kseg, newthread);
1411117706Sdavidxu		/* This thread never gives up its KSE. */
1412117706Sdavidxu		newthread->active = 1;
1413118516Sdeischen		newthread->kse->k_curthread = newthread;
1414118510Sdeischen		newthread->kse->k_kcb->kcb_kmbx.km_flags = KMF_BOUND;
1415118516Sdeischen		newthread->kse->k_kcb->kcb_kmbx.km_func =
1416118516Sdeischen		    (kse_func_t *)kse_sched_single;
1417118510Sdeischen		newthread->kse->k_kcb->kcb_kmbx.km_quantum = 0;
1418117706Sdavidxu		KSE_SET_MBOX(newthread->kse, newthread);
1419113658Sdeischen		/*
1420113658Sdeischen		 * This thread needs a new KSE and KSEG.
1421113658Sdeischen		 */
1422116977Sdavidxu		newthread->kse->k_flags &= ~KF_INITIALIZED;
1423116977Sdavidxu		newthread->kse->k_flags |= KF_STARTED;
1424117706Sdavidxu		/* Fire up! */
1425118510Sdeischen		ret = kse_create(&newthread->kse->k_kcb->kcb_kmbx, 1);
1426113870Sdeischen		if (ret != 0)
1427113870Sdeischen			ret = errno;
1428113658Sdeischen	}
1429113658Sdeischen	else {
1430113658Sdeischen		/*
1431113658Sdeischen		 * Lock the KSE and add the new thread to its list of
1432113658Sdeischen		 * assigned threads.  If the new thread is runnable, also
1433113658Sdeischen		 * add it to the KSE's run queue.
1434113658Sdeischen		 */
1435116977Sdavidxu		crit = _kse_critical_enter();
1436113658Sdeischen		KSE_SCHED_LOCK(curthread->kse, newthread->kseg);
1437113658Sdeischen		KSEG_THRQ_ADD(newthread->kseg, newthread);
1438113658Sdeischen		if (newthread->state == PS_RUNNING)
1439113658Sdeischen			THR_RUNQ_INSERT_TAIL(newthread);
1440113658Sdeischen		if ((newthread->kse->k_flags & KF_STARTED) == 0) {
144113546Sjulian			/*
1442113658Sdeischen			 * This KSE hasn't been started yet.  Start it
1443113658Sdeischen			 * outside of holding the lock.
1444103419Smini			 */
1445113658Sdeischen			newthread->kse->k_flags |= KF_STARTED;
1446118510Sdeischen			newthread->kse->k_kcb->kcb_kmbx.km_func =
1447113786Sdeischen			    (kse_func_t *)kse_sched_multi;
1448118510Sdeischen			newthread->kse->k_kcb->kcb_kmbx.km_flags = 0;
1449118510Sdeischen			kse_create(&newthread->kse->k_kcb->kcb_kmbx, 0);
1450115278Sdeischen		} else if ((newthread->state == PS_RUNNING) &&
1451115278Sdeischen		     KSE_IS_IDLE(newthread->kse)) {
1452103419Smini			/*
1453113658Sdeischen			 * The thread is being scheduled on another KSEG.
145413546Sjulian			 */
1455114187Sdeischen			kse_wakeup_one(newthread);
145648046Sjb		}
1457115278Sdeischen		KSE_SCHED_UNLOCK(curthread->kse, newthread->kseg);
1458116977Sdavidxu		_kse_critical_leave(crit);
1459113870Sdeischen		ret = 0;
1460113658Sdeischen	}
1461115278Sdeischen	if (ret != 0)
1462115278Sdeischen		thr_unlink(newthread);
1463115278Sdeischen
1464113870Sdeischen	return (ret);
1465113658Sdeischen}
146613546Sjulian
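/*
 * Insert a thread into its KSE's wait queue, which is kept sorted
 * by wakeup time.  Threads that never time out (wakeup_time of
 * (-1, -1)) go to the tail; all others are placed before the first
 * thread that wakes up later.  Called with the scheduling lock held.
 */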
1467113658Sdeischenvoid
1468113658Sdeischenkse_waitq_insert(struct pthread *thread)
1469113658Sdeischen{
1470113658Sdeischen	struct pthread *td;
1471113658Sdeischen
1472113658Sdeischen	if (thread->wakeup_time.tv_sec == -1)
1473113658Sdeischen		TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq, thread,
1474113658Sdeischen		    pqe);
1475113658Sdeischen	else {
1476113658Sdeischen		td = TAILQ_FIRST(&thread->kse->k_schedq->sq_waitq);
1477113658Sdeischen		while ((td != NULL) && (td->wakeup_time.tv_sec != -1) &&
1478113658Sdeischen		    ((td->wakeup_time.tv_sec < thread->wakeup_time.tv_sec) ||
1479113658Sdeischen		    ((td->wakeup_time.tv_sec == thread->wakeup_time.tv_sec) &&
1480113658Sdeischen		    (td->wakeup_time.tv_nsec <= thread->wakeup_time.tv_nsec))))
1481113658Sdeischen			td = TAILQ_NEXT(td, pqe);
1482113658Sdeischen		if (td == NULL)
1483113658Sdeischen			TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq,
1484113658Sdeischen			    thread, pqe);
1485113658Sdeischen		else
1486113658Sdeischen			TAILQ_INSERT_BEFORE(td, thread, pqe);
1487113658Sdeischen	}
1488113658Sdeischen	thread->flags |= THR_FLAGS_IN_WAITQ;
1489113658Sdeischen}
1490113658Sdeischen
1491113658Sdeischen/*
1492113658Sdeischen * This must be called with the scheduling lock held.
1493113658Sdeischen */
1494113658Sdeischenstatic void
1495113658Sdeischenkse_check_completed(struct kse *kse)
1496113658Sdeischen{
1497113658Sdeischen	struct pthread *thread;
1498113658Sdeischen	struct kse_thr_mailbox *completed;
1499116977Sdavidxu	int sig;
1500113658Sdeischen
1501118510Sdeischen	if ((completed = kse->k_kcb->kcb_kmbx.km_completed) != NULL) {
1502118510Sdeischen		kse->k_kcb->kcb_kmbx.km_completed = NULL;
1503113658Sdeischen		while (completed != NULL) {
1504113658Sdeischen			thread = completed->tm_udata;
1505113658Sdeischen			DBG_MSG("Found completed thread %p, name %s\n",
1506113658Sdeischen			    thread,
1507113658Sdeischen			    (thread->name == NULL) ? "none" : thread->name);
1508113658Sdeischen			thread->blocked = 0;
1509114664Sdeischen			if (thread != kse->k_curthread) {
1510117715Sdeischen				thr_accounting(thread);
1511114664Sdeischen				if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
1512114664Sdeischen					THR_SET_STATE(thread, PS_SUSPENDED);
1513114664Sdeischen				else
1514114664Sdeischen					KSE_RUNQ_INSERT_TAIL(kse, thread);
1515115080Sdeischen				if ((thread->kse != kse) &&
1516115080Sdeischen				    (thread->kse->k_curthread == thread)) {
1517118510Sdeischen					/*
1518118510Sdeischen					 * Remove this thread from its
1519118510Sdeischen					 * previous KSE so that it (the KSE)
1520118510Sdeischen					 * doesn't think it is still active.
1521118510Sdeischen					 */
1522118817Sdavidxu					thread->kse->k_curthread = NULL;
1523115080Sdeischen					thread->active = 0;
1524115080Sdeischen				}
1525114664Sdeischen			}
1526118510Sdeischen			if ((sig = thread->tcb->tcb_tmbx.tm_syncsig.si_signo)
1527118510Sdeischen			    != 0) {
1528116977Sdavidxu				if (SIGISMEMBER(thread->sigmask, sig))
1529116977Sdavidxu					SIGADDSET(thread->sigpend, sig);
1530122338Sdavidxu				else if (THR_IN_CRITICAL(thread))
1531122338Sdavidxu					kse_thr_interrupt(NULL, KSE_INTR_SIGEXIT, sig);
1532116977Sdavidxu				else
1533117907Sdeischen					(void)_thr_sig_add(thread, sig,
1534118510Sdeischen					    &thread->tcb->tcb_tmbx.tm_syncsig);
1535118510Sdeischen				thread->tcb->tcb_tmbx.tm_syncsig.si_signo = 0;
1536116977Sdavidxu			}
1537113658Sdeischen			completed = completed->tm_next;
1538113658Sdeischen		}
1539113658Sdeischen	}
1540113658Sdeischen}
1541113658Sdeischen
1542113658Sdeischen/*
1543113658Sdeischen * This must be called with the scheduling lock held.
1544113658Sdeischen */
1545113658Sdeischenstatic void
1546113658Sdeischenkse_check_waitq(struct kse *kse)
1547113658Sdeischen{
1548113658Sdeischen	struct pthread	*pthread;
1549113658Sdeischen	struct timespec ts;
1550113658Sdeischen
1551113658Sdeischen	KSE_GET_TOD(kse, &ts);
1552113658Sdeischen
1553113658Sdeischen	/*
1554113658Sdeischen	 * Wake up threads that have timed out.  This has to be
1555113658Sdeischen	 * done before adding the current thread to the run queue
1556113658Sdeischen	 * so that a CPU intensive thread doesn't get preference
1557113658Sdeischen	 * over waiting threads.
1558113658Sdeischen	 */
1559113658Sdeischen	while (((pthread = KSE_WAITQ_FIRST(kse)) != NULL) &&
1560113658Sdeischen	    thr_timedout(pthread, &ts)) {
1561113658Sdeischen		/* Remove the thread from the wait queue: */
1562113658Sdeischen		KSE_WAITQ_REMOVE(kse, pthread);
1563113658Sdeischen		DBG_MSG("Found timedout thread %p in waitq\n", pthread);
1564113658Sdeischen
1565113658Sdeischen		/* Indicate that the thread timed out: */
1566113658Sdeischen		pthread->timeout = 1;
1567113658Sdeischen
1568113658Sdeischen		/* Add the thread to the priority queue: */
1569114664Sdeischen		if ((pthread->flags & THR_FLAGS_SUSPENDED) != 0)
1570114664Sdeischen			THR_SET_STATE(pthread, PS_SUSPENDED);
1571114664Sdeischen		else {
1572114664Sdeischen			THR_SET_STATE(pthread, PS_RUNNING);
1573114664Sdeischen			KSE_RUNQ_INSERT_TAIL(kse, pthread);
1574114664Sdeischen		}
1575113658Sdeischen	}
1576113658Sdeischen}
1577113658Sdeischen
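/*
 * Return non-zero if the thread's wakeup time has arrived; a
 * negative wakeup_time.tv_sec means the thread never times out.
 */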
1578113658Sdeischenstatic int
1579113658Sdeischenthr_timedout(struct pthread *thread, struct timespec *curtime)
1580113658Sdeischen{
1581113658Sdeischen	if (thread->wakeup_time.tv_sec < 0)
1582113658Sdeischen		return (0);
1583113658Sdeischen	else if (thread->wakeup_time.tv_sec > curtime->tv_sec)
1584113658Sdeischen		return (0);
1585113658Sdeischen	else if ((thread->wakeup_time.tv_sec == curtime->tv_sec) &&
1586113658Sdeischen	    (thread->wakeup_time.tv_nsec > curtime->tv_nsec))
1587113658Sdeischen		return (0);
1588113658Sdeischen	else
1589113658Sdeischen		return (1);
1590113658Sdeischen}
1591113658Sdeischen
1592113658Sdeischen/*
1593113658Sdeischen * This must be called with the scheduling lock held.
1594113658Sdeischen *
1595113658Sdeischen * Each thread has a time slice, a wakeup time (used when it wants
1596113658Sdeischen * to wait for a specified amount of time), a run state, and an
1597113658Sdeischen * active flag.
1598113658Sdeischen *
1599113658Sdeischen * When a thread gets run by the scheduler, the active flag is
1600113658Sdeischen * set to non-zero (1).  When a thread performs an explicit yield
1601113658Sdeischen * or schedules a state change, it enters the scheduler and the
1602113658Sdeischen * active flag is cleared.  When the active flag is still seen
1603113658Sdeischen * set in the scheduler, that means that the thread is blocked in
1604113658Sdeischen * the kernel (because it is cleared before entering the scheduler
1605113658Sdeischen * in all other instances).
1606113658Sdeischen *
1607113658Sdeischen * The wakeup time is only set for those states that can time out.
1608113658Sdeischen * It is set to (-1, -1) for all other instances.
1609113658Sdeischen *
1610113658Sdeischen * The thread's run state, aside from being useful when debugging,
1611113658Sdeischen * is used to place the thread in an appropriate queue.  There
1612113658Sdeischen * are 2 basic queues:
1613113658Sdeischen *
1614113658Sdeischen *   o run queue - queue ordered by priority for all threads
1615113658Sdeischen *                 that are runnable
1616113658Sdeischen *   o waiting queue - queue sorted by wakeup time for all threads
1617113658Sdeischen *                     that are not otherwise runnable (not blocked
1618113658Sdeischen *                     in kernel, not waiting for locks)
1619113658Sdeischen *
1620113658Sdeischen * The thread's time slice is used for round-robin scheduling
1621113658Sdeischen * (the default scheduling policy).  While a SCHED_RR thread
1622113658Sdeischen * is runnable, its time slice accumulates.  When it reaches
1623113658Sdeischen * the time slice interval, it gets reset and added to the end
1624113658Sdeischen * of the queue of threads at its priority.  When a thread is
1625113658Sdeischen * no longer runnable (blocks in kernel, waits, etc), its
1626113658Sdeischen * time slice is reset.
1627113658Sdeischen *
1628113658Sdeischen * The job of kse_switchout_thread() is to handle all of the above.
1629113658Sdeischen */
1630113658Sdeischenstatic void
1631113658Sdeischenkse_switchout_thread(struct kse *kse, struct pthread *thread)
1632113658Sdeischen{
1633113658Sdeischen	int level;
1634115080Sdeischen	int i;
1635116977Sdavidxu	int restart;
1636116977Sdavidxu	siginfo_t siginfo;
1637113658Sdeischen
1638113658Sdeischen	/*
1639113658Sdeischen	 * Place the currently running thread into the
1640113658Sdeischen	 * appropriate queue(s).
1641113658Sdeischen	 */
1642113658Sdeischen	DBG_MSG("Switching out thread %p, state %d\n", thread, thread->state);
1643115080Sdeischen
1644115080Sdeischen	THR_DEACTIVATE_LAST_LOCK(thread);
1645113658Sdeischen	if (thread->blocked != 0) {
1646114187Sdeischen		thread->active = 0;
1647114187Sdeischen		thread->need_switchout = 0;
1648113658Sdeischen		/* This thread must have blocked in the kernel. */
1649116977Sdavidxu		/*
1650120896Sdavidxu		 * Check for pending signals and cancellation for
1651120896Sdavidxu		 * this thread to see if we need to interrupt it
1652120896Sdavidxu		 * in the kernel.
1653116977Sdavidxu		 */
1654120896Sdavidxu		if (THR_NEED_CANCEL(thread)) {
1655120896Sdavidxu			kse_thr_interrupt(&thread->tcb->tcb_tmbx,
1656120896Sdavidxu					  KSE_INTR_INTERRUPT, 0);
1657120896Sdavidxu		} else if (thread->check_pending != 0) {
1658116977Sdavidxu			for (i = 1; i <= _SIG_MAXSIG; ++i) {
1659116977Sdavidxu				if (SIGISMEMBER(thread->sigpend, i) &&
1660116977Sdavidxu				    !SIGISMEMBER(thread->sigmask, i)) {
1661120109Sdavidxu					restart = _thread_sigact[i - 1].sa_flags & SA_RESTART;
1662118510Sdeischen					kse_thr_interrupt(&thread->tcb->tcb_tmbx,
1663117706Sdavidxu					    restart ? KSE_INTR_RESTART : KSE_INTR_INTERRUPT, 0);
1664116977Sdavidxu					break;
1665116977Sdavidxu				}
1666116977Sdavidxu			}
1667116977Sdavidxu		}
1668113658Sdeischen	}
1669113658Sdeischen	else {
1670113658Sdeischen		switch (thread->state) {
1671120896Sdavidxu		case PS_MUTEX_WAIT:
1672113658Sdeischen		case PS_COND_WAIT:
1673120896Sdavidxu			if (THR_NEED_CANCEL(thread)) {
1674120896Sdavidxu				thread->interrupted = 1;
1675120896Sdavidxu				thread->continuation = _thr_finish_cancellation;
1676120896Sdavidxu				THR_SET_STATE(thread, PS_RUNNING);
1677120896Sdavidxu			} else {
1678120896Sdavidxu				/* Insert into the waiting queue: */
1679120896Sdavidxu				KSE_WAITQ_INSERT(kse, thread);
1680120896Sdavidxu			}
1681113658Sdeischen			break;
168213546Sjulian
1683113658Sdeischen		case PS_LOCKWAIT:
168413546Sjulian			/*
1685113658Sdeischen			 * This state doesn't time out.
168613546Sjulian			 */
1687113658Sdeischen			thread->wakeup_time.tv_sec = -1;
1688113658Sdeischen			thread->wakeup_time.tv_nsec = -1;
1689113658Sdeischen			level = thread->locklevel - 1;
1690115278Sdeischen			if (!_LCK_GRANTED(&thread->lockusers[level]))
1691113658Sdeischen				KSE_WAITQ_INSERT(kse, thread);
1692113658Sdeischen			else
1693113658Sdeischen				THR_SET_STATE(thread, PS_RUNNING);
1694113658Sdeischen			break;
169513546Sjulian
1696120896Sdavidxu		case PS_SLEEP_WAIT:
1697116977Sdavidxu		case PS_SIGWAIT:
1698120896Sdavidxu			if (THR_NEED_CANCEL(thread)) {
1699120896Sdavidxu				thread->interrupted = 1;
1700120896Sdavidxu				THR_SET_STATE(thread, PS_RUNNING);
1701120896Sdavidxu			} else {
1702120896Sdavidxu				KSE_WAITQ_INSERT(kse, thread);
1703120896Sdavidxu			}
1704116977Sdavidxu			break;
1705120896Sdavidxu
1706120896Sdavidxu		case PS_JOIN:
1707120896Sdavidxu			if (THR_NEED_CANCEL(thread)) {
1708120896Sdavidxu				thread->join_status.thread = NULL;
1709120896Sdavidxu				THR_SET_STATE(thread, PS_RUNNING);
1710120896Sdavidxu			} else {
1711120896Sdavidxu				/*
1712120896Sdavidxu				 * This state doesn't time out.
1713120896Sdavidxu				 */
1714120896Sdavidxu				thread->wakeup_time.tv_sec = -1;
1715120896Sdavidxu				thread->wakeup_time.tv_nsec = -1;
1716120896Sdavidxu
1717120896Sdavidxu				/* Insert into the waiting queue: */
1718120896Sdavidxu				KSE_WAITQ_INSERT(kse, thread);
1719120896Sdavidxu			}
1720119736Sdavidxu			break;
1721120896Sdavidxu
1722113658Sdeischen		case PS_SIGSUSPEND:
1723113658Sdeischen		case PS_SUSPENDED:
1724120896Sdavidxu			if (THR_NEED_CANCEL(thread)) {
1725120896Sdavidxu				thread->interrupted = 1;
1726120896Sdavidxu				THR_SET_STATE(thread, PS_RUNNING);
1727120896Sdavidxu			} else {
1728120896Sdavidxu				/*
1729120896Sdavidxu				 * These states don't time out.
1730120896Sdavidxu				 */
1731120896Sdavidxu				thread->wakeup_time.tv_sec = -1;
1732120896Sdavidxu				thread->wakeup_time.tv_nsec = -1;
1733120896Sdavidxu
1734120896Sdavidxu				/* Insert into the waiting queue: */
1735120896Sdavidxu				KSE_WAITQ_INSERT(kse, thread);
1736120896Sdavidxu			}
1737120896Sdavidxu			break;
1738120896Sdavidxu
1739120896Sdavidxu		case PS_DEAD:
1740120896Sdavidxu			/*
1741120896Sdavidxu			 * The scheduler is operating on a different
1742120896Sdavidxu			 * stack.  It is safe to do garbage collecting
1743120896Sdavidxu			 * here.
1744120896Sdavidxu			 */
1745120896Sdavidxu			thr_cleanup(kse, thread);
1746120896Sdavidxu			return;
1747120896Sdavidxu			break;
1748120896Sdavidxu
1749120896Sdavidxu		case PS_RUNNING:
1750120896Sdavidxu			if ((thread->flags & THR_FLAGS_SUSPENDED) != 0 &&
1751120896Sdavidxu			    !THR_NEED_CANCEL(thread))
1752120896Sdavidxu				THR_SET_STATE(thread, PS_SUSPENDED);
1753120896Sdavidxu			break;
1754120896Sdavidxu
1755113658Sdeischen		case PS_DEADLOCK:
175613546Sjulian			/*
1757113658Sdeischen			 * These states don't time out.
175813546Sjulian			 */
1759113658Sdeischen			thread->wakeup_time.tv_sec = -1;
1760113658Sdeischen			thread->wakeup_time.tv_nsec = -1;
176113546Sjulian
1762113658Sdeischen			/* Insert into the waiting queue: */
1763113658Sdeischen			KSE_WAITQ_INSERT(kse, thread);
1764113658Sdeischen			break;
1765120896Sdavidxu
1766120896Sdavidxu		default:
1767120896Sdavidxu			PANIC("Unknown state\n");
1768120896Sdavidxu			break;
1769113658Sdeischen		}
1770120896Sdavidxu
1771117715Sdeischen		thr_accounting(thread);
1772117715Sdeischen		if (thread->state == PS_RUNNING) {
1773113658Sdeischen			if (thread->slice_usec == -1) {
1774106786Smini				/*
1775113658Sdeischen				 * The thread exceeded its time quantum or
1776113658Sdeischen				 * it yielded the CPU; place it at the tail
1777113658Sdeischen				 * of the queue for its priority.
1778106786Smini				 */
1779113658Sdeischen				KSE_RUNQ_INSERT_TAIL(kse, thread);
1780113658Sdeischen			} else {
1781113658Sdeischen				/*
1782113658Sdeischen				 * The thread hasn't exceeded its interval.
1783113658Sdeischen				 * Place it at the head of the queue for its
1784113658Sdeischen				 * priority.
1785113658Sdeischen				 */
1786113658Sdeischen				KSE_RUNQ_INSERT_HEAD(kse, thread);
1787113658Sdeischen			}
178813546Sjulian		}
178913546Sjulian	}
1790113658Sdeischen	thread->active = 0;
1791113658Sdeischen	thread->need_switchout = 0;
1792115080Sdeischen	if (thread->check_pending != 0) {
1793115080Sdeischen		/* Install pending signals into the frame. */
1794115080Sdeischen		thread->check_pending = 0;
1795116977Sdavidxu		KSE_LOCK_ACQUIRE(kse, &_thread_signal_lock);
1796116977Sdavidxu		for (i = 1; i <= _SIG_MAXSIG; i++) {
1797116977Sdavidxu			if (SIGISMEMBER(thread->sigmask, i))
1798116977Sdavidxu				continue;
1799116977Sdavidxu			if (SIGISMEMBER(thread->sigpend, i))
1800117907Sdeischen				(void)_thr_sig_add(thread, i,
1801117907Sdeischen				    &thread->siginfo[i-1]);
1802116977Sdavidxu			else if (SIGISMEMBER(_thr_proc_sigpending, i) &&
1803116977Sdavidxu				_thr_getprocsig_unlocked(i, &siginfo)) {
1804117907Sdeischen				(void)_thr_sig_add(thread, i, &siginfo);
1805116977Sdavidxu			}
1806115080Sdeischen		}
1807116977Sdavidxu		KSE_LOCK_RELEASE(kse, &_thread_signal_lock);
1808115080Sdeischen	}
180913546Sjulian}
181013546Sjulian
1811113658Sdeischen/*
1812113658Sdeischen * This function waits for the smallest timeout value of any waiting
1813113658Sdeischen * thread, or until it receives a message from another KSE.
1814113658Sdeischen *
1815113658Sdeischen * This must be called with the scheduling lock held.
1816113658Sdeischen */
1817113658Sdeischenstatic void
1818117706Sdavidxukse_wait(struct kse *kse, struct pthread *td_wait, int sigseqno)
181913546Sjulian{
1820113786Sdeischen	struct timespec ts, ts_sleep;
1821113786Sdeischen	int saved_flags;
182271581Sdeischen
1823113786Sdeischen	if ((td_wait == NULL) || (td_wait->wakeup_time.tv_sec < 0)) {
1824113870Sdeischen		/* Limit sleep to no more than 1 minute. */
1825113870Sdeischen		ts_sleep.tv_sec = 60;
1826113786Sdeischen		ts_sleep.tv_nsec = 0;
1827113786Sdeischen	} else {
1828136286Sdavidxu		KSE_GET_TOD(kse, &ts);
1829113786Sdeischen		TIMESPEC_SUB(&ts_sleep, &td_wait->wakeup_time, &ts);
1830113870Sdeischen		if (ts_sleep.tv_sec > 60) {
1831113870Sdeischen			ts_sleep.tv_sec = 60;
1832113658Sdeischen			ts_sleep.tv_nsec = 0;
1833113658Sdeischen		}
1834113658Sdeischen	}
1835113786Sdeischen	/* Don't sleep for negative times. */
1836113786Sdeischen	if ((ts_sleep.tv_sec >= 0) && (ts_sleep.tv_nsec >= 0)) {
1837114187Sdeischen		KSE_SET_IDLE(kse);
1838114187Sdeischen		kse->k_kseg->kg_idle_kses++;
1839113786Sdeischen		KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1840117706Sdavidxu		if ((kse->k_kseg->kg_flags & KGF_SINGLE_THREAD) &&
1841117706Sdavidxu		    (kse->k_sigseqno != sigseqno))
1842117706Sdavidxu			; /* don't sleep */
1843117706Sdavidxu		else {
1844118510Sdeischen			saved_flags = kse->k_kcb->kcb_kmbx.km_flags;
1845118510Sdeischen			kse->k_kcb->kcb_kmbx.km_flags |= KMF_NOUPCALL;
1846117706Sdavidxu			kse_release(&ts_sleep);
1847118510Sdeischen			kse->k_kcb->kcb_kmbx.km_flags = saved_flags;
1848117706Sdavidxu		}
1849113786Sdeischen		KSE_SCHED_LOCK(kse, kse->k_kseg);
1850114187Sdeischen		if (KSE_IS_IDLE(kse)) {
1851114187Sdeischen			KSE_CLEAR_IDLE(kse);
1852114187Sdeischen			kse->k_kseg->kg_idle_kses--;
1853114187Sdeischen		}
1854113786Sdeischen	}
185513546Sjulian}
185613546Sjulian
1857113658Sdeischen/*
1858113658Sdeischen * Avoid calling this kse_exit() so as not to confuse it with the
1859113658Sdeischen * system call of the same name.
1860113658Sdeischen */
1861113658Sdeischenstatic void
1862113658Sdeischenkse_fini(struct kse *kse)
186341164Sjb{
1864115278Sdeischen	/* struct kse_group *free_kseg = NULL; */
1865113658Sdeischen	struct timespec ts;
1866120567Sdavidxu	struct pthread *td;
186771581Sdeischen
186848046Sjb	/*
1869113661Sdeischen	 * Check to see if this is one of the main kses.
187048046Sjb	 */
1871115278Sdeischen	if (kse->k_kseg != _kse_initial->k_kseg) {
1872115278Sdeischen		PANIC("shouldn't get here");
1873115278Sdeischen		/* This is for supporting thread groups. */
1874115278Sdeischen#ifdef NOT_YET
1875113661Sdeischen		/* Remove this KSE from the KSEG's list of KSEs. */
1876113661Sdeischen		KSE_SCHED_LOCK(kse, kse->k_kseg);
1877113661Sdeischen		TAILQ_REMOVE(&kse->k_kseg->kg_kseq, kse, k_kgqe);
1878113870Sdeischen		kse->k_kseg->kg_ksecount--;
1879113661Sdeischen		if (TAILQ_EMPTY(&kse->k_kseg->kg_kseq))
1880113661Sdeischen			free_kseg = kse->k_kseg;
1881113661Sdeischen		KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1882113661Sdeischen
1883113658Sdeischen		/*
1884113661Sdeischen		 * Add this KSE to the list of free KSEs along with
1885113661Sdeischen		 * the KSEG if it is now orphaned.
1886113661Sdeischen		 */
1887113661Sdeischen		KSE_LOCK_ACQUIRE(kse, &kse_lock);
1888113661Sdeischen		if (free_kseg != NULL)
1889113870Sdeischen			kseg_free_unlocked(free_kseg);
1890113661Sdeischen		kse_free_unlocked(kse);
1891113661Sdeischen		KSE_LOCK_RELEASE(kse, &kse_lock);
1892113661Sdeischen		kse_exit();
1893113661Sdeischen		/* Never returns. */
1894115278Sdeischen		PANIC("kse_exit()");
1895115278Sdeischen#endif
1896113661Sdeischen	} else {
1897113661Sdeischen		/*
1898119577Sdeischen		 * We allow the program to kill KSEs in the initial
1899119577Sdeischen		 * group (by lowering the concurrency).
1900113658Sdeischen		 */
1901119577Sdeischen		if ((kse != _kse_initial) &&
1902119577Sdeischen		    ((kse->k_flags & KF_TERMINATED) != 0)) {
1903115278Sdeischen			KSE_SCHED_LOCK(kse, kse->k_kseg);
1904115278Sdeischen			TAILQ_REMOVE(&kse->k_kseg->kg_kseq, kse, k_kgqe);
1905115278Sdeischen			kse->k_kseg->kg_ksecount--;
1906120567Sdavidxu			/*
1907120567Sdavidxu			 * Migrate each thread to _kse_initial if the last
1908120567Sdavidxu			 * KSE it ran on is this KSE.
1909120567Sdavidxu			 */
1910120567Sdavidxu			td = TAILQ_FIRST(&kse->k_kseg->kg_threadq);
1911120567Sdavidxu			while (td != NULL) {
1912120567Sdavidxu				if (td->kse == kse)
1913120567Sdavidxu					td->kse = _kse_initial;
1914120567Sdavidxu				td = TAILQ_NEXT(td, kle);
1915120567Sdavidxu			}
1916115278Sdeischen			KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1917115278Sdeischen			KSE_LOCK_ACQUIRE(kse, &kse_lock);
1918115278Sdeischen			kse_free_unlocked(kse);
1919115278Sdeischen			KSE_LOCK_RELEASE(kse, &kse_lock);
1920120567Sdavidxu			/* Make sure at least one KSE is always awake. */
1921120567Sdavidxu			KSE_WAKEUP(_kse_initial);
1922115278Sdeischen			kse_exit();
1923115278Sdeischen			/* Never returns. */
1924115278Sdeischen			PANIC("kse_exit() failed for initial kseg");
1925115278Sdeischen		}
1926115278Sdeischen		KSE_SCHED_LOCK(kse, kse->k_kseg);
1927115278Sdeischen		KSE_SET_IDLE(kse);
1928115278Sdeischen		kse->k_kseg->kg_idle_kses++;
1929115278Sdeischen		KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1930113658Sdeischen		ts.tv_sec = 120;
1931113658Sdeischen		ts.tv_nsec = 0;
1932118510Sdeischen		kse->k_kcb->kcb_kmbx.km_flags = 0;
1933115278Sdeischen		kse_release(&ts);
1934115278Sdeischen		/* Never reached. */
1935113658Sdeischen	}
193641164Sjb}
193741164Sjb
1938106786Sminivoid
1939113658Sdeischen_thr_set_timeout(const struct timespec *timeout)
194013546Sjulian{
194171581Sdeischen	struct pthread	*curthread = _get_curthread();
1942113658Sdeischen	struct timespec ts;
194313546Sjulian
194413546Sjulian	/* Reset the timeout flag for the running thread: */
194571581Sdeischen	curthread->timeout = 0;
194613546Sjulian
194713546Sjulian	/* Check if the thread is to wait forever: */
194813546Sjulian	if (timeout == NULL) {
194913546Sjulian		/*
195013546Sjulian		 * Set the wakeup time to something that can be recognised as
195168516Sdeischen		 * different from an actual time of day:
195213546Sjulian		 */
195371581Sdeischen		curthread->wakeup_time.tv_sec = -1;
195471581Sdeischen		curthread->wakeup_time.tv_nsec = -1;
195513546Sjulian	}
195613546Sjulian	/* Check if no waiting is required: */
1957113658Sdeischen	else if ((timeout->tv_sec == 0) && (timeout->tv_nsec == 0)) {
195813546Sjulian		/* Set the wake up time to 'immediately': */
195971581Sdeischen		curthread->wakeup_time.tv_sec = 0;
196071581Sdeischen		curthread->wakeup_time.tv_nsec = 0;
196113546Sjulian	} else {
1962113658Sdeischen		/* Calculate the time for the current thread to wakeup: */
1963113658Sdeischen		KSE_GET_TOD(curthread->kse, &ts);
1964113658Sdeischen		TIMESPEC_ADD(&curthread->wakeup_time, &ts, timeout);
1965113658Sdeischen	}
1966113658Sdeischen}
196713546Sjulian
1968113658Sdeischenvoid
1969113658Sdeischen_thr_panic_exit(char *file, int line, char *msg)
1970113658Sdeischen{
1971113658Sdeischen	char buf[256];
197213546Sjulian
1973113658Sdeischen	snprintf(buf, sizeof(buf), "(%s:%d) %s\n", file, line, msg);
1974113658Sdeischen	__sys_write(2, buf, strlen(buf));
1975113658Sdeischen	abort();
197613546Sjulian}
197744963Sjb
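/*
 * Make a thread runnable while holding its KSEG's scheduling lock,
 * and wake the chosen KSE only after the lock has been dropped.
 */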
197844963Sjbvoid
1979113658Sdeischen_thr_setrunnable(struct pthread *curthread, struct pthread *thread)
198044963Sjb{
1981113658Sdeischen	kse_critical_t crit;
1982117907Sdeischen	struct kse_mailbox *kmbx;
198371581Sdeischen
1984113658Sdeischen	crit = _kse_critical_enter();
1985113658Sdeischen	KSE_SCHED_LOCK(curthread->kse, thread->kseg);
1986117907Sdeischen	kmbx = _thr_setrunnable_unlocked(thread);
1987113658Sdeischen	KSE_SCHED_UNLOCK(curthread->kse, thread->kseg);
1988113658Sdeischen	_kse_critical_leave(crit);
1989122075Sdeischen	if ((kmbx != NULL) && (__isthreaded != 0))
1990117907Sdeischen		kse_wakeup(kmbx);
199144963Sjb}
199244963Sjb
1993117907Sdeischenstruct kse_mailbox *
1994113658Sdeischen_thr_setrunnable_unlocked(struct pthread *thread)
199544963Sjb{
1996117907Sdeischen	struct kse_mailbox *kmbx = NULL;
1997117907Sdeischen
1998114664Sdeischen	if ((thread->kseg->kg_flags & KGF_SINGLE_THREAD) != 0) {
1999113658Sdeischen		/* No silly queues for these threads. */
2000114664Sdeischen		if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
2001114664Sdeischen			THR_SET_STATE(thread, PS_SUSPENDED);
2002117907Sdeischen		else {
2003114664Sdeischen			THR_SET_STATE(thread, PS_RUNNING);
2004117907Sdeischen			kmbx = kse_wakeup_one(thread);
2005117907Sdeischen		}
2006117907Sdeischen
2007115080Sdeischen	} else if (thread->state != PS_RUNNING) {
2008113658Sdeischen		if ((thread->flags & THR_FLAGS_IN_WAITQ) != 0)
2009113658Sdeischen			KSE_WAITQ_REMOVE(thread->kse, thread);
2010114664Sdeischen		if ((thread->flags & THR_FLAGS_SUSPENDED) != 0)
2011114664Sdeischen			THR_SET_STATE(thread, PS_SUSPENDED);
2012114664Sdeischen		else {
2013114664Sdeischen			THR_SET_STATE(thread, PS_RUNNING);
2014115080Sdeischen			if ((thread->blocked == 0) && (thread->active == 0) &&
2015114664Sdeischen			    (thread->flags & THR_FLAGS_IN_RUNQ) == 0)
2016114664Sdeischen				THR_RUNQ_INSERT_TAIL(thread);
2017117907Sdeischen			/*
2018117907Sdeischen			 * XXX - Threads are not yet assigned to specific
2019117907Sdeischen			 *       KSEs; they are assigned to the KSEG.  So
2020117907Sdeischen			 *       the fact that a thread's KSE is waiting
2021117907Sdeischen			 *       doesn't necessarily mean that it will be
2022117907Sdeischen			 *       the KSE that runs the thread after the
2023117907Sdeischen			 *       lock is granted.  But we don't know if the
2024117907Sdeischen			 *       other KSEs within the same KSEG are also
2025117907Sdeischen			 *       in a waiting state or not so we err on the
2026117907Sdeischen			 *       side of caution and wakeup the thread's
2027117907Sdeischen			 *       last known KSE.  We ensure that the
2028117907Sdeischen			 *       thread's KSE doesn't change while its
2029117907Sdeischen			 *       scheduling lock is held so it is safe to
2030117907Sdeischen			 *       reference it (the KSE).  If the KSE wakes
2031117907Sdeischen			 *       up and doesn't find any more work it will
2032117907Sdeischen			 *       again go back to waiting so no harm is
2033117907Sdeischen			 *       done.
2034117907Sdeischen			 */
2035117907Sdeischen			kmbx = kse_wakeup_one(thread);
2036114664Sdeischen		}
2037113658Sdeischen	}
2038117907Sdeischen	return (kmbx);
2039114187Sdeischen}
2040114187Sdeischen
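/*
 * Pick a KSE mailbox to wake for the thread: the thread's own KSE
 * if it is idle, otherwise the first idle KSE in the thread's
 * group.  Returns NULL if no KSE in the group is idle.
 */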
2041117907Sdeischenstatic struct kse_mailbox *
2042114187Sdeischenkse_wakeup_one(struct pthread *thread)
2043114187Sdeischen{
2044114187Sdeischen	struct kse *ke;
2045114187Sdeischen
2046114187Sdeischen	if (KSE_IS_IDLE(thread->kse)) {
2047114187Sdeischen		KSE_CLEAR_IDLE(thread->kse);
2048114187Sdeischen		thread->kseg->kg_idle_kses--;
2049118510Sdeischen		return (&thread->kse->k_kcb->kcb_kmbx);
2050114187Sdeischen	} else {
2051114187Sdeischen		TAILQ_FOREACH(ke, &thread->kseg->kg_kseq, k_kgqe) {
2052114187Sdeischen			if (KSE_IS_IDLE(ke)) {
2053114187Sdeischen				KSE_CLEAR_IDLE(ke);
2054114187Sdeischen				ke->k_kseg->kg_idle_kses--;
2055118510Sdeischen				return (&ke->k_kcb->kcb_kmbx);
2056114187Sdeischen			}
2057114187Sdeischen		}
2058114187Sdeischen	}
2059117907Sdeischen	return (NULL);
2060113658Sdeischen}
206171581Sdeischen
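/*
 * Wake one idle KSE in the group for each thread on the current
 * KSE's run queue, stopping when either the runnable threads or
 * the idle KSEs run out.
 */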
2062114187Sdeischenstatic void
2063114187Sdeischenkse_wakeup_multi(struct kse *curkse)
2064114187Sdeischen{
2065114187Sdeischen	struct kse *ke;
2066114187Sdeischen	int tmp;
2067114187Sdeischen
2068114187Sdeischen	if ((tmp = KSE_RUNQ_THREADS(curkse)) && curkse->k_kseg->kg_idle_kses) {
2069114187Sdeischen		TAILQ_FOREACH(ke, &curkse->k_kseg->kg_kseq, k_kgqe) {
2070114187Sdeischen			if (KSE_IS_IDLE(ke)) {
2071114187Sdeischen				KSE_CLEAR_IDLE(ke);
2072114187Sdeischen				ke->k_kseg->kg_idle_kses--;
2073114187Sdeischen				KSE_WAKEUP(ke);
2074114187Sdeischen				if (--tmp == 0)
2075114187Sdeischen					break;
2076114187Sdeischen			}
2077114187Sdeischen		}
2078114187Sdeischen	}
2079114187Sdeischen}
2080114187Sdeischen
2081113658Sdeischen/*
2082113658Sdeischen * Allocate a new KSEG.
2083113658Sdeischen *
2084113661Sdeischen * We allow the current thread to be NULL in the case that this
2085113658Sdeischen * is the first time a KSEG is being created (library initialization).
2086113658Sdeischen * In this case, we don't need to (and can't) take any locks.
2087113658Sdeischen */
2088113658Sdeischenstruct kse_group *
2089113661Sdeischen_kseg_alloc(struct pthread *curthread)
2090113658Sdeischen{
2091113658Sdeischen	struct kse_group *kseg = NULL;
2092113661Sdeischen	kse_critical_t crit;
2093113658Sdeischen
2094113661Sdeischen	if ((curthread != NULL) && (free_kseg_count > 0)) {
2095113658Sdeischen		/* Use the kse lock for the kseg queue. */
2096113661Sdeischen		crit = _kse_critical_enter();
2097113661Sdeischen		KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2098113658Sdeischen		if ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) {
2099113658Sdeischen			TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
2100113658Sdeischen			free_kseg_count--;
2101113658Sdeischen			active_kseg_count++;
2102113658Sdeischen			TAILQ_INSERT_TAIL(&active_kse_groupq, kseg, kg_qe);
2103113658Sdeischen		}
2104113661Sdeischen		KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2105113661Sdeischen		_kse_critical_leave(crit);
2106113786Sdeischen		if (kseg)
2107113786Sdeischen			kseg_reinit(kseg);
2108113658Sdeischen	}
2109113658Sdeischen
211044963Sjb	/*
2111113658Sdeischen	 * Attempt to allocate a new KSE group only if one wasn't
2112113658Sdeischen	 * found in the free list.
211444963Sjb	 */
2115113658Sdeischen	if ((kseg == NULL) &&
2116113658Sdeischen	    ((kseg = (struct kse_group *)malloc(sizeof(*kseg))) != NULL)) {
2117113661Sdeischen		if (_pq_alloc(&kseg->kg_schedq.sq_runq,
2118113661Sdeischen		    THR_MIN_PRIORITY, THR_LAST_PRIORITY) != 0) {
2119113661Sdeischen			free(kseg);
2120113661Sdeischen			kseg = NULL;
2121113661Sdeischen		} else {
2122113661Sdeischen			kseg_init(kseg);
2123113661Sdeischen			/* Add the KSEG to the list of active KSEGs. */
2124113661Sdeischen			if (curthread != NULL) {
2125113661Sdeischen				crit = _kse_critical_enter();
2126113661Sdeischen				KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2127113661Sdeischen				active_kseg_count++;
2128113661Sdeischen				TAILQ_INSERT_TAIL(&active_kse_groupq,
2129113661Sdeischen				    kseg, kg_qe);
2130113661Sdeischen				KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2131113661Sdeischen				_kse_critical_leave(crit);
2132113661Sdeischen			} else {
2133113661Sdeischen				active_kseg_count++;
2134113661Sdeischen				TAILQ_INSERT_TAIL(&active_kse_groupq,
2135113661Sdeischen				    kseg, kg_qe);
2136113661Sdeischen			}
2137113661Sdeischen		}
213848046Sjb	}
2139113658Sdeischen	return (kseg);
2140113658Sdeischen}
214148046Sjb
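/*
 * Initialize a newly allocated KSE group: reset its queues and
 * counters and create its scheduling lock.
 */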
2142118676Sdavidxustatic void
2143118676Sdavidxukseg_init(struct kse_group *kseg)
2144118676Sdavidxu{
2145118676Sdavidxu	kseg_reinit(kseg);
2146118676Sdavidxu	_lock_init(&kseg->kg_lock, LCK_ADAPTIVE, _kse_lock_wait,
2147173967Sjasone	    _kse_lock_wakeup, calloc);
2148118676Sdavidxu}
2149118676Sdavidxu
2150118676Sdavidxustatic void
2151118676Sdavidxukseg_reinit(struct kse_group *kseg)
2152118676Sdavidxu{
2153118676Sdavidxu	TAILQ_INIT(&kseg->kg_kseq);
2154118676Sdavidxu	TAILQ_INIT(&kseg->kg_threadq);
2155118676Sdavidxu	TAILQ_INIT(&kseg->kg_schedq.sq_waitq);
2156118676Sdavidxu	kseg->kg_threadcount = 0;
2157118676Sdavidxu	kseg->kg_ksecount = 0;
2158118676Sdavidxu	kseg->kg_idle_kses = 0;
2159118676Sdavidxu	kseg->kg_flags = 0;
2160118676Sdavidxu}
2161118676Sdavidxu
2162113658Sdeischen/*
2163113658Sdeischen * This must be called with the kse lock held and when there are
2164113658Sdeischen * no more threads that reference it.
2165113658Sdeischen */
2166113658Sdeischenstatic void
2167113870Sdeischenkseg_free_unlocked(struct kse_group *kseg)
2168113658Sdeischen{
2169113661Sdeischen	TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe);
2170113658Sdeischen	TAILQ_INSERT_HEAD(&free_kse_groupq, kseg, kg_qe);
2171113658Sdeischen	free_kseg_count++;
2172113658Sdeischen	active_kseg_count--;
2173113658Sdeischen}
2174113658Sdeischen
2175113870Sdeischenvoid
2176113870Sdeischen_kseg_free(struct kse_group *kseg)
2177113870Sdeischen{
2178113870Sdeischen	struct kse *curkse;
2179113870Sdeischen	kse_critical_t crit;
2180113870Sdeischen
2181113870Sdeischen	crit = _kse_critical_enter();
2182113870Sdeischen	curkse = _get_curkse();
2183113870Sdeischen	KSE_LOCK_ACQUIRE(curkse, &kse_lock);
2184113870Sdeischen	kseg_free_unlocked(kseg);
2185113870Sdeischen	KSE_LOCK_RELEASE(curkse, &kse_lock);
2186113870Sdeischen	_kse_critical_leave(crit);
2187113870Sdeischen}
2188113870Sdeischen
2189118676Sdavidxustatic void
2190118676Sdavidxukseg_destroy(struct kse_group *kseg)
2191118676Sdavidxu{
2192118676Sdavidxu	_lock_destroy(&kseg->kg_lock);
2193118676Sdavidxu	_pq_free(&kseg->kg_schedq.sq_runq);
2194118676Sdavidxu	free(kseg);
2195118676Sdavidxu}
2196118676Sdavidxu
2197113658Sdeischen/*
2198113658Sdeischen * Allocate a new KSE.
2199113658Sdeischen *
2200113661Sdeischen * We allow the current thread to be NULL in the case that this
2201113658Sdeischen * is the first time a KSE is being created (library initialization).
2202113658Sdeischen * In this case, we don't need to (and can't) take any locks.
2203113658Sdeischen */
2204113658Sdeischenstruct kse *
2205117706Sdavidxu_kse_alloc(struct pthread *curthread, int sys_scope)
2206113658Sdeischen{
2207113658Sdeischen	struct kse *kse = NULL;
2208118510Sdeischen	char *stack;
2209113661Sdeischen	kse_critical_t crit;
2210113658Sdeischen	int i;
2211113658Sdeischen
2212113661Sdeischen	if ((curthread != NULL) && (free_kse_count > 0)) {
2213113661Sdeischen		crit = _kse_critical_enter();
2214113661Sdeischen		KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2215113658Sdeischen		/* Search for a finished KSE. */
2216113658Sdeischen		kse = TAILQ_FIRST(&free_kseq);
2217113658Sdeischen		while ((kse != NULL) &&
2218118510Sdeischen		    ((kse->k_kcb->kcb_kmbx.km_flags & KMF_DONE) == 0)) {
2219113658Sdeischen			kse = TAILQ_NEXT(kse, k_qe);
2220113658Sdeischen		}
2221113658Sdeischen		if (kse != NULL) {
2222115278Sdeischen			DBG_MSG("found an unused kse.\n");
2223113658Sdeischen			TAILQ_REMOVE(&free_kseq, kse, k_qe);
2224113658Sdeischen			free_kse_count--;
2225113870Sdeischen			TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe);
2226113658Sdeischen			active_kse_count++;
2227113658Sdeischen		}
2228113661Sdeischen		KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2229113661Sdeischen		_kse_critical_leave(crit);
2230113942Sdeischen		if (kse != NULL)
2231117706Sdavidxu			kse_reinit(kse, sys_scope);
2232113658Sdeischen	}
2233113658Sdeischen	if ((kse == NULL) &&
2234113658Sdeischen	    ((kse = (struct kse *)malloc(sizeof(*kse))) != NULL)) {
2235118510Sdeischen		if (sys_scope != 0)
2236118510Sdeischen			stack = NULL;
2237118510Sdeischen		else if ((stack = malloc(KSE_STACKSIZE)) == NULL) {
2238118510Sdeischen			free(kse);
2239118510Sdeischen			return (NULL);
2240118510Sdeischen		}
2241113658Sdeischen		bzero(kse, sizeof(*kse));
2242113658Sdeischen
2243118510Sdeischen		/* Initialize KCB without the lock. */
2244118510Sdeischen		if ((kse->k_kcb = _kcb_ctor(kse)) == NULL) {
2245118510Sdeischen			if (stack != NULL)
2246118510Sdeischen				free(stack);
2247118510Sdeischen			free(kse);
2248118510Sdeischen			return (NULL);
2249118510Sdeischen		}
2250118510Sdeischen
2251113658Sdeischen		/* Initialize the lockusers. */
2252113658Sdeischen		for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) {
2253113658Sdeischen			_lockuser_init(&kse->k_lockusers[i], (void *)kse);
2254113658Sdeischen			_LCK_SET_PRIVATE2(&kse->k_lockusers[i], NULL);
2255113658Sdeischen		}
2256113786Sdeischen		/* _lock_init(kse->k_lock, ...) */
2257113658Sdeischen
2258118510Sdeischen		if (curthread != NULL) {
2259118510Sdeischen			crit = _kse_critical_enter();
2260118510Sdeischen			KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2261118510Sdeischen		}
2262118510Sdeischen		kse->k_flags = 0;
2263118510Sdeischen		TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe);
2264118510Sdeischen		active_kse_count++;
2265118510Sdeischen		if (curthread != NULL) {
2266118510Sdeischen			KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2267118510Sdeischen			_kse_critical_leave(crit);
2268118510Sdeischen		}
226944963Sjb		/*
2270113658Sdeischen		 * Create the KSE context.
2271117706Sdavidxu		 * Scope system threads (one thread per KSE) are not required
2272117706Sdavidxu		 * to have a stack for an unneeded kse upcall.
227367097Sdeischen		 */
2274117706Sdavidxu		if (!sys_scope) {
2275118510Sdeischen			kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_multi;
2276118510Sdeischen			kse->k_stack.ss_sp = stack;
2277117706Sdavidxu			kse->k_stack.ss_size = KSE_STACKSIZE;
2278117706Sdavidxu		} else {
2279118510Sdeischen			kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_single;
2280118510Sdeischen			kse->k_stack.ss_sp = NULL;
2281118510Sdeischen			kse->k_stack.ss_size = 0;
2282117706Sdavidxu		}
2283118510Sdeischen		kse->k_kcb->kcb_kmbx.km_udata = (void *)kse;
2284118510Sdeischen		kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2285113786Sdeischen		/*
2286113786Sdeischen		 * We need to keep a copy of the stack in case it
2287113786Sdeischen		 * doesn't get used; a KSE running a scope system
2288113786Sdeischen		 * thread will use that thread's stack.
2289113786Sdeischen		 */
2290118510Sdeischen		kse->k_kcb->kcb_kmbx.km_stack = kse->k_stack;
229144963Sjb	}
2292113658Sdeischen	return (kse);
229348046Sjb}
229444963Sjb
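/*
 * Reset a cached KSE for reuse.  A KSE for process-scope threads
 * gets an upcall stack and a 20 msec quantum; a KSE bound to a
 * system-scope thread runs on its thread's stack and needs no
 * quantum.
 */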
2295113942Sdeischenstatic void
2296117706Sdavidxukse_reinit(struct kse *kse, int sys_scope)
2297113942Sdeischen{
2298117706Sdavidxu	if (!sys_scope) {
2299118510Sdeischen		kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_multi;
2300117706Sdavidxu		if (kse->k_stack.ss_sp == NULL) {
2301117706Sdavidxu			/* XXX check allocation failure */
2302117706Sdavidxu			kse->k_stack.ss_sp = (char *) malloc(KSE_STACKSIZE);
2303117706Sdavidxu			kse->k_stack.ss_size = KSE_STACKSIZE;
2304117706Sdavidxu		}
2305118510Sdeischen		kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2306117706Sdavidxu	} else {
2307118510Sdeischen		kse->k_kcb->kcb_kmbx.km_func = (kse_func_t *)kse_sched_single;
2308117706Sdavidxu		if (kse->k_stack.ss_sp)
2309117706Sdavidxu			free(kse->k_stack.ss_sp);
2310117706Sdavidxu		kse->k_stack.ss_sp = NULL;
2311117706Sdavidxu		kse->k_stack.ss_size = 0;
2312118510Sdeischen		kse->k_kcb->kcb_kmbx.km_quantum = 0;
2313117706Sdavidxu	}
2314118510Sdeischen	kse->k_kcb->kcb_kmbx.km_stack = kse->k_stack;
2315118510Sdeischen	kse->k_kcb->kcb_kmbx.km_udata = (void *)kse;
2316118510Sdeischen	kse->k_kcb->kcb_kmbx.km_curthread = NULL;
2317118510Sdeischen	kse->k_kcb->kcb_kmbx.km_flags = 0;
2318118510Sdeischen	kse->k_curthread = NULL;
2319113942Sdeischen	kse->k_kseg = 0;
2320113942Sdeischen	kse->k_schedq = 0;
2321113942Sdeischen	kse->k_locklevel = 0;
2322114187Sdeischen	kse->k_flags = 0;
2323113942Sdeischen	kse->k_error = 0;
2324113942Sdeischen	kse->k_cpu = 0;
2325117706Sdavidxu	kse->k_sigseqno = 0;
2326113942Sdeischen}
2327113942Sdeischen
2328113658Sdeischenvoid
2329113661Sdeischenkse_free_unlocked(struct kse *kse)
233044963Sjb{
2331113870Sdeischen	TAILQ_REMOVE(&active_kseq, kse, k_qe);
2332113658Sdeischen	active_kse_count--;
2333113658Sdeischen	kse->k_kseg = NULL;
2334118510Sdeischen	kse->k_kcb->kcb_kmbx.km_quantum = 20000;
2335115278Sdeischen	kse->k_flags = 0;
2336113658Sdeischen	TAILQ_INSERT_HEAD(&free_kseq, kse, k_qe);
2337113658Sdeischen	free_kse_count++;
233844963Sjb}
233971581Sdeischen
2340113661Sdeischenvoid
2341113661Sdeischen_kse_free(struct pthread *curthread, struct kse *kse)
2342113661Sdeischen{
2343113661Sdeischen	kse_critical_t crit;
2344113661Sdeischen
2345113661Sdeischen	if (curthread == NULL)
2346113661Sdeischen		kse_free_unlocked(kse);
2347113661Sdeischen	else {
2348113661Sdeischen		crit = _kse_critical_enter();
2349113661Sdeischen		KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
2350113661Sdeischen		kse_free_unlocked(kse);
2351113661Sdeischen		KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
2352113661Sdeischen		_kse_critical_leave(crit);
2353113661Sdeischen	}
2354113661Sdeischen}
2355113661Sdeischen
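/*
 * Tear down a KSE evicted from the free cache: free its upcall
 * stack, destroy its KCB, lockusers, and lock, then free the
 * structure itself.
 */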
2356113658Sdeischenstatic void
2357118676Sdavidxukse_destroy(struct kse *kse)
2358113658Sdeischen{
2359118676Sdavidxu	int i;
2360113786Sdeischen
2361118676Sdavidxu	if (kse->k_stack.ss_sp != NULL)
2362118676Sdavidxu		free(kse->k_stack.ss_sp);
2363118676Sdavidxu	_kcb_dtor(kse->k_kcb);
2364118676Sdavidxu	for (i = 0; i < MAX_KSE_LOCKLEVEL; ++i)
2365118676Sdavidxu		_lockuser_destroy(&kse->k_lockusers[i]);
2366118676Sdavidxu	_lock_destroy(&kse->k_lock);
2367118676Sdavidxu	free(kse);
2368113658Sdeischen}
2369113658Sdeischen
237071581Sdeischenstruct pthread *
2371113658Sdeischen_thr_alloc(struct pthread *curthread)
237271581Sdeischen{
2373120074Sdavidxu	kse_critical_t	crit;
2374120074Sdavidxu	struct pthread	*thread = NULL;
2375120074Sdavidxu	int i;
237671581Sdeischen
2377113658Sdeischen	if (curthread != NULL) {
2378113661Sdeischen		if (GC_NEEDED())
2379113661Sdeischen			_thr_gc(curthread);
2380113658Sdeischen		if (free_thread_count > 0) {
2381113658Sdeischen			crit = _kse_critical_enter();
2382113661Sdeischen			KSE_LOCK_ACQUIRE(curthread->kse, &thread_lock);
2383113658Sdeischen			if ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
2384113658Sdeischen				TAILQ_REMOVE(&free_threadq, thread, tle);
2385113658Sdeischen				free_thread_count--;
2386113658Sdeischen			}
2387113661Sdeischen			KSE_LOCK_RELEASE(curthread->kse, &thread_lock);
2388114187Sdeischen			_kse_critical_leave(crit);
2389113658Sdeischen		}
2390113658Sdeischen	}
2391118510Sdeischen	if ((thread == NULL) &&
2392118510Sdeischen	    ((thread = malloc(sizeof(struct pthread))) != NULL)) {
2393118510Sdeischen		bzero(thread, sizeof(struct pthread));
2394149617Sdeischen		thread->siginfo = calloc(_SIG_MAXSIG, sizeof(siginfo_t));
2395149617Sdeischen		if (thread->siginfo == NULL) {
2396149617Sdeischen			free(thread);
2397149617Sdeischen			return (NULL);
2398149617Sdeischen		}
2399136846Sdavidxu		if (curthread) {
2400136846Sdavidxu			_pthread_mutex_lock(&_tcb_mutex);
2401136846Sdavidxu			thread->tcb = _tcb_ctor(thread, 0 /* not initial tls */);
2402136846Sdavidxu			_pthread_mutex_unlock(&_tcb_mutex);
2403136846Sdavidxu		} else {
2404136846Sdavidxu			thread->tcb = _tcb_ctor(thread, 1 /* initial tls */);
2405136846Sdavidxu		}
2406149617Sdeischen		if (thread->tcb == NULL) {
2407149617Sdeischen			free(thread->siginfo);
2408118510Sdeischen			free(thread);
2409149617Sdeischen			return (NULL);
2410114295Sdeischen		}
2411149617Sdeischen		/*
2412149617Sdeischen		 * Initialize thread locking.
2413149617Sdeischen		 * Lock initialization needs malloc, so don't
2414149617Sdeischen		 * enter a critical region before doing this!
2415149617Sdeischen		 */
2416149617Sdeischen		if (_lock_init(&thread->lock, LCK_ADAPTIVE,
2417173967Sjasone		    _thr_lock_wait, _thr_lock_wakeup, calloc) != 0)
2418149617Sdeischen			PANIC("Cannot initialize thread lock");
2419149617Sdeischen		for (i = 0; i < MAX_THR_LOCKLEVEL; i++) {
2420149617Sdeischen			_lockuser_init(&thread->lockusers[i], (void *)thread);
2421149617Sdeischen			_LCK_SET_PRIVATE2(&thread->lockusers[i],
2422149617Sdeischen			    (void *)thread);
2423149617Sdeischen		}
2424114295Sdeischen	}
2425113658Sdeischen	return (thread);
242671581Sdeischen}
242771581Sdeischen
242871581Sdeischenvoid
2429113658Sdeischen_thr_free(struct pthread *curthread, struct pthread *thread)
243071581Sdeischen{
2431113658Sdeischen	kse_critical_t crit;
2432113658Sdeischen
2433113661Sdeischen	DBG_MSG("Freeing thread %p\n", thread);
2434120370Sdavidxu	if (thread->name) {
2435120370Sdavidxu		free(thread->name);
2436120370Sdavidxu		thread->name = NULL;
2437120370Sdavidxu	}
2438113786Sdeischen	if ((curthread == NULL) || (free_thread_count >= MAX_CACHED_THREADS)) {
2439136846Sdavidxu		thr_destroy(curthread, thread);
2440120074Sdavidxu	} else {
2441118510Sdeischen		/* Add the thread to the free thread list. */
2442113658Sdeischen		crit = _kse_critical_enter();
2443113661Sdeischen		KSE_LOCK_ACQUIRE(curthread->kse, &thread_lock);
2444116060Sdeischen		TAILQ_INSERT_TAIL(&free_threadq, thread, tle);
2445113658Sdeischen		free_thread_count++;
2446113661Sdeischen		KSE_LOCK_RELEASE(curthread->kse, &thread_lock);
2447113658Sdeischen		_kse_critical_leave(crit);
2448113658Sdeischen	}
244971581Sdeischen}
2450115278Sdeischen
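/*
 * Final teardown of a thread structure: its lockusers, lock, TCB,
 * and siginfo array.  The TCB destructor is serialized with
 * _tcb_mutex only when there is a current thread to do the locking.
 */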
2451120074Sdavidxustatic void
2452136846Sdavidxuthr_destroy(struct pthread *curthread, struct pthread *thread)
2453120074Sdavidxu{
2454120074Sdavidxu	int i;
2455120074Sdavidxu
2456120074Sdavidxu	for (i = 0; i < MAX_THR_LOCKLEVEL; i++)
2457120074Sdavidxu		_lockuser_destroy(&thread->lockusers[i]);
2458120074Sdavidxu	_lock_destroy(&thread->lock);
2459136846Sdavidxu	if (curthread) {
2460136846Sdavidxu		_pthread_mutex_lock(&_tcb_mutex);
2461136846Sdavidxu		_tcb_dtor(thread->tcb);
2462136846Sdavidxu		_pthread_mutex_unlock(&_tcb_mutex);
2463136846Sdavidxu	} else {
2464136846Sdavidxu		_tcb_dtor(thread->tcb);
2465136846Sdavidxu	}
2466123048Sdavidxu	free(thread->siginfo);
2467120074Sdavidxu	free(thread);
2468120074Sdavidxu}
2469120074Sdavidxu
/*
 * Add an active thread:
 *
 *   o Assign the thread a unique id (which GDB uses to track
 *     threads).
 *   o Add the thread to the list of all threads and increment
 *     the number of active threads.
 */
static void
thr_link(struct pthread *thread)
{
	kse_critical_t crit;
	struct kse *curkse;

	crit = _kse_critical_enter();
	curkse = _get_curkse();
	KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
	/*
	 * Initialize the unique id (which GDB uses to track
	 * threads), add the thread to the list of all threads,
	 * and increment the number of active threads.
	 */
	thread->uniqueid = next_uniqueid++;
	THR_LIST_ADD(thread);
	_thread_active_threads++;
	KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
	_kse_critical_leave(crit);
}

/*
 * Remove an active thread.
 */
static void
thr_unlink(struct pthread *thread)
{
	kse_critical_t crit;
	struct kse *curkse;

	crit = _kse_critical_enter();
	curkse = _get_curkse();
	KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
	THR_LIST_REMOVE(thread);
	_thread_active_threads--;
	KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
	_kse_critical_leave(crit);
}

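/*
 * Add a thread to the hash table of all threads.
 */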
void
_thr_hash_add(struct pthread *thread)
{
	struct thread_hash_head *head;

	head = &thr_hashtable[THREAD_HASH(thread)];
	LIST_INSERT_HEAD(head, thread, hle);
}

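/*
 * Remove a thread from the hash table.
 */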
void
_thr_hash_remove(struct pthread *thread)
{
	LIST_REMOVE(thread, hle);
}

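/*
 * Look up a thread in the hash table; returns the thread pointer
 * if it is a known thread, or NULL otherwise.
 */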
struct pthread *
_thr_hash_find(struct pthread *thread)
{
	struct pthread *td;
	struct thread_hash_head *head;

	head = &thr_hashtable[THREAD_HASH(thread)];
	LIST_FOREACH(td, head, hle) {
		if (td == thread)
			return (thread);
	}
	return (NULL);
}

void
_thr_debug_check_yield(struct pthread *curthread)
{
	/*
	 * Note that TMDF_SUSPEND is set after the process is suspended.
	 * While we are being debugged, every suspension of the process
	 * causes all KSEs to schedule an upcall in the kernel, unless
	 * the KSE is in a critical region.  If this function is being
	 * called, the KSE is no longer in a critical region, so if the
	 * debugger set TMDF_SUSPEND before the KSE left its critical
	 * region, we catch it here.  If the flag changes while we are
	 * testing it, that is not a problem either, because the change
	 * can only occur after a process suspension event, and a
	 * suspension event always causes the KSE to schedule an upcall.
	 * In that case, because we are not in a critical region, the
	 * upcall will be scheduled successfully, the flag will be
	 * checked again in kse_sched_multi(), and we will not return
	 * until the debugger clears the flag at the next suspension
	 * event.
	 */
	if (!DBG_CAN_RUN(curthread)) {
		if ((curthread->attr.flags & PTHREAD_SCOPE_SYSTEM) == 0)
			_thr_sched_switch(curthread);
		else
			kse_thr_interrupt(&curthread->tcb->tcb_tmbx,
				KSE_INTR_DBSUSPEND, 0);
	}
}