thr_kern.c revision 113870
1/*
2 * Copyright (C) 2003 Daniel M. Eischen <deischen@freebsd.org>
3 * Copyright (C) 2002 Jonathon Mini <mini@freebsd.org>
4 * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 *    must display the following acknowledgement:
17 *	This product includes software developed by John Birrell.
18 * 4. Neither the name of the author nor the names of any co-contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 */
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/lib/libkse/thread/thr_kern.c 113870 2003-04-22 20:28:33Z deischen $");
37
38#include <sys/types.h>
39#include <sys/kse.h>
40#include <sys/signalvar.h>
41#include <sys/queue.h>
42#include <machine/atomic.h>
43
44#include <assert.h>
45#include <errno.h>
46#include <signal.h>
47#include <stdlib.h>
48#include <string.h>
49#include <time.h>
50#include <ucontext.h>
51#include <unistd.h>
52
53#include "atomic_ops.h"
54#include "thr_private.h"
55#include "pthread_md.h"
56#include "libc_private.h"
57
/*
 * Debug tracing for this file.  Uncomment the define below to route
 * DBG_MSG() to stdout_debug; otherwise DBG_MSG() expands to nothing,
 * so its arguments are not evaluated.
 */
/*#define DEBUG_THREAD_KERN */
#ifdef DEBUG_THREAD_KERN
#define DBG_MSG		stdout_debug
#else
#define DBG_MSG(x...)
#endif
64
65/*
66 * Define a high water mark for the maximum number of threads that
67 * will be cached.  Once this level is reached, any extra threads
68 * will be free()'d.
69 *
70 * XXX - It doesn't make sense to worry about the maximum number of
71 *       KSEs that we can cache because the system will limit us to
72 *       something *much* less than the maximum number of threads
73 *       that we can have.  Disregarding KSEs in their own group,
74 *       the maximum number of KSEs is the number of processors in
75 *       the system.
76 */
#define	MAX_CACHED_THREADS	100
/* NOTE(review): presumably the size in bytes of a KSE's stack - confirm. */
#define	KSE_STACKSIZE		16384

/* Point the KSE mailbox's current thread at the given thread's mailbox. */
#define	KSE_SET_MBOX(kse, thrd) \
	(kse)->k_mbx.km_curthread = &(thrd)->tmbx

/* Set the KF_EXITED bit in the KSE's flags. */
#define	KSE_SET_EXITED(kse)	(kse)->k_flags |= KF_EXITED
84
/*
 * Macros for manipulating the run queues.  The priority queue
 * routines use the thread's pqe link and also handle the setting
 * and clearing of the thread's THR_FLAGS_IN_RUNQ flag.
 *
 * Every macro operates on the run queue (sq_runq) of the scheduling
 * queue that the given KSE points to through k_schedq.
 */
#define	KSE_RUNQ_INSERT_HEAD(kse, thrd)			\
	_pq_insert_head(&(kse)->k_schedq->sq_runq, thrd)
#define	KSE_RUNQ_INSERT_TAIL(kse, thrd)			\
	_pq_insert_tail(&(kse)->k_schedq->sq_runq, thrd)
#define	KSE_RUNQ_REMOVE(kse, thrd)			\
	_pq_remove(&(kse)->k_schedq->sq_runq, thrd)
#define	KSE_RUNQ_FIRST(kse)	_pq_first(&(kse)->k_schedq->sq_runq)
97
/*
 * XXX - Remove when David commits kernel changes to support these.
 *
 * Fallback definitions of the mailbox flags used in this file, for
 * when the included headers do not yet define them.  Both flags are
 * keyed off KMF_NOUPCALL being undefined.
 */
#ifndef KMF_NOUPCALL
#define	KMF_NOUPCALL	0x01
#define	KMF_NOCOMPLETED	0x02
#endif
105
106
/*
 * We've got to keep track of everything that is allocated, not only
 * to have a speedy free list, but also so they can be deallocated
 * after a fork().
 *
 * The queues are initialized by _kse_init() and torn down by
 * _kse_single_thread().
 */
static TAILQ_HEAD(, kse)	active_kseq;	/* KSEs in use */
static TAILQ_HEAD(, kse)	free_kseq;	/* cached, unused KSEs */
static TAILQ_HEAD(, kse_group)	free_kse_groupq; /* cached, unused KSEGs */
static TAILQ_HEAD(, kse_group)	active_kse_groupq; /* KSEGs in use */
static TAILQ_HEAD(, kse_group)	gc_ksegq;
static struct lock		kse_lock;	/* also used for kseg queue */
static int			free_kse_count = 0;
static int			free_kseg_count = 0;
static TAILQ_HEAD(, pthread)	free_threadq;	/* cached, unused threads */
static struct lock		thread_lock;	/* free thread queue lock */
static int			free_thread_count = 0;
/* Non-zero once _kse_init() has run; cleared by _kse_single_thread(). */
static int			inited = 0;
static int			active_kse_count = 0;
static int			active_kseg_count = 0;
126
/*
 * Forward declarations for the file-local helpers.
 *
 * NOTE(review): the definition of thr_cleanup() names its parameters
 * (curkse, thread); the names in the prototype below differ.
 */
static void	kse_check_completed(struct kse *kse);
static void	kse_check_waitq(struct kse *kse);
static void	kse_check_signals(struct kse *kse);
static void	kse_fini(struct kse *curkse);
static void	kse_sched_multi(struct kse *curkse);
#ifdef NOT_YET
static void	kse_sched_single(struct kse *curkse);
#endif
static void	kse_switchout_thread(struct kse *kse, struct pthread *thread);
static void	kse_wait(struct kse *kse, struct pthread *td_wait);
static void	kse_free_unlocked(struct kse *kse);
static void	kseg_free_unlocked(struct kse_group *kseg);
static void	kseg_init(struct kse_group *kseg);
static void	kseg_reinit(struct kse_group *kseg);
static void	kse_waitq_insert(struct pthread *thread);
static void	thr_cleanup(struct kse *kse, struct pthread *curthread);
#ifdef NOT_YET
static void	thr_resume_wrapper(int unused_1, siginfo_t *unused_2,
		    ucontext_t *ucp);
#endif
static void	thr_resume_check(struct pthread *curthread, ucontext_t *ucp,
		    struct pthread_sigframe *psf);
static int	thr_timedout(struct pthread *thread, struct timespec *curtime);
150
151/*
152 * This is called after a fork().
153 * No locks need to be taken here since we are guaranteed to be
154 * single threaded.
155 */
156void
157_kse_single_thread(struct pthread *curthread)
158{
159	struct kse *kse, *kse_next;
160	struct kse_group *kseg, *kseg_next;
161	struct pthread *thread, *thread_next;
162	kse_critical_t crit;
163	int i;
164
165	/*
166	 * Disable upcalls and clear the threaded flag.
167	 * XXX - I don't think we need to disable upcalls after a fork().
168	 *       but it doesn't hurt.
169	 */
170	crit = _kse_critical_enter();
171	__isthreaded = 0;
172
173	/*
174	 * Enter a loop to remove and free all threads other than
175	 * the running thread from the active thread list:
176	 */
177	for (thread = TAILQ_FIRST(&_thread_list); thread != NULL;
178	    thread = thread_next) {
179		/*
180		 * Advance to the next thread before the destroying
181		 * the current thread.
182		*/
183		thread_next = TAILQ_NEXT(thread, tle);
184
185		/*
186		 * Remove this thread from the list (the current
187		 * thread will be removed but re-added by libpthread
188		 * initialization.
189		 */
190		TAILQ_REMOVE(&_thread_list, thread, tle);
191		/* Make sure this isn't the running thread: */
192		if (thread != curthread) {
193			_thr_stack_free(&thread->attr);
194			if (thread->specific != NULL)
195				free(thread->specific);
196			for (i = 0; i < MAX_THR_LOCKLEVEL; i++) {
197				_lockuser_destroy(&thread->lockusers[i]);
198			}
199			_lock_destroy(&thread->lock);
200			free(thread);
201		}
202	}
203
204	TAILQ_INIT(&curthread->mutexq);		/* initialize mutex queue */
205	curthread->joiner = NULL;		/* no joining threads yet */
206	sigemptyset(&curthread->sigpend);	/* clear pending signals */
207	if (curthread->specific != NULL) {
208		free(curthread->specific);
209		curthread->specific = NULL;
210		curthread->specific_data_count = 0;
211	}
212
213	/* Free the free KSEs: */
214	while ((kse = TAILQ_FIRST(&free_kseq)) != NULL) {
215		TAILQ_REMOVE(&free_kseq, kse, k_qe);
216		_ksd_destroy(&kse->k_ksd);
217		if (kse->k_stack.ss_sp != NULL)
218			free(kse->k_stack.ss_sp);
219		free(kse);
220	}
221	free_kse_count = 0;
222
223	/* Free the active KSEs: */
224	for (kse = TAILQ_FIRST(&active_kseq); kse != NULL; kse = kse_next) {
225		kse_next = TAILQ_NEXT(kse, k_qe);
226		TAILQ_REMOVE(&active_kseq, kse, k_qe);
227		for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) {
228			_lockuser_destroy(&kse->k_lockusers[i]);
229		}
230		if (kse->k_stack.ss_sp != NULL)
231			free(kse->k_stack.ss_sp);
232		_lock_destroy(&kse->k_lock);
233		free(kse);
234	}
235	active_kse_count = 0;
236
237	/* Free the free KSEGs: */
238	while ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) {
239		TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
240		_lock_destroy(&kseg->kg_lock);
241		_pq_free(&kseg->kg_schedq.sq_runq);
242		free(kseg);
243	}
244	free_kseg_count = 0;
245
246	/* Free the active KSEGs: */
247	for (kseg = TAILQ_FIRST(&active_kse_groupq);
248	    kseg != NULL; kseg = kseg_next) {
249		kseg_next = TAILQ_NEXT(kseg, kg_qe);
250		TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe);
251		_lock_destroy(&kseg->kg_lock);
252		_pq_free(&kseg->kg_schedq.sq_runq);
253		free(kseg);
254	}
255	active_kseg_count = 0;
256
257	/* Free the free threads. */
258	while ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
259		TAILQ_REMOVE(&free_threadq, thread, tle);
260		if (thread->specific != NULL)
261			free(thread->specific);
262		for (i = 0; i < MAX_THR_LOCKLEVEL; i++) {
263			_lockuser_destroy(&thread->lockusers[i]);
264		}
265		_lock_destroy(&thread->lock);
266		free(thread);
267	}
268	free_thread_count = 0;
269
270	/* Free the to-be-gc'd threads. */
271	while ((thread = TAILQ_FIRST(&_thread_gc_list)) != NULL) {
272		TAILQ_REMOVE(&_thread_gc_list, thread, gcle);
273		free(thread);
274	}
275	TAILQ_INIT(&gc_ksegq);
276	_gc_count = 0;
277
278	if (inited != 0) {
279		/*
280		 * Destroy these locks; they'll be recreated to assure they
281		 * are in the unlocked state.
282		 */
283		_lock_destroy(&kse_lock);
284		_lock_destroy(&thread_lock);
285		_lock_destroy(&_thread_list_lock);
286		inited = 0;
287	}
288
289	/*
290	 * After a fork(), the leftover thread goes back to being
291	 * scope process.
292	 */
293	curthread->attr.flags &= ~PTHREAD_SCOPE_SYSTEM;
294	curthread->attr.flags |= PTHREAD_SCOPE_PROCESS;
295
296	/*
297	 * After a fork, we are still operating on the thread's original
298	 * stack.  Don't clear the THR_FLAGS_USER from the thread's
299	 * attribute flags.
300	 */
301
302	/* Initialize the threads library. */
303	curthread->kse = NULL;
304	curthread->kseg = NULL;
305	_kse_initial = NULL;
306	_libpthread_init(curthread);
307}
308
309/*
310 * This is used to initialize housekeeping and to initialize the
311 * KSD for the KSE.
312 */
313void
314_kse_init(void)
315{
316	if (inited == 0) {
317		TAILQ_INIT(&active_kseq);
318		TAILQ_INIT(&active_kse_groupq);
319		TAILQ_INIT(&free_kseq);
320		TAILQ_INIT(&free_kse_groupq);
321		TAILQ_INIT(&free_threadq);
322		TAILQ_INIT(&gc_ksegq);
323		if (_lock_init(&kse_lock, LCK_ADAPTIVE,
324		    _kse_lock_wait, _kse_lock_wakeup) != 0)
325			PANIC("Unable to initialize free KSE queue lock");
326		if (_lock_init(&thread_lock, LCK_ADAPTIVE,
327		    _kse_lock_wait, _kse_lock_wakeup) != 0)
328			PANIC("Unable to initialize free thread queue lock");
329		if (_lock_init(&_thread_list_lock, LCK_ADAPTIVE,
330		    _kse_lock_wait, _kse_lock_wakeup) != 0)
331			PANIC("Unable to initialize thread list lock");
332		active_kse_count = 0;
333		active_kseg_count = 0;
334		_gc_count = 0;
335		inited = 1;
336	}
337}
338
339int
340_kse_isthreaded(void)
341{
342	return (__isthreaded != 0);
343}
344
345/*
346 * This is called when the first thread (other than the initial
347 * thread) is created.
348 */
349int
350_kse_setthreaded(int threaded)
351{
352	if ((threaded != 0) && (__isthreaded == 0)) {
353		/*
354		 * Locking functions in libc are required when there are
355		 * threads other than the initial thread.
356		 */
357		__isthreaded = 1;
358
359		/*
360		 * Tell the kernel to create a KSE for the initial thread
361		 * and enable upcalls in it.
362		 */
363		_kse_initial->k_flags |= KF_STARTED;
364		if (kse_create(&_kse_initial->k_mbx, 0) != 0) {
365			_kse_initial->k_flags &= ~KF_STARTED;
366			/* may abort() */
367			DBG_MSG("kse_create failed\n");
368			return (-1);
369		}
370		KSE_SET_MBOX(_kse_initial, _thr_initial);
371	}
372	return (0);
373}
374
375/*
376 * Lock wait and wakeup handlers for KSE locks.  These are only used by
377 * KSEs, and should never be used by threads.  KSE locks include the
378 * KSE group lock (used for locking the scheduling queue) and the
379 * kse_lock defined above.
380 *
381 * When a KSE lock attempt blocks, the entire KSE blocks allowing another
382 * KSE to run.  For the most part, it doesn't make much sense to try and
383 * schedule another thread because you need to lock the scheduling queue
384 * in order to do that.  And since the KSE lock is used to lock the scheduling
385 * queue, you would just end up blocking again.
386 */
/*
 * Wait handler for KSE locks: block the KSE recorded in lu's private
 * data until the lock request lu is no longer busy.  The lock
 * argument is not used.  The KSE must enter with its mailbox's
 * km_curthread set to NULL; otherwise this panics.
 */
void
_kse_lock_wait(struct lock *lock, struct lockuser *lu)
{
	struct kse *curkse = (struct kse *)_LCK_GET_PRIVATE(lu);
	struct timespec ts;
	int saved_flags;

	if (curkse->k_mbx.km_curthread != NULL)
		PANIC("kse_lock_wait does not disable upcall.\n");
	/*
	 * Enter a loop to wait until we get the lock.
	 */
	ts.tv_sec = 0;
	ts.tv_nsec = 1000000;  /* 1 msec (1,000,000 nsec), not 1 sec */
	KSE_SET_WAIT(curkse);
	while (_LCK_BUSY(lu)) {
		/*
		 * Yield the kse and wait to be notified when the lock
		 * is granted.  The mailbox flags are changed only for
		 * the duration of the kse_release() call and restored
		 * right after it.
		 */
		saved_flags = curkse->k_mbx.km_flags;
		curkse->k_mbx.km_flags |= KMF_NOUPCALL | KMF_NOCOMPLETED;
		kse_release(&ts);
		curkse->k_mbx.km_flags = saved_flags;

		/*
		 * Make sure that the wait flag is set again in case
		 * we woke up without the lock being granted.
		 */
		KSE_SET_WAIT(curkse);
	}
	KSE_CLEAR_WAIT(curkse);
}
420
/*
 * Wakeup handler for KSE locks: notify the KSE recorded in lu's
 * private data that it has been granted the lock.  Nothing is done
 * unless that KSE is marked as waiting.  A KSE may never wake itself
 * up here; that is treated as a fatal error.  The lock argument is
 * not used.
 */
void
_kse_lock_wakeup(struct lock *lock, struct lockuser *lu)
{
	struct kse *curkse = _get_curkse();
	struct kse *owner = (struct kse *)_LCK_GET_PRIVATE(lu);

	if (owner != curkse) {
		if (KSE_WAITING(owner)) {
			/* Notify the owning kse that it has the lock. */
			KSE_WAKEUP(owner);
		}
	} else
		PANIC("KSE trying to wake itself up in lock");
}
439
440/*
441 * Thread wait and wakeup handlers for thread locks.  These are only used
442 * by threads, never by KSEs.  Thread locks include the per-thread lock
443 * (defined in its structure), and condition variable and mutex locks.
444 */
/*
 * Wait handler for thread locks: spin briefly, then repeatedly put
 * the thread recorded in lu's private data into PS_LOCKWAIT and
 * switch it out until the lock request lu is no longer busy.  The
 * lock argument is not used.
 */
void
_thr_lock_wait(struct lock *lock, struct lockuser *lu)
{
	struct pthread *curthread = (struct pthread *)lu->lu_private;
	int count;

	/*
	 * Spin for a bit.
	 *
	 * XXX - We probably want to make this a bit smarter.  It
	 *       doesn't make sense to spin unless there is more
	 *       than 1 CPU.  A thread that is holding one of these
	 *       locks is prevented from being swapped out for another
	 *       thread within the same scheduling entity.
	 */
	count = 0;
	while (_LCK_BUSY(lu) && count < 300)
		count++;
	while (_LCK_BUSY(lu)) {
		THR_SCHED_LOCK(curthread, curthread);
		/* Re-check under the scheduling lock before sleeping. */
		if (_LCK_BUSY(lu)) {
			/* Wait for the lock: */
			atomic_store_rel_int(&curthread->need_wakeup, 1);
			THR_SET_STATE(curthread, PS_LOCKWAIT);
			THR_SCHED_UNLOCK(curthread, curthread);
			_thr_sched_switch(curthread);
		}
		else
			THR_SCHED_UNLOCK(curthread, curthread);
	}
}
476
477void
478_thr_lock_wakeup(struct lock *lock, struct lockuser *lu)
479{
480	struct pthread *thread;
481	struct pthread *curthread;
482
483	curthread = _get_curthread();
484	thread = (struct pthread *)_LCK_GET_PRIVATE(lu);
485
486	THR_SCHED_LOCK(curthread, thread);
487	_thr_setrunnable_unlocked(thread);
488	atomic_store_rel_int(&thread->need_wakeup, 0);
489	THR_SCHED_UNLOCK(curthread, thread);
490}
491
492kse_critical_t
493_kse_critical_enter(void)
494{
495	kse_critical_t crit;
496
497	crit = _ksd_readandclear_tmbx;
498	return (crit);
499}
500
501void
502_kse_critical_leave(kse_critical_t crit)
503{
504	struct pthread *curthread;
505
506	_ksd_set_tmbx(crit);
507	if ((crit != NULL) && ((curthread = _get_curthread()) != NULL))
508		THR_YIELD_CHECK(curthread);
509}
510
511void
512_thr_critical_enter(struct pthread *thread)
513{
514	thread->critical_count++;
515}
516
517void
518_thr_critical_leave(struct pthread *thread)
519{
520	thread->critical_count--;
521	THR_YIELD_CHECK(thread);
522}
523
/*
 * Switch the calling thread out on its own initiative and enter the
 * KSE's scheduler.  Once the thread is resumed, pending signals and
 * cancellation are checked unless the thread is in a critical region.
 *
 * XXX - We may need to take the scheduling lock before calling
 *       this, or perhaps take the lock within here before
 *       doing anything else.
 */
void
_thr_sched_switch(struct pthread *curthread)
{
	struct pthread_sigframe psf;
	kse_critical_t crit;
	struct kse *curkse;
	volatile int once = 0;

	/*
	 * We're in the scheduler, 5 by 5:
	 * NOTE(review): crit is assigned but never read again in this
	 * function; the critical region is not left explicitly here.
	 */
	crit = _kse_critical_enter();
	curkse = _get_curkse();

	curthread->need_switchout = 1;	/* The thread yielded on its own. */
	curthread->critical_yield = 0;	/* No need to yield anymore. */
	curthread->slice_usec = -1;	/* Restart the time slice. */

	/*
	 * The signal frame is allocated off the stack because
	 * a thread can be interrupted by other signals while
	 * it is running down pending signals.
	 */
	sigemptyset(&psf.psf_sigset);
	curthread->curframe = &psf;

	_thread_enter_uts(&curthread->tmbx, &curkse->k_mbx);

	/*
	 * This thread is being resumed; check for cancellations.
	 * NOTE(review): once is volatile, presumably so the guard
	 * holds if execution re-enters at this point through the
	 * saved context - confirm.
	 */
	if ((once == 0) && (!THR_IN_CRITICAL(curthread))) {
		once = 1;
		thr_resume_check(curthread, &curthread->tmbx.tm_context, &psf);
	}
}
563
564/*
565 * This is the scheduler for a KSE which runs a scope system thread.
566 * The multi-thread KSE scheduler should also work for a single threaded
567 * KSE, but we use a separate scheduler so that it can be fine-tuned
568 * to be more efficient (and perhaps not need a separate stack for
569 * the KSE, allowing it to use the thread's stack).
570 *
571 * XXX - This probably needs some work.
572 */
#ifdef NOT_YET
/*
 * Scheduler for a KSE that runs a single thread.  Not compiled unless
 * NOT_YET is defined.  Handles the state of the KSE's current thread,
 * waits until that thread is PS_RUNNING again, and then resumes it
 * with _thread_switch(), which is not expected to return.
 */
static void
kse_sched_single(struct kse *curkse)
{
	struct pthread *curthread = curkse->k_curthread;
	struct pthread *td_wait;
	struct timespec ts;
	int level;

	if (curthread->active == 0) {
		if (curthread->state != PS_RUNNING) {
			/* Check to see if the thread has timed out. */
			KSE_GET_TOD(curkse, &ts);
			if (thr_timedout(curthread, &ts) != 0) {
				curthread->timeout = 1;
				curthread->state = PS_RUNNING;
			}
		}
	}

	/* This thread no longer needs to yield the CPU: */
	curthread->critical_yield = 0;
	curthread->need_switchout = 0;

	/*
	 * Lock the scheduling queue.
	 *
	 * There is no scheduling queue for single threaded KSEs,
	 * but we need a lock for protection regardless.
	 */
	KSE_SCHED_LOCK(curkse, curkse->k_kseg);

	/*
	 * This has to do the job of kse_switchout_thread(), only
	 * for a single threaded KSE/KSEG.
	 */

	switch (curthread->state) {
	case PS_DEAD:
		/* Unlock the scheduling queue and exit the KSE. */
		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
		kse_fini(curkse);	/* does not return */
		break;

	case PS_COND_WAIT:
	case PS_SLEEP_WAIT:
		/* Only insert threads that can timeout: */
		if (curthread->wakeup_time.tv_sec != -1) {
			/* Insert into the waiting queue: */
			KSE_WAITQ_INSERT(curkse, curthread);
		}
		break;

	case PS_LOCKWAIT:
		/*
		 * Keep waiting only if the most recently requested
		 * lock is still busy; otherwise the thread can run.
		 */
		level = curthread->locklevel - 1;
		if (_LCK_BUSY(&curthread->lockusers[level]))
			KSE_WAITQ_INSERT(curkse, curthread);
		else
			THR_SET_STATE(curthread, PS_RUNNING);
		break;

	case PS_JOIN:
	case PS_MUTEX_WAIT:
	case PS_RUNNING:
	case PS_SIGSUSPEND:
	case PS_SIGWAIT:
	case PS_SUSPENDED:
	case PS_DEADLOCK:
	default:
		/*
		 * These states don't timeout and don't need
		 * to be in the waiting queue.
		 */
		break;
	}
	/* Sleep in the KSE until the thread becomes runnable again. */
	while (curthread->state != PS_RUNNING) {
		curthread->active = 0;
		td_wait = KSE_WAITQ_FIRST(curkse);

		kse_wait(curkse, td_wait);

	    	if (td_wait != NULL) {
			KSE_GET_TOD(curkse, &ts);
			/*
			 * NOTE(review): the timeout test is made on
			 * curthread while the updates below are applied
			 * to td_wait - confirm the two are always the
			 * same thread here.
			 */
			if (thr_timedout(curthread, &ts)) {
				/* Indicate the thread timedout: */
				td_wait->timeout = 1;

				/* Make the thread runnable. */
				THR_SET_STATE(td_wait, PS_RUNNING);
				KSE_WAITQ_REMOVE(curkse, td_wait);
			}
		}
		/* Signals are checked without the scheduling lock held. */
		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
		kse_check_signals(curkse);
		KSE_SCHED_LOCK(curkse, curkse->k_kseg);
	}

	/* Remove the frame reference. */
	curthread->curframe = NULL;

	/* Unlock the scheduling queue. */
	KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);

	/*
	 * Continue the thread at its current frame:
	 */
	DBG_MSG("Continuing bound thread %p\n", curthread);
	_thread_switch(&curthread->tmbx, &curkse->k_mbx.km_curthread);
	PANIC("Thread has returned from _thread_switch");
}
#endif
684
685void
686dump_queues(struct kse *curkse)
687{
688	struct pthread *thread;
689
690	DBG_MSG("Threads in waiting queue:\n");
691	TAILQ_FOREACH(thread, &curkse->k_kseg->kg_schedq.sq_waitq, pqe) {
692		DBG_MSG("  thread %p, state %d, blocked %d\n",
693		    thread, thread->state, thread->blocked);
694	}
695}
696
697/*
698 * This is the scheduler for a KSE which runs multiple threads.
699 */
700static void
701kse_sched_multi(struct kse *curkse)
702{
703	struct pthread *curthread, *td_wait;
704	struct pthread_sigframe *curframe;
705	int ret;
706
707	THR_ASSERT(curkse->k_mbx.km_curthread == NULL,
708	    "Mailbox not null in kse_sched_multi");
709
710	/* Check for first time initialization: */
711	if ((curkse->k_flags & KF_INITIALIZED) == 0) {
712		/* Setup this KSEs specific data. */
713		_ksd_setprivate(&curkse->k_ksd);
714		_set_curkse(curkse);
715
716		/* Set this before grabbing the context. */
717		curkse->k_flags |= KF_INITIALIZED;
718	}
719
720	/* This may have returned from a kse_release(). */
721	if (KSE_WAITING(curkse)) {
722		DBG_MSG("Entered upcall when KSE is waiting.");
723		KSE_CLEAR_WAIT(curkse);
724	}
725
726	/* Lock the scheduling lock. */
727	KSE_SCHED_LOCK(curkse, curkse->k_kseg);
728
729	/*
730	 * If the current thread was completed in another KSE, then
731	 * it will be in the run queue.  Don't mark it as being blocked.
732	 */
733	if (((curthread = curkse->k_curthread) != NULL) &&
734	    ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0) &&
735	    (curthread->need_switchout == 0)) {
736		/*
737		 * Assume the current thread is blocked; when the
738		 * completed threads are checked and if the current
739		 * thread is among the completed, the blocked flag
740		 * will be cleared.
741		 */
742		curthread->blocked = 1;
743	}
744
745	/* Check for any unblocked threads in the kernel. */
746	kse_check_completed(curkse);
747
748	/*
749	 * Check for threads that have timed-out.
750	 */
751	kse_check_waitq(curkse);
752
753	/*
754	 * Switchout the current thread, if necessary, as the last step
755	 * so that it is inserted into the run queue (if it's runnable)
756	 * _after_ any other threads that were added to it above.
757	 */
758	if (curthread == NULL)
759		;  /* Nothing to do here. */
760	else if ((curthread->need_switchout == 0) &&
761	    (curthread->blocked == 0) && (THR_IN_CRITICAL(curthread))) {
762		/*
763		 * Resume the thread and tell it to yield when
764		 * it leaves the critical region.
765		 */
766		curthread->critical_yield = 0;
767		curthread->active = 1;
768		if ((curthread->flags & THR_FLAGS_IN_RUNQ) != 0)
769			KSE_RUNQ_REMOVE(curkse, curthread);
770		curkse->k_curthread = curthread;
771		curthread->kse = curkse;
772		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
773		DBG_MSG("Continuing thread %p in critical region\n",
774		    curthread);
775		ret = _thread_switch(&curthread->tmbx,
776		    &curkse->k_mbx.km_curthread);
777		if (ret != 0)
778			PANIC("Can't resume thread in critical region\n");
779	}
780	else if ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0)
781		kse_switchout_thread(curkse, curthread);
782	curkse->k_curthread = NULL;
783
784	/* This has to be done without the scheduling lock held. */
785	KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
786	kse_check_signals(curkse);
787	KSE_SCHED_LOCK(curkse, curkse->k_kseg);
788
789	dump_queues(curkse);
790
791	/* Check if there are no threads ready to run: */
792	while (((curthread = KSE_RUNQ_FIRST(curkse)) == NULL) &&
793	    (curkse->k_kseg->kg_threadcount != 0)) {
794		/*
795		 * Wait for a thread to become active or until there are
796		 * no more threads.
797		 */
798		td_wait = KSE_WAITQ_FIRST(curkse);
799		kse_wait(curkse, td_wait);
800		kse_check_completed(curkse);
801		kse_check_waitq(curkse);
802		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
803		kse_check_signals(curkse);
804		KSE_SCHED_LOCK(curkse, curkse->k_kseg);
805	}
806
807	/* Check for no more threads: */
808	if (curkse->k_kseg->kg_threadcount == 0) {
809		/*
810		 * Normally this shouldn't return, but it will if there
811		 * are other KSEs running that create new threads that
812		 * are assigned to this KSE[G].  For instance, if a scope
813		 * system thread were to create a scope process thread
814		 * and this kse[g] is the initial kse[g], then that newly
815		 * created thread would be assigned to us (the initial
816		 * kse[g]).
817		 */
818		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
819		kse_fini(curkse);
820		KSE_SCHED_LOCK(curkse, curkse->k_kseg);
821		curthread = KSE_RUNQ_FIRST(curkse);
822	}
823
824	THR_ASSERT(curthread != NULL,
825	    "Return from kse_wait/fini without thread.");
826	THR_ASSERT(curthread->state != PS_DEAD,
827	    "Trying to resume dead thread!");
828	KSE_RUNQ_REMOVE(curkse, curthread);
829
830	/*
831	 * Make the selected thread the current thread.
832	 */
833	curkse->k_curthread = curthread;
834
835	/*
836	 * Make sure the current thread's kse points to this kse.
837	 */
838	curthread->kse = curkse;
839
840	/*
841	 * Reset accounting.
842	 */
843	curthread->tmbx.tm_uticks = 0;
844	curthread->tmbx.tm_sticks = 0;
845
846	/*
847	 * Reset the time slice if this thread is running for the first
848	 * time or running again after using its full time slice allocation.
849	 */
850	if (curthread->slice_usec == -1)
851		curthread->slice_usec = 0;
852
853	/* Mark the thread active. */
854	curthread->active = 1;
855
856	/* Remove the frame reference. */
857	curframe = curthread->curframe;
858	curthread->curframe = NULL;
859
860	/* Unlock the scheduling queue: */
861	KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
862
863	/*
864	 * The thread's current signal frame will only be NULL if it
865	 * is being resumed after being blocked in the kernel.  In
866	 * this case, and if the thread needs to run down pending
867	 * signals or needs a cancellation check, we need to add a
868	 * signal frame to the thread's context.
869	 */
870#ifdef NOT_YET
871	if ((curframe == NULL) && ((curthread->check_pending != 0) ||
872	    (((curthread->cancelflags & THR_AT_CANCEL_POINT) == 0) &&
873	    ((curthread->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0)))) {
874		signalcontext(&curthread->tmbx.tm_context, 0,
875		    (__sighandler_t *)thr_resume_wrapper);
876	}
877#endif
878#ifdef GS_HACK
879	/* XXX - The kernel sometimes forgets to restore %gs properly. */
880	_ksd_setprivate(&curkse->k_ksd);
881#endif
882	/*
883	 * Continue the thread at its current frame:
884	 */
885	ret = _thread_switch(&curthread->tmbx, &curkse->k_mbx.km_curthread);
886	if (ret != 0)
887		PANIC("Thread has returned from _thread_switch");
888
889	/* This point should not be reached. */
890	PANIC("Thread has returned from _thread_switch");
891}
892
893static void
894kse_check_signals(struct kse *curkse)
895{
896	sigset_t sigset;
897	int i;
898
899	/* Deliver posted signals. */
900	for (i = 0; i < _SIG_WORDS; i++) {
901		atomic_swap_int(&curkse->k_mbx.km_sigscaught.__bits[i],
902		    0, &sigset.__bits[i]);
903	}
904	if (SIGNOTEMPTY(sigset)) {
905		/*
906		 * Dispatch each signal.
907		 *
908		 * XXX - There is no siginfo for any of these.
909		 *       I think there should be, especially for
910		 *       signals from other processes (si_pid, si_uid).
911		 */
912		for (i = 1; i < NSIG; i++) {
913			if (sigismember(&sigset, i) != 0) {
914				DBG_MSG("Dispatching signal %d\n", i);
915				_thr_sig_dispatch(curkse, i,
916				    NULL /* no siginfo */);
917			}
918		}
919		sigemptyset(&sigset);
920		__sys_sigprocmask(SIG_SETMASK, &sigset, NULL);
921	}
922}
923
#ifdef NOT_YET
/*
 * Signal-handler shaped wrapper that runs the resume checks for the
 * current thread against the supplied context.  The first two
 * arguments are ignored and no signal frame is passed along.
 */
static void
thr_resume_wrapper(int unused_1, siginfo_t *unused_2, ucontext_t *ucp)
{
	thr_resume_check(_get_curthread(), ucp, NULL);
}
#endif
933
934static void
935thr_resume_check(struct pthread *curthread, ucontext_t *ucp,
936    struct pthread_sigframe *psf)
937{
938	/* Check signals before cancellations. */
939	while (curthread->check_pending != 0) {
940		/* Clear the pending flag. */
941		curthread->check_pending = 0;
942
943		/*
944		 * It's perfectly valid, though not portable, for
945		 * signal handlers to munge their interrupted context
946		 * and expect to return to it.  Ensure we use the
947		 * correct context when running down signals.
948		 */
949		_thr_sig_rundown(curthread, ucp, psf);
950	}
951
952	if (((curthread->cancelflags & THR_AT_CANCEL_POINT) == 0) &&
953	    ((curthread->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
954		pthread_testcancel();
955}
956
/*
 * Clean up a thread.  This must be called with the thread's KSE
 * scheduling lock held.  The thread must be a thread from the
 * KSE's group.
 *
 * Wakes a thread that is joining this one (handing it the return
 * value), marks the thread safe for garbage collection and puts it
 * on the GC list.  The scheduling lock of curkse's group is dropped
 * and re-taken along the way, and is held again on return.
 */
static void
thr_cleanup(struct kse *curkse, struct pthread *thread)
{
	struct pthread *joiner;

	if ((joiner = thread->joiner) != NULL) {
		thread->joiner = NULL;
		/* Only wake the joiner if it is still waiting on us. */
		if ((joiner->state == PS_JOIN) &&
		    (joiner->join_status.thread == thread)) {
			joiner->join_status.thread = NULL;

			/* Set the return status for the joining thread: */
			joiner->join_status.ret = thread->ret;

			/* Make the thread runnable. */
			if (joiner->kseg == curkse->k_kseg)
				_thr_setrunnable_unlocked(joiner);
			else {
				/*
				 * The joiner is in another KSE group;
				 * swap our group's scheduling lock for
				 * the joiner's while making it runnable.
				 */
				KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
				KSE_SCHED_LOCK(curkse, joiner->kseg);
				_thr_setrunnable_unlocked(joiner);
				KSE_SCHED_UNLOCK(curkse, joiner->kseg);
				KSE_SCHED_LOCK(curkse, curkse->k_kseg);
			}
		}
		/* A thread that had a joiner is treated as detached. */
		thread->attr.flags |= PTHREAD_DETACHED;
	}

	/*
	 * NOTE(review): if PTHREAD_SCOPE_PROCESS is defined as 0, this
	 * test is always true - confirm the intended flag.
	 */
	if ((thread->attr.flags & PTHREAD_SCOPE_PROCESS) == 0) {
		/*
		 * Remove the thread from the KSEG's list of threads.
		 */
		KSEG_THRQ_REMOVE(thread->kseg, thread);
		/*
		 * Migrate the thread to the main KSE so that this
		 * KSE and KSEG can be cleaned when their last thread
		 * exits.
		 */
		thread->kseg = _kse_initial->k_kseg;
		thread->kse = _kse_initial;
	}
	thread->flags |= THR_FLAGS_GC_SAFE;

	/*
	 * We can't hold the thread list lock while holding the
	 * scheduler lock.
	 */
	KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
	DBG_MSG("Adding thread %p to GC list\n", thread);
	KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
	THR_GCLIST_ADD(thread);
	KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
	KSE_SCHED_LOCK(curkse, curkse->k_kseg);
}
1016
/*
 * Garbage collect dead threads on behalf of curthread.
 *
 * Pass 1 (under the thread list lock): every GC-safe thread is taken
 * off the GC list and its stack is freed; those that are also
 * detached and unreferenced are moved to a private worklist.
 * Pass 2 (thread list lock released): each worklist thread is freed.
 */
void
_thr_gc(struct pthread *curthread)
{
	struct pthread *td, *td_next;
	kse_critical_t crit;
	TAILQ_HEAD(, pthread) worklist;

	TAILQ_INIT(&worklist);
	crit = _kse_critical_enter();
	KSE_LOCK_ACQUIRE(curthread->kse, &_thread_list_lock);

	/* Check the threads waiting for GC. */
	for (td = TAILQ_FIRST(&_thread_gc_list); td != NULL; td = td_next) {
		/* Fetch the successor first; td may be unlinked below. */
		td_next = TAILQ_NEXT(td, gcle);
		if ((td->flags & THR_FLAGS_GC_SAFE) == 0)
			continue;
#ifdef NOT_YET
		else if (((td->attr.flags & PTHREAD_SCOPE_PROCESS) != 0) &&
		    (td->kse->k_mbx.km_flags == 0)) {
			/*
			 * The thread and KSE are operating on the same
			 * stack.  Wait for the KSE to exit before freeing
			 * the thread's stack as well as everything else.
			 */
			continue;
		}
#endif
		/*
		 * Remove the thread from the GC list.  If the thread
		 * isn't yet detached, it will get added back to the
		 * GC list at a later time.
		 */
		THR_GCLIST_REMOVE(td);
		DBG_MSG("Freeing thread %p stack\n", td);
		/*
		 * We can free the thread stack since it's no longer
		 * in use.
		 * NOTE(review): a thread that is re-added to the GC
		 * list later would reach this call again - confirm
		 * _thr_stack_free() tolerates that.
		 */
		_thr_stack_free(&td->attr);
		if (((td->attr.flags & PTHREAD_DETACHED) != 0) &&
		    (td->refcount == 0)) {
			/*
			 * The thread has detached and is no longer
			 * referenced.  It is safe to remove all
			 * remnants of the thread.
			 */
			TAILQ_INSERT_HEAD(&worklist, td, gcle);
		}
	}
	KSE_LOCK_RELEASE(curthread->kse, &_thread_list_lock);
	_kse_critical_leave(crit);

	/* Free the collected threads without the thread list lock. */
	while ((td = TAILQ_FIRST(&worklist)) != NULL) {
		TAILQ_REMOVE(&worklist, td, gcle);

		/*
		 * NOTE(review): if PTHREAD_SCOPE_PROCESS is defined as
		 * 0, this test is never true and the KSE/KSEG are never
		 * freed here - confirm the intended flag.
		 */
		if ((td->attr.flags & PTHREAD_SCOPE_PROCESS) != 0) {
			crit = _kse_critical_enter();
			KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
			kse_free_unlocked(td->kse);
			kseg_free_unlocked(td->kseg);
			KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
			_kse_critical_leave(crit);
		}
		DBG_MSG("Freeing thread %p\n", td);
		_thr_free(curthread, td);
	}
}
1084
1085
1086/*
1087 * Only new threads that are running or suspended may be scheduled.
1088 */
1089int
1090_thr_schedule_add(struct pthread *curthread, struct pthread *newthread)
1091{
1092	struct kse *curkse;
1093	kse_critical_t crit;
1094	int need_start;
1095	int ret;
1096
1097	/*
1098	 * If this is the first time creating a thread, make sure
1099	 * the mailbox is set for the current thread.
1100	 */
1101	if ((newthread->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) {
1102#ifdef NOT_YET
1103		/* We use the thread's stack as the KSE's stack. */
1104		new_thread->kse->k_mbx.km_stack.ss_sp =
1105		    new_thread->attr.stackaddr_attr;
1106		new_thread->kse->k_mbx.km_stack.ss_size =
1107		    new_thread->attr.stacksize_attr;
1108#endif
1109		/*
1110		 * No need to lock the scheduling queue since the
1111		 * KSE/KSEG pair have not yet been started.
1112		 */
1113		KSEG_THRQ_ADD(newthread->kseg, newthread);
1114		TAILQ_INSERT_TAIL(&newthread->kseg->kg_kseq, newthread->kse,
1115		    k_kgqe);
1116		newthread->kseg->kg_ksecount = 1;
1117		if (newthread->state == PS_RUNNING)
1118			THR_RUNQ_INSERT_TAIL(newthread);
1119		newthread->kse->k_curthread = NULL;
1120		newthread->kse->k_mbx.km_flags = 0;
1121		newthread->kse->k_mbx.km_func = (kse_func_t *)kse_sched_multi;
1122
1123		/*
1124		 * This thread needs a new KSE and KSEG.
1125		 */
1126		crit = _kse_critical_enter();
1127		curkse = _get_curkse();
1128		_ksd_setprivate(&newthread->kse->k_ksd);
1129		newthread->kse->k_flags |= KF_INITIALIZED;
1130		ret = kse_create(&newthread->kse->k_mbx, 1);
1131		if (ret != 0)
1132			ret = errno;
1133		_ksd_setprivate(&curkse->k_ksd);
1134		_kse_critical_leave(crit);
1135	}
1136	else {
1137		/*
1138		 * Lock the KSE and add the new thread to its list of
1139		 * assigned threads.  If the new thread is runnable, also
1140		 * add it to the KSE's run queue.
1141		 */
1142		need_start = 0;
1143		KSE_SCHED_LOCK(curthread->kse, newthread->kseg);
1144		KSEG_THRQ_ADD(newthread->kseg, newthread);
1145		if (newthread->state == PS_RUNNING)
1146			THR_RUNQ_INSERT_TAIL(newthread);
1147		if ((newthread->kse->k_flags & KF_STARTED) == 0) {
1148			/*
1149			 * This KSE hasn't been started yet.  Start it
1150			 * outside of holding the lock.
1151			 */
1152			newthread->kse->k_flags |= KF_STARTED;
1153			newthread->kse->k_mbx.km_func =
1154			    (kse_func_t *)kse_sched_multi;
1155			newthread->kse->k_mbx.km_flags = 0;
1156			need_start = 1;
1157		}
1158		KSE_SCHED_UNLOCK(curthread->kse, newthread->kseg);
1159
1160	  	if (need_start != 0)
1161			kse_create(&newthread->kse->k_mbx, 0);
1162		else if ((newthread->state == PS_RUNNING) &&
1163		    KSE_WAITING(newthread->kse)) {
1164			/*
1165			 * The thread is being scheduled on another KSEG.
1166			 */
1167			KSE_WAKEUP(newthread->kse);
1168		}
1169		ret = 0;
1170	}
1171	return (ret);
1172}
1173
1174void
1175kse_waitq_insert(struct pthread *thread)
1176{
1177	struct pthread *td;
1178
1179	if (thread->wakeup_time.tv_sec == -1)
1180		TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq, thread,
1181		    pqe);
1182	else {
1183		td = TAILQ_FIRST(&thread->kse->k_schedq->sq_waitq);
1184		while ((td != NULL) && (td->wakeup_time.tv_sec != -1) &&
1185		    ((td->wakeup_time.tv_sec < thread->wakeup_time.tv_sec) ||
1186		    ((td->wakeup_time.tv_sec == thread->wakeup_time.tv_sec) &&
1187		    (td->wakeup_time.tv_nsec <= thread->wakeup_time.tv_nsec))))
1188			td = TAILQ_NEXT(td, pqe);
1189		if (td == NULL)
1190			TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq,
1191			    thread, pqe);
1192		else
1193			TAILQ_INSERT_BEFORE(td, thread, pqe);
1194	}
1195	thread->flags |= THR_FLAGS_IN_WAITQ;
1196}
1197
1198/*
1199 * This must be called with the scheduling lock held.
1200 */
1201static void
1202kse_check_completed(struct kse *kse)
1203{
1204	struct pthread *thread;
1205	struct kse_thr_mailbox *completed;
1206
1207	if ((completed = kse->k_mbx.km_completed) != NULL) {
1208		kse->k_mbx.km_completed = NULL;
1209		while (completed != NULL) {
1210			thread = completed->tm_udata;
1211			DBG_MSG("Found completed thread %p, name %s\n",
1212			    thread,
1213			    (thread->name == NULL) ? "none" : thread->name);
1214			thread->blocked = 0;
1215			if (thread != kse->k_curthread)
1216				KSE_RUNQ_INSERT_TAIL(kse, thread);
1217			completed = completed->tm_next;
1218		}
1219	}
1220}
1221
1222/*
1223 * This must be called with the scheduling lock held.
1224 */
1225static void
1226kse_check_waitq(struct kse *kse)
1227{
1228	struct pthread	*pthread;
1229	struct timespec ts;
1230
1231	KSE_GET_TOD(kse, &ts);
1232
1233	/*
1234	 * Wake up threads that have timedout.  This has to be
1235	 * done before adding the current thread to the run queue
1236	 * so that a CPU intensive thread doesn't get preference
1237	 * over waiting threads.
1238	 */
1239	while (((pthread = KSE_WAITQ_FIRST(kse)) != NULL) &&
1240	    thr_timedout(pthread, &ts)) {
1241		/* Remove the thread from the wait queue: */
1242		KSE_WAITQ_REMOVE(kse, pthread);
1243		DBG_MSG("Found timedout thread %p in waitq\n", pthread);
1244
1245		/* Indicate the thread timedout: */
1246		pthread->timeout = 1;
1247
1248		/* Add the thread to the priority queue: */
1249		THR_SET_STATE(pthread, PS_RUNNING);
1250		KSE_RUNQ_INSERT_TAIL(kse, pthread);
1251	}
1252}
1253
1254static int
1255thr_timedout(struct pthread *thread, struct timespec *curtime)
1256{
1257	if (thread->wakeup_time.tv_sec < 0)
1258		return (0);
1259	else if (thread->wakeup_time.tv_sec > curtime->tv_sec)
1260		return (0);
1261	else if ((thread->wakeup_time.tv_sec == curtime->tv_sec) &&
1262	    (thread->wakeup_time.tv_nsec > curtime->tv_nsec))
1263		return (0);
1264	else
1265		return (1);
1266}
1267
1268/*
1269 * This must be called with the scheduling lock held.
1270 *
1271 * Each thread has a time slice, a wakeup time (used when it wants
1272 * to wait for a specified amount of time), a run state, and an
1273 * active flag.
1274 *
1275 * When a thread gets run by the scheduler, the active flag is
1276 * set to non-zero (1).  When a thread performs an explicit yield
1277 * or schedules a state change, it enters the scheduler and the
1278 * active flag is cleared.  When the active flag is still seen
1279 * set in the scheduler, that means that the thread is blocked in
1280 * the kernel (because it is cleared before entering the scheduler
1281 * in all other instances).
1282 *
1283 * The wakeup time is only set for those states that can timeout.
1284 * It is set to (-1, -1) for all other instances.
1285 *
1286 * The thread's run state, aside from being useful when debugging,
1287 * is used to place the thread in an appropriate queue.  There
1288 * are 2 basic queues:
1289 *
1290 *   o run queue - queue ordered by priority for all threads
1291 *                 that are runnable
1292 *   o waiting queue - queue sorted by wakeup time for all threads
1293 *                     that are not otherwise runnable (not blocked
1294 *                     in kernel, not waiting for locks)
1295 *
1296 * The thread's time slice is used for round-robin scheduling
1297 * (the default scheduling policy).  While a SCHED_RR thread
1298 * is runnable it's time slice accumulates.  When it reaches
1299 * the time slice interval, it gets reset and added to the end
1300 * of the queue of threads at its priority.  When a thread no
1301 * longer becomes runnable (blocks in kernel, waits, etc), its
1302 * time slice is reset.
1303 *
1304 * The job of kse_switchout_thread() is to handle all of the above.
1305 */
1306static void
1307kse_switchout_thread(struct kse *kse, struct pthread *thread)
1308{
1309	int level;
1310
1311	/*
1312	 * Place the currently running thread into the
1313	 * appropriate queue(s).
1314	 */
1315	DBG_MSG("Switching out thread %p, state %d\n", thread, thread->state);
1316	if (thread->blocked != 0) {
1317		/* This thread must have blocked in the kernel. */
1318		/* thread->slice_usec = -1;*/	/* restart timeslice */
1319		/*
1320		 * XXX - Check for pending signals for this thread to
1321		 *       see if we need to interrupt it in the kernel.
1322		 */
1323		/* if (thread->check_pending != 0) */
1324		if ((thread->slice_usec != -1) &&
1325		    (thread->attr.sched_policy != SCHED_FIFO))
1326			thread->slice_usec += (thread->tmbx.tm_uticks
1327			    + thread->tmbx.tm_sticks) * _clock_res_usec;
1328	}
1329	else {
1330		switch (thread->state) {
1331		case PS_DEAD:
1332			/*
1333			 * The scheduler is operating on a different
1334			 * stack.  It is safe to do garbage collecting
1335			 * here.
1336			 */
1337			thr_cleanup(kse, thread);
1338			return;
1339			break;
1340
1341		case PS_RUNNING:
1342			/* Nothing to do here. */
1343			break;
1344
1345		case PS_COND_WAIT:
1346		case PS_SLEEP_WAIT:
1347			/* Insert into the waiting queue: */
1348			KSE_WAITQ_INSERT(kse, thread);
1349			break;
1350
1351		case PS_LOCKWAIT:
1352			/*
1353			 * This state doesn't timeout.
1354			 */
1355			thread->wakeup_time.tv_sec = -1;
1356			thread->wakeup_time.tv_nsec = -1;
1357			level = thread->locklevel - 1;
1358			if (_LCK_BUSY(&thread->lockusers[level]))
1359				KSE_WAITQ_INSERT(kse, thread);
1360			else
1361				THR_SET_STATE(thread, PS_RUNNING);
1362			break;
1363
1364		case PS_JOIN:
1365		case PS_MUTEX_WAIT:
1366		case PS_SIGSUSPEND:
1367		case PS_SIGWAIT:
1368		case PS_SUSPENDED:
1369		case PS_DEADLOCK:
1370		default:
1371			/*
1372			 * These states don't timeout.
1373			 */
1374			thread->wakeup_time.tv_sec = -1;
1375			thread->wakeup_time.tv_nsec = -1;
1376
1377			/* Insert into the waiting queue: */
1378			KSE_WAITQ_INSERT(kse, thread);
1379			break;
1380		}
1381		if (thread->state != PS_RUNNING) {
1382			/* Restart the time slice: */
1383			thread->slice_usec = -1;
1384		} else {
1385			if (thread->need_switchout != 0)
1386				/*
1387				 * The thread yielded on its own;
1388				 * restart the timeslice.
1389				 */
1390				thread->slice_usec = -1;
1391			else if ((thread->slice_usec != -1) &&
1392	   		    (thread->attr.sched_policy != SCHED_FIFO)) {
1393				thread->slice_usec += (thread->tmbx.tm_uticks
1394				    + thread->tmbx.tm_sticks) * _clock_res_usec;
1395				/* Check for time quantum exceeded: */
1396				if (thread->slice_usec > TIMESLICE_USEC)
1397					thread->slice_usec = -1;
1398			}
1399			if (thread->slice_usec == -1) {
1400				/*
1401				 * The thread exceeded its time quantum or
1402				 * it yielded the CPU; place it at the tail
1403				 * of the queue for its priority.
1404				 */
1405				KSE_RUNQ_INSERT_TAIL(kse, thread);
1406			} else {
1407				/*
1408				 * The thread hasn't exceeded its interval
1409				 * Place it at the head of the queue for its
1410				 * priority.
1411				 */
1412				KSE_RUNQ_INSERT_HEAD(kse, thread);
1413			}
1414		}
1415	}
1416	thread->active = 0;
1417	thread->need_switchout = 0;
1418}
1419
1420/*
1421 * This function waits for the smallest timeout value of any waiting
1422 * thread, or until it receives a message from another KSE.
1423 *
1424 * This must be called with the scheduling lock held.
1425 */
1426static void
1427kse_wait(struct kse *kse, struct pthread *td_wait)
1428{
1429	struct timespec ts, ts_sleep;
1430	int saved_flags;
1431
1432	KSE_GET_TOD(kse, &ts);
1433
1434	if ((td_wait == NULL) || (td_wait->wakeup_time.tv_sec < 0)) {
1435		/* Limit sleep to no more than 1 minute. */
1436		ts_sleep.tv_sec = 60;
1437		ts_sleep.tv_nsec = 0;
1438	} else {
1439		TIMESPEC_SUB(&ts_sleep, &td_wait->wakeup_time, &ts);
1440		if (ts_sleep.tv_sec > 60) {
1441			ts_sleep.tv_sec = 60;
1442			ts_sleep.tv_nsec = 0;
1443		}
1444	}
1445	/* Don't sleep for negative times. */
1446	if ((ts_sleep.tv_sec >= 0) && (ts_sleep.tv_nsec >= 0)) {
1447		KSE_SET_WAIT(kse);
1448		KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1449		saved_flags = kse->k_mbx.km_flags;
1450		kse->k_mbx.km_flags |= KMF_NOUPCALL;
1451		kse_release(&ts_sleep);
1452		kse->k_mbx.km_flags = saved_flags;
1453		KSE_CLEAR_WAIT(kse);
1454		KSE_SCHED_LOCK(kse, kse->k_kseg);
1455	}
1456}
1457
1458/*
1459 * Avoid calling this kse_exit() so as not to confuse it with the
1460 * system call of the same name.
1461 */
1462static void
1463kse_fini(struct kse *kse)
1464{
1465	struct timespec ts;
1466	struct kse_group *free_kseg = NULL;
1467
1468	if ((kse->k_kseg->kg_flags & KGF_SINGLE_THREAD) != 0)
1469		kse_exit();
1470	/*
1471	 * Check to see if this is one of the main kses.
1472	 */
1473	else if (kse->k_kseg != _kse_initial->k_kseg) {
1474		/* Remove this KSE from the KSEG's list of KSEs. */
1475		KSE_SCHED_LOCK(kse, kse->k_kseg);
1476		TAILQ_REMOVE(&kse->k_kseg->kg_kseq, kse, k_kgqe);
1477		kse->k_kseg->kg_ksecount--;
1478		if (TAILQ_EMPTY(&kse->k_kseg->kg_kseq))
1479			free_kseg = kse->k_kseg;
1480		KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1481
1482		/*
1483		 * Add this KSE to the list of free KSEs along with
1484		 * the KSEG if is now orphaned.
1485		 */
1486		KSE_LOCK_ACQUIRE(kse, &kse_lock);
1487		if (free_kseg != NULL)
1488			kseg_free_unlocked(free_kseg);
1489		kse_free_unlocked(kse);
1490		KSE_LOCK_RELEASE(kse, &kse_lock);
1491		kse_exit();
1492		/* Never returns. */
1493	} else {
1494		/*
1495		 * Wait for the last KSE/thread to exit, or for more
1496		 * threads to be created (it is possible for additional
1497		 * scope process threads to be created after the main
1498		 * thread exits).
1499		 */
1500		ts.tv_sec = 120;
1501		ts.tv_nsec = 0;
1502		KSE_SET_WAIT(kse);
1503		KSE_SCHED_LOCK(kse, kse->k_kseg);
1504		if ((active_kse_count > 1) &&
1505		    (kse->k_kseg->kg_threadcount == 0)) {
1506			KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1507			kse_release(&ts);
1508			/* The above never returns. */
1509		}
1510		else
1511			KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1512
1513		/* There are no more threads; exit this process: */
1514		if (kse->k_kseg->kg_threadcount == 0) {
1515			/* kse_exit(); */
1516			__isthreaded = 0;
1517			exit(0);
1518		}
1519	}
1520}
1521
1522void
1523_thr_sig_add(struct pthread *thread, int sig, siginfo_t *info, ucontext_t *ucp)
1524{
1525	struct kse *curkse;
1526
1527	curkse = _get_curkse();
1528
1529	KSE_SCHED_LOCK(curkse, thread->kseg);
1530	/*
1531	 * A threads assigned KSE can't change out from under us
1532	 * when we hold the scheduler lock.
1533	 */
1534	if (THR_IS_ACTIVE(thread)) {
1535		/* Thread is active.  Can't install the signal for it. */
1536		/* Make a note in the thread that it has a signal. */
1537		sigaddset(&thread->sigpend, sig);
1538		thread->check_pending = 1;
1539	}
1540	else {
1541		/* Make a note in the thread that it has a signal. */
1542		sigaddset(&thread->sigpend, sig);
1543		thread->check_pending = 1;
1544
1545		if (thread->blocked != 0) {
1546			/* Tell the kernel to interrupt the thread. */
1547			kse_thr_interrupt(&thread->tmbx);
1548		}
1549	}
1550	KSE_SCHED_UNLOCK(curkse, thread->kseg);
1551}
1552
1553void
1554_thr_set_timeout(const struct timespec *timeout)
1555{
1556	struct pthread	*curthread = _get_curthread();
1557	struct timespec ts;
1558
1559	/* Reset the timeout flag for the running thread: */
1560	curthread->timeout = 0;
1561
1562	/* Check if the thread is to wait forever: */
1563	if (timeout == NULL) {
1564		/*
1565		 * Set the wakeup time to something that can be recognised as
1566		 * different to an actual time of day:
1567		 */
1568		curthread->wakeup_time.tv_sec = -1;
1569		curthread->wakeup_time.tv_nsec = -1;
1570	}
1571	/* Check if no waiting is required: */
1572	else if ((timeout->tv_sec == 0) && (timeout->tv_nsec == 0)) {
1573		/* Set the wake up time to 'immediately': */
1574		curthread->wakeup_time.tv_sec = 0;
1575		curthread->wakeup_time.tv_nsec = 0;
1576	} else {
1577		/* Calculate the time for the current thread to wakeup: */
1578		KSE_GET_TOD(curthread->kse, &ts);
1579		TIMESPEC_ADD(&curthread->wakeup_time, &ts, timeout);
1580	}
1581}
1582
/*
 * Report a fatal library error and abort the process.
 *
 * A message of the form "(file:line) msg" followed by a newline is
 * formatted into a 256 byte buffer (longer text is truncated by
 * snprintf), written to file descriptor 2, and abort() is called.
 */
void
_thr_panic_exit(char *file, int line, char *msg)
{
	char text[256];
	size_t len;

	snprintf(text, sizeof(text), "(%s:%d) %s\n", file, line, msg);
	len = strlen(text);
	__sys_write(2, text, len);
	abort();
}
1592
1593void
1594_thr_setrunnable(struct pthread *curthread, struct pthread *thread)
1595{
1596	kse_critical_t crit;
1597
1598	crit = _kse_critical_enter();
1599	KSE_SCHED_LOCK(curthread->kse, thread->kseg);
1600	_thr_setrunnable_unlocked(thread);
1601	KSE_SCHED_UNLOCK(curthread->kse, thread->kseg);
1602	_kse_critical_leave(crit);
1603}
1604
1605void
1606_thr_setrunnable_unlocked(struct pthread *thread)
1607{
1608	if ((thread->kseg->kg_flags & KGF_SINGLE_THREAD) != 0)
1609		/* No silly queues for these threads. */
1610		THR_SET_STATE(thread, PS_RUNNING);
1611	else {
1612		if ((thread->flags & THR_FLAGS_IN_WAITQ) != 0)
1613			KSE_WAITQ_REMOVE(thread->kse, thread);
1614		THR_SET_STATE(thread, PS_RUNNING);
1615		if ((thread->blocked == 0) &&
1616		    (thread->flags & THR_FLAGS_IN_RUNQ) == 0)
1617			THR_RUNQ_INSERT_TAIL(thread);
1618	}
1619        /*
1620         * XXX - Threads are not yet assigned to specific KSEs; they are
1621         *       assigned to the KSEG.  So the fact that a thread's KSE is
1622         *       waiting doesn't necessarily mean that it will be the KSE
1623         *       that runs the thread after the lock is granted.  But we
1624         *       don't know if the other KSEs within the same KSEG are
1625         *       also in a waiting state or not so we err on the side of
1626         *       caution and wakeup the thread's last known KSE.  We
1627         *       ensure that the threads KSE doesn't change while it's
1628         *       scheduling lock is held so it is safe to reference it
1629         *       (the KSE).  If the KSE wakes up and doesn't find any more
1630         *       work it will again go back to waiting so no harm is done.
1631         */
1632	if (KSE_WAITING(thread->kse))
1633		KSE_WAKEUP(thread->kse);
1634}
1635
1636struct pthread *
1637_get_curthread(void)
1638{
1639	return (_ksd_curthread);
1640}
1641
1642/* This assumes the caller has disabled upcalls. */
1643struct kse *
1644_get_curkse(void)
1645{
1646	return (_ksd_curkse);
1647}
1648
1649void
1650_set_curkse(struct kse *kse)
1651{
1652	_ksd_setprivate(&kse->k_ksd);
1653}
1654
1655/*
1656 * Allocate a new KSEG.
1657 *
1658 * We allow the current thread to be NULL in the case that this
1659 * is the first time a KSEG is being created (library initialization).
1660 * In this case, we don't need to (and can't) take any locks.
1661 */
1662struct kse_group *
1663_kseg_alloc(struct pthread *curthread)
1664{
1665	struct kse_group *kseg = NULL;
1666	kse_critical_t crit;
1667
1668	if ((curthread != NULL) && (free_kseg_count > 0)) {
1669		/* Use the kse lock for the kseg queue. */
1670		crit = _kse_critical_enter();
1671		KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1672		if ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) {
1673			TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
1674			free_kseg_count--;
1675			active_kseg_count++;
1676			TAILQ_INSERT_TAIL(&active_kse_groupq, kseg, kg_qe);
1677		}
1678		KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1679		_kse_critical_leave(crit);
1680		if (kseg)
1681			kseg_reinit(kseg);
1682	}
1683
1684	/*
1685	 * If requested, attempt to allocate a new KSE group only if the
1686	 * KSE allocation was successful and a KSE group wasn't found in
1687	 * the free list.
1688	 */
1689	if ((kseg == NULL) &&
1690	    ((kseg = (struct kse_group *)malloc(sizeof(*kseg))) != NULL)) {
1691		if (_pq_alloc(&kseg->kg_schedq.sq_runq,
1692		    THR_MIN_PRIORITY, THR_LAST_PRIORITY) != 0) {
1693			free(kseg);
1694			kseg = NULL;
1695		} else {
1696			kseg_init(kseg);
1697			/* Add the KSEG to the list of active KSEGs. */
1698			if (curthread != NULL) {
1699				crit = _kse_critical_enter();
1700				KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1701				active_kseg_count++;
1702				TAILQ_INSERT_TAIL(&active_kse_groupq,
1703				    kseg, kg_qe);
1704				KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1705				_kse_critical_leave(crit);
1706			} else {
1707				active_kseg_count++;
1708				TAILQ_INSERT_TAIL(&active_kse_groupq,
1709				    kseg, kg_qe);
1710			}
1711		}
1712	}
1713	return (kseg);
1714}
1715
1716/*
1717 * This must be called with the kse lock held and when there are
1718 * no more threads that reference it.
1719 */
1720static void
1721kseg_free_unlocked(struct kse_group *kseg)
1722{
1723	TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe);
1724	TAILQ_INSERT_HEAD(&free_kse_groupq, kseg, kg_qe);
1725	free_kseg_count++;
1726	active_kseg_count--;
1727}
1728
1729void
1730_kseg_free(struct kse_group *kseg)
1731{
1732	struct kse *curkse;
1733	kse_critical_t crit;
1734
1735	crit = _kse_critical_enter();
1736	curkse = _get_curkse();
1737	KSE_LOCK_ACQUIRE(curkse, &kse_lock);
1738	kseg_free_unlocked(kseg);
1739	KSE_LOCK_RELEASE(curkse, &kse_lock);
1740	_kse_critical_leave(crit);
1741}
1742
1743/*
1744 * Allocate a new KSE.
1745 *
1746 * We allow the current thread to be NULL in the case that this
1747 * is the first time a KSE is being created (library initialization).
1748 * In this case, we don't need to (and can't) take any locks.
1749 */
1750struct kse *
1751_kse_alloc(struct pthread *curthread)
1752{
1753	struct kse *kse = NULL;
1754	kse_critical_t crit;
1755	int need_ksd = 0;
1756	int i;
1757
1758	if ((curthread != NULL) && (free_kse_count > 0)) {
1759		crit = _kse_critical_enter();
1760		KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1761		/* Search for a finished KSE. */
1762		kse = TAILQ_FIRST(&free_kseq);
1763#ifdef NOT_YET
1764#define KEMBX_DONE	0x04
1765		while ((kse != NULL) &&
1766		    ((kse->k_mbx.km_flags & KEMBX_DONE) == 0)) {
1767			kse = TAILQ_NEXT(kse, k_qe);
1768		}
1769#undef KEMBX_DONE
1770#endif
1771		if (kse != NULL) {
1772			TAILQ_REMOVE(&free_kseq, kse, k_qe);
1773			free_kse_count--;
1774			TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe);
1775			active_kse_count++;
1776		}
1777		KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1778		_kse_critical_leave(crit);
1779	}
1780	if ((kse == NULL) &&
1781	    ((kse = (struct kse *)malloc(sizeof(*kse))) != NULL)) {
1782		bzero(kse, sizeof(*kse));
1783
1784		/* Initialize the lockusers. */
1785		for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) {
1786			_lockuser_init(&kse->k_lockusers[i], (void *)kse);
1787			_LCK_SET_PRIVATE2(&kse->k_lockusers[i], NULL);
1788		}
1789		/* _lock_init(kse->k_lock, ...) */
1790
1791		/* We had to malloc a kse; mark it as needing a new ID.*/
1792		need_ksd = 1;
1793
1794		/*
1795		 * Create the KSE context.
1796		 *
1797		 * XXX - For now this is done here in the allocation.
1798		 *       In the future, we may want to have it done
1799		 *       outside the allocation so that scope system
1800		 *       threads (one thread per KSE) are not required
1801		 *       to have a stack for an unneeded kse upcall.
1802		 */
1803		kse->k_mbx.km_func = (kse_func_t *)kse_sched_multi;
1804		kse->k_mbx.km_stack.ss_sp = (char *)malloc(KSE_STACKSIZE);
1805		kse->k_mbx.km_stack.ss_size = KSE_STACKSIZE;
1806		kse->k_mbx.km_udata = (void *)kse;
1807		kse->k_mbx.km_quantum = 20000;
1808		/*
1809		 * We need to keep a copy of the stack in case it
1810		 * doesn't get used; a KSE running a scope system
1811		 * thread will use that thread's stack.
1812		 */
1813		kse->k_stack.ss_sp = kse->k_mbx.km_stack.ss_sp;
1814		kse->k_stack.ss_size = kse->k_mbx.km_stack.ss_size;
1815		if (kse->k_mbx.km_stack.ss_sp == NULL) {
1816			for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) {
1817				_lockuser_destroy(&kse->k_lockusers[i]);
1818			}
1819			/* _lock_destroy(&kse->k_lock); */
1820			free(kse);
1821			kse = NULL;
1822		}
1823	}
1824	if ((kse != NULL) && (need_ksd != 0)) {
1825		/* This KSE needs initialization. */
1826		if (curthread != NULL) {
1827			crit = _kse_critical_enter();
1828			KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1829		}
1830		/* Initialize KSD inside of the lock. */
1831		if (_ksd_create(&kse->k_ksd, (void *)kse, sizeof(*kse)) != 0) {
1832			if (curthread != NULL) {
1833				KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1834				_kse_critical_leave(crit);
1835			}
1836			free(kse->k_mbx.km_stack.ss_sp);
1837			for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) {
1838				_lockuser_destroy(&kse->k_lockusers[i]);
1839			}
1840			free(kse);
1841			return (NULL);
1842		}
1843		kse->k_flags = 0;
1844		TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe);
1845		active_kse_count++;
1846		if (curthread != NULL) {
1847			KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1848			_kse_critical_leave(crit);
1849		}
1850	}
1851	return (kse);
1852}
1853
1854void
1855kse_free_unlocked(struct kse *kse)
1856{
1857	TAILQ_REMOVE(&active_kseq, kse, k_qe);
1858	active_kse_count--;
1859	kse->k_kseg = NULL;
1860	kse->k_flags &= ~KF_INITIALIZED;
1861	TAILQ_INSERT_HEAD(&free_kseq, kse, k_qe);
1862	free_kse_count++;
1863}
1864
1865void
1866_kse_free(struct pthread *curthread, struct kse *kse)
1867{
1868	kse_critical_t crit;
1869
1870	if (curthread == NULL)
1871		kse_free_unlocked(kse);
1872	else {
1873		crit = _kse_critical_enter();
1874		KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1875		kse_free_unlocked(kse);
1876		KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1877		_kse_critical_leave(crit);
1878	}
1879}
1880
1881static void
1882kseg_init(struct kse_group *kseg)
1883{
1884	kseg_reinit(kseg);
1885	_lock_init(&kseg->kg_lock, LCK_ADAPTIVE, _kse_lock_wait,
1886	    _kse_lock_wakeup);
1887}
1888
1889static void
1890kseg_reinit(struct kse_group *kseg)
1891{
1892	TAILQ_INIT(&kseg->kg_kseq);
1893	TAILQ_INIT(&kseg->kg_threadq);
1894	TAILQ_INIT(&kseg->kg_schedq.sq_waitq);
1895	kseg->kg_threadcount = 0;
1896	kseg->kg_ksecount = 0;
1897	kseg->kg_idle_kses = 0;
1898	kseg->kg_flags = 0;
1899}
1900
1901struct pthread *
1902_thr_alloc(struct pthread *curthread)
1903{
1904	kse_critical_t crit;
1905	struct pthread *thread = NULL;
1906
1907	if (curthread != NULL) {
1908		if (GC_NEEDED())
1909			_thr_gc(curthread);
1910		if (free_thread_count > 0) {
1911			crit = _kse_critical_enter();
1912			KSE_LOCK_ACQUIRE(curthread->kse, &thread_lock);
1913			if ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
1914				TAILQ_REMOVE(&free_threadq, thread, tle);
1915				free_thread_count--;
1916			}
1917			KSE_LOCK_RELEASE(curthread->kse, &thread_lock);
1918		}
1919	}
1920	if (thread == NULL)
1921		thread = (struct pthread *)malloc(sizeof(struct pthread));
1922	return (thread);
1923}
1924
1925void
1926_thr_free(struct pthread *curthread, struct pthread *thread)
1927{
1928	kse_critical_t crit;
1929	int i;
1930
1931	DBG_MSG("Freeing thread %p\n", thread);
1932	if ((curthread == NULL) || (free_thread_count >= MAX_CACHED_THREADS)) {
1933		for (i = 0; i < MAX_THR_LOCKLEVEL; i++) {
1934			_lockuser_destroy(&thread->lockusers[i]);
1935		}
1936		_lock_destroy(&thread->lock);
1937		free(thread);
1938	}
1939	else {
1940		crit = _kse_critical_enter();
1941		KSE_LOCK_ACQUIRE(curthread->kse, &_thread_list_lock);
1942		THR_LIST_REMOVE(thread);
1943		KSE_LOCK_RELEASE(curthread->kse, &_thread_list_lock);
1944		KSE_LOCK_ACQUIRE(curthread->kse, &thread_lock);
1945		TAILQ_INSERT_HEAD(&free_threadq, thread, tle);
1946		free_thread_count++;
1947		KSE_LOCK_RELEASE(curthread->kse, &thread_lock);
1948		_kse_critical_leave(crit);
1949	}
1950}
1951