1/*
2 * Copyright (C) 2003 Daniel M. Eischen <deischen@freebsd.org>
3 * Copyright (C) 2002 Jonathon Mini <mini@freebsd.org>
4 * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 *    must display the following acknowledgement:
17 *	This product includes software developed by John Birrell.
18 * 4. Neither the name of the author nor the names of any co-contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 */
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/lib/libkse/thread/thr_kern.c 113942 2003-04-23 21:46:50Z deischen $");
37
38#include <sys/types.h>
39#include <sys/kse.h>
40#include <sys/signalvar.h>
41#include <sys/queue.h>
42#include <machine/atomic.h>
43
44#include <assert.h>
45#include <errno.h>
46#include <signal.h>
47#include <stdlib.h>
48#include <string.h>
49#include <time.h>
50#include <ucontext.h>
51#include <unistd.h>
52
53#include "atomic_ops.h"
54#include "thr_private.h"
55#include "pthread_md.h"
56#include "libc_private.h"
57
58/*#define DEBUG_THREAD_KERN */
59#ifdef DEBUG_THREAD_KERN
60#define DBG_MSG		stdout_debug
61#else
62#define DBG_MSG(x...)
63#endif
64
65/*
66 * Define a high water mark for the maximum number of threads that
67 * will be cached.  Once this level is reached, any extra threads
68 * will be free()'d.
69 *
70 * XXX - It doesn't make sense to worry about the maximum number of
71 *       KSEs that we can cache because the system will limit us to
72 *       something *much* less than the maximum number of threads
73 *       that we can have.  Disregarding KSEs in their own group,
74 *       the maximum number of KSEs is the number of processors in
75 *       the system.
76 */
77#define	MAX_CACHED_THREADS	100
78#define	KSE_STACKSIZE		16384
79
80#define	KSE_SET_MBOX(kse, thrd) \
81	(kse)->k_mbx.km_curthread = &(thrd)->tmbx
82
83#define	KSE_SET_EXITED(kse)	(kse)->k_flags |= KF_EXITED
84
85/*
86 * Macros for manipulating the run queues.  The priority queue
87 * routines use the thread's pqe link and also handle the setting
88 * and clearing of the thread's THR_FLAGS_IN_RUNQ flag.
89 */
90#define	KSE_RUNQ_INSERT_HEAD(kse, thrd)			\
91	_pq_insert_head(&(kse)->k_schedq->sq_runq, thrd)
92#define	KSE_RUNQ_INSERT_TAIL(kse, thrd)			\
93	_pq_insert_tail(&(kse)->k_schedq->sq_runq, thrd)
94#define	KSE_RUNQ_REMOVE(kse, thrd)			\
95	_pq_remove(&(kse)->k_schedq->sq_runq, thrd)
96#define	KSE_RUNQ_FIRST(kse)	_pq_first(&(kse)->k_schedq->sq_runq)
97
98/*
99 * XXX - Remove when David commits kernel changes to support these.
100 */
101#ifndef KMF_NOUPCALL
102#define	KMF_NOUPCALL	0x01
103#define	KMF_NOCOMPLETED	0x02
104#endif
105
106
107/*
108 * We've got to keep track of everything that is allocated, not only
109 * to have a speedy free list, but also so they can be deallocated
110 * after a fork().
111 */
112static TAILQ_HEAD(, kse)	active_kseq;
113static TAILQ_HEAD(, kse)	free_kseq;
114static TAILQ_HEAD(, kse_group)	free_kse_groupq;
115static TAILQ_HEAD(, kse_group)	active_kse_groupq;
116static TAILQ_HEAD(, kse_group)	gc_ksegq;
117static struct lock		kse_lock;	/* also used for kseg queue */
118static int			free_kse_count = 0;
119static int			free_kseg_count = 0;
120static TAILQ_HEAD(, pthread)	free_threadq;
121static struct lock		thread_lock;
122static int			free_thread_count = 0;
123static int			inited = 0;
124static int			active_kse_count = 0;
125static int			active_kseg_count = 0;
126
127static void	kse_check_completed(struct kse *kse);
128static void	kse_check_waitq(struct kse *kse);
129static void	kse_check_signals(struct kse *kse);
130static void	kse_fini(struct kse *curkse);
131static void	kse_reinit(struct kse *kse);
132static void	kse_sched_multi(struct kse *curkse);
133#ifdef NOT_YET
134static void	kse_sched_single(struct kse *curkse);
135#endif
136static void	kse_switchout_thread(struct kse *kse, struct pthread *thread);
137static void	kse_wait(struct kse *kse, struct pthread *td_wait);
138static void	kse_free_unlocked(struct kse *kse);
139static void	kseg_free_unlocked(struct kse_group *kseg);
140static void	kseg_init(struct kse_group *kseg);
141static void	kseg_reinit(struct kse_group *kseg);
142static void	kse_waitq_insert(struct pthread *thread);
143static void	thr_cleanup(struct kse *kse, struct pthread *curthread);
144#ifdef NOT_YET
145static void	thr_resume_wrapper(int unused_1, siginfo_t *unused_2,
146		    ucontext_t *ucp);
147#endif
148static void	thr_resume_check(struct pthread *curthread, ucontext_t *ucp,
149		    struct pthread_sigframe *psf);
150static int	thr_timedout(struct pthread *thread, struct timespec *curtime);
151
152/*
153 * This is called after a fork().
154 * No locks need to be taken here since we are guaranteed to be
155 * single threaded.
156 */
157void
158_kse_single_thread(struct pthread *curthread)
159{
160	struct kse *kse, *kse_next;
161	struct kse_group *kseg, *kseg_next;
162	struct pthread *thread, *thread_next;
163	kse_critical_t crit;
164	int i;
165
166	/*
167	 * Disable upcalls and clear the threaded flag.
168	 * XXX - I don't think we need to disable upcalls after a fork(),
169	 *       but it doesn't hurt.
170	 */
171	crit = _kse_critical_enter();
172	__isthreaded = 0;
173
174	/*
175	 * Enter a loop to remove and free all threads other than
176	 * the running thread from the active thread list:
177	 */
178	for (thread = TAILQ_FIRST(&_thread_list); thread != NULL;
179	    thread = thread_next) {
180		/*
181		 * Advance to the next thread before destroying
182		 * the current thread.
183		 */
184		thread_next = TAILQ_NEXT(thread, tle);
185
186		/*
187		 * Remove this thread from the list (the current
188		 * thread will be removed but re-added by libpthread
189		 * initialization).
190		 */
191		TAILQ_REMOVE(&_thread_list, thread, tle);
192		/* Make sure this isn't the running thread: */
193		if (thread != curthread) {
194			_thr_stack_free(&thread->attr);
195			if (thread->specific != NULL)
196				free(thread->specific);
197			for (i = 0; i < MAX_THR_LOCKLEVEL; i++) {
198				_lockuser_destroy(&thread->lockusers[i]);
199			}
200			_lock_destroy(&thread->lock);
201			free(thread);
202		}
203	}
204
205	TAILQ_INIT(&curthread->mutexq);		/* initialize mutex queue */
206	curthread->joiner = NULL;		/* no joining threads yet */
207	sigemptyset(&curthread->sigpend);	/* clear pending signals */
208	if (curthread->specific != NULL) {
209		free(curthread->specific);
210		curthread->specific = NULL;
211		curthread->specific_data_count = 0;
212	}
213
214	/* Free the free KSEs: */
215	while ((kse = TAILQ_FIRST(&free_kseq)) != NULL) {
216		TAILQ_REMOVE(&free_kseq, kse, k_qe);
217		_ksd_destroy(&kse->k_ksd);
218		if (kse->k_stack.ss_sp != NULL)
219			free(kse->k_stack.ss_sp);
220		free(kse);
221	}
222	free_kse_count = 0;
223
224	/* Free the active KSEs: */
225	for (kse = TAILQ_FIRST(&active_kseq); kse != NULL; kse = kse_next) {
226		kse_next = TAILQ_NEXT(kse, k_qe);
227		TAILQ_REMOVE(&active_kseq, kse, k_qe);
228		for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) {
229			_lockuser_destroy(&kse->k_lockusers[i]);
230		}
231		if (kse->k_stack.ss_sp != NULL)
232			free(kse->k_stack.ss_sp);
233		_lock_destroy(&kse->k_lock);
234		free(kse);
235	}
236	active_kse_count = 0;
237
238	/* Free the free KSEGs: */
239	while ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) {
240		TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
241		_lock_destroy(&kseg->kg_lock);
242		_pq_free(&kseg->kg_schedq.sq_runq);
243		free(kseg);
244	}
245	free_kseg_count = 0;
246
247	/* Free the active KSEGs: */
248	for (kseg = TAILQ_FIRST(&active_kse_groupq);
249	    kseg != NULL; kseg = kseg_next) {
250		kseg_next = TAILQ_NEXT(kseg, kg_qe);
251		TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe);
252		_lock_destroy(&kseg->kg_lock);
253		_pq_free(&kseg->kg_schedq.sq_runq);
254		free(kseg);
255	}
256	active_kseg_count = 0;
257
258	/* Free the free threads. */
259	while ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
260		TAILQ_REMOVE(&free_threadq, thread, tle);
261		if (thread->specific != NULL)
262			free(thread->specific);
263		for (i = 0; i < MAX_THR_LOCKLEVEL; i++) {
264			_lockuser_destroy(&thread->lockusers[i]);
265		}
266		_lock_destroy(&thread->lock);
267		free(thread);
268	}
269	free_thread_count = 0;
270
271	/* Free the to-be-gc'd threads. */
272	while ((thread = TAILQ_FIRST(&_thread_gc_list)) != NULL) {
273		TAILQ_REMOVE(&_thread_gc_list, thread, gcle);
274		for (i = 0; i < MAX_THR_LOCKLEVEL; i++) {
275			_lockuser_destroy(&thread->lockusers[i]);
276		}
277		_lock_destroy(&thread->lock);
278		free(thread);
279	}
280	TAILQ_INIT(&gc_ksegq);
281	_gc_count = 0;
282
283	if (inited != 0) {
284		/*
285		 * Destroy these locks; they'll be recreated to assure they
286		 * are in the unlocked state.
287		 */
288		_lock_destroy(&kse_lock);
289		_lock_destroy(&thread_lock);
290		_lock_destroy(&_thread_list_lock);
291		inited = 0;
292	}
293
294	/*
295	 * After a fork(), the leftover thread goes back to being
296	 * scope process.
297	 */
298	curthread->attr.flags &= ~PTHREAD_SCOPE_SYSTEM;
299	curthread->attr.flags |= PTHREAD_SCOPE_PROCESS;
300
301	/*
302	 * After a fork, we are still operating on the thread's original
303	 * stack.  Don't clear the THR_FLAGS_USER from the thread's
304	 * attribute flags.
305	 */
306
307	/* Initialize the threads library. */
308	curthread->kse = NULL;
309	curthread->kseg = NULL;
310	_kse_initial = NULL;
311	_libpthread_init(curthread);
312}
313
314/*
315 * This is used to initialize housekeeping and to initialize the
316 * KSD for the KSE.
317 */
318void
319_kse_init(void)
320{
321	if (inited == 0) {
322		TAILQ_INIT(&active_kseq);
323		TAILQ_INIT(&active_kse_groupq);
324		TAILQ_INIT(&free_kseq);
325		TAILQ_INIT(&free_kse_groupq);
326		TAILQ_INIT(&free_threadq);
327		TAILQ_INIT(&gc_ksegq);
328		if (_lock_init(&kse_lock, LCK_ADAPTIVE,
329		    _kse_lock_wait, _kse_lock_wakeup) != 0)
330			PANIC("Unable to initialize free KSE queue lock");
331		if (_lock_init(&thread_lock, LCK_ADAPTIVE,
332		    _kse_lock_wait, _kse_lock_wakeup) != 0)
333			PANIC("Unable to initialize free thread queue lock");
334		if (_lock_init(&_thread_list_lock, LCK_ADAPTIVE,
335		    _kse_lock_wait, _kse_lock_wakeup) != 0)
336			PANIC("Unable to initialize thread list lock");
337		active_kse_count = 0;
338		active_kseg_count = 0;
339		_gc_count = 0;
340		inited = 1;
341	}
342}
343
344int
345_kse_isthreaded(void)
346{
347	return (__isthreaded != 0);
348}
349
350/*
351 * This is called when the first thread (other than the initial
352 * thread) is created.
353 */
354int
355_kse_setthreaded(int threaded)
356{
357	if ((threaded != 0) && (__isthreaded == 0)) {
358		/*
359		 * Locking functions in libc are required when there are
360		 * threads other than the initial thread.
361		 */
362		__isthreaded = 1;
363
364		/*
365		 * Tell the kernel to create a KSE for the initial thread
366		 * and enable upcalls in it.
367		 */
368		_kse_initial->k_flags |= KF_STARTED;
369		if (kse_create(&_kse_initial->k_mbx, 0) != 0) {
370			_kse_initial->k_flags &= ~KF_STARTED;
371			/* may abort() */
372			DBG_MSG("kse_create failed\n");
373			return (-1);
374		}
375		KSE_SET_MBOX(_kse_initial, _thr_initial);
376	}
377	return (0);
378}
379
380/*
381 * Lock wait and wakeup handlers for KSE locks.  These are only used by
382 * KSEs, and should never be used by threads.  KSE locks include the
383 * KSE group lock (used for locking the scheduling queue) and the
384 * kse_lock defined above.
385 *
386 * When a KSE lock attempt blocks, the entire KSE blocks allowing another
387 * KSE to run.  For the most part, it doesn't make much sense to try and
388 * schedule another thread because you need to lock the scheduling queue
389 * in order to do that.  And since the KSE lock is used to lock the scheduling
390 * queue, you would just end up blocking again.
391 */
392void
393_kse_lock_wait(struct lock *lock, struct lockuser *lu)
394{
395	struct kse *curkse = (struct kse *)_LCK_GET_PRIVATE(lu);
396	struct timespec ts;
397	int saved_flags;
398
399	if (curkse->k_mbx.km_curthread != NULL)
400		PANIC("kse_lock_wait does not disable upcall.\n");
401	/*
402	 * Enter a loop to wait until we get the lock.
403	 */
404	ts.tv_sec = 0;
405	ts.tv_nsec = 1000000;  /* 1,000,000 nsec = 1 msec */
406	KSE_SET_WAIT(curkse);
407	while (_LCK_BUSY(lu)) {
408		/*
409		 * Yield the kse and wait to be notified when the lock
410		 * is granted.
411		 */
412		saved_flags = curkse->k_mbx.km_flags;
413		curkse->k_mbx.km_flags |= KMF_NOUPCALL | KMF_NOCOMPLETED;
414		kse_release(&ts);
415		curkse->k_mbx.km_flags = saved_flags;
416
417		/*
418		 * Make sure that the wait flag is set again in case
419		 * we woke up without the lock being granted.
420		 */
421		KSE_SET_WAIT(curkse);
422	}
423	KSE_CLEAR_WAIT(curkse);
424}
425
426void
427_kse_lock_wakeup(struct lock *lock, struct lockuser *lu)
428{
429	struct kse *curkse;
430	struct kse *kse;
431
432	curkse = _get_curkse();
433	kse = (struct kse *)_LCK_GET_PRIVATE(lu);
434
435	if (kse == curkse)
436		PANIC("KSE trying to wake itself up in lock");
437	else if (KSE_WAITING(kse)) {
438		/*
439		 * Notify the owning kse that it has the lock.
440		 */
441		KSE_WAKEUP(kse);
442	}
443}
444
445/*
446 * Thread wait and wakeup handlers for thread locks.  These are only used
447 * by threads, never by KSEs.  Thread locks include the per-thread lock
448 * (defined in its structure), and condition variable and mutex locks.
449 */
450void
451_thr_lock_wait(struct lock *lock, struct lockuser *lu)
452{
453	struct pthread *curthread = (struct pthread *)lu->lu_private;
454	int count;
455
456	/*
457	 * Spin for a bit.
458	 *
459	 * XXX - We probably want to make this a bit smarter.  It
460	 *       doesn't make sense to spin unless there is more
461	 *       than 1 CPU.  A thread that is holding one of these
462	 *       locks is prevented from being swapped out for another
463	 *       thread within the same scheduling entity.
464	 */
465	count = 0;
466	while (_LCK_BUSY(lu) && count < 300)
467		count++;
468	while (_LCK_BUSY(lu)) {
469		THR_SCHED_LOCK(curthread, curthread);
470		if (_LCK_BUSY(lu)) {
471			/* Wait for the lock: */
472			atomic_store_rel_int(&curthread->need_wakeup, 1);
473			THR_SET_STATE(curthread, PS_LOCKWAIT);
474			THR_SCHED_UNLOCK(curthread, curthread);
475			_thr_sched_switch(curthread);
476		}
477		else
478			THR_SCHED_UNLOCK(curthread, curthread);
479	}
480}
481
482void
483_thr_lock_wakeup(struct lock *lock, struct lockuser *lu)
484{
485	struct pthread *thread;
486	struct pthread *curthread;
487
488	curthread = _get_curthread();
489	thread = (struct pthread *)_LCK_GET_PRIVATE(lu);
490
491	THR_SCHED_LOCK(curthread, thread);
492	_thr_setrunnable_unlocked(thread);
493	atomic_store_rel_int(&thread->need_wakeup, 0);
494	THR_SCHED_UNLOCK(curthread, thread);
495}
496
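/*
 * KSE critical regions.  Entering a critical region saves and clears
 * the current thread-mailbox pointer in the KSE's KSD; while it is
 * clear, the KSE is treated as if it were running scheduler code
 * rather than a user thread.  Leaving the critical region restores
 * the saved pointer and checks for a deferred yield.  A minimal
 * usage sketch, following the pattern used elsewhere in this file
 * (e.g. _kseg_free()):
 *
 *	crit = _kse_critical_enter();
 *	curkse = _get_curkse();
 *	KSE_LOCK_ACQUIRE(curkse, &kse_lock);
 *	... operate on the free/active KSE queues ...
 *	KSE_LOCK_RELEASE(curkse, &kse_lock);
 *	_kse_critical_leave(crit);
 */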
497kse_critical_t
498_kse_critical_enter(void)
499{
500	kse_critical_t crit;
501
502	crit = _ksd_readandclear_tmbx;
503	return (crit);
504}
505
506void
507_kse_critical_leave(kse_critical_t crit)
508{
509	struct pthread *curthread;
510
511	_ksd_set_tmbx(crit);
512	if ((crit != NULL) && ((curthread = _get_curthread()) != NULL))
513		THR_YIELD_CHECK(curthread);
514}
515
516int
517_kse_in_critical(void)
518{
519	return (_ksd_get_tmbx() == NULL);
520}
521
522void
523_thr_critical_enter(struct pthread *thread)
524{
525	thread->critical_count++;
526}
527
528void
529_thr_critical_leave(struct pthread *thread)
530{
531	thread->critical_count--;
532	THR_YIELD_CHECK(thread);
533}
534
535/*
536 * XXX - We may need to take the scheduling lock before calling
537 *       this, or perhaps take the lock within here before
538 *       doing anything else.
539 */
540void
541_thr_sched_switch(struct pthread *curthread)
542{
543	struct pthread_sigframe psf;
544	kse_critical_t crit;
545	struct kse *curkse;
546	volatile int once = 0;
547
548	/* We're in the scheduler, 5 by 5: */
549	crit = _kse_critical_enter();
550	curkse = _get_curkse();
551
552	curthread->need_switchout = 1;	/* The thread yielded on its own. */
553	curthread->critical_yield = 0;	/* No need to yield anymore. */
554	curthread->slice_usec = -1;	/* Restart the time slice. */
555
556	/*
557	 * The signal frame is allocated off the stack because
558	 * a thread can be interrupted by other signals while
559	 * it is running down pending signals.
560	 */
561	sigemptyset(&psf.psf_sigset);
562	curthread->curframe = &psf;
563
564	_thread_enter_uts(&curthread->tmbx, &curkse->k_mbx);
565
566	/*
567	 * This thread is being resumed; check for cancellations.
568	 */
569	if ((once == 0) && (!THR_IN_CRITICAL(curthread))) {
570		once = 1;
571		thr_resume_check(curthread, &curthread->tmbx.tm_context, &psf);
572	}
573}
574
575/*
576 * This is the scheduler for a KSE which runs a scope system thread.
577 * The multi-thread KSE scheduler should also work for a single threaded
578 * KSE, but we use a separate scheduler so that it can be fine-tuned
579 * to be more efficient (and perhaps not need a separate stack for
580 * the KSE, allowing it to use the thread's stack).
581 *
582 * XXX - This probably needs some work.
583 */
584#ifdef NOT_YET
585static void
586kse_sched_single(struct kse *curkse)
587{
588	struct pthread *curthread = curkse->k_curthread;
589	struct pthread *td_wait;
590	struct timespec ts;
591	int level;
592
593	if (curthread->active == 0) {
594		if (curthread->state != PS_RUNNING) {
595			/* Check to see if the thread has timed out. */
596			KSE_GET_TOD(curkse, &ts);
597			if (thr_timedout(curthread, &ts) != 0) {
598				curthread->timeout = 1;
599				curthread->state = PS_RUNNING;
600			}
601		}
602	}
603
604	/* This thread no longer needs to yield the CPU: */
605	curthread->critical_yield = 0;
606	curthread->need_switchout = 0;
607
608	/*
609	 * Lock the scheduling queue.
610	 *
611	 * There is no scheduling queue for single threaded KSEs,
612	 * but we need a lock for protection regardless.
613	 */
614	KSE_SCHED_LOCK(curkse, curkse->k_kseg);
615
616	/*
617	 * This has to do the job of kse_switchout_thread(), only
618	 * for a single threaded KSE/KSEG.
619	 */
620
621	switch (curthread->state) {
622	case PS_DEAD:
623		/* Unlock the scheduling queue and exit the KSE. */
624		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
625		kse_fini(curkse);	/* does not return */
626		break;
627
628	case PS_COND_WAIT:
629	case PS_SLEEP_WAIT:
630		/* Only insert threads that can timeout: */
631		if (curthread->wakeup_time.tv_sec != -1) {
632			/* Insert into the waiting queue: */
633			KSE_WAITQ_INSERT(curkse, curthread);
634		}
635		break;
636
637	case PS_LOCKWAIT:
638		level = curthread->locklevel - 1;
639		if (_LCK_BUSY(&curthread->lockusers[level]))
640			KSE_WAITQ_INSERT(curkse, curthread);
641		else
642			THR_SET_STATE(curthread, PS_RUNNING);
643		break;
644
645	case PS_JOIN:
646	case PS_MUTEX_WAIT:
647	case PS_RUNNING:
648	case PS_SIGSUSPEND:
649	case PS_SIGWAIT:
650	case PS_SUSPENDED:
651	case PS_DEADLOCK:
652	default:
653		/*
654		 * These states don't timeout and don't need
655		 * to be in the waiting queue.
656		 */
657		break;
658	}
659	while (curthread->state != PS_RUNNING) {
660		curthread->active = 0;
661		td_wait = KSE_WAITQ_FIRST(curkse);
662
663		kse_wait(curkse, td_wait);
664
665	    	if (td_wait != NULL) {
666			KSE_GET_TOD(curkse, &ts);
667			if (thr_timedout(td_wait, &ts)) {
668				/* Indicate the thread timedout: */
669				td_wait->timeout = 1;
670
671				/* Make the thread runnable. */
672				THR_SET_STATE(td_wait, PS_RUNNING);
673				KSE_WAITQ_REMOVE(curkse, td_wait);
674			}
675		}
676		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
677		kse_check_signals(curkse);
678		KSE_SCHED_LOCK(curkse, curkse->k_kseg);
679	}
680
681	/* Remove the frame reference. */
682	curthread->curframe = NULL;
683
684	/* Unlock the scheduling queue. */
685	KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
686
687	/*
688	 * Continue the thread at its current frame:
689	 */
690	DBG_MSG("Continuing bound thread %p\n", curthread);
691	_thread_switch(&curthread->tmbx, &curkse->k_mbx.km_curthread);
692	PANIC("Thread has returned from _thread_switch");
693}
694#endif
695
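/*
 * Debugging helper: log the contents of the current KSE's wait
 * queue via DBG_MSG (a no-op unless DEBUG_THREAD_KERN is defined).
 */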
696void
697dump_queues(struct kse *curkse)
698{
699	struct pthread *thread;
700
701	DBG_MSG("Threads in waiting queue:\n");
702	TAILQ_FOREACH(thread, &curkse->k_kseg->kg_schedq.sq_waitq, pqe) {
703		DBG_MSG("  thread %p, state %d, blocked %d\n",
704		    thread, thread->state, thread->blocked);
705	}
706}
707
708/*
709 * This is the scheduler for a KSE which runs multiple threads.
710 */
711static void
712kse_sched_multi(struct kse *curkse)
713{
714	struct pthread *curthread, *td_wait;
715	struct pthread_sigframe *curframe;
716	int ret;
717
718	THR_ASSERT(curkse->k_mbx.km_curthread == NULL,
719	    "Mailbox not null in kse_sched_multi");
720
721	/* Check for first time initialization: */
722	if ((curkse->k_flags & KF_INITIALIZED) == 0) {
723		/* Setup this KSEs specific data. */
724		_ksd_setprivate(&curkse->k_ksd);
725		_set_curkse(curkse);
726
727		/* Set this before grabbing the context. */
728		curkse->k_flags |= KF_INITIALIZED;
729	}
730
731	/* This may have returned from a kse_release(). */
732	if (KSE_WAITING(curkse)) {
733		DBG_MSG("Entered upcall when KSE is waiting.\n");
734		KSE_CLEAR_WAIT(curkse);
735	}
736
737	/* Lock the scheduling lock. */
738	KSE_SCHED_LOCK(curkse, curkse->k_kseg);
739
740	/*
741	 * If the current thread was completed in another KSE, then
742	 * it will be in the run queue.  Don't mark it as being blocked.
743	 */
744	if (((curthread = curkse->k_curthread) != NULL) &&
745	    ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0) &&
746	    (curthread->need_switchout == 0)) {
747		/*
748		 * Assume the current thread is blocked; when the
749		 * completed threads are checked and if the current
750		 * thread is among the completed, the blocked flag
751		 * will be cleared.
752		 */
753		curthread->blocked = 1;
754	}
755
756	/* Check for any unblocked threads in the kernel. */
757	kse_check_completed(curkse);
758
759	/*
760	 * Check for threads that have timed-out.
761	 */
762	kse_check_waitq(curkse);
763
764	/*
765	 * Switchout the current thread, if necessary, as the last step
766	 * so that it is inserted into the run queue (if it's runnable)
767	 * _after_ any other threads that were added to it above.
768	 */
769	if (curthread == NULL)
770		;  /* Nothing to do here. */
771	else if ((curthread->need_switchout == 0) &&
772	    (curthread->blocked == 0) && (THR_IN_CRITICAL(curthread))) {
773		/*
774		 * Resume the thread and tell it to yield when
775		 * it leaves the critical region.
776		 */
777		curthread->critical_yield = 0;
778		curthread->active = 1;
779		if ((curthread->flags & THR_FLAGS_IN_RUNQ) != 0)
780			KSE_RUNQ_REMOVE(curkse, curthread);
781		curkse->k_curthread = curthread;
782		curthread->kse = curkse;
783		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
784		DBG_MSG("Continuing thread %p in critical region\n",
785		    curthread);
786		ret = _thread_switch(&curthread->tmbx,
787		    &curkse->k_mbx.km_curthread);
788		if (ret != 0)
789			PANIC("Can't resume thread in critical region\n");
790	}
791	else if ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0)
792		kse_switchout_thread(curkse, curthread);
793	curkse->k_curthread = NULL;
794
795	/* This has to be done without the scheduling lock held. */
796	KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
797	kse_check_signals(curkse);
798	KSE_SCHED_LOCK(curkse, curkse->k_kseg);
799
800	dump_queues(curkse);
801
802	/* Check if there are no threads ready to run: */
803	while (((curthread = KSE_RUNQ_FIRST(curkse)) == NULL) &&
804	    (curkse->k_kseg->kg_threadcount != 0)) {
805		/*
806		 * Wait for a thread to become active or until there are
807		 * no more threads.
808		 */
809		td_wait = KSE_WAITQ_FIRST(curkse);
810		kse_wait(curkse, td_wait);
811		kse_check_completed(curkse);
812		kse_check_waitq(curkse);
813		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
814		kse_check_signals(curkse);
815		KSE_SCHED_LOCK(curkse, curkse->k_kseg);
816	}
817
818	/* Check for no more threads: */
819	if (curkse->k_kseg->kg_threadcount == 0) {
820		/*
821		 * Normally this shouldn't return, but it will if there
822		 * are other KSEs running that create new threads that
823		 * are assigned to this KSE[G].  For instance, if a scope
824		 * system thread were to create a scope process thread
825		 * and this kse[g] is the initial kse[g], then that newly
826		 * created thread would be assigned to us (the initial
827		 * kse[g]).
828		 */
829		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
830		kse_fini(curkse);
831		KSE_SCHED_LOCK(curkse, curkse->k_kseg);
832		curthread = KSE_RUNQ_FIRST(curkse);
833	}
834
835	THR_ASSERT(curthread != NULL,
836	    "Return from kse_wait/fini without thread.");
837	THR_ASSERT(curthread->state != PS_DEAD,
838	    "Trying to resume dead thread!");
839	KSE_RUNQ_REMOVE(curkse, curthread);
840
841	/*
842	 * Make the selected thread the current thread.
843	 */
844	curkse->k_curthread = curthread;
845
846	/*
847	 * Make sure the current thread's kse points to this kse.
848	 */
849	curthread->kse = curkse;
850
851	/*
852	 * Reset accounting.
853	 */
854	curthread->tmbx.tm_uticks = 0;
855	curthread->tmbx.tm_sticks = 0;
856
857	/*
858	 * Reset the time slice if this thread is running for the first
859	 * time or running again after using its full time slice allocation.
860	 */
861	if (curthread->slice_usec == -1)
862		curthread->slice_usec = 0;
863
864	/* Mark the thread active. */
865	curthread->active = 1;
866
867	/* Remove the frame reference. */
868	curframe = curthread->curframe;
869	curthread->curframe = NULL;
870
871	/* Unlock the scheduling queue: */
872	KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
873
874	/*
875	 * The thread's current signal frame will only be NULL if it
876	 * is being resumed after being blocked in the kernel.  In
877	 * this case, and if the thread needs to run down pending
878	 * signals or needs a cancellation check, we need to add a
879	 * signal frame to the thread's context.
880	 */
881#ifdef NOT_YET
882	if ((curframe == NULL) && ((curthread->check_pending != 0) ||
883	    (((curthread->cancelflags & THR_AT_CANCEL_POINT) == 0) &&
884	    ((curthread->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0)))) {
885		signalcontext(&curthread->tmbx.tm_context, 0,
886		    (__sighandler_t *)thr_resume_wrapper);
887	}
888#endif
889#ifdef GS_HACK
890	/* XXX - The kernel sometimes forgets to restore %gs properly. */
891	_ksd_setprivate(&curkse->k_ksd);
892#endif
893	/*
894	 * Continue the thread at its current frame:
895	 */
896	ret = _thread_switch(&curthread->tmbx, &curkse->k_mbx.km_curthread);
897	if (ret != 0)
898		PANIC("Thread has returned from _thread_switch");
899
900	/* This point should not be reached. */
901	PANIC("Thread has returned from _thread_switch");
902}
903
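/*
 * Atomically collect the signals posted to this KSE's mailbox and
 * dispatch them one at a time.  Called without the scheduling lock
 * held (see the callers in kse_sched_multi()).
 */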
904static void
905kse_check_signals(struct kse *curkse)
906{
907	sigset_t sigset;
908	int i;
909
910	/* Deliver posted signals. */
911	for (i = 0; i < _SIG_WORDS; i++) {
912		atomic_swap_int(&curkse->k_mbx.km_sigscaught.__bits[i],
913		    0, &sigset.__bits[i]);
914	}
915	if (SIGNOTEMPTY(sigset)) {
916		/*
917		 * Dispatch each signal.
918		 *
919		 * XXX - There is no siginfo for any of these.
920		 *       I think there should be, especially for
921		 *       signals from other processes (si_pid, si_uid).
922		 */
923		for (i = 1; i < NSIG; i++) {
924			if (sigismember(&sigset, i) != 0) {
925				DBG_MSG("Dispatching signal %d\n", i);
926				_thr_sig_dispatch(curkse, i,
927				    NULL /* no siginfo */);
928			}
929		}
930		sigemptyset(&sigset);
931		__sys_sigprocmask(SIG_SETMASK, &sigset, NULL);
932	}
933}
934
935#ifdef NOT_YET
936static void
937thr_resume_wrapper(int unused_1, siginfo_t *unused_2, ucontext_t *ucp)
938{
939	struct pthread *curthread = _get_curthread();
940
941	thr_resume_check(curthread, ucp, NULL);
942}
943#endif
944
945static void
946thr_resume_check(struct pthread *curthread, ucontext_t *ucp,
947    struct pthread_sigframe *psf)
948{
949	/* Check signals before cancellations. */
950	while (curthread->check_pending != 0) {
951		/* Clear the pending flag. */
952		curthread->check_pending = 0;
953
954		/*
955		 * It's perfectly valid, though not portable, for
956		 * signal handlers to munge their interrupted context
957		 * and expect to return to it.  Ensure we use the
958		 * correct context when running down signals.
959		 */
960		_thr_sig_rundown(curthread, ucp, psf);
961	}
962
963	if (((curthread->cancelflags & THR_AT_CANCEL_POINT) == 0) &&
964	    ((curthread->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
965		pthread_testcancel();
966}
967
968/*
969 * Clean up a thread.  This must be called with the thread's KSE
970 * scheduling lock held.  The thread must be a thread from the
971 * KSE's group.
972 */
973static void
974thr_cleanup(struct kse *curkse, struct pthread *thread)
975{
976	struct pthread *joiner;
977
978	if ((joiner = thread->joiner) != NULL) {
979		thread->joiner = NULL;
980		if ((joiner->state == PS_JOIN) &&
981		    (joiner->join_status.thread == thread)) {
982			joiner->join_status.thread = NULL;
983
984			/* Set the return status for the joining thread: */
985			joiner->join_status.ret = thread->ret;
986
987			/* Make the thread runnable. */
988			if (joiner->kseg == curkse->k_kseg)
989				_thr_setrunnable_unlocked(joiner);
990			else {
991				KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
992				KSE_SCHED_LOCK(curkse, joiner->kseg);
993				_thr_setrunnable_unlocked(joiner);
994				KSE_SCHED_UNLOCK(curkse, joiner->kseg);
995				KSE_SCHED_LOCK(curkse, curkse->k_kseg);
996			}
997		}
998		thread->attr.flags |= PTHREAD_DETACHED;
999	}
1000
1001	if ((thread->attr.flags & PTHREAD_SCOPE_PROCESS) == 0) {
1002		/*
1003		 * Remove the thread from the KSEG's list of threads.
1004		 */
1005		KSEG_THRQ_REMOVE(thread->kseg, thread);
1006		/*
1007		 * Migrate the thread to the main KSE so that this
1008		 * KSE and KSEG can be cleaned when their last thread
1009		 * exits.
1010		 */
1011		thread->kseg = _kse_initial->k_kseg;
1012		thread->kse = _kse_initial;
1013	}
1014	thread->flags |= THR_FLAGS_GC_SAFE;
1015
1016	/*
1017	 * We can't hold the thread list lock while holding the
1018	 * scheduler lock.
1019	 */
1020	KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
1021	DBG_MSG("Adding thread %p to GC list\n", thread);
1022	KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
1023	THR_GCLIST_ADD(thread);
1024	KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
1025	KSE_SCHED_LOCK(curkse, curkse->k_kseg);
1026}
1027
1028void
1029_thr_gc(struct pthread *curthread)
1030{
1031	struct pthread *td, *td_next;
1032	kse_critical_t crit;
1033	TAILQ_HEAD(, pthread) worklist;
1034
1035	TAILQ_INIT(&worklist);
1036	crit = _kse_critical_enter();
1037	KSE_LOCK_ACQUIRE(curthread->kse, &_thread_list_lock);
1038
1039	/* Check the threads waiting for GC. */
1040	for (td = TAILQ_FIRST(&_thread_gc_list); td != NULL; td = td_next) {
1041		td_next = TAILQ_NEXT(td, gcle);
1042		if ((td->flags & THR_FLAGS_GC_SAFE) == 0)
1043			continue;
1044#ifdef NOT_YET
1045		else if (((td->attr.flags & PTHREAD_SCOPE_PROCESS) != 0) &&
1046		    (td->kse->k_mbx.km_flags == 0)) {
1047			/*
1048			 * The thread and KSE are operating on the same
1049			 * stack.  Wait for the KSE to exit before freeing
1050			 * the thread's stack as well as everything else.
1051			 */
1052			continue;
1053		}
1054#endif
1055		/*
1056		 * Remove the thread from the GC list.  If the thread
1057		 * isn't yet detached, it will get added back to the
1058		 * GC list at a later time.
1059		 */
1060		THR_GCLIST_REMOVE(td);
1061		DBG_MSG("Freeing thread %p stack\n", td);
1062		/*
1063		 * We can free the thread stack since it's no longer
1064		 * in use.
1065		 */
1066		_thr_stack_free(&td->attr);
1067		if (((td->attr.flags & PTHREAD_DETACHED) != 0) &&
1068		    (td->refcount == 0)) {
1069			/*
1070			 * The thread has detached and is no longer
1071			 * referenced.  It is safe to remove all
1072			 * remnants of the thread.
1073			 */
1074			TAILQ_INSERT_HEAD(&worklist, td, gcle);
1075		}
1076	}
1077	KSE_LOCK_RELEASE(curthread->kse, &_thread_list_lock);
1078	_kse_critical_leave(crit);
1079
1080	while ((td = TAILQ_FIRST(&worklist)) != NULL) {
1081		TAILQ_REMOVE(&worklist, td, gcle);
1082
1083		if ((td->attr.flags & PTHREAD_SCOPE_PROCESS) != 0) {
1084			crit = _kse_critical_enter();
1085			KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1086			kse_free_unlocked(td->kse);
1087			kseg_free_unlocked(td->kseg);
1088			KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1089			_kse_critical_leave(crit);
1090		}
1091		DBG_MSG("Freeing thread %p\n", td);
1092		_thr_free(curthread, td);
1093	}
1094}
1095
1096
1097/*
1098 * Only new threads that are running or suspended may be scheduled.
1099 */
1100int
1101_thr_schedule_add(struct pthread *curthread, struct pthread *newthread)
1102{
1103	struct kse *curkse;
1104	kse_critical_t crit;
1105	int need_start;
1106	int ret;
1107
1108	/*
1109	 * If this is the first time creating a thread, make sure
1110	 * the mailbox is set for the current thread.
1111	 */
1112	if ((newthread->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) {
1113#ifdef NOT_YET
1114		/* We use the thread's stack as the KSE's stack. */
1115		newthread->kse->k_mbx.km_stack.ss_sp =
1116		    newthread->attr.stackaddr_attr;
1117		newthread->kse->k_mbx.km_stack.ss_size =
1118		    newthread->attr.stacksize_attr;
1119#endif
1120		/*
1121		 * No need to lock the scheduling queue since the
1122		 * KSE/KSEG pair have not yet been started.
1123		 */
1124		KSEG_THRQ_ADD(newthread->kseg, newthread);
1125		TAILQ_INSERT_TAIL(&newthread->kseg->kg_kseq, newthread->kse,
1126		    k_kgqe);
1127		newthread->kseg->kg_ksecount = 1;
1128		if (newthread->state == PS_RUNNING)
1129			THR_RUNQ_INSERT_TAIL(newthread);
1130		newthread->kse->k_curthread = NULL;
1131		newthread->kse->k_mbx.km_flags = 0;
1132		newthread->kse->k_mbx.km_func = (kse_func_t *)kse_sched_multi;
1133		newthread->kse->k_mbx.km_quantum = 0;
1134
1135		/*
1136		 * This thread needs a new KSE and KSEG.
1137		 */
1138		crit = _kse_critical_enter();
1139		curkse = _get_curkse();
1140		_ksd_setprivate(&newthread->kse->k_ksd);
1141		newthread->kse->k_flags |= KF_INITIALIZED;
1142		ret = kse_create(&newthread->kse->k_mbx, 1);
1143		if (ret != 0)
1144			ret = errno;
1145		_ksd_setprivate(&curkse->k_ksd);
1146		_kse_critical_leave(crit);
1147	}
1148	else {
1149		/*
1150		 * Lock the KSE and add the new thread to its list of
1151		 * assigned threads.  If the new thread is runnable, also
1152		 * add it to the KSE's run queue.
1153		 */
1154		need_start = 0;
1155		KSE_SCHED_LOCK(curthread->kse, newthread->kseg);
1156		KSEG_THRQ_ADD(newthread->kseg, newthread);
1157		if (newthread->state == PS_RUNNING)
1158			THR_RUNQ_INSERT_TAIL(newthread);
1159		if ((newthread->kse->k_flags & KF_STARTED) == 0) {
1160			/*
1161			 * This KSE hasn't been started yet.  Start it
1162			 * outside of holding the lock.
1163			 */
1164			newthread->kse->k_flags |= KF_STARTED;
1165			newthread->kse->k_mbx.km_func =
1166			    (kse_func_t *)kse_sched_multi;
1167			newthread->kse->k_mbx.km_flags = 0;
1168			need_start = 1;
1169		}
1170		KSE_SCHED_UNLOCK(curthread->kse, newthread->kseg);
1171
1172	  	if (need_start != 0)
1173			kse_create(&newthread->kse->k_mbx, 0);
1174		else if ((newthread->state == PS_RUNNING) &&
1175		    KSE_WAITING(newthread->kse)) {
1176			/*
1177			 * The thread is being scheduled on another KSEG.
1178			 */
1179			KSE_WAKEUP(newthread->kse);
1180		}
1181		ret = 0;
1182	}
1183	return (ret);
1184}
1185
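/*
 * Insert a thread into its KSE's wait queue.  The queue is kept
 * sorted by wakeup time; threads with no timeout (wakeup_time.tv_sec
 * of -1) are appended to the tail.
 */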
1186void
1187kse_waitq_insert(struct pthread *thread)
1188{
1189	struct pthread *td;
1190
1191	if (thread->wakeup_time.tv_sec == -1)
1192		TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq, thread,
1193		    pqe);
1194	else {
1195		td = TAILQ_FIRST(&thread->kse->k_schedq->sq_waitq);
1196		while ((td != NULL) && (td->wakeup_time.tv_sec != -1) &&
1197		    ((td->wakeup_time.tv_sec < thread->wakeup_time.tv_sec) ||
1198		    ((td->wakeup_time.tv_sec == thread->wakeup_time.tv_sec) &&
1199		    (td->wakeup_time.tv_nsec <= thread->wakeup_time.tv_nsec))))
1200			td = TAILQ_NEXT(td, pqe);
1201		if (td == NULL)
1202			TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq,
1203			    thread, pqe);
1204		else
1205			TAILQ_INSERT_BEFORE(td, thread, pqe);
1206	}
1207	thread->flags |= THR_FLAGS_IN_WAITQ;
1208}
1209
1210/*
1211 * This must be called with the scheduling lock held.
1212 */
1213static void
1214kse_check_completed(struct kse *kse)
1215{
1216	struct pthread *thread;
1217	struct kse_thr_mailbox *completed;
1218
1219	if ((completed = kse->k_mbx.km_completed) != NULL) {
1220		kse->k_mbx.km_completed = NULL;
1221		while (completed != NULL) {
1222			thread = completed->tm_udata;
1223			DBG_MSG("Found completed thread %p, name %s\n",
1224			    thread,
1225			    (thread->name == NULL) ? "none" : thread->name);
1226			thread->blocked = 0;
1227			if (thread != kse->k_curthread)
1228				KSE_RUNQ_INSERT_TAIL(kse, thread);
1229			completed = completed->tm_next;
1230		}
1231	}
1232}
1233
1234/*
1235 * This must be called with the scheduling lock held.
1236 */
1237static void
1238kse_check_waitq(struct kse *kse)
1239{
1240	struct pthread	*pthread;
1241	struct timespec ts;
1242
1243	KSE_GET_TOD(kse, &ts);
1244
1245	/*
1246	 * Wake up threads that have timedout.  This has to be
1247	 * done before adding the current thread to the run queue
1248	 * so that a CPU intensive thread doesn't get preference
1249	 * over waiting threads.
1250	 */
1251	while (((pthread = KSE_WAITQ_FIRST(kse)) != NULL) &&
1252	    thr_timedout(pthread, &ts)) {
1253		/* Remove the thread from the wait queue: */
1254		KSE_WAITQ_REMOVE(kse, pthread);
1255		DBG_MSG("Found timedout thread %p in waitq\n", pthread);
1256
1257		/* Indicate the thread timedout: */
1258		pthread->timeout = 1;
1259
1260		/* Add the thread to the priority queue: */
1261		THR_SET_STATE(pthread, PS_RUNNING);
1262		KSE_RUNQ_INSERT_TAIL(kse, pthread);
1263	}
1264}
1265
1266static int
1267thr_timedout(struct pthread *thread, struct timespec *curtime)
1268{
1269	if (thread->wakeup_time.tv_sec < 0)
1270		return (0);
1271	else if (thread->wakeup_time.tv_sec > curtime->tv_sec)
1272		return (0);
1273	else if ((thread->wakeup_time.tv_sec == curtime->tv_sec) &&
1274	    (thread->wakeup_time.tv_nsec > curtime->tv_nsec))
1275		return (0);
1276	else
1277		return (1);
1278}
1279
1280/*
1281 * This must be called with the scheduling lock held.
1282 *
1283 * Each thread has a time slice, a wakeup time (used when it wants
1284 * to wait for a specified amount of time), a run state, and an
1285 * active flag.
1286 *
1287 * When a thread gets run by the scheduler, the active flag is
1288 * set to non-zero (1).  When a thread performs an explicit yield
1289 * or schedules a state change, it enters the scheduler and the
1290 * active flag is cleared.  When the active flag is still seen
1291 * set in the scheduler, that means that the thread is blocked in
1292 * the kernel (because it is cleared before entering the scheduler
1293 * in all other instances).
1294 *
1295 * The wakeup time is only set for those states that can timeout.
1296 * It is set to (-1, -1) for all other instances.
1297 *
1298 * The thread's run state, aside from being useful when debugging,
1299 * is used to place the thread in an appropriate queue.  There
1300 * are 2 basic queues:
1301 *
1302 *   o run queue - queue ordered by priority for all threads
1303 *                 that are runnable
1304 *   o waiting queue - queue sorted by wakeup time for all threads
1305 *                     that are not otherwise runnable (not blocked
1306 *                     in kernel, not waiting for locks)
1307 *
1308 * The thread's time slice is used for round-robin scheduling
1309 * (the default scheduling policy).  While a SCHED_RR thread
1310 * is runnable, its time slice accumulates.  When it reaches
1311 * the time slice interval, the slice is reset and the thread is
1312 * added to the end of the queue of threads at its priority.  When
1313 * a thread is no longer runnable (blocks in the kernel, waits,
1314 * etc.), its time slice is reset.
1315 *
1316 * The job of kse_switchout_thread() is to handle all of the above.
1317 */
1318static void
1319kse_switchout_thread(struct kse *kse, struct pthread *thread)
1320{
1321	int level;
1322
1323	/*
1324	 * Place the currently running thread into the
1325	 * appropriate queue(s).
1326	 */
1327	DBG_MSG("Switching out thread %p, state %d\n", thread, thread->state);
1328	if (thread->blocked != 0) {
1329		/* This thread must have blocked in the kernel. */
1330		/* thread->slice_usec = -1;*/	/* restart timeslice */
1331		/*
1332		 * XXX - Check for pending signals for this thread to
1333		 *       see if we need to interrupt it in the kernel.
1334		 */
1335		/* if (thread->check_pending != 0) */
1336		if ((thread->slice_usec != -1) &&
1337		    (thread->attr.sched_policy != SCHED_FIFO))
1338			thread->slice_usec += (thread->tmbx.tm_uticks
1339			    + thread->tmbx.tm_sticks) * _clock_res_usec;
1340	}
1341	else {
1342		switch (thread->state) {
1343		case PS_DEAD:
1344			/*
1345			 * The scheduler is operating on a different
1346			 * stack.  It is safe to do garbage collecting
1347			 * here.
1348			 */
1349			thr_cleanup(kse, thread);
1350			return;
1351			break;
1352
1353		case PS_RUNNING:
1354			/* Nothing to do here. */
1355			break;
1356
1357		case PS_COND_WAIT:
1358		case PS_SLEEP_WAIT:
1359			/* Insert into the waiting queue: */
1360			KSE_WAITQ_INSERT(kse, thread);
1361			break;
1362
1363		case PS_LOCKWAIT:
1364			/*
1365			 * This state doesn't timeout.
1366			 */
1367			thread->wakeup_time.tv_sec = -1;
1368			thread->wakeup_time.tv_nsec = -1;
1369			level = thread->locklevel - 1;
1370			if (_LCK_BUSY(&thread->lockusers[level]))
1371				KSE_WAITQ_INSERT(kse, thread);
1372			else
1373				THR_SET_STATE(thread, PS_RUNNING);
1374			break;
1375
1376		case PS_JOIN:
1377		case PS_MUTEX_WAIT:
1378		case PS_SIGSUSPEND:
1379		case PS_SIGWAIT:
1380		case PS_SUSPENDED:
1381		case PS_DEADLOCK:
1382		default:
1383			/*
1384			 * These states don't timeout.
1385			 */
1386			thread->wakeup_time.tv_sec = -1;
1387			thread->wakeup_time.tv_nsec = -1;
1388
1389			/* Insert into the waiting queue: */
1390			KSE_WAITQ_INSERT(kse, thread);
1391			break;
1392		}
1393		if (thread->state != PS_RUNNING) {
1394			/* Restart the time slice: */
1395			thread->slice_usec = -1;
1396		} else {
1397			if (thread->need_switchout != 0)
1398				/*
1399				 * The thread yielded on its own;
1400				 * restart the timeslice.
1401				 */
1402				thread->slice_usec = -1;
1403			else if ((thread->slice_usec != -1) &&
1404	   		    (thread->attr.sched_policy != SCHED_FIFO)) {
1405				thread->slice_usec += (thread->tmbx.tm_uticks
1406				    + thread->tmbx.tm_sticks) * _clock_res_usec;
1407				/* Check for time quantum exceeded: */
1408				if (thread->slice_usec > TIMESLICE_USEC)
1409					thread->slice_usec = -1;
1410			}
1411			if (thread->slice_usec == -1) {
1412				/*
1413				 * The thread exceeded its time quantum or
1414				 * it yielded the CPU; place it at the tail
1415				 * of the queue for its priority.
1416				 */
1417				KSE_RUNQ_INSERT_TAIL(kse, thread);
1418			} else {
1419				/*
1420				 * The thread hasn't exceeded its interval;
1421				 * place it at the head of the queue for its
1422				 * priority.
1423				 */
1424				KSE_RUNQ_INSERT_HEAD(kse, thread);
1425			}
1426		}
1427	}
1428	thread->active = 0;
1429	thread->need_switchout = 0;
1430}
1431
1432/*
1433 * This function waits for the smallest timeout value of any waiting
1434 * thread, or until it receives a message from another KSE.
1435 *
1436 * This must be called with the scheduling lock held.
1437 */
1438static void
1439kse_wait(struct kse *kse, struct pthread *td_wait)
1440{
1441	struct timespec ts, ts_sleep;
1442	int saved_flags;
1443
1444	KSE_GET_TOD(kse, &ts);
1445
1446	if ((td_wait == NULL) || (td_wait->wakeup_time.tv_sec < 0)) {
1447		/* Limit sleep to no more than 1 minute. */
1448		ts_sleep.tv_sec = 60;
1449		ts_sleep.tv_nsec = 0;
1450	} else {
1451		TIMESPEC_SUB(&ts_sleep, &td_wait->wakeup_time, &ts);
1452		if (ts_sleep.tv_sec > 60) {
1453			ts_sleep.tv_sec = 60;
1454			ts_sleep.tv_nsec = 0;
1455		}
1456	}
1457	/* Don't sleep for negative times. */
1458	if ((ts_sleep.tv_sec >= 0) && (ts_sleep.tv_nsec >= 0)) {
1459		KSE_SET_WAIT(kse);
1460		KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1461		saved_flags = kse->k_mbx.km_flags;
1462		kse->k_mbx.km_flags |= KMF_NOUPCALL;
1463		kse_release(&ts_sleep);
1464		kse->k_mbx.km_flags = saved_flags;
1465		KSE_CLEAR_WAIT(kse);
1466		KSE_SCHED_LOCK(kse, kse->k_kseg);
1467	}
1468}
1469
1470/*
1471 * Avoid calling this kse_exit() so as not to confuse it with the
1472 * system call of the same name.
1473 */
1474static void
1475kse_fini(struct kse *kse)
1476{
1477	struct timespec ts;
1478	struct kse_group *free_kseg = NULL;
1479
1480	if ((kse->k_kseg->kg_flags & KGF_SINGLE_THREAD) != 0)
1481		kse_exit();
1482	/*
1483	 * Check to see if this is one of the main kses.
1484	 */
1485	else if (kse->k_kseg != _kse_initial->k_kseg) {
1486		/* Remove this KSE from the KSEG's list of KSEs. */
1487		KSE_SCHED_LOCK(kse, kse->k_kseg);
1488		TAILQ_REMOVE(&kse->k_kseg->kg_kseq, kse, k_kgqe);
1489		kse->k_kseg->kg_ksecount--;
1490		if (TAILQ_EMPTY(&kse->k_kseg->kg_kseq))
1491			free_kseg = kse->k_kseg;
1492		KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1493
1494		/*
1495		 * Add this KSE to the list of free KSEs along with
1496		 * the KSEG if it is now orphaned.
1497		 */
1498		KSE_LOCK_ACQUIRE(kse, &kse_lock);
1499		if (free_kseg != NULL)
1500			kseg_free_unlocked(free_kseg);
1501		kse_free_unlocked(kse);
1502		KSE_LOCK_RELEASE(kse, &kse_lock);
1503		kse_exit();
1504		/* Never returns. */
1505	} else {
1506		/*
1507		 * Wait for the last KSE/thread to exit, or for more
1508		 * threads to be created (it is possible for additional
1509		 * scope process threads to be created after the main
1510		 * thread exits).
1511		 */
1512		ts.tv_sec = 120;
1513		ts.tv_nsec = 0;
1514		KSE_SET_WAIT(kse);
1515		KSE_SCHED_LOCK(kse, kse->k_kseg);
1516		if ((active_kse_count > 1) &&
1517		    (kse->k_kseg->kg_threadcount == 0)) {
1518			KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1519			kse_release(&ts);
1520			/* The above never returns. */
1521		}
1522		else
1523			KSE_SCHED_UNLOCK(kse, kse->k_kseg);
1524
1525		/* There are no more threads; exit this process: */
1526		if (kse->k_kseg->kg_threadcount == 0) {
1527			/* kse_exit(); */
1528			__isthreaded = 0;
1529			exit(0);
1530		}
1531	}
1532}
1533
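/*
 * Record a signal as pending for a specific thread.  If the thread
 * is not active and is blocked in the kernel, ask the kernel to
 * interrupt it so the signal can be noticed.
 */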
1534void
1535_thr_sig_add(struct pthread *thread, int sig, siginfo_t *info, ucontext_t *ucp)
1536{
1537	struct kse *curkse;
1538
1539	curkse = _get_curkse();
1540
1541	KSE_SCHED_LOCK(curkse, thread->kseg);
1542	/*
1543	 * A thread's assigned KSE can't change out from under us
1544	 * when we hold the scheduler lock.
1545	 */
1546	if (THR_IS_ACTIVE(thread)) {
1547		/* Thread is active.  Can't install the signal for it. */
1548		/* Make a note in the thread that it has a signal. */
1549		sigaddset(&thread->sigpend, sig);
1550		thread->check_pending = 1;
1551	}
1552	else {
1553		/* Make a note in the thread that it has a signal. */
1554		sigaddset(&thread->sigpend, sig);
1555		thread->check_pending = 1;
1556
1557		if (thread->blocked != 0) {
1558			/* Tell the kernel to interrupt the thread. */
1559			kse_thr_interrupt(&thread->tmbx);
1560		}
1561	}
1562	KSE_SCHED_UNLOCK(curkse, thread->kseg);
1563}
1564
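/*
 * Convert a relative timeout into the calling thread's absolute
 * wakeup time.  A NULL timeout means wait forever (wakeup time of
 * -1, -1) and a zero timeout means wake up immediately.
 */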
1565void
1566_thr_set_timeout(const struct timespec *timeout)
1567{
1568	struct pthread	*curthread = _get_curthread();
1569	struct timespec ts;
1570
1571	/* Reset the timeout flag for the running thread: */
1572	curthread->timeout = 0;
1573
1574	/* Check if the thread is to wait forever: */
1575	if (timeout == NULL) {
1576		/*
1577		 * Set the wakeup time to something that can be recognised as
1578		 * different to an actual time of day:
1579		 */
1580		curthread->wakeup_time.tv_sec = -1;
1581		curthread->wakeup_time.tv_nsec = -1;
1582	}
1583	/* Check if no waiting is required: */
1584	else if ((timeout->tv_sec == 0) && (timeout->tv_nsec == 0)) {
1585		/* Set the wake up time to 'immediately': */
1586		curthread->wakeup_time.tv_sec = 0;
1587		curthread->wakeup_time.tv_nsec = 0;
1588	} else {
1589		/* Calculate the time for the current thread to wakeup: */
1590		KSE_GET_TOD(curthread->kse, &ts);
1591		TIMESPEC_ADD(&curthread->wakeup_time, &ts, timeout);
1592	}
1593}
1594
1595void
1596_thr_panic_exit(char *file, int line, char *msg)
1597{
1598	char buf[256];
1599
1600	snprintf(buf, sizeof(buf), "(%s:%d) %s\n", file, line, msg);
1601	__sys_write(2, buf, strlen(buf));
1602	abort();
1603}
1604
1605void
1606_thr_setrunnable(struct pthread *curthread, struct pthread *thread)
1607{
1608	kse_critical_t crit;
1609
1610	crit = _kse_critical_enter();
1611	KSE_SCHED_LOCK(curthread->kse, thread->kseg);
1612	_thr_setrunnable_unlocked(thread);
1613	KSE_SCHED_UNLOCK(curthread->kse, thread->kseg);
1614	_kse_critical_leave(crit);
1615}
1616
1617void
1618_thr_setrunnable_unlocked(struct pthread *thread)
1619{
1620	if ((thread->kseg->kg_flags & KGF_SINGLE_THREAD) != 0)
1621		/* No silly queues for these threads. */
1622		THR_SET_STATE(thread, PS_RUNNING);
1623	else {
1624		if ((thread->flags & THR_FLAGS_IN_WAITQ) != 0)
1625			KSE_WAITQ_REMOVE(thread->kse, thread);
1626		THR_SET_STATE(thread, PS_RUNNING);
1627		if ((thread->blocked == 0) &&
1628		    (thread->flags & THR_FLAGS_IN_RUNQ) == 0)
1629			THR_RUNQ_INSERT_TAIL(thread);
1630	}
1631        /*
1632         * XXX - Threads are not yet assigned to specific KSEs; they are
1633         *       assigned to the KSEG.  So the fact that a thread's KSE is
1634         *       waiting doesn't necessarily mean that it will be the KSE
1635         *       that runs the thread after the lock is granted.  But we
1636         *       don't know if the other KSEs within the same KSEG are
1637         *       also in a waiting state or not so we err on the side of
1638         *       caution and wake up the thread's last known KSE.  We
1639         *       ensure that the thread's KSE doesn't change while its
1640         *       scheduling lock is held, so it is safe to reference it
1641         *       (the KSE).  If the KSE wakes up and doesn't find any more
1642         *       work it will again go back to waiting so no harm is done.
1643         */
1644	if (KSE_WAITING(thread->kse))
1645		KSE_WAKEUP(thread->kse);
1646}
1647
1648struct pthread *
1649_get_curthread(void)
1650{
1651	return (_ksd_curthread);
1652}
1653
1654/* This assumes the caller has disabled upcalls. */
1655struct kse *
1656_get_curkse(void)
1657{
1658	return (_ksd_curkse);
1659}
1660
1661void
1662_set_curkse(struct kse *kse)
1663{
1664	_ksd_setprivate(&kse->k_ksd);
1665}
1666
1667/*
1668 * Allocate a new KSEG.
1669 *
1670 * We allow the current thread to be NULL in the case that this
1671 * is the first time a KSEG is being created (library initialization).
1672 * In this case, we don't need to (and can't) take any locks.
1673 */
1674struct kse_group *
1675_kseg_alloc(struct pthread *curthread)
1676{
1677	struct kse_group *kseg = NULL;
1678	kse_critical_t crit;
1679
1680	if ((curthread != NULL) && (free_kseg_count > 0)) {
1681		/* Use the kse lock for the kseg queue. */
1682		crit = _kse_critical_enter();
1683		KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1684		if ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) {
1685			TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
1686			free_kseg_count--;
1687			active_kseg_count++;
1688			TAILQ_INSERT_TAIL(&active_kse_groupq, kseg, kg_qe);
1689		}
1690		KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1691		_kse_critical_leave(crit);
1692		if (kseg)
1693			kseg_reinit(kseg);
1694	}
1695
1696	/*
1697	 * Attempt to allocate a new KSE group if a cached one wasn't
1698	 * found in the free list above; the newly malloc'd group is
1699	 * added to the list of active KSEGs.
1700	 */
1701	if ((kseg == NULL) &&
1702	    ((kseg = (struct kse_group *)malloc(sizeof(*kseg))) != NULL)) {
1703		if (_pq_alloc(&kseg->kg_schedq.sq_runq,
1704		    THR_MIN_PRIORITY, THR_LAST_PRIORITY) != 0) {
1705			free(kseg);
1706			kseg = NULL;
1707		} else {
1708			kseg_init(kseg);
1709			/* Add the KSEG to the list of active KSEGs. */
1710			if (curthread != NULL) {
1711				crit = _kse_critical_enter();
1712				KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1713				active_kseg_count++;
1714				TAILQ_INSERT_TAIL(&active_kse_groupq,
1715				    kseg, kg_qe);
1716				KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1717				_kse_critical_leave(crit);
1718			} else {
1719				active_kseg_count++;
1720				TAILQ_INSERT_TAIL(&active_kse_groupq,
1721				    kseg, kg_qe);
1722			}
1723		}
1724	}
1725	return (kseg);
1726}
1727
1728/*
1729 * This must be called with the kse lock held and when there are
1730 * no more threads that reference it.
1731 */
1732static void
1733kseg_free_unlocked(struct kse_group *kseg)
1734{
1735	TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe);
1736	TAILQ_INSERT_HEAD(&free_kse_groupq, kseg, kg_qe);
1737	free_kseg_count++;
1738	active_kseg_count--;
1739}
1740
1741void
1742_kseg_free(struct kse_group *kseg)
1743{
1744	struct kse *curkse;
1745	kse_critical_t crit;
1746
1747	crit = _kse_critical_enter();
1748	curkse = _get_curkse();
1749	KSE_LOCK_ACQUIRE(curkse, &kse_lock);
1750	kseg_free_unlocked(kseg);
1751	KSE_LOCK_RELEASE(curkse, &kse_lock);
1752	_kse_critical_leave(crit);
1753}
1754
1755/*
1756 * Allocate a new KSE.
1757 *
1758 * We allow the current thread to be NULL in the case that this
1759 * is the first time a KSE is being created (library initialization).
1760 * In this case, we don't need to (and can't) take any locks.
1761 */
1762struct kse *
1763_kse_alloc(struct pthread *curthread)
1764{
1765	struct kse *kse = NULL;
1766	kse_critical_t crit;
1767	int need_ksd = 0;
1768	int i;
1769
1770	if ((curthread != NULL) && (free_kse_count > 0)) {
1771		crit = _kse_critical_enter();
1772		KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1773		/* Search for a finished KSE. */
1774		kse = TAILQ_FIRST(&free_kseq);
1775#ifdef NOT_YET
1776#define KEMBX_DONE	0x04
1777		while ((kse != NULL) &&
1778		    ((kse->k_mbx.km_flags & KEMBX_DONE) == 0)) {
1779			kse = TAILQ_NEXT(kse, k_qe);
1780		}
1781#undef KEMBX_DONE
1782#endif
1783		if (kse != NULL) {
1784			TAILQ_REMOVE(&free_kseq, kse, k_qe);
1785			free_kse_count--;
1786			TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe);
1787			active_kse_count++;
1788		}
1789		KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1790		_kse_critical_leave(crit);
1791		if (kse != NULL)
1792			kse_reinit(kse);
1793	}
1794	if ((kse == NULL) &&
1795	    ((kse = (struct kse *)malloc(sizeof(*kse))) != NULL)) {
1796		bzero(kse, sizeof(*kse));
1797
1798		/* Initialize the lockusers. */
1799		for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) {
1800			_lockuser_init(&kse->k_lockusers[i], (void *)kse);
1801			_LCK_SET_PRIVATE2(&kse->k_lockusers[i], NULL);
1802		}
1803		/* _lock_init(kse->k_lock, ...) */
1804
1805		/* We had to malloc a kse; mark it as needing a new ID.*/
1806		need_ksd = 1;
1807
1808		/*
1809		 * Create the KSE context.
1810		 *
1811		 * XXX - For now this is done here in the allocation.
1812		 *       In the future, we may want to have it done
1813		 *       outside the allocation so that scope system
1814		 *       threads (one thread per KSE) are not required
1815		 *       to have a stack for an unneeded kse upcall.
1816		 */
1817		kse->k_mbx.km_func = (kse_func_t *)kse_sched_multi;
1818		kse->k_mbx.km_stack.ss_sp = (char *)malloc(KSE_STACKSIZE);
1819		kse->k_mbx.km_stack.ss_size = KSE_STACKSIZE;
1820		kse->k_mbx.km_udata = (void *)kse;
1821		kse->k_mbx.km_quantum = 20000;
1822		/*
1823		 * We need to keep a copy of the stack in case it
1824		 * doesn't get used; a KSE running a scope system
1825		 * thread will use that thread's stack.
1826		 */
1827		kse->k_stack.ss_sp = kse->k_mbx.km_stack.ss_sp;
1828		kse->k_stack.ss_size = kse->k_mbx.km_stack.ss_size;
1829		if (kse->k_mbx.km_stack.ss_sp == NULL) {
1830			for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) {
1831				_lockuser_destroy(&kse->k_lockusers[i]);
1832			}
1833			/* _lock_destroy(&kse->k_lock); */
1834			free(kse);
1835			kse = NULL;
1836		}
1837	}
1838	if ((kse != NULL) && (need_ksd != 0)) {
1839		/* This KSE needs initialization. */
1840		if (curthread != NULL) {
1841			crit = _kse_critical_enter();
1842			KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1843		}
1844		/* Initialize KSD inside of the lock. */
1845		if (_ksd_create(&kse->k_ksd, (void *)kse, sizeof(*kse)) != 0) {
1846			if (curthread != NULL) {
1847				KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1848				_kse_critical_leave(crit);
1849			}
1850			free(kse->k_mbx.km_stack.ss_sp);
1851			for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) {
1852				_lockuser_destroy(&kse->k_lockusers[i]);
1853			}
1854			free(kse);
1855			return (NULL);
1856		}
1857		kse->k_flags = 0;
1858		TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe);
1859		active_kse_count++;
1860		if (curthread != NULL) {
1861			KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1862			_kse_critical_leave(crit);
1863		}
1864	}
1865	return (kse);
1866}
1867
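/*
 * Reset a cached KSE to a clean state before it is reused.
 */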
1868static void
1869kse_reinit(struct kse *kse)
1870{
1871	bzero(&kse->k_mbx, sizeof(struct kse_mailbox));
1872	kse->k_curthread = 0;
1873	kse->k_kseg = 0;
1874	kse->k_schedq = 0;
1875	kse->k_locklevel = 0;
1876	sigemptyset(&kse->k_sigmask);
1877	bzero(&kse->k_sigq, sizeof(kse->k_sigq));
1878	kse->k_check_sigq = 0;
1879	kse->k_flags = KF_INITIALIZED;
1880	kse->k_waiting = 0;
1881	kse->k_error = 0;
1882	kse->k_cpu = 0;
1883	kse->k_done = 0;
1884}
1885
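/*
 * Move a KSE from the active list to the free list.  The kse_lock
 * must be held unless the process is single threaded.
 */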
1886void
1887kse_free_unlocked(struct kse *kse)
1888{
1889	TAILQ_REMOVE(&active_kseq, kse, k_qe);
1890	active_kse_count--;
1891	kse->k_kseg = NULL;
1892	kse->k_mbx.km_quantum = 20000;
1893	kse->k_flags &= ~KF_INITIALIZED;
1894	TAILQ_INSERT_HEAD(&free_kseq, kse, k_qe);
1895	free_kse_count++;
1896}
1897
1898void
1899_kse_free(struct pthread *curthread, struct kse *kse)
1900{
1901	kse_critical_t crit;
1902
1903	if (curthread == NULL)
1904		kse_free_unlocked(kse);
1905	else {
1906		crit = _kse_critical_enter();
1907		KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
1908		kse_free_unlocked(kse);
1909		KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
1910		_kse_critical_leave(crit);
1911	}
1912}
1913
1914static void
1915kseg_init(struct kse_group *kseg)
1916{
1917	kseg_reinit(kseg);
1918	_lock_init(&kseg->kg_lock, LCK_ADAPTIVE, _kse_lock_wait,
1919	    _kse_lock_wakeup);
1920}
1921
1922static void
1923kseg_reinit(struct kse_group *kseg)
1924{
1925	TAILQ_INIT(&kseg->kg_kseq);
1926	TAILQ_INIT(&kseg->kg_threadq);
1927	TAILQ_INIT(&kseg->kg_schedq.sq_waitq);
1928	kseg->kg_threadcount = 0;
1929	kseg->kg_ksecount = 0;
1930	kseg->kg_idle_kses = 0;
1931	kseg->kg_flags = 0;
1932}
1933
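/*
 * Allocate a thread structure, preferring the cache of free threads.
 * The garbage collector is run first if enough threads are waiting
 * to be reclaimed (GC_NEEDED()).
 */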
1934struct pthread *
1935_thr_alloc(struct pthread *curthread)
1936{
1937	kse_critical_t crit;
1938	struct pthread *thread = NULL;
1939
1940	if (curthread != NULL) {
1941		if (GC_NEEDED())
1942			_thr_gc(curthread);
1943		if (free_thread_count > 0) {
1944			crit = _kse_critical_enter();
1945			KSE_LOCK_ACQUIRE(curthread->kse, &thread_lock);
1946			if ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
1947				TAILQ_REMOVE(&free_threadq, thread, tle);
1948				free_thread_count--;
1949			}
1950			KSE_LOCK_RELEASE(curthread->kse, &thread_lock);
1951		}
1952	}
1953	if (thread == NULL)
1954		thread = (struct pthread *)malloc(sizeof(struct pthread));
1955	return (thread);
1956}
1957
1958void
1959_thr_free(struct pthread *curthread, struct pthread *thread)
1960{
1961	kse_critical_t crit;
1962	int i;
1963
1964	DBG_MSG("Freeing thread %p\n", thread);
1965	if ((curthread == NULL) || (free_thread_count >= MAX_CACHED_THREADS)) {
1966		for (i = 0; i < MAX_THR_LOCKLEVEL; i++) {
1967			_lockuser_destroy(&thread->lockusers[i]);
1968		}
1969		_lock_destroy(&thread->lock);
1970		free(thread);
1971	}
1972	else {
1973		crit = _kse_critical_enter();
1974		KSE_LOCK_ACQUIRE(curthread->kse, &_thread_list_lock);
1975		THR_LIST_REMOVE(thread);
1976		KSE_LOCK_RELEASE(curthread->kse, &_thread_list_lock);
1977		KSE_LOCK_ACQUIRE(curthread->kse, &thread_lock);
1978		TAILQ_INSERT_HEAD(&free_threadq, thread, tle);
1979		free_thread_count++;
1980		KSE_LOCK_RELEASE(curthread->kse, &thread_lock);
1981		_kse_critical_leave(crit);
1982	}
1983}
1984