/*
 * Copyright (C) 2003 Daniel M. Eischen <deischen@freebsd.org>
 * Copyright (C) 2002 Jonathon Mini <mini@freebsd.org>
 * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by John Birrell.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/lib/libkse/thread/thr_kern.c 114187 2003-04-28 23:56:12Z deischen $");

#include <sys/types.h>
#include <sys/kse.h>
#include <sys/signalvar.h>
#include <sys/queue.h>
#include <machine/atomic.h>

#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <ucontext.h>
#include <unistd.h>

#include "atomic_ops.h"
#include "thr_private.h"
#include "pthread_md.h"
#include "libc_private.h"

/*#define DEBUG_THREAD_KERN */
#ifdef DEBUG_THREAD_KERN
#define DBG_MSG		stdout_debug
#else
#define DBG_MSG(x...)
#endif

/*
 * Define a high water mark for the maximum number of threads that
 * will be cached.  Once this level is reached, any extra threads
 * will be free()'d.
 *
 * XXX - It doesn't make sense to worry about the maximum number of
 *       KSEs that we can cache because the system will limit us to
 *       something *much* less than the maximum number of threads
 *       that we can have.  Disregarding KSEs in their own group,
 *       the maximum number of KSEs is the number of processors in
 *       the system.
 */
#define	MAX_CACHED_THREADS	100
#define	KSE_STACKSIZE		16384

#define	KSE_SET_MBOX(kse, thrd) \
	(kse)->k_mbx.km_curthread = &(thrd)->tmbx

#define	KSE_SET_EXITED(kse)	(kse)->k_flags |= KF_EXITED

/*
 * Macros for manipulating the run queues.  The priority queue
 * routines use the thread's pqe link and also handle the setting
 * and clearing of the thread's THR_FLAGS_IN_RUNQ flag.
 */
#define	KSE_RUNQ_INSERT_HEAD(kse, thrd)			\
	_pq_insert_head(&(kse)->k_schedq->sq_runq, thrd)
#define	KSE_RUNQ_INSERT_TAIL(kse, thrd)			\
	_pq_insert_tail(&(kse)->k_schedq->sq_runq, thrd)
#define	KSE_RUNQ_REMOVE(kse, thrd)			\
	_pq_remove(&(kse)->k_schedq->sq_runq, thrd)
#define	KSE_RUNQ_FIRST(kse)	_pq_first(&(kse)->k_schedq->sq_runq)

#define KSE_RUNQ_THREADS(kse)	((kse)->k_schedq->sq_runq.pq_threads)
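
/*
 * Usage sketch (illustrative only, mirroring kse_sched_multi() below):
 * a scheduler typically pops the highest-priority runnable thread with
 * these macros while holding the KSE's scheduling lock:
 *
 *	struct pthread *td;
 *
 *	KSE_SCHED_LOCK(curkse, curkse->k_kseg);
 *	if ((td = KSE_RUNQ_FIRST(curkse)) != NULL)
 *		KSE_RUNQ_REMOVE(curkse, td);
 *	KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
 *
 * The _pq_*() routines behind the macros maintain THR_FLAGS_IN_RUNQ,
 * so callers never touch that flag directly.
 */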

/*
 * We've got to keep track of everything that is allocated, not only
 * to have a speedy free list, but also so they can be deallocated
 * after a fork().
 */
static TAILQ_HEAD(, kse)	active_kseq;
static TAILQ_HEAD(, kse)	free_kseq;
static TAILQ_HEAD(, kse_group)	free_kse_groupq;
static TAILQ_HEAD(, kse_group)	active_kse_groupq;
static TAILQ_HEAD(, kse_group)	gc_ksegq;
static struct lock		kse_lock;	/* also used for kseg queue */
static int			free_kse_count = 0;
static int			free_kseg_count = 0;
static TAILQ_HEAD(, pthread)	free_threadq;
static struct lock		thread_lock;
static int			free_thread_count = 0;
static int			inited = 0;
static int			active_kse_count = 0;
static int			active_kseg_count = 0;

static void	kse_check_completed(struct kse *kse);
static void	kse_check_waitq(struct kse *kse);
static void	kse_check_signals(struct kse *kse);
static void	kse_fini(struct kse *curkse);
static void	kse_reinit(struct kse *kse);
static void	kse_sched_multi(struct kse *curkse);
#ifdef NOT_YET
static void	kse_sched_single(struct kse *curkse);
#endif
static void	kse_switchout_thread(struct kse *kse, struct pthread *thread);
static void	kse_wait(struct kse *kse, struct pthread *td_wait);
static void	kse_free_unlocked(struct kse *kse);
static void	kseg_free_unlocked(struct kse_group *kseg);
static void	kseg_init(struct kse_group *kseg);
static void	kseg_reinit(struct kse_group *kseg);
static void	kse_waitq_insert(struct pthread *thread);
static void	kse_wakeup_multi(struct kse *curkse);
static void	kse_wakeup_one(struct pthread *thread);
static void	thr_cleanup(struct kse *kse, struct pthread *curthread);
static void	thr_resume_wrapper(int unused_1, siginfo_t *unused_2,
		    ucontext_t *ucp);
static void	thr_resume_check(struct pthread *curthread, ucontext_t *ucp,
		    struct pthread_sigframe *psf);
static int	thr_timedout(struct pthread *thread, struct timespec *curtime);

/*
 * This is called after a fork().
 * No locks need to be taken here since we are guaranteed to be
 * single threaded.
 */
void
_kse_single_thread(struct pthread *curthread)
{
	struct kse *kse, *kse_next;
	struct kse_group *kseg, *kseg_next;
	struct pthread *thread, *thread_next;
	kse_critical_t crit;
	int i;

	/*
	 * Disable upcalls and clear the threaded flag.
	 * XXX - I don't think we need to disable upcalls after a fork(),
	 *       but it doesn't hurt.
	 */
	crit = _kse_critical_enter();
	__isthreaded = 0;

	/*
	 * Enter a loop to remove and free all threads other than
	 * the running thread from the active thread list:
	 */
	for (thread = TAILQ_FIRST(&_thread_list); thread != NULL;
	    thread = thread_next) {
		/*
		 * Advance to the next thread before destroying
		 * the current thread.
		 */
		thread_next = TAILQ_NEXT(thread, tle);

		/*
		 * Remove this thread from the list (the current
		 * thread will be removed but re-added by libpthread
		 * initialization).
		 */
		TAILQ_REMOVE(&_thread_list, thread, tle);
		/* Make sure this isn't the running thread: */
		if (thread != curthread) {
			_thr_stack_free(&thread->attr);
			if (thread->specific != NULL)
				free(thread->specific);
			for (i = 0; i < MAX_THR_LOCKLEVEL; i++) {
				_lockuser_destroy(&thread->lockusers[i]);
			}
			_lock_destroy(&thread->lock);
			free(thread);
		}
	}

	TAILQ_INIT(&curthread->mutexq);		/* initialize mutex queue */
	curthread->joiner = NULL;		/* no joining threads yet */
	sigemptyset(&curthread->sigpend);	/* clear pending signals */
	if (curthread->specific != NULL) {
		free(curthread->specific);
		curthread->specific = NULL;
		curthread->specific_data_count = 0;
	}

	/* Free the free KSEs: */
	while ((kse = TAILQ_FIRST(&free_kseq)) != NULL) {
		TAILQ_REMOVE(&free_kseq, kse, k_qe);
		_ksd_destroy(&kse->k_ksd);
		if (kse->k_stack.ss_sp != NULL)
			free(kse->k_stack.ss_sp);
		free(kse);
	}
	free_kse_count = 0;

	/* Free the active KSEs: */
	for (kse = TAILQ_FIRST(&active_kseq); kse != NULL; kse = kse_next) {
		kse_next = TAILQ_NEXT(kse, k_qe);
		TAILQ_REMOVE(&active_kseq, kse, k_qe);
		for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) {
			_lockuser_destroy(&kse->k_lockusers[i]);
		}
		if (kse->k_stack.ss_sp != NULL)
			free(kse->k_stack.ss_sp);
		_lock_destroy(&kse->k_lock);
		free(kse);
	}
	active_kse_count = 0;

	/* Free the free KSEGs: */
	while ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) {
		TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
		_lock_destroy(&kseg->kg_lock);
		_pq_free(&kseg->kg_schedq.sq_runq);
		free(kseg);
	}
	free_kseg_count = 0;

	/* Free the active KSEGs: */
	for (kseg = TAILQ_FIRST(&active_kse_groupq);
	    kseg != NULL; kseg = kseg_next) {
		kseg_next = TAILQ_NEXT(kseg, kg_qe);
		TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe);
		_lock_destroy(&kseg->kg_lock);
		_pq_free(&kseg->kg_schedq.sq_runq);
		free(kseg);
	}
	active_kseg_count = 0;

	/* Free the free threads. */
	while ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
		TAILQ_REMOVE(&free_threadq, thread, tle);
		if (thread->specific != NULL)
			free(thread->specific);
		for (i = 0; i < MAX_THR_LOCKLEVEL; i++) {
			_lockuser_destroy(&thread->lockusers[i]);
		}
		_lock_destroy(&thread->lock);
		free(thread);
	}
	free_thread_count = 0;

	/* Free the to-be-gc'd threads. */
	while ((thread = TAILQ_FIRST(&_thread_gc_list)) != NULL) {
		TAILQ_REMOVE(&_thread_gc_list, thread, gcle);
		for (i = 0; i < MAX_THR_LOCKLEVEL; i++) {
			_lockuser_destroy(&thread->lockusers[i]);
		}
		_lock_destroy(&thread->lock);
		free(thread);
	}
	TAILQ_INIT(&gc_ksegq);
	_gc_count = 0;

	if (inited != 0) {
		/*
		 * Destroy these locks; they'll be recreated to ensure they
		 * are in the unlocked state.
		 */
		_lock_destroy(&kse_lock);
		_lock_destroy(&thread_lock);
		_lock_destroy(&_thread_list_lock);
		inited = 0;
	}

	/*
	 * After a fork(), the leftover thread goes back to being
	 * a scope process thread.
	 */
	curthread->attr.flags &= ~PTHREAD_SCOPE_SYSTEM;
	curthread->attr.flags |= PTHREAD_SCOPE_PROCESS;

	/*
	 * After a fork, we are still operating on the thread's original
	 * stack.  Don't clear the THR_FLAGS_USER from the thread's
	 * attribute flags.
	 */

	/* Initialize the threads library. */
	curthread->kse = NULL;
	curthread->kseg = NULL;
	_kse_initial = NULL;
	_libpthread_init(curthread);
}

/*
 * This is used to initialize housekeeping and to initialize the
 * KSD for the KSE.
 */
void
_kse_init(void)
{
	if (inited == 0) {
		TAILQ_INIT(&active_kseq);
		TAILQ_INIT(&active_kse_groupq);
		TAILQ_INIT(&free_kseq);
		TAILQ_INIT(&free_kse_groupq);
		TAILQ_INIT(&free_threadq);
		TAILQ_INIT(&gc_ksegq);
		if (_lock_init(&kse_lock, LCK_ADAPTIVE,
		    _kse_lock_wait, _kse_lock_wakeup) != 0)
			PANIC("Unable to initialize free KSE queue lock");
		if (_lock_init(&thread_lock, LCK_ADAPTIVE,
		    _kse_lock_wait, _kse_lock_wakeup) != 0)
			PANIC("Unable to initialize free thread queue lock");
		if (_lock_init(&_thread_list_lock, LCK_ADAPTIVE,
		    _kse_lock_wait, _kse_lock_wakeup) != 0)
			PANIC("Unable to initialize thread list lock");
		active_kse_count = 0;
		active_kseg_count = 0;
		_gc_count = 0;
		inited = 1;
	}
}

int
_kse_isthreaded(void)
{
	return (__isthreaded != 0);
}

/*
 * This is called when the first thread (other than the initial
 * thread) is created.
 */
int
_kse_setthreaded(int threaded)
{
	if ((threaded != 0) && (__isthreaded == 0)) {
		/*
		 * Locking functions in libc are required when there are
		 * threads other than the initial thread.
		 */
		__isthreaded = 1;

		/*
		 * Tell the kernel to create a KSE for the initial thread
		 * and enable upcalls in it.
		 */
		_kse_initial->k_flags |= KF_STARTED;
		if (kse_create(&_kse_initial->k_mbx, 0) != 0) {
			_kse_initial->k_flags &= ~KF_STARTED;
			__isthreaded = 0;
			/* may abort() */
			DBG_MSG("kse_create failed\n");
			return (-1);
		}
		KSE_SET_MBOX(_kse_initial, _thr_initial);
		_thr_setmaxconcurrency();
	}
	return (0);
}

/*
 * Lock wait and wakeup handlers for KSE locks.  These are only used by
 * KSEs, and should never be used by threads.  KSE locks include the
 * KSE group lock (used for locking the scheduling queue) and the
 * kse_lock defined above.
 *
 * When a KSE lock attempt blocks, the entire KSE blocks allowing another
 * KSE to run.  For the most part, it doesn't make much sense to try and
 * schedule another thread because you need to lock the scheduling queue
 * in order to do that.  And since the KSE lock is used to lock the scheduling
 * queue, you would just end up blocking again.
 */
void
_kse_lock_wait(struct lock *lock, struct lockuser *lu)
{
	struct kse *curkse = (struct kse *)_LCK_GET_PRIVATE(lu);
	struct timespec ts;
	int saved_flags;

	if (curkse->k_mbx.km_curthread != NULL)
		PANIC("kse_lock_wait does not disable upcall.\n");
	/*
	 * Enter a loop to wait until we get the lock.
	 */
	ts.tv_sec = 0;
	ts.tv_nsec = 1000000;  /* 1,000,000 ns = 1 ms */
	KSE_SET_WAIT(curkse);
	while (_LCK_BUSY(lu)) {
		/*
		 * Yield the kse and wait to be notified when the lock
		 * is granted.
		 */
		saved_flags = curkse->k_mbx.km_flags;
		curkse->k_mbx.km_flags |= KMF_NOUPCALL | KMF_NOCOMPLETED;
		kse_release(&ts);
		curkse->k_mbx.km_flags = saved_flags;

		/*
		 * Make sure that the wait flag is set again in case
		 * we woke up without the lock being granted.
		 */
		KSE_SET_WAIT(curkse);
	}
	KSE_CLEAR_WAIT(curkse);
}

void
_kse_lock_wakeup(struct lock *lock, struct lockuser *lu)
{
	struct kse *curkse;
	struct kse *kse;

	curkse = _get_curkse();
	kse = (struct kse *)_LCK_GET_PRIVATE(lu);

	if (kse == curkse)
		PANIC("KSE trying to wake itself up in lock");
	else if (KSE_WAITING(kse)) {
		/*
		 * Notify the owning kse that it has the lock.
		 */
		KSE_WAKEUP(kse);
	}
}
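
/*
 * Illustrative timeline (a sketch, not additional code): the waiter
 * re-asserts its wait flag before every re-test of the lock, so a
 * wakeup from the KSE releasing the lock cannot be lost:
 *
 *	_kse_lock_wait()		_kse_lock_wakeup()
 *	----------------		------------------
 *	KSE_SET_WAIT(curkse);
 *	_LCK_BUSY(lu) -> true
 *	kse_release(&ts); ...sleeps...	KSE_WAITING(kse) -> true
 *					KSE_WAKEUP(kse);
 *	KSE_SET_WAIT(curkse);
 *	_LCK_BUSY(lu) -> false
 *	KSE_CLEAR_WAIT(curkse);
 *
 * At worst a wakeup races with a re-test and costs one more trip
 * around the loop; the timeout passed to kse_release() bounds each
 * sleep either way.
 */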

/*
 * Thread wait and wakeup handlers for thread locks.  These are only used
 * by threads, never by KSEs.  Thread locks include the per-thread lock
 * (defined in its structure), and condition variable and mutex locks.
 */
void
_thr_lock_wait(struct lock *lock, struct lockuser *lu)
{
	struct pthread *curthread = (struct pthread *)lu->lu_private;
	int count;

	/*
	 * Spin for a bit.
	 *
	 * XXX - We probably want to make this a bit smarter.  It
	 *       doesn't make sense to spin unless there is more
	 *       than 1 CPU.  A thread that is holding one of these
	 *       locks is prevented from being swapped out for another
	 *       thread within the same scheduling entity.
	 */
	count = 0;
	while (_LCK_BUSY(lu) && count < 300)
		count++;
	while (_LCK_BUSY(lu)) {
		THR_LOCK_SWITCH(curthread);
		if (_LCK_BUSY(lu)) {
			/* Wait for the lock: */
			atomic_store_rel_int(&curthread->need_wakeup, 1);
			THR_SET_STATE(curthread, PS_LOCKWAIT);
			_thr_sched_switch(curthread);
		}
		THR_UNLOCK_SWITCH(curthread);
	}
}

void
_thr_lock_wakeup(struct lock *lock, struct lockuser *lu)
{
	struct pthread *thread;
	struct pthread *curthread;
	int unlock;

	curthread = _get_curthread();
	thread = (struct pthread *)_LCK_GET_PRIVATE(lu);

	unlock = 0;
	if (curthread->kseg == thread->kseg) {
		/* Not already locked */
		if (curthread->lock_switch == 0) {
			THR_SCHED_LOCK(curthread, thread);
			unlock = 1;
		}
	} else {
		THR_SCHED_LOCK(curthread, thread);
		unlock = 1;
	}
	_thr_setrunnable_unlocked(thread);
	atomic_store_rel_int(&thread->need_wakeup, 0);
	if (unlock)
		THR_SCHED_UNLOCK(curthread, thread);
}

kse_critical_t
_kse_critical_enter(void)
{
	kse_critical_t crit;

	crit = _ksd_readandclear_tmbx();
	return (crit);
}

void
_kse_critical_leave(kse_critical_t crit)
{
	struct pthread *curthread;

	_ksd_set_tmbx(crit);
	if ((crit != NULL) && ((curthread = _get_curthread()) != NULL))
		THR_YIELD_CHECK(curthread);
}

int
_kse_in_critical(void)
{
	return (_ksd_get_tmbx() == NULL);
}
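
/*
 * Usage sketch (illustrative only): a KSE critical region brackets
 * code that must not be interrupted by an upcall.  The token returned
 * by _kse_critical_enter() is the saved mailbox pointer, so regions
 * nest naturally:
 *
 *	kse_critical_t crit;
 *
 *	crit = _kse_critical_enter();
 *	... manipulate KSE-private or library-global state ...
 *	_kse_critical_leave(crit);
 *
 * _thr_gc() below follows exactly this pattern around the thread
 * list lock.
 */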

void
_thr_critical_enter(struct pthread *thread)
{
	thread->critical_count++;
}

void
_thr_critical_leave(struct pthread *thread)
{
	thread->critical_count--;
	THR_YIELD_CHECK(thread);
}

/*
 * XXX - We may need to take the scheduling lock before calling
 *       this, or perhaps take the lock within here before
 *       doing anything else.
 */
void
_thr_sched_switch(struct pthread *curthread)
{
	struct pthread_sigframe psf;
	struct kse *curkse;
	volatile int once = 0;

	/* We're in the scheduler, 5 by 5: */
	THR_ASSERT(curthread->lock_switch, "lock_switch");
	THR_ASSERT(_kse_in_critical(), "not in critical region");
	curkse = _get_curkse();

	curthread->need_switchout = 1;	/* The thread yielded on its own. */
	curthread->critical_yield = 0;	/* No need to yield anymore. */
	curthread->slice_usec = -1;	/* Restart the time slice. */

	/*
	 * The signal frame is allocated off the stack because
	 * a thread can be interrupted by other signals while
	 * it is running down pending signals.
	 */
	sigemptyset(&psf.psf_sigset);
	curthread->curframe = &psf;

	_thread_enter_uts(&curthread->tmbx, &curkse->k_mbx);

	/*
	 * This thread is being resumed; check for cancellations.
	 */
	if ((once == 0) && (!THR_IN_CRITICAL(curthread))) {
		once = 1;
		curthread->critical_count++;
		THR_UNLOCK_SWITCH(curthread);
		curthread->critical_count--;
		thr_resume_check(curthread, &curthread->tmbx.tm_context, &psf);
		THR_LOCK_SWITCH(curthread);
	}
}

/*
 * This is the scheduler for a KSE which runs a scope system thread.
 * The multi-thread KSE scheduler should also work for a single threaded
 * KSE, but we use a separate scheduler so that it can be fine-tuned
 * to be more efficient (and perhaps not need a separate stack for
 * the KSE, allowing it to use the thread's stack).
 *
 * XXX - This probably needs some work.
 */
#ifdef NOT_YET
static void
kse_sched_single(struct kse *curkse)
{
	struct pthread *curthread = curkse->k_curthread;
	struct pthread *td_wait;
	struct timespec ts;
	int level;

	if (curthread->active == 0) {
		if (curthread->state != PS_RUNNING) {
			/* Check to see if the thread has timed out. */
			KSE_GET_TOD(curkse, &ts);
			if (thr_timedout(curthread, &ts) != 0) {
				curthread->timeout = 1;
				curthread->state = PS_RUNNING;
			}
		}
	}

	/* This thread no longer needs to yield the CPU: */
	curthread->critical_yield = 0;
	curthread->need_switchout = 0;

	/*
	 * Lock the scheduling queue.
	 *
	 * There is no scheduling queue for single threaded KSEs,
	 * but we need a lock for protection regardless.
	 */
	KSE_SCHED_LOCK(curkse, curkse->k_kseg);

	/*
	 * This has to do the job of kse_switchout_thread(), only
	 * for a single threaded KSE/KSEG.
	 */

	switch (curthread->state) {
	case PS_DEAD:
		/* Unlock the scheduling queue and exit the KSE. */
		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
		kse_fini(curkse);	/* does not return */
		break;

	case PS_COND_WAIT:
	case PS_SLEEP_WAIT:
		/* Only insert threads that can timeout: */
		if (curthread->wakeup_time.tv_sec != -1) {
			/* Insert into the waiting queue: */
			KSE_WAITQ_INSERT(curkse, curthread);
		}
		break;

	case PS_LOCKWAIT:
		level = curthread->locklevel - 1;
		if (_LCK_BUSY(&curthread->lockusers[level]))
			KSE_WAITQ_INSERT(curkse, curthread);
		else
			THR_SET_STATE(curthread, PS_RUNNING);
		break;

	case PS_JOIN:
	case PS_MUTEX_WAIT:
	case PS_RUNNING:
	case PS_SIGSUSPEND:
	case PS_SIGWAIT:
	case PS_SUSPENDED:
	case PS_DEADLOCK:
	default:
		/*
		 * These states don't timeout and don't need
		 * to be in the waiting queue.
		 */
		break;
	}
	while (curthread->state != PS_RUNNING) {
		curthread->active = 0;
		td_wait = KSE_WAITQ_FIRST(curkse);

		kse_wait(curkse, td_wait);

		if (td_wait != NULL) {
			KSE_GET_TOD(curkse, &ts);
			if (thr_timedout(curthread, &ts)) {
				/* Indicate the thread timed out: */
				td_wait->timeout = 1;

				/* Make the thread runnable. */
				THR_SET_STATE(td_wait, PS_RUNNING);
				KSE_WAITQ_REMOVE(curkse, td_wait);
			}
		}
		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
		kse_check_signals(curkse);
		KSE_SCHED_LOCK(curkse, curkse->k_kseg);
	}

	/* Remove the frame reference. */
	curthread->curframe = NULL;

	/* Unlock the scheduling queue. */
	KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);

	/*
	 * Continue the thread at its current frame:
	 */
	DBG_MSG("Continuing bound thread %p\n", curthread);
	_thread_switch(&curthread->tmbx, &curkse->k_mbx.km_curthread);
	PANIC("Thread has returned from _thread_switch");
}
#endif

void
dump_queues(struct kse *curkse)
{
	struct pthread *thread;

	DBG_MSG("Threads in waiting queue:\n");
	TAILQ_FOREACH(thread, &curkse->k_kseg->kg_schedq.sq_waitq, pqe) {
		DBG_MSG("  thread %p, state %d, blocked %d\n",
		    thread, thread->state, thread->blocked);
	}
}

/*
 * This is the scheduler for a KSE which runs multiple threads.
 */
static void
kse_sched_multi(struct kse *curkse)
{
	struct pthread *curthread, *td_wait;
	struct pthread_sigframe *curframe;
	int ret;

	THR_ASSERT(curkse->k_mbx.km_curthread == NULL,
	    "Mailbox not null in kse_sched_multi");

	/* Check for first time initialization: */
	if ((curkse->k_flags & KF_INITIALIZED) == 0) {
		/* Set up this KSE's specific data. */
		_ksd_setprivate(&curkse->k_ksd);
		_set_curkse(curkse);

		/* Set this before grabbing the context. */
		curkse->k_flags |= KF_INITIALIZED;
	}

	/* This may have returned from a kse_release(). */
	if (KSE_WAITING(curkse)) {
		DBG_MSG("Entered upcall when KSE is waiting.");
		KSE_CLEAR_WAIT(curkse);
	}

	curthread = curkse->k_curthread;
	if (curthread == NULL || curthread->lock_switch == 0) {
		/*
		 * curthread was preempted by an upcall; this is not a
		 * voluntary context switch.  Take the scheduling lock.
		 */
		KSE_SCHED_LOCK(curkse, curkse->k_kseg);
	}

	/*
	 * If the current thread was completed in another KSE, then
	 * it will be in the run queue.  Don't mark it as being blocked.
	 */
	if ((curthread != NULL) &&
	    ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0) &&
	    (curthread->need_switchout == 0)) {
		/*
		 * Assume the current thread is blocked; when the
		 * completed threads are checked and if the current
		 * thread is among the completed, the blocked flag
		 * will be cleared.
		 */
		curthread->blocked = 1;
	}

	/* Check for any unblocked threads in the kernel. */
	kse_check_completed(curkse);

	/*
	 * Check for threads that have timed out.
	 */
	kse_check_waitq(curkse);

	/*
	 * Switchout the current thread, if necessary, as the last step
	 * so that it is inserted into the run queue (if it's runnable)
	 * _after_ any other threads that were added to it above.
	 */
	if (curthread == NULL)
		;  /* Nothing to do here. */
	else if ((curthread->need_switchout == 0) &&
	    (curthread->blocked == 0) && (THR_IN_CRITICAL(curthread))) {
		/*
		 * Resume the thread and tell it to yield when
		 * it leaves the critical region.
		 */
		curthread->critical_yield = 1;
		curthread->active = 1;
		if ((curthread->flags & THR_FLAGS_IN_RUNQ) != 0)
			KSE_RUNQ_REMOVE(curkse, curthread);
		curkse->k_curthread = curthread;
		curthread->kse = curkse;
		DBG_MSG("Continuing thread %p in critical region\n",
		    curthread);
		if (curthread->lock_switch) {
			KSE_SCHED_LOCK(curkse, curkse->k_kseg);
			ret = _thread_switch(&curthread->tmbx, 0);
		} else {
			KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
			ret = _thread_switch(&curthread->tmbx,
			    &curkse->k_mbx.km_curthread);
		}
		if (ret != 0)
			PANIC("Can't resume thread in critical region\n");
	}
	else if ((curthread->flags & THR_FLAGS_IN_RUNQ) == 0)
		kse_switchout_thread(curkse, curthread);
	curkse->k_curthread = NULL;

	kse_wakeup_multi(curkse);

	/* This has to be done without the scheduling lock held. */
	KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
	kse_check_signals(curkse);
	KSE_SCHED_LOCK(curkse, curkse->k_kseg);

	dump_queues(curkse);

	/* Check if there are no threads ready to run: */
	while (((curthread = KSE_RUNQ_FIRST(curkse)) == NULL) &&
	    (curkse->k_kseg->kg_threadcount != 0)) {
		/*
		 * Wait for a thread to become active or until there are
		 * no more threads.
		 */
		td_wait = KSE_WAITQ_FIRST(curkse);
		kse_wait(curkse, td_wait);
		kse_check_completed(curkse);
		kse_check_waitq(curkse);
		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
		kse_check_signals(curkse);
		KSE_SCHED_LOCK(curkse, curkse->k_kseg);
	}

	/* Check for no more threads: */
	if (curkse->k_kseg->kg_threadcount == 0) {
		/*
		 * Normally this shouldn't return, but it will if there
		 * are other KSEs running that create new threads that
		 * are assigned to this KSE[G].  For instance, if a scope
		 * system thread were to create a scope process thread
		 * and this kse[g] is the initial kse[g], then that newly
		 * created thread would be assigned to us (the initial
		 * kse[g]).
		 */
		KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
		kse_fini(curkse);
		KSE_SCHED_LOCK(curkse, curkse->k_kseg);
		curthread = KSE_RUNQ_FIRST(curkse);
	}

	THR_ASSERT(curthread != NULL,
	    "Return from kse_wait/fini without thread.");
	THR_ASSERT(curthread->state != PS_DEAD,
	    "Trying to resume dead thread!");
	KSE_RUNQ_REMOVE(curkse, curthread);

	/*
	 * Make the selected thread the current thread.
	 */
	curkse->k_curthread = curthread;

	/*
	 * Make sure the current thread's kse points to this kse.
	 */
	curthread->kse = curkse;

	/*
	 * Reset accounting.
	 */
	curthread->tmbx.tm_uticks = 0;
	curthread->tmbx.tm_sticks = 0;

	/*
	 * Reset the time slice if this thread is running for the first
	 * time or running again after using its full time slice allocation.
	 */
	if (curthread->slice_usec == -1)
		curthread->slice_usec = 0;

	/* Mark the thread active. */
	curthread->active = 1;

	/* Remove the frame reference. */
	curframe = curthread->curframe;
	curthread->curframe = NULL;

	kse_wakeup_multi(curkse);

	/* Unlock the scheduling queue: */
	KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);

	/*
	 * The thread's current signal frame will only be NULL if it
	 * is being resumed after being blocked in the kernel.  In
	 * this case, and if the thread needs to run down pending
	 * signals or needs a cancellation check, we need to add a
	 * signal frame to the thread's context.
	 */
#ifdef NOT_YET
	if ((curframe == NULL) && ((curthread->check_pending != 0) ||
	    (((curthread->cancelflags & THR_AT_CANCEL_POINT) == 0) &&
	    ((curthread->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))))
		signalcontext(&curthread->tmbx.tm_context, 0,
		    (__sighandler_t *)thr_resume_wrapper);
#else
	if ((curframe == NULL) && (curthread->check_pending != 0))
		signalcontext(&curthread->tmbx.tm_context, 0,
		    (__sighandler_t *)thr_resume_wrapper);
#endif
#ifdef GS_HACK
	/* XXX - The kernel sometimes forgets to restore %gs properly. */
	_ksd_setprivate(&curkse->k_ksd);
#endif
	/*
	 * Continue the thread at its current frame:
	 */
	if (curthread->lock_switch) {
		KSE_SCHED_LOCK(curkse, curkse->k_kseg);
		ret = _thread_switch(&curthread->tmbx, 0);
	} else {
		ret = _thread_switch(&curthread->tmbx,
		    &curkse->k_mbx.km_curthread);
	}
	if (ret != 0)
		PANIC("Thread has returned from _thread_switch");

	/* This point should not be reached. */
	PANIC("Thread has returned from _thread_switch");
}

static void
kse_check_signals(struct kse *curkse)
{
	sigset_t sigset;
	int i;

	/* Deliver posted signals. */
	for (i = 0; i < _SIG_WORDS; i++) {
		atomic_swap_int(&curkse->k_mbx.km_sigscaught.__bits[i],
		    0, &sigset.__bits[i]);
	}
	if (SIGNOTEMPTY(sigset)) {
		/*
		 * Dispatch each signal.
		 *
		 * XXX - There is no siginfo for any of these.
		 *       I think there should be, especially for
		 *       signals from other processes (si_pid, si_uid).
		 */
		for (i = 1; i < NSIG; i++) {
			if (sigismember(&sigset, i) != 0) {
				DBG_MSG("Dispatching signal %d\n", i);
				_thr_sig_dispatch(curkse, i,
				    NULL /* no siginfo */);
			}
		}
		sigemptyset(&sigset);
		__sys_sigprocmask(SIG_SETMASK, &sigset, NULL);
	}
}

static void
thr_resume_wrapper(int unused_1, siginfo_t *unused_2, ucontext_t *ucp)
{
	struct pthread *curthread = _get_curthread();

	thr_resume_check(curthread, ucp, NULL);
}

static void
thr_resume_check(struct pthread *curthread, ucontext_t *ucp,
    struct pthread_sigframe *psf)
{
	/* Check signals before cancellations. */
	while (curthread->check_pending != 0) {
		/* Clear the pending flag. */
		curthread->check_pending = 0;

		/*
		 * It's perfectly valid, though not portable, for
		 * signal handlers to munge their interrupted context
		 * and expect to return to it.  Ensure we use the
		 * correct context when running down signals.
		 */
		_thr_sig_rundown(curthread, ucp, psf);
	}

#ifdef NOT_YET
	if (((curthread->cancelflags & THR_AT_CANCEL_POINT) == 0) &&
	    ((curthread->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
		pthread_testcancel();
#endif
}

/*
 * Clean up a thread.  This must be called with the thread's KSE
 * scheduling lock held.  The thread must be a thread from the
 * KSE's group.
 */
static void
thr_cleanup(struct kse *curkse, struct pthread *thread)
{
	struct pthread *joiner;

	if ((joiner = thread->joiner) != NULL) {
		thread->joiner = NULL;
		if ((joiner->state == PS_JOIN) &&
		    (joiner->join_status.thread == thread)) {
			joiner->join_status.thread = NULL;

			/* Set the return status for the joining thread: */
			joiner->join_status.ret = thread->ret;

			/* Make the thread runnable. */
			if (joiner->kseg == curkse->k_kseg)
				_thr_setrunnable_unlocked(joiner);
			else {
				KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
				KSE_SCHED_LOCK(curkse, joiner->kseg);
				_thr_setrunnable_unlocked(joiner);
				KSE_SCHED_UNLOCK(curkse, joiner->kseg);
				KSE_SCHED_LOCK(curkse, curkse->k_kseg);
			}
		}
		thread->attr.flags |= PTHREAD_DETACHED;
	}

	if ((thread->attr.flags & PTHREAD_SCOPE_PROCESS) == 0) {
		/*
		 * Remove the thread from the KSEG's list of threads.
		 */
		KSEG_THRQ_REMOVE(thread->kseg, thread);
		/*
		 * Migrate the thread to the main KSE so that this
		 * KSE and KSEG can be cleaned when their last thread
		 * exits.
		 */
		thread->kseg = _kse_initial->k_kseg;
		thread->kse = _kse_initial;
	}
	thread->flags |= THR_FLAGS_GC_SAFE;

	/*
	 * We can't hold the thread list lock while holding the
	 * scheduler lock.
	 */
	KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
	DBG_MSG("Adding thread %p to GC list\n", thread);
	KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
	THR_GCLIST_ADD(thread);
	KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
	KSE_SCHED_LOCK(curkse, curkse->k_kseg);
}

void
_thr_gc(struct pthread *curthread)
{
	struct pthread *td, *td_next;
	kse_critical_t crit;
	TAILQ_HEAD(, pthread) worklist;

	TAILQ_INIT(&worklist);
	crit = _kse_critical_enter();
	KSE_LOCK_ACQUIRE(curthread->kse, &_thread_list_lock);

	/* Check the threads waiting for GC. */
	for (td = TAILQ_FIRST(&_thread_gc_list); td != NULL; td = td_next) {
		td_next = TAILQ_NEXT(td, gcle);
		if ((td->flags & THR_FLAGS_GC_SAFE) == 0)
			continue;
#ifdef NOT_YET
		else if (((td->attr.flags & PTHREAD_SCOPE_PROCESS) != 0) &&
		    (td->kse->k_mbx.km_flags == 0)) {
			/*
			 * The thread and KSE are operating on the same
			 * stack.  Wait for the KSE to exit before freeing
			 * the thread's stack as well as everything else.
			 */
			continue;
		}
#endif
		/*
		 * Remove the thread from the GC list.  If the thread
		 * isn't yet detached, it will get added back to the
		 * GC list at a later time.
		 */
		THR_GCLIST_REMOVE(td);
		DBG_MSG("Freeing thread %p stack\n", td);
		/*
		 * We can free the thread stack since it's no longer
		 * in use.
		 */
		_thr_stack_free(&td->attr);
		if (((td->attr.flags & PTHREAD_DETACHED) != 0) &&
		    (td->refcount == 0)) {
			/*
			 * The thread has detached and is no longer
			 * referenced.  It is safe to remove all
			 * remnants of the thread.
			 */
			THR_LIST_REMOVE(td);
			TAILQ_INSERT_HEAD(&worklist, td, gcle);
		}
	}
	KSE_LOCK_RELEASE(curthread->kse, &_thread_list_lock);
	_kse_critical_leave(crit);

	while ((td = TAILQ_FIRST(&worklist)) != NULL) {
		TAILQ_REMOVE(&worklist, td, gcle);

		if ((td->attr.flags & PTHREAD_SCOPE_PROCESS) != 0) {
			crit = _kse_critical_enter();
			KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
			kse_free_unlocked(td->kse);
			kseg_free_unlocked(td->kseg);
			KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
			_kse_critical_leave(crit);
		}
		DBG_MSG("Freeing thread %p\n", td);
		_thr_free(curthread, td);
	}
}


/*
 * Only new threads that are running or suspended may be scheduled.
 */
int
_thr_schedule_add(struct pthread *curthread, struct pthread *newthread)
{
	struct kse *curkse;
	kse_critical_t crit;
	int need_start;
	int ret;

	/*
	 * If this is the first time creating a thread, make sure
	 * the mailbox is set for the current thread.
	 */
	if ((newthread->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) {
#ifdef NOT_YET
		/* We use the thread's stack as the KSE's stack. */
		newthread->kse->k_mbx.km_stack.ss_sp =
		    newthread->attr.stackaddr_attr;
		newthread->kse->k_mbx.km_stack.ss_size =
		    newthread->attr.stacksize_attr;
1153113786Sdeischen#endif
1154113658Sdeischen		/*
1155113658Sdeischen		 * No need to lock the scheduling queue since the
1156113658Sdeischen		 * KSE/KSEG pair have not yet been started.
1157113658Sdeischen		 */
1158113658Sdeischen		KSEG_THRQ_ADD(newthread->kseg, newthread);
1159113786Sdeischen		TAILQ_INSERT_TAIL(&newthread->kseg->kg_kseq, newthread->kse,
1160113786Sdeischen		    k_kgqe);
1161113870Sdeischen		newthread->kseg->kg_ksecount = 1;
1162113658Sdeischen		if (newthread->state == PS_RUNNING)
1163113658Sdeischen			THR_RUNQ_INSERT_TAIL(newthread);
1164113786Sdeischen		newthread->kse->k_curthread = NULL;
1165113786Sdeischen		newthread->kse->k_mbx.km_flags = 0;
1166113786Sdeischen		newthread->kse->k_mbx.km_func = (kse_func_t *)kse_sched_multi;
1167113881Sdeischen		newthread->kse->k_mbx.km_quantum = 0;
1168113786Sdeischen
1169113658Sdeischen		/*
1170113658Sdeischen		 * This thread needs a new KSE and KSEG.
1171113658Sdeischen		 */
1172113658Sdeischen		crit = _kse_critical_enter();
1173113658Sdeischen		curkse = _get_curkse();
1174113658Sdeischen		_ksd_setprivate(&newthread->kse->k_ksd);
1175113786Sdeischen		newthread->kse->k_flags |= KF_INITIALIZED;
1176113870Sdeischen		ret = kse_create(&newthread->kse->k_mbx, 1);
1177113870Sdeischen		if (ret != 0)
1178113870Sdeischen			ret = errno;
1179113658Sdeischen		_ksd_setprivate(&curkse->k_ksd);
1180113658Sdeischen		_kse_critical_leave(crit);
1181113658Sdeischen	}
1182113658Sdeischen	else {
1183113658Sdeischen		/*
1184113658Sdeischen		 * Lock the KSE and add the new thread to its list of
1185113658Sdeischen		 * assigned threads.  If the new thread is runnable, also
1186113658Sdeischen		 * add it to the KSE's run queue.
1187113658Sdeischen		 */
1188113658Sdeischen		need_start = 0;
1189113658Sdeischen		KSE_SCHED_LOCK(curthread->kse, newthread->kseg);
1190113658Sdeischen		KSEG_THRQ_ADD(newthread->kseg, newthread);
1191113658Sdeischen		if (newthread->state == PS_RUNNING)
1192113658Sdeischen			THR_RUNQ_INSERT_TAIL(newthread);
1193113658Sdeischen		if ((newthread->kse->k_flags & KF_STARTED) == 0) {
119413546Sjulian			/*
1195113658Sdeischen			 * This KSE hasn't been started yet.  Start it
1196113658Sdeischen			 * outside of holding the lock.
1197103419Smini			 */
1198113658Sdeischen			newthread->kse->k_flags |= KF_STARTED;
1199113786Sdeischen			newthread->kse->k_mbx.km_func =
1200113786Sdeischen			    (kse_func_t *)kse_sched_multi;
1201113786Sdeischen			newthread->kse->k_mbx.km_flags = 0;
1202113658Sdeischen			need_start = 1;
1203113658Sdeischen		}
1204113658Sdeischen		KSE_SCHED_UNLOCK(curthread->kse, newthread->kseg);
1205113658Sdeischen
1206113658Sdeischen	  	if (need_start != 0)
1207113658Sdeischen			kse_create(&newthread->kse->k_mbx, 0);
1208113658Sdeischen		else if ((newthread->state == PS_RUNNING) &&
1209114187Sdeischen		    KSE_IS_IDLE(newthread->kse)) {
1210103419Smini			/*
1211113658Sdeischen			 * The thread is being scheduled on another KSEG.
121213546Sjulian			 */
1213114187Sdeischen			kse_wakeup_one(newthread);
121448046Sjb		}
1215113870Sdeischen		ret = 0;
1216113658Sdeischen	}
1217113870Sdeischen	return (ret);
1218113658Sdeischen}

/*
 * Insert a thread into its KSE's wait queue.  The queue is kept sorted
 * by wakeup time; threads with no timeout (wakeup_time.tv_sec == -1)
 * are appended at the tail.
 */
void
kse_waitq_insert(struct pthread *thread)
{
	struct pthread *td;

	if (thread->wakeup_time.tv_sec == -1)
		TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq, thread,
		    pqe);
	else {
		td = TAILQ_FIRST(&thread->kse->k_schedq->sq_waitq);
		while ((td != NULL) && (td->wakeup_time.tv_sec != -1) &&
		    ((td->wakeup_time.tv_sec < thread->wakeup_time.tv_sec) ||
		    ((td->wakeup_time.tv_sec == thread->wakeup_time.tv_sec) &&
		    (td->wakeup_time.tv_nsec <= thread->wakeup_time.tv_nsec))))
			td = TAILQ_NEXT(td, pqe);
		if (td == NULL)
			TAILQ_INSERT_TAIL(&thread->kse->k_schedq->sq_waitq,
			    thread, pqe);
		else
			TAILQ_INSERT_BEFORE(td, thread, pqe);
	}
	thread->flags |= THR_FLAGS_IN_WAITQ;
}
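/*
 * A worked example of the ordering kse_waitq_insert() maintains, using
 * three hypothetical threads A, B and C with wakeup times (tv_sec,
 * tv_nsec) of (5, 0), (5, 500) and (-1, -1) respectively:
 *
 *	insert A -> waitq: A(5,0)
 *	insert C -> waitq: A(5,0) C(-1,-1)	(no timeout; goes to tail)
 *	insert B -> waitq: A(5,0) B(5,500) C(-1,-1)
 *
 * The "<=" on tv_nsec makes the insert stable: a thread whose wakeup
 * time equals an already-queued thread's is placed after it.
 */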

/*
 * This must be called with the scheduling lock held.
 */
static void
kse_check_completed(struct kse *kse)
{
	struct pthread *thread;
	struct kse_thr_mailbox *completed;

	if ((completed = kse->k_mbx.km_completed) != NULL) {
		kse->k_mbx.km_completed = NULL;
		while (completed != NULL) {
			thread = completed->tm_udata;
			DBG_MSG("Found completed thread %p, name %s\n",
			    thread,
			    (thread->name == NULL) ? "none" : thread->name);
			thread->blocked = 0;
			if (thread != kse->k_curthread)
				KSE_RUNQ_INSERT_TAIL(kse, thread);
			completed = completed->tm_next;
		}
	}
}

/*
 * This must be called with the scheduling lock held.
 */
static void
kse_check_waitq(struct kse *kse)
{
	struct pthread	*pthread;
	struct timespec ts;

	KSE_GET_TOD(kse, &ts);

	/*
	 * Wake up threads that have timed out.  This has to be
	 * done before adding the current thread to the run queue
	 * so that a CPU intensive thread doesn't get preference
	 * over waiting threads.
	 */
	while (((pthread = KSE_WAITQ_FIRST(kse)) != NULL) &&
	    thr_timedout(pthread, &ts)) {
		/* Remove the thread from the wait queue: */
		KSE_WAITQ_REMOVE(kse, pthread);
		DBG_MSG("Found timed-out thread %p in waitq\n", pthread);

		/* Indicate that the thread timed out: */
		pthread->timeout = 1;

		/* Add the thread to the priority queue: */
		THR_SET_STATE(pthread, PS_RUNNING);
		KSE_RUNQ_INSERT_TAIL(kse, pthread);
	}
}

static int
thr_timedout(struct pthread *thread, struct timespec *curtime)
{
	if (thread->wakeup_time.tv_sec < 0)
		return (0);
	else if (thread->wakeup_time.tv_sec > curtime->tv_sec)
		return (0);
	else if ((thread->wakeup_time.tv_sec == curtime->tv_sec) &&
	    (thread->wakeup_time.tv_nsec > curtime->tv_nsec))
		return (0);
	else
		return (1);
}
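/*
 * Worked examples for thr_timedout(), taking curtime = (10, 500000000):
 *
 *	wakeup_time (-1, -1)        -> 0	(no timeout armed)
 *	wakeup_time (11, 0)         -> 0	(a full second in the future)
 *	wakeup_time (10, 600000000) -> 0	(same second, later nanoseconds)
 *	wakeup_time (10, 500000000) -> 1	(exactly now counts as expired)
 *	wakeup_time (9, 999999999)  -> 1	(in the past)
 */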

/*
 * This must be called with the scheduling lock held.
 *
 * Each thread has a time slice, a wakeup time (used when it wants
 * to wait for a specified amount of time), a run state, and an
 * active flag.
 *
 * When a thread gets run by the scheduler, the active flag is
 * set to non-zero (1).  When a thread performs an explicit yield
 * or schedules a state change, it enters the scheduler and the
 * active flag is cleared.  When the active flag is still seen
 * set in the scheduler, that means that the thread is blocked in
 * the kernel (because it is cleared before entering the scheduler
 * in all other instances).
 *
 * The wakeup time is only set for those states that can time out.
 * It is set to (-1, -1) for all other instances.
 *
 * The thread's run state, aside from being useful when debugging,
 * is used to place the thread in an appropriate queue.  There
 * are two basic queues:
 *
 *   o run queue - queue ordered by priority for all threads
 *                 that are runnable
 *   o waiting queue - queue sorted by wakeup time for all threads
 *                     that are not otherwise runnable (not blocked
 *                     in kernel, not waiting for locks)
 *
 * The thread's time slice is used for round-robin scheduling
 * (the default scheduling policy).  While a SCHED_RR thread
 * is runnable, its time slice accumulates.  When it reaches
 * the time slice interval, it gets reset and the thread is added
 * to the end of the queue of threads at its priority.  When a
 * thread is no longer runnable (blocks in kernel, waits, etc),
 * its time slice is reset.
 *
 * The job of kse_switchout_thread() is to handle all of the above.
 */
static void
kse_switchout_thread(struct kse *kse, struct pthread *thread)
{
	int level;

	/*
	 * Place the currently running thread into the
	 * appropriate queue(s).
	 */
	DBG_MSG("Switching out thread %p, state %d\n", thread, thread->state);
	if (thread->blocked != 0) {
		thread->active = 0;
		thread->need_switchout = 0;
		/* This thread must have blocked in the kernel. */
		/* thread->slice_usec = -1;*/	/* restart timeslice */
		/*
		 * XXX - Check for pending signals for this thread to
		 *       see if we need to interrupt it in the kernel.
		 */
		/* if (thread->check_pending != 0) */
		if ((thread->slice_usec != -1) &&
		    (thread->attr.sched_policy != SCHED_FIFO))
			thread->slice_usec += (thread->tmbx.tm_uticks
			    + thread->tmbx.tm_sticks) * _clock_res_usec;
	}
	else {
		switch (thread->state) {
		case PS_DEAD:
			/*
			 * The scheduler is operating on a different
			 * stack.  It is safe to do garbage collecting
			 * here.
			 */
			thread->active = 0;
			thread->need_switchout = 0;
			thr_cleanup(kse, thread);
			return;

		case PS_RUNNING:
			/* Nothing to do here. */
			break;

		case PS_COND_WAIT:
		case PS_SLEEP_WAIT:
			/* Insert into the waiting queue: */
			KSE_WAITQ_INSERT(kse, thread);
			break;

		case PS_LOCKWAIT:
			/*
			 * This state doesn't time out.
			 */
			thread->wakeup_time.tv_sec = -1;
			thread->wakeup_time.tv_nsec = -1;
			level = thread->locklevel - 1;
			if (_LCK_BUSY(&thread->lockusers[level]))
				KSE_WAITQ_INSERT(kse, thread);
			else
				THR_SET_STATE(thread, PS_RUNNING);
			break;

		case PS_JOIN:
		case PS_MUTEX_WAIT:
		case PS_SIGSUSPEND:
		case PS_SIGWAIT:
		case PS_SUSPENDED:
		case PS_DEADLOCK:
		default:
			/*
			 * These states don't time out.
			 */
			thread->wakeup_time.tv_sec = -1;
			thread->wakeup_time.tv_nsec = -1;

			/* Insert into the waiting queue: */
			KSE_WAITQ_INSERT(kse, thread);
			break;
		}
		if (thread->state != PS_RUNNING) {
			/* Restart the time slice: */
			thread->slice_usec = -1;
		} else {
			if (thread->need_switchout != 0)
				/*
				 * The thread yielded on its own;
				 * restart the timeslice.
				 */
				thread->slice_usec = -1;
			else if ((thread->slice_usec != -1) &&
			    (thread->attr.sched_policy != SCHED_FIFO)) {
				thread->slice_usec += (thread->tmbx.tm_uticks
				    + thread->tmbx.tm_sticks) * _clock_res_usec;
				/* Check for time quantum exceeded: */
				if (thread->slice_usec > TIMESLICE_USEC)
					thread->slice_usec = -1;
			}
			if (thread->slice_usec == -1) {
				/*
				 * The thread exceeded its time quantum or
				 * it yielded the CPU; place it at the tail
				 * of the queue for its priority.
				 */
				KSE_RUNQ_INSERT_TAIL(kse, thread);
			} else {
				/*
				 * The thread hasn't exceeded its interval.
				 * Place it at the head of the queue for its
				 * priority.
				 */
				KSE_RUNQ_INSERT_HEAD(kse, thread);
			}
		}
	}
	thread->active = 0;
	thread->need_switchout = 0;
}
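/*
 * A worked example of the SCHED_RR accounting above, assuming a clock
 * resolution (_clock_res_usec) of 10000 usec and a TIMESLICE_USEC of
 * 20000 (both values are defined elsewhere; the numbers here are only
 * illustrative): a thread charged tm_uticks + tm_sticks == 3 ticks has
 * accumulated 3 * 10000 = 30000 usec > 20000, so slice_usec resets to
 * -1 and the thread goes to the tail of its priority's run queue; a
 * thread charged only 1 tick (10000 usec) keeps its slice and goes to
 * the head.
 */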

/*
 * This function waits for the smallest timeout value of any waiting
 * thread, or until it receives a message from another KSE.
 *
 * This must be called with the scheduling lock held.
 */
static void
kse_wait(struct kse *kse, struct pthread *td_wait)
{
	struct timespec ts, ts_sleep;
	int saved_flags;

	KSE_GET_TOD(kse, &ts);

	if ((td_wait == NULL) || (td_wait->wakeup_time.tv_sec < 0)) {
		/* Limit sleep to no more than 1 minute. */
		ts_sleep.tv_sec = 60;
		ts_sleep.tv_nsec = 0;
	} else {
		TIMESPEC_SUB(&ts_sleep, &td_wait->wakeup_time, &ts);
		if (ts_sleep.tv_sec > 60) {
			ts_sleep.tv_sec = 60;
			ts_sleep.tv_nsec = 0;
		}
	}
	/* Don't sleep for negative times. */
	if ((ts_sleep.tv_sec >= 0) && (ts_sleep.tv_nsec >= 0)) {
		KSE_SET_IDLE(kse);
		kse->k_kseg->kg_idle_kses++;
		KSE_SCHED_UNLOCK(kse, kse->k_kseg);
		saved_flags = kse->k_mbx.km_flags;
		kse->k_mbx.km_flags |= KMF_NOUPCALL;
		kse_release(&ts_sleep);
		kse->k_mbx.km_flags = saved_flags;
		KSE_SCHED_LOCK(kse, kse->k_kseg);
		if (KSE_IS_IDLE(kse)) {
			KSE_CLEAR_IDLE(kse);
			kse->k_kseg->kg_idle_kses--;
		}
	}
}
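/*
 * Example of the sleep computation in kse_wait(): if the current time
 * is (100, 0) and the earliest waiter wakes at (100, 250000000), then
 * TIMESPEC_SUB() yields ts_sleep = (0, 250000000).  That is under the
 * one-minute cap, so the KSE marks itself idle and calls kse_release()
 * with KMF_NOUPCALL set, making the syscall return in place instead of
 * generating a fresh upcall.
 */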

/*
 * This is named kse_fini() rather than kse_exit() so as not to
 * confuse it with the system call of the same name.
 */
static void
kse_fini(struct kse *kse)
{
	struct timespec ts;
	struct kse_group *free_kseg = NULL;

	if ((kse->k_kseg->kg_flags & KGF_SINGLE_THREAD) != 0)
		kse_exit();
	/*
	 * Check to see if this is one of the main kses.
	 */
	else if (kse->k_kseg != _kse_initial->k_kseg) {
		/* Remove this KSE from the KSEG's list of KSEs. */
		KSE_SCHED_LOCK(kse, kse->k_kseg);
		TAILQ_REMOVE(&kse->k_kseg->kg_kseq, kse, k_kgqe);
		kse->k_kseg->kg_ksecount--;
		if (TAILQ_EMPTY(&kse->k_kseg->kg_kseq))
			free_kseg = kse->k_kseg;
		KSE_SCHED_UNLOCK(kse, kse->k_kseg);

		/*
		 * Add this KSE to the list of free KSEs along with
		 * the KSEG if it is now orphaned.
		 */
#ifdef NOT_YET
		KSE_LOCK_ACQUIRE(kse, &kse_lock);
		if (free_kseg != NULL)
			kseg_free_unlocked(free_kseg);
		kse_free_unlocked(kse);
		KSE_LOCK_RELEASE(kse, &kse_lock);
#endif
		kse_exit();
		/* Never returns. */
	} else {
		/*
		 * Wait for the last KSE/thread to exit, or for more
		 * threads to be created (it is possible for additional
		 * scope process threads to be created after the main
		 * thread exits).
		 */
		ts.tv_sec = 120;
		ts.tv_nsec = 0;
		KSE_SET_WAIT(kse);
		KSE_SCHED_LOCK(kse, kse->k_kseg);
		if ((active_kse_count > 1) &&
		    (kse->k_kseg->kg_threadcount == 0)) {
			KSE_SCHED_UNLOCK(kse, kse->k_kseg);
			kse_release(&ts);
			/* The above never returns. */
		}
		else
			KSE_SCHED_UNLOCK(kse, kse->k_kseg);

		/* There are no more threads; exit this process: */
		if (kse->k_kseg->kg_threadcount == 0) {
			/* kse_exit(); */
			__isthreaded = 0;
			exit(0);
		}
	}
}

void
_thr_sig_add(struct pthread *thread, int sig, siginfo_t *info, ucontext_t *ucp)
{
	struct kse *curkse;

	curkse = _get_curkse();

	KSE_SCHED_LOCK(curkse, thread->kseg);
	/*
	 * A thread's assigned KSE can't change out from under us
	 * when we hold the scheduler lock.
	 */
	if (THR_IS_ACTIVE(thread)) {
		/* The thread is active; we can't install the signal for it. */
		/* Make a note in the thread that it has a signal. */
		sigaddset(&thread->sigpend, sig);
		thread->check_pending = 1;
	}
	else {
		/* Make a note in the thread that it has a signal. */
		sigaddset(&thread->sigpend, sig);
		thread->check_pending = 1;

		if (thread->blocked != 0) {
			/* Tell the kernel to interrupt the thread. */
			kse_thr_interrupt(&thread->tmbx);
		}
	}
	KSE_SCHED_UNLOCK(curkse, thread->kseg);
}

void
_thr_set_timeout(const struct timespec *timeout)
{
	struct pthread	*curthread = _get_curthread();
	struct timespec ts;

	/* Reset the timeout flag for the running thread: */
	curthread->timeout = 0;

	/* Check if the thread is to wait forever: */
	if (timeout == NULL) {
		/*
		 * Set the wakeup time to something that can be recognized
		 * as different from an actual time of day:
		 */
		curthread->wakeup_time.tv_sec = -1;
		curthread->wakeup_time.tv_nsec = -1;
	}
	/* Check if no waiting is required: */
	else if ((timeout->tv_sec == 0) && (timeout->tv_nsec == 0)) {
		/* Set the wake up time to 'immediately': */
		curthread->wakeup_time.tv_sec = 0;
		curthread->wakeup_time.tv_nsec = 0;
	} else {
		/* Calculate the time for the current thread to wake up: */
		KSE_GET_TOD(curthread->kse, &ts);
		TIMESPEC_ADD(&curthread->wakeup_time, &ts, timeout);
	}
}
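/*
 * Usage sketch for the three cases handled above (kept under "#if 0";
 * the calling contexts are illustrative assumptions):
 */
#if 0
	_thr_set_timeout(NULL);			/* wait forever: (-1, -1) */

	struct timespec poll = { 0, 0 };
	_thr_set_timeout(&poll);		/* don't wait at all: (0, 0) */

	struct timespec rel = { 2, 0 };
	_thr_set_timeout(&rel);			/* wake up ~2 seconds from now */
#endif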

void
_thr_panic_exit(char *file, int line, char *msg)
{
	char buf[256];

	snprintf(buf, sizeof(buf), "(%s:%d) %s\n", file, line, msg);
	__sys_write(2, buf, strlen(buf));
	abort();
}
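/*
 * Usage sketch (the condition is illustrative): report the failing
 * site on stderr and abort so the panic location is visible.
 */
#if 0
	if (bad_invariant)
		_thr_panic_exit(__FILE__, __LINE__, "scheduling invariant broken");
#endif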

void
_thr_setrunnable(struct pthread *curthread, struct pthread *thread)
{
	kse_critical_t crit;

	crit = _kse_critical_enter();
	KSE_SCHED_LOCK(curthread->kse, thread->kseg);
	_thr_setrunnable_unlocked(thread);
	KSE_SCHED_UNLOCK(curthread->kse, thread->kseg);
	_kse_critical_leave(crit);
}

void
_thr_setrunnable_unlocked(struct pthread *thread)
{
	if ((thread->kseg->kg_flags & KGF_SINGLE_THREAD) != 0)
		/* No silly queues for these threads. */
		THR_SET_STATE(thread, PS_RUNNING);
	else if (thread->state != PS_RUNNING) {
		if ((thread->flags & THR_FLAGS_IN_WAITQ) != 0)
			KSE_WAITQ_REMOVE(thread->kse, thread);
		THR_SET_STATE(thread, PS_RUNNING);
		if ((thread->blocked == 0) &&
		    (thread->flags & THR_FLAGS_IN_RUNQ) == 0)
			THR_RUNQ_INSERT_TAIL(thread);
	}
	/*
	 * XXX - Threads are not yet assigned to specific KSEs; they are
	 *       assigned to the KSEG.  So the fact that a thread's KSE is
	 *       waiting doesn't necessarily mean that it will be the KSE
	 *       that runs the thread after the lock is granted.  But we
	 *       don't know if the other KSEs within the same KSEG are
	 *       also in a waiting state or not, so we err on the side of
	 *       caution and wake up the thread's last known KSE.  We
	 *       ensure that the thread's KSE doesn't change while its
	 *       scheduling lock is held, so it is safe to reference it
	 *       (the KSE).  If the KSE wakes up and doesn't find any more
	 *       work it will again go back to waiting, so no harm is done.
	 */
	kse_wakeup_one(thread);
}

static void
kse_wakeup_one(struct pthread *thread)
{
	struct kse *ke;

	if (KSE_IS_IDLE(thread->kse)) {
		KSE_CLEAR_IDLE(thread->kse);
		thread->kseg->kg_idle_kses--;
		KSE_WAKEUP(thread->kse);
	} else {
		TAILQ_FOREACH(ke, &thread->kseg->kg_kseq, k_kgqe) {
			if (KSE_IS_IDLE(ke)) {
				KSE_CLEAR_IDLE(ke);
				ke->k_kseg->kg_idle_kses--;
				KSE_WAKEUP(ke);
				return;
			}
		}
	}
}

static void
kse_wakeup_multi(struct kse *curkse)
{
	struct kse *ke;
	int tmp;

	if ((tmp = KSE_RUNQ_THREADS(curkse)) && curkse->k_kseg->kg_idle_kses) {
		TAILQ_FOREACH(ke, &curkse->k_kseg->kg_kseq, k_kgqe) {
			if (KSE_IS_IDLE(ke)) {
				KSE_CLEAR_IDLE(ke);
				ke->k_kseg->kg_idle_kses--;
				KSE_WAKEUP(ke);
				if (--tmp == 0)
					break;
			}
		}
	}
}
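/*
 * Example of kse_wakeup_multi()'s policy: with three threads on the run
 * queue and two idle KSEs in the group, both idle KSEs are woken (tmp
 * starts at 3 but only two idle KSEs are found); with one runnable
 * thread and two idle KSEs, only the first idle KSE found is woken.
 */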

struct pthread *
_get_curthread(void)
{
	return (_ksd_curthread);
}

/* This assumes the caller has disabled upcalls. */
struct kse *
_get_curkse(void)
{
	return (_ksd_curkse);
}

void
_set_curkse(struct kse *kse)
{
	_ksd_setprivate(&kse->k_ksd);
}

/*
 * Allocate a new KSEG.
 *
 * We allow the current thread to be NULL in the case that this
 * is the first time a KSEG is being created (library initialization).
 * In this case, we don't need to (and can't) take any locks.
 */
struct kse_group *
_kseg_alloc(struct pthread *curthread)
{
	struct kse_group *kseg = NULL;
	kse_critical_t crit;

	if ((curthread != NULL) && (free_kseg_count > 0)) {
		/* Use the kse lock for the kseg queue. */
		crit = _kse_critical_enter();
		KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
		if ((kseg = TAILQ_FIRST(&free_kse_groupq)) != NULL) {
			TAILQ_REMOVE(&free_kse_groupq, kseg, kg_qe);
			free_kseg_count--;
			active_kseg_count++;
			TAILQ_INSERT_TAIL(&active_kse_groupq, kseg, kg_qe);
		}
		KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
		_kse_critical_leave(crit);
		if (kseg)
			kseg_reinit(kseg);
	}

	/*
	 * Allocate a new KSE group if one wasn't found in the
	 * free list.
	 */
	if ((kseg == NULL) &&
	    ((kseg = (struct kse_group *)malloc(sizeof(*kseg))) != NULL)) {
		if (_pq_alloc(&kseg->kg_schedq.sq_runq,
		    THR_MIN_PRIORITY, THR_LAST_PRIORITY) != 0) {
			free(kseg);
			kseg = NULL;
		} else {
			kseg_init(kseg);
			/* Add the KSEG to the list of active KSEGs. */
			if (curthread != NULL) {
				crit = _kse_critical_enter();
				KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
				active_kseg_count++;
				TAILQ_INSERT_TAIL(&active_kse_groupq,
				    kseg, kg_qe);
				KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
				_kse_critical_leave(crit);
			} else {
				active_kseg_count++;
				TAILQ_INSERT_TAIL(&active_kse_groupq,
				    kseg, kg_qe);
			}
		}
	}
	return (kseg);
}

/*
 * This must be called with the kse lock held and when there are
 * no more threads that reference it.
 */
static void
kseg_free_unlocked(struct kse_group *kseg)
{
	TAILQ_REMOVE(&active_kse_groupq, kseg, kg_qe);
	TAILQ_INSERT_HEAD(&free_kse_groupq, kseg, kg_qe);
	free_kseg_count++;
	active_kseg_count--;
}

void
_kseg_free(struct kse_group *kseg)
{
	struct kse *curkse;
	kse_critical_t crit;

	crit = _kse_critical_enter();
	curkse = _get_curkse();
	KSE_LOCK_ACQUIRE(curkse, &kse_lock);
	kseg_free_unlocked(kseg);
	KSE_LOCK_RELEASE(curkse, &kse_lock);
	_kse_critical_leave(crit);
}

/*
 * Allocate a new KSE.
 *
 * We allow the current thread to be NULL in the case that this
 * is the first time a KSE is being created (library initialization).
 * In this case, we don't need to (and can't) take any locks.
 */
struct kse *
_kse_alloc(struct pthread *curthread)
{
	struct kse *kse = NULL;
	kse_critical_t crit;
	int need_ksd = 0;
	int i;

	if ((curthread != NULL) && (free_kse_count > 0)) {
		crit = _kse_critical_enter();
		KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
		/* Search for a finished KSE. */
		kse = TAILQ_FIRST(&free_kseq);
#ifdef NOT_YET
#define KEMBX_DONE	0x04
		while ((kse != NULL) &&
		    ((kse->k_mbx.km_flags & KEMBX_DONE) == 0)) {
			kse = TAILQ_NEXT(kse, k_qe);
		}
#undef KEMBX_DONE
#endif
		if (kse != NULL) {
			TAILQ_REMOVE(&free_kseq, kse, k_qe);
			free_kse_count--;
			TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe);
			active_kse_count++;
		}
		KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
		_kse_critical_leave(crit);
		if (kse != NULL)
			kse_reinit(kse);
	}
	if ((kse == NULL) &&
	    ((kse = (struct kse *)malloc(sizeof(*kse))) != NULL)) {
		bzero(kse, sizeof(*kse));

		/* Initialize the lockusers. */
		for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) {
			_lockuser_init(&kse->k_lockusers[i], (void *)kse);
			_LCK_SET_PRIVATE2(&kse->k_lockusers[i], NULL);
		}
		/* _lock_init(kse->k_lock, ...) */

		/* We had to malloc a kse; mark it as needing new KSD. */
		need_ksd = 1;

		/*
		 * Create the KSE context.
		 *
		 * XXX - For now this is done here in the allocation.
		 *       In the future, we may want to have it done
		 *       outside the allocation so that scope system
		 *       threads (one thread per KSE) are not required
		 *       to have a stack for an unneeded kse upcall.
		 */
		kse->k_mbx.km_func = (kse_func_t *)kse_sched_multi;
		kse->k_mbx.km_stack.ss_sp = (char *)malloc(KSE_STACKSIZE);
		kse->k_mbx.km_stack.ss_size = KSE_STACKSIZE;
		kse->k_mbx.km_udata = (void *)kse;
		kse->k_mbx.km_quantum = 20000;
		/*
		 * We need to keep a copy of the stack in case it
		 * doesn't get used; a KSE running a scope system
		 * thread will use that thread's stack.
		 */
		kse->k_stack.ss_sp = kse->k_mbx.km_stack.ss_sp;
		kse->k_stack.ss_size = kse->k_mbx.km_stack.ss_size;
		if (kse->k_mbx.km_stack.ss_sp == NULL) {
			for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) {
				_lockuser_destroy(&kse->k_lockusers[i]);
			}
			/* _lock_destroy(&kse->k_lock); */
			free(kse);
			kse = NULL;
		}
	}
	if ((kse != NULL) && (need_ksd != 0)) {
		/* This KSE needs initialization. */
		if (curthread != NULL) {
			crit = _kse_critical_enter();
			KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
		}
		/* Initialize KSD inside of the lock. */
		if (_ksd_create(&kse->k_ksd, (void *)kse, sizeof(*kse)) != 0) {
			if (curthread != NULL) {
				KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
				_kse_critical_leave(crit);
			}
			free(kse->k_mbx.km_stack.ss_sp);
			for (i = 0; i < MAX_KSE_LOCKLEVEL; i++) {
				_lockuser_destroy(&kse->k_lockusers[i]);
			}
			free(kse);
			return (NULL);
		}
		kse->k_flags = 0;
		TAILQ_INSERT_TAIL(&active_kseq, kse, k_qe);
		active_kse_count++;
		if (curthread != NULL) {
			KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
			_kse_critical_leave(crit);
		}
	}
	return (kse);
}
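/*
 * Allocation sketch (not compiled; the EAGAIN convention is an
 * assumption, not this file's API): _kse_alloc() first recycles a
 * cached KSE from the free list via kse_reinit(), and only then falls
 * back to malloc(), in which case it must also create the upcall stack
 * and KSD.  A NULL return means either step failed.
 */
#if 0
	struct kse *kse;

	if ((kse = _kse_alloc(curthread)) == NULL)
		return (EAGAIN);
	/* ... attach the KSE to a group; if that fails: */
	_kse_free(curthread, kse);
#endif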

static void
kse_reinit(struct kse *kse)
{
	bzero(&kse->k_mbx, sizeof(struct kse_mailbox));
	kse->k_curthread = 0;
	kse->k_kseg = 0;
	kse->k_schedq = 0;
	kse->k_locklevel = 0;
	sigemptyset(&kse->k_sigmask);
	bzero(&kse->k_sigq, sizeof(kse->k_sigq));
	kse->k_check_sigq = 0;
	kse->k_flags = 0;
	kse->k_waiting = 0;
	kse->k_error = 0;
	kse->k_cpu = 0;
	kse->k_done = 0;
}

void
kse_free_unlocked(struct kse *kse)
{
	TAILQ_REMOVE(&active_kseq, kse, k_qe);
	active_kse_count--;
	kse->k_kseg = NULL;
	kse->k_mbx.km_quantum = 20000;
	kse->k_flags &= ~KF_INITIALIZED;
	TAILQ_INSERT_HEAD(&free_kseq, kse, k_qe);
	free_kse_count++;
}

void
_kse_free(struct pthread *curthread, struct kse *kse)
{
	kse_critical_t crit;

	if (curthread == NULL)
		kse_free_unlocked(kse);
	else {
		crit = _kse_critical_enter();
		KSE_LOCK_ACQUIRE(curthread->kse, &kse_lock);
		kse_free_unlocked(kse);
		KSE_LOCK_RELEASE(curthread->kse, &kse_lock);
		_kse_critical_leave(crit);
	}
}

static void
kseg_init(struct kse_group *kseg)
{
	kseg_reinit(kseg);
	_lock_init(&kseg->kg_lock, LCK_ADAPTIVE, _kse_lock_wait,
	    _kse_lock_wakeup);
}

static void
kseg_reinit(struct kse_group *kseg)
{
	TAILQ_INIT(&kseg->kg_kseq);
	TAILQ_INIT(&kseg->kg_threadq);
	TAILQ_INIT(&kseg->kg_schedq.sq_waitq);
	kseg->kg_threadcount = 0;
	kseg->kg_ksecount = 0;
	kseg->kg_idle_kses = 0;
	kseg->kg_flags = 0;
}

struct pthread *
_thr_alloc(struct pthread *curthread)
{
	kse_critical_t crit;
	struct pthread *thread = NULL;

	if (curthread != NULL) {
		if (GC_NEEDED())
			_thr_gc(curthread);
		if (free_thread_count > 0) {
			crit = _kse_critical_enter();
			KSE_LOCK_ACQUIRE(curthread->kse, &thread_lock);
			if ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
				TAILQ_REMOVE(&free_threadq, thread, tle);
				free_thread_count--;
			}
			KSE_LOCK_RELEASE(curthread->kse, &thread_lock);
			_kse_critical_leave(crit);
		}
	}
	if (thread == NULL)
		thread = (struct pthread *)malloc(sizeof(struct pthread));
	return (thread);
}

void
_thr_free(struct pthread *curthread, struct pthread *thread)
{
	kse_critical_t crit;
	int i;

	DBG_MSG("Freeing thread %p\n", thread);
	if ((curthread == NULL) || (free_thread_count >= MAX_CACHED_THREADS)) {
		for (i = 0; i < MAX_THR_LOCKLEVEL; i++) {
			_lockuser_destroy(&thread->lockusers[i]);
		}
		_lock_destroy(&thread->lock);
		free(thread);
	}
	else {
		crit = _kse_critical_enter();
		KSE_LOCK_ACQUIRE(curthread->kse, &thread_lock);
		TAILQ_INSERT_HEAD(&free_threadq, thread, tle);
		free_thread_count++;
		KSE_LOCK_RELEASE(curthread->kse, &thread_lock);
		_kse_critical_leave(crit);
	}
}
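/*
 * Caching sketch (not compiled): _thr_alloc() and _thr_free() are
 * symmetric.  A freed thread is pushed onto free_threadq, up to
 * MAX_CACHED_THREADS, so a later _thr_alloc() can recycle it without
 * another malloc(); past the cap its locks are torn down and the
 * memory is released.
 */
#if 0
	struct pthread *td = _thr_alloc(curthread);

	/* ... initialize and run the thread to completion ... */
	_thr_free(curthread, td);	/* usually just requeued, not freed */
#endif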