kern_thread.c revision 106180
/*
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * $FreeBSD: head/sys/kern/kern_thread.c 106180 2002-10-30 02:28:41Z davidxu $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/tty.h>
#include <sys/signalvar.h>
#include <sys/sx.h>
#include <sys/user.h>
#include <sys/jail.h>
#include <sys/kse.h>
#include <sys/ktr.h>
#include <sys/ucontext.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/pmap.h>
#include <vm/uma.h>
#include <vm/vm_map.h>

#include <machine/frame.h>

/*
 * KSEGRP related storage.
 */
static uma_zone_t ksegrp_zone;
static uma_zone_t kse_zone;
static uma_zone_t thread_zone;

/* DEBUG ONLY */
SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
static int oiks_debug = 1;	/* 0 disable, 1 printf, 2 enter debugger */
SYSCTL_INT(_kern_threads, OID_AUTO, oiks, CTLFLAG_RW,
	&oiks_debug, 0, "OIKS thread debug");

static int max_threads_per_proc = 10;
SYSCTL_INT(_kern_threads, OID_AUTO, max_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))
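/*
 * Example: RANGEOF(struct thread, td_startzero, td_endzero) gives the
 * size in bytes of the member window between the two marker fields, so
 * the whole window can be cleared in one call, as done later in this
 * file:
 *
 *	bzero(&td->td_startzero,
 *	    RANGEOF(struct thread, td_startzero, td_endzero));
 */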

struct threadqueue zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
struct mtx zombie_thread_lock;
MTX_SYSINIT(zombie_thread_lock, &zombie_thread_lock,
    "zombie_thread_lock", MTX_SPIN);
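
/*
 * Exiting threads, KSEs and ksegrps cannot free themselves while still
 * running on their own stacks, so they are parked on these "zombie"
 * queues instead and reclaimed later by thread_reap().  All three
 * queues are protected by the one zombie_thread_lock, despite its
 * thread-specific name.
 */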

void kse_purge(struct proc *p, struct thread *td);

/*
 * Prepare a thread for use.
 */
static void
thread_ctor(void *mem, int size, void *arg)
{
	struct thread	*td;

	KASSERT((size == sizeof(struct thread)),
	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct thread)));

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
	td->td_flags |= TDF_UNBOUND;
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread	*td;

	KASSERT((size == sizeof(struct thread)),
	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct thread)));

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static void
thread_init(void *mem, int size)
{
	struct thread	*td;

	KASSERT((size == sizeof(struct thread)),
	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct thread)));

	td = (struct thread *)mem;
	mtx_lock(&Giant);
	pmap_new_thread(td, 0);
	mtx_unlock(&Giant);
	cpu_thread_setup(td);
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread	*td;

	KASSERT((size == sizeof(struct thread)),
	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct thread)));

	td = (struct thread *)mem;
	pmap_dispose_thread(td);
}
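
/*
 * Note on the UMA hooks above: thread_ctor()/thread_dtor() run on every
 * allocation and free from the zone, while thread_init()/thread_fini()
 * run only when an item first enters or finally leaves the zone.  The
 * kernel stack created by pmap_new_thread() is therefore "type stable"
 * and survives ordinary thread_alloc()/thread_free() cycles.
 */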

/*
 * Link a KSE onto its ksegrp's list of KSEs (it starts unqueued).
 */
void
kse_link(struct kse *ke, struct ksegrp *kg)
{
	struct proc *p = kg->kg_proc;

	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
	kg->kg_kses++;
	ke->ke_state = KES_UNQUEUED;
	ke->ke_proc	= p;
	ke->ke_ksegrp	= kg;
	ke->ke_thread	= NULL;
	ke->ke_oncpu = NOCPU;
}

void
kse_unlink(struct kse *ke)
{
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	kg = ke->ke_ksegrp;
	if (ke->ke_state == KES_IDLE) {
		kg->kg_idle_kses--;
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
	}

	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
	if (--kg->kg_kses == 0)
		ksegrp_unlink(kg);
	/*
	 * Aggregate stats from the KSE.
	 */
	kse_stash(ke);
}

void
ksegrp_link(struct ksegrp *kg, struct proc *p)
{

	TAILQ_INIT(&kg->kg_threads);
	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_slpq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
	TAILQ_INIT(&kg->kg_iq);		/* idle kses in ksegrp */
	TAILQ_INIT(&kg->kg_lq);		/* loan kses in ksegrp */
	kg->kg_proc	= p;
	/* The following counters are in the -zero- section and may not need clearing. */
	kg->kg_numthreads = 0;
	kg->kg_runnable = 0;
	kg->kg_kses = 0;
	kg->kg_idle_kses = 0;
	kg->kg_loan_kses = 0;
	kg->kg_runq_kses = 0; /* XXXKSE change name */
	/* Link it in now that it's consistent. */
	p->p_numksegrps++;
	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
}

void
ksegrp_unlink(struct ksegrp *kg)
{
	struct proc *p;

	mtx_assert(&sched_lock, MA_OWNED);
	p = kg->kg_proc;
	KASSERT(((kg->kg_numthreads == 0) && (kg->kg_kses == 0)),
	    ("ksegrp_unlink: residual threads or KSEs"));
	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
	p->p_numksegrps--;
	/*
	 * Aggregate stats from the KSEGRP.
	 */
	ksegrp_stash(kg);
}

/*
 * For a newly created process,
 * link up the structure and its initial threads etc.
 */
void
proc_linkup(struct proc *p, struct ksegrp *kg,
			struct kse *ke, struct thread *td)
{

	TAILQ_INIT(&p->p_ksegrps);	     /* all ksegrps in proc */
	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
	TAILQ_INIT(&p->p_suspended);	     /* Threads suspended */
	p->p_numksegrps = 0;
	p->p_numthreads = 0;

	ksegrp_link(kg, p);
	kse_link(ke, kg);
	thread_link(td, kg);
}
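
/*
 * For reference, the ownership hierarchy set up above:
 *
 *	proc
 *	  +-- ksegrp(s)        (p_ksegrps)
 *	        +-- kse(s)     (kg_kseq; idle ones also on kg_iq)
 *	        +-- thread(s)  (kg_threads; also on p_threads)
 *
 * A new process starts with exactly one of each.
 */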

int
kse_thr_interrupt(struct thread *td, struct kse_thr_interrupt_args *uap)
{
	struct proc *p;
	struct thread *td2;

	p = td->td_proc;
	mtx_lock_spin(&sched_lock);
	FOREACH_THREAD_IN_PROC(p, td2) {
		if (td2->td_mailbox == uap->tmbx) {
			td2->td_flags |= TDF_INTERRUPT;
			if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR)) {
				if (td2->td_flags & TDF_CVWAITQ)
					cv_abort(td2);
				else
					abortsleep(td2);
			}
			mtx_unlock_spin(&sched_lock);
			return (0);
		}
	}
	mtx_unlock_spin(&sched_lock);
	return (ESRCH);
}

int
kse_exit(struct thread *td, struct kse_exit_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;

	p = td->td_proc;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_KSES))
		return (EINVAL);
	/* Must be a bound thread. */
	if (td->td_flags & TDF_UNBOUND)
		return (EINVAL);
	kg = td->td_ksegrp;
	/* Serialize killing a KSE. */
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	if ((kg->kg_kses == 1) && (kg->kg_numthreads > 1)) {
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (EDEADLK);
	}
	if ((p->p_numthreads == 1) && (p->p_numksegrps == 1)) {
		p->p_flag &= ~P_KSES;
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	} else {
		while (mtx_owned(&Giant))
			mtx_unlock(&Giant);
		td->td_kse->ke_flags |= KEF_EXIT;
		thread_exit();
		/* NOTREACHED */
	}
	return (0);
}

int
kse_release(struct thread *td, struct kse_release_args *uap)
{
	struct proc *p;

	p = td->td_proc;
	/* KSE-enabled processes only, please. */
	if (p->p_flag & P_KSES) {
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		thread_exit();
		/* NOTREACHED */
	}
	return (EINVAL);
}

/* struct kse_wakeup_args {
	struct kse_mailbox *mbx;
}; */
int
kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
{
	struct proc *p;
	struct kse *ke, *ke2;
	struct ksegrp *kg;

	p = td->td_proc;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_KSES))
		return (EINVAL);
	if (td->td_standin == NULL)
		td->td_standin = thread_alloc();
	ke = NULL;
	mtx_lock_spin(&sched_lock);
	if (uap->mbx) {
		FOREACH_KSEGRP_IN_PROC(p, kg) {
			FOREACH_KSE_IN_GROUP(kg, ke2) {
				if (ke2->ke_mailbox != uap->mbx)
					continue;
				if (ke2->ke_state == KES_IDLE) {
					ke = ke2;
					goto found;
				} else {
					mtx_unlock_spin(&sched_lock);
					td->td_retval[0] = 0;
					td->td_retval[1] = 0;
					return (0);
				}
			}
		}
	} else {
		kg = td->td_ksegrp;
		ke = TAILQ_FIRST(&kg->kg_iq);
	}
	if (ke == NULL) {
		mtx_unlock_spin(&sched_lock);
		return (ESRCH);
	}
found:
	thread_schedule_upcall(td, ke);
	mtx_unlock_spin(&sched_lock);
	td->td_retval[0] = 0;
	td->td_retval[1] = 0;
	return (0);
}

/*
 * No new KSEGRP: on the first call, use the current KSE and do not
 * schedule an upcall.  In all other situations, allocate a new KSE
 * and schedule an upcall on it.
 */
/* struct kse_create_args {
	struct kse_mailbox *mbx;
	int newgroup;
}; */
int
kse_create(struct thread *td, struct kse_create_args *uap)
{
	struct kse *newke;
	struct kse *ke;
	struct ksegrp *newkg;
	struct ksegrp *kg;
	struct proc *p;
	struct kse_mailbox mbx;
	int err;

	p = td->td_proc;
	if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
		return (err);

	p->p_flag |= P_KSES; /* easier to just set it than to test and set */
	kg = td->td_ksegrp;
	if (uap->newgroup) {
		/*
		 * If we want a new KSEGRP it doesn't matter whether
		 * we have already fired up KSE mode before or not.
		 * We put the process in KSE mode and create a new KSEGRP
		 * and KSE. If our KSE has not got a mailbox yet then
		 * that doesn't matter, just leave it that way. It will
		 * ensure that this thread stays BOUND. It's possible
		 * that the call came from a threaded library and the main
		 * program knows nothing of threads.
		 */
		newkg = ksegrp_alloc();
		bzero(&newkg->kg_startzero, RANGEOF(struct ksegrp,
		      kg_startzero, kg_endzero));
		bcopy(&kg->kg_startcopy, &newkg->kg_startcopy,
		      RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
		newke = kse_alloc();
	} else {
		/*
		 * Otherwise, if we have already set this KSE
		 * to have a mailbox, we want to make another KSE here,
		 * but only if we are not already at the limit, which
		 * is 1 per CPU max.
		 *
		 * If the current KSE doesn't have a mailbox we just use it
		 * and give it one.
		 *
		 * Because we don't like to access
		 * the KSE outside of schedlock if we are UNBOUND,
		 * (because it can change if we are preempted by an interrupt)
		 * we can deduce it as having a mailbox if we are UNBOUND,
		 * and only need to actually look at it if we are BOUND,
		 * which is safe.
		 */
		if ((td->td_flags & TDF_UNBOUND) || td->td_kse->ke_mailbox) {
#if 0  /* while debugging */
#ifdef SMP
			if (kg->kg_kses > mp_ncpus)
#endif
				return (EPROCLIM);
#endif
			newke = kse_alloc();
		} else {
			newke = NULL;
		}
		newkg = NULL;
	}
	if (newke) {
		bzero(&newke->ke_startzero, RANGEOF(struct kse,
		      ke_startzero, ke_endzero));
#if 0
		bcopy(&ke->ke_startcopy, &newke->ke_startcopy,
		      RANGEOF(struct kse, ke_startcopy, ke_endcopy));
#endif
		/* For the first call this may not have been set. */
		if (td->td_standin == NULL) {
			td->td_standin = thread_alloc();
		}
		mtx_lock_spin(&sched_lock);
		if (newkg)
			ksegrp_link(newkg, p);
		else
			newkg = kg;
		kse_link(newke, newkg);
		if (p->p_sflag & PS_NEEDSIGCHK)
			newke->ke_flags |= KEF_ASTPENDING;
		newke->ke_mailbox = uap->mbx;
		newke->ke_upcall = mbx.km_func;
		bcopy(&mbx.km_stack, &newke->ke_stack, sizeof(stack_t));
		thread_schedule_upcall(td, newke);
		mtx_unlock_spin(&sched_lock);
	} else {
		/*
		 * If we didn't allocate a new KSE then we are using
		 * the existing (BOUND) KSE.
		 */
		ke = td->td_kse;
		ke->ke_mailbox = uap->mbx;
		ke->ke_upcall = mbx.km_func;
		bcopy(&mbx.km_stack, &ke->ke_stack, sizeof(stack_t));
	}
	/*
	 * Fill out the KSE-mode specific fields of the new KSE.
	 */

	td->td_retval[0] = 0;
	td->td_retval[1] = 0;
	return (0);
}
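
/*
 * Illustrative only (not kernel code): a userland UTS might drive the
 * interface above roughly as follows, with km_func set to its upcall
 * entry point and km_stack to stack space it allocated for upcalls
 * (mbx2 here is a hypothetical second mailbox):
 *
 *	struct kse_mailbox mbx, mbx2;	(filled in by the UTS)
 *	kse_create(&mbx, 0);	first call: current KSE takes the mailbox
 *	kse_create(&mbx2, 1);	later: new KSEGRP + KSE, upcall scheduled
 *
 * See <sys/kse.h> for the mailbox layout.
 */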

/*
 * Fill a ucontext_t with a thread's context information.
 *
 * This is an analogue to getcontext(3).
 */
void
thread_getcontext(struct thread *td, ucontext_t *uc)
{

/*
 * XXX this is declared in a MD include file, i386/include/ucontext.h but
 * is used in MI code.
 */
#ifdef __i386__
	get_mcontext(td, &uc->uc_mcontext);
#endif
	uc->uc_sigmask = td->td_proc->p_sigmask;
}

/*
 * Set a thread's context from a ucontext_t.
 *
 * This is an analogue to setcontext(3).
 */
int
thread_setcontext(struct thread *td, ucontext_t *uc)
{
	int ret;

/*
 * XXX this is declared in a MD include file, i386/include/ucontext.h but
 * is used in MI code.
 */
#ifdef __i386__
	ret = set_mcontext(td, &uc->uc_mcontext);
#else
	ret = ENOSYS;
#endif
	if (ret == 0) {
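		/* Strip signals that can never be masked (SIGKILL, SIGSTOP). */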
		SIG_CANTMASK(uc->uc_sigmask);
		PROC_LOCK(td->td_proc);
		td->td_proc->p_sigmask = uc->uc_sigmask;
		PROC_UNLOCK(td->td_proc);
	}
	return (ret);
}

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

#ifndef __ia64__
	thread_zone = uma_zcreate("THREAD", sizeof (struct thread),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, 0);
#else
	/*
	 * XXX the ia64 kstack allocator is really lame and is at the mercy
	 * of contigmalloc().  This hackery is to pre-construct a whole
	 * pile of thread structures with associated kernel stacks early
	 * in the system startup while contigmalloc() still works. Once we
	 * have them, keep them.  Sigh.
	 */
	thread_zone = uma_zcreate("THREAD", sizeof (struct thread),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, UMA_ZONE_NOFREE);
	uma_prealloc(thread_zone, 512);		/* XXX arbitrary */
#endif
	ksegrp_zone = uma_zcreate("KSEGRP", sizeof (struct ksegrp),
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_CACHE, 0);
	kse_zone = uma_zcreate("KSE", sizeof (struct kse),
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_CACHE, 0);
}

/*
 * Stash an embarrassingly extra thread into the zombie thread queue.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&zombie_thread_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
	mtx_unlock_spin(&zombie_thread_lock);
}

/*
 * Stash an embarrassingly extra KSE into the zombie KSE queue.
 */
void
kse_stash(struct kse *ke)
{
	mtx_lock_spin(&zombie_thread_lock);
	TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
	mtx_unlock_spin(&zombie_thread_lock);
}

/*
 * Stash an embarrassingly extra ksegrp into the zombie ksegrp queue.
 */
void
ksegrp_stash(struct ksegrp *kg)
{
	mtx_lock_spin(&zombie_thread_lock);
	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
	mtx_unlock_spin(&zombie_thread_lock);
}

/*
 * Reap zombie threads, KSEs and ksegrps.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;
	struct kse *ke_first, *ke_next;
	struct ksegrp *kg_first, *kg_next;

	/*
	 * Don't even bother to lock if none at this instant;
	 * we really don't care about the next instant.
	 */
	if ((!TAILQ_EMPTY(&zombie_threads))
	    || (!TAILQ_EMPTY(&zombie_kses))
	    || (!TAILQ_EMPTY(&zombie_ksegrps))) {
		mtx_lock_spin(&zombie_thread_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		ke_first = TAILQ_FIRST(&zombie_kses);
		kg_first = TAILQ_FIRST(&zombie_ksegrps);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		if (ke_first)
			TAILQ_INIT(&zombie_kses);
		if (kg_first)
			TAILQ_INIT(&zombie_ksegrps);
		mtx_unlock_spin(&zombie_thread_lock);
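		/*
		 * Free everything with the zombie spin lock released;
		 * the uma_zfree() calls below may take the (sleepable)
		 * zone locks, which must not happen under a spin mutex.
		 */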
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_runq);
			thread_free(td_first);
			td_first = td_next;
		}
		while (ke_first) {
			ke_next = TAILQ_NEXT(ke_first, ke_procq);
			kse_free(ke_first);
			ke_first = ke_next;
		}
		while (kg_first) {
			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
			ksegrp_free(kg_first);
			kg_first = kg_next;
		}
	}
}

/*
 * Allocate a ksegrp.
 */
struct ksegrp *
ksegrp_alloc(void)
{
	return (uma_zalloc(ksegrp_zone, M_WAITOK));
}

/*
 * Allocate a kse.
 */
struct kse *
kse_alloc(void)
{
	return (uma_zalloc(kse_zone, M_WAITOK));
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{
	thread_reap(); /* check if any zombies to get */
	return (uma_zalloc(thread_zone, M_WAITOK));
}

/*
 * Deallocate a ksegrp.
 */
void
ksegrp_free(struct ksegrp *kg)
{
	uma_zfree(ksegrp_zone, kg);
}

/*
 * Deallocate a kse.
 */
void
kse_free(struct kse *ke)
{
	uma_zfree(kse_zone, ke);
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{
	uma_zfree(thread_zone, td);
}

/*
 * Store the thread context in the UTS's mailbox,
 * then add the mailbox at the head of a list we are building in user space.
 * The list is anchored in the ksegrp structure.
 */
int
thread_export_context(struct thread *td)
{
	struct proc *p;
	struct ksegrp *kg;
	uintptr_t mbx;
	void *addr;
	int error;
	ucontext_t uc;

	p = td->td_proc;
	kg = td->td_ksegrp;

	/* Export the user/machine context. */
#if 0
	addr = (caddr_t)td->td_mailbox +
	    offsetof(struct kse_thr_mailbox, tm_context);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&td->td_mailbox->tm_context);
#endif
	error = copyin(addr, &uc, sizeof(ucontext_t));
	if (error == 0) {
		thread_getcontext(td, &uc);
		error = copyout(&uc, addr, sizeof(ucontext_t));
	}
	if (error) {
		PROC_LOCK(p);
		psignal(p, SIGSEGV);
		PROC_UNLOCK(p);
		return (error);
	}
	/* Get the address of the list pointer in the latest mailbox. */
#if 0
	addr = (caddr_t)td->td_mailbox
	    + offsetof(struct kse_thr_mailbox, tm_next);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&td->td_mailbox->tm_next);
#endif
	/*
	 * Put the saved address of the previous first
	 * entry into this one.
	 */
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			PROC_LOCK(p);
			psignal(p, SIGSEGV);
			PROC_UNLOCK(p);
			return (EFAULT);
		}
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = td->td_mailbox;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	return (0);
}

/*
 * Take the list of completed mailboxes for this KSEGRP and put them on this
 * KSE's mailbox as it's the next one going up.
 */
static int
thread_link_mboxes(struct ksegrp *kg, struct kse *ke)
{
	struct proc *p = kg->kg_proc;
	void *addr;
	uintptr_t mbx;

#if 0
	addr = (caddr_t)ke->ke_mailbox
	    + offsetof(struct kse_mailbox, km_completed);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&ke->ke_mailbox->km_completed);
#endif
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			PROC_LOCK(p);
			psignal(p, SIGSEGV);
			PROC_UNLOCK(p);
			return (EFAULT);
		}
		/* XXXKSE could use atomic CMPXCH here */
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = NULL;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	return (0);
}
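
/*
 * Both loops above follow the same optimistic pattern: write the
 * current head of kg_completed into the user-visible word with
 * suword(), then re-check under PROC_LOCK that kg_completed did not
 * change while we were touching user space (suword() may fault and
 * sleep, so the lock cannot be held across it); if it changed, retry
 * with the new head.
 */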

/*
 * Discard the current thread and exit from its context.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our KSE's ke_tdspare slot, freeing the
 * thread that might be there currently. Because we know that only this
 * processor will run our KSE, we needn't worry about someone else grabbing
 * our context before we do a cpu_throw.
 */
void
thread_exit(void)
{
	struct thread *td;
	struct kse *ke;
	struct proc *p;
	struct ksegrp	*kg;

	td = curthread;
	kg = td->td_ksegrp;
	p = td->td_proc;
	ke = td->td_kse;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	KASSERT(ke != NULL, ("thread exiting without a kse"));
	KASSERT(kg != NULL, ("thread exiting without a kse group"));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	CTR1(KTR_PROC, "thread_exit: thread %p", td);
	KASSERT(!mtx_owned(&Giant), ("dying thread owns giant"));

	if (ke->ke_tdspare != NULL) {
		thread_stash(ke->ke_tdspare);
		ke->ke_tdspare = NULL;
	}
	if (td->td_standin != NULL) {
		thread_stash(td->td_standin);
		td->td_standin = NULL;
	}

	cpu_thread_exit(td);	/* XXXSMP */

	/*
	 * The last thread is left attached to the process
	 * so that the whole bundle gets recycled. Skip
	 * all this stuff.
	 */
	if (p->p_numthreads > 1) {
		/*
		 * Unlink this thread from its proc and the ksegrp.
		 * In keeping with the other structs we probably should
		 * have a thread_unlink() that does some of this but it
		 * would only be called from here (I think) so it would
		 * be a waste. (might be useful for proc_fini() as well.)
		 */
		TAILQ_REMOVE(&p->p_threads, td, td_plist);
		p->p_numthreads--;
		TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
		kg->kg_numthreads--;
		/*
		 * The test below is NOT true if we are the
		 * sole exiting thread. P_STOPPED_SINGLE is unset
		 * in exit1() after it is the only survivor.
		 */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}

		/* Reassign this thread's KSE. */
		ke->ke_thread = NULL;
		td->td_kse = NULL;
		ke->ke_state = KES_UNQUEUED;
		KASSERT((ke->ke_bound != td),
		    ("thread_exit: entered with ke_bound set"));

		/*
		 * The reason for all this hoopla is
		 * an attempt to stop our thread stack from being freed
		 * until AFTER we have stopped running on it.
		 * Since we are under schedlock, almost any method where
		 * it is eventually freed by someone else is probably ok.
		 * (Especially if they do it under schedlock). We could
		 * almost free it here if we could be certain that
		 * the uma code wouldn't pull it apart immediately,
		 * but unfortunately we can not guarantee that.
		 *
		 * For threads that are exiting and NOT killing their
		 * KSEs we can just stash it in the KSE, however
		 * in the case where the KSE is also being deallocated,
		 * we need to store it somewhere else. It turns out that
		 * we will never free the last KSE, so there is always one
		 * other KSE available. We might as well just choose one
		 * and stash it there. Being under schedlock should make that
		 * safe.
		 *
		 * In borrower threads, we can stash it in the lender,
		 * where it won't be needed until this thread is long gone.
		 * Borrower threads can't kill their KSE anyhow, so even
		 * the KSE would be a safe place for them. It is not
		 * necessary to have a KSE (or KSEGRP) at all beyond this
		 * point, while we are under the protection of schedlock.
		 *
		 * Either give the KSE to another thread to use (or make
		 * it idle), or free it entirely, possibly along with its
		 * ksegrp if it's the last one.
		 */
		if (ke->ke_flags & KEF_EXIT) {
			kse_unlink(ke);
			/*
			 * Designate another KSE to hold our thread.
			 * Safe as long as we abide by whatever lock
			 * we control it with.. The other KSE will not
			 * be able to run it until we release the schedlock,
			 * but we need to be careful about it deciding to
			 * write to the stack before then. Luckily
			 * I believe that while another thread's
			 * standin thread can be used in this way, the
			 * spare thread for the KSE cannot be used without
			 * holding schedlock at least once.
			 */
			ke = FIRST_KSE_IN_PROC(p);
		} else {
			kse_reassign(ke);
		}
		if (ke->ke_bound) {
			/*
			 * We are a borrower:
			 * stash our thread with the owner.
			 */
			if (ke->ke_bound->td_standin) {
				thread_stash(ke->ke_bound->td_standin);
			}
			ke->ke_bound->td_standin = td;
		} else {
			if (ke->ke_tdspare != NULL) {
				thread_stash(ke->ke_tdspare);
				ke->ke_tdspare = NULL;
			}
			ke->ke_tdspare = td;
		}
		PROC_UNLOCK(p);
		td->td_state	= TDS_INACTIVE;
		td->td_proc	= NULL;
		td->td_ksegrp	= NULL;
		td->td_last_kse	= NULL;
	} else {
		PROC_UNLOCK(p);
	}

	cpu_throw();
	/* NOTREACHED */
}

/*
 * Link a thread to a process.
 * Set up anything that needs to be initialized for it to
 * be used by the process.
 *
 * Note that we do not link to the proc's ucred here.
 * The thread is linked as if running but no KSE assigned.
 */
void
thread_link(struct thread *td, struct ksegrp *kg)
{
	struct proc *p;

	p = kg->kg_proc;
	td->td_state = TDS_INACTIVE;
	td->td_proc	= p;
	td->td_ksegrp	= kg;
	td->td_last_kse	= NULL;

	LIST_INIT(&td->td_contested);
	callout_init(&td->td_slpcallout, 1);
	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
	p->p_numthreads++;
	kg->kg_numthreads++;
	if (oiks_debug && p->p_numthreads > max_threads_per_proc) {
		printf("OIKS %d\n", p->p_numthreads);
		if (oiks_debug > 1)
			Debugger("OIKS");
	}
	td->td_kse	= NULL;
}

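/*
 * Purge all idle KSEs and surplus ksegrps from a (now single-threaded)
 * process, leaving only the calling thread's ksegrp linked to the proc.
 */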
void
kse_purge(struct proc *p, struct thread *td)
{
	struct kse *ke;
	struct ksegrp *kg;

	KASSERT(p->p_numthreads == 1, ("bad thread number"));
	mtx_lock_spin(&sched_lock);
	while ((kg = TAILQ_FIRST(&p->p_ksegrps)) != NULL) {
		while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			kg->kg_idle_kses--;
			TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
			kg->kg_kses--;
			if (ke->ke_tdspare)
				thread_stash(ke->ke_tdspare);
			kse_stash(ke);
		}
		TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
		p->p_numksegrps--;
		KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
		    ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
			("wrong kg_kses"));
		if (kg != td->td_ksegrp) {
			ksegrp_stash(kg);
		}
	}
	TAILQ_INSERT_HEAD(&p->p_ksegrps, td->td_ksegrp, kg_ksegrp);
	p->p_numksegrps++;
	mtx_unlock_spin(&sched_lock);
}

/*
 * Create a thread and schedule it for upcall on the KSE given.
 */
struct thread *
thread_schedule_upcall(struct thread *td, struct kse *ke)
{
	struct thread *td2;
	struct ksegrp *kg;
	int newkse;

	mtx_assert(&sched_lock, MA_OWNED);
	newkse = (ke != td->td_kse);

	/*
	 * If the KSE is already owned by another thread then we can't
	 * schedule an upcall because the other thread must be BOUND
	 * which means it is not in a position to take an upcall.
	 * We must be borrowing the KSE to allow us to complete some in-kernel
	 * work. When we complete, the bound thread will have the chance to
	 * complete. This thread will sleep as planned. Hopefully there will
	 * eventually be an unbound thread that can be converted to an
	 * upcall to report the completion of this thread.
	 */
	if (ke->ke_bound && ((ke->ke_bound->td_flags & TDF_UNBOUND) == 0)) {
		return (NULL);
	}
	KASSERT((ke->ke_bound == NULL), ("kse already bound"));

	if (ke->ke_state == KES_IDLE) {
		kg = ke->ke_ksegrp;
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses--;
		ke->ke_state = KES_UNQUEUED;
	}
	if ((td2 = td->td_standin) != NULL) {
		td->td_standin = NULL;
	} else {
		if (newkse)
			panic("no reserve thread when called with a new kse");
		/*
		 * If called from (e.g.) sleep and we do not have
		 * a reserve thread, then we've used it, so do not
		 * create an upcall.
		 */
		return (NULL);
	}
	CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
	     td2, td->td_proc->p_pid, td->td_proc->p_comm);
	bzero(&td2->td_startzero,
	    (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
	bcopy(&td->td_startcopy, &td2->td_startcopy,
	    (unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy));
	thread_link(td2, ke->ke_ksegrp);
	cpu_set_upcall(td2, td->td_pcb);

	/*
	 * XXXKSE do we really need this? (default values for the
	 * frame).
	 */
	bcopy(td->td_frame, td2->td_frame, sizeof(struct trapframe));

	/*
	 * Bind the new thread to the KSE,
	 * and if it's our KSE, lend it back to ourself
	 * so we can continue running.
	 */
	td2->td_ucred = crhold(td->td_ucred);
	td2->td_flags = TDF_UPCALLING; /* note: BOUND */
	td2->td_kse = ke;
	td2->td_state = TDS_CAN_RUN;
	td2->td_inhibitors = 0;
	/*
	 * If called from msleep(), we are working on the current
	 * KSE so fake that we borrowed it. If called from
	 * kse_create(), don't, as we have a new kse too.
	 */
	if (!newkse) {
		/*
		 * This thread will be scheduled when the current thread
		 * blocks, exits or tries to enter userspace, (whichever
		 * happens first). When that happens the KSE will "revert"
		 * to this thread in a BOUND manner. Since we are called
		 * from msleep() this is going to be "very soon" in nearly
		 * all cases.
		 */
		ke->ke_bound = td2;
		TD_SET_LOAN(td2);
	} else {
		ke->ke_bound = NULL;
		ke->ke_thread = td2;
		ke->ke_state = KES_THREAD;
		setrunqueue(td2);
	}
	return (td2);	/* bogus.. should be a void function */
}

/*
 * Schedule an upcall to notify a KSE process that a signal has been
 * received.
 *
 * XXX - Modifying a sigset_t like this is totally bogus.
 */
struct thread *
signal_upcall(struct proc *p, int sig)
{
	struct thread *td, *td2;
	struct kse *ke;
	sigset_t ss;
	int error;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	return (NULL);	/* XXX: disabled; the code below is unreached. */

	td = FIRST_THREAD_IN_PROC(p);
	ke = td->td_kse;
	PROC_UNLOCK(p);
	error = copyin(&ke->ke_mailbox->km_sigscaught, &ss, sizeof(sigset_t));
	PROC_LOCK(p);
	if (error)
		return (NULL);
	SIGADDSET(ss, sig);
	PROC_UNLOCK(p);
	error = copyout(&ss, &ke->ke_mailbox->km_sigscaught, sizeof(sigset_t));
	PROC_LOCK(p);
	if (error)
		return (NULL);
	if (td->td_standin == NULL)
		td->td_standin = thread_alloc();
	mtx_lock_spin(&sched_lock);
	td2 = thread_schedule_upcall(td, ke); /* Bogus JRE */
	mtx_unlock_spin(&sched_lock);
	return (td2);
}

/*
 * Setup done on the thread when it enters the kernel.
 * XXXKSE Presently only for syscalls but eventually all kernel entries.
 */
void
thread_user_enter(struct proc *p, struct thread *td)
{
	struct kse *ke;

	/*
	 * First check that we shouldn't just abort.
	 * But check if we are the single thread first!
	 * XXX p_singlethread not locked, but should be safe.
	 */
	if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) {
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		thread_exit();
		/* NOTREACHED */
	}

	/*
	 * If we are doing a syscall in a KSE environment,
	 * note where our mailbox is. There is always the
	 * possibility that we could do this lazily (in sleep()),
	 * but for now do it every time.
	 */
	ke = td->td_kse;
	if (ke->ke_mailbox != NULL) {
#if 0
		td->td_mailbox = (void *)fuword((caddr_t)ke->ke_mailbox
		    + offsetof(struct kse_mailbox, km_curthread));
#else /* if user pointer arithmetic is ok in the kernel */
		td->td_mailbox =
		    (void *)fuword((void *)&ke->ke_mailbox->km_curthread);
#endif
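		/*
		 * fuword() returns -1 on a fault, so both NULL and -1
		 * mean "no usable mailbox": fall back to bound mode.
		 */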
		if ((td->td_mailbox == NULL) ||
		    (td->td_mailbox == (void *)-1)) {
			td->td_mailbox = NULL;	/* single thread it.. */
			td->td_flags &= ~TDF_UNBOUND;
		} else {
			if (td->td_standin == NULL)
				td->td_standin = thread_alloc();
			td->td_flags |= TDF_UNBOUND;
		}
	}
}

/*
 * The extra work we go through if we are a threaded process when we
 * return to userland.
 *
 * If we are a KSE process and returning to user mode, check for
 * extra work to do before we return (e.g. for more syscalls
 * to complete first).  If we were in a critical section, we should
 * just return to let it finish. Same if we were in the UTS (in
 * which case the mailbox's context's busy indicator will be set).
 * The only traps we support will have set the mailbox.
 * We will clear it here.
 */
int
thread_userret(struct thread *td, struct trapframe *frame)
{
	int error;
	int unbound;
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;
	struct proc *p;

	error = 0;

	unbound = td->td_flags & TDF_UNBOUND;

	kg = td->td_ksegrp;
	p = td->td_proc;

	/*
	 * Originally bound threads never upcall but they may
	 * loan out their KSE at this point.
	 * Upcalls imply bound.. They also may want to do some philanthropy.
	 * Unbound threads on the other hand either yield to other work
	 * or transform into an upcall.
	 * (having saved their context to user space in both cases)
	 */
	if (unbound) {
		/*
		 * We are an unbound thread, looking to return to
		 * user space.
		 * There are several possibilities:
		 * 1) we are using a borrowed KSE: save state and exit;
		 *    kse_reassign() will recycle the KSE as needed.
		 * 2) we are not: save state, and then convert ourself
		 *    to be an upcall, bound to the KSE.
		 *    If there are others that need the KSE,
		 *    give them a chance by doing an mi_switch().
		 *    Because we are bound, control will eventually return
		 *    to us here.
		 * ***
		 * Save the thread's context, and link it
		 * into the KSEGRP's list of completed threads.
		 */
		error = thread_export_context(td);
		td->td_mailbox = NULL;
		if (error) {
			/*
			 * If we are not running on a borrowed KSE, then
			 * failing to do the KSE operation just defaults
			 * back to synchronous operation, so just return from
			 * the syscall. If it IS borrowed, there is nothing
			 * we can do. We just lose that context. We
			 * probably should note this somewhere and send
			 * the process a signal.
			 */
			PROC_LOCK(td->td_proc);
			psignal(td->td_proc, SIGSEGV);
			mtx_lock_spin(&sched_lock);
			if (td->td_kse->ke_bound == NULL) {
				td->td_flags &= ~TDF_UNBOUND;
				PROC_UNLOCK(td->td_proc);
				mtx_unlock_spin(&sched_lock);
				return (error);	/* go sync */
			}
			thread_exit();
		}

		/*
		 * If the KSE is owned and we are borrowing it,
		 * don't make an upcall, just exit so that the owner
		 * can get its KSE if it wants it.
		 * Our context is already safely stored for later
		 * use by the UTS.
		 */
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		if (td->td_kse->ke_bound) {
			thread_exit();
		}
		PROC_UNLOCK(p);

		/*
		 * Turn ourself into a bound upcall.
		 * We will rely on kse_reassign()
		 * to make us run at a later time.
		 * We should look just like a scheduled upcall
		 * from msleep() or cv_wait().
		 */
		td->td_flags &= ~TDF_UNBOUND;
		td->td_flags |= TDF_UPCALLING;
		/* Only get here if we have become an upcall */

	} else {
		mtx_lock_spin(&sched_lock);
	}
	/*
	 * We ARE going back to userland with this KSE.
	 * Check for threads that need to borrow it.
	 * Optimisation: don't call mi_switch if no-one wants the KSE.
	 * Any other thread that comes ready after this missed the boat.
	 */
	ke = td->td_kse;
	if ((td2 = kg->kg_last_assigned))
		td2 = TAILQ_NEXT(td2, td_runq);
	else
		td2 = TAILQ_FIRST(&kg->kg_runq);
	if (td2) {
		/*
		 * Force a switch to more urgent 'in kernel'
		 * work. Control will return to this thread
		 * when there is no more work to do.
		 * kse_reassign() will do that for us.
		 */
		TD_SET_LOAN(td);
		ke->ke_bound = td;
		ke->ke_thread = NULL;
		mi_switch(); /* kse_reassign() will (re)find td2 */
	}
	mtx_unlock_spin(&sched_lock);
1360104695Sjulian
1361103410Smini	/*
1362104695Sjulian	 * Optimisation:
1363104695Sjulian	 * Ensure that we have a spare thread available,
1364104695Sjulian	 * for when we re-enter the kernel.
1365103410Smini	 */
1366104695Sjulian	if (td->td_standin == NULL) {
1367104695Sjulian		if (ke->ke_tdspare) {
1368104695Sjulian			td->td_standin = ke->ke_tdspare;
1369104695Sjulian			ke->ke_tdspare = NULL;
1370104695Sjulian		} else {
1371104695Sjulian			td->td_standin = thread_alloc();
1372104695Sjulian		}
1373104695Sjulian	}
1374104695Sjulian
1375104695Sjulian	/*
1376104695Sjulian	 * To get here, we know there is no other need for our
1377104695Sjulian	 * KSE so we can proceed. If not upcalling, go back to
1378104695Sjulian	 * userspace. If we are, get the upcall set up.
1379104695Sjulian	 */
1380104695Sjulian	if ((td->td_flags & TDF_UPCALLING) == 0)
1381103410Smini		return (0);
1382104695Sjulian
1383104695Sjulian	/*
1384104695Sjulian	 * We must be an upcall to get this far.
1385104695Sjulian	 * There is no more work to do and we are going to ride
1386104695Sjulian	 * this thread/KSE up to userland as an upcall.
1387104695Sjulian	 * Do the last parts of the setup needed for the upcall.
1388104695Sjulian	 */
1389104695Sjulian	CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
1390104695Sjulian	    td, td->td_proc->p_pid, td->td_proc->p_comm);
1391104695Sjulian
1392103410Smini	/*
1393104695Sjulian	 * Set user context to the UTS.
1394103410Smini	 */
1395104695Sjulian	cpu_set_upcall_kse(td, ke);
139699026Sjulian
1397104695Sjulian	/*
1398104695Sjulian	 * Put any completed mailboxes on this KSE's list.
1399104695Sjulian	 */
1400104695Sjulian	error = thread_link_mboxes(kg, ke);
1401104695Sjulian	if (error)
1402104695Sjulian		goto bad;
1403104031Sjulian
1404104695Sjulian	/*
1405104695Sjulian	 * Set state and mailbox.
1406104695Sjulian	 * From now on we are just a bound outgoing process.
1407104695Sjulian	 * **Problem** userret is often called several times;
1408104695Sjulian	 * it would be nice if this all happened only on the first time
1409104695Sjulian	 * through (the scan for extra work, etc.).
1410104695Sjulian	 */
1411106180Sdavidxu	mtx_lock_spin(&sched_lock);
1412104695Sjulian	td->td_flags &= ~TDF_UPCALLING;
1413106180Sdavidxu	mtx_unlock_spin(&sched_lock);
1414104031Sjulian#if 0
1415104695Sjulian	error = suword((caddr_t)ke->ke_mailbox +
1416104695Sjulian	    offsetof(struct kse_mailbox, km_curthread), 0);
1417104031Sjulian#else	/* if user pointer arithmetic is ok in the kernel */
1418104695Sjulian	error = suword((caddr_t)&ke->ke_mailbox->km_curthread, 0);
1419104031Sjulian#endif
1420104695Sjulian	if (!error)
1421104695Sjulian		return (0);
1422104695Sjulian
1423104031Sjulianbad:
1424104031Sjulian	/*
1425104031Sjulian	 * Things are going to be so screwed we should just kill the process.
1426104031Sjulian	 * How do we do that?
1427104031Sjulian	 */
1428104695Sjulian	PROC_LOCK(td->td_proc);
1429104695Sjulian	psignal(td->td_proc, SIGSEGV);
1430104695Sjulian	PROC_UNLOCK(td->td_proc);
1431104695Sjulian	return (error);	/* go sync */
143299026Sjulian}
143399026Sjulian
143499026Sjulian/*
143599026Sjulian * Enforce single-threading.
143699026Sjulian *
143799026Sjulian * Returns 1 if the caller must abort (another thread is waiting to
143899026Sjulian * exit the process or similar). Process is locked!
143999026Sjulian * Returns 0 when you are successfully the only thread running.
144099026Sjulian * A process has successfully single-threaded in suspend mode when
144199026Sjulian * there are no threads in user mode. Threads in the kernel must be
144299026Sjulian * allowed to continue until they get to the user boundary. They may even
144399026Sjulian * copy out their return values and data before suspending. They may,
144499026Sjulian * however, be accelerated in reaching the user boundary as we will wake
144599026Sjulian * up any sleeping threads that are interruptible (PCATCH).
144699026Sjulian */
144799026Sjulianint
144899026Sjulianthread_single(int force_exit)
144999026Sjulian{
145099026Sjulian	struct thread *td;
145199026Sjulian	struct thread *td2;
145299026Sjulian	struct proc *p;
145399026Sjulian
145499026Sjulian	td = curthread;
145599026Sjulian	KASSERT((td != NULL), ("curthread is NULL"));
145699026Sjulian	p = td->td_proc;
145799026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
145899026Sjulian
145999026Sjulian	if ((p->p_flag & P_KSES) == 0)
146099026Sjulian		return (0);
146199026Sjulian
1462100648Sjulian	/* Is someone already single threading? */
1463100648Sjulian	if (p->p_singlethread)
146499026Sjulian		return (1);
146599026Sjulian
1466102950Sdavidxu	if (force_exit == SINGLE_EXIT)
146799026Sjulian		p->p_flag |= P_SINGLE_EXIT;
146899026Sjulian	else
146999026Sjulian		p->p_flag &= ~P_SINGLE_EXIT;
1470102950Sdavidxu	p->p_flag |= P_STOPPED_SINGLE;
147199026Sjulian	p->p_singlethread = td;
1472105911Sjulian	/* XXXKSE Which lock protects the below values? */
147399026Sjulian	while ((p->p_numthreads - p->p_suspcount) != 1) {
1474103216Sjulian		mtx_lock_spin(&sched_lock);
147599026Sjulian		FOREACH_THREAD_IN_PROC(p, td2) {
147699026Sjulian			if (td2 == td)
147799026Sjulian				continue;
1478103216Sjulian			if (TD_IS_INHIBITED(td2)) {
1479105911Sjulian				if (force_exit == SINGLE_EXIT) {
1480105911Sjulian					if (TD_IS_SUSPENDED(td2)) {
1481103216Sjulian						thread_unsuspend_one(td2);
1482105911Sjulian					}
1483105911Sjulian					if (TD_ON_SLEEPQ(td2) &&
1484105911Sjulian					    (td2->td_flags & TDF_SINTR)) {
1485105911Sjulian						if (td2->td_flags & TDF_CVWAITQ)
1486105911Sjulian							cv_abort(td2);
1487105911Sjulian						else
1488105911Sjulian							abortsleep(td2);
1489105911Sjulian					}
1490105911Sjulian				} else {
1491105911Sjulian					if (TD_IS_SUSPENDED(td2))
1492105874Sdavidxu						continue;
1493105911Sjulian					/* maybe other inhibited states too? */
1494105970Sdavidxu					if (TD_IS_SLEEPING(td2))
1495105911Sjulian						thread_suspend_one(td2);
149699026Sjulian				}
149799026Sjulian			}
149899026Sjulian		}
1499105911Sjulian		/*
1500105911Sjulian		 * Maybe we suspended some threads... was it enough?
1501105911Sjulian		 */
1502105911Sjulian		if ((p->p_numthreads - p->p_suspcount) == 1) {
1503105911Sjulian			mtx_unlock_spin(&sched_lock);
1504105911Sjulian			break;
1505105911Sjulian		}
1506105911Sjulian
150799026Sjulian		/*
150899026Sjulian		 * Wake us up when everyone else has suspended.
1509100648Sjulian		 * In the meantime we suspend as well.
151099026Sjulian		 */
1511103216Sjulian		thread_suspend_one(td);
151299026Sjulian		mtx_unlock(&Giant);
151399026Sjulian		PROC_UNLOCK(p);
151499026Sjulian		mi_switch();
151599026Sjulian		mtx_unlock_spin(&sched_lock);
151699026Sjulian		mtx_lock(&Giant);
151799026Sjulian		PROC_LOCK(p);
151899026Sjulian	}
1519105854Sjulian	if (force_exit == SINGLE_EXIT)
1520105854Sjulian		kse_purge(p, td);
152199026Sjulian	return (0);
152299026Sjulian}
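
/*
 * Editorial sketch (not part of this revision): an exit-style caller of
 * thread_single() might look roughly like the fragment below. The caller
 * identity and the error handling shown are assumptions for illustration
 * only; thread_single() returns with the proc lock still held.
 */
#if 0
	PROC_LOCK(p);
	if (thread_single(SINGLE_EXIT)) {
		/* Another thread is already single-threading us; abort. */
		PROC_UNLOCK(p);
		return (ERESTART);	/* hypothetical recovery */
	}
	/* From here on we are the only thread running in this process. */
	PROC_UNLOCK(p);
#endif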
152399026Sjulian
152499026Sjulian/*
152599026Sjulian * Called in from locations that can safely check to see
152699026Sjulian * whether we have to suspend or at least throttle for a
152799026Sjulian * single-thread event (e.g. fork).
152899026Sjulian *
152999026Sjulian * Such locations include userret().
153099026Sjulian * The 'return_instead' argument tells the function whether it may do a
153199026Sjulian * thread_exit() or suspend, or whether the caller must abort and back
153299026Sjulian * out instead. If it is non-zero, the caller must be able to accept
153399026Sjulian * either 0 (caller may continue) or 1 (caller must abort) as a result.
153699026Sjulian *
153799026Sjulian * If the thread that set the single_threading request has set the
153899026Sjulian * P_SINGLE_EXIT bit in the process flags then this call will never return
153999026Sjulian * if 'return_instead' is false, but will exit.
154099026Sjulian *
154199026Sjulian * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
154299026Sjulian *---------------+--------------------+---------------------
154399026Sjulian *       0       | returns 0          |   returns 0 or 1
154499026Sjulian *               | when ST ends       |   immediately
154599026Sjulian *---------------+--------------------+---------------------
154699026Sjulian *       1       | thread exits       |   returns 1
154799026Sjulian *               |                    |   immediately
154899026Sjulian * 0 = thread_exit() or suspension ok,
154999026Sjulian * other = return error instead of stopping the thread.
155099026Sjulian *
155199026Sjulian * While a full suspension is under effect, even a single threading
155299026Sjulian * thread would be suspended if it made this call (but it shouldn't).
155399026Sjulian * This call should only be made from places where
155499026Sjulian * thread_exit() would be safe as that may be the outcome unless
155599026Sjulian * return_instead is set.
155699026Sjulian */
155799026Sjulianint
155899026Sjulianthread_suspend_check(int return_instead)
155999026Sjulian{
1560104502Sjmallett	struct thread *td;
1561104502Sjmallett	struct proc *p;
1562105854Sjulian	struct kse *ke;
1563105854Sjulian	struct ksegrp *kg;
156499026Sjulian
156599026Sjulian	td = curthread;
156699026Sjulian	p = td->td_proc;
1567105854Sjulian	kg = td->td_ksegrp;
156899026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
156999026Sjulian	while (P_SHOULDSTOP(p)) {
1570102950Sdavidxu		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
157199026Sjulian			KASSERT(p->p_singlethread != NULL,
157299026Sjulian			    ("singlethread not set"));
157399026Sjulian			/*
1574100648Sjulian			 * The only suspension in action is a
1575100648Sjulian			 * single-threading. Single threader need not stop.
1576100646Sjulian			 * XXX Should be safe to access unlocked
1577100646Sjulian			 * as it can only be set to be true by us.
157899026Sjulian			 */
1579100648Sjulian			if (p->p_singlethread == td)
158099026Sjulian				return (0);	/* Exempt from stopping. */
158199026Sjulian		}
1582100648Sjulian		if (return_instead)
158399026Sjulian			return (1);
158499026Sjulian
158599026Sjulian		/*
158699026Sjulian		 * If the process is waiting for us to exit,
158799026Sjulian		 * this thread should just suicide.
1588102950Sdavidxu		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
158999026Sjulian		 */
159099026Sjulian		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
159199026Sjulian			mtx_lock_spin(&sched_lock);
159299026Sjulian			while (mtx_owned(&Giant))
159399026Sjulian				mtx_unlock(&Giant);
1594105854Sjulian			/*
1595105854Sjulian			 * Free extra KSEs and ksegrps. We needn't worry
1596105854Sjulian			 * about the case where the current thread is in
1597105854Sjulian			 * the same ksegrp as p_singlethread and the last
1598105854Sjulian			 * KSE in the group gets killed: kg_numthreads
1599105854Sjulian			 * protects against that, since it must be > 1 then.
1600105854Sjulian			 */
1601105854Sjulian			ke = td->td_kse;
1602105854Sjulian			if (ke->ke_bound == NULL &&
1603105854Sjulian			    ((kg->kg_kses != 1) || (kg->kg_numthreads == 1)))
1604105854Sjulian				ke->ke_flags |= KEF_EXIT;
160599026Sjulian			thread_exit();
160699026Sjulian		}
160799026Sjulian
160899026Sjulian		/*
160999026Sjulian		 * When a thread suspends, it just
161099026Sjulian		 * moves to the process's suspend queue
161199026Sjulian		 * and stays there.
161299026Sjulian		 *
161399026Sjulian		 * XXXKSE: if TDF_BOUND is true
161499026Sjulian		 * it will not release its KSE, which might
161599026Sjulian		 * lead to deadlock if there are not enough KSEs
161699026Sjulian		 * to complete all waiting threads.
161799026Sjulian		 * Maybe we could 'lend' it out again
161899026Sjulian		 * (lent KSEs cannot go back to userland?);
161999026Sjulian		 * it can only be lent in the STOPPED state.
162099026Sjulian		 */
1621102238Sjulian		mtx_lock_spin(&sched_lock);
1622102950Sdavidxu		if ((p->p_flag & P_STOPPED_SIG) &&
1623102238Sjulian		    (p->p_suspcount+1 == p->p_numthreads)) {
1624102238Sjulian			mtx_unlock_spin(&sched_lock);
1625102238Sjulian			PROC_LOCK(p->p_pptr);
1626102238Sjulian			if ((p->p_pptr->p_procsig->ps_flag &
1627102238Sjulian				PS_NOCLDSTOP) == 0) {
1628102238Sjulian				psignal(p->p_pptr, SIGCHLD);
1629102238Sjulian			}
1630102238Sjulian			PROC_UNLOCK(p->p_pptr);
1631103055Sjulian			mtx_lock_spin(&sched_lock);
1632102238Sjulian		}
163399026Sjulian		mtx_assert(&Giant, MA_NOTOWNED);
1634103216Sjulian		thread_suspend_one(td);
163599026Sjulian		PROC_UNLOCK(p);
1636102950Sdavidxu		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
1637100632Sjulian			if (p->p_numthreads == p->p_suspcount) {
1638103216Sjulian				thread_unsuspend_one(p->p_singlethread);
1639100632Sjulian			}
1640100632Sjulian		}
1641100594Sjulian		p->p_stats->p_ru.ru_nivcsw++;
164299026Sjulian		mi_switch();
164399026Sjulian		mtx_unlock_spin(&sched_lock);
164499026Sjulian		PROC_LOCK(p);
164599026Sjulian	}
164699026Sjulian	return (0);
164799026Sjulian}
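
/*
 * Editorial sketch (not part of this revision): the two call patterns
 * implied by the table above. The error value and the call sites are
 * illustrative assumptions, not code from this file.
 */
#if 0
	/* At a safe point (e.g. the user boundary): may stop or exit here. */
	PROC_LOCK(p);
	thread_suspend_check(0);
	PROC_UNLOCK(p);

	/* At an unsafe point (e.g. holding resources mid-syscall): */
	PROC_LOCK(p);
	if (thread_suspend_check(1)) {
		PROC_UNLOCK(p);
		return (EINTR);	/* hypothetical: back out, stop later */
	}
	PROC_UNLOCK(p);
#endif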
164899026Sjulian
1649102898Sdavidxuvoid
1650102898Sdavidxuthread_suspend_one(struct thread *td)
1651102898Sdavidxu{
1652102898Sdavidxu	struct proc *p = td->td_proc;
1653102898Sdavidxu
1654102898Sdavidxu	mtx_assert(&sched_lock, MA_OWNED);
1655102898Sdavidxu	p->p_suspcount++;
1656103216Sjulian	TD_SET_SUSPENDED(td);
1657102898Sdavidxu	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
1658103216Sjulian	/*
1659103216Sjulian	 * Hack: If we are suspending but are on the sleep queue
1660103216Sjulian	 * then we are in msleep() or the cv equivalent. We
1661103216Sjulian	 * want to look like we have two inhibitors.
1662105911Sjulian	 * The sleep state may already be set... it doesn't matter.
1663103216Sjulian	 */
1664103216Sjulian	if (TD_ON_SLEEPQ(td))
1665103216Sjulian		TD_SET_SLEEPING(td);
1666102898Sdavidxu}
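
/*
 * Editorial sketch (not part of this revision): per the hack above, a
 * thread suspended while asleep carries two inhibitors, so clearing the
 * suspension alone does not make it runnable; the normal wakeup path
 * must also clear the sleep state.
 */
#if 0
	TD_SET_SLEEPING(td);		/* done by msleep()/cv_wait() */
	thread_suspend_one(td);		/* adds the suspension inhibitor */
	...
	thread_unsuspend_one(td);	/* setrunnable() sees it still asleep */
	/* only the wakeup path finally makes td runnable */
#endif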
1667102898Sdavidxu
1668102898Sdavidxuvoid
1669102898Sdavidxuthread_unsuspend_one(struct thread *td)
1670102898Sdavidxu{
1671102898Sdavidxu	struct proc *p = td->td_proc;
1672102898Sdavidxu
1673102898Sdavidxu	mtx_assert(&sched_lock, MA_OWNED);
1674102898Sdavidxu	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
1675103216Sjulian	TD_CLR_SUSPENDED(td);
1676102898Sdavidxu	p->p_suspcount--;
1677103216Sjulian	setrunnable(td);
1678102898Sdavidxu}
1679102898Sdavidxu
168099026Sjulian/*
168199026Sjulian * Allow all threads blocked by single threading to continue running.
168299026Sjulian */
168399026Sjulianvoid
168499026Sjulianthread_unsuspend(struct proc *p)
168599026Sjulian{
168699026Sjulian	struct thread *td;
168799026Sjulian
1688100646Sjulian	mtx_assert(&sched_lock, MA_OWNED);
168999026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
169099026Sjulian	if (!P_SHOULDSTOP(p)) {
169199026Sjulian		while ((td = TAILQ_FIRST(&p->p_suspended))) {
1692102898Sdavidxu			thread_unsuspend_one(td);
169399026Sjulian		}
1694102950Sdavidxu	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
169599026Sjulian	    (p->p_numthreads == p->p_suspcount)) {
169699026Sjulian		/*
169799026Sjulian		 * Stopping everything also did the job for the single
169899026Sjulian		 * threading request. Now we've downgraded to single-threaded,
169999026Sjulian		 * let it continue.
170099026Sjulian		 */
1701102898Sdavidxu		thread_unsuspend_one(p->p_singlethread);
170299026Sjulian	}
170399026Sjulian}
170499026Sjulian
170599026Sjulianvoid
170699026Sjulianthread_single_end(void)
170799026Sjulian{
170899026Sjulian	struct thread *td;
170999026Sjulian	struct proc *p;
171099026Sjulian
171199026Sjulian	td = curthread;
171299026Sjulian	p = td->td_proc;
171399026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
1714102950Sdavidxu	p->p_flag &= ~P_STOPPED_SINGLE;
171599026Sjulian	p->p_singlethread = NULL;
1716102292Sjulian	/*
1717102292Sjulian	 * If there are other threads they may now run,
1718102292Sjulian	 * unless of course there is a blanket 'stop order'
1719102292Sjulian	 * on the process. The single-threader must, however,
1720102292Sjulian	 * be allowed to continue, as this is a bad place to stop.
1721102292Sjulian	 */
1722102292Sjulian	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
1723102292Sjulian		mtx_lock_spin(&sched_lock);
1724102292Sjulian		while ((td = TAILQ_FIRST(&p->p_suspended))) {
1725103216Sjulian			thread_unsuspend_one(td);
1726102292Sjulian		}
1727102292Sjulian		mtx_unlock_spin(&sched_lock);
1728102292Sjulian	}
172999026Sjulian}
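
/*
 * Editorial sketch (not part of this revision): non-exit single-threading
 * is bracketed by thread_single() and thread_single_end(). SINGLE_NO_EXIT
 * is assumed here as the non-exit request constant.
 */
#if 0
	PROC_LOCK(p);
	if (thread_single(SINGLE_NO_EXIT) == 0) {
		/* ... all other threads are now held at the boundary ... */
		thread_single_end();
	}
	PROC_UNLOCK(p);
#endif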
173099026Sjulian
1731102292Sjulian