kern_thread.c revision 107029
/*
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * $FreeBSD: head/sys/kern/kern_thread.c 107029 2002-11-17 23:26:42Z julian $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/tty.h>
#include <sys/signalvar.h>
#include <sys/sx.h>
#include <sys/user.h>
#include <sys/jail.h>
#include <sys/kse.h>
#include <sys/ktr.h>
#include <sys/ucontext.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/pmap.h>
#include <vm/uma.h>
#include <vm/vm_map.h>

#include <machine/frame.h>

/*
 * KSEGRP related storage.
 */
static uma_zone_t ksegrp_zone;
static uma_zone_t kse_zone;
static uma_zone_t thread_zone;

/* DEBUG ONLY */
SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
static int oiks_debug = 0;	/* 0 disable, 1 printf, 2 enter debugger */
SYSCTL_INT(_kern_threads, OID_AUTO, oiks, CTLFLAG_RW,
	&oiks_debug, 0, "OIKS thread debug");

static int oiks_max_threads_per_proc = 10;
SYSCTL_INT(_kern_threads, OID_AUTO, oiks_max_per_proc, CTLFLAG_RW,
	&oiks_max_threads_per_proc, 0, "Debug limit on threads per proc");

static int max_threads_per_proc = 30;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

static int max_groups_per_proc = 5;
SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
	&max_groups_per_proc, 0, "Limit on thread groups per proc");

#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))
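
/*
 * Example (illustrative only): RANGEOF() gives the length in bytes of the
 * span between two members of a structure, so a caller can clear or copy
 * just that slice, as kse_create() does below:
 *
 *	bzero(&newkg->kg_startzero,
 *	    RANGEOF(struct ksegrp, kg_startzero, kg_endzero));
 */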

struct threadqueue zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
struct mtx zombie_thread_lock;
MTX_SYSINIT(zombie_thread_lock, &zombie_thread_lock,
    "zombie_thread_lock", MTX_SPIN);
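
/*
 * This must be a spin mutex: the stash routines below are called with
 * sched_lock (also a spin mutex) held, for instance from thread_exit(),
 * where blocking on a sleep mutex is not an option.
 */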

void kse_purge(struct proc *p, struct thread *td);
/*
 * Prepare a thread for use.
 */
static void
thread_ctor(void *mem, int size, void *arg)
{
	struct thread	*td;

	KASSERT((size == sizeof(struct thread)),
	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct thread)));

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
	td->td_flags |= TDF_UNBOUND;
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread	*td;

	KASSERT((size == sizeof(struct thread)),
	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct thread)));

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static void
thread_init(void *mem, int size)
{
	struct thread	*td;

	KASSERT((size == sizeof(struct thread)),
	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct thread)));

	td = (struct thread *)mem;
	mtx_lock(&Giant);
	pmap_new_thread(td, 0);
	mtx_unlock(&Giant);
	cpu_thread_setup(td);
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread	*td;

	KASSERT((size == sizeof(struct thread)),
	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct thread)));

	td = (struct thread *)mem;
	pmap_dispose_thread(td);
}
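
/*
 * A note on the four UMA hooks above: thread_init()/thread_fini() run only
 * when an item first enters or finally leaves the zone, so the kernel stack
 * set up by pmap_new_thread() survives across allocations, while
 * thread_ctor()/thread_dtor() run on every uma_zalloc()/uma_zfree() pair.
 * That is what makes the thread "type stable".
 */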

/*
 * KSE is linked onto the idle queue.
 */
void
kse_link(struct kse *ke, struct ksegrp *kg)
{
	struct proc *p = kg->kg_proc;

	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
	kg->kg_kses++;
	ke->ke_state = KES_UNQUEUED;
	ke->ke_proc	= p;
	ke->ke_ksegrp	= kg;
	ke->ke_thread	= NULL;
	ke->ke_oncpu = NOCPU;
}

void
kse_unlink(struct kse *ke)
{
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	kg = ke->ke_ksegrp;
	if (ke->ke_state == KES_IDLE) {
		kg->kg_idle_kses--;
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
	}

	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
	if (--kg->kg_kses == 0) {
		ksegrp_unlink(kg);
	}
	/*
	 * Aggregate stats from the KSE
	 */
	kse_stash(ke);
}

void
ksegrp_link(struct ksegrp *kg, struct proc *p)
{

	TAILQ_INIT(&kg->kg_threads);
	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_slpq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
	TAILQ_INIT(&kg->kg_iq);		/* idle kses in ksegrp */
	TAILQ_INIT(&kg->kg_lq);		/* loan kses in ksegrp */
	kg->kg_proc	= p;
	/* The following counters are in the -zero- section and may not need clearing. */
	kg->kg_numthreads = 0;
	kg->kg_runnable = 0;
	kg->kg_kses = 0;
	kg->kg_idle_kses = 0;
	kg->kg_loan_kses = 0;
	kg->kg_runq_kses = 0; /* XXXKSE change name */
	/* Link it in now that it's consistent. */
	p->p_numksegrps++;
	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
}

void
ksegrp_unlink(struct ksegrp *kg)
{
	struct proc *p;

	mtx_assert(&sched_lock, MA_OWNED);
	p = kg->kg_proc;
	KASSERT(((kg->kg_numthreads == 0) && (kg->kg_kses == 0)),
	    ("ksegrp_unlink: residual threads or KSEs"));
	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
	p->p_numksegrps--;
	/*
	 * Aggregate stats from the KSE
	 */
	ksegrp_stash(kg);
}

/*
 * For a newly created process,
 * link up the structure and its initial threads etc.
 */
void
proc_linkup(struct proc *p, struct ksegrp *kg,
	    struct kse *ke, struct thread *td)
{

	TAILQ_INIT(&p->p_ksegrps);	     /* all ksegrps in proc */
	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
	TAILQ_INIT(&p->p_suspended);	     /* Threads suspended */
	p->p_numksegrps = 0;
	p->p_numthreads = 0;

	ksegrp_link(kg, p);
	kse_link(ke, kg);
	thread_link(td, kg);
}

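/*
 * Find the thread whose user thread-mailbox is uap->tmbx, flag it with
 * TDF_INTERRUPT and, if it is in an interruptible sleep, abort the sleep.
 */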
int
kse_thr_interrupt(struct thread *td, struct kse_thr_interrupt_args *uap)
{
	struct proc *p;
	struct thread *td2;

	p = td->td_proc;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_KSES))
		return (EINVAL);
	if (uap->tmbx == NULL)
		return (EINVAL);
	mtx_lock_spin(&sched_lock);
	FOREACH_THREAD_IN_PROC(p, td2) {
		if (td2->td_mailbox == uap->tmbx) {
			td2->td_flags |= TDF_INTERRUPT;
			if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR)) {
				if (td2->td_flags & TDF_CVWAITQ)
					cv_abort(td2);
				else
					abortsleep(td2);
			}
			mtx_unlock_spin(&sched_lock);
			td->td_retval[0] = 0;
			td->td_retval[1] = 0;
			return (0);
		}
	}
	mtx_unlock_spin(&sched_lock);
	return (ESRCH);
}

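/*
 * Shut down the calling thread's KSE.  If this would leave the process with
 * neither extra threads nor extra groups, just turn KSE mode off; otherwise
 * mark the KSE for exit and let thread_exit() tear both down together.
 */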
int
kse_exit(struct thread *td, struct kse_exit_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;

	p = td->td_proc;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_KSES))
		return (EINVAL);
	/* must be a bound thread */
	if (td->td_flags & TDF_UNBOUND)
		return (EINVAL);
	kg = td->td_ksegrp;
	/* serialize killing kse */
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	if ((kg->kg_kses == 1) && (kg->kg_numthreads > 1)) {
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (EDEADLK);
	}
	if ((p->p_numthreads == 1) && (p->p_numksegrps == 1)) {
		p->p_flag &= ~P_KSES;
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	} else {
		while (mtx_owned(&Giant))
			mtx_unlock(&Giant);
		td->td_kse->ke_flags |= KEF_EXIT;
		thread_exit();
		/* NOTREACHED */
	}
	return (0);
}

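/*
 * A bound, upcall-capable thread parks itself here when it has no work.
 * If other threads remain it can simply exit; if it is the last thread it
 * sleeps in "pause" and is converted back into an upcall when awakened.
 */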
int
kse_release(struct thread *td, struct kse_release_args *uap)
{
	struct proc *p;

	p = td->td_proc;
	/* KSE-enabled processes only */
	if (!(p->p_flag & P_KSES))
		return (EINVAL);
	/*
	 * Must be a bound thread. And the kse must have a mailbox ready;
	 * if not, the kse cannot generate an upcall.
	 */
	if (!(td->td_flags & TDF_UNBOUND) && (td->td_kse->ke_mailbox != NULL)) {
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		/* prevent last thread from exiting */
		if (p->p_numthreads > 1) {
			thread_exit();
			/* NOTREACHED */
		} else {
			mtx_unlock_spin(&sched_lock);
			if (td->td_standin == NULL) {
				PROC_UNLOCK(p);
				td->td_standin = thread_alloc();
				PROC_LOCK(p);
			}
			msleep(p->p_sigacts, &p->p_mtx, PPAUSE|PCATCH,
			       "pause", 0);
			mtx_lock_spin(&sched_lock);
			td->td_flags &= ~TDF_UNBOUND;
			thread_schedule_upcall(td, td->td_kse);
			thread_exit();
		}
	}
	return (EINVAL);
}

/* struct kse_wakeup_args {
	struct kse_mailbox *mbx;
}; */
int
kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
{
	struct proc *p;
	struct kse *ke, *ke2;
	struct ksegrp *kg;

	p = td->td_proc;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_KSES))
		return (EINVAL);
	if (td->td_standin == NULL)
		td->td_standin = thread_alloc();
	ke = NULL;
	mtx_lock_spin(&sched_lock);
	if (uap->mbx) {
		FOREACH_KSEGRP_IN_PROC(p, kg) {
			FOREACH_KSE_IN_GROUP(kg, ke2) {
				if (ke2->ke_mailbox != uap->mbx)
					continue;
				if (ke2->ke_state == KES_IDLE) {
					ke = ke2;
					goto found;
				} else {
					mtx_unlock_spin(&sched_lock);
					td->td_retval[0] = 0;
					td->td_retval[1] = 0;
					return (0);
				}
			}
		}
	} else {
		kg = td->td_ksegrp;
		ke = TAILQ_FIRST(&kg->kg_iq);
	}
	if (ke == NULL) {
		mtx_unlock_spin(&sched_lock);
		return (ESRCH);
	}
found:
	thread_schedule_upcall(td, ke);
	mtx_unlock_spin(&sched_lock);
	td->td_retval[0] = 0;
	td->td_retval[1] = 0;
	return (0);
}

/*
 * No new KSEGRP: on the first call, use the current KSE and do not
 * schedule an upcall.  In all other situations, allocate a new KSE and
 * schedule an upcall on it.
 */
/* struct kse_create_args {
	struct kse_mailbox *mbx;
	int newgroup;
}; */
int
kse_create(struct thread *td, struct kse_create_args *uap)
{
	struct kse *newke;
	struct kse *ke;
	struct ksegrp *newkg;
	struct ksegrp *kg;
	struct proc *p;
	struct kse_mailbox mbx;
	int err;

	p = td->td_proc;
	if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
		return (err);

	p->p_flag |= P_KSES; /* easier to just set it than to test and set */
	kg = td->td_ksegrp;
	if (uap->newgroup) {
		if (p->p_numksegrps >= max_groups_per_proc)
			return (EPROCLIM);
		/*
		 * If we want a new KSEGRP it doesn't matter whether
		 * we have already fired up KSE mode before or not.
		 * We put the process in KSE mode and create a new KSEGRP
		 * and KSE. If our KSE has not got a mailbox yet then
		 * that doesn't matter, just leave it that way. It will
		 * ensure that this thread stays BOUND. It's possible
		 * that the call came from a threaded library and the main
		 * program knows nothing of threads.
		 */
		newkg = ksegrp_alloc();
		bzero(&newkg->kg_startzero, RANGEOF(struct ksegrp,
		      kg_startzero, kg_endzero));
		bcopy(&kg->kg_startcopy, &newkg->kg_startcopy,
		      RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
		newke = kse_alloc();
	} else {
		/*
		 * Otherwise, if we have already set this KSE
		 * to have a mailbox, we want to make another KSE here,
		 * but only if we are not already at the limit, which
		 * is 1 per CPU max.
		 *
		 * If the current KSE doesn't have a mailbox we just use it
		 * and give it one.
		 *
		 * Because we don't like to access
		 * the KSE outside of schedlock if we are UNBOUND,
		 * (because it can change if we are preempted by an interrupt)
		 * we can deduce it as having a mailbox if we are UNBOUND,
		 * and only need to actually look at it if we are BOUND,
		 * which is safe.
		 */
		if ((td->td_flags & TDF_UNBOUND) || td->td_kse->ke_mailbox) {
			if (oiks_debug == 0) {
#ifdef SMP
				if (kg->kg_kses > mp_ncpus)
#endif
					return (EPROCLIM);
			}
			newke = kse_alloc();
		} else {
			newke = NULL;
		}
		newkg = NULL;
	}
	if (newke) {
		bzero(&newke->ke_startzero, RANGEOF(struct kse,
		      ke_startzero, ke_endzero));
#if 0
		bcopy(&ke->ke_startcopy, &newke->ke_startcopy,
		      RANGEOF(struct kse, ke_startcopy, ke_endcopy));
#endif
		/* For the first call this may not have been set */
		if (td->td_standin == NULL) {
			td->td_standin = thread_alloc();
		}
		mtx_lock_spin(&sched_lock);
		if (newkg) {
			if (p->p_numksegrps >= max_groups_per_proc) {
				mtx_unlock_spin(&sched_lock);
				ksegrp_free(newkg);
				kse_free(newke);
				return (EPROCLIM);
			}
			ksegrp_link(newkg, p);
		} else
			newkg = kg;
		kse_link(newke, newkg);
		if (p->p_sflag & PS_NEEDSIGCHK)
			newke->ke_flags |= KEF_ASTPENDING;
		newke->ke_mailbox = uap->mbx;
		newke->ke_upcall = mbx.km_func;
		bcopy(&mbx.km_stack, &newke->ke_stack, sizeof(stack_t));
		thread_schedule_upcall(td, newke);
		mtx_unlock_spin(&sched_lock);
	} else {
		/*
		 * If we didn't allocate a new KSE then we are using
		 * the existing (BOUND) kse.
		 */
		ke = td->td_kse;
		ke->ke_mailbox = uap->mbx;
		ke->ke_upcall = mbx.km_func;
		bcopy(&mbx.km_stack, &ke->ke_stack, sizeof(stack_t));
	}
	/*
	 * Fill out the KSE-mode specific fields of the new kse.
	 */

	td->td_retval[0] = 0;
	td->td_retval[1] = 0;
	return (0);
}
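
#if 0
/*
 * Illustrative userland sketch (not compiled): a threading library would
 * fill in a struct kse_mailbox and invoke the syscall.  The mailbox member
 * names follow the usage above; the UTS entry point, stack and size are
 * hypothetical names invented for this example.
 */
	struct kse_mailbox mbx;
	int error;

	bzero(&mbx, sizeof(mbx));
	mbx.km_func = uts_entry;		/* upcall (UTS) entry point */
	mbx.km_stack.ss_sp = uts_stack;		/* stack for the UTS to run on */
	mbx.km_stack.ss_size = UTS_STACK_SIZE;
	error = kse_create(&mbx, 1);		/* newgroup != 0: new KSEGRP */
#endif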

/*
 * Fill a ucontext_t with a thread's context information.
 *
 * This is an analogue to getcontext(3).
 */
void
thread_getcontext(struct thread *td, ucontext_t *uc)
{

/*
 * XXX this is declared in a MD include file, i386/include/ucontext.h but
 * is used in MI code.
 */
#ifdef __i386__
	get_mcontext(td, &uc->uc_mcontext);
#endif
	uc->uc_sigmask = td->td_proc->p_sigmask;
}

/*
 * Set a thread's context from a ucontext_t.
 *
 * This is an analogue to setcontext(3).
 */
int
thread_setcontext(struct thread *td, ucontext_t *uc)
{
	int ret;

/*
 * XXX this is declared in a MD include file, i386/include/ucontext.h but
 * is used in MI code.
 */
#ifdef __i386__
	ret = set_mcontext(td, &uc->uc_mcontext);
#else
	ret = ENOSYS;
#endif
	if (ret == 0) {
		SIG_CANTMASK(uc->uc_sigmask);
		PROC_LOCK(td->td_proc);
		td->td_proc->p_sigmask = uc->uc_sigmask;
		PROC_UNLOCK(td->td_proc);
	}
	return (ret);
}

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

#ifndef __ia64__
	thread_zone = uma_zcreate("THREAD", sizeof (struct thread),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, 0);
#else
	/*
	 * XXX the ia64 kstack allocator is really lame and is at the mercy
	 * of contigmalloc().  This hackery is to pre-construct a whole
	 * pile of thread structures with associated kernel stacks early
	 * in the system startup while contigmalloc() still works. Once we
	 * have them, keep them.  Sigh.
	 */
	thread_zone = uma_zcreate("THREAD", sizeof (struct thread),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, UMA_ZONE_NOFREE);
	uma_prealloc(thread_zone, 512);		/* XXX arbitrary */
#endif
	ksegrp_zone = uma_zcreate("KSEGRP", sizeof (struct ksegrp),
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_CACHE, 0);
	kse_zone = uma_zcreate("KSE", sizeof (struct kse),
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_CACHE, 0);
}

/*
 * Stash an embarrassingly extra thread into the zombie thread queue.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&zombie_thread_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
	mtx_unlock_spin(&zombie_thread_lock);
}

/*
 * Stash an embarrassingly extra kse into the zombie kse queue.
 */
void
kse_stash(struct kse *ke)
{
	mtx_lock_spin(&zombie_thread_lock);
	TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
	mtx_unlock_spin(&zombie_thread_lock);
}

/*
 * Stash an embarrassingly extra ksegrp into the zombie ksegrp queue.
 */
void
ksegrp_stash(struct ksegrp *kg)
{
	mtx_lock_spin(&zombie_thread_lock);
	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
	mtx_unlock_spin(&zombie_thread_lock);
}

/*
 * Reap zombie threads.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;
	struct kse *ke_first, *ke_next;
	struct ksegrp *kg_first, *kg_next;

	/*
	 * Don't even bother to lock if none at this instant;
	 * we really don't care about the next instant.
	 */
	if ((!TAILQ_EMPTY(&zombie_threads))
	    || (!TAILQ_EMPTY(&zombie_kses))
	    || (!TAILQ_EMPTY(&zombie_ksegrps))) {
		mtx_lock_spin(&zombie_thread_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		ke_first = TAILQ_FIRST(&zombie_kses);
		kg_first = TAILQ_FIRST(&zombie_ksegrps);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		if (ke_first)
			TAILQ_INIT(&zombie_kses);
		if (kg_first)
			TAILQ_INIT(&zombie_ksegrps);
		mtx_unlock_spin(&zombie_thread_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_runq);
			thread_free(td_first);
			td_first = td_next;
		}
		while (ke_first) {
			ke_next = TAILQ_NEXT(ke_first, ke_procq);
			kse_free(ke_first);
			ke_first = ke_next;
		}
		while (kg_first) {
			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
			ksegrp_free(kg_first);
			kg_first = kg_next;
		}
	}
}
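
/*
 * Note the two-phase structure above: the zombie lists are detached and
 * reinitialized while the spin lock is held, but the actual frees (which
 * go back to UMA and may take the allocator's own locks) happen only
 * after it has been dropped.
 */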

/*
 * Allocate a ksegrp.
 */
struct ksegrp *
ksegrp_alloc(void)
{
	return (uma_zalloc(ksegrp_zone, M_WAITOK));
}

/*
 * Allocate a kse.
 */
struct kse *
kse_alloc(void)
{
	return (uma_zalloc(kse_zone, M_WAITOK));
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{
	thread_reap(); /* check if any zombies to get */
	return (uma_zalloc(thread_zone, M_WAITOK));
}

/*
 * Deallocate a ksegrp.
 */
void
ksegrp_free(struct ksegrp *kg)
{
	uma_zfree(ksegrp_zone, kg);
}

/*
 * Deallocate a kse.
 */
void
kse_free(struct kse *ke)
{
	uma_zfree(kse_zone, ke);
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{
	uma_zfree(thread_zone, td);
}

/*
 * Store the thread context in the UTS's mailbox,
 * then add the mailbox at the head of a list we are building in user space.
 * The list is anchored in the ksegrp structure.
 */
int
thread_export_context(struct thread *td)
{
	struct proc *p;
	struct ksegrp *kg;
	uintptr_t mbx;
	void *addr;
	int error;
	ucontext_t uc;

	p = td->td_proc;
	kg = td->td_ksegrp;

	/* Export the user/machine context. */
#if 0
	addr = (caddr_t)td->td_mailbox +
	    offsetof(struct kse_thr_mailbox, tm_context);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&td->td_mailbox->tm_context);
#endif
	error = copyin(addr, &uc, sizeof(ucontext_t));
	if (error == 0) {
		thread_getcontext(td, &uc);
		error = copyout(&uc, addr, sizeof(ucontext_t));
	}
	if (error) {
		PROC_LOCK(p);
		psignal(p, SIGSEGV);
		PROC_UNLOCK(p);
		return (error);
	}
	/* Get the address, in the latest mailbox, of the list pointer. */
#if 0
	addr = (caddr_t)td->td_mailbox
	    + offsetof(struct kse_thr_mailbox, tm_next);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&td->td_mailbox->tm_next);
#endif
	/*
	 * Put the saved address of the previous first
	 * entry into this one.
	 */
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			PROC_LOCK(p);
			psignal(p, SIGSEGV);
			PROC_UNLOCK(p);
			return (EFAULT);
		}
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = td->td_mailbox;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	return (0);
}
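
/*
 * The loop above is an optimistic lock-free prepend.  suword() can fault
 * and so may not run with locks held; we therefore store the current list
 * head into the user-space mailbox unlocked, then re-check under the proc
 * lock that the head has not moved before publishing the new entry.  If
 * another thread won the race, we simply retry with the new head.
 */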

/*
 * Take the list of completed mailboxes for this KSEGRP and put them on this
 * KSE's mailbox as it's the next one going up.
 */
static int
thread_link_mboxes(struct ksegrp *kg, struct kse *ke)
{
	struct proc *p = kg->kg_proc;
	void *addr;
	uintptr_t mbx;

#if 0
	addr = (caddr_t)ke->ke_mailbox
	    + offsetof(struct kse_mailbox, km_completed);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&ke->ke_mailbox->km_completed);
#endif
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			PROC_LOCK(p);
			psignal(p, SIGSEGV);
			PROC_UNLOCK(p);
			return (EFAULT);
		}
		/* XXXKSE could use atomic CMPXCH here */
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = NULL;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	return (0);
}
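
/*
 * Same optimistic pattern as in thread_export_context(), except that the
 * whole completed chain is handed off to the KSE's mailbox and
 * kg_completed is reset to NULL.
 */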

/*
 * Discard the current thread and exit from its context.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our KSE's ke_tdspare slot, freeing the
 * thread that might be there currently. Because we know that only this
 * processor will run our KSE, we needn't worry about someone else grabbing
 * our context before we do a cpu_throw.
 */
void
thread_exit(void)
{
	struct thread *td;
	struct kse *ke;
	struct proc *p;
	struct ksegrp *kg;

	td = curthread;
	kg = td->td_ksegrp;
	p = td->td_proc;
	ke = td->td_kse;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	KASSERT(ke != NULL, ("thread exiting without a kse"));
	KASSERT(kg != NULL, ("thread exiting without a kse group"));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	CTR1(KTR_PROC, "thread_exit: thread %p", td);
	KASSERT(!mtx_owned(&Giant), ("dying thread owns giant"));

	if (ke->ke_tdspare != NULL) {
		thread_stash(ke->ke_tdspare);
		ke->ke_tdspare = NULL;
	}
	if (td->td_standin != NULL) {
		thread_stash(td->td_standin);
		td->td_standin = NULL;
	}

	cpu_thread_exit(td);	/* XXXSMP */

	/*
	 * The last thread is left attached to the process
	 * so that the whole bundle gets recycled. Skip
	 * all this stuff.
	 */
	if (p->p_numthreads > 1) {
		/*
		 * Unlink this thread from its proc and the kseg.
		 * In keeping with the other structs we probably should
		 * have a thread_unlink() that does some of this but it
		 * would only be called from here (I think) so it would
		 * be a waste. (might be useful for proc_fini() as well.)
		 */
		TAILQ_REMOVE(&p->p_threads, td, td_plist);
		p->p_numthreads--;
		TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
		kg->kg_numthreads--;
		/*
		 * The test below is NOT true if we are the
		 * sole exiting thread. P_STOPPED_SINGLE is unset
		 * in exit1() after it is the only survivor.
		 */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}

		/* Reassign this thread's KSE. */
		ke->ke_thread = NULL;
		td->td_kse = NULL;
		ke->ke_state = KES_UNQUEUED;
		KASSERT((ke->ke_bound != td),
		    ("thread_exit: entered with ke_bound set"));

		/*
		 * The reason for all this hoopla is
		 * an attempt to stop our thread stack from being freed
		 * until AFTER we have stopped running on it.
		 * Since we are under schedlock, almost any method where
		 * it is eventually freed by someone else is probably ok.
		 * (Especially if they do it under schedlock). We could
		 * almost free it here if we could be certain that
		 * the uma code wouldn't pull it apart immediately,
		 * but unfortunately we cannot guarantee that.
		 *
		 * For threads that are exiting and NOT killing their
		 * KSEs we can just stash it in the KSE, however
		 * in the case where the KSE is also being deallocated,
		 * we need to store it somewhere else. It turns out that
		 * we will never free the last KSE, so there is always one
		 * other KSE available. We might as well just choose one
		 * and stash it there. Being under schedlock should make that
		 * safe.
		 *
		 * In borrower threads, we can stash it in the lender,
		 * where it won't be needed until this thread is long gone.
		 * Borrower threads can't kill their KSE anyhow, so even
		 * the KSE would be a safe place for them. It is not
		 * necessary to have a KSE (or KSEGRP) at all beyond this
		 * point, while we are under the protection of schedlock.
		 *
		 * Either give the KSE to another thread to use (or make
		 * it idle), or free it entirely, possibly along with its
		 * ksegrp if it's the last one.
		 */
		if (ke->ke_flags & KEF_EXIT) {
			kse_unlink(ke);
			/*
			 * Designate another KSE to hold our thread.
			 * Safe as long as we abide by whatever lock
			 * we control it with.. The other KSE will not
			 * be able to run it until we release the schedlock,
			 * but we need to be careful about it deciding to
			 * write to the stack before then. Luckily
			 * I believe that while another thread's
			 * standin thread can be used in this way, the
			 * spare thread for the KSE cannot be used without
			 * holding schedlock at least once.
			 */
			ke = FIRST_KSE_IN_PROC(p);
		} else {
			kse_reassign(ke);
		}
#if 0
		if (ke->ke_bound) {
			/*
			 * WE are a borrower..
			 * stash our thread with the owner.
			 */
			if (ke->ke_bound->td_standin) {
				thread_stash(ke->ke_bound->td_standin);
			}
			ke->ke_bound->td_standin = td;
		} else {
#endif
			if (ke->ke_tdspare != NULL) {
				thread_stash(ke->ke_tdspare);
				ke->ke_tdspare = NULL;
			}
			ke->ke_tdspare = td;
#if 0
		}
#endif
		PROC_UNLOCK(p);
		td->td_state	= TDS_INACTIVE;
		td->td_proc	= NULL;
		td->td_ksegrp	= NULL;
		td->td_last_kse	= NULL;
	} else {
		PROC_UNLOCK(p);
	}

	cpu_throw();
	/* NOTREACHED */
}

/*
 * Link a thread to a process.
 * Set up anything that needs to be initialized for it to
 * be used by the process.
 *
 * Note that we do not link to the proc's ucred here.
 * The thread is linked as if running but no KSE assigned.
 */
void
thread_link(struct thread *td, struct ksegrp *kg)
{
	struct proc *p;

	p = kg->kg_proc;
	td->td_state = TDS_INACTIVE;
	td->td_proc	= p;
	td->td_ksegrp	= kg;
	td->td_last_kse	= NULL;

	LIST_INIT(&td->td_contested);
	callout_init(&td->td_slpcallout, 1);
	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
	p->p_numthreads++;
	kg->kg_numthreads++;
	if (oiks_debug && (p->p_numthreads > oiks_max_threads_per_proc)) {
		printf("OIKS %d\n", p->p_numthreads);
		if (oiks_debug > 1)
			Debugger("OIKS");
	}
	td->td_kse	= NULL;
}

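/*
 * Called when a process is collapsing back to a single thread: stash every
 * idle KSE and every ksegrp other than td's for reaping, then relink td's
 * own ksegrp as the process's only group.
 */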
void
kse_purge(struct proc *p, struct thread *td)
{
	struct kse *ke;
	struct ksegrp *kg;

	KASSERT(p->p_numthreads == 1, ("bad thread number"));
	mtx_lock_spin(&sched_lock);
	while ((kg = TAILQ_FIRST(&p->p_ksegrps)) != NULL) {
		while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			kg->kg_idle_kses--;
			TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
			kg->kg_kses--;
			if (ke->ke_tdspare)
				thread_stash(ke->ke_tdspare);
			kse_stash(ke);
		}
		TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
		p->p_numksegrps--;
		KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
		    ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
		    ("wrong kg_kses"));
		if (kg != td->td_ksegrp) {
			ksegrp_stash(kg);
		}
	}
	TAILQ_INSERT_HEAD(&p->p_ksegrps, td->td_ksegrp, kg_ksegrp);
	p->p_numksegrps++;
	mtx_unlock_spin(&sched_lock);
}

/*
 * Create a thread and schedule it for upcall on the KSE given.
 */
struct thread *
thread_schedule_upcall(struct thread *td, struct kse *ke)
{
	struct thread *td2;
	struct ksegrp *kg;
	int newkse;

	mtx_assert(&sched_lock, MA_OWNED);
	newkse = (ke != td->td_kse);

	/*
	 * If the kse is already owned by another thread then we can't
	 * schedule an upcall because the other thread must be BOUND
	 * which means it is not in a position to take an upcall.
	 * We must be borrowing the KSE to allow us to complete some in-kernel
	 * work. When we complete, the bound thread will have the chance to
	 * complete. This thread will sleep as planned. Hopefully there will
	 * eventually be an unbound thread that can be converted to an
	 * upcall to report the completion of this thread.
	 */
	if (ke->ke_bound && ((ke->ke_bound->td_flags & TDF_UNBOUND) == 0)) {
		return (NULL);
	}
	KASSERT((ke->ke_bound == NULL), ("kse already bound"));

	if (ke->ke_state == KES_IDLE) {
		kg = ke->ke_ksegrp;
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses--;
		ke->ke_state = KES_UNQUEUED;
	}
	if ((td2 = td->td_standin) != NULL) {
		td->td_standin = NULL;
	} else {
		if (newkse)
			panic("no reserve thread when called with a new kse");
		/*
		 * If called from (e.g.) sleep and we do not have
		 * a reserve thread, then we've used it, so do not
		 * create an upcall.
		 */
		return (NULL);
	}
	CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
	     td2, td->td_proc->p_pid, td->td_proc->p_comm);
	bzero(&td2->td_startzero,
	    (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
	bcopy(&td->td_startcopy, &td2->td_startcopy,
	    (unsigned)RANGEOF(struct thread, td_startcopy, td_endcopy));
	thread_link(td2, ke->ke_ksegrp);
	cpu_set_upcall(td2, td->td_pcb);

	/*
	 * XXXKSE do we really need this? (default values for the
	 * frame).
	 */
	bcopy(td->td_frame, td2->td_frame, sizeof(struct trapframe));

	/*
	 * Bind the new thread to the KSE,
	 * and if it's our KSE, lend it back to ourself
	 * so we can continue running.
	 */
	td2->td_ucred = crhold(td->td_ucred);
	td2->td_flags = TDF_UPCALLING; /* note: BOUND */
	td2->td_kse = ke;
	td2->td_state = TDS_CAN_RUN;
	td2->td_inhibitors = 0;
	/*
	 * If called from msleep(), we are working on the current
	 * KSE so fake that we borrowed it. If called from
	 * kse_create(), don't, as we have a new kse too.
	 */
	if (!newkse) {
		/*
		 * This thread will be scheduled when the current thread
		 * blocks, exits or tries to enter userspace, (whichever
		 * happens first). When that happens the KSE will "revert"
		 * to this thread in a BOUND manner. Since we are called
		 * from msleep() this is going to be "very soon" in nearly
		 * all cases.
		 */
		ke->ke_bound = td2;
		TD_SET_LOAN(td2);
	} else {
		ke->ke_bound = NULL;
		ke->ke_thread = td2;
		ke->ke_state = KES_THREAD;
		setrunqueue(td2);
	}
	return (td2);	/* bogus.. should be a void function */
}

/*
 * Schedule an upcall to notify a KSE process that it received signals.
 *
 * XXX - Modifying a sigset_t like this is totally bogus.
 */
struct thread *
signal_upcall(struct proc *p, int sig)
{
	struct thread *td, *td2;
	struct kse *ke;
	sigset_t ss;
	int error;

	PROC_LOCK_ASSERT(p, MA_OWNED);
return (NULL);	/* XXX: signal upcalls are disabled for now. */

	td = FIRST_THREAD_IN_PROC(p);
	ke = td->td_kse;
	PROC_UNLOCK(p);
	error = copyin(&ke->ke_mailbox->km_sigscaught, &ss, sizeof(sigset_t));
	PROC_LOCK(p);
	if (error)
		return (NULL);
	SIGADDSET(ss, sig);
	PROC_UNLOCK(p);
	error = copyout(&ss, &ke->ke_mailbox->km_sigscaught, sizeof(sigset_t));
	PROC_LOCK(p);
	if (error)
		return (NULL);
	if (td->td_standin == NULL)
		td->td_standin = thread_alloc();
	mtx_lock_spin(&sched_lock);
	td2 = thread_schedule_upcall(td, ke); /* Bogus JRE */
	mtx_unlock_spin(&sched_lock);
	return (td2);
}

/*
 * Setup done on the thread when it enters the kernel.
 * XXXKSE Presently only for syscalls but eventually all kernel entries.
 */
void
thread_user_enter(struct proc *p, struct thread *td)
{
	struct kse *ke;

	/*
	 * First check that we shouldn't just abort.
	 * But check if we are the single thread first!
	 * XXX p_singlethread not locked, but should be safe.
	 */
	if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) {
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		thread_exit();
		/* NOTREACHED */
	}

	/*
	 * If we are doing a syscall in a KSE environment,
	 * note where our mailbox is. There is always the
	 * possibility that we could do this lazily (in sleep()),
	 * but for now do it every time.
	 */
	ke = td->td_kse;
	if (ke->ke_mailbox != NULL) {
#if 0
		td->td_mailbox = (void *)fuword((caddr_t)ke->ke_mailbox
		    + offsetof(struct kse_mailbox, km_curthread));
#else /* if user pointer arithmetic is ok in the kernel */
		td->td_mailbox =
		    (void *)fuword((void *)&ke->ke_mailbox->km_curthread);
#endif
		if ((td->td_mailbox == NULL) ||
		    (td->td_mailbox == (void *)-1) ||
		    (p->p_numthreads > max_threads_per_proc)) {
			td->td_mailbox = NULL;	/* single thread it.. */
			td->td_flags &= ~TDF_UNBOUND;
		} else {
			if (td->td_standin == NULL)
				td->td_standin = thread_alloc();
			td->td_flags |= TDF_UNBOUND;
		}
	}
}
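
/*
 * In other words: the UTS publishes the current thread's mailbox in
 * km_curthread.  A NULL (or faulted, -1) value, or too many threads,
 * makes this syscall run bound, with plain single-threaded semantics;
 * otherwise the thread runs UNBOUND, with a standin thread kept ready
 * in case an upcall must be scheduled.
 */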
1281105900Sjulian
1282105900Sjulian/*
1283103410Smini * The extra work we go through if we are a threaded process when we
1284103410Smini * return to userland.
1285103410Smini *
128699026Sjulian * If we are a KSE process and returning to user mode, check for
128799026Sjulian * extra work to do before we return (e.g. for more syscalls
128899026Sjulian * to complete first).  If we were in a critical section, we should
128999026Sjulian * just return to let it finish. Same if we were in the UTS (in
1290103410Smini * which case the mailbox's context's busy indicator will be set).
1291103410Smini * The only traps we suport will have set the mailbox.
1292103410Smini * We will clear it here.
129399026Sjulian */
129499026Sjulianint
1295103838Sjulianthread_userret(struct thread *td, struct trapframe *frame)
129699026Sjulian{
1297103410Smini	int error;
1298104031Sjulian	int unbound;
1299104031Sjulian	struct kse *ke;
1300104695Sjulian	struct ksegrp *kg;
1301104695Sjulian	struct thread *td2;
1302104695Sjulian	struct proc *p;
130399026Sjulian
1304104695Sjulian	error = 0;
1305104157Sjulian
1306104031Sjulian	unbound = td->td_flags & TDF_UNBOUND;
1307104695Sjulian
1308104695Sjulian	kg = td->td_ksegrp;
1309104695Sjulian	p = td->td_proc;
1310104695Sjulian
1311103410Smini	/*
1312104695Sjulian	 * Originally bound threads never upcall but they may
1313104695Sjulian	 * loan out their KSE at this point.
1314104695Sjulian	 * Upcalls imply bound.. They also may want to do some Philantropy.
1315104695Sjulian	 * Unbound threads on the other hand either yield to other work
1316104695Sjulian	 * or transform into an upcall.
1317104695Sjulian	 * (having saved their context to user space in both cases)
1318103410Smini	 */
1319106182Sdavidxu	if (unbound) {
1320104695Sjulian		/*
1321104695Sjulian		 * We are an unbound thread, looking to return to
1322104695Sjulian		 * user space.
1323104695Sjulian		 * THere are several possibilities:
1324104695Sjulian		 * 1) we are using a borrowed KSE. save state and exit.
1325104695Sjulian		 *    kse_reassign() will recycle the kse as needed,
1326104695Sjulian		 * 2) we are not.. save state, and then convert ourself
1327104695Sjulian		 *    to be an upcall, bound to the KSE.
1328104695Sjulian		 *    if there are others that need the kse,
1329104695Sjulian		 *    give them a chance by doing an mi_switch().
1330104695Sjulian		 *    Because we are bound, control will eventually return
1331104695Sjulian		 *    to us here.
1332104695Sjulian		 * ***
1333104695Sjulian		 * Save the thread's context, and link it
1334104695Sjulian		 * into the KSEGRP's list of completed threads.
1335104695Sjulian		 */
1336104695Sjulian		error = thread_export_context(td);
1337104695Sjulian		td->td_mailbox = NULL;
1338104695Sjulian		if (error) {
1339104695Sjulian			/*
1340104695Sjulian			 * If we are not running on a borrowed KSE, then
1341104695Sjulian			 * failing to do the KSE operation just defaults
1342104695Sjulian			 * back to synchonous operation, so just return from
1343104695Sjulian			 * the syscall. If it IS borrowed, there is nothing
1344104695Sjulian			 * we can do. We just lose that context. We
1345104695Sjulian			 * probably should note this somewhere and send
1346104695Sjulian			 * the process a signal.
1347104695Sjulian			 */
1348104695Sjulian			PROC_LOCK(td->td_proc);
1349104695Sjulian			psignal(td->td_proc, SIGSEGV);
1350104695Sjulian			mtx_lock_spin(&sched_lock);
1351104695Sjulian			if (td->td_kse->ke_bound == NULL) {
1352104695Sjulian				td->td_flags &= ~TDF_UNBOUND;
1353104695Sjulian				PROC_UNLOCK(td->td_proc);
1354104695Sjulian				mtx_unlock_spin(&sched_lock);
1355104695Sjulian				return (error);	/* go sync */
1356104695Sjulian			}
1357104695Sjulian			thread_exit();
1358104695Sjulian		}
1359104695Sjulian
1360104695Sjulian		/*
1361104695Sjulian		 * If the KSE is owned and we are borrowing it,
1362104695Sjulian		 * don't make an upcall, just exit so that the owner
1363104695Sjulian		 * can get its KSE if it wants it.
1364104695Sjulian		 * Our context is already safely stored for later
1365104695Sjulian		 * use by the UTS.
1366104695Sjulian		 */
1367104695Sjulian		PROC_LOCK(p);
1368104695Sjulian		mtx_lock_spin(&sched_lock);
1369104695Sjulian		if (td->td_kse->ke_bound) {
1370104695Sjulian			thread_exit();
1371104695Sjulian		}
1372104695Sjulian		PROC_UNLOCK(p);
1373104695Sjulian
1374104695Sjulian		/*
1375104695Sjulian		 * Turn ourself into a bound upcall.
1376104695Sjulian		 * We will rely on kse_reassign()
1377104695Sjulian		 * to make us run at a later time.
1378104695Sjulian		 * We should look just like a scheduled upcall
1379104695Sjulian		 * from msleep() or cv_wait().
1380104695Sjulian		 */
1381104695Sjulian		td->td_flags &= ~TDF_UNBOUND;
1382104695Sjulian		td->td_flags |= TDF_UPCALLING;
1383104695Sjulian		/* Only get here if we have become an upcall */
1384104695Sjulian
1385104695Sjulian	} else {
1386104695Sjulian		mtx_lock_spin(&sched_lock);
1387104695Sjulian	}
1388104695Sjulian	/*
1389104695Sjulian	 * We ARE going back to userland with this KSE.
1390104695Sjulian	 * Check for threads that need to borrow it.
1391104695Sjulian	 * Optimisation: don't call mi_switch() if no-one wants the KSE.
1392104695Sjulian	 * Any other thread that comes ready after this missed the boat.
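	 * (Recall the run-queue invariant: kg_last_assigned is the last
	 * runnable thread that already has a KSE, so its TAILQ successor,
	 * or the head of kg_runq when it is NULL, is the first thread
	 * still waiting for a KSE.)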
1393104695Sjulian	 */
1394104031Sjulian	ke = td->td_kse;
1395104695Sjulian	if ((td2 = kg->kg_last_assigned))
1396104695Sjulian		td2 = TAILQ_NEXT(td2, td_runq);
1397104695Sjulian	else
1398104695Sjulian		td2 = TAILQ_FIRST(&kg->kg_runq);
1399104695Sjulian	if (td2)  {
1400104695Sjulian		/*
1401104695Sjulian		 * Force a switch to more urgent 'in kernel'
1402104695Sjulian		 * work. Control will return to this thread
1403104695Sjulian		 * when there is no more work to do.
1404104695Sjulian		 * kse_reassign() will do that for us.
1405104695Sjulian		 */
1406104695Sjulian		TD_SET_LOAN(td);
1407104695Sjulian		ke->ke_bound = td;
1408104695Sjulian		ke->ke_thread = NULL;
1409104695Sjulian		mi_switch(); /* kse_reassign() will (re)find td2 */
141099026Sjulian	}
1411104695Sjulian	mtx_unlock_spin(&sched_lock);
1412104695Sjulian
1413103410Smini	/*
1414104695Sjulian	 * Optimisation:
1415104695Sjulian	 * Ensure that we have a spare thread available,
1416104695Sjulian	 * for when we re-enter the kernel.
1417103410Smini	 */
1418104695Sjulian	if (td->td_standin == NULL) {
1419104695Sjulian		if (ke->ke_tdspare) {
1420104695Sjulian			td->td_standin = ke->ke_tdspare;
1421104695Sjulian			ke->ke_tdspare = NULL;
1422104695Sjulian		} else {
1423104695Sjulian			td->td_standin = thread_alloc();
1424104695Sjulian		}
1425104695Sjulian	}
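	/*
	 * (The standin is what thread_schedule_upcall() consumes if we
	 * later block in the kernel; allocating it here, on the way out,
	 * means no thread allocation is needed at that awkward point.)
	 */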
1426104695Sjulian
1427104695Sjulian	/*
1428104695Sjulian	 * To get here, we know there is no other need for our
1429104695Sjulian	 * KSE so we can proceed. If not upcalling, go back to
1430104695Sjulian	 * userspace. If we are, get the upcall set up.
1431104695Sjulian	 */
1432104695Sjulian	if ((td->td_flags & TDF_UPCALLING) == 0)
1433103410Smini		return (0);
1434104695Sjulian
1435104695Sjulian	/*
1436104695Sjulian	 * We must be an upcall to get this far.
1437104695Sjulian	 * There is no more work to do and we are going to ride
1438104695Sjulian	 * this thread/KSE up to userland as an upcall.
1439104695Sjulian	 * Do the last parts of the setup needed for the upcall.
1440104695Sjulian	 */
1441104695Sjulian	CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
1442104695Sjulian	    td, td->td_proc->p_pid, td->td_proc->p_comm);
1443104695Sjulian
1444103410Smini	/*
1445104695Sjulian	 * Set user context to the UTS.
1446103410Smini	 */
1447104695Sjulian	cpu_set_upcall_kse(td, ke);
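	/*
	 * (Machine-dependent: this rewrites the trapframe so the return
	 * to user mode enters the UTS upcall handler, typically at the
	 * km_func/km_stack recorded when the mailbox was registered,
	 * instead of resuming the old user context.)
	 */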
144899026Sjulian
1449104695Sjulian	/*
1450104695Sjulian	 * Put any completed mailboxes on this KSE's list.
1451104695Sjulian	 */
1452104695Sjulian	error = thread_link_mboxes(kg, ke);
1453104695Sjulian	if (error)
1454104695Sjulian		goto bad;
1455104031Sjulian
1456104695Sjulian	/*
1457104695Sjulian	 * Set state and mailbox.
1458104695Sjulian	 * From now on we are just a bound outgoing process.
1459104695Sjulian	 * **Problem** userret is often called several times.
1460104695Sjulian	 * It would be nice if this all happened only on the first time
1461104695Sjulian	 * through (the scan for extra work, etc.).
1462104695Sjulian	 */
1463106180Sdavidxu	mtx_lock_spin(&sched_lock);
1464104695Sjulian	td->td_flags &= ~TDF_UPCALLING;
1465106180Sdavidxu	mtx_unlock_spin(&sched_lock);
1466104031Sjulian#if 0
1467104695Sjulian	error = suword((caddr_t)ke->ke_mailbox +
1468104695Sjulian	    offsetof(struct kse_mailbox, km_curthread), 0);
1469104031Sjulian#else	/* if user pointer arithmetic is ok in the kernel */
1470104695Sjulian	error = suword((caddr_t)&ke->ke_mailbox->km_curthread, 0);
1471104031Sjulian#endif
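	/*
	 * For reference, a minimal sketch of the userland (UTS) side of
	 * this handshake, assuming the <sys/kse.h> layout of this era
	 * (km_curthread is a struct kse_thr_mailbox pointer and
	 * tm_context is a ucontext_t); pick_next_thread() stands in for
	 * the UTS's own scheduling policy:
	 *
	 *	// upcall handler: the kernel has zeroed km_curthread
	 *	tmbx = pick_next_thread();
	 *	km->km_curthread = tmbx;	// mark it as running
	 *	resume tmbx->tm_context		// e.g. setcontext()-style
	 */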
1472104695Sjulian	if (!error)
1473104695Sjulian		return (0);
1474104695Sjulian
1475104031Sjulianbad:
1476104031Sjulian	/*
1477104031Sjulian	 * Things are going to be so screwed we should just kill the process.
1478104031Sjulian	 * How do we do that?
1479104031Sjulian	 */
1480104695Sjulian	PROC_LOCK(td->td_proc);
1481104695Sjulian	psignal(td->td_proc, SIGSEGV);
1482104695Sjulian	PROC_UNLOCK(td->td_proc);
1483104695Sjulian	return (error);	/* go sync */
148499026Sjulian}
148599026Sjulian
148699026Sjulian/*
148799026Sjulian * Enforce single-threading.
148899026Sjulian *
148999026Sjulian * Returns 1 if the caller must abort (another thread is waiting to
149099026Sjulian * exit the process or similar). Process is locked!
149199026Sjulian * Returns 0 when you are successfully the only thread running.
149299026Sjulian * A process has successfully single threaded in suspend mode when
149399026Sjulian * there are no threads in user mode. Threads in the kernel must be
149499026Sjulian * allowed to continue until they get to the user boundary. They may even
149599026Sjulian * copy out their return values and data before suspending. They may,
149699026Sjulian * however, be accelerated in reaching the user boundary, as we will
149799026Sjulian * wake up any sleeping threads that are interruptible (PCATCH).
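 *
 * A hypothetical caller sketch (illustrative only; cf. the exit path):
 *
 *	PROC_LOCK(p);
 *	if (thread_single(SINGLE_EXIT))
 *		...;		// a racing single-threader won; abort
 *	// otherwise all other threads exit or park at the user boundary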
149899026Sjulian */
149999026Sjulianint
150099026Sjulianthread_single(int force_exit)
150199026Sjulian{
150299026Sjulian	struct thread *td;
150399026Sjulian	struct thread *td2;
150499026Sjulian	struct proc *p;
150599026Sjulian
150699026Sjulian	td = curthread;
150799026Sjulian	p = td->td_proc;
150899026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
150999026Sjulian	KASSERT((td != NULL), ("curthread is NULL"));
151099026Sjulian
151199026Sjulian	if ((p->p_flag & P_KSES) == 0)
151299026Sjulian		return (0);
151399026Sjulian
1514100648Sjulian	/* Is someone already single threading? */
1515100648Sjulian	if (p->p_singlethread)
151699026Sjulian		return (1);
151799026Sjulian
1518102950Sdavidxu	if (force_exit == SINGLE_EXIT)
151999026Sjulian		p->p_flag |= P_SINGLE_EXIT;
152099026Sjulian	else
152199026Sjulian		p->p_flag &= ~P_SINGLE_EXIT;
1522102950Sdavidxu	p->p_flag |= P_STOPPED_SINGLE;
152399026Sjulian	p->p_singlethread = td;
1524105911Sjulian	/* XXXKSE Which lock protects the values below? */
152599026Sjulian	while ((p->p_numthreads - p->p_suspcount) != 1) {
1526103216Sjulian		mtx_lock_spin(&sched_lock);
152799026Sjulian		FOREACH_THREAD_IN_PROC(p, td2) {
152899026Sjulian			if (td2 == td)
152999026Sjulian				continue;
1530103216Sjulian			if (TD_IS_INHIBITED(td2)) {
1531105911Sjulian				if (force_exit == SINGLE_EXIT) {
1532105911Sjulian					if (TD_IS_SUSPENDED(td2)) {
1533103216Sjulian						thread_unsuspend_one(td2);
1534105911Sjulian					}
1535105911Sjulian					if (TD_ON_SLEEPQ(td2) &&
1536105911Sjulian					    (td2->td_flags & TDF_SINTR)) {
1537105911Sjulian						if (td2->td_flags & TDF_CVWAITQ)
1538105911Sjulian							cv_abort(td2);
1539105911Sjulian						else
1540105911Sjulian							abortsleep(td2);
1541105911Sjulian					}
1542105911Sjulian				} else {
1543105911Sjulian					if (TD_IS_SUSPENDED(td2))
1544105874Sdavidxu						continue;
1545104695Sjulian					/* maybe other inhibited states too? */
1546105970Sdavidxu					if (TD_IS_SLEEPING(td2))
1547105911Sjulian						thread_suspend_one(td2);
154899026Sjulian				}
154999026Sjulian			}
155099026Sjulian		}
1551105911Sjulian		/*
1552105911Sjulian		 * Maybe we suspended some threads... was it enough?
1553105911Sjulian		 */
1554105911Sjulian		if ((p->p_numthreads - p->p_suspcount) == 1) {
1555105911Sjulian			mtx_unlock_spin(&sched_lock);
1556105911Sjulian			break;
1557105911Sjulian		}
1558105911Sjulian
155999026Sjulian		/*
156099026Sjulian		 * Wake us up when everyone else has suspended.
1561100648Sjulian		 * In the meantime we suspend as well.
156299026Sjulian		 */
1563103216Sjulian		thread_suspend_one(td);
156499026Sjulian		mtx_unlock(&Giant);
156599026Sjulian		PROC_UNLOCK(p);
156699026Sjulian		mi_switch();
156799026Sjulian		mtx_unlock_spin(&sched_lock);
156899026Sjulian		mtx_lock(&Giant);
156999026Sjulian		PROC_LOCK(p);
157099026Sjulian	}
1571105854Sjulian	if (force_exit == SINGLE_EXIT)
1572105854Sjulian		kse_purge(p, td);
157399026Sjulian	return (0);
157499026Sjulian}
157599026Sjulian
157699026Sjulian/*
157799026Sjulian * Called in from locations that can safely check to see
157899026Sjulian * whether we have to suspend or at least throttle for a
157999026Sjulian * single-thread event (e.g. fork).
158099026Sjulian *
158199026Sjulian * Such locations include userret().
158299026Sjulian * If the "return_instead" argument is non zero, the thread must be able to
158399026Sjulian * accept 0 (caller may continue), or 1 (caller must abort) as a result.
158499026Sjulian *
158599026Sjulian * The 'return_instead' argument tells the function if it may do a
158699026Sjulian * thread_exit() or suspend, or whether the caller must abort and back
158799026Sjulian * out instead.
158899026Sjulian *
158999026Sjulian * If the thread that set the single_threading request has set the
159099026Sjulian * P_SINGLE_EXIT bit in the process flags then this call will never return
159199026Sjulian * if 'return_instead' is false, but will exit.
159299026Sjulian *
159399026Sjulian * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
159499026Sjulian *---------------+--------------------+---------------------
159599026Sjulian *       0       | returns 0          |   returns 0 or 1
159699026Sjulian *               | when ST ends       |   immediately
159799026Sjulian *---------------+--------------------+---------------------
159899026Sjulian *       1       | thread exits       |   returns 1
159999026Sjulian *               |                    |  immediately
160099026Sjulian * 0 = thread_exit() or suspension ok,
160199026Sjulian * other = return error instead of stopping the thread.
160299026Sjulian *
160399026Sjulian * While a full suspension is under effect, even a single threading
160499026Sjulian * thread would be suspended if it made this call (but it shouldn't).
160599026Sjulian * This call should only be made from places where
160699026Sjulian * thread_exit() would be safe as that may be the outcome unless
160799026Sjulian * return_instead is set.
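 *
 * A hypothetical boundary-check sketch (illustrative; the real check
 * is made on the way back to user mode, e.g. from userret()/ast()):
 *
 *	PROC_LOCK(p);
 *	if (P_SHOULDSTOP(p))
 *		thread_suspend_check(0);   // may suspend; may not return
 *	PROC_UNLOCK(p);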
160899026Sjulian */
160999026Sjulianint
161099026Sjulianthread_suspend_check(int return_instead)
161199026Sjulian{
1612104502Sjmallett	struct thread *td;
1613104502Sjmallett	struct proc *p;
1614105854Sjulian	struct kse *ke;
1615105854Sjulian	struct ksegrp *kg;
161699026Sjulian
161799026Sjulian	td = curthread;
161899026Sjulian	p = td->td_proc;
1619105854Sjulian	kg = td->td_ksegrp;
162099026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
162199026Sjulian	while (P_SHOULDSTOP(p)) {
1622102950Sdavidxu		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
162399026Sjulian			KASSERT(p->p_singlethread != NULL,
162499026Sjulian			    ("singlethread not set"));
162599026Sjulian			/*
1626100648Sjulian			 * The only suspension in action is a
1627100648Sjulian			 * single-threading. The single threader need not stop.
1628100646Sjulian			 * XXX Should be safe to access unlocked
1629100646Sjulian			 * as it can only be set to be true by us.
163099026Sjulian			 */
1631100648Sjulian			if (p->p_singlethread == td)
163299026Sjulian				return (0);	/* Exempt from stopping. */
163399026Sjulian		}
1634100648Sjulian		if (return_instead)
163599026Sjulian			return (1);
163699026Sjulian
163799026Sjulian		/*
163899026Sjulian		 * If the process is waiting for us to exit,
163999026Sjulian		 * this thread should just suicide.
1640102950Sdavidxu		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
164199026Sjulian		 */
164299026Sjulian		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
164399026Sjulian			mtx_lock_spin(&sched_lock);
164499026Sjulian			while (mtx_owned(&Giant))
164599026Sjulian				mtx_unlock(&Giant);
1646105854Sjulian			/*
1647105854Sjulian			 * Free extra KSEs and KSEGRPs. We needn't worry
1648105854Sjulian			 * about the case where the current thread is in
1649105854Sjulian			 * the same ksegrp as p_singlethread and the last
1650105854Sjulian			 * KSE in that group gets killed: that is guarded
1651105854Sjulian			 * by kg_numthreads, which here must be > 1.
1652105854Sjulian			 */
1653105854Sjulian			ke = td->td_kse;
1654105854Sjulian			if (ke->ke_bound == NULL &&
1655105854Sjulian			    ((kg->kg_kses != 1) || (kg->kg_numthreads == 1)))
1656105854Sjulian				ke->ke_flags |= KEF_EXIT;
165799026Sjulian			thread_exit();
165899026Sjulian		}
165999026Sjulian
166099026Sjulian		/*
166199026Sjulian		 * When a thread suspends, it just
166299026Sjulian		 * moves to the process's suspend queue
166399026Sjulian		 * and stays there.
166499026Sjulian		 *
166599026Sjulian		 * XXXKSE if TDF_BOUND is true
166699026Sjulian		 * it will not release its KSE, which might
166799026Sjulian		 * lead to deadlock if there are not enough KSEs
166899026Sjulian		 * to complete all waiting threads.
166999026Sjulian		 * Maybe we could 'lend' it out again
167099026Sjulian		 * (lent KSEs cannot go back to userland?),
167199026Sjulian		 * and it can only be lent in the STOPPED state.
167299026Sjulian		 */
1673102238Sjulian		mtx_lock_spin(&sched_lock);
1674102950Sdavidxu		if ((p->p_flag & P_STOPPED_SIG) &&
1675102238Sjulian		    (p->p_suspcount+1 == p->p_numthreads)) {
1676102238Sjulian			mtx_unlock_spin(&sched_lock);
1677102238Sjulian			PROC_LOCK(p->p_pptr);
1678102238Sjulian			if ((p->p_pptr->p_procsig->ps_flag &
1679102238Sjulian				PS_NOCLDSTOP) == 0) {
1680102238Sjulian				psignal(p->p_pptr, SIGCHLD);
1681102238Sjulian			}
1682102238Sjulian			PROC_UNLOCK(p->p_pptr);
1683103055Sjulian			mtx_lock_spin(&sched_lock);
1684102238Sjulian		}
168599026Sjulian		mtx_assert(&Giant, MA_NOTOWNED);
1686103216Sjulian		thread_suspend_one(td);
168799026Sjulian		PROC_UNLOCK(p);
1688102950Sdavidxu		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
1689100632Sjulian			if (p->p_numthreads == p->p_suspcount) {
1690103216Sjulian				thread_unsuspend_one(p->p_singlethread);
1691100632Sjulian			}
1692100632Sjulian		}
1693100594Sjulian		p->p_stats->p_ru.ru_nivcsw++;
169499026Sjulian		mi_switch();
169599026Sjulian		mtx_unlock_spin(&sched_lock);
169699026Sjulian		PROC_LOCK(p);
169799026Sjulian	}
169899026Sjulian	return (0);
169999026Sjulian}
170099026Sjulian
1701102898Sdavidxuvoid
1702102898Sdavidxuthread_suspend_one(struct thread *td)
1703102898Sdavidxu{
1704102898Sdavidxu	struct proc *p = td->td_proc;
1705102898Sdavidxu
1706102898Sdavidxu	mtx_assert(&sched_lock, MA_OWNED);
1707102898Sdavidxu	p->p_suspcount++;
1708103216Sjulian	TD_SET_SUSPENDED(td);
1709102898Sdavidxu	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
1710103216Sjulian	/*
1711103216Sjulian	 * Hack: If we are suspending but are on the sleep queue
1712103216Sjulian	 * then we are in msleep or the cv equivalent. We
1713103216Sjulian	 * want to look like we have two inhibitors.
1714105911Sjulian	 * It may already be set... that doesn't matter.
1715103216Sjulian	 */
1716103216Sjulian	if (TD_ON_SLEEPQ(td))
1717103216Sjulian		TD_SET_SLEEPING(td);
1718102898Sdavidxu}
1719102898Sdavidxu
1720102898Sdavidxuvoid
1721102898Sdavidxuthread_unsuspend_one(struct thread *td)
1722102898Sdavidxu{
1723102898Sdavidxu	struct proc *p = td->td_proc;
1724102898Sdavidxu
1725102898Sdavidxu	mtx_assert(&sched_lock, MA_OWNED);
1726102898Sdavidxu	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
1727103216Sjulian	TD_CLR_SUSPENDED(td);
1728102898Sdavidxu	p->p_suspcount--;
1729103216Sjulian	setrunnable(td);
1730102898Sdavidxu}
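
/*
 * Both helpers above assume sched_lock is held; an illustrative
 * caller sketch:
 *
 *	mtx_lock_spin(&sched_lock);
 *	thread_suspend_one(td);		// p_suspcount++, park on p_suspended
 *	...
 *	thread_unsuspend_one(td);	// dequeue and setrunnable()
 *	mtx_unlock_spin(&sched_lock);
 */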
1731102898Sdavidxu
173299026Sjulian/*
173399026Sjulian * Allow all threads blocked by single threading to continue running.
173499026Sjulian */
173599026Sjulianvoid
173699026Sjulianthread_unsuspend(struct proc *p)
173799026Sjulian{
173899026Sjulian	struct thread *td;
173999026Sjulian
1740100646Sjulian	mtx_assert(&sched_lock, MA_OWNED);
174199026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
174299026Sjulian	if (!P_SHOULDSTOP(p)) {
174399026Sjulian		while (( td = TAILQ_FIRST(&p->p_suspended))) {
1744102898Sdavidxu			thread_unsuspend_one(td);
174599026Sjulian		}
1746102950Sdavidxu	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
174799026Sjulian	    (p->p_numthreads == p->p_suspcount)) {
174899026Sjulian		/*
174999026Sjulian		 * Stopping everything also did the job for the single
175099026Sjulian		 * threading request. Now we've downgraded to single-threaded,
175199026Sjulian		 * let it continue.
175299026Sjulian		 */
1753102898Sdavidxu		thread_unsuspend_one(p->p_singlethread);
175499026Sjulian	}
175599026Sjulian}
175699026Sjulian
175799026Sjulianvoid
175899026Sjulianthread_single_end(void)
175999026Sjulian{
176099026Sjulian	struct thread *td;
176199026Sjulian	struct proc *p;
176299026Sjulian
176399026Sjulian	td = curthread;
176499026Sjulian	p = td->td_proc;
176599026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
1766102950Sdavidxu	p->p_flag &= ~P_STOPPED_SINGLE;
176799026Sjulian	p->p_singlethread = NULL;
1768102292Sjulian	/*
1769102292Sjulian	 * If there are other threads, they may now run,
1770102292Sjulian	 * unless of course there is a blanket 'stop order'
1771102292Sjulian	 * on the process. The single threader must be allowed
1772102292Sjulian	 * to continue, however, as this is a bad place to stop.
1773102292Sjulian	 */
1774102292Sjulian	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
1775102292Sjulian		mtx_lock_spin(&sched_lock);
1776102292Sjulian		while (( td = TAILQ_FIRST(&p->p_suspended))) {
1777103216Sjulian			thread_unsuspend_one(td);
1778102292Sjulian		}
1779102292Sjulian		mtx_unlock_spin(&sched_lock);
1780102292Sjulian	}
178199026Sjulian}
178299026Sjulian
1783102292Sjulian