kern_thread.c revision 103410
199026Sjulian/*
299026Sjulian * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
399026Sjulian *  All rights reserved.
499026Sjulian *
599026Sjulian * Redistribution and use in source and binary forms, with or without
699026Sjulian * modification, are permitted provided that the following conditions
799026Sjulian * are met:
899026Sjulian * 1. Redistributions of source code must retain the above copyright
999026Sjulian *    notice(s), this list of conditions and the following disclaimer as
1099026Sjulian *    the first lines of this file unmodified other than the possible
1199026Sjulian *    addition of one or more copyright notices.
1299026Sjulian * 2. Redistributions in binary form must reproduce the above copyright
1399026Sjulian *    notice(s), this list of conditions and the following disclaimer in the
1499026Sjulian *    documentation and/or other materials provided with the distribution.
1599026Sjulian *
1699026Sjulian * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
1799026Sjulian * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1899026Sjulian * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1999026Sjulian * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
2099026Sjulian * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
2199026Sjulian * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
2299026Sjulian * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
2399026Sjulian * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2499026Sjulian * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2599026Sjulian * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
2699026Sjulian * DAMAGE.
2799026Sjulian *
2899026Sjulian * $FreeBSD: head/sys/kern/kern_thread.c 103410 2002-09-16 19:26:48Z mini $
2999026Sjulian */
3099026Sjulian
3199026Sjulian#include <sys/param.h>
3299026Sjulian#include <sys/systm.h>
3399026Sjulian#include <sys/kernel.h>
3499026Sjulian#include <sys/lock.h>
3599026Sjulian#include <sys/malloc.h>
3699026Sjulian#include <sys/mutex.h>
3799026Sjulian#include <sys/proc.h>
3899026Sjulian#include <sys/sysctl.h>
3999026Sjulian#include <sys/filedesc.h>
4099026Sjulian#include <sys/tty.h>
4199026Sjulian#include <sys/signalvar.h>
4299026Sjulian#include <sys/sx.h>
4399026Sjulian#include <sys/user.h>
4499026Sjulian#include <sys/jail.h>
4599026Sjulian#include <sys/kse.h>
4699026Sjulian#include <sys/ktr.h>
47103410Smini#include <sys/ucontext.h>
4899026Sjulian
4999026Sjulian#include <vm/vm.h>
5099026Sjulian#include <vm/vm_object.h>
5199026Sjulian#include <vm/pmap.h>
5299026Sjulian#include <vm/uma.h>
5399026Sjulian#include <vm/vm_map.h>
5499026Sjulian
55100273Speter#include <machine/frame.h>
56100273Speter
5799026Sjulian/*
58103367Sjulian * KSEGRP related storage.
5999026Sjulian */
60103367Sjulianstatic uma_zone_t ksegrp_zone;
61103367Sjulianstatic uma_zone_t kse_zone;
6299026Sjulianstatic uma_zone_t thread_zone;
6399026Sjulian
64103367Sjulian/* DEBUG ONLY */
6599026SjulianSYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
6699026Sjulianstatic int oiks_debug = 1;	/* 0 disable, 1 printf, 2 enter debugger */
6799026SjulianSYSCTL_INT(_kern_threads, OID_AUTO, oiks, CTLFLAG_RW,
6899026Sjulian	&oiks_debug, 0, "OIKS thread debug");
6999026Sjulian
70103367Sjulianstatic int max_threads_per_proc = 4;
71103367SjulianSYSCTL_INT(_kern_threads, OID_AUTO, max_per_proc, CTLFLAG_RW,
72103367Sjulian	&max_threads_per_proc, 0, "Limit on threads per proc");
73103367Sjulian
7499026Sjulian#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))
7599026Sjulian
7699026Sjulianstruct threadqueue zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
7799026Sjulianstruct mtx zombie_thread_lock;
7899026SjulianMTX_SYSINIT(zombie_thread_lock, &zombie_thread_lock,
7999026Sjulian    "zombie_thread_lock", MTX_SPIN);
8099026Sjulian
8199026Sjulian/*
8299026Sjulian * Pepare a thread for use.
8399026Sjulian */
8499026Sjulianstatic void
8599026Sjulianthread_ctor(void *mem, int size, void *arg)
8699026Sjulian{
8799026Sjulian	struct thread	*td;
8899026Sjulian
8999026Sjulian	KASSERT((size == sizeof(struct thread)),
9099552Speter	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct thread)));
9199026Sjulian
9299026Sjulian	td = (struct thread *)mem;
93103216Sjulian	td->td_state = TDS_INACTIVE;
9499026Sjulian	td->td_flags |= TDF_UNBOUND;
9599026Sjulian}
9699026Sjulian
9799026Sjulian/*
9899026Sjulian * Reclaim a thread after use.
9999026Sjulian */
10099026Sjulianstatic void
10199026Sjulianthread_dtor(void *mem, int size, void *arg)
10299026Sjulian{
10399026Sjulian	struct thread	*td;
10499026Sjulian
10599026Sjulian	KASSERT((size == sizeof(struct thread)),
10699552Speter	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct thread)));
10799026Sjulian
10899026Sjulian	td = (struct thread *)mem;
10999026Sjulian
11099026Sjulian#ifdef INVARIANTS
11199026Sjulian	/* Verify that this thread is in a safe state to free. */
11299026Sjulian	switch (td->td_state) {
113103216Sjulian	case TDS_INHIBITED:
114103216Sjulian	case TDS_RUNNING:
115103216Sjulian	case TDS_CAN_RUN:
11699026Sjulian	case TDS_RUNQ:
11799026Sjulian		/*
11899026Sjulian		 * We must never unlink a thread that is in one of
11999026Sjulian		 * these states, because it is currently active.
12099026Sjulian		 */
12199026Sjulian		panic("bad state for thread unlinking");
12299026Sjulian		/* NOTREACHED */
123103216Sjulian	case TDS_INACTIVE:
12499026Sjulian		break;
12599026Sjulian	default:
12699026Sjulian		panic("bad thread state");
12799026Sjulian		/* NOTREACHED */
12899026Sjulian	}
12999026Sjulian#endif
13099026Sjulian}
13199026Sjulian
13299026Sjulian/*
13399026Sjulian * Initialize type-stable parts of a thread (when newly created).
13499026Sjulian */
13599026Sjulianstatic void
13699026Sjulianthread_init(void *mem, int size)
13799026Sjulian{
13899026Sjulian	struct thread	*td;
13999026Sjulian
14099026Sjulian	KASSERT((size == sizeof(struct thread)),
14199552Speter	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct thread)));
14299026Sjulian
14399026Sjulian	td = (struct thread *)mem;
144103312Sjulian	mtx_lock(&Giant);
14599026Sjulian	pmap_new_thread(td);
146103312Sjulian	mtx_unlock(&Giant);
14799026Sjulian	cpu_thread_setup(td);
14899026Sjulian}
14999026Sjulian
15099026Sjulian/*
15199026Sjulian * Tear down type-stable parts of a thread (just before being discarded).
15299026Sjulian */
15399026Sjulianstatic void
15499026Sjulianthread_fini(void *mem, int size)
15599026Sjulian{
15699026Sjulian	struct thread	*td;
15799026Sjulian
15899026Sjulian	KASSERT((size == sizeof(struct thread)),
15999552Speter	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct thread)));
16099026Sjulian
16199026Sjulian	td = (struct thread *)mem;
16299026Sjulian	pmap_dispose_thread(td);
16399026Sjulian}
16499026Sjulian
16599026Sjulian/*
166103410Smini * Fill a ucontext_t with a thread's context information.
167103410Smini *
168103410Smini * This is an analogue to getcontext(3).
169103410Smini */
170103410Sminivoid
171103410Sminithread_getcontext(struct thread *td, ucontext_t *uc)
172103410Smini{
173103410Smini
174103410Smini	get_mcontext(td, &uc->uc_mcontext);
175103410Smini	uc->uc_sigmask = td->td_proc->p_sigmask;
176103410Smini}
177103410Smini
178103410Smini/*
179103410Smini * Set a thread's context from a ucontext_t.
180103410Smini *
181103410Smini * This is an analogue to setcontext(3).
182103410Smini */
183103410Sminiint
184103410Sminithread_setcontext(struct thread *td, ucontext_t *uc)
185103410Smini{
186103410Smini	int ret;
187103410Smini
188103410Smini	ret = set_mcontext(td, &uc->uc_mcontext);
189103410Smini	if (ret == 0) {
190103410Smini		SIG_CANTMASK(uc->uc_sigmask);
191103410Smini		PROC_LOCK(td->td_proc);
192103410Smini		td->td_proc->p_sigmask = uc->uc_sigmask;
193103410Smini		PROC_UNLOCK(td->td_proc);
194103410Smini	}
195103410Smini	return (ret);
196103410Smini}
197103410Smini
198103410Smini/*
19999026Sjulian * Initialize global thread allocation resources.
20099026Sjulian */
20199026Sjulianvoid
20299026Sjulianthreadinit(void)
20399026Sjulian{
20499026Sjulian
20599026Sjulian	thread_zone = uma_zcreate("THREAD", sizeof (struct thread),
20699026Sjulian	    thread_ctor, thread_dtor, thread_init, thread_fini,
20799026Sjulian	    UMA_ALIGN_CACHE, 0);
208103367Sjulian	ksegrp_zone = uma_zcreate("KSEGRP", sizeof (struct ksegrp),
209103367Sjulian	    NULL, NULL, NULL, NULL,
210103367Sjulian	    UMA_ALIGN_CACHE, 0);
211103367Sjulian	kse_zone = uma_zcreate("KSE", sizeof (struct kse),
212103367Sjulian	    NULL, NULL, NULL, NULL,
213103367Sjulian	    UMA_ALIGN_CACHE, 0);
21499026Sjulian}
21599026Sjulian
21699026Sjulian/*
217103002Sjulian * Stash an embarasingly extra thread into the zombie thread queue.
21899026Sjulian */
21999026Sjulianvoid
22099026Sjulianthread_stash(struct thread *td)
22199026Sjulian{
22299026Sjulian	mtx_lock_spin(&zombie_thread_lock);
22399026Sjulian	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
22499026Sjulian	mtx_unlock_spin(&zombie_thread_lock);
22599026Sjulian}
22699026Sjulian
227103410Smini/*
228103410Smini * Reap zombie threads.
22999026Sjulian */
23099026Sjulianvoid
23199026Sjulianthread_reap(void)
23299026Sjulian{
23399026Sjulian	struct thread *td_reaped;
23499026Sjulian
23599026Sjulian	/*
23699026Sjulian	 * don't even bother to lock if none at this instant
23799026Sjulian	 * We really don't care about the next instant..
23899026Sjulian	 */
23999026Sjulian	if (!TAILQ_EMPTY(&zombie_threads)) {
24099026Sjulian		mtx_lock_spin(&zombie_thread_lock);
24199026Sjulian		while (!TAILQ_EMPTY(&zombie_threads)) {
24299026Sjulian			td_reaped = TAILQ_FIRST(&zombie_threads);
24399026Sjulian			TAILQ_REMOVE(&zombie_threads, td_reaped, td_runq);
24499026Sjulian			mtx_unlock_spin(&zombie_thread_lock);
24599026Sjulian			thread_free(td_reaped);
24699026Sjulian			mtx_lock_spin(&zombie_thread_lock);
24799026Sjulian		}
24899026Sjulian		mtx_unlock_spin(&zombie_thread_lock);
24999026Sjulian	}
25099026Sjulian}
25199026Sjulian
25299026Sjulian/*
253103367Sjulian * Allocate a ksegrp.
254103367Sjulian */
255103367Sjulianstruct ksegrp *
256103367Sjulianksegrp_alloc(void)
257103367Sjulian{
258103367Sjulian	return (uma_zalloc(ksegrp_zone, M_WAITOK));
259103367Sjulian}
260103367Sjulian
261103367Sjulian/*
262103367Sjulian * Allocate a kse.
263103367Sjulian */
264103367Sjulianstruct kse *
265103367Sjuliankse_alloc(void)
266103367Sjulian{
267103367Sjulian	return (uma_zalloc(kse_zone, M_WAITOK));
268103367Sjulian}
269103367Sjulian
270103367Sjulian/*
27199026Sjulian * Allocate a thread.
27299026Sjulian */
27399026Sjulianstruct thread *
27499026Sjulianthread_alloc(void)
27599026Sjulian{
27699026Sjulian	thread_reap(); /* check if any zombies to get */
27799026Sjulian	return (uma_zalloc(thread_zone, M_WAITOK));
27899026Sjulian}
27999026Sjulian
28099026Sjulian/*
281103367Sjulian * Deallocate a ksegrp.
282103367Sjulian */
283103367Sjulianvoid
284103367Sjulianksegrp_free(struct ksegrp *td)
285103367Sjulian{
286103367Sjulian	uma_zfree(ksegrp_zone, td);
287103367Sjulian}
288103367Sjulian
289103367Sjulian/*
290103367Sjulian * Deallocate a kse.
291103367Sjulian */
292103367Sjulianvoid
293103367Sjuliankse_free(struct kse *td)
294103367Sjulian{
295103367Sjulian	uma_zfree(kse_zone, td);
296103367Sjulian}
297103367Sjulian
298103367Sjulian/*
29999026Sjulian * Deallocate a thread.
30099026Sjulian */
30199026Sjulianvoid
30299026Sjulianthread_free(struct thread *td)
30399026Sjulian{
30499026Sjulian	uma_zfree(thread_zone, td);
30599026Sjulian}
30699026Sjulian
30799026Sjulian/*
30899026Sjulian * Store the thread context in the UTS's mailbox.
30999026Sjulian */
31099026Sjulianint
31199026Sjulianthread_export_context(struct thread *td)
31299026Sjulian{
31399026Sjulian	struct kse *ke;
31499026Sjulian	uintptr_t td2_mbx;
31599026Sjulian	void *addr1;
31699026Sjulian	void *addr2;
31799026Sjulian	int error;
318103410Smini	ucontext_t uc;
31999026Sjulian
320100271Speter#ifdef __ia64__
321100271Speter	td2_mbx = 0;		/* pacify gcc (!) */
322100271Speter#endif
323103410Smini	/* Export the user/machine context. */
324103410Smini	error = copyin((caddr_t)td->td_mailbox +
325103410Smini	    offsetof(struct thread_mailbox, tm_context),
326103410Smini	    &uc,
327103410Smini	    sizeof(ucontext_t));
328103410Smini	if (error == 0) {
329103410Smini		thread_getcontext(td, &uc);
330103410Smini		error = copyout(&uc, (caddr_t)td->td_mailbox +
331103410Smini		offsetof(struct thread_mailbox, tm_context),
332103410Smini		sizeof(ucontext_t));
333103410Smini	}
33499026Sjulian
33599026Sjulian	ke = td->td_kse;
33699026Sjulian	addr1 = (caddr_t)ke->ke_mailbox
337103410Smini			+ offsetof(struct kse_mailbox, km_completed);
33899026Sjulian	addr2 = (caddr_t)td->td_mailbox
339103410Smini			+ offsetof(struct thread_mailbox , tm_next);
34099026Sjulian	/* Then link it into it's KSE's list of completed threads. */
34199026Sjulian	if (!error) {
34299026Sjulian		error = td2_mbx = fuword(addr1);
34399026Sjulian		if (error == -1)
34499026Sjulian			error = EFAULT;
34599026Sjulian		else
34699026Sjulian			error = 0;
34799026Sjulian	}
34899026Sjulian	if (!error)
34999026Sjulian		error = suword(addr2, td2_mbx);
35099026Sjulian	if (!error)
35199026Sjulian		error = suword(addr1, (u_long)td->td_mailbox);
35299026Sjulian	if (error == -1)
35399026Sjulian		error = EFAULT;
35499026Sjulian	return (error);
35599026Sjulian}
35699026Sjulian
35799026Sjulian
35899026Sjulian/*
35999026Sjulian * Discard the current thread and exit from its context.
36099026Sjulian *
36199026Sjulian * Because we can't free a thread while we're operating under its context,
36299026Sjulian * push the current thread into our KSE's ke_tdspare slot, freeing the
36399026Sjulian * thread that might be there currently. Because we know that only this
36499026Sjulian * processor will run our KSE, we needn't worry about someone else grabbing
36599026Sjulian * our context before we do a cpu_throw.
36699026Sjulian */
36799026Sjulianvoid
36899026Sjulianthread_exit(void)
36999026Sjulian{
37099026Sjulian	struct thread *td;
37199026Sjulian	struct kse *ke;
37299026Sjulian	struct proc *p;
37399026Sjulian	struct ksegrp	*kg;
37499026Sjulian
37599026Sjulian	td = curthread;
37699026Sjulian	kg = td->td_ksegrp;
37799026Sjulian	p = td->td_proc;
37899026Sjulian	ke = td->td_kse;
37999026Sjulian
38099026Sjulian	mtx_assert(&sched_lock, MA_OWNED);
381102581Sjulian	KASSERT(p != NULL, ("thread exiting without a process"));
382102581Sjulian	KASSERT(ke != NULL, ("thread exiting without a kse"));
383102581Sjulian	KASSERT(kg != NULL, ("thread exiting without a kse group"));
38499026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
38599026Sjulian	CTR1(KTR_PROC, "thread_exit: thread %p", td);
38699026Sjulian	KASSERT(!mtx_owned(&Giant), ("dying thread owns giant"));
38799026Sjulian
38899026Sjulian	if (ke->ke_tdspare != NULL) {
389103216Sjulian		thread_stash(ke->ke_tdspare);
39099026Sjulian		ke->ke_tdspare = NULL;
39199026Sjulian	}
39299026Sjulian	cpu_thread_exit(td);	/* XXXSMP */
39399026Sjulian
394102581Sjulian	/*
395103002Sjulian	 * The last thread is left attached to the process
396103002Sjulian	 * So that the whole bundle gets recycled. Skip
397103002Sjulian	 * all this stuff.
398102581Sjulian	 */
399103002Sjulian	if (p->p_numthreads > 1) {
400103002Sjulian		/* Reassign this thread's KSE. */
401103002Sjulian		ke->ke_thread = NULL;
402103002Sjulian		td->td_kse = NULL;
403103002Sjulian		ke->ke_state = KES_UNQUEUED;
404103002Sjulian		kse_reassign(ke);
405103002Sjulian
406103002Sjulian		/* Unlink this thread from its proc. and the kseg */
407103002Sjulian		TAILQ_REMOVE(&p->p_threads, td, td_plist);
408103002Sjulian		p->p_numthreads--;
409103002Sjulian		TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
410103002Sjulian		kg->kg_numthreads--;
411103002Sjulian		/*
412103002Sjulian		 * The test below is NOT true if we are the
413103002Sjulian		 * sole exiting thread. P_STOPPED_SNGL is unset
414103002Sjulian		 * in exit1() after it is the only survivor.
415103002Sjulian		 */
416103002Sjulian		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
417103002Sjulian			if (p->p_numthreads == p->p_suspcount) {
418103216Sjulian				thread_unsuspend_one(p->p_singlethread);
419103002Sjulian			}
42099026Sjulian		}
421103002Sjulian		PROC_UNLOCK(p);
422103216Sjulian		td->td_state	= TDS_INACTIVE;
423103002Sjulian		td->td_proc	= NULL;
424103002Sjulian		td->td_ksegrp	= NULL;
425103002Sjulian		td->td_last_kse	= NULL;
426103002Sjulian		ke->ke_tdspare = td;
427103002Sjulian	} else {
428103002Sjulian		PROC_UNLOCK(p);
42999026Sjulian	}
430103002Sjulian
43199026Sjulian	cpu_throw();
43299026Sjulian	/* NOTREACHED */
43399026Sjulian}
43499026Sjulian
43599026Sjulian/*
43699026Sjulian * Link a thread to a process.
437103002Sjulian * set up anything that needs to be initialized for it to
438103002Sjulian * be used by the process.
43999026Sjulian *
44099026Sjulian * Note that we do not link to the proc's ucred here.
44199026Sjulian * The thread is linked as if running but no KSE assigned.
44299026Sjulian */
44399026Sjulianvoid
44499026Sjulianthread_link(struct thread *td, struct ksegrp *kg)
44599026Sjulian{
44699026Sjulian	struct proc *p;
44799026Sjulian
44899026Sjulian	p = kg->kg_proc;
449103216Sjulian	td->td_state = TDS_INACTIVE;
45099026Sjulian	td->td_proc	= p;
45199026Sjulian	td->td_ksegrp	= kg;
45299026Sjulian	td->td_last_kse	= NULL;
45399026Sjulian
454103002Sjulian	LIST_INIT(&td->td_contested);
455103002Sjulian	callout_init(&td->td_slpcallout, 1);
45699026Sjulian	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
45799026Sjulian	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
45899026Sjulian	p->p_numthreads++;
45999026Sjulian	kg->kg_numthreads++;
460103367Sjulian	if (oiks_debug && p->p_numthreads > max_threads_per_proc) {
46199026Sjulian		printf("OIKS %d\n", p->p_numthreads);
46299026Sjulian		if (oiks_debug > 1)
46399026Sjulian			Debugger("OIKS");
46499026Sjulian	}
46599026Sjulian	td->td_kse	= NULL;
46699026Sjulian}
46799026Sjulian
46899026Sjulian/*
469103410Smini * Create a thread and schedule it for upcall on the KSE given.
47099026Sjulian */
47199026Sjulianstruct thread *
47299026Sjulianthread_schedule_upcall(struct thread *td, struct kse *ke)
47399026Sjulian{
47499026Sjulian	struct thread *td2;
47599026Sjulian
47699026Sjulian	mtx_assert(&sched_lock, MA_OWNED);
47799026Sjulian	if (ke->ke_tdspare != NULL) {
47899026Sjulian		td2 = ke->ke_tdspare;
47999026Sjulian		ke->ke_tdspare = NULL;
48099026Sjulian	} else {
48199026Sjulian		mtx_unlock_spin(&sched_lock);
48299026Sjulian		td2 = thread_alloc();
48399026Sjulian		mtx_lock_spin(&sched_lock);
48499026Sjulian	}
48599026Sjulian	CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
48699026Sjulian	     td, td->td_proc->p_pid, td->td_proc->p_comm);
487103072Sjulian	bzero(&td2->td_startzero,
488103002Sjulian	    (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
489103002Sjulian	bcopy(&td->td_startcopy, &td2->td_startcopy,
490103002Sjulian	    (unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy));
49199026Sjulian	thread_link(td2, ke->ke_ksegrp);
492103410Smini	cpu_set_upcall(td2, td->td_pcb);
493103410Smini	bcopy(td->td_frame, td2->td_frame, sizeof(struct trapframe));
494103410Smini	/*
495103410Smini	 * The user context for this thread is selected when we choose
496103410Smini	 * a KSE and return to userland on it. All we need do here is
497103410Smini	 * note that the thread exists in order to perform an upcall.
498103410Smini	 *
499103410Smini	 * Since selecting a KSE to perform the upcall involves locking
500103410Smini	 * that KSE's context to our upcall, its best to wait until the
501103410Smini	 * last possible moment before grabbing a KSE. We do this in
502103410Smini	 * userret().
503103410Smini	 */
50499026Sjulian	td2->td_ucred = crhold(td->td_ucred);
50599026Sjulian	td2->td_flags = TDF_UNBOUND|TDF_UPCALLING;
506103216Sjulian	TD_SET_CAN_RUN(td2);
50799026Sjulian	setrunqueue(td2);
50899026Sjulian	return (td2);
50999026Sjulian}
51099026Sjulian
51199026Sjulian/*
512103410Smini * Schedule an upcall to notify a KSE process recieved signals.
51399026Sjulian *
514103410Smini * XXX - Modifying a sigset_t like this is totally bogus.
515103410Smini */
516103410Sministruct thread *
517103410Sminisignal_upcall(struct proc *p, int sig)
518103410Smini{
519103410Smini	struct thread *td, *td2;
520103410Smini	struct kse *ke;
521103410Smini	sigset_t ss;
522103410Smini	int error;
523103410Smini
524103410Smini	PROC_LOCK_ASSERT(p, MA_OWNED);
525103410Smini
526103410Smini	td = FIRST_THREAD_IN_PROC(p);
527103410Smini	ke = td->td_kse;
528103410Smini	PROC_UNLOCK(p);
529103410Smini	error = copyin(&ke->ke_mailbox->km_sigscaught, &ss, sizeof(sigset_t));
530103410Smini	PROC_LOCK(p);
531103410Smini	if (error)
532103410Smini		return (NULL);
533103410Smini	SIGADDSET(ss, sig);
534103410Smini	PROC_UNLOCK(p);
535103410Smini	error = copyout(&ss, &ke->ke_mailbox->km_sigscaught, sizeof(sigset_t));
536103410Smini	PROC_LOCK(p);
537103410Smini	if (error)
538103410Smini		return (NULL);
539103410Smini	mtx_lock_spin(&sched_lock);
540103410Smini	td2 = thread_schedule_upcall(td, ke);
541103410Smini	mtx_unlock_spin(&sched_lock);
542103410Smini	return (td2);
543103410Smini}
544103410Smini
545103410Smini/*
546103410Smini * Consider whether or not an upcall should be made, and update the
547103410Smini * TDF_UPCALLING flag appropriately.
548103410Smini *
549103410Smini * This function is called when the current thread had been bound to a user
550103410Smini * thread that performed a syscall that blocked, and is now returning.
551103410Smini * Got that? syscall -> msleep -> wakeup -> syscall_return -> us.
552103410Smini *
553103410Smini * This thread will be returned to the UTS in its mailbox as a completed
554103410Smini * thread.  We need to decide whether or not to perform an upcall now,
555103410Smini * or simply queue the thread for later.
556103410Smini *
557103410Smini * XXXKSE Future enhancement: We could also return back to
558103410Smini * the thread if we haven't had to do an upcall since then.
559103410Smini * If the KSE's copy is == the thread's copy, and there are
560103410Smini * no other completed threads.
561103410Smini */
562103410Sministatic int
563103410Sminithread_consider_upcalling(struct proc *p, struct ksegrp *kg, struct kse *ke,
564103410Smini    struct thread *td, struct trapframe *frame)
565103410Smini{
566103410Smini	int error;
567103410Smini
568103410Smini	/*
569103410Smini	 * Save the thread's context, and link it
570103410Smini	 * into the KSE's list of completed threads.
571103410Smini	 */
572103410Smini	error = thread_export_context(td);
573103410Smini	td->td_mailbox = NULL;
574103410Smini	if (error)
575103410Smini		/*
576103410Smini		 * Failing to do the KSE operation just defaults
577103410Smini		 * back to synchonous operation, so just return from
578103410Smini		 * the syscall.
579103410Smini		 */
580103410Smini		return (error);
581103410Smini
582103410Smini	/*
583103410Smini	 * Decide whether to perfom an upcall now.
584103410Smini	 */
585103410Smini	/* Make sure there are no other threads waiting to run. */
586103410Smini	if (TAILQ_FIRST(&kg->kg_runq)) {
587103410Smini		/*
588103410Smini		 * Another thread in this KSEG needs to run.
589103410Smini		 * Switch to it instead of performing an upcall,
590103410Smini		 * abondoning this thread.  Perform the upcall
591103410Smini		 * later; discard this thread for now.
592103410Smini		 *
593103410Smini		 * XXXKSE - As for the other threads to run;
594103410Smini		 * we COULD rush through all the threads
595103410Smini		 * in this KSEG at this priority, or we
596103410Smini		 * could throw the ball back into the court
597103410Smini		 * and just run the highest prio kse available.
598103410Smini		 * What is OUR priority?  The priority of the highest
599103410Smini		 * sycall waiting to be returned?
600103410Smini		 * For now, just let another KSE run (easiest).
601103410Smini		 *
602103410Smini		 * XXXKSE Future enhancement: Shove threads in this
603103410Smini		 * state onto a list of completed threads hanging
604103410Smini		 * off the KSEG. Then, collect them before performing
605103410Smini		 * an upcall. This way, we don't commit to an upcall
606103410Smini		 * on a particular KSE, but report completed threads on
607103410Smini		 * the next upcall to any KSE in this KSEG.
608103410Smini		 *
609103410Smini		 */
610103410Smini		PROC_LOCK(p);
611103410Smini		mtx_lock_spin(&sched_lock);
612103410Smini		thread_exit(); /* Abandon current thread. */
613103410Smini		/* NOTREACHED */
614103410Smini	} else
615103410Smini		/*
616103410Smini		 * Perform an upcall now.
617103410Smini		 *
618103410Smini		 * XXXKSE - Assumes we are going to userland, and not
619103410Smini		 * nested in the kernel.
620103410Smini		 */
621103410Smini		td->td_flags |= TDF_UPCALLING;
622103410Smini	return (0);
623103410Smini}
624103410Smini
625103410Smini/*
626103410Smini * The extra work we go through if we are a threaded process when we
627103410Smini * return to userland.
628103410Smini *
62999026Sjulian * If we are a KSE process and returning to user mode, check for
63099026Sjulian * extra work to do before we return (e.g. for more syscalls
63199026Sjulian * to complete first).  If we were in a critical section, we should
63299026Sjulian * just return to let it finish. Same if we were in the UTS (in
633103410Smini * which case the mailbox's context's busy indicator will be set).
634103410Smini * The only traps we suport will have set the mailbox.
635103410Smini * We will clear it here.
63699026Sjulian */
63799026Sjulianint
63899026Sjulianthread_userret(struct proc *p, struct ksegrp *kg, struct kse *ke,
63999026Sjulian    struct thread *td, struct trapframe *frame)
64099026Sjulian{
641103410Smini	int error;
64299026Sjulian
643103410Smini	/*
644103410Smini	 * Ensure that we have a spare thread available.
645103410Smini	 */
64699026Sjulian	if (ke->ke_tdspare == NULL) {
647103410Smini		mtx_lock(&Giant);
64899026Sjulian		ke->ke_tdspare = thread_alloc();
649103410Smini		mtx_unlock(&Giant);
65099026Sjulian	}
651103410Smini
652103410Smini	/*
653103410Smini	 * Bound threads need no additional work.
654103410Smini	 */
655103410Smini	if ((td->td_flags & TDF_UNBOUND) == 0)
656103410Smini		return (0);
657103410Smini	error = 0;
658103410Smini
659103410Smini	/*
660103410Smini	 * Decide whether or not we should perform an upcall now.
661103410Smini	 */
662103410Smini	if (((td->td_flags & TDF_UPCALLING) == 0) && td->td_mailbox) {
663103410Smini		error = thread_consider_upcalling(p, kg, ke, td, frame);
664103410Smini		if (error != 0)
665103410Smini			/*
666103410Smini			 * Failing to do the KSE operation just defaults
667103410Smini			 * back to synchonous operation, so just return from
668103410Smini			 * the syscall.
669103410Smini			 */
670103410Smini			goto cont;
671103410Smini	}
672103410Smini	if (td->td_flags & TDF_UPCALLING) {
67399026Sjulian		/*
674103410Smini		 * There is no more work to do and we are going to ride
675103410Smini		 * this thead/KSE up to userland.
67699026Sjulian		 */
677103410Smini		CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
678103410Smini		    td, p->p_pid, p->p_comm);
67999026Sjulian
68099026Sjulian		/*
681103410Smini		 * Set user context to the UTS.
68299026Sjulian		 */
683103410Smini		cpu_set_upcall_kse(td, ke);
684103410Smini		if (error)
68599026Sjulian			/*
686103410Smini			 * Failing to do the KSE operation just defaults
687103410Smini			 * back to synchonous operation, so just return from
688103410Smini			 * the syscall.
68999026Sjulian			 */
690103410Smini			goto cont;
69199026Sjulian
69299026Sjulian		/*
693103410Smini		 * Set state and mailbox.
69499026Sjulian		 */
695103410Smini		td->td_flags &= ~TDF_UPCALLING;
696103410Smini		error = suword((caddr_t)td->td_kse->ke_mailbox +
697103410Smini		    offsetof(struct kse_mailbox, km_curthread),
698103410Smini		    0);
699103410Smini	}
70099026Sjuliancont:
701103410Smini	/*
702103410Smini	 * Stop any chance that we may be separated from
703103410Smini	 * the KSE we are currently on. This is "biting the bullet",
704103410Smini	 * we are committing to go to user space as as this KSE here.
705103410Smini	 */
706103410Smini	td->td_flags &= ~TDF_UNBOUND;	/* Bind to this user thread. */
70799026Sjulian	return (error);
70899026Sjulian}
70999026Sjulian
71099026Sjulian/*
71199026Sjulian * Enforce single-threading.
71299026Sjulian *
71399026Sjulian * Returns 1 if the caller must abort (another thread is waiting to
71499026Sjulian * exit the process or similar). Process is locked!
71599026Sjulian * Returns 0 when you are successfully the only thread running.
71699026Sjulian * A process has successfully single threaded in the suspend mode when
71799026Sjulian * There are no threads in user mode. Threads in the kernel must be
71899026Sjulian * allowed to continue until they get to the user boundary. They may even
71999026Sjulian * copy out their return values and data before suspending. They may however be
72099026Sjulian * accellerated in reaching the user boundary as we will wake up
72199026Sjulian * any sleeping threads that are interruptable. (PCATCH).
72299026Sjulian */
72399026Sjulianint
72499026Sjulianthread_single(int force_exit)
72599026Sjulian{
72699026Sjulian	struct thread *td;
72799026Sjulian	struct thread *td2;
72899026Sjulian	struct proc *p;
72999026Sjulian
73099026Sjulian	td = curthread;
73199026Sjulian	p = td->td_proc;
73299026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
73399026Sjulian	KASSERT((td != NULL), ("curthread is NULL"));
73499026Sjulian
73599026Sjulian	if ((p->p_flag & P_KSES) == 0)
73699026Sjulian		return (0);
73799026Sjulian
738100648Sjulian	/* Is someone already single threading? */
739100648Sjulian	if (p->p_singlethread)
74099026Sjulian		return (1);
74199026Sjulian
742102950Sdavidxu	if (force_exit == SINGLE_EXIT)
74399026Sjulian		p->p_flag |= P_SINGLE_EXIT;
74499026Sjulian	else
74599026Sjulian		p->p_flag &= ~P_SINGLE_EXIT;
746102950Sdavidxu	p->p_flag |= P_STOPPED_SINGLE;
74799026Sjulian	p->p_singlethread = td;
74899026Sjulian	while ((p->p_numthreads - p->p_suspcount) != 1) {
749103216Sjulian		mtx_lock_spin(&sched_lock);
75099026Sjulian		FOREACH_THREAD_IN_PROC(p, td2) {
75199026Sjulian			if (td2 == td)
75299026Sjulian				continue;
753103216Sjulian			if (TD_IS_INHIBITED(td2)) {
754103216Sjulian				if (TD_IS_SUSPENDED(td2)) {
755103216Sjulian					if (force_exit == SINGLE_EXIT) {
756103216Sjulian						thread_unsuspend_one(td2);
757103216Sjulian					}
75899026Sjulian				}
759103216Sjulian				if ( TD_IS_SLEEPING(td2)) {
760103216Sjulian					if (td2->td_flags & TDF_CVWAITQ)
761103216Sjulian						cv_waitq_remove(td2);
762103216Sjulian					else
763103216Sjulian						unsleep(td2);
764103216Sjulian					break;
765103216Sjulian				}
766103216Sjulian				if (TD_CAN_RUN(td2))
767103216Sjulian					setrunqueue(td2);
76899026Sjulian			}
76999026Sjulian		}
77099026Sjulian		/*
77199026Sjulian		 * Wake us up when everyone else has suspended.
772100648Sjulian		 * In the mean time we suspend as well.
77399026Sjulian		 */
774103216Sjulian		thread_suspend_one(td);
77599026Sjulian		mtx_unlock(&Giant);
77699026Sjulian		PROC_UNLOCK(p);
77799026Sjulian		mi_switch();
77899026Sjulian		mtx_unlock_spin(&sched_lock);
77999026Sjulian		mtx_lock(&Giant);
78099026Sjulian		PROC_LOCK(p);
78199026Sjulian	}
78299026Sjulian	return (0);
78399026Sjulian}
78499026Sjulian
/*
 * Called in from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is non zero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          |   returns 0 or 1
 *               | when ST ends       |   immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       |   returns 1
 *               |                    |  immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is in effect, even a single threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe, as that may be the outcome unless
 * return_instead is set.
 */
81899026Sjulianint
81999026Sjulianthread_suspend_check(int return_instead)
82099026Sjulian{
82199026Sjulian	struct thread *td = curthread;
82299026Sjulian	struct proc *p = td->td_proc;
82399026Sjulian
82499026Sjulian	td = curthread;
82599026Sjulian	p = td->td_proc;
82699026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
82799026Sjulian	while (P_SHOULDSTOP(p)) {
828102950Sdavidxu		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
82999026Sjulian			KASSERT(p->p_singlethread != NULL,
83099026Sjulian			    ("singlethread not set"));
83199026Sjulian			/*
832100648Sjulian			 * The only suspension in action is a
833100648Sjulian			 * single-threading. Single threader need not stop.
834100646Sjulian			 * XXX Should be safe to access unlocked
835100646Sjulian			 * as it can only be set to be true by us.
83699026Sjulian			 */
837100648Sjulian			if (p->p_singlethread == td)
83899026Sjulian				return (0);	/* Exempt from stopping. */
83999026Sjulian		}
840100648Sjulian		if (return_instead)
84199026Sjulian			return (1);
84299026Sjulian
84399026Sjulian		/*
84499026Sjulian		 * If the process is waiting for us to exit,
84599026Sjulian		 * this thread should just suicide.
846102950Sdavidxu		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
84799026Sjulian		 */
84899026Sjulian		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
84999026Sjulian			mtx_lock_spin(&sched_lock);
85099026Sjulian			while (mtx_owned(&Giant))
85199026Sjulian				mtx_unlock(&Giant);
85299026Sjulian			thread_exit();
85399026Sjulian		}
85499026Sjulian
85599026Sjulian		/*
85699026Sjulian		 * When a thread suspends, it just
85799026Sjulian		 * moves to the processes's suspend queue
85899026Sjulian		 * and stays there.
85999026Sjulian		 *
86099026Sjulian		 * XXXKSE if TDF_BOUND is true
86199026Sjulian		 * it will not release it's KSE which might
86299026Sjulian		 * lead to deadlock if there are not enough KSEs
86399026Sjulian		 * to complete all waiting threads.
86499026Sjulian		 * Maybe be able to 'lend' it out again.
86599026Sjulian		 * (lent kse's can not go back to userland?)
86699026Sjulian		 * and can only be lent in STOPPED state.
86799026Sjulian		 */
868102238Sjulian		mtx_lock_spin(&sched_lock);
869102950Sdavidxu		if ((p->p_flag & P_STOPPED_SIG) &&
870102238Sjulian		    (p->p_suspcount+1 == p->p_numthreads)) {
871102238Sjulian			mtx_unlock_spin(&sched_lock);
872102238Sjulian			PROC_LOCK(p->p_pptr);
873102238Sjulian			if ((p->p_pptr->p_procsig->ps_flag &
874102238Sjulian				PS_NOCLDSTOP) == 0) {
875102238Sjulian				psignal(p->p_pptr, SIGCHLD);
876102238Sjulian			}
877102238Sjulian			PROC_UNLOCK(p->p_pptr);
878103055Sjulian			mtx_lock_spin(&sched_lock);
879102238Sjulian		}
88099026Sjulian		mtx_assert(&Giant, MA_NOTOWNED);
881103216Sjulian		thread_suspend_one(td);
88299026Sjulian		PROC_UNLOCK(p);
883102950Sdavidxu		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
884100632Sjulian			if (p->p_numthreads == p->p_suspcount) {
885103216Sjulian				thread_unsuspend_one(p->p_singlethread);
886100632Sjulian			}
887100632Sjulian		}
888100594Sjulian		p->p_stats->p_ru.ru_nivcsw++;
88999026Sjulian		mi_switch();
89099026Sjulian		mtx_unlock_spin(&sched_lock);
89199026Sjulian		PROC_LOCK(p);
89299026Sjulian	}
89399026Sjulian	return (0);
89499026Sjulian}
89599026Sjulian
896102898Sdavidxuvoid
897102898Sdavidxuthread_suspend_one(struct thread *td)
898102898Sdavidxu{
899102898Sdavidxu	struct proc *p = td->td_proc;
900102898Sdavidxu
901102898Sdavidxu	mtx_assert(&sched_lock, MA_OWNED);
902102898Sdavidxu	p->p_suspcount++;
903103216Sjulian	TD_SET_SUSPENDED(td);
904102898Sdavidxu	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
905103216Sjulian	/*
906103216Sjulian	 * Hack: If we are suspending but are on the sleep queue
907103216Sjulian	 * then we are in msleep or the cv equivalent. We
908103216Sjulian	 * want to look like we have two Inhibitors.
909103216Sjulian	 */
910103216Sjulian	if (TD_ON_SLEEPQ(td))
911103216Sjulian		TD_SET_SLEEPING(td);
912102898Sdavidxu}
913102898Sdavidxu
914102898Sdavidxuvoid
915102898Sdavidxuthread_unsuspend_one(struct thread *td)
916102898Sdavidxu{
917102898Sdavidxu	struct proc *p = td->td_proc;
918102898Sdavidxu
919102898Sdavidxu	mtx_assert(&sched_lock, MA_OWNED);
920102898Sdavidxu	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
921103216Sjulian	TD_CLR_SUSPENDED(td);
922102898Sdavidxu	p->p_suspcount--;
923103216Sjulian	setrunnable(td);
924102898Sdavidxu}
925102898Sdavidxu
92699026Sjulian/*
92799026Sjulian * Allow all threads blocked by single threading to continue running.
92899026Sjulian */
92999026Sjulianvoid
93099026Sjulianthread_unsuspend(struct proc *p)
93199026Sjulian{
93299026Sjulian	struct thread *td;
93399026Sjulian
934100646Sjulian	mtx_assert(&sched_lock, MA_OWNED);
93599026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
93699026Sjulian	if (!P_SHOULDSTOP(p)) {
93799026Sjulian		while (( td = TAILQ_FIRST(&p->p_suspended))) {
938102898Sdavidxu			thread_unsuspend_one(td);
93999026Sjulian		}
940102950Sdavidxu	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
94199026Sjulian	    (p->p_numthreads == p->p_suspcount)) {
94299026Sjulian		/*
94399026Sjulian		 * Stopping everything also did the job for the single
94499026Sjulian		 * threading request. Now we've downgraded to single-threaded,
94599026Sjulian		 * let it continue.
94699026Sjulian		 */
947102898Sdavidxu		thread_unsuspend_one(p->p_singlethread);
94899026Sjulian	}
94999026Sjulian}
95099026Sjulian
95199026Sjulianvoid
95299026Sjulianthread_single_end(void)
95399026Sjulian{
95499026Sjulian	struct thread *td;
95599026Sjulian	struct proc *p;
95699026Sjulian
95799026Sjulian	td = curthread;
95899026Sjulian	p = td->td_proc;
95999026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
960102950Sdavidxu	p->p_flag &= ~P_STOPPED_SINGLE;
96199026Sjulian	p->p_singlethread = NULL;
962102292Sjulian	/*
963102292Sjulian	 * If there are other threads they mey now run,
964102292Sjulian	 * unless of course there is a blanket 'stop order'
965102292Sjulian	 * on the process. The single threader must be allowed
966102292Sjulian	 * to continue however as this is a bad place to stop.
967102292Sjulian	 */
968102292Sjulian	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
969102292Sjulian		mtx_lock_spin(&sched_lock);
970102292Sjulian		while (( td = TAILQ_FIRST(&p->p_suspended))) {
971103216Sjulian			thread_unsuspend_one(td);
972102292Sjulian		}
973102292Sjulian		mtx_unlock_spin(&sched_lock);
974102292Sjulian	}
97599026Sjulian}
97699026Sjulian
977102292Sjulian
978