/*
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * $FreeBSD: head/sys/kern/kern_thread.c 100271 2002-07-17 23:32:13Z peter $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/filedesc.h>
#include <sys/tty.h>
#include <sys/signalvar.h>
#include <sys/sx.h>
#include <sys/user.h>
#include <sys/jail.h>
#include <sys/kse.h>
#include <sys/ktr.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/pmap.h>
#include <vm/uma.h>
#include <vm/vm_map.h>

/*
 * Thread related storage.
 */
static uma_zone_t thread_zone;
static int allocated_threads;
static int active_threads;
static int cached_threads;

SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");

SYSCTL_INT(_kern_threads, OID_AUTO, active, CTLFLAG_RD,
	&active_threads, 0, "Number of active threads in system.");

SYSCTL_INT(_kern_threads, OID_AUTO, cached, CTLFLAG_RD,
	&cached_threads, 0, "Number of threads in thread cache.");

SYSCTL_INT(_kern_threads, OID_AUTO, allocated, CTLFLAG_RD,
	&allocated_threads, 0, "Number of threads in zone.");

static int oiks_debug = 1;	/* 0 disable, 1 printf, 2 enter debugger */
SYSCTL_INT(_kern_threads, OID_AUTO, oiks, CTLFLAG_RW,
	&oiks_debug, 0, "OIKS thread debug");

#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))

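/*
 * RANGEOF() gives the size in bytes of the span of members from 'start'
 * up to (but not including) 'end' within a struct, so that a run of
 * consecutive members can be cleared in a single call.  For example,
 * as thread_ctor() below does:
 *
 *	bzero(&td->td_startzero,
 *	    RANGEOF(struct thread, td_startzero, td_endzero));
 *
 * clears every member laid out between td_startzero and td_endzero.
 */
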
struct threadqueue zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
struct mtx zombie_thread_lock;
MTX_SYSINIT(zombie_thread_lock, &zombie_thread_lock,
    "zombie_thread_lock", MTX_SPIN);

/*
 * Prepare a thread for use.
 */
static void
thread_ctor(void *mem, int size, void *arg)
{
	struct thread	*td;

	KASSERT((size == sizeof(struct thread)),
	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct thread)));

	td = (struct thread *)mem;
	bzero(&td->td_startzero,
	    (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
	td->td_state = TDS_NEW;
	td->td_flags |= TDF_UNBOUND;
#if 0
	/*
	 * Maybe move these here from process creation, but maybe not.
	 * Moving them here takes them away from their "natural" place
	 * in the fork process.
	 */
	/* XXX td_contested does not appear to be initialized for threads! */
	LIST_INIT(&td->td_contested);
	callout_init(&td->td_slpcallout, 1);
#endif
	cached_threads--;	/* XXXSMP */
	active_threads++;	/* XXXSMP */
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread	*td;

	KASSERT((size == sizeof(struct thread)),
	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct thread)));

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_SLP:
	case TDS_MTX:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_UNQUEUED:
	case TDS_NEW:
	case TDS_RUNNING:
	case TDS_SURPLUS:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif

	/* Update counters. */
	active_threads--;	/* XXXSMP */
	cached_threads++;	/* XXXSMP */
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static void
thread_init(void *mem, int size)
{
	struct thread	*td;

	KASSERT((size == sizeof(struct thread)),
	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct thread)));

	td = (struct thread *)mem;
	pmap_new_thread(td);
	cpu_thread_setup(td);
	cached_threads++;	/* XXXSMP */
	allocated_threads++;	/* XXXSMP */
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread	*td;

	KASSERT((size == sizeof(struct thread)),
	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct thread)));

	td = (struct thread *)mem;
	pmap_dispose_thread(td);
	cached_threads--;	/* XXXSMP */
	allocated_threads--;	/* XXXSMP */
}

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

	thread_zone = uma_zcreate("THREAD", sizeof (struct thread),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, 0);
}

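/*
 * The zone is type-stable: thread_init() and thread_fini() run only when
 * memory enters or leaves the zone, while thread_ctor() and thread_dtor()
 * run on every allocation and free.  The sequence one thread structure
 * may see over its lifetime is roughly:
 *
 *	thread_init()	(once, when its memory is added to the zone)
 *	thread_ctor()	(on each thread_alloc())
 *	thread_dtor()	(on each thread_free())
 *	thread_fini()	(once, if the zone ever releases the memory)
 */
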
/*
 * Stash an embarrassingly extra thread into the zombie thread queue.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&zombie_thread_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
	mtx_unlock_spin(&zombie_thread_lock);
}

/*
 * Reap any zombie threads for this processor.
 */
void
thread_reap(void)
{
	struct thread *td_reaped;

	/*
	 * Don't even bother to lock if none at this instant;
	 * we really don't care about the next instant.
	 */
	if (!TAILQ_EMPTY(&zombie_threads)) {
		mtx_lock_spin(&zombie_thread_lock);
		while (!TAILQ_EMPTY(&zombie_threads)) {
			td_reaped = TAILQ_FIRST(&zombie_threads);
			TAILQ_REMOVE(&zombie_threads, td_reaped, td_runq);
			mtx_unlock_spin(&zombie_thread_lock);
			thread_free(td_reaped);
			mtx_lock_spin(&zombie_thread_lock);
		}
		mtx_unlock_spin(&zombie_thread_lock);
	}
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{
	thread_reap(); /* check if any zombies to get */
	return (uma_zalloc(thread_zone, M_WAITOK));
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{
	uma_zfree(thread_zone, td);
}

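/*
 * A sketch of how the routines above pair up (illustrative only):
 *
 *	struct thread *td;
 *
 *	td = thread_alloc();	(reaps zombies, then allocates)
 *	...
 *	thread_free(td);	(when not freeing from td's own context)
 *
 * A thread that must discard itself cannot call thread_free() on its own
 * storage; it is stashed via thread_stash() (or parked in ke_tdspare)
 * and reclaimed by a later thread_reap().
 */
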
/*
 * Store the thread context in the UTS's mailbox.
 */
int
thread_export_context(struct thread *td)
{
	struct kse *ke;
	uintptr_t td2_mbx;
	void *addr1;
	void *addr2;
	int error;

#ifdef __ia64__
	td2_mbx = 0;		/* pacify gcc (!) */
#endif
	/* Export the register contents. */
	error = cpu_export_context(td);

	ke = td->td_kse;
	addr1 = (caddr_t)ke->ke_mailbox
			+ offsetof(struct kse_mailbox, kmbx_completed_threads);
	addr2 = (caddr_t)td->td_mailbox
			+ offsetof(struct thread_mailbox, next_completed);
	/* Then link it into its KSE's list of completed threads. */
	if (!error) {
		error = td2_mbx = fuword(addr1);
		if (error == -1)
			error = EFAULT;
		else
			error = 0;
	}
	if (!error)
		error = suword(addr2, td2_mbx);
	if (!error)
		error = suword(addr1, (u_long)td->td_mailbox);
	if (error == -1)
		error = EFAULT;
	return (error);
}

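/*
 * In user space terms, the fuword()/suword() sequence above pushes the
 * thread's mailbox onto the KSE mailbox's list of completed threads,
 * roughly equivalent to (sketch only, ignoring fault handling):
 *
 *	td->td_mailbox->next_completed =
 *	    ke->ke_mailbox->kmbx_completed_threads;
 *	ke->ke_mailbox->kmbx_completed_threads = td->td_mailbox;
 */
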
/*
 * Discard the current thread and exit from its context.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our KSE's ke_tdspare slot, freeing the
 * thread that might be there currently. Because we know that only this
 * processor will run our KSE, we needn't worry about someone else grabbing
 * our context before we do a cpu_throw.
 */
void
thread_exit(void)
{
	struct thread *td;
	struct kse *ke;
	struct proc *p;
	struct ksegrp	*kg;

	td = curthread;
	kg = td->td_ksegrp;
	p = td->td_proc;
	ke = td->td_kse;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	CTR1(KTR_PROC, "thread_exit: thread %p", td);
	KASSERT(!mtx_owned(&Giant), ("dying thread owns giant"));

	if (ke->ke_tdspare != NULL) {
		thread_stash(ke->ke_tdspare);
		ke->ke_tdspare = NULL;
	}
	cpu_thread_exit(td);	/* XXXSMP */

	/* Reassign this thread's KSE. */
	if (ke != NULL) {
		ke->ke_thread = NULL;
		td->td_kse = NULL;
		ke->ke_state = KES_UNQUEUED;
		kse_reassign(ke);
	}

	/* Unlink this thread from its proc and the ksegrp. */
	if (p != NULL) {
		TAILQ_REMOVE(&p->p_threads, td, td_plist);
		p->p_numthreads--;
		if (kg != NULL) {
			TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
			kg->kg_numthreads--;
		}
		/*
		 * The test below is NOT true if we are the
		 * sole exiting thread. P_STOPPED_SNGL is unset
		 * in exit1() after it is the only survivor.
		 */
		if (P_SHOULDSTOP(p) == P_STOPPED_SNGL) {
			if (p->p_numthreads == p->p_suspcount) {
				TAILQ_REMOVE(&p->p_suspended,
				    p->p_singlethread, td_runq);
				setrunqueue(p->p_singlethread);
				p->p_suspcount--;
			}
		}
	}
	td->td_state	= TDS_SURPLUS;
	td->td_proc	= NULL;
	td->td_ksegrp	= NULL;
	td->td_last_kse	= NULL;
	ke->ke_tdspare = td;
	PROC_UNLOCK(p);
	cpu_throw();
	/* NOTREACHED */
}

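/*
 * As asserted above, callers enter thread_exit() with both the proc lock
 * and sched_lock held; the pattern used elsewhere in this file is:
 *
 *	PROC_LOCK(p);
 *	mtx_lock_spin(&sched_lock);
 *	thread_exit();
 *	(NOTREACHED)
 */
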
/*
 * Link a thread to a process.
 *
 * Note that we do not link to the proc's ucred here.
 * The thread is linked as if running but no KSE assigned.
 */
void
thread_link(struct thread *td, struct ksegrp *kg)
{
	struct proc *p;

	p = kg->kg_proc;
	td->td_state = TDS_NEW;
	td->td_proc	= p;
	td->td_ksegrp	= kg;
	td->td_last_kse	= NULL;

	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
	p->p_numthreads++;
	kg->kg_numthreads++;
	if (oiks_debug && p->p_numthreads > 4) {
		printf("OIKS %d\n", p->p_numthreads);
		if (oiks_debug > 1)
			Debugger("OIKS");
	}
	td->td_critnest = 0;
	td->td_kse	= NULL;
}

/*
 * Set up the upcall pcb in either a given thread or a new one
 * if none given. Use the upcall for the given KSE.
 * XXXKSE possibly fix cpu_set_upcall() to not need td->td_kse set.
 */
struct thread *
thread_schedule_upcall(struct thread *td, struct kse *ke)
{
	struct thread *td2;

	mtx_assert(&sched_lock, MA_OWNED);
	if (ke->ke_tdspare != NULL) {
		td2 = ke->ke_tdspare;
		ke->ke_tdspare = NULL;
	} else {
		mtx_unlock_spin(&sched_lock);
		td2 = thread_alloc();
		mtx_lock_spin(&sched_lock);
	}
	CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
	     td, td->td_proc->p_pid, td->td_proc->p_comm);
	thread_link(td2, ke->ke_ksegrp);
	cpu_set_upcall(td2, ke->ke_pcb);
	td2->td_ucred = crhold(td->td_ucred);
	td2->td_flags = TDF_UNBOUND|TDF_UPCALLING;
	td2->td_priority = td->td_priority;
	setrunqueue(td2);
	return (td2);
}

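/*
 * thread_schedule_upcall() expects sched_lock held, and drops and retakes
 * it internally if it must allocate.  A hypothetical call site making a
 * KSE's upcall runnable would look like (sketch only):
 *
 *	mtx_lock_spin(&sched_lock);
 *	td2 = thread_schedule_upcall(td, ke);
 *	mtx_unlock_spin(&sched_lock);
 */
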
/*
 * The extra work we go through if we are a threaded process when we
 * return to userland.
 *
 * If we are a KSE process and returning to user mode, check for
 * extra work to do before we return (e.g. for more syscalls
 * to complete first).  If we were in a critical section, we should
 * just return to let it finish. Same if we were in the UTS (in
 * which case we will have no thread mailbox registered).  The only
 * traps we support will have set the mailbox.  We will clear it here.
 */
int
thread_userret(struct proc *p, struct ksegrp *kg, struct kse *ke,
    struct thread *td, struct trapframe *frame)
{
	int error = 0;

	if (ke->ke_tdspare == NULL) {
		ke->ke_tdspare = thread_alloc();
	}
	if (td->td_flags & TDF_UNBOUND) {
		/*
		 * Are we returning from a thread that had a mailbox?
		 *
		 * XXX Maybe this should be in a separate function.
		 */
		if (((td->td_flags & TDF_UPCALLING) == 0) && td->td_mailbox) {
			/*
			 * [XXXKSE Future enhancement]
			 * We could also go straight back to the syscall
			 * if we never had to do an upcall since then.
			 * If the KSE's copy is == the thread's copy..
			 * AND there are no other completed threads.
			 */
			/*
			 * We will go back as an upcall or go do another thread.
			 * Either way we need to save the context back to
			 * the user thread mailbox, so that the UTS
			 * can restart it later.
			 */
			error = thread_export_context(td);
			td->td_mailbox = NULL;
			if (error) {
				/*
				 * Failing to do the KSE operation just
				 * defaults operation back to
				 * synchronous operation.
				 */
				goto cont;
			}

			if (TAILQ_FIRST(&kg->kg_runq)) {
				/*
				 * Uh-oh.. don't return to the user.
				 * Instead, switch to the thread that
				 * needs to run. The question is:
				 * What do we do with the thread we have now?
				 * We have put the completion block
				 * on the kse mailbox. If we had more energy,
				 * we could lazily do so, assuming someone
				 * else might get to userland earlier
				 * and deliver it earlier than we could.
				 * To do that we could save it off the KSEG.
				 * An upcalling KSE would 'reap' all completed
				 * threads.
				 * Being in a hurry, we'll do nothing and
				 * leave it on the current KSE for now.
				 *
				 * As for the other threads to run:
				 * we COULD rush through all the threads
				 * in this KSEG at this priority, or we
				 * could throw the ball back into the court
				 * and just run the highest prio kse available.
				 * What is OUR priority? The priority of the
				 * highest syscall waiting to be returned?
				 * For now, just let another KSE run (easiest).
				 */
				PROC_LOCK(p);
				mtx_lock_spin(&sched_lock);
				thread_exit(); /* Abandon current thread. */
				/* NOTREACHED */
			} else { /* if (number of returning syscalls = 1) */
				/*
				 * Swap our frame for the upcall frame.
				 *
				 * XXXKSE Assumes we are going to user land
				 * and not nested in the kernel
				 */
				td->td_flags |= TDF_UPCALLING;
			}
		}
		/*
		 * This is NOT just an 'else' clause for the above test...
		 */
		if (td->td_flags & TDF_UPCALLING) {
			CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
			    td, p->p_pid, p->p_comm);
			/*
			 * Make sure that it has the correct frame loaded.
			 * While we know that we are on the same KSEGRP
			 * as we were created on, we could very easily
			 * have come in on another KSE. We therefore need
			 * to do the copy of the frame after the last
			 * possible switch() (the one above).
			 */
			bcopy(ke->ke_frame, frame, sizeof(struct trapframe));

			/*
			 * Decide what we are sending to the user.
			 * The upcall sets one argument: the address of
			 * the mailbox.
			 */
			cpu_set_args(td, ke);

			/*
			 * There is no more work to do and we are going to ride
			 * this thread/KSE up to userland. Make sure the user's
			 * pointer to the thread mailbox is cleared before we
			 * re-enter the kernel next time for any reason.
			 * We might as well do it here.
			 */
			td->td_flags &= ~TDF_UPCALLING;	/* Hmmmm. */
			error = suword((caddr_t)td->td_kse->ke_mailbox +
			    offsetof(struct kse_mailbox, kmbx_current_thread),
			    0);
		}
		/*
		 * Stop any chance that we may be separated from
		 * the KSE we are currently on. This is "biting the bullet",
		 * we are committing to go to user space as THIS KSE here.
		 */
cont:
		td->td_flags &= ~TDF_UNBOUND;
	}
	return (error);
}

/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar). Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single threaded in the suspend mode when
 * there are no threads in user mode. Threads in the kernel must be
 * allowed to continue until they get to the user boundary. They may even
 * copy out their return values and data before suspending. They may however
 * be accelerated in reaching the user boundary as we will wake up
 * any sleeping threads that are interruptible (PCATCH).
 */
int
thread_single(int force_exit)
{
	struct thread *td;
	struct thread *td2;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT((td != NULL), ("curthread is NULL"));

	if ((p->p_flag & P_KSES) == 0)
		return (0);

	if (p->p_singlethread) {
		/*
		 * Someone is already single threading!
		 */
		return (1);
	}

	if (force_exit == SNGLE_EXIT)
		p->p_flag |= P_SINGLE_EXIT;
	else
		p->p_flag &= ~P_SINGLE_EXIT;
	p->p_flag |= P_STOPPED_SNGL;
	p->p_singlethread = td;
	while ((p->p_numthreads - p->p_suspcount) != 1) {
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			switch(td2->td_state) {
			case TDS_SUSPENDED:
				if (force_exit == SNGLE_EXIT) {
					/*
					 * Wake the suspended thread (td2,
					 * not the current thread) so it
					 * can exit.
					 */
					TAILQ_REMOVE(&p->p_suspended,
					    td2, td_runq);
					setrunqueue(td2); /* Should suicide. */
				}
				/* FALLTHROUGH */
			case TDS_SLP:
				if (td2->td_flags & TDF_CVWAITQ) {
					cv_abort(td2);
				} else {
					abortsleep(td2);
				}
				break;
			/* etc. XXXKSE */
			default:
				;
			}
		}
		/*
		 * XXXKSE-- idea
		 * It's possible that we can just wake up when
		 * there are no runnable KSEs, because that would
		 * indicate that only this thread is runnable and
		 * there are no running KSEs in userland.
		 * --
		 * Wake us up when everyone else has suspended.
		 * (or died)
		 */
		mtx_lock_spin(&sched_lock);
		TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
		td->td_state = TDS_SUSPENDED;
		p->p_suspcount++;
		mtx_unlock(&Giant);
		PROC_UNLOCK(p);
		mi_switch();
		mtx_unlock_spin(&sched_lock);
		mtx_lock(&Giant);
		PROC_LOCK(p);
	}
	return (0);
}

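/*
 * A hypothetical caller, such as an exiting process that must become
 * single threaded before tearing down shared state (sketch only;
 * exit1() is the kind of place this runs):
 *
 *	PROC_LOCK(p);
 *	if ((p->p_flag & P_KSES) != 0) {
 *		if (thread_single(SNGLE_EXIT))
 *			(someone else won; back out or abort)
 *	}
 *	(only one thread remains runnable here)
 */
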
/*
 * Called from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is nonzero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          |   returns 0 or 1
 *               | when ST ends       |   immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       |   returns 1
 *               |                    |   immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is under effect, even a single threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
int
thread_suspend_check(int return_instead)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	while (P_SHOULDSTOP(p)) {
		if (P_SHOULDSTOP(p) == P_STOPPED_SNGL) {
			KASSERT(p->p_singlethread != NULL,
			    ("singlethread not set"));

			/*
			 * The only suspension in action is
			 * single-threading. Treat it ever
			 * so slightly differently if it is
			 * in a special situation.
			 */
			if (p->p_singlethread == td) {
				return (0);	/* Exempt from stopping. */
			}

		}

		if (return_instead) {
			return (1);
		}

		/*
		 * If the process is waiting for us to exit,
		 * this thread should just suicide.
		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SNGL.
		 */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
			mtx_lock_spin(&sched_lock);
			while (mtx_owned(&Giant))
				mtx_unlock(&Giant);
			thread_exit();
		}

		/*
		 * When a thread suspends, it just
		 * moves to the process's suspend queue
		 * and stays there.
		 *
		 * XXXKSE if TDF_BOUND is true
		 * it will not release its KSE which might
		 * lead to deadlock if there are not enough KSEs
		 * to complete all waiting threads.
		 * Maybe be able to 'lend' it out again.
		 * (lent KSEs cannot go back to userland?)
		 * and can only be lent in STOPPED state.
		 */
		mtx_assert(&Giant, MA_NOTOWNED);
		mtx_lock_spin(&sched_lock);
		p->p_suspcount++;
		td->td_state = TDS_SUSPENDED;
		TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
		PROC_UNLOCK(p);
		mi_switch();
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
	}
	return (0);
}

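/*
 * A hypothetical suspension point that can back out rather than stop
 * (sketch only; the error value is the caller's choice):
 *
 *	PROC_LOCK(p);
 *	error = thread_suspend_check(1);
 *	PROC_UNLOCK(p);
 *	if (error)
 *		return (EINTR);
 *
 * Passing 0 instead parks the thread here until the single-threading
 * event ends, or exits it if P_SINGLE_EXIT was requested.
 */
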
/*
 * Allow all threads blocked by single threading to continue running.
 */
void
thread_unsuspend(struct proc *p)
{
	struct thread *td;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!P_SHOULDSTOP(p)) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			TAILQ_REMOVE(&p->p_suspended, td, td_runq);
			p->p_suspcount--;
			setrunqueue(td);
		}
	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SNGL) &&
	    (p->p_numthreads == p->p_suspcount)) {
		/*
		 * Stopping everything also did the job for the single
		 * threading request. Now we've downgraded to single-threaded,
		 * let it continue.
		 */
		TAILQ_REMOVE(&p->p_suspended, p->p_singlethread, td_runq);
		p->p_suspcount--;
		setrunqueue(p->p_singlethread);
	}
}

void
thread_single_end(void)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	p->p_flag &= ~P_STOPPED_SNGL;
	p->p_singlethread = NULL;
	thread_unsuspend(p);
}
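
/*
 * thread_single() and thread_single_end() bracket a single-threaded
 * region.  A hypothetical non-exiting user (sketch only; assumes a
 * force_exit value other than SNGLE_EXIT requests plain suspension):
 *
 *	PROC_LOCK(p);
 *	if (thread_single(0) == 0) {
 *		(act as the only running thread)
 *		thread_single_end();
 *	}
 *	PROC_UNLOCK(p);
 */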
799