/*
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_thread.c 124350 2004-01-10 18:34:01Z schweikh $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/sx.h>
#include <sys/tty.h>
#include <sys/turnstile.h>
#include <sys/user.h>
#include <sys/jail.h>
#include <sys/kse.h>
#include <sys/ktr.h>
#include <sys/ucontext.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/pmap.h>
#include <vm/uma.h>
#include <vm/vm_map.h>

#include <machine/frame.h>

/*
 * KSEGRP related storage.
 */
static uma_zone_t ksegrp_zone;
static uma_zone_t kse_zone;
static uma_zone_t thread_zone;
static uma_zone_t upcall_zone;

/* DEBUG ONLY */
SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
static int thread_debug = 0;
SYSCTL_INT(_kern_threads, OID_AUTO, debug, CTLFLAG_RW,
	&thread_debug, 0, "thread debug");

static int max_threads_per_proc = 150;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

static int max_groups_per_proc = 50;
SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
	&max_groups_per_proc, 0, "Limit on thread groups per proc");

static int max_threads_hits;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
	&max_threads_hits, 0, "");

static int virtual_cpu;

#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))
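
/*
 * RANGEOF() gives the size in bytes of the member span [start, end) of a
 * structure, so a whole run of members can be cleared or copied at once.
 * For example, bzero(&newkg->kg_startzero, RANGEOF(struct ksegrp,
 * kg_startzero, kg_endzero)) in kse_create() below zeroes every member
 * laid out between the kg_startzero and kg_endzero markers.
 */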

TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
TAILQ_HEAD(, kse_upcall) zombie_upcalls =
	TAILQ_HEAD_INITIALIZER(zombie_upcalls);
struct mtx kse_zombie_lock;
MTX_SYSINIT(kse_zombie_lock, &kse_zombie_lock, "kse zombie lock", MTX_SPIN);

static void kse_purge(struct proc *p, struct thread *td);
static void kse_purge_group(struct thread *td);
static int thread_update_usr_ticks(struct thread *td, int user);
static void thread_alloc_spare(struct thread *td, struct thread *spare);

static int
sysctl_kse_virtual_cpu(SYSCTL_HANDLER_ARGS)
{
	int error, new_val;
	int def_val;

#ifdef SMP
	def_val = mp_ncpus;
#else
	def_val = 1;
#endif
	if (virtual_cpu == 0)
		new_val = def_val;
	else
		new_val = virtual_cpu;
	error = sysctl_handle_int(oidp, &new_val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (new_val < 0)
		return (EINVAL);
	virtual_cpu = new_val;
	return (0);
}

/* DEBUG ONLY */
SYSCTL_PROC(_kern_threads, OID_AUTO, virtual_cpu, CTLTYPE_INT|CTLFLAG_RW,
	0, sizeof(virtual_cpu), sysctl_kse_virtual_cpu, "I",
	"debug virtual cpus");
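
/*
 * Illustrative only: the knob above appears as kern.threads.virtual_cpu,
 * so an administrator could, for example, run
 *	sysctl kern.threads.virtual_cpu=4
 * to make KSE processes behave as if four CPUs were present; reading it
 * while the stored value is 0 reports the default instead (mp_ncpus on
 * SMP kernels, otherwise 1).
 */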

/*
 * Prepare a thread for use.
 */
static void
thread_ctor(void *mem, int size, void *arg)
{
	struct thread	*td;

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
	td->td_oncpu	= NOCPU;
	td->td_critnest = 1;
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread	*td;

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static void
thread_init(void *mem, int size)
{
	struct thread	*td;

	td = (struct thread *)mem;
	vm_thread_new(td, 0);
	cpu_thread_setup(td);
	td->td_turnstile = turnstile_alloc();
	td->td_sched = (struct td_sched *)&td[1];
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread	*td;

	td = (struct thread *)mem;
	turnstile_free(td->td_turnstile);
	vm_thread_dispose(td);
}

/*
 * Initialize type-stable parts of a kse (when newly created).
 */
static void
kse_init(void *mem, int size)
{
	struct kse	*ke;

	ke = (struct kse *)mem;
	ke->ke_sched = (struct ke_sched *)&ke[1];
}

/*
 * Initialize type-stable parts of a ksegrp (when newly created).
 */
static void
ksegrp_init(void *mem, int size)
{
	struct ksegrp	*kg;

	kg = (struct ksegrp *)mem;
	kg->kg_sched = (struct kg_sched *)&kg[1];
}
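
/*
 * In the three init routines above, the scheduler-private data lives in
 * memory immediately following the public structure: the UMA zones are
 * created in threadinit() with sched_sizeof_thread(), sched_sizeof_kse()
 * and sched_sizeof_ksegrp(), each of which covers both parts, so &td[1]
 * (and likewise &ke[1], &kg[1]) points at the per-scheduler extension.
 */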

/*
 * Link a KSE into its ksegrp.
 */
void
kse_link(struct kse *ke, struct ksegrp *kg)
{
	struct proc *p = kg->kg_proc;

	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
	kg->kg_kses++;
	ke->ke_state	= KES_UNQUEUED;
	ke->ke_proc	= p;
	ke->ke_ksegrp	= kg;
	ke->ke_thread	= NULL;
	ke->ke_oncpu	= NOCPU;
	ke->ke_flags	= 0;
}

void
kse_unlink(struct kse *ke)
{
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	kg = ke->ke_ksegrp;
	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
	if (ke->ke_state == KES_IDLE) {
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses--;
	}
	kg->kg_kses--;
	/*
	 * Aggregate stats from the KSE.
	 */
	kse_stash(ke);
}

void
ksegrp_link(struct ksegrp *kg, struct proc *p)
{

	TAILQ_INIT(&kg->kg_threads);
	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_slpq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
	TAILQ_INIT(&kg->kg_iq);		/* all idle kses in ksegrp */
	TAILQ_INIT(&kg->kg_upcalls);	/* all upcall structures in ksegrp */
	kg->kg_proc = p;
	/*
	 * The following counters are in the -zero- section
	 * and may not need clearing.
	 */
	kg->kg_numthreads = 0;
	kg->kg_runnable   = 0;
	kg->kg_kses       = 0;
	kg->kg_runq_kses  = 0; /* XXXKSE change name */
	kg->kg_idle_kses  = 0;
	kg->kg_numupcalls = 0;
	/* Link it in now that it's consistent. */
	p->p_numksegrps++;
	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
}

void
ksegrp_unlink(struct ksegrp *kg)
{
	struct proc *p;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((kg->kg_numthreads == 0), ("ksegrp_unlink: residual threads"));
	KASSERT((kg->kg_kses == 0), ("ksegrp_unlink: residual kses"));
	KASSERT((kg->kg_numupcalls == 0), ("ksegrp_unlink: residual upcalls"));

	p = kg->kg_proc;
	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
	p->p_numksegrps--;
	/*
	 * Aggregate stats from the KSE.
	 */
	ksegrp_stash(kg);
}

struct kse_upcall *
upcall_alloc(void)
{
	struct kse_upcall *ku;

	ku = uma_zalloc(upcall_zone, M_WAITOK);
	bzero(ku, sizeof(*ku));
	return (ku);
}

void
upcall_free(struct kse_upcall *ku)
{

	uma_zfree(upcall_zone, ku);
}

void
upcall_link(struct kse_upcall *ku, struct ksegrp *kg)
{

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_INSERT_TAIL(&kg->kg_upcalls, ku, ku_link);
	ku->ku_ksegrp = kg;
	kg->kg_numupcalls++;
}

void
upcall_unlink(struct kse_upcall *ku)
{
	struct ksegrp *kg = ku->ku_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(ku->ku_owner == NULL, ("%s: have owner", __func__));
	TAILQ_REMOVE(&kg->kg_upcalls, ku, ku_link);
	kg->kg_numupcalls--;
	upcall_stash(ku);
}

void
upcall_remove(struct thread *td)
{

	if (td->td_upcall) {
		td->td_upcall->ku_owner = NULL;
		upcall_unlink(td->td_upcall);
		td->td_upcall = NULL;
	}
}
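
/*
 * Taken together, the routines above define the kse_upcall life cycle:
 * upcall_alloc() creates a zeroed structure, upcall_link() attaches it
 * to a ksegrp under sched_lock, an owner thread is then recorded in
 * ku_owner, and on teardown upcall_remove() clears the owner before
 * upcall_unlink() detaches the structure and upcall_stash() queues it
 * for reaping.
 */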

/*
 * For a newly created process,
 * link up all the structures and its initial threads etc.
 */
void
proc_linkup(struct proc *p, struct ksegrp *kg,
	    struct kse *ke, struct thread *td)
{

	TAILQ_INIT(&p->p_ksegrps);	     /* all ksegrps in proc */
	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
	TAILQ_INIT(&p->p_suspended);	     /* Threads suspended */
	p->p_numksegrps = 0;
	p->p_numthreads = 0;

	ksegrp_link(kg, p);
	kse_link(ke, kg);
	thread_link(td, kg);
}

#ifndef _SYS_SYSPROTO_H_
struct kse_switchin_args {
	const struct __mcontext *mcp;
	long val;
	long *loc;
};
#endif

int
kse_switchin(struct thread *td, struct kse_switchin_args *uap)
{
	mcontext_t mc;
	int error;

	error = (uap->mcp == NULL) ? EINVAL : 0;
	if (!error)
		error = copyin(uap->mcp, &mc, sizeof(mc));
	if (!error && uap->loc != NULL)
		error = (suword(uap->loc, uap->val) != 0) ? EINVAL : 0;
	if (!error)
		error = set_mcontext(td, &mc);
	return ((error == 0) ? EJUSTRETURN : error);
}
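
/*
 * Note the EJUSTRETURN above: set_mcontext() has already installed a
 * complete register set for the thread, so the normal syscall return
 * path must not overwrite those registers with a return value.
 */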

/*
struct kse_thr_interrupt_args {
	struct kse_thr_mailbox *tmbx;
	int cmd;
	long data;
};
*/
int
kse_thr_interrupt(struct thread *td, struct kse_thr_interrupt_args *uap)
{
	struct proc *p;
	struct thread *td2;

	p = td->td_proc;

	if (!(p->p_flag & P_SA))
		return (EINVAL);

	switch (uap->cmd) {
	case KSE_INTR_SENDSIG:
		if (uap->data < 0 || uap->data > _SIG_MAXSIG)
			return (EINVAL);
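		/* FALLTHROUGH: SENDSIG shares the thread lookup below. */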
	case KSE_INTR_INTERRUPT:
	case KSE_INTR_RESTART:
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2->td_mailbox == uap->tmbx)
				break;
		}
		if (td2 == NULL) {
			mtx_unlock_spin(&sched_lock);
			PROC_UNLOCK(p);
			return (ESRCH);
		}
		if (uap->cmd == KSE_INTR_SENDSIG) {
			if (uap->data > 0) {
				td2->td_flags &= ~TDF_INTERRUPT;
				mtx_unlock_spin(&sched_lock);
				tdsignal(td2, (int)uap->data, SIGTARGET_TD);
			} else {
				mtx_unlock_spin(&sched_lock);
			}
		} else {
			td2->td_flags |= TDF_INTERRUPT | TDF_ASTPENDING;
			if (TD_CAN_UNBIND(td2))
				td2->td_upcall->ku_flags |= KUF_DOUPCALL;
			if (uap->cmd == KSE_INTR_INTERRUPT)
				td2->td_intrval = EINTR;
			else
				td2->td_intrval = ERESTART;
			if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR)) {
				if (td2->td_flags & TDF_CVWAITQ)
					cv_abort(td2);
				else
					abortsleep(td2);
			}
			mtx_unlock_spin(&sched_lock);
		}
		PROC_UNLOCK(p);
		break;
	case KSE_INTR_SIGEXIT:
		if (uap->data < 1 || uap->data > _SIG_MAXSIG)
			return (EINVAL);
		PROC_LOCK(p);
		sigexit(td, (int)uap->data);
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

/*
struct kse_exit_args {
	register_t dummy;
};
*/
int
kse_exit(struct thread *td, struct kse_exit_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse *ke;
	struct kse_upcall *ku, *ku2;
	int    error, count;

	p = td->td_proc;
	if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td))
		return (EINVAL);
	kg = td->td_ksegrp;
	count = 0;
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	FOREACH_UPCALL_IN_GROUP(kg, ku2) {
		if (ku2->ku_flags & KUF_EXITING)
			count++;
	}
	if ((kg->kg_numupcalls - count) == 1 &&
	    (kg->kg_numthreads > 1)) {
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (EDEADLK);
	}
	ku->ku_flags |= KUF_EXITING;
	mtx_unlock_spin(&sched_lock);
	PROC_UNLOCK(p);
	error = suword(&ku->ku_mailbox->km_flags, ku->ku_mflags|KMF_DONE);
	PROC_LOCK(p);
	if (error)
		psignal(p, SIGSEGV);
	mtx_lock_spin(&sched_lock);
	upcall_remove(td);
	ke = td->td_kse;
	if (p->p_numthreads == 1) {
		kse_purge(p, td);
		p->p_flag &= ~P_SA;
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	} else {
		if (kg->kg_numthreads == 1) { /* Shutdown a group */
			kse_purge_group(td);
			ke->ke_flags |= KEF_EXIT;
		}
		thread_stopped(p);
		thread_exit();
		/* NOTREACHED */
	}
	return (0);
}

/*
 * Either becomes an upcall or waits for an awakening event and
 * then becomes an upcall. Only error cases return.
 */
/*
struct kse_release_args {
	struct timespec *timeout;
};
*/
int
kse_release(struct thread *td, struct kse_release_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse_upcall *ku;
	struct timespec timeout;
	struct timeval tv;
	sigset_t sigset;
	int error;

	p = td->td_proc;
	kg = td->td_ksegrp;
	if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td))
		return (EINVAL);
	if (uap->timeout != NULL) {
		if ((error = copyin(uap->timeout, &timeout, sizeof(timeout))))
			return (error);
		TIMESPEC_TO_TIMEVAL(&tv, &timeout);
	}
	if (td->td_flags & TDF_SA)
		td->td_pflags |= TDP_UPCALLING;
	else {
		ku->ku_mflags = fuword(&ku->ku_mailbox->km_flags);
		if (ku->ku_mflags == -1) {
			PROC_LOCK(p);
			sigexit(td, SIGSEGV);
		}
	}
	PROC_LOCK(p);
	if (ku->ku_mflags & KMF_WAITSIGEVENT) {
		/* UTS wants to wait for signal event */
		if (!(p->p_flag & P_SIGEVENT) && !(ku->ku_flags & KUF_DOUPCALL))
			error = msleep(&p->p_siglist, &p->p_mtx, PPAUSE|PCATCH,
			    "ksesigwait", (uap->timeout ? tvtohz(&tv) : 0));
		p->p_flag &= ~P_SIGEVENT;
		sigset = p->p_siglist;
		PROC_UNLOCK(p);
		error = copyout(&sigset, &ku->ku_mailbox->km_sigscaught,
		    sizeof(sigset));
	} else {
		if (!kg->kg_completed && !(ku->ku_flags & KUF_DOUPCALL)) {
			kg->kg_upsleeps++;
			error = msleep(&kg->kg_completed, &p->p_mtx,
				PPAUSE|PCATCH, "kserel",
				(uap->timeout ? tvtohz(&tv) : 0));
			kg->kg_upsleeps--;
		}
		PROC_UNLOCK(p);
	}
	if (ku->ku_flags & KUF_DOUPCALL) {
		mtx_lock_spin(&sched_lock);
		ku->ku_flags &= ~KUF_DOUPCALL;
		mtx_unlock_spin(&sched_lock);
	}
	return (0);
}

/* struct kse_wakeup_args {
	struct kse_mailbox *mbx;
}; */
int
kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse_upcall *ku;
	struct thread *td2;

	p = td->td_proc;
	td2 = NULL;
	ku = NULL;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_SA))
		return (EINVAL);
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	if (uap->mbx) {
		FOREACH_KSEGRP_IN_PROC(p, kg) {
			FOREACH_UPCALL_IN_GROUP(kg, ku) {
				if (ku->ku_mailbox == uap->mbx)
					break;
			}
			if (ku)
				break;
		}
	} else {
		kg = td->td_ksegrp;
		if (kg->kg_upsleeps) {
			wakeup_one(&kg->kg_completed);
			mtx_unlock_spin(&sched_lock);
			PROC_UNLOCK(p);
			return (0);
		}
		ku = TAILQ_FIRST(&kg->kg_upcalls);
	}
	if (ku) {
		if ((td2 = ku->ku_owner) == NULL) {
			panic("%s: no owner", __func__);
		} else if (TD_ON_SLEEPQ(td2) &&
		           ((td2->td_wchan == &kg->kg_completed) ||
			    (td2->td_wchan == &p->p_siglist &&
			     (ku->ku_mflags & KMF_WAITSIGEVENT)))) {
			abortsleep(td2);
		} else {
			ku->ku_flags |= KUF_DOUPCALL;
		}
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (0);
	}
	mtx_unlock_spin(&sched_lock);
	PROC_UNLOCK(p);
	return (ESRCH);
}

/*
 * First call with no new KSEG: use the current KSE, don't schedule an
 * upcall. In all other situations, allocate the maximum number of new
 * KSEs and schedule an upcall.
 */
/* struct kse_create_args {
	struct kse_mailbox *mbx;
	int newgroup;
}; */
int
kse_create(struct thread *td, struct kse_create_args *uap)
{
	struct kse *newke;
	struct ksegrp *newkg;
	struct ksegrp *kg;
	struct proc *p;
	struct kse_mailbox mbx;
	struct kse_upcall *newku;
	int err, ncpus, sa = 0, first = 0;
	struct thread *newtd;

	p = td->td_proc;
	if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
		return (err);

	/* Too bad the kernel doesn't always have a cpu counter. */
#ifdef SMP
	ncpus = mp_ncpus;
#else
	ncpus = 1;
#endif
	if (virtual_cpu != 0)
		ncpus = virtual_cpu;
	if (!(mbx.km_flags & KMF_BOUND))
		sa = TDF_SA;
	else
		ncpus = 1;
	PROC_LOCK(p);
	if (!(p->p_flag & P_SA)) {
		first = 1;
		p->p_flag |= P_SA;
	}
	PROC_UNLOCK(p);
	if (!sa && !uap->newgroup && !first)
		return (EINVAL);
	kg = td->td_ksegrp;
	if (uap->newgroup) {
		/* This check is racy, but the race is cheap to tolerate. */
		if (p->p_numksegrps >= max_groups_per_proc)
			return (EPROCLIM);
		/*
		 * If we want a new KSEGRP it doesn't matter whether
		 * we have already fired up KSE mode before or not.
		 * We put the process in KSE mode and create a new KSEGRP.
		 */
		newkg = ksegrp_alloc();
		bzero(&newkg->kg_startzero, RANGEOF(struct ksegrp,
		      kg_startzero, kg_endzero));
		bcopy(&kg->kg_startcopy, &newkg->kg_startcopy,
		      RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		if (p->p_numksegrps >= max_groups_per_proc) {
			mtx_unlock_spin(&sched_lock);
			PROC_UNLOCK(p);
			ksegrp_free(newkg);
			return (EPROCLIM);
		}
		ksegrp_link(newkg, p);
		sched_fork_ksegrp(kg, newkg);
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	} else {
		if (!first && ((td->td_flags & TDF_SA) ^ sa) != 0)
			return (EINVAL);
		newkg = kg;
	}

	/*
	 * Creating more upcalls than the number of physical CPUs does
	 * not help performance.
	 */
	if (newkg->kg_numupcalls >= ncpus)
		return (EPROCLIM);

	if (newkg->kg_numupcalls == 0) {
		/*
		 * Initialize the KSE group.
		 *
		 * For a multiplexed group, create as many KSEs as there are
		 * physical cpus. This increases concurrency in the kernel
		 * even if userland is not MP safe and can only run on a
		 * single CPU. In an ideal world, every physical cpu should
		 * execute a thread. If there are enough KSEs, threads in
		 * the kernel can be executed in parallel on different cpus
		 * at full speed; concurrency in the kernel shouldn't be
		 * restricted by the number of upcalls userland provides.
		 * Adding more upcall structures only increases concurrency
		 * in userland.
		 *
		 * For a bound thread group, because there is only one
		 * thread in the group, we only create one KSE for the
		 * group. A thread in this kind of group will never
		 * schedule an upcall when blocked; this is intended to
		 * simulate a pthread system-scope thread.
		 */
		while (newkg->kg_kses < ncpus) {
			newke = kse_alloc();
			bzero(&newke->ke_startzero, RANGEOF(struct kse,
			      ke_startzero, ke_endzero));
#if 0
			mtx_lock_spin(&sched_lock);
			bcopy(&ke->ke_startcopy, &newke->ke_startcopy,
			      RANGEOF(struct kse, ke_startcopy, ke_endcopy));
			mtx_unlock_spin(&sched_lock);
#endif
			mtx_lock_spin(&sched_lock);
			kse_link(newke, newkg);
			sched_fork_kse(td->td_kse, newke);
			/* Add engine */
			kse_reassign(newke);
			mtx_unlock_spin(&sched_lock);
		}
	}
	newku = upcall_alloc();
	newku->ku_mailbox = uap->mbx;
	newku->ku_func = mbx.km_func;
	bcopy(&mbx.km_stack, &newku->ku_stack, sizeof(stack_t));

	/* For the first call this may not have been set. */
	if (td->td_standin == NULL)
		thread_alloc_spare(td, NULL);

	PROC_LOCK(p);
	if (newkg->kg_numupcalls >= ncpus) {
		PROC_UNLOCK(p);
		upcall_free(newku);
		return (EPROCLIM);
	}
	if (first && sa) {
		SIGSETOR(p->p_siglist, td->td_siglist);
		SIGEMPTYSET(td->td_siglist);
		SIGFILLSET(td->td_sigmask);
		SIG_CANTMASK(td->td_sigmask);
	}
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	upcall_link(newku, newkg);
	if (mbx.km_quantum)
		newkg->kg_upquantum = max(1, mbx.km_quantum/tick);

	/*
	 * Each upcall structure has an owner thread; find which
	 * one owns it.
	 */
	if (uap->newgroup) {
		/*
		 * Because the new ksegrp has no thread,
		 * create an initial upcall thread to own it.
		 */
		newtd = thread_schedule_upcall(td, newku);
	} else {
		/*
		 * If the current thread has no upcall structure,
		 * just assign the upcall to it.
		 */
		if (td->td_upcall == NULL) {
			newku->ku_owner = td;
			td->td_upcall = newku;
			newtd = td;
		} else {
			/*
			 * Create a new upcall thread to own it.
			 */
			newtd = thread_schedule_upcall(td, newku);
		}
	}
	if (!sa) {
		newtd->td_mailbox = mbx.km_curthread;
		newtd->td_flags &= ~TDF_SA;
		if (newtd != td) {
			mtx_unlock_spin(&sched_lock);
			cpu_set_upcall_kse(newtd, newku);
			mtx_lock_spin(&sched_lock);
		}
	} else {
		newtd->td_flags |= TDF_SA;
	}
	if (newtd != td)
		setrunqueue(newtd);
	mtx_unlock_spin(&sched_lock);
	return (0);
}
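
#if 0
/*
 * Illustrative sketch only (not part of this kernel file): roughly how
 * a userland threading library might call kse_create(2) to start a
 * multiplexed upcall group. The km_func/km_stack/km_curthread fields
 * match the mailbox usage above; uts_entry(), UTS_STACK_SIZE, the
 * omitted error handling and any further mandatory mailbox fields
 * (see <sys/kse.h>) are assumptions of this sketch.
 */
#include <stdlib.h>
#include <string.h>
#include <sys/kse.h>

#define UTS_STACK_SIZE	(64 * 1024)	/* arbitrary for the sketch */

static struct kse_mailbox km;

/* The UTS entry point: runs on km_stack after each upcall. */
static void
uts_entry(struct kse_mailbox *kmp)
{
	/* Pick a completed/runnable thread context and resume it. */
}

static void
start_uts(void)
{
	memset(&km, 0, sizeof(km));
	km.km_func = uts_entry;			/* upcall entry point */
	km.km_stack.ss_sp = malloc(UTS_STACK_SIZE);
	km.km_stack.ss_size = UTS_STACK_SIZE;
	kse_create(&km, 0);			/* 0: stay in current group */
}
#endif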

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, 0);
	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
	    NULL, NULL, ksegrp_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	kse_zone = uma_zcreate("KSE", sched_sizeof_kse(),
	    NULL, NULL, kse_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	upcall_zone = uma_zcreate("UPCALL", sizeof(struct kse_upcall),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
}

/*
 * Stash an embarrassingly extra thread into the zombie thread queue.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra kse into the zombie kse queue.
 */
void
kse_stash(struct kse *ke)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra upcall into the zombie upcall queue.
 */
void
upcall_stash(struct kse_upcall *ku)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_upcalls, ku, ku_link);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra ksegrp into the zombie ksegrp queue.
 */
void
ksegrp_stash(struct ksegrp *kg)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Reap zombie thread, kse, ksegrp and upcall resources.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;
	struct kse *ke_first, *ke_next;
	struct ksegrp *kg_first, *kg_next;
	struct kse_upcall *ku_first, *ku_next;

	/*
	 * Don't even bother to lock if none at this instant;
	 * we really don't care about the next instant.
	 */
	if ((!TAILQ_EMPTY(&zombie_threads))
	    || (!TAILQ_EMPTY(&zombie_kses))
	    || (!TAILQ_EMPTY(&zombie_ksegrps))
	    || (!TAILQ_EMPTY(&zombie_upcalls))) {
		mtx_lock_spin(&kse_zombie_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		ke_first = TAILQ_FIRST(&zombie_kses);
		kg_first = TAILQ_FIRST(&zombie_ksegrps);
		ku_first = TAILQ_FIRST(&zombie_upcalls);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		if (ke_first)
			TAILQ_INIT(&zombie_kses);
		if (kg_first)
			TAILQ_INIT(&zombie_ksegrps);
		if (ku_first)
			TAILQ_INIT(&zombie_upcalls);
		mtx_unlock_spin(&kse_zombie_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_runq);
			if (td_first->td_ucred)
				crfree(td_first->td_ucred);
			thread_free(td_first);
			td_first = td_next;
		}
		while (ke_first) {
			ke_next = TAILQ_NEXT(ke_first, ke_procq);
			kse_free(ke_first);
			ke_first = ke_next;
		}
		while (kg_first) {
			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
			ksegrp_free(kg_first);
			kg_first = kg_next;
		}
		while (ku_first) {
			ku_next = TAILQ_NEXT(ku_first, ku_link);
			upcall_free(ku_first);
			ku_first = ku_next;
		}
	}
}

/*
 * Allocate a ksegrp.
 */
struct ksegrp *
ksegrp_alloc(void)
{
	return (uma_zalloc(ksegrp_zone, M_WAITOK));
}

/*
 * Allocate a kse.
 */
struct kse *
kse_alloc(void)
{
	return (uma_zalloc(kse_zone, M_WAITOK));
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{
	thread_reap(); /* check if any zombies to get */
	return (uma_zalloc(thread_zone, M_WAITOK));
}

/*
 * Deallocate a ksegrp.
 */
void
ksegrp_free(struct ksegrp *td)
{
	uma_zfree(ksegrp_zone, td);
}

/*
 * Deallocate a kse.
 */
void
kse_free(struct kse *td)
{
	uma_zfree(kse_zone, td);
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{

	cpu_thread_clean(td);
	uma_zfree(thread_zone, td);
}

/*
 * Store the thread context in the UTS's mailbox,
 * then add the mailbox at the head of a list we are building in user space.
 * The list is anchored in the ksegrp structure.
 */
int
thread_export_context(struct thread *td, int willexit)
{
	struct proc *p;
	struct ksegrp *kg;
	uintptr_t mbx;
	void *addr;
	int error = 0, temp, sig;
	mcontext_t mc;

	p = td->td_proc;
	kg = td->td_ksegrp;

	/* Export the user/machine context. */
	get_mcontext(td, &mc, 0);
	addr = (void *)(&td->td_mailbox->tm_context.uc_mcontext);
	error = copyout(&mc, addr, sizeof(mcontext_t));
	if (error)
		goto bad;

	/* Export clock ticks spent in kernel mode. */
	addr = (caddr_t)(&td->td_mailbox->tm_sticks);
	temp = fuword32(addr) + td->td_usticks;
	if (suword32(addr, temp)) {
		error = EFAULT;
		goto bad;
	}

	/*
	 * Post a sync signal, or process SIGKILL and SIGSTOP.
	 * For a sync signal, this is only possible when the signal is not
	 * caught by userland or the process is being debugged.
	 */
	PROC_LOCK(p);
	if (td->td_flags & TDF_NEEDSIGCHK) {
		mtx_lock_spin(&sched_lock);
		td->td_flags &= ~TDF_NEEDSIGCHK;
		mtx_unlock_spin(&sched_lock);
		mtx_lock(&p->p_sigacts->ps_mtx);
		while ((sig = cursig(td)) != 0)
			postsig(sig);
		mtx_unlock(&p->p_sigacts->ps_mtx);
	}
	if (willexit)
		SIGFILLSET(td->td_sigmask);
	PROC_UNLOCK(p);

	/* Get the address of the list pointer in the latest mailbox. */
	addr = (void *)(&td->td_mailbox->tm_next);
	/*
	 * Put the saved address of the previous first
	 * entry into this one.
	 */
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			error = EFAULT;
			goto bad;
		}
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = td->td_mailbox;
			/*
			 * The thread context may be taken away by
			 * other upcall threads when we unlock the
			 * process lock. It is no longer valid to
			 * use it again in any other place.
			 */
			td->td_mailbox = NULL;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	td->td_usticks = 0;
	return (0);

bad:
	PROC_LOCK(p);
	sigexit(td, SIGILL);
	return (error);
}
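
/*
 * The loop above is an optimistic push onto the user-visible singly
 * linked list of completed mailboxes: the current list head is written
 * into the new mailbox's tm_next without any lock held, and only if the
 * head is still unchanged once the proc lock is taken does the head move
 * to the new mailbox; otherwise the store is retried with the fresh head.
 * thread_link_mboxes() below uses the same pattern to hand the whole
 * list to an upcall's mailbox.
 */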

/*
 * Take the list of completed mailboxes for this KSEGRP and put them on this
 * upcall's mailbox as it's the next one going up.
 */
static int
thread_link_mboxes(struct ksegrp *kg, struct kse_upcall *ku)
{
	struct proc *p = kg->kg_proc;
	void *addr;
	uintptr_t mbx;

	addr = (void *)(&ku->ku_mailbox->km_completed);
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			PROC_LOCK(p);
			psignal(p, SIGSEGV);
			PROC_UNLOCK(p);
			return (EFAULT);
		}
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = NULL;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	return (0);
}

/*
 * This function should be called at statclock interrupt time.
 */
int
thread_statclock(int user)
{
	struct thread *td = curthread;
	struct ksegrp *kg = td->td_ksegrp;

	if (kg->kg_numupcalls == 0 || !(td->td_flags & TDF_SA))
		return (0);
	if (user) {
		/* Currently always done via ast(). */
		mtx_lock_spin(&sched_lock);
		td->td_flags |= (TDF_USTATCLOCK|TDF_ASTPENDING);
		mtx_unlock_spin(&sched_lock);
		td->td_uuticks++;
	} else {
		if (td->td_mailbox != NULL)
			td->td_usticks++;
		else {
			/* XXXKSE
			 * We will call thread_user_enter() for every
			 * kernel entry in the future, so if the thread
			 * mailbox is NULL, it must be a UTS thread;
			 * don't account clock ticks for it.
			 */
		}
	}
	return (0);
}
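
/*
 * Note the split above: ticks taken in user mode accumulate in
 * td_uuticks and ticks taken in kernel mode in td_usticks; both are
 * later flushed out to the thread mailbox by thread_update_usr_ticks()
 * below (and the kernel-mode count also by thread_export_context()).
 */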

/*
 * Export statclock ticks to userland.
 */
static int
thread_update_usr_ticks(struct thread *td, int user)
{
	struct proc *p = td->td_proc;
	struct kse_thr_mailbox *tmbx;
	struct kse_upcall *ku;
	struct ksegrp *kg;
	caddr_t addr;
	u_int uticks;

	if ((ku = td->td_upcall) == NULL)
		return (-1);

	tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
	if ((tmbx == NULL) || (tmbx == (void *)-1))
		return (-1);
	if (user) {
		uticks = td->td_uuticks;
		td->td_uuticks = 0;
		addr = (caddr_t)&tmbx->tm_uticks;
	} else {
		uticks = td->td_usticks;
		td->td_usticks = 0;
		addr = (caddr_t)&tmbx->tm_sticks;
	}
	if (uticks) {
		if (suword32(addr, uticks+fuword32(addr))) {
			PROC_LOCK(p);
			psignal(p, SIGSEGV);
			PROC_UNLOCK(p);
			return (-2);
		}
	}
	kg = td->td_ksegrp;
	if (kg->kg_upquantum && ticks >= kg->kg_nextupcall) {
		mtx_lock_spin(&sched_lock);
		td->td_upcall->ku_flags |= KUF_DOUPCALL;
		mtx_unlock_spin(&sched_lock);
	}
	return (0);
}

/*
 * Discard the current thread and exit from its context.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our CPU's deadthread holder. This means
 * we needn't worry about someone else grabbing our context before we
 * do a cpu_throw().
 */
void
thread_exit(void)
{
	struct thread *td;
	struct kse *ke;
	struct proc *p;
	struct ksegrp	*kg;

	td = curthread;
	kg = td->td_ksegrp;
	p = td->td_proc;
	ke = td->td_kse;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	KASSERT(ke != NULL, ("thread exiting without a kse"));
	KASSERT(kg != NULL, ("thread exiting without a kse group"));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	CTR1(KTR_PROC, "thread_exit: thread %p", td);
	KASSERT(!mtx_owned(&Giant), ("dying thread owns giant"));

	if (td->td_standin != NULL) {
		thread_stash(td->td_standin);
		td->td_standin = NULL;
	}

	cpu_thread_exit(td);	/* XXXSMP */

	/*
	 * The last thread is left attached to the process
	 * so that the whole bundle gets recycled. Skip
	 * all this stuff.
	 */
	if (p->p_numthreads > 1) {
		thread_unlink(td);
		if (p->p_maxthrwaits)
			wakeup(&p->p_numthreads);
		/*
		 * The test below is NOT true if we are the
		 * sole exiting thread. P_STOPPED_SNGL is unset
		 * in exit1() after it is the only survivor.
		 */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}

		/*
		 * Because each upcall structure has an owner thread,
		 * and the owner thread exits only when the process is
		 * in the exiting state, an upcall to userland is no
		 * longer needed and deleting the upcall structure is
		 * safe here. So when all threads in a group have exited,
		 * all upcalls in the group should be automatically freed.
		 */
		if (td->td_upcall)
			upcall_remove(td);

		sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
		sched_exit_kse(FIRST_KSE_IN_PROC(p), ke);
		ke->ke_state = KES_UNQUEUED;
		ke->ke_thread = NULL;
		/*
		 * Decide what to do with the KSE attached to this thread.
		 */
		if (ke->ke_flags & KEF_EXIT) {
			kse_unlink(ke);
			if (kg->kg_kses == 0) {
				sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), kg);
				ksegrp_unlink(kg);
			}
		} else
			kse_reassign(ke);
		PROC_UNLOCK(p);
		td->td_kse	= NULL;
		td->td_state	= TDS_INACTIVE;
#if 0
		td->td_proc	= NULL;
#endif
		td->td_ksegrp	= NULL;
		td->td_last_kse	= NULL;
		PCPU_SET(deadthread, td);
	} else {
		PROC_UNLOCK(p);
	}
	/* XXX Shouldn't cpu_throw() here. */
	mtx_assert(&sched_lock, MA_OWNED);
	cpu_throw(td, choosethread());
	panic("I'm a teapot!");
	/* NOTREACHED */
}

/*
 * Do any thread-specific cleanups that may be needed in wait().
 * Called with Giant held; the proc and sched locks are not held.
 */
void
thread_wait(struct proc *p)
{
	struct thread *td;

	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
	KASSERT((p->p_numksegrps == 1), ("Multiple ksegrps in wait1()"));
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_standin != NULL) {
			thread_free(td->td_standin);
			td->td_standin = NULL;
		}
		cpu_thread_clean(td);
	}
	thread_reap();	/* check for zombie threads etc. */
}

/*
 * Link a thread to a process.
 * Set up anything that needs to be initialized for it to
 * be used by the process.
 *
 * Note that we do not link to the proc's ucred here.
 * The thread is linked as if running but no KSE assigned.
 */
void
thread_link(struct thread *td, struct ksegrp *kg)
{
	struct proc *p;

	p = kg->kg_proc;
	td->td_state    = TDS_INACTIVE;
	td->td_proc     = p;
	td->td_ksegrp   = kg;
	td->td_last_kse = NULL;
	td->td_flags    = 0;
	td->td_kse      = NULL;

	LIST_INIT(&td->td_contested);
	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
	p->p_numthreads++;
	kg->kg_numthreads++;
}

void
thread_unlink(struct thread *td)
{
	struct proc *p = td->td_proc;
	struct ksegrp *kg = td->td_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_REMOVE(&p->p_threads, td, td_plist);
	p->p_numthreads--;
	TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
	kg->kg_numthreads--;
	/* could clear a few other things here */
}

/*
 * Purge a ksegrp's resources. When a ksegrp is preparing to
 * exit, it calls this function.
 */
static void
kse_purge_group(struct thread *td)
{
	struct ksegrp *kg;
	struct kse *ke;

	kg = td->td_ksegrp;
	KASSERT(kg->kg_numthreads == 1, ("%s: bad thread number", __func__));
	while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
		KASSERT(ke->ke_state == KES_IDLE,
			("%s: wrong idle KSE state", __func__));
		kse_unlink(ke);
	}
	KASSERT((kg->kg_kses == 1),
		("%s: ksegrp still has %d KSEs", __func__, kg->kg_kses));
	KASSERT((kg->kg_numupcalls == 0),
	        ("%s: ksegrp still has %d upcall datas",
		__func__, kg->kg_numupcalls));
}

1422111028Sjeff/*
1423124350Sschweikh * Purge a process's KSE resource. When a process is preparing to
1424124350Sschweikh * exit, it calls kse_purge to release any extra KSE resources in
1425111028Sjeff * the process.
1426111028Sjeff */
1427113864Sjhbstatic void
1428105854Sjuliankse_purge(struct proc *p, struct thread *td)
1429105854Sjulian{
1430105854Sjulian	struct ksegrp *kg;
1431111028Sjeff	struct kse *ke;
1432105854Sjulian
1433105854Sjulian 	KASSERT(p->p_numthreads == 1, ("bad thread number"));
1434105854Sjulian	while ((kg = TAILQ_FIRST(&p->p_ksegrps)) != NULL) {
1435105854Sjulian		TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
1436105854Sjulian		p->p_numksegrps--;
1437111028Sjeff		/*
1438111028Sjeff		 * There is no ownership for KSE, after all threads
1439124350Sschweikh		 * in the group exited, it is possible that some KSEs
1440111028Sjeff		 * were left in idle queue, gc them now.
1441111028Sjeff		 */
1442111028Sjeff		while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
1443111028Sjeff			KASSERT(ke->ke_state == KES_IDLE,
1444111028Sjeff			   ("%s: wrong idle KSE state", __func__));
1445111028Sjeff			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
1446111028Sjeff			kg->kg_idle_kses--;
1447111028Sjeff			TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
1448111028Sjeff			kg->kg_kses--;
1449111028Sjeff			kse_stash(ke);
1450111028Sjeff		}
1451105854Sjulian		KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
1452111028Sjeff		        ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
1453111028Sjeff		        ("ksegrp has wrong kg_kses: %d", kg->kg_kses));
1454111028Sjeff		KASSERT((kg->kg_numupcalls == 0),
1455111028Sjeff		        ("%s: ksegrp still has %d upcall data structures",
1456111028Sjeff			__func__, kg->kg_numupcalls));
1457124350Sschweikh
1458111028Sjeff		if (kg != td->td_ksegrp)
1459105854Sjulian			ksegrp_stash(kg);
1460105854Sjulian	}
1461105854Sjulian	TAILQ_INSERT_HEAD(&p->p_ksegrps, td->td_ksegrp, kg_ksegrp);
1462105854Sjulian	p->p_numksegrps++;
1463105854Sjulian}
1464105854Sjulian
1465111028Sjeff/*
1466111028Sjeff * Initialize a spare thread for use as an upcall thread.  The thread's
1467111028Sjeff * large data area is initialized here, outside sched_lock, on behalf
1468111028Sjeff * of thread_schedule_upcall().
1469111028Sjeff */
1470111028Sjeffvoid
1471111028Sjeffthread_alloc_spare(struct thread *td, struct thread *spare)
1472111028Sjeff{
1473111028Sjeff	if (td->td_standin)
1474111028Sjeff		return;
1475111028Sjeff	if (spare == NULL)
1476111028Sjeff		spare = thread_alloc();
1477111028Sjeff	td->td_standin = spare;
1478111028Sjeff	bzero(&spare->td_startzero,
1479111028Sjeff	    (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
1480111028Sjeff	spare->td_proc = td->td_proc;
1481111028Sjeff	spare->td_ucred = crhold(td->td_ucred);
1482111028Sjeff}
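/*
 * Illustrative sketch (hypothetical caller): the spare is allocated
 * outside sched_lock so that thread_schedule_upcall() can later run
 * entirely under it:
 *
 *	if (td->td_standin == NULL)
 *		thread_alloc_spare(td, NULL);
 *	mtx_lock_spin(&sched_lock);
 *	td2 = thread_schedule_upcall(td, ku);	(ku->ku_owner cleared first)
 *	mtx_unlock_spin(&sched_lock);
 */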
1483105854Sjulian
148499026Sjulian/*
1485103410Smini * Create a thread and schedule it for an upcall on the kse_upcall given.
1486108338Sjulian * Use our thread's standin so that we don't have to allocate one.
148799026Sjulian */
148899026Sjulianstruct thread *
1489111028Sjeffthread_schedule_upcall(struct thread *td, struct kse_upcall *ku)
149099026Sjulian{
149199026Sjulian	struct thread *td2;
149299026Sjulian
149399026Sjulian	mtx_assert(&sched_lock, MA_OWNED);
1494104695Sjulian
1495124350Sschweikh	/*
1496111028Sjeff	 * Schedule an upcall thread on the specified kse_upcall;
1497111028Sjeff	 * the kse_upcall must be free and td must have a spare
1498111028Sjeff	 * thread.
1499104695Sjulian	 */
1500111028Sjeff	KASSERT(ku->ku_owner == NULL, ("%s: upcall has owner", __func__));
1501104695Sjulian	if ((td2 = td->td_standin) != NULL) {
1502104695Sjulian		td->td_standin = NULL;
150399026Sjulian	} else {
1504111028Sjeff		panic("no reserve thread when scheduling an upcall");
1505106182Sdavidxu		return (NULL);
150699026Sjulian	}
150799026Sjulian	CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
1508104695Sjulian	     td2, td->td_proc->p_pid, td->td_proc->p_comm);
1509103002Sjulian	bcopy(&td->td_startcopy, &td2->td_startcopy,
1510103002Sjulian	    (unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy));
1511111028Sjeff	thread_link(td2, ku->ku_ksegrp);
1512113244Sdavidxu	/* inherit blocked thread's context */
1513115858Smarcel	cpu_set_upcall(td2, td);
1514111028Sjeff	/* Let the new thread become owner of the upcall */
1515111028Sjeff	ku->ku_owner   = td2;
1516111028Sjeff	td2->td_upcall = ku;
1517116401Sdavidxu	td2->td_flags  = TDF_SA;
1518116372Sdavidxu	td2->td_pflags = TDP_UPCALLING;
1519111028Sjeff	td2->td_kse    = NULL;
1520111028Sjeff	td2->td_state  = TDS_CAN_RUN;
1521104695Sjulian	td2->td_inhibitors = 0;
1522116963Sdavidxu	SIGFILLSET(td2->td_sigmask);
1523116963Sdavidxu	SIG_CANTMASK(td2->td_sigmask);
1524119488Sdavidxu	sched_fork_thread(td, td2);
1525104695Sjulian	return (td2);	/* bogus.. should be a void function */
152699026Sjulian}
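/*
 * Illustrative sketch of the ownership hand-off (thread_switchout()
 * below does this for real): the caller must free the kse_upcall
 * before rescheduling it, since thread_schedule_upcall() asserts that
 * it has no owner:
 *
 *	ku = td->td_upcall;
 *	ku->ku_owner = NULL;
 *	td->td_upcall = NULL;
 *	td2 = thread_schedule_upcall(td, ku);
 *	setrunqueue(td2);
 */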
152799026Sjulian
1528116963Sdavidxu/*
1529116963Sdavidxu * This is only used when a thread has generated a trap and the process
1530116963Sdavidxu * is being debugged.
1531116963Sdavidxu */
1532111033Sjeffvoid
1533111033Sjeffthread_signal_add(struct thread *td, int sig)
1534103410Smini{
1535111033Sjeff	struct proc *p;
1536116963Sdavidxu	siginfo_t siginfo;
1537116963Sdavidxu	struct sigacts *ps;
1538103410Smini	int error;
1539103410Smini
1540115884Sdavidxu	p = td->td_proc;
1541115884Sdavidxu	PROC_LOCK_ASSERT(p, MA_OWNED);
1542116963Sdavidxu	ps = p->p_sigacts;
1543116963Sdavidxu	mtx_assert(&ps->ps_mtx, MA_OWNED);
1544116963Sdavidxu
1545117607Sdavidxu	cpu_thread_siginfo(sig, 0, &siginfo);
1546116963Sdavidxu	mtx_unlock(&ps->ps_mtx);
1547103410Smini	PROC_UNLOCK(p);
1548116963Sdavidxu	error = copyout(&siginfo, &td->td_mailbox->tm_syncsig, sizeof(siginfo));
1549116963Sdavidxu	if (error) {
1550116963Sdavidxu		PROC_LOCK(p);
1551116963Sdavidxu		sigexit(td, SIGILL);
1552116963Sdavidxu	}
1553103410Smini	PROC_LOCK(p);
1554116963Sdavidxu	SIGADDSET(td->td_sigmask, sig);
1555116963Sdavidxu	mtx_lock(&ps->ps_mtx);
1556111033Sjeff}
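/*
 * Illustrative sketch (hypothetical call site): thread_signal_add()
 * expects both the proc lock and ps_mtx to be held, matching the
 * assertions above:
 *
 *	PROC_LOCK(p);
 *	mtx_lock(&p->p_sigacts->ps_mtx);
 *	thread_signal_add(curthread, SIGTRAP);
 *	mtx_unlock(&p->p_sigacts->ps_mtx);
 *	PROC_UNLOCK(p);
 */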
1557111033Sjeff
1558111033Sjeffvoid
1559112397Sdavidxuthread_switchout(struct thread *td)
1560112397Sdavidxu{
1561112397Sdavidxu	struct kse_upcall *ku;
1562116607Sdavidxu	struct thread *td2;
1563112397Sdavidxu
1564112397Sdavidxu	mtx_assert(&sched_lock, MA_OWNED);
1565112397Sdavidxu
1566112397Sdavidxu	/*
1567112397Sdavidxu	 * If the outgoing thread is in a threaded group and has never
1568112397Sdavidxu	 * scheduled an upcall, decide whether this is a short-
1569112397Sdavidxu	 * or long-term event and thus whether or not to schedule
1570112397Sdavidxu	 * an upcall.
1571112397Sdavidxu	 * If it is a short term event, just suspend it in
1572112397Sdavidxu	 * a way that takes its KSE with it.
1573112397Sdavidxu	 * Select the events for which we want to schedule upcalls.
1574112397Sdavidxu	 * For now it's just sleep.
1575112397Sdavidxu	 * XXXKSE eventually almost any inhibition could do.
1576112397Sdavidxu	 */
1577112397Sdavidxu	if (TD_CAN_UNBIND(td) && (td->td_standin) && TD_ON_SLEEPQ(td)) {
1578124350Sschweikh		/*
1579112397Sdavidxu		 * Release ownership of the upcall, and schedule an upcall
1580112397Sdavidxu		 * thread; this new upcall thread becomes the owner of
1581112397Sdavidxu		 * the upcall structure.
1582112397Sdavidxu		 */
1583112397Sdavidxu		ku = td->td_upcall;
1584112397Sdavidxu		ku->ku_owner = NULL;
1585124350Sschweikh		td->td_upcall = NULL;
1586112397Sdavidxu		td->td_flags &= ~TDF_CAN_UNBIND;
1587116607Sdavidxu		td2 = thread_schedule_upcall(td, ku);
1588116607Sdavidxu		setrunqueue(td2);
1589112397Sdavidxu	}
1590112397Sdavidxu}
1591112397Sdavidxu
1592103410Smini/*
1593111028Sjeff * Setup done on the thread when it enters the kernel.
1594105900Sjulian * XXXKSE Presently only for syscalls but eventually all kernel entries.
1595105900Sjulian */
1596105900Sjulianvoid
1597105900Sjulianthread_user_enter(struct proc *p, struct thread *td)
1598105900Sjulian{
1599111028Sjeff	struct ksegrp *kg;
1600111028Sjeff	struct kse_upcall *ku;
1601113793Sdavidxu	struct kse_thr_mailbox *tmbx;
1602118486Sdavidxu	uint32_t tflags;
1603105900Sjulian
1604111028Sjeff	kg = td->td_ksegrp;
1605113793Sdavidxu
1606105900Sjulian	/*
1607105900Sjulian	 * First check that we shouldn't just abort.
1608105900Sjulian	 * But check if we are the single thread first!
1609105900Sjulian	 */
1610116401Sdavidxu	if (p->p_flag & P_SINGLE_EXIT) {
1611116401Sdavidxu		PROC_LOCK(p);
1612105900Sjulian		mtx_lock_spin(&sched_lock);
1613112071Sdavidxu		thread_stopped(p);
1614105900Sjulian		thread_exit();
1615105900Sjulian		/* NOTREACHED */
1616105900Sjulian	}
1617105900Sjulian
1618105900Sjulian	/*
1619105900Sjulian	 * If we are doing a syscall in a KSE environment,
1620105900Sjulian	 * note where our mailbox is. There is always the
1621108338Sjulian	 * possibility that we could do this lazily (in kse_reassign()),
1622105900Sjulian	 * but for now do it every time.
1623105900Sjulian	 */
1625116401Sdavidxu	if (td->td_flags & TDF_SA) {
1626111028Sjeff		ku = td->td_upcall;
1627111028Sjeff		KASSERT(ku, ("%s: no upcall owned", __func__));
1628111028Sjeff		KASSERT((ku->ku_owner == td), ("%s: wrong owner", __func__));
1629113793Sdavidxu		KASSERT(!TD_CAN_UNBIND(td), ("%s: can unbind", __func__));
1630117000Smarcel		ku->ku_mflags = fuword32((void *)&ku->ku_mailbox->km_flags);
1631113793Sdavidxu		tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
1632118486Sdavidxu		if ((tmbx == NULL) || (tmbx == (void *)-1L) ||
1633118486Sdavidxu		    (ku->ku_mflags & KMF_NOUPCALL)) {
1634111028Sjeff			td->td_mailbox = NULL;
1635105900Sjulian		} else {
1636111115Sdavidxu			if (td->td_standin == NULL)
1637111115Sdavidxu				thread_alloc_spare(td, NULL);
1638118673Sdeischen			tflags = fuword32(&tmbx->tm_flags);
1639118486Sdavidxu			/*
1640118486Sdavidxu			 * On some architectures the TP register points to
1641124350Sschweikh			 * the thread mailbox rather than the KSE mailbox,
1642124350Sschweikh			 * and userland cannot atomically clear km_curthread;
1643118486Sdavidxu			 * it can, however, set TMF_NOUPCALL in the thread
1644118486Sdavidxu			 * flags (via the TP register) to mark a critical region.
1645118486Sdavidxu			 */
1646118486Sdavidxu			if (tflags & TMF_NOUPCALL) {
1647118486Sdavidxu				td->td_mailbox = NULL;
1648118486Sdavidxu			} else {
1649118486Sdavidxu				td->td_mailbox = tmbx;
1650118486Sdavidxu				mtx_lock_spin(&sched_lock);
1651113793Sdavidxu				td->td_flags |= TDF_CAN_UNBIND;
1652118486Sdavidxu				mtx_unlock_spin(&sched_lock);
1653118486Sdavidxu			}
1654105900Sjulian		}
1655105900Sjulian	}
1656105900Sjulian}
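/*
 * Illustrative sketch of the userland side (hypothetical UTS code, not
 * part of the kernel): on architectures that cannot atomically clear
 * km_curthread, a thread instead marks a critical region through its
 * mailbox flags, which the code above honours:
 *
 *	tmbx->tm_flags |= TMF_NOUPCALL;		(enter critical region)
 *	...
 *	tmbx->tm_flags &= ~TMF_NOUPCALL;	(leave critical region)
 */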
1657105900Sjulian
1658105900Sjulian/*
1659103410Smini * The extra work we go through if we are a threaded process when we
1660103410Smini * return to userland.
1661103410Smini *
166299026Sjulian * If we are a KSE process and returning to user mode, check for
166399026Sjulian * extra work to do before we return (e.g. for more syscalls
166499026Sjulian * to complete first).  If we were in a critical section, we should
166599026Sjulian * just return to let it finish. Same if we were in the UTS (in
1666103410Smini * which case the mailbox's context's busy indicator will be set).
1667103410Smini * The only traps we support will have set the mailbox.
1668103410Smini * We will clear it here.
166999026Sjulian */
167099026Sjulianint
1671103838Sjulianthread_userret(struct thread *td, struct trapframe *frame)
167299026Sjulian{
1673113793Sdavidxu	int error = 0, upcalls, uts_crit;
1674111028Sjeff	struct kse_upcall *ku;
1675111115Sdavidxu	struct ksegrp *kg, *kg2;
1676104695Sjulian	struct proc *p;
1677107060Sdavidxu	struct timespec ts;
167899026Sjulian
1679111028Sjeff	p = td->td_proc;
1680110190Sjulian	kg = td->td_ksegrp;
1681116401Sdavidxu	ku = td->td_upcall;
1682104695Sjulian
1683116401Sdavidxu	/* Nothing to do with bound thread */
1684116401Sdavidxu	if (!(td->td_flags & TDF_SA))
1685111028Sjeff		return (0);
1686108338Sjulian
1687103410Smini	/*
1688124350Sschweikh	 * The stat clock interrupt hit in userland and we
1689111028Sjeff	 * are returning from the interrupt; charge the
1690111028Sjeff	 * thread's userland time for the UTS.
1691103410Smini	 */
1692111028Sjeff	if (td->td_flags & TDF_USTATCLOCK) {
1693111515Sdavidxu		thread_update_usr_ticks(td, 1);
1694111028Sjeff		mtx_lock_spin(&sched_lock);
1695111028Sjeff		td->td_flags &= ~TDF_USTATCLOCK;
1696111028Sjeff		mtx_unlock_spin(&sched_lock);
1697116401Sdavidxu		if (kg->kg_completed ||
1698111515Sdavidxu		    (td->td_upcall->ku_flags & KUF_DOUPCALL))
1699111515Sdavidxu			thread_user_enter(p, td);
1700111028Sjeff	}
1701108338Sjulian
1702113793Sdavidxu	uts_crit = (td->td_mailbox == NULL);
1703124350Sschweikh	/*
1704111028Sjeff	 * Optimisation:
1705111028Sjeff	 * This thread has not started any upcall.
1706111028Sjeff	 * If there is no work to report other than ourselves,
1707111028Sjeff	 * then it can return directly to userland.
1708111028Sjeff	 */
1709108338Sjulian	if (TD_CAN_UNBIND(td)) {
1710111028Sjeff		mtx_lock_spin(&sched_lock);
1711111028Sjeff		td->td_flags &= ~TDF_CAN_UNBIND;
1712112888Sjeff		if ((td->td_flags & TDF_NEEDSIGCHK) == 0 &&
1713112077Sdavidxu		    (kg->kg_completed == NULL) &&
1714112397Sdavidxu		    (ku->ku_flags & KUF_DOUPCALL) == 0 &&
1715113708Sdavidxu		    (kg->kg_upquantum && ticks < kg->kg_nextupcall)) {
1716112888Sjeff			mtx_unlock_spin(&sched_lock);
1717111515Sdavidxu			thread_update_usr_ticks(td, 0);
1718112222Sdavidxu			nanotime(&ts);
1719112397Sdavidxu			error = copyout(&ts,
1720112222Sdavidxu				(caddr_t)&ku->ku_mailbox->km_timeofday,
1721112222Sdavidxu				sizeof(ts));
1722112077Sdavidxu			td->td_mailbox = NULL;
1723113793Sdavidxu			ku->ku_mflags = 0;
1724112222Sdavidxu			if (error)
1725112222Sdavidxu				goto out;
1726112077Sdavidxu			return (0);
1727108338Sjulian		}
1728112888Sjeff		mtx_unlock_spin(&sched_lock);
1729117704Sdavidxu		thread_export_context(td, 0);
1730104695Sjulian		/*
1731111028Sjeff		 * There is something to report, and we own an upcall
1732111028Sjeff		 * structure, so we can go to userland.
1733111028Sjeff		 * Turn ourselves into an upcall thread.
1734104695Sjulian		 */
1735116372Sdavidxu		td->td_pflags |= TDP_UPCALLING;
1736113793Sdavidxu	} else if (td->td_mailbox && (ku == NULL)) {
1737117704Sdavidxu		thread_export_context(td, 1);
1738112071Sdavidxu		PROC_LOCK(p);
1739112071Sdavidxu		/*
1740112071Sdavidxu		 * There are upcall threads waiting for
1741112071Sdavidxu		 * work to do; wake one of them up.
1742124350Sschweikh		 * XXXKSE Maybe wake all of them up.
1743112071Sdavidxu		 */
1744117704Sdavidxu		if (kg->kg_upsleeps)
1745112071Sdavidxu			wakeup_one(&kg->kg_completed);
1746112071Sdavidxu		mtx_lock_spin(&sched_lock);
1747112071Sdavidxu		thread_stopped(p);
1748108338Sjulian		thread_exit();
1749111028Sjeff		/* NOTREACHED */
1750104695Sjulian	}
1751104695Sjulian
1752116401Sdavidxu	KASSERT(ku != NULL, ("upcall is NULL\n"));
1753111154Sdavidxu	KASSERT(TD_CAN_UNBIND(td) == 0, ("can unbind"));
1754111154Sdavidxu
1755111154Sdavidxu	if (p->p_numthreads > max_threads_per_proc) {
1756111154Sdavidxu		max_threads_hits++;
1757111154Sdavidxu		PROC_LOCK(p);
1758113920Sjhb		mtx_lock_spin(&sched_lock);
1759116184Sdavidxu		p->p_maxthrwaits++;
1760111154Sdavidxu		while (p->p_numthreads > max_threads_per_proc) {
1761111154Sdavidxu			upcalls = 0;
1762111154Sdavidxu			FOREACH_KSEGRP_IN_PROC(p, kg2) {
1763111154Sdavidxu				if (kg2->kg_numupcalls == 0)
1764111154Sdavidxu					upcalls++;
1765111154Sdavidxu				else
1766111154Sdavidxu					upcalls += kg2->kg_numupcalls;
1767111154Sdavidxu			}
1768111154Sdavidxu			if (upcalls >= max_threads_per_proc)
1769111154Sdavidxu				break;
1770114106Sdavidxu			mtx_unlock_spin(&sched_lock);
1771116138Sdavidxu			if (msleep(&p->p_numthreads, &p->p_mtx, PPAUSE|PCATCH,
1772123737Speter			    "maxthreads", 0)) {
1773116184Sdavidxu				mtx_lock_spin(&sched_lock);
1774116184Sdavidxu				break;
1775116184Sdavidxu			} else {
1776116184Sdavidxu				mtx_lock_spin(&sched_lock);
1777116184Sdavidxu			}
1778111154Sdavidxu		}
1779116184Sdavidxu		p->p_maxthrwaits--;
1780113920Sjhb		mtx_unlock_spin(&sched_lock);
1781111154Sdavidxu		PROC_UNLOCK(p);
1782111154Sdavidxu	}
1783111154Sdavidxu
1784116372Sdavidxu	if (td->td_pflags & TDP_UPCALLING) {
1785113793Sdavidxu		uts_crit = 0;
1786112397Sdavidxu		kg->kg_nextupcall = ticks+kg->kg_upquantum;
1787124350Sschweikh		/*
1788108338Sjulian		 * There is no more work to do and we are going to ride
1789111028Sjeff		 * this thread up to userland as an upcall.
1790108338Sjulian		 * Do the last parts of the setup needed for the upcall.
1791108338Sjulian		 */
1792108338Sjulian		CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
1793108338Sjulian		    td, td->td_proc->p_pid, td->td_proc->p_comm);
1794104695Sjulian
1795116372Sdavidxu		td->td_pflags &= ~TDP_UPCALLING;
1796116401Sdavidxu		if (ku->ku_flags & KUF_DOUPCALL) {
1797116401Sdavidxu			mtx_lock_spin(&sched_lock);
1798111028Sjeff			ku->ku_flags &= ~KUF_DOUPCALL;
1799116401Sdavidxu			mtx_unlock_spin(&sched_lock);
1800116401Sdavidxu		}
1801111028Sjeff		/*
1802113793Sdavidxu		 * Set user context to the UTS
1803113793Sdavidxu		 */
1804113793Sdavidxu		if (!(ku->ku_mflags & KMF_NOUPCALL)) {
1805113793Sdavidxu			cpu_set_upcall_kse(td, ku);
1806113793Sdavidxu			error = suword(&ku->ku_mailbox->km_curthread, 0);
1807113793Sdavidxu			if (error)
1808113793Sdavidxu				goto out;
1809113793Sdavidxu		}
1810113793Sdavidxu
1811113793Sdavidxu		/*
1812108338Sjulian		 * Unhook the list of completed threads.
1813124350Sschweikh		 * Anything that completes after this gets to
1814108338Sjulian		 * come in next time.
1815108338Sjulian		 * Put the list of completed thread mailboxes on
1816108338Sjulian		 * this KSE's mailbox.
1817108338Sjulian		 */
1818113793Sdavidxu		if (!(ku->ku_mflags & KMF_NOCOMPLETED) &&
1819113793Sdavidxu		    (error = thread_link_mboxes(kg, ku)) != 0)
1820111115Sdavidxu			goto out;
1821113793Sdavidxu	}
1822113793Sdavidxu	if (!uts_crit) {
1823107060Sdavidxu		nanotime(&ts);
1824113793Sdavidxu		error = copyout(&ts, &ku->ku_mailbox->km_timeofday, sizeof(ts));
1825111115Sdavidxu	}
1826111115Sdavidxu
1827111115Sdavidxuout:
1828111115Sdavidxu	if (error) {
1829111115Sdavidxu		/*
1830111129Sdavidxu		 * Things are going to be so screwed we should just kill
1831111129Sdavidxu		 * the process.
1832111115Sdavidxu		 * How do we do that?
1833111115Sdavidxu		 */
1834111115Sdavidxu		PROC_LOCK(td->td_proc);
1835111115Sdavidxu		psignal(td->td_proc, SIGSEGV);
1836111115Sdavidxu		PROC_UNLOCK(td->td_proc);
1837111115Sdavidxu	} else {
1838111115Sdavidxu		/*
1839111115Sdavidxu		 * Optimisation:
1840111115Sdavidxu		 * Ensure that we have a spare thread available,
1841111115Sdavidxu		 * for when we re-enter the kernel.
1842111115Sdavidxu		 */
1843111115Sdavidxu		if (td->td_standin == NULL)
1844111115Sdavidxu			thread_alloc_spare(td, NULL);
1845111115Sdavidxu	}
1846111115Sdavidxu
1847113793Sdavidxu	ku->ku_mflags = 0;
1848111028Sjeff	/*
1849111028Sjeff	 * Clear thread mailbox first, then clear system tick count.
1850124350Sschweikh	 * The order is important because thread_statclock() uses the
1851111028Sjeff	 * mailbox pointer to see if it is a userland thread or
1852111028Sjeff	 * a UTS kernel thread.
1853111028Sjeff	 */
1854108338Sjulian	td->td_mailbox = NULL;
1855111028Sjeff	td->td_usticks = 0;
1856104695Sjulian	return (error);	/* go sync */
185799026Sjulian}
185899026Sjulian
185999026Sjulian/*
186099026Sjulian * Enforce single-threading.
186199026Sjulian *
186299026Sjulian * Returns 1 if the caller must abort (another thread is waiting to
186399026Sjulian * exit the process or similar). Process is locked!
186499026Sjulian * Returns 0 when you are successfully the only thread running.
186599026Sjulian * A process has successfully single-threaded in suspend mode when
186699026Sjulian * there are no threads in user mode. Threads in the kernel must be
186799026Sjulian * allowed to continue until they get to the user boundary. They may even
186899026Sjulian * copy out their return values and data before suspending. They may,
186999026Sjulian * however, be accelerated in reaching the user boundary, as we will wake
187099026Sjulian * up any sleeping threads that are interruptible (PCATCH).
187199026Sjulian */
187299026Sjulianint
187399026Sjulianthread_single(int force_exit)
187499026Sjulian{
187599026Sjulian	struct thread *td;
187699026Sjulian	struct thread *td2;
187799026Sjulian	struct proc *p;
187899026Sjulian
187999026Sjulian	td = curthread;
188099026Sjulian	p = td->td_proc;
1881107719Sjulian	mtx_assert(&Giant, MA_OWNED);
188299026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
188399026Sjulian	KASSERT((td != NULL), ("curthread is NULL"));
188499026Sjulian
1885116361Sdavidxu	if ((p->p_flag & P_SA) == 0 && p->p_numthreads == 1)
188699026Sjulian		return (0);
188799026Sjulian
1888100648Sjulian	/* Is someone already single threading? */
1889124350Sschweikh	if (p->p_singlethread)
189099026Sjulian		return (1);
189199026Sjulian
1892108338Sjulian	if (force_exit == SINGLE_EXIT) {
189399026Sjulian		p->p_flag |= P_SINGLE_EXIT;
1894108338Sjulian	} else
189599026Sjulian		p->p_flag &= ~P_SINGLE_EXIT;
1896102950Sdavidxu	p->p_flag |= P_STOPPED_SINGLE;
1897113920Sjhb	mtx_lock_spin(&sched_lock);
189899026Sjulian	p->p_singlethread = td;
189999026Sjulian	while ((p->p_numthreads - p->p_suspcount) != 1) {
190099026Sjulian		FOREACH_THREAD_IN_PROC(p, td2) {
190199026Sjulian			if (td2 == td)
190299026Sjulian				continue;
1903113705Sdavidxu			td2->td_flags |= TDF_ASTPENDING;
1904103216Sjulian			if (TD_IS_INHIBITED(td2)) {
1905105911Sjulian				if (force_exit == SINGLE_EXIT) {
1906105911Sjulian					if (TD_IS_SUSPENDED(td2)) {
1907103216Sjulian						thread_unsuspend_one(td2);
1908105911Sjulian					}
1909105911Sjulian					if (TD_ON_SLEEPQ(td2) &&
1910105911Sjulian					    (td2->td_flags & TDF_SINTR)) {
1911105911Sjulian						if (td2->td_flags & TDF_CVWAITQ)
1912105911Sjulian							cv_abort(td2);
1913105911Sjulian						else
1914105911Sjulian							abortsleep(td2);
1915105911Sjulian					}
1916105911Sjulian				} else {
1917105911Sjulian					if (TD_IS_SUSPENDED(td2))
1918105874Sdavidxu						continue;
1919111028Sjeff					/*
1920111028Sjeff					 * Maybe other inhibited states too?
1921111028Sjeff					 * XXXKSE Is it totally safe to
1922111028Sjeff					 * suspend a non-interruptible thread?
1923111028Sjeff					 */
1924108338Sjulian					if (td2->td_inhibitors &
1925111028Sjeff					    (TDI_SLEEPING | TDI_SWAPPED))
1926105911Sjulian						thread_suspend_one(td2);
192799026Sjulian				}
192899026Sjulian			}
192999026Sjulian		}
1930124350Sschweikh		/*
1931124350Sschweikh		 * Maybe we suspended some threads... was it enough?
1932105911Sjulian		 */
1933113920Sjhb		if ((p->p_numthreads - p->p_suspcount) == 1)
1934105911Sjulian			break;
1935105911Sjulian
193699026Sjulian		/*
193799026Sjulian		 * Wake us up when everyone else has suspended.
1938100648Sjulian		 * In the meantime we suspend as well.
193999026Sjulian		 */
1940103216Sjulian		thread_suspend_one(td);
1941113795Sdavidxu		DROP_GIANT();
194299026Sjulian		PROC_UNLOCK(p);
1943107719Sjulian		p->p_stats->p_ru.ru_nvcsw++;
194499026Sjulian		mi_switch();
194599026Sjulian		mtx_unlock_spin(&sched_lock);
1946113795Sdavidxu		PICKUP_GIANT();
194799026Sjulian		PROC_LOCK(p);
1948113920Sjhb		mtx_lock_spin(&sched_lock);
194999026Sjulian	}
1950124350Sschweikh	if (force_exit == SINGLE_EXIT) {
1951113920Sjhb		if (td->td_upcall)
1952111028Sjeff			upcall_remove(td);
1953105854Sjulian		kse_purge(p, td);
1954111028Sjeff	}
1955113920Sjhb	mtx_unlock_spin(&sched_lock);
195699026Sjulian	return (0);
195799026Sjulian}
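/*
 * Illustrative sketch (hypothetical caller, in the style of fork1()):
 * thread_single() is called with the proc lock (and, in this version,
 * Giant) held; a nonzero return means another thread won the race and
 * we must back out.  Assumes the SINGLE_NO_EXIT constant from
 * <sys/proc.h>:
 *
 *	PROC_LOCK(p);
 *	if (thread_single(SINGLE_NO_EXIT)) {
 *		PROC_UNLOCK(p);
 *		return (ERESTART);
 *	}
 *	...		(single-threaded work)
 *	thread_single_end();
 *	PROC_UNLOCK(p);
 */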
195899026Sjulian
195999026Sjulian/*
196099026Sjulian * Called in from locations that can safely check to see
196199026Sjulian * whether we have to suspend or at least throttle for a
196299026Sjulian * single-thread event (e.g. fork).
196399026Sjulian *
196499026Sjulian * Such locations include userret().
196599026Sjulian * If the "return_instead" argument is nonzero, the thread must be able to
196699026Sjulian * accept 0 (caller may continue), or 1 (caller must abort) as a result.
196799026Sjulian *
196899026Sjulian * The 'return_instead' argument tells the function if it may do a
196999026Sjulian * thread_exit() or suspend, or whether the caller must abort and back
197099026Sjulian * out instead.
197199026Sjulian *
197299026Sjulian * If the thread that set the single_threading request has set the
197399026Sjulian * P_SINGLE_EXIT bit in the process flags then this call will never return
197499026Sjulian * if 'return_instead' is false, but will exit.
197599026Sjulian *
197699026Sjulian * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
197799026Sjulian *---------------+--------------------+---------------------
197899026Sjulian *       0       | returns 0          |   returns 0 or 1
197999026Sjulian *               | when ST ends       |   immediatly
198099026Sjulian *---------------+--------------------+---------------------
198199026Sjulian *       1       | thread exits       |   returns 1
198299026Sjulian *               |                    |  immediatly
198399026Sjulian * 0 = thread_exit() or suspension ok,
198499026Sjulian * other = return error instead of stopping the thread.
198599026Sjulian *
198699026Sjulian * While a full suspension is in effect, even a single-threading
198799026Sjulian * thread would be suspended if it made this call (but it shouldn't).
198899026Sjulian * This call should only be made from places where
1989124350Sschweikh * thread_exit() would be safe as that may be the outcome unless
199099026Sjulian * return_instead is set.
199199026Sjulian */
199299026Sjulianint
199399026Sjulianthread_suspend_check(int return_instead)
199499026Sjulian{
1995104502Sjmallett	struct thread *td;
1996104502Sjmallett	struct proc *p;
199799026Sjulian
199899026Sjulian	td = curthread;
199999026Sjulian	p = td->td_proc;
200099026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
200199026Sjulian	while (P_SHOULDSTOP(p)) {
2002102950Sdavidxu		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
200399026Sjulian			KASSERT(p->p_singlethread != NULL,
200499026Sjulian			    ("singlethread not set"));
200599026Sjulian			/*
2006100648Sjulian			 * The only suspension in action is
2007100648Sjulian			 * single-threading. The single threader need not stop.
2008124350Sschweikh			 * XXX Should be safe to access unlocked
2009100646Sjulian			 * as it can only be set to be true by us.
201099026Sjulian			 */
2011100648Sjulian			if (p->p_singlethread == td)
201299026Sjulian				return (0);	/* Exempt from stopping. */
2013124350Sschweikh		}
2014100648Sjulian		if (return_instead)
201599026Sjulian			return (1);
201699026Sjulian
2017112071Sdavidxu		mtx_lock_spin(&sched_lock);
2018112071Sdavidxu		thread_stopped(p);
201999026Sjulian		/*
202099026Sjulian		 * If the process is waiting for us to exit,
202199026Sjulian		 * this thread should just suicide.
2022102950Sdavidxu		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
202399026Sjulian		 */
202499026Sjulian		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
202599026Sjulian			while (mtx_owned(&Giant))
202699026Sjulian				mtx_unlock(&Giant);
2027116361Sdavidxu			if (p->p_flag & P_SA)
2028112910Sjeff				thread_exit();
2029112910Sjeff			else
2030112910Sjeff				thr_exit1();
203199026Sjulian		}
203299026Sjulian
203399026Sjulian		/*
203499026Sjulian		 * When a thread suspends, it just
203599026Sjulian		 * moves to the process's suspend queue
203699026Sjulian		 * and stays there.
203799026Sjulian		 */
2038103216Sjulian		thread_suspend_one(td);
2039102950Sdavidxu		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
2040100632Sjulian			if (p->p_numthreads == p->p_suspcount) {
2041103216Sjulian				thread_unsuspend_one(p->p_singlethread);
2042100632Sjulian			}
2043100632Sjulian		}
2044114398Sdavidxu		DROP_GIANT();
2045113864Sjhb		PROC_UNLOCK(p);
2046100594Sjulian		p->p_stats->p_ru.ru_nivcsw++;
204799026Sjulian		mi_switch();
204899026Sjulian		mtx_unlock_spin(&sched_lock);
2049114398Sdavidxu		PICKUP_GIANT();
205099026Sjulian		PROC_LOCK(p);
205199026Sjulian	}
205299026Sjulian	return (0);
205399026Sjulian}
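/*
 * Illustrative sketch (hypothetical caller, cf. the table above): with
 * return_instead nonzero, the caller polls and backs out rather than
 * suspending here:
 *
 *	PROC_LOCK(p);
 *	if (thread_suspend_check(1)) {
 *		PROC_UNLOCK(p);
 *		return (EINTR);
 *	}
 *	...
 *	PROC_UNLOCK(p);
 */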
205499026Sjulian
2055102898Sdavidxuvoid
2056102898Sdavidxuthread_suspend_one(struct thread *td)
2057102898Sdavidxu{
2058102898Sdavidxu	struct proc *p = td->td_proc;
2059102898Sdavidxu
2060102898Sdavidxu	mtx_assert(&sched_lock, MA_OWNED);
2061113920Sjhb	PROC_LOCK_ASSERT(p, MA_OWNED);
2062112071Sdavidxu	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
2063102898Sdavidxu	p->p_suspcount++;
2064103216Sjulian	TD_SET_SUSPENDED(td);
2065102898Sdavidxu	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
2066103216Sjulian	/*
2067103216Sjulian	 * Hack: If we are suspending but are on the sleep queue
2068103216Sjulian	 * then we are in msleep or the cv equivalent. We
2069103216Sjulian	 * want to look like we have two inhibitors.
2070105911Sjulian	 * It may already be set; that doesn't matter.
2071103216Sjulian	 */
2072103216Sjulian	if (TD_ON_SLEEPQ(td))
2073103216Sjulian		TD_SET_SLEEPING(td);
2074102898Sdavidxu}
2075102898Sdavidxu
2076102898Sdavidxuvoid
2077102898Sdavidxuthread_unsuspend_one(struct thread *td)
2078102898Sdavidxu{
2079102898Sdavidxu	struct proc *p = td->td_proc;
2080102898Sdavidxu
2081102898Sdavidxu	mtx_assert(&sched_lock, MA_OWNED);
2082113920Sjhb	PROC_LOCK_ASSERT(p, MA_OWNED);
2083102898Sdavidxu	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
2084103216Sjulian	TD_CLR_SUSPENDED(td);
2085102898Sdavidxu	p->p_suspcount--;
2086103216Sjulian	setrunnable(td);
2087102898Sdavidxu}
2088102898Sdavidxu
208999026Sjulian/*
209099026Sjulian * Allow all threads blocked by single threading to continue running.
209199026Sjulian */
209299026Sjulianvoid
209399026Sjulianthread_unsuspend(struct proc *p)
209499026Sjulian{
209599026Sjulian	struct thread *td;
209699026Sjulian
2097100646Sjulian	mtx_assert(&sched_lock, MA_OWNED);
209899026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
209999026Sjulian	if (!P_SHOULDSTOP(p)) {
210099026Sjulian		while ((td = TAILQ_FIRST(&p->p_suspended))) {
2101102898Sdavidxu			thread_unsuspend_one(td);
210299026Sjulian		}
2103102950Sdavidxu	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
210499026Sjulian	    (p->p_numthreads == p->p_suspcount)) {
210599026Sjulian		/*
210699026Sjulian		 * Stopping everything also did the job for the single
210799026Sjulian		 * threading request. Now we've downgraded to single-threaded,
210899026Sjulian		 * let it continue.
210999026Sjulian		 */
2110102898Sdavidxu		thread_unsuspend_one(p->p_singlethread);
211199026Sjulian	}
211299026Sjulian}
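/*
 * Illustrative sketch (hypothetical caller, roughly in the style of
 * the SIGCONT handling in psignal()): the stop flag is cleared before
 * the suspended threads are released, under both locks:
 *
 *	PROC_LOCK(p);
 *	mtx_lock_spin(&sched_lock);
 *	p->p_flag &= ~P_STOPPED_SIG;
 *	thread_unsuspend(p);
 *	mtx_unlock_spin(&sched_lock);
 *	PROC_UNLOCK(p);
 */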
211399026Sjulian
211499026Sjulianvoid
211599026Sjulianthread_single_end(void)
211699026Sjulian{
211799026Sjulian	struct thread *td;
211899026Sjulian	struct proc *p;
211999026Sjulian
212099026Sjulian	td = curthread;
212199026Sjulian	p = td->td_proc;
212299026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
2123102950Sdavidxu	p->p_flag &= ~P_STOPPED_SINGLE;
2124113920Sjhb	mtx_lock_spin(&sched_lock);
212599026Sjulian	p->p_singlethread = NULL;
2126102292Sjulian	/*
2127102292Sjulian	 * If there are other threads, they may now run,
2128102292Sjulian	 * unless of course there is a blanket 'stop order'
2129102292Sjulian	 * on the process. The single threader must be allowed
2130102292Sjulian	 * to continue, however, as this is a bad place to stop.
2131102292Sjulian	 */
2132102292Sjulian	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
2133102292Sjulian		while ((td = TAILQ_FIRST(&p->p_suspended))) {
2134103216Sjulian			thread_unsuspend_one(td);
2135102292Sjulian		}
2136102292Sjulian	}
2137113920Sjhb	mtx_unlock_spin(&sched_lock);
213899026Sjulian}
2139