kern_thread.c revision 126326
1124350Sschweikh/*
299026Sjulian * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
399026Sjulian *  All rights reserved.
499026Sjulian *
599026Sjulian * Redistribution and use in source and binary forms, with or without
699026Sjulian * modification, are permitted provided that the following conditions
799026Sjulian * are met:
899026Sjulian * 1. Redistributions of source code must retain the above copyright
999026Sjulian *    notice(s), this list of conditions and the following disclaimer as
10124350Sschweikh *    the first lines of this file unmodified other than the possible
1199026Sjulian *    addition of one or more copyright notices.
1299026Sjulian * 2. Redistributions in binary form must reproduce the above copyright
1399026Sjulian *    notice(s), this list of conditions and the following disclaimer in the
1499026Sjulian *    documentation and/or other materials provided with the distribution.
1599026Sjulian *
1699026Sjulian * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
1799026Sjulian * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1899026Sjulian * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1999026Sjulian * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
2099026Sjulian * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
2199026Sjulian * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
2299026Sjulian * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
2399026Sjulian * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2499026Sjulian * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2599026Sjulian * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
2699026Sjulian * DAMAGE.
2799026Sjulian */
2899026Sjulian
29116182Sobrien#include <sys/cdefs.h>
30116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_thread.c 126326 2004-02-27 18:52:44Z jhb $");
31116182Sobrien
3299026Sjulian#include <sys/param.h>
3399026Sjulian#include <sys/systm.h>
3499026Sjulian#include <sys/kernel.h>
3599026Sjulian#include <sys/lock.h>
3699026Sjulian#include <sys/malloc.h>
3799026Sjulian#include <sys/mutex.h>
3899026Sjulian#include <sys/proc.h>
39107029Sjulian#include <sys/smp.h>
4099026Sjulian#include <sys/sysctl.h>
41105854Sjulian#include <sys/sysproto.h>
4299026Sjulian#include <sys/filedesc.h>
43107126Sjeff#include <sys/sched.h>
4499026Sjulian#include <sys/signalvar.h>
45126326Sjhb#include <sys/sleepqueue.h>
4699026Sjulian#include <sys/sx.h>
47107126Sjeff#include <sys/tty.h>
48122514Sjhb#include <sys/turnstile.h>
4999026Sjulian#include <sys/user.h>
5099026Sjulian#include <sys/kse.h>
5199026Sjulian#include <sys/ktr.h>
52103410Smini#include <sys/ucontext.h>
5399026Sjulian
5499026Sjulian#include <vm/vm.h>
55116355Salc#include <vm/vm_extern.h>
5699026Sjulian#include <vm/vm_object.h>
5799026Sjulian#include <vm/pmap.h>
5899026Sjulian#include <vm/uma.h>
5999026Sjulian#include <vm/vm_map.h>
6099026Sjulian
61100273Speter#include <machine/frame.h>
62100273Speter
6399026Sjulian/*
64103367Sjulian * KSEGRP related storage.
6599026Sjulian */
66103367Sjulianstatic uma_zone_t ksegrp_zone;
67103367Sjulianstatic uma_zone_t kse_zone;
6899026Sjulianstatic uma_zone_t thread_zone;
69111028Sjeffstatic uma_zone_t upcall_zone;
7099026Sjulian
71103367Sjulian/* DEBUG ONLY */
7299026SjulianSYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
73107719Sjulianstatic int thread_debug = 0;
74107719SjulianSYSCTL_INT(_kern_threads, OID_AUTO, debug, CTLFLAG_RW,
75107719Sjulian	&thread_debug, 0, "thread debug");
7699026Sjulian
77114268Sdavidxustatic int max_threads_per_proc = 150;
78107006SdavidxuSYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
79103367Sjulian	&max_threads_per_proc, 0, "Limit on threads per proc");
80103367Sjulian
81114268Sdavidxustatic int max_groups_per_proc = 50;
82107006SdavidxuSYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
83107006Sdavidxu	&max_groups_per_proc, 0, "Limit on thread groups per proc");
84107006Sdavidxu
85111115Sdavidxustatic int max_threads_hits;
86111115SdavidxuSYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
87111115Sdavidxu	&max_threads_hits, 0, "");
88111115Sdavidxu
89111028Sjeffstatic int virtual_cpu;
90111028Sjeff
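/*
 * RANGEOF(type, start, end) yields the number of bytes from member 'start'
 * up to (but not including) member 'end', e.g.
 *	bzero(&kg->kg_startzero, RANGEOF(struct ksegrp, kg_startzero, kg_endzero));
 * clears only the "zero" section of a ksegrp.
 */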
9199026Sjulian#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))
9299026Sjulian
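/*
 * Exiting threads, KSEs, ksegrps and upcalls cannot free themselves while
 * they are still in use, so they are stashed on these zombie lists (under
 * kse_zombie_lock) and reclaimed later by thread_reap().
 */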
93111028SjeffTAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
94105854SjulianTAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
95105854SjulianTAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
96124350SschweikhTAILQ_HEAD(, kse_upcall) zombie_upcalls =
97111028Sjeff	TAILQ_HEAD_INITIALIZER(zombie_upcalls);
98111028Sjeffstruct mtx kse_zombie_lock;
99111028SjeffMTX_SYSINIT(kse_zombie_lock, &kse_zombie_lock, "kse zombie lock", MTX_SPIN);
10099026Sjulian
101107719Sjulianstatic void kse_purge(struct proc *p, struct thread *td);
102111028Sjeffstatic void kse_purge_group(struct thread *td);
103111515Sdavidxustatic int thread_update_usr_ticks(struct thread *td, int user);
104111028Sjeffstatic void thread_alloc_spare(struct thread *td, struct thread *spare);
105105854Sjulian
106111028Sjeffstatic int
107111028Sjeffsysctl_kse_virtual_cpu(SYSCTL_HANDLER_ARGS)
108111028Sjeff{
109111028Sjeff	int error, new_val;
110111028Sjeff	int def_val;
111111028Sjeff
112111028Sjeff#ifdef SMP
113111028Sjeff	def_val = mp_ncpus;
114111028Sjeff#else
115111028Sjeff	def_val = 1;
116111028Sjeff#endif
117111028Sjeff	if (virtual_cpu == 0)
118111028Sjeff		new_val = def_val;
119111028Sjeff	else
120111028Sjeff		new_val = virtual_cpu;
121111028Sjeff	error = sysctl_handle_int(oidp, &new_val, 0, req);
122111028Sjeff	if (error != 0 || req->newptr == NULL)
123111028Sjeff		return (error);
124111028Sjeff	if (new_val < 0)
125111028Sjeff		return (EINVAL);
126111028Sjeff	virtual_cpu = new_val;
127111028Sjeff	return (0);
128111028Sjeff}
129111028Sjeff
130111028Sjeff/* DEBUG ONLY */
131111028SjeffSYSCTL_PROC(_kern_threads, OID_AUTO, virtual_cpu, CTLTYPE_INT|CTLFLAG_RW,
132111028Sjeff	0, sizeof(virtual_cpu), sysctl_kse_virtual_cpu, "I",
133111028Sjeff	"debug virtual cpus");
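/*
 * The handler above appears as kern.threads.virtual_cpu; for example,
 * "sysctl kern.threads.virtual_cpu=4" would cap the number of upcalls
 * (and KSEs) created per ksegrp in kse_create() below.
 */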
134111028Sjeff
13599026Sjulian/*
136107719Sjulian * Prepare a thread for use.
13799026Sjulian */
13899026Sjulianstatic void
13999026Sjulianthread_ctor(void *mem, int size, void *arg)
14099026Sjulian{
14199026Sjulian	struct thread	*td;
14299026Sjulian
14399026Sjulian	td = (struct thread *)mem;
144103216Sjulian	td->td_state = TDS_INACTIVE;
145113339Sjulian	td->td_oncpu	= NOCPU;
146118442Sjhb	td->td_critnest = 1;
14799026Sjulian}
14899026Sjulian
14999026Sjulian/*
15099026Sjulian * Reclaim a thread after use.
15199026Sjulian */
15299026Sjulianstatic void
15399026Sjulianthread_dtor(void *mem, int size, void *arg)
15499026Sjulian{
15599026Sjulian	struct thread	*td;
15699026Sjulian
15799026Sjulian	td = (struct thread *)mem;
15899026Sjulian
15999026Sjulian#ifdef INVARIANTS
16099026Sjulian	/* Verify that this thread is in a safe state to free. */
16199026Sjulian	switch (td->td_state) {
162103216Sjulian	case TDS_INHIBITED:
163103216Sjulian	case TDS_RUNNING:
164103216Sjulian	case TDS_CAN_RUN:
16599026Sjulian	case TDS_RUNQ:
16699026Sjulian		/*
16799026Sjulian		 * We must never unlink a thread that is in one of
16899026Sjulian		 * these states, because it is currently active.
16999026Sjulian		 */
17099026Sjulian		panic("bad state for thread unlinking");
17199026Sjulian		/* NOTREACHED */
172103216Sjulian	case TDS_INACTIVE:
17399026Sjulian		break;
17499026Sjulian	default:
17599026Sjulian		panic("bad thread state");
17699026Sjulian		/* NOTREACHED */
17799026Sjulian	}
17899026Sjulian#endif
17999026Sjulian}
18099026Sjulian
18199026Sjulian/*
18299026Sjulian * Initialize type-stable parts of a thread (when newly created).
18399026Sjulian */
18499026Sjulianstatic void
18599026Sjulianthread_init(void *mem, int size)
18699026Sjulian{
18799026Sjulian	struct thread	*td;
18899026Sjulian
18999026Sjulian	td = (struct thread *)mem;
190116355Salc	vm_thread_new(td, 0);
19199026Sjulian	cpu_thread_setup(td);
192126326Sjhb	td->td_sleepqueue = sleepq_alloc();
193122514Sjhb	td->td_turnstile = turnstile_alloc();
194107126Sjeff	td->td_sched = (struct td_sched *)&td[1];
19599026Sjulian}
19699026Sjulian
19799026Sjulian/*
19899026Sjulian * Tear down type-stable parts of a thread (just before being discarded).
19999026Sjulian */
20099026Sjulianstatic void
20199026Sjulianthread_fini(void *mem, int size)
20299026Sjulian{
20399026Sjulian	struct thread	*td;
20499026Sjulian
20599026Sjulian	td = (struct thread *)mem;
206122514Sjhb	turnstile_free(td->td_turnstile);
207126326Sjhb	sleepq_free(td->td_sleepqueue);
208116355Salc	vm_thread_dispose(td);
20999026Sjulian}
210111028Sjeff
211107126Sjeff/*
212107126Sjeff * Initialize type-stable parts of a kse (when newly created).
213107126Sjeff */
214107126Sjeffstatic void
215107126Sjeffkse_init(void *mem, int size)
216107126Sjeff{
217107126Sjeff	struct kse	*ke;
21899026Sjulian
219107126Sjeff	ke = (struct kse *)mem;
220107126Sjeff	ke->ke_sched = (struct ke_sched *)&ke[1];
221107126Sjeff}
222111028Sjeff
223107126Sjeff/*
224107126Sjeff * Initialize type-stable parts of a ksegrp (when newly created).
225107126Sjeff */
226107126Sjeffstatic void
227107126Sjeffksegrp_init(void *mem, int size)
228107126Sjeff{
229107126Sjeff	struct ksegrp	*kg;
230107126Sjeff
231107126Sjeff	kg = (struct ksegrp *)mem;
232107126Sjeff	kg->kg_sched = (struct kg_sched *)&kg[1];
233107126Sjeff}
234107126Sjeff
235124350Sschweikh/*
236111028Sjeff * Link a KSE into its ksegrp.
237105854Sjulian */
238105854Sjulianvoid
239105854Sjuliankse_link(struct kse *ke, struct ksegrp *kg)
240105854Sjulian{
241105854Sjulian	struct proc *p = kg->kg_proc;
242105854Sjulian
243105854Sjulian	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
244105854Sjulian	kg->kg_kses++;
245111028Sjeff	ke->ke_state	= KES_UNQUEUED;
246105854Sjulian	ke->ke_proc	= p;
247105854Sjulian	ke->ke_ksegrp	= kg;
248105854Sjulian	ke->ke_thread	= NULL;
249111028Sjeff	ke->ke_oncpu	= NOCPU;
250111028Sjeff	ke->ke_flags	= 0;
251105854Sjulian}
252105854Sjulian
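/*
 * Remove a KSE from its ksegrp (and from the idle queue if it is idle)
 * and stash it on the zombie list.  Called with sched_lock held.
 */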
253105854Sjulianvoid
254105854Sjuliankse_unlink(struct kse *ke)
255105854Sjulian{
256105854Sjulian	struct ksegrp *kg;
257105854Sjulian
258105854Sjulian	mtx_assert(&sched_lock, MA_OWNED);
259105854Sjulian	kg = ke->ke_ksegrp;
260105854Sjulian	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
261111028Sjeff	if (ke->ke_state == KES_IDLE) {
262111028Sjeff		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
263111028Sjeff		kg->kg_idle_kses--;
264105854Sjulian	}
265119488Sdavidxu	--kg->kg_kses;
266105854Sjulian	/*
267105854Sjulian	 * Aggregate stats from the KSE
268105854Sjulian	 */
269105854Sjulian	kse_stash(ke);
270105854Sjulian}
271105854Sjulian
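/*
 * Initialize a ksegrp's queues and counters and link it into the process.
 */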
272105854Sjulianvoid
273105854Sjulianksegrp_link(struct ksegrp *kg, struct proc *p)
274105854Sjulian{
275105854Sjulian
276105854Sjulian	TAILQ_INIT(&kg->kg_threads);
277105854Sjulian	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
278105854Sjulian	TAILQ_INIT(&kg->kg_slpq);	/* links with td_runq */
279105854Sjulian	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
280111028Sjeff	TAILQ_INIT(&kg->kg_iq);		/* all idle kses in ksegrp */
281111028Sjeff	TAILQ_INIT(&kg->kg_upcalls);	/* all upcall structure in ksegrp */
282111028Sjeff	kg->kg_proc = p;
283111028Sjeff	/*
284111028Sjeff	 * The following counters are in the -zero- section
285111028Sjeff	 * and may not need clearing.
286111028Sjeff	 */
287105854Sjulian	kg->kg_numthreads = 0;
288111028Sjeff	kg->kg_runnable   = 0;
289111028Sjeff	kg->kg_kses       = 0;
290111028Sjeff	kg->kg_runq_kses  = 0; /* XXXKSE change name */
291111028Sjeff	kg->kg_idle_kses  = 0;
292111028Sjeff	kg->kg_numupcalls = 0;
293111028Sjeff	/* link it in now that it's consistent */
294105854Sjulian	p->p_numksegrps++;
295105854Sjulian	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
296105854Sjulian}
297105854Sjulian
298105854Sjulianvoid
299105854Sjulianksegrp_unlink(struct ksegrp *kg)
300105854Sjulian{
301105854Sjulian	struct proc *p;
302105854Sjulian
303105854Sjulian	mtx_assert(&sched_lock, MA_OWNED);
304111028Sjeff	KASSERT((kg->kg_numthreads == 0), ("ksegrp_unlink: residual threads"));
305111028Sjeff	KASSERT((kg->kg_kses == 0), ("ksegrp_unlink: residual kses"));
306111028Sjeff	KASSERT((kg->kg_numupcalls == 0), ("ksegrp_unlink: residual upcalls"));
307111028Sjeff
308105854Sjulian	p = kg->kg_proc;
309105854Sjulian	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
310105854Sjulian	p->p_numksegrps--;
311105854Sjulian	/*
312105854Sjulian	 * Aggregate stats from the ksegrp
313105854Sjulian	 */
314105854Sjulian	ksegrp_stash(kg);
315105854Sjulian}
316105854Sjulian
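/*
 * kse_upcall management: upcall structures are allocated from upcall_zone,
 * linked onto their ksegrp's kg_upcalls list, and owned by at most one
 * thread.  Linking and unlinking are done under sched_lock.
 */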
317111028Sjeffstruct kse_upcall *
318111028Sjeffupcall_alloc(void)
319111028Sjeff{
320111028Sjeff	struct kse_upcall *ku;
321111028Sjeff
322111125Sdavidxu	ku = uma_zalloc(upcall_zone, M_WAITOK);
323111028Sjeff	bzero(ku, sizeof(*ku));
324111028Sjeff	return (ku);
325111028Sjeff}
326111028Sjeff
327111028Sjeffvoid
328111028Sjeffupcall_free(struct kse_upcall *ku)
329111028Sjeff{
330111028Sjeff
331111028Sjeff	uma_zfree(upcall_zone, ku);
332111028Sjeff}
333111028Sjeff
334111028Sjeffvoid
335111028Sjeffupcall_link(struct kse_upcall *ku, struct ksegrp *kg)
336111028Sjeff{
337111028Sjeff
338111028Sjeff	mtx_assert(&sched_lock, MA_OWNED);
339111028Sjeff	TAILQ_INSERT_TAIL(&kg->kg_upcalls, ku, ku_link);
340111028Sjeff	ku->ku_ksegrp = kg;
341111028Sjeff	kg->kg_numupcalls++;
342111028Sjeff}
343111028Sjeff
344111028Sjeffvoid
345111028Sjeffupcall_unlink(struct kse_upcall *ku)
346111028Sjeff{
347111028Sjeff	struct ksegrp *kg = ku->ku_ksegrp;
348111028Sjeff
349111028Sjeff	mtx_assert(&sched_lock, MA_OWNED);
350111028Sjeff	KASSERT(ku->ku_owner == NULL, ("%s: have owner", __func__));
351124350Sschweikh	TAILQ_REMOVE(&kg->kg_upcalls, ku, ku_link);
352111028Sjeff	kg->kg_numupcalls--;
353111028Sjeff	upcall_stash(ku);
354111028Sjeff}
355111028Sjeff
356111028Sjeffvoid
357111028Sjeffupcall_remove(struct thread *td)
358111028Sjeff{
359111028Sjeff
360111028Sjeff	if (td->td_upcall) {
361111028Sjeff		td->td_upcall->ku_owner = NULL;
362111028Sjeff		upcall_unlink(td->td_upcall);
363111028Sjeff		td->td_upcall = 0;
364124350Sschweikh	}
365111028Sjeff}
366111028Sjeff
36799026Sjulian/*
368111028Sjeff * For a newly created process,
369111028Sjeff * link up all the structures and its initial threads etc.
370105854Sjulian */
371105854Sjulianvoid
372105854Sjulianproc_linkup(struct proc *p, struct ksegrp *kg,
373111028Sjeff	    struct kse *ke, struct thread *td)
374105854Sjulian{
375105854Sjulian
376105854Sjulian	TAILQ_INIT(&p->p_ksegrps);	     /* all ksegrps in proc */
377105854Sjulian	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
378105854Sjulian	TAILQ_INIT(&p->p_suspended);	     /* Threads suspended */
379105854Sjulian	p->p_numksegrps = 0;
380105854Sjulian	p->p_numthreads = 0;
381105854Sjulian
382105854Sjulian	ksegrp_link(kg, p);
383105854Sjulian	kse_link(ke, kg);
384105854Sjulian	thread_link(td, kg);
385105854Sjulian}
386105854Sjulian
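/*
 * kse_switchin: switch the calling thread to the user context supplied in
 * uap->mcp.  If uap->loc is not NULL, store uap->val there first.  On
 * success the new context is resumed via EJUSTRETURN.
 */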
387123252Smarcel#ifndef _SYS_SYSPROTO_H_
388123252Smarcelstruct kse_switchin_args {
389123252Smarcel	const struct __mcontext *mcp;
390123252Smarcel	long val;
391123252Smarcel	long *loc;
392123252Smarcel};
393123252Smarcel#endif
394123252Smarcel
395123252Smarcelint
396123252Smarcelkse_switchin(struct thread *td, struct kse_switchin_args *uap)
397123252Smarcel{
398123252Smarcel	mcontext_t mc;
399123252Smarcel	int error;
400123252Smarcel
401123252Smarcel	error = (uap->mcp == NULL) ? EINVAL : 0;
402123252Smarcel	if (!error)
403123252Smarcel		error = copyin(uap->mcp, &mc, sizeof(mc));
404123366Smarcel	if (!error && uap->loc != NULL)
405123366Smarcel		error = (suword(uap->loc, uap->val) != 0) ? EINVAL : 0;
406123252Smarcel	if (!error)
407123252Smarcel		error = set_mcontext(td, &mc);
408123252Smarcel	return ((error == 0) ? EJUSTRETURN : error);
409123252Smarcel}
410123252Smarcel
411111028Sjeff/*
412111028Sjeffstruct kse_thr_interrupt_args {
413111028Sjeff	struct kse_thr_mailbox * tmbx;
414117704Sdavidxu	int cmd;
415117704Sdavidxu	long data;
416111028Sjeff};
417111028Sjeff*/
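/*
 * Interrupt or signal another thread in this process, identified by its
 * thread mailbox (uap->tmbx).  KSE_INTR_SENDSIG delivers a signal,
 * KSE_INTR_INTERRUPT and KSE_INTR_RESTART interrupt a sleeping thread with
 * EINTR or ERESTART, and KSE_INTR_SIGEXIT exits the whole process with the
 * given signal.
 */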
418105854Sjulianint
419105854Sjuliankse_thr_interrupt(struct thread *td, struct kse_thr_interrupt_args *uap)
420105854Sjulian{
421106180Sdavidxu	struct proc *p;
422106180Sdavidxu	struct thread *td2;
423105854Sjulian
424106242Sdavidxu	p = td->td_proc;
425119488Sdavidxu
426117704Sdavidxu	if (!(p->p_flag & P_SA))
427106242Sdavidxu		return (EINVAL);
428116963Sdavidxu
429117704Sdavidxu	switch (uap->cmd) {
430117704Sdavidxu	case KSE_INTR_SENDSIG:
431117704Sdavidxu		if (uap->data < 0 || uap->data > _SIG_MAXSIG)
432117704Sdavidxu			return (EINVAL);
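		/* FALLTHROUGH */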
433117704Sdavidxu	case KSE_INTR_INTERRUPT:
434117704Sdavidxu	case KSE_INTR_RESTART:
435117704Sdavidxu		PROC_LOCK(p);
436117704Sdavidxu		mtx_lock_spin(&sched_lock);
437117704Sdavidxu		FOREACH_THREAD_IN_PROC(p, td2) {
438117704Sdavidxu			if (td2->td_mailbox == uap->tmbx)
439117704Sdavidxu				break;
440117704Sdavidxu		}
441117704Sdavidxu		if (td2 == NULL) {
442117704Sdavidxu			mtx_unlock_spin(&sched_lock);
443117704Sdavidxu			PROC_UNLOCK(p);
444117704Sdavidxu			return (ESRCH);
445117704Sdavidxu		}
446117704Sdavidxu		if (uap->cmd == KSE_INTR_SENDSIG) {
447117704Sdavidxu			if (uap->data > 0) {
448117704Sdavidxu				td2->td_flags &= ~TDF_INTERRUPT;
449117704Sdavidxu				mtx_unlock_spin(&sched_lock);
450117704Sdavidxu				tdsignal(td2, (int)uap->data, SIGTARGET_TD);
451117704Sdavidxu			} else {
452117704Sdavidxu				mtx_unlock_spin(&sched_lock);
453117704Sdavidxu			}
454117704Sdavidxu		} else {
455117704Sdavidxu			td2->td_flags |= TDF_INTERRUPT | TDF_ASTPENDING;
456117704Sdavidxu			if (TD_CAN_UNBIND(td2))
457117704Sdavidxu				td2->td_upcall->ku_flags |= KUF_DOUPCALL;
458117704Sdavidxu			if (uap->cmd == KSE_INTR_INTERRUPT)
459117704Sdavidxu				td2->td_intrval = EINTR;
460116963Sdavidxu			else
461117704Sdavidxu				td2->td_intrval = ERESTART;
462126326Sjhb			if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR))
463126326Sjhb				sleepq_abort(td2);
464117704Sdavidxu			mtx_unlock_spin(&sched_lock);
465106180Sdavidxu		}
466117704Sdavidxu		PROC_UNLOCK(p);
467117704Sdavidxu		break;
468117704Sdavidxu	case KSE_INTR_SIGEXIT:
469117704Sdavidxu		if (uap->data < 1 || uap->data > _SIG_MAXSIG)
470117704Sdavidxu			return (EINVAL);
471117704Sdavidxu		PROC_LOCK(p);
472117704Sdavidxu		sigexit(td, (int)uap->data);
473117704Sdavidxu		break;
474117704Sdavidxu	default:
475117704Sdavidxu		return (EINVAL);
476106180Sdavidxu	}
477116963Sdavidxu	return (0);
478105854Sjulian}
479105854Sjulian
480111028Sjeff/*
481111028Sjeffstruct kse_exit_args {
482111028Sjeff	register_t dummy;
483111028Sjeff};
484111028Sjeff*/
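/*
 * Release the calling upcall thread.  The last non-exiting upcall in a
 * group may not leave while other threads remain (EDEADLK).  The last
 * thread in the process turns off KSE mode (P_SA); otherwise the thread
 * exits and its group is torn down once it becomes empty.
 */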
485105854Sjulianint
486105854Sjuliankse_exit(struct thread *td, struct kse_exit_args *uap)
487105854Sjulian{
488105854Sjulian	struct proc *p;
489105854Sjulian	struct ksegrp *kg;
490108640Sdavidxu	struct kse *ke;
491115790Sjulian	struct kse_upcall *ku, *ku2;
492115790Sjulian	int    error, count;
493105854Sjulian
494105854Sjulian	p = td->td_proc;
495115790Sjulian	if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td))
496106182Sdavidxu		return (EINVAL);
497105854Sjulian	kg = td->td_ksegrp;
498115790Sjulian	count = 0;
499105854Sjulian	PROC_LOCK(p);
500105854Sjulian	mtx_lock_spin(&sched_lock);
501115790Sjulian	FOREACH_UPCALL_IN_GROUP(kg, ku2) {
502115790Sjulian		if (ku2->ku_flags & KUF_EXITING)
503115790Sjulian			count++;
504115790Sjulian	}
505115790Sjulian	if ((kg->kg_numupcalls - count) == 1 &&
506115790Sjulian	    (kg->kg_numthreads > 1)) {
507105854Sjulian		mtx_unlock_spin(&sched_lock);
508105854Sjulian		PROC_UNLOCK(p);
509105854Sjulian		return (EDEADLK);
510105854Sjulian	}
511115790Sjulian	ku->ku_flags |= KUF_EXITING;
512115790Sjulian	mtx_unlock_spin(&sched_lock);
513115790Sjulian	PROC_UNLOCK(p);
514115790Sjulian	error = suword(&ku->ku_mailbox->km_flags, ku->ku_mflags|KMF_DONE);
515115790Sjulian	PROC_LOCK(p);
516115790Sjulian	if (error)
517115790Sjulian		psignal(p, SIGSEGV);
518115790Sjulian	mtx_lock_spin(&sched_lock);
519115790Sjulian	upcall_remove(td);
520108640Sdavidxu	ke = td->td_kse;
521108640Sdavidxu	if (p->p_numthreads == 1) {
522111028Sjeff		kse_purge(p, td);
523116361Sdavidxu		p->p_flag &= ~P_SA;
524105854Sjulian		mtx_unlock_spin(&sched_lock);
525105854Sjulian		PROC_UNLOCK(p);
526105854Sjulian	} else {
527111028Sjeff		if (kg->kg_numthreads == 1) { /* Shutdown a group */
528111028Sjeff			kse_purge_group(td);
529111028Sjeff			ke->ke_flags |= KEF_EXIT;
530111028Sjeff		}
531112071Sdavidxu		thread_stopped(p);
532105854Sjulian		thread_exit();
533105854Sjulian		/* NOTREACHED */
534105854Sjulian	}
535106182Sdavidxu	return (0);
536105854Sjulian}
537105854Sjulian
538107719Sjulian/*
539108338Sjulian * Either becomes an upcall or waits for an awakening event and
540111028Sjeff * then becomes an upcall. Only error cases return.
541107719Sjulian */
542111028Sjeff/*
543111028Sjeffstruct kse_release_args {
544111169Sdavidxu	struct timespec *timeout;
545111028Sjeff};
546111028Sjeff*/
547105854Sjulianint
548111028Sjeffkse_release(struct thread *td, struct kse_release_args *uap)
549105854Sjulian{
550105854Sjulian	struct proc *p;
551107719Sjulian	struct ksegrp *kg;
552116401Sdavidxu	struct kse_upcall *ku;
553116401Sdavidxu	struct timespec timeout;
554111169Sdavidxu	struct timeval tv;
555116963Sdavidxu	sigset_t sigset;
556111169Sdavidxu	int error;
557105854Sjulian
558105854Sjulian	p = td->td_proc;
559107719Sjulian	kg = td->td_ksegrp;
560116401Sdavidxu	if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td))
561107719Sjulian		return (EINVAL);
562111169Sdavidxu	if (uap->timeout != NULL) {
563111169Sdavidxu		if ((error = copyin(uap->timeout, &timeout, sizeof(timeout))))
564111169Sdavidxu			return (error);
565111169Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, &timeout);
566111169Sdavidxu	}
567116401Sdavidxu	if (td->td_flags & TDF_SA)
568116401Sdavidxu		td->td_pflags |= TDP_UPCALLING;
569116963Sdavidxu	else {
570116963Sdavidxu		ku->ku_mflags = fuword(&ku->ku_mailbox->km_flags);
571116963Sdavidxu		if (ku->ku_mflags == -1) {
572116963Sdavidxu			PROC_LOCK(p);
573116963Sdavidxu			sigexit(td, SIGSEGV);
574116963Sdavidxu		}
575116963Sdavidxu	}
576111169Sdavidxu	PROC_LOCK(p);
577116963Sdavidxu	if (ku->ku_mflags & KMF_WAITSIGEVENT) {
578116963Sdavidxu		/* UTS wants to wait for signal event */
579116963Sdavidxu		if (!(p->p_flag & P_SIGEVENT) && !(ku->ku_flags & KUF_DOUPCALL))
580116963Sdavidxu			error = msleep(&p->p_siglist, &p->p_mtx, PPAUSE|PCATCH,
581116963Sdavidxu			    "ksesigwait", (uap->timeout ? tvtohz(&tv) : 0));
582116963Sdavidxu		p->p_flag &= ~P_SIGEVENT;
583116963Sdavidxu		sigset = p->p_siglist;
584116963Sdavidxu		PROC_UNLOCK(p);
585116963Sdavidxu		error = copyout(&sigset, &ku->ku_mailbox->km_sigscaught,
586116963Sdavidxu		    sizeof(sigset));
587116963Sdavidxu	} else {
588116963Sdavidxu		if (!kg->kg_completed && !(ku->ku_flags & KUF_DOUPCALL)) {
589116963Sdavidxu			kg->kg_upsleeps++;
590116963Sdavidxu			error = msleep(&kg->kg_completed, &p->p_mtx,
591116963Sdavidxu				PPAUSE|PCATCH, "kserel",
592116963Sdavidxu				(uap->timeout ? tvtohz(&tv) : 0));
593116963Sdavidxu			kg->kg_upsleeps--;
594116963Sdavidxu		}
595116963Sdavidxu		PROC_UNLOCK(p);
596105854Sjulian	}
597116401Sdavidxu	if (ku->ku_flags & KUF_DOUPCALL) {
598116401Sdavidxu		mtx_lock_spin(&sched_lock);
599116401Sdavidxu		ku->ku_flags &= ~KUF_DOUPCALL;
600116401Sdavidxu		mtx_unlock_spin(&sched_lock);
601116401Sdavidxu	}
602107719Sjulian	return (0);
603105854Sjulian}
604105854Sjulian
605105854Sjulian/* struct kse_wakeup_args {
606105854Sjulian	struct kse_mailbox *mbx;
607105854Sjulian}; */
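/*
 * Wake the upcall thread that owns the mailbox uap->mbx, or any sleeping
 * upcall in the caller's ksegrp when mbx is NULL.  A thread sleeping in
 * kse_release() is aborted from its sleep; otherwise KUF_DOUPCALL is set
 * so that an upcall is scheduled soon.
 */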
608105854Sjulianint
609105854Sjuliankse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
610105854Sjulian{
611105854Sjulian	struct proc *p;
612105854Sjulian	struct ksegrp *kg;
613111028Sjeff	struct kse_upcall *ku;
614108338Sjulian	struct thread *td2;
615105854Sjulian
616105854Sjulian	p = td->td_proc;
617108338Sjulian	td2 = NULL;
618111028Sjeff	ku = NULL;
619105854Sjulian	/* KSE-enabled processes only, please. */
620116361Sdavidxu	if (!(p->p_flag & P_SA))
621111028Sjeff		return (EINVAL);
622111028Sjeff	PROC_LOCK(p);
623108613Sjulian	mtx_lock_spin(&sched_lock);
624105854Sjulian	if (uap->mbx) {
625105854Sjulian		FOREACH_KSEGRP_IN_PROC(p, kg) {
626111028Sjeff			FOREACH_UPCALL_IN_GROUP(kg, ku) {
627111207Sdavidxu				if (ku->ku_mailbox == uap->mbx)
628111028Sjeff					break;
629108613Sjulian			}
630111028Sjeff			if (ku)
631108338Sjulian				break;
632105854Sjulian		}
633105854Sjulian	} else {
634105854Sjulian		kg = td->td_ksegrp;
635111028Sjeff		if (kg->kg_upsleeps) {
636111028Sjeff			wakeup_one(&kg->kg_completed);
637111028Sjeff			mtx_unlock_spin(&sched_lock);
638111028Sjeff			PROC_UNLOCK(p);
639111028Sjeff			return (0);
640108338Sjulian		}
641111028Sjeff		ku = TAILQ_FIRST(&kg->kg_upcalls);
642105854Sjulian	}
643111028Sjeff	if (ku) {
644111028Sjeff		if ((td2 = ku->ku_owner) == NULL) {
645111028Sjeff			panic("%s: no owner", __func__);
646111028Sjeff		} else if (TD_ON_SLEEPQ(td2) &&
647116963Sdavidxu		           ((td2->td_wchan == &kg->kg_completed) ||
648116963Sdavidxu			    (td2->td_wchan == &p->p_siglist &&
649116963Sdavidxu			     (ku->ku_mflags & KMF_WAITSIGEVENT)))) {
650126326Sjhb			sleepq_abort(td2);
651111028Sjeff		} else {
652111028Sjeff			ku->ku_flags |= KUF_DOUPCALL;
653108613Sjulian		}
654105854Sjulian		mtx_unlock_spin(&sched_lock);
655111028Sjeff		PROC_UNLOCK(p);
656108338Sjulian		return (0);
657108613Sjulian	}
658105854Sjulian	mtx_unlock_spin(&sched_lock);
659111028Sjeff	PROC_UNLOCK(p);
660108338Sjulian	return (ESRCH);
661105854Sjulian}
662105854Sjulian
663124350Sschweikh/*
664105854Sjulian * No new KSEG: first call: use current KSE, don't schedule an upcall.
665111028Sjeff * In all other situations, allocate max new KSEs and schedule an upcall.
666105854Sjulian */
667105854Sjulian/* struct kse_create_args {
668105854Sjulian	struct kse_mailbox *mbx;
669105854Sjulian	int newgroup;
670105854Sjulian}; */
671105854Sjulianint
672105854Sjuliankse_create(struct thread *td, struct kse_create_args *uap)
673105854Sjulian{
674105854Sjulian	struct kse *newke;
675105854Sjulian	struct ksegrp *newkg;
676105854Sjulian	struct ksegrp *kg;
677105854Sjulian	struct proc *p;
678105854Sjulian	struct kse_mailbox mbx;
679111028Sjeff	struct kse_upcall *newku;
680116401Sdavidxu	int err, ncpus, sa = 0, first = 0;
681116401Sdavidxu	struct thread *newtd;
682105854Sjulian
683105854Sjulian	p = td->td_proc;
684105854Sjulian	if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
685105854Sjulian		return (err);
686105854Sjulian
687111028Sjeff	/* Too bad; why doesn't the kernel always have a cpu counter!? */
688111028Sjeff#ifdef SMP
689111028Sjeff	ncpus = mp_ncpus;
690111028Sjeff#else
691111028Sjeff	ncpus = 1;
692111028Sjeff#endif
693116401Sdavidxu	if (virtual_cpu != 0)
694111028Sjeff		ncpus = virtual_cpu;
695116401Sdavidxu	if (!(mbx.km_flags & KMF_BOUND))
696116401Sdavidxu		sa = TDF_SA;
697116440Sdavidxu	else
698116440Sdavidxu		ncpus = 1;
699112078Sdavidxu	PROC_LOCK(p);
700116401Sdavidxu	if (!(p->p_flag & P_SA)) {
701116401Sdavidxu		first = 1;
702116401Sdavidxu		p->p_flag |= P_SA;
703116401Sdavidxu	}
704112078Sdavidxu	PROC_UNLOCK(p);
705116401Sdavidxu	if (!sa && !uap->newgroup && !first)
706116401Sdavidxu		return (EINVAL);
707105854Sjulian	kg = td->td_ksegrp;
708105854Sjulian	if (uap->newgroup) {
709124350Sschweikh		/* This check has a race condition, but it is cheap. */
710116401Sdavidxu		if (p->p_numksegrps >= max_groups_per_proc)
711107006Sdavidxu			return (EPROCLIM);
712124350Sschweikh		/*
713105854Sjulian		 * If we want a new KSEGRP it doesn't matter whether
714105854Sjulian		 * we have already fired up KSE mode before or not.
715111028Sjeff		 * We put the process in KSE mode and create a new KSEGRP.
716105854Sjulian		 */
717105854Sjulian		newkg = ksegrp_alloc();
718105854Sjulian		bzero(&newkg->kg_startzero, RANGEOF(struct ksegrp,
719111028Sjeff		      kg_startzero, kg_endzero));
720105854Sjulian		bcopy(&kg->kg_startcopy, &newkg->kg_startcopy,
721105854Sjulian		      RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
722124350Sschweikh		PROC_LOCK(p);
723111028Sjeff		mtx_lock_spin(&sched_lock);
724111028Sjeff		if (p->p_numksegrps >= max_groups_per_proc) {
725111028Sjeff			mtx_unlock_spin(&sched_lock);
726119488Sdavidxu			PROC_UNLOCK(p);
727111677Sdavidxu			ksegrp_free(newkg);
728111028Sjeff			return (EPROCLIM);
729111028Sjeff		}
730111677Sdavidxu		ksegrp_link(newkg, p);
731119488Sdavidxu		sched_fork_ksegrp(kg, newkg);
732111028Sjeff		mtx_unlock_spin(&sched_lock);
733119488Sdavidxu		PROC_UNLOCK(p);
734105854Sjulian	} else {
735116452Sdavidxu		if (!first && ((td->td_flags & TDF_SA) ^ sa) != 0)
736116452Sdavidxu			return (EINVAL);
737111028Sjeff		newkg = kg;
738111028Sjeff	}
739111028Sjeff
740111028Sjeff	/*
741111028Sjeff	 * Creating more upcalls than the number of physical cpus does
742124350Sschweikh	 * not help performance.
743111028Sjeff	 */
744111028Sjeff	if (newkg->kg_numupcalls >= ncpus)
745111028Sjeff		return (EPROCLIM);
746111028Sjeff
747111028Sjeff	if (newkg->kg_numupcalls == 0) {
748111028Sjeff		/*
749116401Sdavidxu		 * Initialize KSE group
750116401Sdavidxu		 *
751116401Sdavidxu		 * For a multiplexed group, create as many KSEs as physical
752116401Sdavidxu		 * cpus. This increases concurrency even if userland
753116401Sdavidxu		 * is not MP safe and can only run on a single CPU.
754111028Sjeff		 * In an ideal world, every physical cpu should execute a thread.
755111028Sjeff		 * If there are enough KSEs, threads in the kernel can be
756124350Sschweikh		 * executed in parallel on different cpus at full speed;
757124350Sschweikh		 * concurrency in the kernel shouldn't be restricted by the number
758116401Sdavidxu		 * of upcalls userland provides. Adding more upcall structures
759116401Sdavidxu		 * only increases concurrency in userland.
760116401Sdavidxu		 *
761116401Sdavidxu		 * For a bound thread group, because there is only one thread in
762116401Sdavidxu		 * the group, we only create one KSE for the group. A thread in
763116401Sdavidxu		 * this kind of group will never schedule an upcall when blocked;
764116401Sdavidxu		 * this is intended to simulate a pthread system scope thread.
765105854Sjulian		 */
766111028Sjeff		while (newkg->kg_kses < ncpus) {
767105854Sjulian			newke = kse_alloc();
768111028Sjeff			bzero(&newke->ke_startzero, RANGEOF(struct kse,
769111028Sjeff			      ke_startzero, ke_endzero));
770105854Sjulian#if 0
771111028Sjeff			mtx_lock_spin(&sched_lock);
772111028Sjeff			bcopy(&ke->ke_startcopy, &newke->ke_startcopy,
773111028Sjeff			      RANGEOF(struct kse, ke_startcopy, ke_endcopy));
774111028Sjeff			mtx_unlock_spin(&sched_lock);
775105854Sjulian#endif
776111028Sjeff			mtx_lock_spin(&sched_lock);
777111028Sjeff			kse_link(newke, newkg);
778119488Sdavidxu			sched_fork_kse(td->td_kse, newke);
779111028Sjeff			/* Add engine */
780111028Sjeff			kse_reassign(newke);
781111028Sjeff			mtx_unlock_spin(&sched_lock);
782105854Sjulian		}
783111028Sjeff	}
784111028Sjeff	newku = upcall_alloc();
785111028Sjeff	newku->ku_mailbox = uap->mbx;
786111028Sjeff	newku->ku_func = mbx.km_func;
787111028Sjeff	bcopy(&mbx.km_stack, &newku->ku_stack, sizeof(stack_t));
788111028Sjeff
789111028Sjeff	/* For the first call this may not have been set */
790111028Sjeff	if (td->td_standin == NULL)
791111028Sjeff		thread_alloc_spare(td, NULL);
792111028Sjeff
793116963Sdavidxu	PROC_LOCK(p);
794111028Sjeff	if (newkg->kg_numupcalls >= ncpus) {
795116963Sdavidxu		PROC_UNLOCK(p);
796111028Sjeff		upcall_free(newku);
797111028Sjeff		return (EPROCLIM);
798111028Sjeff	}
799117637Sdavidxu	if (first && sa) {
800116963Sdavidxu		SIGSETOR(p->p_siglist, td->td_siglist);
801116963Sdavidxu		SIGEMPTYSET(td->td_siglist);
802116963Sdavidxu		SIGFILLSET(td->td_sigmask);
803116963Sdavidxu		SIG_CANTMASK(td->td_sigmask);
804116963Sdavidxu	}
805116963Sdavidxu	mtx_lock_spin(&sched_lock);
806116963Sdavidxu	PROC_UNLOCK(p);
807111028Sjeff	upcall_link(newku, newkg);
808112397Sdavidxu	if (mbx.km_quantum)
809112397Sdavidxu		newkg->kg_upquantum = max(1, mbx.km_quantum/tick);
810111028Sjeff
811111028Sjeff	/*
812111028Sjeff	 * Each upcall structure has an owner thread, find which
813111028Sjeff	 * one owns it.
814111028Sjeff	 */
815111028Sjeff	if (uap->newgroup) {
816124350Sschweikh		/*
817111028Sjeff		 * Because the new ksegrp has no thread yet,
818111028Sjeff		 * create an initial upcall thread to own it.
819111028Sjeff		 */
820116401Sdavidxu		newtd = thread_schedule_upcall(td, newku);
821105854Sjulian	} else {
822105854Sjulian		/*
823111028Sjeff		 * If the current thread doesn't have an upcall structure,
824111028Sjeff		 * just assign the upcall to it.
825105854Sjulian		 */
826111028Sjeff		if (td->td_upcall == NULL) {
827111028Sjeff			newku->ku_owner = td;
828111028Sjeff			td->td_upcall = newku;
829116401Sdavidxu			newtd = td;
830111028Sjeff		} else {
831111028Sjeff			/*
832111028Sjeff			 * Create a new upcall thread to own it.
833111028Sjeff			 */
834116401Sdavidxu			newtd = thread_schedule_upcall(td, newku);
835111028Sjeff		}
836105854Sjulian	}
837116401Sdavidxu	if (!sa) {
838116401Sdavidxu		newtd->td_mailbox = mbx.km_curthread;
839116401Sdavidxu		newtd->td_flags &= ~TDF_SA;
840116607Sdavidxu		if (newtd != td) {
841116607Sdavidxu			mtx_unlock_spin(&sched_lock);
842116607Sdavidxu			cpu_set_upcall_kse(newtd, newku);
843116607Sdavidxu			mtx_lock_spin(&sched_lock);
844116607Sdavidxu		}
845116401Sdavidxu	} else {
846116401Sdavidxu		newtd->td_flags |= TDF_SA;
847116401Sdavidxu	}
848116607Sdavidxu	if (newtd != td)
849116607Sdavidxu		setrunqueue(newtd);
850111028Sjeff	mtx_unlock_spin(&sched_lock);
851105854Sjulian	return (0);
852105854Sjulian}
853105854Sjulian
854105854Sjulian/*
85599026Sjulian * Initialize global thread allocation resources.
85699026Sjulian */
85799026Sjulianvoid
85899026Sjulianthreadinit(void)
85999026Sjulian{
86099026Sjulian
861107126Sjeff	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
86299026Sjulian	    thread_ctor, thread_dtor, thread_init, thread_fini,
86399026Sjulian	    UMA_ALIGN_CACHE, 0);
864107126Sjeff	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
865107126Sjeff	    NULL, NULL, ksegrp_init, NULL,
866103367Sjulian	    UMA_ALIGN_CACHE, 0);
867107126Sjeff	kse_zone = uma_zcreate("KSE", sched_sizeof_kse(),
868107126Sjeff	    NULL, NULL, kse_init, NULL,
869103367Sjulian	    UMA_ALIGN_CACHE, 0);
870111028Sjeff	upcall_zone = uma_zcreate("UPCALL", sizeof(struct kse_upcall),
871111028Sjeff	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
87299026Sjulian}
87399026Sjulian
87499026Sjulian/*
875103002Sjulian * Stash an embarrassingly extra thread into the zombie thread queue.
87699026Sjulian */
87799026Sjulianvoid
87899026Sjulianthread_stash(struct thread *td)
87999026Sjulian{
880111028Sjeff	mtx_lock_spin(&kse_zombie_lock);
88199026Sjulian	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
882111028Sjeff	mtx_unlock_spin(&kse_zombie_lock);
88399026Sjulian}
88499026Sjulian
885103410Smini/*
886105854Sjulian * Stash an embarrassingly extra kse into the zombie kse queue.
887105854Sjulian */
888105854Sjulianvoid
889105854Sjuliankse_stash(struct kse *ke)
890105854Sjulian{
891111028Sjeff	mtx_lock_spin(&kse_zombie_lock);
892105854Sjulian	TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
893111028Sjeff	mtx_unlock_spin(&kse_zombie_lock);
894105854Sjulian}
895105854Sjulian
896105854Sjulian/*
897111028Sjeff * Stash an embarrassingly extra upcall into the zombie upcall queue.
898111028Sjeff */
899111028Sjeff
900111028Sjeffvoid
901111028Sjeffupcall_stash(struct kse_upcall *ku)
902111028Sjeff{
903111028Sjeff	mtx_lock_spin(&kse_zombie_lock);
904111028Sjeff	TAILQ_INSERT_HEAD(&zombie_upcalls, ku, ku_link);
905111028Sjeff	mtx_unlock_spin(&kse_zombie_lock);
906111028Sjeff}
907111028Sjeff
908111028Sjeff/*
909105854Sjulian * Stash an embarrassingly extra ksegrp into the zombie ksegrp queue.
910105854Sjulian */
911105854Sjulianvoid
912105854Sjulianksegrp_stash(struct ksegrp *kg)
913105854Sjulian{
914111028Sjeff	mtx_lock_spin(&kse_zombie_lock);
915105854Sjulian	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
916111028Sjeff	mtx_unlock_spin(&kse_zombie_lock);
917105854Sjulian}
918105854Sjulian
919105854Sjulian/*
920111028Sjeff * Reap zombie kse resources.
92199026Sjulian */
92299026Sjulianvoid
92399026Sjulianthread_reap(void)
92499026Sjulian{
925105854Sjulian	struct thread *td_first, *td_next;
926105854Sjulian	struct kse *ke_first, *ke_next;
927105854Sjulian	struct ksegrp *kg_first, * kg_next;
928111028Sjeff	struct kse_upcall *ku_first, *ku_next;
92999026Sjulian
93099026Sjulian	/*
931111028Sjeff	 * Don't even bother to lock if none at this instant,
932111028Sjeff	 * we really don't care about the next instant..
93399026Sjulian	 */
934105854Sjulian	if ((!TAILQ_EMPTY(&zombie_threads))
935105854Sjulian	    || (!TAILQ_EMPTY(&zombie_kses))
936111028Sjeff	    || (!TAILQ_EMPTY(&zombie_ksegrps))
937111028Sjeff	    || (!TAILQ_EMPTY(&zombie_upcalls))) {
938111028Sjeff		mtx_lock_spin(&kse_zombie_lock);
939105854Sjulian		td_first = TAILQ_FIRST(&zombie_threads);
940105854Sjulian		ke_first = TAILQ_FIRST(&zombie_kses);
941105854Sjulian		kg_first = TAILQ_FIRST(&zombie_ksegrps);
942111028Sjeff		ku_first = TAILQ_FIRST(&zombie_upcalls);
943105854Sjulian		if (td_first)
944105854Sjulian			TAILQ_INIT(&zombie_threads);
945105854Sjulian		if (ke_first)
946105854Sjulian			TAILQ_INIT(&zombie_kses);
947105854Sjulian		if (kg_first)
948105854Sjulian			TAILQ_INIT(&zombie_ksegrps);
949111028Sjeff		if (ku_first)
950111028Sjeff			TAILQ_INIT(&zombie_upcalls);
951111028Sjeff		mtx_unlock_spin(&kse_zombie_lock);
952105854Sjulian		while (td_first) {
953105854Sjulian			td_next = TAILQ_NEXT(td_first, td_runq);
954111028Sjeff			if (td_first->td_ucred)
955111028Sjeff				crfree(td_first->td_ucred);
956105854Sjulian			thread_free(td_first);
957105854Sjulian			td_first = td_next;
95899026Sjulian		}
959105854Sjulian		while (ke_first) {
960105854Sjulian			ke_next = TAILQ_NEXT(ke_first, ke_procq);
961105854Sjulian			kse_free(ke_first);
962105854Sjulian			ke_first = ke_next;
963105854Sjulian		}
964105854Sjulian		while (kg_first) {
965105854Sjulian			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
966105854Sjulian			ksegrp_free(kg_first);
967105854Sjulian			kg_first = kg_next;
968105854Sjulian		}
969111028Sjeff		while (ku_first) {
970111028Sjeff			ku_next = TAILQ_NEXT(ku_first, ku_link);
971111028Sjeff			upcall_free(ku_first);
972111028Sjeff			ku_first = ku_next;
973111028Sjeff		}
97499026Sjulian	}
97599026Sjulian}
97699026Sjulian
97799026Sjulian/*
978103367Sjulian * Allocate a ksegrp.
979103367Sjulian */
980103367Sjulianstruct ksegrp *
981103367Sjulianksegrp_alloc(void)
982103367Sjulian{
983111119Simp	return (uma_zalloc(ksegrp_zone, M_WAITOK));
984103367Sjulian}
985103367Sjulian
986103367Sjulian/*
987103367Sjulian * Allocate a kse.
988103367Sjulian */
989103367Sjulianstruct kse *
990103367Sjuliankse_alloc(void)
991103367Sjulian{
992111119Simp	return (uma_zalloc(kse_zone, M_WAITOK));
993103367Sjulian}
994103367Sjulian
995103367Sjulian/*
99699026Sjulian * Allocate a thread.
99799026Sjulian */
99899026Sjulianstruct thread *
99999026Sjulianthread_alloc(void)
100099026Sjulian{
100199026Sjulian	thread_reap(); /* check if any zombies to get */
1002111119Simp	return (uma_zalloc(thread_zone, M_WAITOK));
100399026Sjulian}
100499026Sjulian
100599026Sjulian/*
1006103367Sjulian * Deallocate a ksegrp.
1007103367Sjulian */
1008103367Sjulianvoid
1009103367Sjulianksegrp_free(struct ksegrp *td)
1010103367Sjulian{
1011103367Sjulian	uma_zfree(ksegrp_zone, td);
1012103367Sjulian}
1013103367Sjulian
1014103367Sjulian/*
1015103367Sjulian * Deallocate a kse.
1016103367Sjulian */
1017103367Sjulianvoid
1018103367Sjuliankse_free(struct kse *td)
1019103367Sjulian{
1020103367Sjulian	uma_zfree(kse_zone, td);
1021103367Sjulian}
1022103367Sjulian
1023103367Sjulian/*
102499026Sjulian * Deallocate a thread.
102599026Sjulian */
102699026Sjulianvoid
102799026Sjulianthread_free(struct thread *td)
102899026Sjulian{
1029107719Sjulian
1030107719Sjulian	cpu_thread_clean(td);
103199026Sjulian	uma_zfree(thread_zone, td);
103299026Sjulian}
103399026Sjulian
103499026Sjulian/*
103599026Sjulian * Store the thread context in the UTS's mailbox,
1036104031Sjulian * then add the mailbox at the head of a list we are building in user space.
1037104031Sjulian * The list is anchored in the ksegrp structure.
103899026Sjulian */
103999026Sjulianint
1040117704Sdavidxuthread_export_context(struct thread *td, int willexit)
104199026Sjulian{
1042104503Sjmallett	struct proc *p;
1043104031Sjulian	struct ksegrp *kg;
1044104031Sjulian	uintptr_t mbx;
1045104031Sjulian	void *addr;
1046116963Sdavidxu	int error = 0, temp, sig;
1047115790Sjulian	mcontext_t mc;
104899026Sjulian
1049104503Sjmallett	p = td->td_proc;
1050104503Sjmallett	kg = td->td_ksegrp;
1051104503Sjmallett
1052104031Sjulian	/* Export the user/machine context. */
1053115790Sjulian	get_mcontext(td, &mc, 0);
1054115790Sjulian	addr = (void *)(&td->td_mailbox->tm_context.uc_mcontext);
1055115790Sjulian	error = copyout(&mc, addr, sizeof(mcontext_t));
1056115790Sjulian	if (error)
1057108338Sjulian		goto bad;
1058104031Sjulian
1059111028Sjeff	/* Export the clock ticks spent in kernel mode. */
1060111028Sjeff	addr = (caddr_t)(&td->td_mailbox->tm_sticks);
1061117000Smarcel	temp = fuword32(addr) + td->td_usticks;
1062117000Smarcel	if (suword32(addr, temp)) {
1063115790Sjulian		error = EFAULT;
1064111028Sjeff		goto bad;
1065115790Sjulian	}
1066111028Sjeff
1067116963Sdavidxu	/*
1068116963Sdavidxu	 * Post a sync signal, or process SIGKILL and SIGSTOP.
1069116963Sdavidxu	 * For a sync signal, this is only possible when the signal is not
1070116963Sdavidxu	 * caught by userland or the process is being debugged.
1071116963Sdavidxu	 */
1072117704Sdavidxu	PROC_LOCK(p);
1073116963Sdavidxu	if (td->td_flags & TDF_NEEDSIGCHK) {
1074116963Sdavidxu		mtx_lock_spin(&sched_lock);
1075116963Sdavidxu		td->td_flags &= ~TDF_NEEDSIGCHK;
1076116963Sdavidxu		mtx_unlock_spin(&sched_lock);
1077116963Sdavidxu		mtx_lock(&p->p_sigacts->ps_mtx);
1078116963Sdavidxu		while ((sig = cursig(td)) != 0)
1079116963Sdavidxu			postsig(sig);
1080116963Sdavidxu		mtx_unlock(&p->p_sigacts->ps_mtx);
1081116963Sdavidxu	}
1082117704Sdavidxu	if (willexit)
1083117704Sdavidxu		SIGFILLSET(td->td_sigmask);
1084117704Sdavidxu	PROC_UNLOCK(p);
1085116963Sdavidxu
1086111028Sjeff	/* Get the address of the list pointer in the latest mailbox. */
1087104031Sjulian	addr = (void *)(&td->td_mailbox->tm_next);
1088104031Sjulian	/*
1089104031Sjulian	 * Put the saved address of the previous first
1090104031Sjulian	 * entry into this one
1091104031Sjulian	 */
1092104031Sjulian	for (;;) {
1093104031Sjulian		mbx = (uintptr_t)kg->kg_completed;
1094104031Sjulian		if (suword(addr, mbx)) {
1095108338Sjulian			error = EFAULT;
1096107034Sdavidxu			goto bad;
1097104031Sjulian		}
1098104126Sjulian		PROC_LOCK(p);
1099104031Sjulian		if (mbx == (uintptr_t)kg->kg_completed) {
1100104031Sjulian			kg->kg_completed = td->td_mailbox;
1101111028Sjeff			/*
1102111028Sjeff			 * The thread context may be taken away by
1103111028Sjeff			 * other upcall threads when we unlock the
1104111028Sjeff			 * process lock. It is no longer valid to
1105111028Sjeff			 * use it again anywhere else.
1106111028Sjeff			 */
1107111028Sjeff			td->td_mailbox = NULL;
1108104126Sjulian			PROC_UNLOCK(p);
1109104031Sjulian			break;
1110104031Sjulian		}
1111104126Sjulian		PROC_UNLOCK(p);
1112104031Sjulian	}
1113111028Sjeff	td->td_usticks = 0;
1114104031Sjulian	return (0);
1115107034Sdavidxu
1116107034Sdavidxubad:
1117107034Sdavidxu	PROC_LOCK(p);
1118117704Sdavidxu	sigexit(td, SIGILL);
1119108338Sjulian	return (error);
1120104031Sjulian}
112199026Sjulian
1122104031Sjulian/*
1123104031Sjulian * Take the list of completed mailboxes for this KSEGRP and put them on this
1124111028Sjeff * upcall's mailbox as it's the next one going up.
1125104031Sjulian */
1126104031Sjulianstatic int
1127111028Sjeffthread_link_mboxes(struct ksegrp *kg, struct kse_upcall *ku)
1128104031Sjulian{
1129104126Sjulian	struct proc *p = kg->kg_proc;
1130104031Sjulian	void *addr;
1131104031Sjulian	uintptr_t mbx;
1132104031Sjulian
1133111028Sjeff	addr = (void *)(&ku->ku_mailbox->km_completed);
1134104031Sjulian	for (;;) {
1135104031Sjulian		mbx = (uintptr_t)kg->kg_completed;
1136104031Sjulian		if (suword(addr, mbx)) {
1137104126Sjulian			PROC_LOCK(p);
1138104126Sjulian			psignal(p, SIGSEGV);
1139104126Sjulian			PROC_UNLOCK(p);
1140104031Sjulian			return (EFAULT);
1141104031Sjulian		}
1142104126Sjulian		PROC_LOCK(p);
1143104031Sjulian		if (mbx == (uintptr_t)kg->kg_completed) {
1144104031Sjulian			kg->kg_completed = NULL;
1145104126Sjulian			PROC_UNLOCK(p);
1146104031Sjulian			break;
1147104031Sjulian		}
1148104126Sjulian		PROC_UNLOCK(p);
114999026Sjulian	}
1150104031Sjulian	return (0);
115199026Sjulian}
115299026Sjulian
115399026Sjulian/*
1154107034Sdavidxu * This function should be called at statclock interrupt time
1155107034Sdavidxu */
1156107034Sdavidxuint
1157111028Sjeffthread_statclock(int user)
1158107034Sdavidxu{
1159107034Sdavidxu	struct thread *td = curthread;
1160116401Sdavidxu	struct ksegrp *kg = td->td_ksegrp;
1161124350Sschweikh
1162116401Sdavidxu	if (kg->kg_numupcalls == 0 || !(td->td_flags & TDF_SA))
1163116401Sdavidxu		return (0);
1164107034Sdavidxu	if (user) {
1165107034Sdavidxu		/* Currently always done via ast(). */
1166111976Sdavidxu		mtx_lock_spin(&sched_lock);
1167111032Sjulian		td->td_flags |= (TDF_USTATCLOCK|TDF_ASTPENDING);
1168111976Sdavidxu		mtx_unlock_spin(&sched_lock);
1169111028Sjeff		td->td_uuticks++;
1170107034Sdavidxu	} else {
1171107034Sdavidxu		if (td->td_mailbox != NULL)
1172111028Sjeff			td->td_usticks++;
1173111028Sjeff		else {
1174111028Sjeff			/* XXXKSE
1175111028Sjeff			 * We will call thread_user_enter() for every
1176111028Sjeff			 * kernel entry in the future, so if the thread mailbox
1177111028Sjeff			 * is NULL, it must be the UTS; don't account
1178111028Sjeff			 * clock ticks for it.
1179111028Sjeff			 */
1180111028Sjeff		}
1181107034Sdavidxu	}
1182111028Sjeff	return (0);
1183107034Sdavidxu}
1184107034Sdavidxu
1185111028Sjeff/*
1186111515Sdavidxu * Export stat clock ticks for userland.
1187111028Sjeff */
1188107034Sdavidxustatic int
1189111515Sdavidxuthread_update_usr_ticks(struct thread *td, int user)
1190107034Sdavidxu{
1191107034Sdavidxu	struct proc *p = td->td_proc;
1192107034Sdavidxu	struct kse_thr_mailbox *tmbx;
1193111028Sjeff	struct kse_upcall *ku;
1194112397Sdavidxu	struct ksegrp *kg;
1195107034Sdavidxu	caddr_t addr;
1196118607Sjhb	u_int uticks;
1197107034Sdavidxu
1198111028Sjeff	if ((ku = td->td_upcall) == NULL)
1199111028Sjeff		return (-1);
1200124350Sschweikh
1201111028Sjeff	tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
1202107034Sdavidxu	if ((tmbx == NULL) || (tmbx == (void *)-1))
1203111028Sjeff		return (-1);
1204111515Sdavidxu	if (user) {
1205111515Sdavidxu		uticks = td->td_uuticks;
1206111515Sdavidxu		td->td_uuticks = 0;
1207111515Sdavidxu		addr = (caddr_t)&tmbx->tm_uticks;
1208111515Sdavidxu	} else {
1209111515Sdavidxu		uticks = td->td_usticks;
1210111515Sdavidxu		td->td_usticks = 0;
1211111515Sdavidxu		addr = (caddr_t)&tmbx->tm_sticks;
1212111515Sdavidxu	}
1213107034Sdavidxu	if (uticks) {
1214117000Smarcel		if (suword32(addr, uticks+fuword32(addr))) {
1215111028Sjeff			PROC_LOCK(p);
1216111028Sjeff			psignal(p, SIGSEGV);
1217111028Sjeff			PROC_UNLOCK(p);
1218111028Sjeff			return (-2);
1219111028Sjeff		}
1220107034Sdavidxu	}
1221112397Sdavidxu	kg = td->td_ksegrp;
1222112397Sdavidxu	if (kg->kg_upquantum && ticks >= kg->kg_nextupcall) {
1223112397Sdavidxu		mtx_lock_spin(&sched_lock);
1224112397Sdavidxu		td->td_upcall->ku_flags |= KUF_DOUPCALL;
1225112397Sdavidxu		mtx_unlock_spin(&sched_lock);
1226112397Sdavidxu	}
1227111028Sjeff	return (0);
1228111028Sjeff}
1229111028Sjeff
1230111028Sjeff/*
123199026Sjulian * Discard the current thread and exit from its context.
123299026Sjulian *
123399026Sjulian * Because we can't free a thread while we're operating under its context,
1234107719Sjulian * push the current thread into our CPU's deadthread holder. This means
1235107719Sjulian * we needn't worry about someone else grabbing our context before we
1236107719Sjulian * do a cpu_throw().
123799026Sjulian */
123899026Sjulianvoid
123999026Sjulianthread_exit(void)
124099026Sjulian{
124199026Sjulian	struct thread *td;
124299026Sjulian	struct kse *ke;
124399026Sjulian	struct proc *p;
124499026Sjulian	struct ksegrp	*kg;
124599026Sjulian
124699026Sjulian	td = curthread;
124799026Sjulian	kg = td->td_ksegrp;
124899026Sjulian	p = td->td_proc;
124999026Sjulian	ke = td->td_kse;
125099026Sjulian
125199026Sjulian	mtx_assert(&sched_lock, MA_OWNED);
1252102581Sjulian	KASSERT(p != NULL, ("thread exiting without a process"));
1253102581Sjulian	KASSERT(ke != NULL, ("thread exiting without a kse"));
1254102581Sjulian	KASSERT(kg != NULL, ("thread exiting without a kse group"));
125599026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
125699026Sjulian	CTR1(KTR_PROC, "thread_exit: thread %p", td);
1257125158Sjhb	mtx_assert(&Giant, MA_NOTOWNED);
125899026Sjulian
1259104695Sjulian	if (td->td_standin != NULL) {
1260104695Sjulian		thread_stash(td->td_standin);
1261104695Sjulian		td->td_standin = NULL;
1262104695Sjulian	}
1263104695Sjulian
126499026Sjulian	cpu_thread_exit(td);	/* XXXSMP */
126599026Sjulian
1266102581Sjulian	/*
1267103002Sjulian	 * The last thread is left attached to the process
1268103002Sjulian	 * so that the whole bundle gets recycled. Skip
1269103002Sjulian	 * all this stuff.
1270102581Sjulian	 */
1271103002Sjulian	if (p->p_numthreads > 1) {
1272113641Sjulian		thread_unlink(td);
1273111115Sdavidxu		if (p->p_maxthrwaits)
1274111115Sdavidxu			wakeup(&p->p_numthreads);
1275103002Sjulian		/*
1276103002Sjulian		 * The test below is NOT true if we are the
1277103002Sjulian		 * sole exiting thread. P_STOPPED_SNGL is unset
1278103002Sjulian		 * in exit1() after it is the only survivor.
1279103002Sjulian		 */
1280103002Sjulian		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
1281103002Sjulian			if (p->p_numthreads == p->p_suspcount) {
1282103216Sjulian				thread_unsuspend_one(p->p_singlethread);
1283103002Sjulian			}
128499026Sjulian		}
1285104695Sjulian
1286111028Sjeff		/*
1287111028Sjeff		 * Because each upcall structure has an owner thread,
1288111028Sjeff		 * and the owner thread exits only when the process is
1289111028Sjeff		 * exiting, upcalls to userland are no longer needed and
1290111028Sjeff		 * deleting the upcall structure is safe here.
1291111028Sjeff		 * So when all threads in a group have exited, all upcalls
1292111028Sjeff		 * in the group should have been automatically freed.
1293111028Sjeff		 */
1294111028Sjeff		if (td->td_upcall)
1295111028Sjeff			upcall_remove(td);
1296124350Sschweikh
1297119488Sdavidxu		sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
1298119488Sdavidxu		sched_exit_kse(FIRST_KSE_IN_PROC(p), ke);
1299104695Sjulian		ke->ke_state = KES_UNQUEUED;
1300111028Sjeff		ke->ke_thread = NULL;
1301124350Sschweikh		/*
1302108338Sjulian		 * Decide what to do with the KSE attached to this thread.
1303104695Sjulian		 */
1304119488Sdavidxu		if (ke->ke_flags & KEF_EXIT) {
1305105854Sjulian			kse_unlink(ke);
1306119488Sdavidxu			if (kg->kg_kses == 0) {
1307119488Sdavidxu				sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), kg);
1308119488Sdavidxu				ksegrp_unlink(kg);
1309119488Sdavidxu			}
1310119488Sdavidxu		}
1311111028Sjeff		else
1312105854Sjulian			kse_reassign(ke);
1313105854Sjulian		PROC_UNLOCK(p);
1314111028Sjeff		td->td_kse	= NULL;
1315105854Sjulian		td->td_state	= TDS_INACTIVE;
1316113244Sdavidxu#if 0
1317105854Sjulian		td->td_proc	= NULL;
1318113244Sdavidxu#endif
1319105854Sjulian		td->td_ksegrp	= NULL;
1320105854Sjulian		td->td_last_kse	= NULL;
1321107719Sjulian		PCPU_SET(deadthread, td);
1322103002Sjulian	} else {
1323103002Sjulian		PROC_UNLOCK(p);
132499026Sjulian	}
1325112888Sjeff	/* XXX Shouldn't cpu_throw() here. */
1326112993Speter	mtx_assert(&sched_lock, MA_OWNED);
1327112993Speter	cpu_throw(td, choosethread());
1328112993Speter	panic("I'm a teapot!");
132999026Sjulian	/* NOTREACHED */
133099026Sjulian}
133199026Sjulian
1332124350Sschweikh/*
1333107719Sjulian * Do any thread-specific cleanups that may be needed in wait().
1334107719Sjulian * Called with Giant held; proc and sched locks not held.
1335107719Sjulian */
1336107719Sjulianvoid
1337107719Sjulianthread_wait(struct proc *p)
1338107719Sjulian{
1339107719Sjulian	struct thread *td;
1340107719Sjulian
1341124350Sschweikh	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
1342124350Sschweikh	KASSERT((p->p_numksegrps == 1), ("Multiple ksegrps in wait1()"));
1343107719Sjulian	FOREACH_THREAD_IN_PROC(p, td) {
1344107719Sjulian		if (td->td_standin != NULL) {
1345107719Sjulian			thread_free(td->td_standin);
1346107719Sjulian			td->td_standin = NULL;
1347107719Sjulian		}
1348107719Sjulian		cpu_thread_clean(td);
1349107719Sjulian	}
1350107719Sjulian	thread_reap();	/* check for zombie threads etc. */
1351107719Sjulian}
1352107719Sjulian
135399026Sjulian/*
135499026Sjulian * Link a thread to a process.
1355103002Sjulian * set up anything that needs to be initialized for it to
1356103002Sjulian * Set up anything that needs to be initialized for it to
135799026Sjulian *
135899026Sjulian * Note that we do not link to the proc's ucred here.
135999026Sjulian * The thread is linked as if running but no KSE assigned.
136099026Sjulian */
136199026Sjulianvoid
136299026Sjulianthread_link(struct thread *td, struct ksegrp *kg)
136399026Sjulian{
136499026Sjulian	struct proc *p;
136599026Sjulian
136699026Sjulian	p = kg->kg_proc;
1367111028Sjeff	td->td_state    = TDS_INACTIVE;
1368111028Sjeff	td->td_proc     = p;
1369111028Sjeff	td->td_ksegrp   = kg;
1370111028Sjeff	td->td_last_kse = NULL;
1371111028Sjeff	td->td_flags    = 0;
1372111028Sjeff	td->td_kse      = NULL;
137399026Sjulian
1374103002Sjulian	LIST_INIT(&td->td_contested);
1375119137Ssam	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
137699026Sjulian	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
137799026Sjulian	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
137899026Sjulian	p->p_numthreads++;
137999026Sjulian	kg->kg_numthreads++;
138099026Sjulian}
138199026Sjulian
1382113641Sjulianvoid
1383113641Sjulianthread_unlink(struct thread *td)
1384124350Sschweikh{
1385113641Sjulian	struct proc *p = td->td_proc;
1386113641Sjulian	struct ksegrp *kg = td->td_ksegrp;
1387113920Sjhb
1388113920Sjhb	mtx_assert(&sched_lock, MA_OWNED);
1389113641Sjulian	TAILQ_REMOVE(&p->p_threads, td, td_plist);
1390113641Sjulian	p->p_numthreads--;
1391113641Sjulian	TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
1392113641Sjulian	kg->kg_numthreads--;
1393113641Sjulian	/* could clear a few other things here */
1394124350Sschweikh}
1395113641Sjulian
1396111028Sjeff/*
1397111028Sjeff * Purge a ksegrp resource. When a ksegrp is preparing to
1398124350Sschweikh * exit, it calls this function.
1399111028Sjeff */
1400113864Sjhbstatic void
1401111028Sjeffkse_purge_group(struct thread *td)
1402111028Sjeff{
1403111028Sjeff	struct ksegrp *kg;
1404111028Sjeff	struct kse *ke;
1405111028Sjeff
1406111028Sjeff	kg = td->td_ksegrp;
1407111028Sjeff 	KASSERT(kg->kg_numthreads == 1, ("%s: bad thread number", __func__));
1408111028Sjeff	while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
1409111028Sjeff		KASSERT(ke->ke_state == KES_IDLE,
1410111028Sjeff			("%s: wrong idle KSE state", __func__));
1411111028Sjeff		kse_unlink(ke);
1412111028Sjeff	}
1413111028Sjeff	KASSERT((kg->kg_kses == 1),
1414111028Sjeff		("%s: ksegrp still has %d KSEs", __func__, kg->kg_kses));
1415111028Sjeff	KASSERT((kg->kg_numupcalls == 0),
1416111028Sjeff	        ("%s: ksegrp still has %d upcall datas",
1417111028Sjeff		__func__, kg->kg_numupcalls));
1418111028Sjeff}
1419111028Sjeff
1420111028Sjeff/*
1421124350Sschweikh * Purge a process's KSE resource. When a process is preparing to
1422124350Sschweikh * exit, it calls kse_purge to release any extra KSE resources in
1423111028Sjeff * the process.
1424111028Sjeff */
1425113864Sjhbstatic void
1426105854Sjuliankse_purge(struct proc *p, struct thread *td)
1427105854Sjulian{
1428105854Sjulian	struct ksegrp *kg;
1429111028Sjeff	struct kse *ke;
1430105854Sjulian
1431105854Sjulian 	KASSERT(p->p_numthreads == 1, ("bad thread number"));
1432105854Sjulian	while ((kg = TAILQ_FIRST(&p->p_ksegrps)) != NULL) {
1433105854Sjulian		TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
1434105854Sjulian		p->p_numksegrps--;
1435111028Sjeff		/*
1436111028Sjeff		 * KSEs have no owner; after all threads in the group
1437124350Sschweikh		 * have exited, it is possible that some KSEs were left
1438111028Sjeff		 * on the idle queue, so GC them now.
1439111028Sjeff		 */
1440111028Sjeff		while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
1441111028Sjeff			KASSERT(ke->ke_state == KES_IDLE,
1442111028Sjeff			   ("%s: wrong idle KSE state", __func__));
1443111028Sjeff			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
1444111028Sjeff			kg->kg_idle_kses--;
1445111028Sjeff			TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
1446111028Sjeff			kg->kg_kses--;
1447111028Sjeff			kse_stash(ke);
1448111028Sjeff		}
1449105854Sjulian		KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
1450111028Sjeff		        ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
1451111028Sjeff		        ("ksegrp has wrong kg_kses: %d", kg->kg_kses));
1452111028Sjeff		KASSERT((kg->kg_numupcalls == 0),
1453111028Sjeff		        ("%s: ksegrp still has %d upcall data structures",
1454111028Sjeff			__func__, kg->kg_numupcalls));
1455124350Sschweikh
1456111028Sjeff		if (kg != td->td_ksegrp)
1457105854Sjulian			ksegrp_stash(kg);
1458105854Sjulian	}
1459105854Sjulian	TAILQ_INSERT_HEAD(&p->p_ksegrps, td->td_ksegrp, kg_ksegrp);
1460105854Sjulian	p->p_numksegrps++;
1461105854Sjulian}
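
/*
 * A rough sketch of the state kse_purge() leaves behind, assuming the
 * caller really is the last thread (p_numthreads == 1, as asserted):
 *
 *	p->p_ksegrps	-> [ td->td_ksegrp ]	(only the caller's group)
 *	p->p_numksegrps	== 1
 *	kg->kg_kses	== 1			(the caller's KSE)
 *	kg->kg_iq	empty			(idle KSEs were stashed)
 */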
1462105854Sjulian
1463111028Sjeff/*
1464111028Sjeff * Initialize a spare thread for later use as an upcall thread.  The
1465111028Sjeff * thread's large data area is set up here, outside sched_lock, so that
1466111028Sjeff * thread_schedule_upcall() does not have to do it.
1467111028Sjeff */
1468111028Sjeffvoid
1469111028Sjeffthread_alloc_spare(struct thread *td, struct thread *spare)
1470111028Sjeff{
1471111028Sjeff	if (td->td_standin)
1472111028Sjeff		return;
1473111028Sjeff	if (spare == NULL)
1474111028Sjeff		spare = thread_alloc();
1475111028Sjeff	td->td_standin = spare;
1476111028Sjeff	bzero(&spare->td_startzero,
1477111028Sjeff	    (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
1478111028Sjeff	spare->td_proc = td->td_proc;
1479111028Sjeff	spare->td_ucred = crhold(td->td_ucred);
1480111028Sjeff}
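
/*
 * A typical calling pattern, sketched from the way td_standin is used
 * elsewhere in this file: allocate the spare while sleeping is still
 * allowed, then let thread_schedule_upcall() consume it under sched_lock.
 *
 *	if (td->td_standin == NULL)
 *		thread_alloc_spare(td, NULL);	-- may sleep in thread_alloc()
 *	mtx_lock_spin(&sched_lock);
 *	td2 = thread_schedule_upcall(td, ku);	-- consumes td->td_standin
 *	mtx_unlock_spin(&sched_lock);
 */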
1481105854Sjulian
148299026Sjulian/*
1483103410Smini * Create a thread and schedule it for an upcall on the given kse_upcall.
1484108338Sjulian * Use our thread's standin so that we don't have to allocate one.
148599026Sjulian */
148699026Sjulianstruct thread *
1487111028Sjeffthread_schedule_upcall(struct thread *td, struct kse_upcall *ku)
148899026Sjulian{
148999026Sjulian	struct thread *td2;
149099026Sjulian
149199026Sjulian	mtx_assert(&sched_lock, MA_OWNED);
1492104695Sjulian
1493124350Sschweikh	/*
1494111028Sjeff	 * Schedule an upcall thread on the specified kse_upcall;
1495111028Sjeff	 * the kse_upcall must be free and td must have a spare
1496111028Sjeff	 * thread (td_standin) available.
1497104695Sjulian	 */
1498111028Sjeff	KASSERT(ku->ku_owner == NULL, ("%s: upcall has owner", __func__));
1499104695Sjulian	if ((td2 = td->td_standin) != NULL) {
1500104695Sjulian		td->td_standin = NULL;
150199026Sjulian	} else {
1502111028Sjeff		panic("no reserve thread when scheduling an upcall");
1503106182Sdavidxu		return (NULL);
150499026Sjulian	}
150599026Sjulian	CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
1506104695Sjulian	     td2, td->td_proc->p_pid, td->td_proc->p_comm);
1507103002Sjulian	bcopy(&td->td_startcopy, &td2->td_startcopy,
1508103002Sjulian	    (unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy));
1509111028Sjeff	thread_link(td2, ku->ku_ksegrp);
1510113244Sdavidxu	/* inherit blocked thread's context */
1511115858Smarcel	cpu_set_upcall(td2, td);
1512111028Sjeff	/* Let the new thread become owner of the upcall */
1513111028Sjeff	ku->ku_owner   = td2;
1514111028Sjeff	td2->td_upcall = ku;
1515116401Sdavidxu	td2->td_flags  = TDF_SA;
1516116372Sdavidxu	td2->td_pflags = TDP_UPCALLING;
1517111028Sjeff	td2->td_kse    = NULL;
1518111028Sjeff	td2->td_state  = TDS_CAN_RUN;
1519104695Sjulian	td2->td_inhibitors = 0;
1520116963Sdavidxu	SIGFILLSET(td2->td_sigmask);
1521116963Sdavidxu	SIG_CANTMASK(td2->td_sigmask);
1522119488Sdavidxu	sched_fork_thread(td, td2);
1523104695Sjulian	return (td2);	/* bogus.. should be a void function */
152499026Sjulian}
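
/*
 * Sketch of the upcall ownership handoff performed above, assuming the
 * kse_upcall was free (ku_owner == NULL) and td had a standin on entry:
 *
 *	before:	td->td_standin == spare		ku->ku_owner == NULL
 *	after:	td->td_standin == NULL		ku->ku_owner == td2
 *		td2->td_upcall == ku		td2->td_state == TDS_CAN_RUN
 *
 * The caller is expected to make td2 runnable, e.g. via setrunqueue()
 * as thread_switchout() does.
 */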
152599026Sjulian
1526116963Sdavidxu/*
1527116963Sdavidxu * This is only used when a thread has generated a trap and the process
1528116963Sdavidxu * is being debugged.
1529116963Sdavidxu */
1530111033Sjeffvoid
1531111033Sjeffthread_signal_add(struct thread *td, int sig)
1532103410Smini{
1533111033Sjeff	struct proc *p;
1534116963Sdavidxu	siginfo_t siginfo;
1535116963Sdavidxu	struct sigacts *ps;
1536103410Smini	int error;
1537103410Smini
1538115884Sdavidxu	p = td->td_proc;
1539115884Sdavidxu	PROC_LOCK_ASSERT(p, MA_OWNED);
1540116963Sdavidxu	ps = p->p_sigacts;
1541116963Sdavidxu	mtx_assert(&ps->ps_mtx, MA_OWNED);
1542116963Sdavidxu
1543117607Sdavidxu	cpu_thread_siginfo(sig, 0, &siginfo);
1544116963Sdavidxu	mtx_unlock(&ps->ps_mtx);
1545103410Smini	PROC_UNLOCK(p);
1546116963Sdavidxu	error = copyout(&siginfo, &td->td_mailbox->tm_syncsig, sizeof(siginfo));
1547116963Sdavidxu	if (error) {
1548116963Sdavidxu		PROC_LOCK(p);
1549116963Sdavidxu		sigexit(td, SIGILL);
1550116963Sdavidxu	}
1551103410Smini	PROC_LOCK(p);
1552116963Sdavidxu	SIGADDSET(td->td_sigmask, sig);
1553116963Sdavidxu	mtx_lock(&ps->ps_mtx);
1554111033Sjeff}
1555111033Sjeff
1556111033Sjeffvoid
1557112397Sdavidxuthread_switchout(struct thread *td)
1558112397Sdavidxu{
1559112397Sdavidxu	struct kse_upcall *ku;
1560116607Sdavidxu	struct thread *td2;
1561112397Sdavidxu
1562112397Sdavidxu	mtx_assert(&sched_lock, MA_OWNED);
1563112397Sdavidxu
1564112397Sdavidxu	/*
1565112397Sdavidxu	 * If the outgoing thread is in a threaded group and has never
1566112397Sdavidxu	 * scheduled an upcall, decide whether this is a short- or
1567112397Sdavidxu	 * long-term event and thus whether or not to schedule
1568112397Sdavidxu	 * an upcall.
1569112397Sdavidxu	 * If it is a short-term event, just suspend it in
1570112397Sdavidxu	 * a way that takes its KSE with it.
1571112397Sdavidxu	 * Select the events for which we want to schedule upcalls.
1572112397Sdavidxu	 * For now it's just sleep.
1573112397Sdavidxu	 * XXXKSE eventually almost any inhibition could do.
1574112397Sdavidxu	 */
1575112397Sdavidxu	if (TD_CAN_UNBIND(td) && (td->td_standin) && TD_ON_SLEEPQ(td)) {
1576124350Sschweikh		/*
1577112397Sdavidxu		 * Release ownership of the upcall and schedule an upcall
1578112397Sdavidxu		 * thread; the new upcall thread becomes the owner of
1579112397Sdavidxu		 * the upcall structure.
1580112397Sdavidxu		 */
1581112397Sdavidxu		ku = td->td_upcall;
1582112397Sdavidxu		ku->ku_owner = NULL;
1583124350Sschweikh		td->td_upcall = NULL;
1584112397Sdavidxu		td->td_flags &= ~TDF_CAN_UNBIND;
1585116607Sdavidxu		td2 = thread_schedule_upcall(td, ku);
1586116607Sdavidxu		setrunqueue(td2);
1587112397Sdavidxu	}
1588112397Sdavidxu}
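
/*
 * A rough timeline of the sleep case handled above, for a thread that
 * can unbind and has a standin available:
 *
 *	td goes to sleep (TD_ON_SLEEPQ)	-> ku->ku_owner = NULL,
 *					   TDF_CAN_UNBIND cleared
 *	thread_schedule_upcall(td, ku)	-> td2 now owns ku, TDP_UPCALLING set
 *	setrunqueue(td2)		-> td2 runs the UTS upcall while td
 *					   stays asleep
 */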
1589112397Sdavidxu
1590103410Smini/*
1591111028Sjeff * Setup done on the thread when it enters the kernel.
1592105900Sjulian * XXXKSE Presently only for syscalls but eventually all kernel entries.
1593105900Sjulian */
1594105900Sjulianvoid
1595105900Sjulianthread_user_enter(struct proc *p, struct thread *td)
1596105900Sjulian{
1597111028Sjeff	struct ksegrp *kg;
1598111028Sjeff	struct kse_upcall *ku;
1599113793Sdavidxu	struct kse_thr_mailbox *tmbx;
1600118486Sdavidxu	uint32_t tflags;
1601105900Sjulian
1602111028Sjeff	kg = td->td_ksegrp;
1603113793Sdavidxu
1604105900Sjulian	/*
1605105900Sjulian	 * First check that we shouldn't just abort.
1606105900Sjulian	 * But check if we are the single thread first!
1607105900Sjulian	 */
1608116401Sdavidxu	if (p->p_flag & P_SINGLE_EXIT) {
1609116401Sdavidxu		PROC_LOCK(p);
1610105900Sjulian		mtx_lock_spin(&sched_lock);
1611112071Sdavidxu		thread_stopped(p);
1612105900Sjulian		thread_exit();
1613105900Sjulian		/* NOTREACHED */
1614105900Sjulian	}
1615105900Sjulian
1616105900Sjulian	/*
1617105900Sjulian	 * If we are doing a syscall in a KSE environment,
1618105900Sjulian	 * note where our mailbox is. There is always the
1619108338Sjulian	 * possibility that we could do this lazily (in kse_reassign()),
1620105900Sjulian	 * but for now do it every time.
1621105900Sjulian	 */
1622111028Sjeff	kg = td->td_ksegrp;
1623116401Sdavidxu	if (td->td_flags & TDF_SA) {
1624111028Sjeff		ku = td->td_upcall;
1625111028Sjeff		KASSERT(ku, ("%s: no upcall owned", __func__));
1626111028Sjeff		KASSERT((ku->ku_owner == td), ("%s: wrong owner", __func__));
1627113793Sdavidxu		KASSERT(!TD_CAN_UNBIND(td), ("%s: can unbind", __func__));
1628117000Smarcel		ku->ku_mflags = fuword32((void *)&ku->ku_mailbox->km_flags);
1629113793Sdavidxu		tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
1630118486Sdavidxu		if ((tmbx == NULL) || (tmbx == (void *)-1L) ||
1631118486Sdavidxu		    (ku->ku_mflags & KMF_NOUPCALL)) {
1632111028Sjeff			td->td_mailbox = NULL;
1633105900Sjulian		} else {
1634111115Sdavidxu			if (td->td_standin == NULL)
1635111115Sdavidxu				thread_alloc_spare(td, NULL);
1636118673Sdeischen			tflags = fuword32(&tmbx->tm_flags);
1637118486Sdavidxu			/*
1638118486Sdavidxu			 * On some architectures the TP register points to the
1639124350Sschweikh			 * thread mailbox rather than the KSE mailbox, so
1640124350Sschweikh			 * userland cannot atomically clear km_curthread.  It
1641118486Sdavidxu			 * can, however, use the TP register and set TMF_NOUPCALL
1642118486Sdavidxu			 * in the thread flags to indicate a critical region.
1643118486Sdavidxu			 */
1644118486Sdavidxu			if (tflags & TMF_NOUPCALL) {
1645118486Sdavidxu				td->td_mailbox = NULL;
1646118486Sdavidxu			} else {
1647118486Sdavidxu				td->td_mailbox = tmbx;
1648118486Sdavidxu				mtx_lock_spin(&sched_lock);
1649113793Sdavidxu				td->td_flags |= TDF_CAN_UNBIND;
1650118486Sdavidxu				mtx_unlock_spin(&sched_lock);
1651118486Sdavidxu			}
1652105900Sjulian		}
1653105900Sjulian	}
1654105900Sjulian}
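
/*
 * For context, a hedged sketch of the userland side of the protocol read
 * above (field names as used in this file; the actual UTS code lives in
 * userland and is not part of this file):
 *
 *	-- on architectures that cannot atomically clear km_curthread,
 *	-- a critical region is marked in the thread mailbox instead:
 *	tmbx->tm_flags |= TMF_NOUPCALL;
 *	... critical region: thread_user_enter() will not mark the
 *	    thread as able to unbind ...
 *	tmbx->tm_flags &= ~TMF_NOUPCALL;
 *
 * Alternatively a UTS may set KMF_NOUPCALL in km_flags to disable
 * upcalls for the whole KSE mailbox, which is also honoured above.
 */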
1655105900Sjulian
1656105900Sjulian/*
1657103410Smini * The extra work we go through if we are a threaded process when we
1658103410Smini * return to userland.
1659103410Smini *
166099026Sjulian * If we are a KSE process and returning to user mode, check for
166199026Sjulian * extra work to do before we return (e.g. for more syscalls
166299026Sjulian * to complete first).  If we were in a critical section, we should
166399026Sjulian * just return to let it finish. Same if we were in the UTS (in
1664103410Smini * which case the mailbox's context's busy indicator will be set).
1665103410Smini * The only traps we support will have set the mailbox.
1666103410Smini * We will clear it here.
166799026Sjulian */
166899026Sjulianint
1669103838Sjulianthread_userret(struct thread *td, struct trapframe *frame)
167099026Sjulian{
1671113793Sdavidxu	int error = 0, upcalls, uts_crit;
1672111028Sjeff	struct kse_upcall *ku;
1673111115Sdavidxu	struct ksegrp *kg, *kg2;
1674104695Sjulian	struct proc *p;
1675107060Sdavidxu	struct timespec ts;
167699026Sjulian
1677111028Sjeff	p = td->td_proc;
1678110190Sjulian	kg = td->td_ksegrp;
1679116401Sdavidxu	ku = td->td_upcall;
1680104695Sjulian
1681116401Sdavidxu	/* Nothing to do with bound thread */
1682116401Sdavidxu	if (!(td->td_flags & TDF_SA))
1683111028Sjeff		return (0);
1684108338Sjulian
1685103410Smini	/*
1686124350Sschweikh	 * A stat clock interrupt hit this thread in userland and it
1687111028Sjeff	 * is now returning from that interrupt; charge the thread's
1688111028Sjeff	 * userland time to the UTS.
1689103410Smini	 */
1690111028Sjeff	if (td->td_flags & TDF_USTATCLOCK) {
1691111515Sdavidxu		thread_update_usr_ticks(td, 1);
1692111028Sjeff		mtx_lock_spin(&sched_lock);
1693111028Sjeff		td->td_flags &= ~TDF_USTATCLOCK;
1694111028Sjeff		mtx_unlock_spin(&sched_lock);
1695116401Sdavidxu		if (kg->kg_completed ||
1696111515Sdavidxu		    (td->td_upcall->ku_flags & KUF_DOUPCALL))
1697111515Sdavidxu			thread_user_enter(p, td);
1698111028Sjeff	}
1699108338Sjulian
1700113793Sdavidxu	uts_crit = (td->td_mailbox == NULL);
1701124350Sschweikh	/*
1702111028Sjeff	 * Optimisation:
1703111028Sjeff	 * This thread has not started any upcall.
1704111028Sjeff	 * If there is no work to report other than ourselves,
1705111028Sjeff	 * then it can return directly to userland.
1706111028Sjeff	 */
1707108338Sjulian	if (TD_CAN_UNBIND(td)) {
1708111028Sjeff		mtx_lock_spin(&sched_lock);
1709111028Sjeff		td->td_flags &= ~TDF_CAN_UNBIND;
1710112888Sjeff		if ((td->td_flags & TDF_NEEDSIGCHK) == 0 &&
1711112077Sdavidxu		    (kg->kg_completed == NULL) &&
1712112397Sdavidxu		    (ku->ku_flags & KUF_DOUPCALL) == 0 &&
1713113708Sdavidxu		    (kg->kg_upquantum && ticks < kg->kg_nextupcall)) {
1714112888Sjeff			mtx_unlock_spin(&sched_lock);
1715111515Sdavidxu			thread_update_usr_ticks(td, 0);
1716112222Sdavidxu			nanotime(&ts);
1717112397Sdavidxu			error = copyout(&ts,
1718112222Sdavidxu				(caddr_t)&ku->ku_mailbox->km_timeofday,
1719112222Sdavidxu				sizeof(ts));
1720112077Sdavidxu			td->td_mailbox = 0;
1721113793Sdavidxu			ku->ku_mflags = 0;
1722112222Sdavidxu			if (error)
1723112222Sdavidxu				goto out;
1724112077Sdavidxu			return (0);
1725108338Sjulian		}
1726112888Sjeff		mtx_unlock_spin(&sched_lock);
1727117704Sdavidxu		thread_export_context(td, 0);
1728104695Sjulian		/*
1729111028Sjeff		 * There is something to report, and we own an upcall
1730111028Sjeff		 * structure, so we can go to userland.
1731111028Sjeff		 * Turn ourselves into an upcall thread.
1732104695Sjulian		 */
1733116372Sdavidxu		td->td_pflags |= TDP_UPCALLING;
1734113793Sdavidxu	} else if (td->td_mailbox && (ku == NULL)) {
1735117704Sdavidxu		thread_export_context(td, 1);
1736112071Sdavidxu		PROC_LOCK(p);
1737112071Sdavidxu		/*
1738112071Sdavidxu		 * There are upcall threads waiting for
1739112071Sdavidxu		 * work to do, wake one of them up.
1740124350Sschweikh		 * XXXKSE Maybe wake all of them up.
1741112071Sdavidxu		 */
1742117704Sdavidxu		if (kg->kg_upsleeps)
1743112071Sdavidxu			wakeup_one(&kg->kg_completed);
1744112071Sdavidxu		mtx_lock_spin(&sched_lock);
1745112071Sdavidxu		thread_stopped(p);
1746108338Sjulian		thread_exit();
1747111028Sjeff		/* NOTREACHED */
1748104695Sjulian	}
1749104695Sjulian
1750116401Sdavidxu	KASSERT(ku != NULL, ("upcall is NULL\n"));
1751111154Sdavidxu	KASSERT(TD_CAN_UNBIND(td) == 0, ("can unbind"));
1752111154Sdavidxu
1753111154Sdavidxu	if (p->p_numthreads > max_threads_per_proc) {
1754111154Sdavidxu		max_threads_hits++;
1755111154Sdavidxu		PROC_LOCK(p);
1756113920Sjhb		mtx_lock_spin(&sched_lock);
1757116184Sdavidxu		p->p_maxthrwaits++;
1758111154Sdavidxu		while (p->p_numthreads > max_threads_per_proc) {
1759111154Sdavidxu			upcalls = 0;
1760111154Sdavidxu			FOREACH_KSEGRP_IN_PROC(p, kg2) {
1761111154Sdavidxu				if (kg2->kg_numupcalls == 0)
1762111154Sdavidxu					upcalls++;
1763111154Sdavidxu				else
1764111154Sdavidxu					upcalls += kg2->kg_numupcalls;
1765111154Sdavidxu			}
1766111154Sdavidxu			if (upcalls >= max_threads_per_proc)
1767111154Sdavidxu				break;
1768114106Sdavidxu			mtx_unlock_spin(&sched_lock);
1769116138Sdavidxu			if (msleep(&p->p_numthreads, &p->p_mtx, PPAUSE|PCATCH,
1770123737Speter			    "maxthreads", 0)) {
1771116184Sdavidxu				mtx_lock_spin(&sched_lock);
1772116184Sdavidxu				break;
1773116184Sdavidxu			} else {
1774116184Sdavidxu				mtx_lock_spin(&sched_lock);
1775116184Sdavidxu			}
1776111154Sdavidxu		}
1777116184Sdavidxu		p->p_maxthrwaits--;
1778113920Sjhb		mtx_unlock_spin(&sched_lock);
1779111154Sdavidxu		PROC_UNLOCK(p);
1780111154Sdavidxu	}
1781111154Sdavidxu
1782116372Sdavidxu	if (td->td_pflags & TDP_UPCALLING) {
1783113793Sdavidxu		uts_crit = 0;
1784112397Sdavidxu		kg->kg_nextupcall = ticks+kg->kg_upquantum;
1785124350Sschweikh		/*
1786108338Sjulian		 * There is no more work to do and we are going to ride
1787111028Sjeff		 * this thread up to userland as an upcall.
1788108338Sjulian		 * Do the last parts of the setup needed for the upcall.
1789108338Sjulian		 */
1790108338Sjulian		CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
1791108338Sjulian		    td, td->td_proc->p_pid, td->td_proc->p_comm);
1792104695Sjulian
1793116372Sdavidxu		td->td_pflags &= ~TDP_UPCALLING;
1794116401Sdavidxu		if (ku->ku_flags & KUF_DOUPCALL) {
1795116401Sdavidxu			mtx_lock_spin(&sched_lock);
1796111028Sjeff			ku->ku_flags &= ~KUF_DOUPCALL;
1797116401Sdavidxu			mtx_unlock_spin(&sched_lock);
1798116401Sdavidxu		}
1799111028Sjeff		/*
1800113793Sdavidxu		 * Set the user context to the UTS.
1801113793Sdavidxu		 */
1802113793Sdavidxu		if (!(ku->ku_mflags & KMF_NOUPCALL)) {
1803113793Sdavidxu			cpu_set_upcall_kse(td, ku);
1804113793Sdavidxu			error = suword(&ku->ku_mailbox->km_curthread, 0);
1805113793Sdavidxu			if (error)
1806113793Sdavidxu				goto out;
1807113793Sdavidxu		}
1808113793Sdavidxu
1809113793Sdavidxu		/*
1810108338Sjulian		 * Unhook the list of completed threads.
1811124350Sschweikh		 * Anything that completes after this gets to
1812108338Sjulian		 * come in next time.
1813108338Sjulian		 * Put the list of completed thread mailboxes on
1814108338Sjulian		 * this KSE's mailbox.
1815108338Sjulian		 */
1816113793Sdavidxu		if (!(ku->ku_mflags & KMF_NOCOMPLETED) &&
1817113793Sdavidxu		    (error = thread_link_mboxes(kg, ku)) != 0)
1818111115Sdavidxu			goto out;
1819113793Sdavidxu	}
1820113793Sdavidxu	if (!uts_crit) {
1821107060Sdavidxu		nanotime(&ts);
1822113793Sdavidxu		error = copyout(&ts, &ku->ku_mailbox->km_timeofday, sizeof(ts));
1823111115Sdavidxu	}
1824111115Sdavidxu
1825111115Sdavidxuout:
1826111115Sdavidxu	if (error) {
1827111115Sdavidxu		/*
1828111129Sdavidxu		 * Things are going to be so screwed we should just kill
1829111129Sdavidxu		 * the process.
1830111115Sdavidxu		 * How do we do that?
1831111115Sdavidxu		 */
1832111115Sdavidxu		PROC_LOCK(td->td_proc);
1833111115Sdavidxu		psignal(td->td_proc, SIGSEGV);
1834111115Sdavidxu		PROC_UNLOCK(td->td_proc);
1835111115Sdavidxu	} else {
1836111115Sdavidxu		/*
1837111115Sdavidxu		 * Optimisation:
1838111115Sdavidxu		 * Ensure that we have a spare thread available,
1839111115Sdavidxu		 * for when we re-enter the kernel.
1840111115Sdavidxu		 */
1841111115Sdavidxu		if (td->td_standin == NULL)
1842111115Sdavidxu			thread_alloc_spare(td, NULL);
1843111115Sdavidxu	}
1844111115Sdavidxu
1845113793Sdavidxu	ku->ku_mflags = 0;
1846111028Sjeff	/*
1847111028Sjeff	 * Clear thread mailbox first, then clear system tick count.
1848124350Sschweikh	 * The order is important because thread_statclock() uses the
1849111028Sjeff	 * mailbox pointer to see if it is a userland thread or
1850111028Sjeff	 * a UTS kernel thread.
1851111028Sjeff	 */
1852108338Sjulian	td->td_mailbox = NULL;
1853111028Sjeff	td->td_usticks = 0;
1854104695Sjulian	return (error);	/* go sync */
185599026Sjulian}
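
/*
 * When an upcall is actually delivered by the path above (KMF_NOUPCALL
 * not set), the UTS finds roughly the following in its mailboxes; this
 * is a summary sketch, not a formal contract:
 *
 *	km_curthread	- cleared to 0, the upcall thread runs unbound
 *	km_timeofday	- refreshed via nanotime() just before returning
 *	completed list	- thread mailboxes unhooked from kg_completed and
 *			  linked onto the KSE mailbox by thread_link_mboxes()
 */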
185699026Sjulian
185799026Sjulian/*
185899026Sjulian * Enforce single-threading.
185999026Sjulian *
186099026Sjulian * Returns 1 if the caller must abort (another thread is waiting to
186199026Sjulian * exit the process or similar). Process is locked!
186299026Sjulian * Returns 0 when you are successfully the only thread running.
186399026Sjulian * A process has successfully single-threaded in the suspend mode when
186499026Sjulian * there are no threads in user mode. Threads in the kernel must be
186599026Sjulian * allowed to continue until they get to the user boundary. They may even
186699026Sjulian * copy out their return values and data before suspending. They may,
186799026Sjulian * however, be accelerated in reaching the user boundary as we will wake
186899026Sjulian * up any sleeping threads that are interruptible (PCATCH).
186999026Sjulian */
187099026Sjulianint
187199026Sjulianthread_single(int force_exit)
187299026Sjulian{
187399026Sjulian	struct thread *td;
187499026Sjulian	struct thread *td2;
187599026Sjulian	struct proc *p;
187699026Sjulian
187799026Sjulian	td = curthread;
187899026Sjulian	p = td->td_proc;
1879107719Sjulian	mtx_assert(&Giant, MA_OWNED);
188099026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
188199026Sjulian	KASSERT((td != NULL), ("curthread is NULL"));
188299026Sjulian
1883116361Sdavidxu	if ((p->p_flag & P_SA) == 0 && p->p_numthreads == 1)
188499026Sjulian		return (0);
188599026Sjulian
1886100648Sjulian	/* Is someone already single threading? */
1887124350Sschweikh	if (p->p_singlethread)
188899026Sjulian		return (1);
188999026Sjulian
1890108338Sjulian	if (force_exit == SINGLE_EXIT) {
189199026Sjulian		p->p_flag |= P_SINGLE_EXIT;
1892108338Sjulian	} else
189399026Sjulian		p->p_flag &= ~P_SINGLE_EXIT;
1894102950Sdavidxu	p->p_flag |= P_STOPPED_SINGLE;
1895113920Sjhb	mtx_lock_spin(&sched_lock);
189699026Sjulian	p->p_singlethread = td;
189799026Sjulian	while ((p->p_numthreads - p->p_suspcount) != 1) {
189899026Sjulian		FOREACH_THREAD_IN_PROC(p, td2) {
189999026Sjulian			if (td2 == td)
190099026Sjulian				continue;
1901113705Sdavidxu			td2->td_flags |= TDF_ASTPENDING;
1902103216Sjulian			if (TD_IS_INHIBITED(td2)) {
1903105911Sjulian				if (force_exit == SINGLE_EXIT) {
1904105911Sjulian					if (TD_IS_SUSPENDED(td2)) {
1905103216Sjulian						thread_unsuspend_one(td2);
1906105911Sjulian					}
1907105911Sjulian					if (TD_ON_SLEEPQ(td2) &&
1908105911Sjulian					    (td2->td_flags & TDF_SINTR)) {
1909126326Sjhb						sleepq_abort(td2);
1910105911Sjulian					}
1911105911Sjulian				} else {
1912105911Sjulian					if (TD_IS_SUSPENDED(td2))
1913105874Sdavidxu						continue;
1914111028Sjeff					/*
1915111028Sjeff					 * Maybe other inhibited states too?
1916111028Sjeff					 * XXXKSE Is it totally safe to
1917111028Sjeff					 * suspend a non-interruptible thread?
1918111028Sjeff					 */
1919108338Sjulian					if (td2->td_inhibitors &
1920111028Sjeff					    (TDI_SLEEPING | TDI_SWAPPED))
1921105911Sjulian						thread_suspend_one(td2);
192299026Sjulian				}
192399026Sjulian			}
192499026Sjulian		}
1925124350Sschweikh		/*
1926124350Sschweikh		 * Maybe we suspended some threads; was it enough?
1927105911Sjulian		 */
1928113920Sjhb		if ((p->p_numthreads - p->p_suspcount) == 1)
1929105911Sjulian			break;
1930105911Sjulian
193199026Sjulian		/*
193299026Sjulian		 * Wake us up when everyone else has suspended.
1933100648Sjulian		 * In the mean time we suspend as well.
193499026Sjulian		 */
1935103216Sjulian		thread_suspend_one(td);
1936113795Sdavidxu		DROP_GIANT();
193799026Sjulian		PROC_UNLOCK(p);
1938124944Sjeff		mi_switch(SW_VOL);
193999026Sjulian		mtx_unlock_spin(&sched_lock);
1940113795Sdavidxu		PICKUP_GIANT();
194199026Sjulian		PROC_LOCK(p);
1942113920Sjhb		mtx_lock_spin(&sched_lock);
194399026Sjulian	}
1944124350Sschweikh	if (force_exit == SINGLE_EXIT) {
1945113920Sjhb		if (td->td_upcall)
1946111028Sjeff			upcall_remove(td);
1947105854Sjulian		kse_purge(p, td);
1948111028Sjeff	}
1949113920Sjhb	mtx_unlock_spin(&sched_lock);
195099026Sjulian	return (0);
195199026Sjulian}
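
/*
 * A minimal sketch of how a caller is expected to use thread_single(),
 * based on the assertions above (Giant and the proc lock held); the
 * error handling shown is hypothetical:
 *
 *	PROC_LOCK(p);
 *	if (thread_single(SINGLE_EXIT)) {
 *		-- someone else is already single-threading; back out
 *		PROC_UNLOCK(p);
 *		return (ERESTART);
 *	}
 *	-- from here on we are the only thread of the process still running
 */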
195299026Sjulian
195399026Sjulian/*
195499026Sjulian * Called in from locations that can safely check to see
195599026Sjulian * whether we have to suspend or at least throttle for a
195699026Sjulian * single-thread event (e.g. fork).
195799026Sjulian *
195899026Sjulian * Such locations include userret().
195999026Sjulian * If the "return_instead" argument is non zero, the thread must be able to
196099026Sjulian * If the "return_instead" argument is non-zero, the thread must be able to
196199026Sjulian *
196299026Sjulian * The 'return_instead' argument tells the function if it may do a
196399026Sjulian * thread_exit() or suspend, or whether the caller must abort and back
196499026Sjulian * out instead.
196599026Sjulian *
196699026Sjulian * If the thread that set the single_threading request has set the
196799026Sjulian * P_SINGLE_EXIT bit in the process flags then this call will never return
196899026Sjulian * if 'return_instead' is false, but will exit.
196999026Sjulian *
197099026Sjulian * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
197199026Sjulian *---------------+--------------------+---------------------
197299026Sjulian *       0       | returns 0          |   returns 0 or 1
197399026Sjulian *               | when ST ends       |   immediately
197499026Sjulian *---------------+--------------------+---------------------
197599026Sjulian *       1       | thread exits       |   returns 1
197699026Sjulian *               |                    |  immediately
197799026Sjulian * 0 = thread_exit() or suspension ok,
197899026Sjulian * other = return error instead of stopping the thread.
197999026Sjulian *
198099026Sjulian * While a full suspension is under effect, even a single threading
198199026Sjulian * thread would be suspended if it made this call (but it shouldn't).
198299026Sjulian * This call should only be made from places where
1983124350Sschweikh * thread_exit() would be safe as that may be the outcome unless
198499026Sjulian * return_instead is set.
198599026Sjulian */
198699026Sjulianint
198799026Sjulianthread_suspend_check(int return_instead)
198899026Sjulian{
1989104502Sjmallett	struct thread *td;
1990104502Sjmallett	struct proc *p;
199199026Sjulian
199299026Sjulian	td = curthread;
199399026Sjulian	p = td->td_proc;
199499026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
199599026Sjulian	while (P_SHOULDSTOP(p)) {
1996102950Sdavidxu		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
199799026Sjulian			KASSERT(p->p_singlethread != NULL,
199899026Sjulian			    ("singlethread not set"));
199999026Sjulian			/*
2000100648Sjulian			 * The only suspension in action is a
2001100648Sjulian			 * single-threading. The single threader need not stop.
2002124350Sschweikh			 * XXX Should be safe to access unlocked
2003100646Sjulian			 * as it can only be set to be true by us.
200499026Sjulian			 */
2005100648Sjulian			if (p->p_singlethread == td)
200699026Sjulian				return (0);	/* Exempt from stopping. */
2007124350Sschweikh		}
2008100648Sjulian		if (return_instead)
200999026Sjulian			return (1);
201099026Sjulian
2011112071Sdavidxu		mtx_lock_spin(&sched_lock);
2012112071Sdavidxu		thread_stopped(p);
201399026Sjulian		/*
201499026Sjulian		 * If the process is waiting for us to exit,
201599026Sjulian		 * this thread should just suicide.
2016102950Sdavidxu		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
201799026Sjulian		 */
201899026Sjulian		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
201999026Sjulian			while (mtx_owned(&Giant))
202099026Sjulian				mtx_unlock(&Giant);
2021116361Sdavidxu			if (p->p_flag & P_SA)
2022112910Sjeff				thread_exit();
2023112910Sjeff			else
2024112910Sjeff				thr_exit1();
202599026Sjulian		}
202699026Sjulian
202799026Sjulian		/*
202899026Sjulian		 * When a thread suspends, it just
202999026Sjulian		 * moves to the process's suspend queue
203099026Sjulian		 * and stays there.
203199026Sjulian		 */
2032103216Sjulian		thread_suspend_one(td);
2033102950Sdavidxu		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
2034100632Sjulian			if (p->p_numthreads == p->p_suspcount) {
2035103216Sjulian				thread_unsuspend_one(p->p_singlethread);
2036100632Sjulian			}
2037100632Sjulian		}
2038114398Sdavidxu		DROP_GIANT();
2039113864Sjhb		PROC_UNLOCK(p);
2040124944Sjeff		mi_switch(SW_INVOL);
204199026Sjulian		mtx_unlock_spin(&sched_lock);
2042114398Sdavidxu		PICKUP_GIANT();
204399026Sjulian		PROC_LOCK(p);
204499026Sjulian	}
204599026Sjulian	return (0);
204699026Sjulian}
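
/*
 * Sketch of the calling convention tabulated in the comment above, as a
 * hypothetical caller sitting at the kernel boundary (the error value is
 * for illustration only):
 *
 *	PROC_LOCK(p);
 *	if (thread_suspend_check(1)) {
 *		PROC_UNLOCK(p);
 *		return (ERESTART);	-- must abort and unwind
 *	}
 *	PROC_UNLOCK(p);
 *
 * With return_instead == 0 the call would instead suspend (or exit)
 * here until the single-threading or stop event has ended.
 */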
204799026Sjulian
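/*
 * Suspend a single thread: account for it in p_suspcount, mark it
 * suspended and put it on the process's suspended-threads queue.
 * Called with sched_lock and the proc lock held.
 */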
2048102898Sdavidxuvoid
2049102898Sdavidxuthread_suspend_one(struct thread *td)
2050102898Sdavidxu{
2051102898Sdavidxu	struct proc *p = td->td_proc;
2052102898Sdavidxu
2053102898Sdavidxu	mtx_assert(&sched_lock, MA_OWNED);
2054113920Sjhb	PROC_LOCK_ASSERT(p, MA_OWNED);
2055112071Sdavidxu	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
2056102898Sdavidxu	p->p_suspcount++;
2057103216Sjulian	TD_SET_SUSPENDED(td);
2058102898Sdavidxu	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
2059103216Sjulian	/*
2060103216Sjulian	 * Hack: If we are suspending but are on the sleep queue
2061103216Sjulian	 * then we are in msleep or the cv equivalent. We
2062103216Sjulian	 * want to look like we have two inhibitors.
2063105911Sjulian	 * It may already be set; that doesn't matter.
2064103216Sjulian	 */
2065103216Sjulian	if (TD_ON_SLEEPQ(td))
2066103216Sjulian		TD_SET_SLEEPING(td);
2067102898Sdavidxu}
2068102898Sdavidxu
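/*
 * Undo thread_suspend_one(): remove the thread from the suspended queue,
 * clear its suspended state and make it runnable again.
 * Called with sched_lock and the proc lock held.
 */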
2069102898Sdavidxuvoid
2070102898Sdavidxuthread_unsuspend_one(struct thread *td)
2071102898Sdavidxu{
2072102898Sdavidxu	struct proc *p = td->td_proc;
2073102898Sdavidxu
2074102898Sdavidxu	mtx_assert(&sched_lock, MA_OWNED);
2075113920Sjhb	PROC_LOCK_ASSERT(p, MA_OWNED);
2076102898Sdavidxu	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
2077103216Sjulian	TD_CLR_SUSPENDED(td);
2078102898Sdavidxu	p->p_suspcount--;
2079103216Sjulian	setrunnable(td);
2080102898Sdavidxu}
2081102898Sdavidxu
208299026Sjulian/*
208399026Sjulian * Allow all threads blocked by single threading to continue running.
208499026Sjulian */
208599026Sjulianvoid
208699026Sjulianthread_unsuspend(struct proc *p)
208799026Sjulian{
208899026Sjulian	struct thread *td;
208999026Sjulian
2090100646Sjulian	mtx_assert(&sched_lock, MA_OWNED);
209199026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
209299026Sjulian	if (!P_SHOULDSTOP(p)) {
209399026Sjulian		while ((td = TAILQ_FIRST(&p->p_suspended)) != NULL) {
2094102898Sdavidxu			thread_unsuspend_one(td);
209599026Sjulian		}
2096102950Sdavidxu	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
209799026Sjulian	    (p->p_numthreads == p->p_suspcount)) {
209899026Sjulian		/*
209999026Sjulian		 * Stopping everything also did the job for the single
210099026Sjulian		 * threading request. Now we've downgraded to single-threaded,
210199026Sjulian		 * let it continue.
210299026Sjulian		 */
2103102898Sdavidxu		thread_unsuspend_one(p->p_singlethread);
210499026Sjulian	}
210599026Sjulian}
210699026Sjulian
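/*
 * End a single-threading episode started by thread_single(): clear
 * P_STOPPED_SINGLE and p_singlethread, and let any threads suspended on
 * our behalf run again (unless a blanket stop is in effect).
 */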
210799026Sjulianvoid
210899026Sjulianthread_single_end(void)
210999026Sjulian{
211099026Sjulian	struct thread *td;
211199026Sjulian	struct proc *p;
211299026Sjulian
211399026Sjulian	td = curthread;
211499026Sjulian	p = td->td_proc;
211599026Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
2116102950Sdavidxu	p->p_flag &= ~P_STOPPED_SINGLE;
2117113920Sjhb	mtx_lock_spin(&sched_lock);
211899026Sjulian	p->p_singlethread = NULL;
2119102292Sjulian	/*
2120102292Sjulian	 * If there are other threads they may now run,
2121102292Sjulian	 * unless of course there is a blanket 'stop order'
2122102292Sjulian	 * on the process. The single threader must be allowed
2123102292Sjulian	 * to continue however as this is a bad place to stop.
2124102292Sjulian	 */
2125102292Sjulian	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
2126102292Sjulian		while ((td = TAILQ_FIRST(&p->p_suspended)) != NULL) {
2127103216Sjulian			thread_unsuspend_one(td);
2128102292Sjulian		}
2129102292Sjulian	}
2130113920Sjhb	mtx_unlock_spin(&sched_lock);
213199026Sjulian}
2132