/*
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_thread.c 130735 2004-06-19 17:58:32Z marcel $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sched.h>
#include <sys/sleepqueue.h>
#include <sys/turnstile.h>
#include <sys/ktr.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>

/*
 * KSEGRP related storage.
 */
static uma_zone_t ksegrp_zone;
static uma_zone_t kse_zone;
static uma_zone_t thread_zone;

/* DEBUG ONLY */
SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
static int thread_debug = 0;
SYSCTL_INT(_kern_threads, OID_AUTO, debug, CTLFLAG_RW,
	&thread_debug, 0, "thread debug");

int max_threads_per_proc = 1500;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

int max_groups_per_proc = 500;
SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
	&max_groups_per_proc, 0, "Limit on thread groups per proc");

int max_threads_hits;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
	&max_threads_hits, 0, "");

int virtual_cpu;

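/*
 * RANGEOF() yields the number of bytes spanned by the members of 'type'
 * from the start of member 'start' up to (but not including) member
 * 'end'; handy for bzero()/bcopy() over a contiguous run of fields.
 */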
#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))

TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
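/*
 * The zombie lists above are drained by thread_reap().  The lock is a
 * spin mutex, presumably because the stash functions below may run
 * while the caller holds sched_lock, where only spin locks may be
 * acquired.
 */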
struct mtx kse_zombie_lock;
MTX_SYSINIT(kse_zombie_lock, &kse_zombie_lock, "kse zombie lock", MTX_SPIN);

void kse_purge(struct proc *p, struct thread *td);
void kse_purge_group(struct thread *td);

/* move to proc.h */
extern void	kseinit(void);
extern void	kse_GC(void);

static int
sysctl_kse_virtual_cpu(SYSCTL_HANDLER_ARGS)
{
	int error, new_val;
	int def_val;

	def_val = mp_ncpus;
	if (virtual_cpu == 0)
		new_val = def_val;
	else
		new_val = virtual_cpu;
	error = sysctl_handle_int(oidp, &new_val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (new_val < 0)
		return (EINVAL);
	virtual_cpu = new_val;
	return (0);
}

/* DEBUG ONLY */
SYSCTL_PROC(_kern_threads, OID_AUTO, virtual_cpu, CTLTYPE_INT|CTLFLAG_RW,
	0, sizeof(virtual_cpu), sysctl_kse_virtual_cpu, "I",
	"debug virtual cpus");
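/*
 * Usage sketch (from userland):
 *
 *	sysctl kern.threads.virtual_cpu=4	# pretend 4 CPUs for KSE
 *	sysctl kern.threads.virtual_cpu=0	# revert to the mp_ncpus default
 *
 * Reads report mp_ncpus while virtual_cpu is 0; negative values are
 * rejected with EINVAL.
 */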

/*
 * Thread ID allocator. The allocator keeps track of assigned IDs by
 * using a bitmap. The bitmap is created in parts. The parts are linked
 * together.
 */
typedef u_long tid_bitmap_word;

#define	TID_IDS_PER_PART	1024
#define	TID_IDS_PER_IDX		(sizeof(tid_bitmap_word) << 3)
#define	TID_BITMAP_SIZE		(TID_IDS_PER_PART / TID_IDS_PER_IDX)
#define	TID_MIN			(PID_MAX + 1)

struct tid_bitmap_part {
	STAILQ_ENTRY(tid_bitmap_part) bmp_next;
	tid_bitmap_word	bmp_bitmap[TID_BITMAP_SIZE];
	lwpid_t		bmp_base;
	int		bmp_free;
};
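/*
 * Worked example (assuming an LP64 platform, where u_long is 64 bits):
 * TID_IDS_PER_IDX is 64, so each part covers 1024 IDs in a 16-word
 * bitmap.  With the stock PID_MAX of 99999, the first part hands out
 * TIDs 100000..101023, the next 101024..102047, and so on.  A set bit
 * means the ID is free; a clear bit means it is assigned.
 */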

static STAILQ_HEAD(, tid_bitmap_part) tid_bitmap =
    STAILQ_HEAD_INITIALIZER(tid_bitmap);
static uma_zone_t tid_zone;

struct mtx tid_lock;
MTX_SYSINIT(tid_lock, &tid_lock, "TID lock", MTX_DEF);

/*
 * Prepare a thread for use.
 */
static void
thread_ctor(void *mem, int size, void *arg)
{
	struct thread	*td;

	td = (struct thread *)mem;
	td->td_tid = 0;
	td->td_state = TDS_INACTIVE;
	td->td_oncpu	= NOCPU;

	/*
	 * Note that td_critnest begins life as 1 because the thread is not
	 * running and is thereby implicitly waiting to be on the receiving
	 * end of a context switch.  A context switch must occur inside a
	 * critical section, and in fact, includes hand-off of the sched_lock.
	 * After a context switch to a newly created thread, it will release
	 * sched_lock for the first time, and its td_critnest will hit 0 for
	 * the first time.  This happens on the far end of a context switch,
	 * and when it context switches away from itself, it will in fact go
	 * back into a critical section, and hand off the sched lock to the
	 * next thread.
	 */
	td->td_critnest = 1;
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread *td;
	struct tid_bitmap_part *bmp;
	lwpid_t tid;
	int bit, idx;

	td = (struct thread *)mem;

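	/*
	 * If the thread was ever assigned a TID (td_tid > PID_MAX), hand
	 * the ID back to the allocator by setting its bit in the owning
	 * bitmap part (1 = free, 0 = assigned).
	 */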
	if (td->td_tid > PID_MAX) {
		STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
			if (td->td_tid >= bmp->bmp_base &&
			    td->td_tid < bmp->bmp_base + TID_IDS_PER_PART)
				break;
		}
		KASSERT(bmp != NULL, ("No TID bitmap?"));
		mtx_lock(&tid_lock);
		tid = td->td_tid - bmp->bmp_base;
		idx = tid / TID_IDS_PER_IDX;
		bit = tid % TID_IDS_PER_IDX;
		bmp->bmp_bitmap[idx] |= (1UL << bit);
		bmp->bmp_free++;
		mtx_unlock(&tid_lock);
	}

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static void
thread_init(void *mem, int size)
{
	struct thread	*td;

	td = (struct thread *)mem;
	vm_thread_new(td, 0);
	cpu_thread_setup(td);
	td->td_sleepqueue = sleepq_alloc();
	td->td_turnstile = turnstile_alloc();
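	/*
	 * The scheduler-private data lives immediately after the thread
	 * structure itself; the zone item was sized with
	 * sched_sizeof_thread() in threadinit() to leave room for it.
	 */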
	td->td_sched = (struct td_sched *)&td[1];
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread	*td;

	td = (struct thread *)mem;
	turnstile_free(td->td_turnstile);
	sleepq_free(td->td_sleepqueue);
	vm_thread_dispose(td);
}

/*
 * Initialize type-stable parts of a kse (when newly created).
 */
static void
kse_init(void *mem, int size)
{
	struct kse	*ke;

	ke = (struct kse *)mem;
	ke->ke_sched = (struct ke_sched *)&ke[1];
}

/*
 * Initialize type-stable parts of a ksegrp (when newly created).
 */
static void
ksegrp_init(void *mem, int size)
{
	struct ksegrp	*kg;

	kg = (struct ksegrp *)mem;
	kg->kg_sched = (struct kg_sched *)&kg[1];
}

/*
 * Link a KSE into its kse group.
 */
void
kse_link(struct kse *ke, struct ksegrp *kg)
{
	struct proc *p = kg->kg_proc;

	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
	kg->kg_kses++;
	ke->ke_state	= KES_UNQUEUED;
	ke->ke_proc	= p;
	ke->ke_ksegrp	= kg;
	ke->ke_thread	= NULL;
	ke->ke_oncpu	= NOCPU;
	ke->ke_flags	= 0;
}

void
kse_unlink(struct kse *ke)
{
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	kg = ke->ke_ksegrp;
	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
	if (ke->ke_state == KES_IDLE) {
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses--;
	}
	kg->kg_kses--;
	/*
	 * Aggregate stats from the KSE
	 */
	kse_stash(ke);
}

void
ksegrp_link(struct ksegrp *kg, struct proc *p)
{

	TAILQ_INIT(&kg->kg_threads);
	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_slpq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
	TAILQ_INIT(&kg->kg_iq);		/* all idle kses in ksegrp */
	TAILQ_INIT(&kg->kg_upcalls);	/* all upcall structures in ksegrp */
	kg->kg_proc = p;
	/*
	 * The following counters are in the -zero- section
	 * and may not need clearing.
	 */
	kg->kg_numthreads = 0;
	kg->kg_runnable   = 0;
	kg->kg_kses       = 0;
	kg->kg_runq_kses  = 0; /* XXXKSE change name */
	kg->kg_idle_kses  = 0;
	kg->kg_numupcalls = 0;
	/* Link it in now that it's consistent. */
	p->p_numksegrps++;
	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
}

void
ksegrp_unlink(struct ksegrp *kg)
{
	struct proc *p;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((kg->kg_numthreads == 0), ("ksegrp_unlink: residual threads"));
	KASSERT((kg->kg_kses == 0), ("ksegrp_unlink: residual kses"));
	KASSERT((kg->kg_numupcalls == 0), ("ksegrp_unlink: residual upcalls"));

	p = kg->kg_proc;
	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
	p->p_numksegrps--;
	/*
	 * Aggregate stats from the KSE
	 */
	ksegrp_stash(kg);
}

/*
 * For a newly created process,
 * link up all the structures and its initial threads etc.
 */
void
proc_linkup(struct proc *p, struct ksegrp *kg,
	    struct kse *ke, struct thread *td)
{

	TAILQ_INIT(&p->p_ksegrps);	     /* all ksegrps in proc */
	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
	TAILQ_INIT(&p->p_suspended);	     /* Threads suspended */
	p->p_numksegrps = 0;
	p->p_numthreads = 0;

	ksegrp_link(kg, p);
	kse_link(ke, kg);
	thread_link(td, kg);
}

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

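	/*
	 * For each UMA zone below, the ctor/dtor callbacks run on every
	 * allocation and free, while the init/fini callbacks run only when
	 * items enter or leave the zone's type-stable backing store.
	 */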
	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, 0);
	tid_zone = uma_zcreate("TID", sizeof(struct tid_bitmap_part),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
	    NULL, NULL, ksegrp_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	kse_zone = uma_zcreate("KSE", sched_sizeof_kse(),
	    NULL, NULL, kse_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	kseinit();
}

/*
 * Stash an embarrassingly extra thread into the zombie thread queue.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra kse into the zombie kse queue.
 */
void
kse_stash(struct kse *ke)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an embarrassingly extra ksegrp into the zombie ksegrp queue.
 */
void
ksegrp_stash(struct ksegrp *kg)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Reap zombie thread, kse and ksegrp resources.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;
	struct kse *ke_first, *ke_next;
	struct ksegrp *kg_first, *kg_next;

	/*
	 * Don't even bother to lock if none at this instant;
	 * we really don't care about the next instant.
	 */
	if ((!TAILQ_EMPTY(&zombie_threads))
	    || (!TAILQ_EMPTY(&zombie_kses))
	    || (!TAILQ_EMPTY(&zombie_ksegrps))) {
		mtx_lock_spin(&kse_zombie_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		ke_first = TAILQ_FIRST(&zombie_kses);
		kg_first = TAILQ_FIRST(&zombie_ksegrps);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		if (ke_first)
			TAILQ_INIT(&zombie_kses);
		if (kg_first)
			TAILQ_INIT(&zombie_ksegrps);
		mtx_unlock_spin(&kse_zombie_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_runq);
			if (td_first->td_ucred)
				crfree(td_first->td_ucred);
			thread_free(td_first);
			td_first = td_next;
		}
		while (ke_first) {
			ke_next = TAILQ_NEXT(ke_first, ke_procq);
			kse_free(ke_first);
			ke_first = ke_next;
		}
		while (kg_first) {
			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
			ksegrp_free(kg_first);
			kg_first = kg_next;
		}
	}
	kse_GC();
}

/*
 * Allocate a ksegrp.
 */
struct ksegrp *
ksegrp_alloc(void)
{
	return (uma_zalloc(ksegrp_zone, M_WAITOK));
}

/*
 * Allocate a kse.
 */
struct kse *
kse_alloc(void)
{
	return (uma_zalloc(kse_zone, M_WAITOK));
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{
	thread_reap(); /* check if any zombies to get */
	return (uma_zalloc(thread_zone, M_WAITOK));
}

/*
 * Deallocate a ksegrp.
 */
void
ksegrp_free(struct ksegrp *kg)
{
	uma_zfree(ksegrp_zone, kg);
}

/*
 * Deallocate a kse.
 */
void
kse_free(struct kse *ke)
{
	uma_zfree(kse_zone, ke);
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{

	cpu_thread_clean(td);
	uma_zfree(thread_zone, td);
}

/*
 * Assign a thread ID.
 */
lwpid_t
thread_new_tid(void)
{
	struct tid_bitmap_part *bmp, *new;
	lwpid_t tid;
	int bit, idx;

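	/*
	 * Find the first bitmap part with a free ID.  If every part is
	 * full, drop the lock and allocate a fresh part (M_WAITOK may
	 * sleep), then retake the lock and re-check: if the last part
	 * regained enough free IDs in the meantime, the fresh part is
	 * discarded again once the lock is dropped.
	 */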
	mtx_lock(&tid_lock);
	STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
		if (bmp->bmp_free)
			break;
	}
	/* Create a new bitmap if we run out of free bits. */
	if (bmp == NULL) {
		mtx_unlock(&tid_lock);
		new = uma_zalloc(tid_zone, M_WAITOK);
		mtx_lock(&tid_lock);
		bmp = STAILQ_LAST(&tid_bitmap, tid_bitmap_part, bmp_next);
		if (bmp == NULL || bmp->bmp_free < TID_IDS_PER_PART/2) {
			/* 1=free, 0=assigned. This way we can use ffsl(). */
			memset(new->bmp_bitmap, ~0U, sizeof(new->bmp_bitmap));
			new->bmp_base = (bmp == NULL) ? TID_MIN :
			    bmp->bmp_base + TID_IDS_PER_PART;
			new->bmp_free = TID_IDS_PER_PART;
			STAILQ_INSERT_TAIL(&tid_bitmap, new, bmp_next);
			bmp = new;
			new = NULL;
		}
	} else
		new = NULL;
	/* We have a bitmap with available IDs. */
	idx = 0;
	while (idx < TID_BITMAP_SIZE && bmp->bmp_bitmap[idx] == 0UL)
		idx++;
	bit = ffsl(bmp->bmp_bitmap[idx]) - 1;
	tid = bmp->bmp_base + idx * TID_IDS_PER_IDX + bit;
	bmp->bmp_bitmap[idx] &= ~(1UL << bit);
	bmp->bmp_free--;
	mtx_unlock(&tid_lock);

	if (new != NULL)
		uma_zfree(tid_zone, new);
	return (tid);
}

/*
 * Discard the current thread and exit from its context.
 * Always called with the scheduler locked.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our CPU's deadthread holder. This means
 * we needn't worry about someone else grabbing our context before we
 * do a cpu_throw().  This may not be needed now as we are under schedlock.
 * Maybe we can just do a thread_stash() as thr_exit1 does.
 */
/*  XXX
 * libthr expects its thread exit to return for the last
 * thread, meaning that the program is back to non-threaded
 * mode I guess. Because we do this (cpu_throw) unconditionally
 * here, they have their own version of it. (thr_exit1())
 * that doesn't do it all if this was the last thread.
 * It is also called from thread_suspend_check().
 * Of course in the end, they end up coming here through exit1
 * anyhow..  After fixing 'thr' to play by the rules we should be able
 * to merge these two functions together.
 */
void
thread_exit(void)
{
	struct thread *td;
	struct kse *ke;
	struct proc *p;
	struct ksegrp	*kg;

	td = curthread;
	kg = td->td_ksegrp;
	p = td->td_proc;
	ke = td->td_kse;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	KASSERT(ke != NULL, ("thread exiting without a kse"));
	KASSERT(kg != NULL, ("thread exiting without a kse group"));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	CTR1(KTR_PROC, "thread_exit: thread %p", td);
	mtx_assert(&Giant, MA_NOTOWNED);

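	/*
	 * Recycle the spare "standin" thread, if one was being kept
	 * around for servicing upcalls; this thread will never use it.
	 */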
	if (td->td_standin != NULL) {
		thread_stash(td->td_standin);
		td->td_standin = NULL;
	}

	cpu_thread_exit(td);	/* XXXSMP */

	/*
	 * The last thread is left attached to the process
	 * so that the whole bundle gets recycled. Skip
	 * all this stuff.
	 */
	if (p->p_numthreads > 1) {
		thread_unlink(td);
		if (p->p_maxthrwaits)
			wakeup(&p->p_numthreads);
		/*
		 * The test below is NOT true if we are the
		 * sole exiting thread. P_STOPPED_SINGLE is unset
		 * in exit1() after it is the only survivor.
		 */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}

		/*
		 * Because each upcall structure has an owner thread,
		 * and the owner thread exits only when the process is
		 * exiting, an upcall to userland is no longer needed
		 * and deleting the upcall structure is safe here.
		 * So once all threads in a group have exited, all upcalls
		 * in the group should be automatically freed.
		 */
		if (td->td_upcall)
			upcall_remove(td);

		sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
		sched_exit_kse(FIRST_KSE_IN_PROC(p), ke);
		ke->ke_state = KES_UNQUEUED;
		ke->ke_thread = NULL;
		/*
		 * Decide what to do with the KSE attached to this thread.
		 */
		if (ke->ke_flags & KEF_EXIT) {
			kse_unlink(ke);
			if (kg->kg_kses == 0) {
				sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), kg);
				ksegrp_unlink(kg);
			}
		} else
			kse_reassign(ke);
		PROC_UNLOCK(p);
		td->td_kse	= NULL;
		td->td_state	= TDS_INACTIVE;
#if 0
		td->td_proc	= NULL;
#endif
		td->td_ksegrp	= NULL;
		td->td_last_kse	= NULL;
		PCPU_SET(deadthread, td);
	} else {
		PROC_UNLOCK(p);
	}
	/* XXX Shouldn't cpu_throw() here. */
	mtx_assert(&sched_lock, MA_OWNED);
	cpu_throw(td, choosethread());
	panic("I'm a teapot!");
	/* NOTREACHED */
}

/*
 * Do any thread-specific cleanups that may be needed in wait().
 * Called with Giant, proc and schedlock not held.
 */
void
thread_wait(struct proc *p)
{
	struct thread *td;

	mtx_assert(&Giant, MA_NOTOWNED);
	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
	KASSERT((p->p_numksegrps == 1), ("Multiple ksegrps in wait1()"));
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_standin != NULL) {
			thread_free(td->td_standin);
			td->td_standin = NULL;
		}
		cpu_thread_clean(td);
	}
	thread_reap();	/* check for zombie threads etc. */
}

/*
 * Link a thread to a process.
 * Set up anything that needs to be initialized for it to
 * be used by the process.
 *
 * Note that we do not link to the proc's ucred here.
 * The thread is linked as if running but no KSE assigned.
 */
void
thread_link(struct thread *td, struct ksegrp *kg)
{
	struct proc *p;

	p = kg->kg_proc;
	td->td_state    = TDS_INACTIVE;
	td->td_proc     = p;
	td->td_ksegrp   = kg;
	td->td_last_kse = NULL;
	td->td_flags    = 0;
	td->td_kflags	= 0;
	td->td_kse      = NULL;

	LIST_INIT(&td->td_contested);
	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
	p->p_numthreads++;
	kg->kg_numthreads++;
}

void
thread_unlink(struct thread *td)
{
	struct proc *p = td->td_proc;
	struct ksegrp *kg = td->td_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_REMOVE(&p->p_threads, td, td_plist);
	p->p_numthreads--;
	TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
	kg->kg_numthreads--;
	/* could clear a few other things here */
}

/*
 * Purge a ksegrp's KSE resources. When a ksegrp is preparing to
 * exit, it calls this function.
 */
void
kse_purge_group(struct thread *td)
{
	struct ksegrp *kg;
	struct kse *ke;

	kg = td->td_ksegrp;
	KASSERT(kg->kg_numthreads == 1, ("%s: bad thread number", __func__));
	while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
		KASSERT(ke->ke_state == KES_IDLE,
			("%s: wrong idle KSE state", __func__));
		kse_unlink(ke);
	}
	KASSERT((kg->kg_kses == 1),
		("%s: ksegrp still has %d KSEs", __func__, kg->kg_kses));
	KASSERT((kg->kg_numupcalls == 0),
	        ("%s: ksegrp still has %d upcall structures",
		__func__, kg->kg_numupcalls));
}

/*
 * Purge a process's KSE resources. When a process is preparing to
 * exit, it calls kse_purge to release any extra KSE resources in
 * the process.
 */
void
kse_purge(struct proc *p, struct thread *td)
{
	struct ksegrp *kg;
	struct kse *ke;

	KASSERT(p->p_numthreads == 1, ("bad thread number"));
	while ((kg = TAILQ_FIRST(&p->p_ksegrps)) != NULL) {
		TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
		p->p_numksegrps--;
		/*
		 * There is no ownership for KSEs, so after all threads
		 * in the group have exited, it is possible that some KSEs
		 * were left on the idle queue; gc them now.
		 */
		while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
			KASSERT(ke->ke_state == KES_IDLE,
			   ("%s: wrong idle KSE state", __func__));
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			kg->kg_idle_kses--;
			TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
			kg->kg_kses--;
			kse_stash(ke);
		}
		KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
		        ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
		        ("ksegrp has wrong kg_kses: %d", kg->kg_kses));
		KASSERT((kg->kg_numupcalls == 0),
		        ("%s: ksegrp still has %d upcall structures",
			__func__, kg->kg_numupcalls));

		if (kg != td->td_ksegrp)
			ksegrp_stash(kg);
	}
	TAILQ_INSERT_HEAD(&p->p_ksegrps, td->td_ksegrp, kg_ksegrp);
	p->p_numksegrps++;
}

/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar). Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single threaded in the suspend mode when
 * there are no threads in user mode. Threads in the kernel must be
 * allowed to continue until they get to the user boundary. They may even
 * copy out their return values and data before suspending. They may
 * however be accelerated in reaching the user boundary as we will wake
 * up any sleeping threads that are interruptible. (PCATCH).
 */
int
thread_single(int force_exit)
{
	struct thread *td;
	struct thread *td2;
	struct proc *p;
	int remaining;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT((td != NULL), ("curthread is NULL"));

	if ((p->p_flag & P_SA) == 0 && p->p_numthreads == 1)
		return (0);

	/* Is someone already single threading? */
	if (p->p_singlethread)
		return (1);

	if (force_exit == SINGLE_EXIT) {
		p->p_flag |= P_SINGLE_EXIT;
	} else
		p->p_flag &= ~P_SINGLE_EXIT;
	p->p_flag |= P_STOPPED_SINGLE;
	mtx_lock_spin(&sched_lock);
	p->p_singlethread = td;
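	/*
	 * Count the threads that still need to comply.  When exiting, even
	 * already-suspended threads count, since they must be woken up to
	 * exit; otherwise suspended threads are already stopped and only
	 * the remainder must be dealt with.
	 */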
	if (force_exit == SINGLE_EXIT)
		remaining = p->p_numthreads;
	else
		remaining = p->p_numthreads - p->p_suspcount;
	while (remaining != 1) {
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			td2->td_flags |= TDF_ASTPENDING;
			if (TD_IS_INHIBITED(td2)) {
				if (force_exit == SINGLE_EXIT) {
					if (TD_IS_SUSPENDED(td2)) {
						thread_unsuspend_one(td2);
					}
					if (TD_ON_SLEEPQ(td2) &&
					    (td2->td_flags & TDF_SINTR)) {
						sleepq_abort(td2);
					}
				} else {
					if (TD_IS_SUSPENDED(td2))
						continue;
					/*
					 * Maybe other inhibited states too?
					 * XXXKSE Is it totally safe to
					 * suspend a non-interruptible thread?
					 */
					if (td2->td_inhibitors &
					    (TDI_SLEEPING | TDI_SWAPPED))
						thread_suspend_one(td2);
				}
			}
		}
		if (force_exit == SINGLE_EXIT)
			remaining = p->p_numthreads;
		else
			remaining = p->p_numthreads - p->p_suspcount;

		/*
		 * Maybe we suspended some threads; was it enough?
		 */
		if (remaining == 1)
			break;

		/*
		 * Wake us up when everyone else has suspended.
		 * In the mean time we suspend as well.
		 */
		thread_suspend_one(td);
		PROC_UNLOCK(p);
		mi_switch(SW_VOL);
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		if (force_exit == SINGLE_EXIT)
			remaining = p->p_numthreads;
		else
			remaining = p->p_numthreads - p->p_suspcount;
	}
	if (force_exit == SINGLE_EXIT) {
		if (td->td_upcall)
			upcall_remove(td);
		kse_purge(p, td);
	}
	mtx_unlock_spin(&sched_lock);
	return (0);
}

/*
 * Called in from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is nonzero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          |   returns 0 or 1
 *               | when ST ends       |   immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       |   returns 1
 *               |                    |   immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is under effect, even a single threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
int
thread_suspend_check(int return_instead)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	while (P_SHOULDSTOP(p)) {
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			KASSERT(p->p_singlethread != NULL,
			    ("singlethread not set"));
			/*
			 * The only suspension in action is a
			 * single-threading. The single threader need not
			 * stop. XXX Should be safe to access unlocked
			 * as it can only be set to be true by us.
			 */
			if (p->p_singlethread == td)
				return (0);	/* Exempt from stopping. */
		}
		if (return_instead)
			return (1);

		mtx_lock_spin(&sched_lock);
		thread_stopped(p);
		/*
		 * If the process is waiting for us to exit,
		 * this thread should just exit.
		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
		 */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
			if (p->p_flag & P_SA)
				thread_exit();
			else
				thr_exit1();
		}

		/*
		 * When a thread suspends, it just
		 * moves to the process's suspend queue
		 * and stays there.
		 */
		thread_suspend_one(td);
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}
		PROC_UNLOCK(p);
		mi_switch(SW_INVOL);
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
	}
	return (0);
}

void
thread_suspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
	p->p_suspcount++;
	TD_SET_SUSPENDED(td);
	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
	/*
	 * Hack: If we are suspending but are on the sleep queue
	 * then we are in msleep or the cv equivalent. We
	 * want to look like we have two Inhibitors.
	 * May already be set; doesn't matter.
	 */
	if (TD_ON_SLEEPQ(td))
		TD_SET_SLEEPING(td);
}

void
thread_unsuspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
	TD_CLR_SUSPENDED(td);
	p->p_suspcount--;
	setrunnable(td);
}

/*
 * Allow all threads blocked by single threading to continue running.
 */
void
thread_unsuspend(struct proc *p)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!P_SHOULDSTOP(p)) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
	    (p->p_numthreads == p->p_suspcount)) {
		/*
		 * Stopping everything also did the job for the single
		 * threading request. Now we've downgraded to single-threaded,
		 * let it continue.
		 */
		thread_unsuspend_one(p->p_singlethread);
	}
}

void
thread_single_end(void)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT);
	mtx_lock_spin(&sched_lock);
	p->p_singlethread = NULL;
	/*
	 * If there are other threads they may now run,
	 * unless of course there is a blanket 'stop order'
	 * on the process. The single threader must be allowed
	 * to continue however as this is a bad place to stop.
	 */
	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
	}
	mtx_unlock_spin(&sched_lock);
}