/*-
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_thread.c 167944 2007-03-27 16:51:34Z jhb $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sched.h>
#include <sys/sleepqueue.h>
#include <sys/turnstile.h>
#include <sys/ktr.h>
#include <sys/umtx.h>

#include <security/audit/audit.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>

/*
 * thread related storage.
 */
static uma_zone_t thread_zone;

SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");

int max_threads_per_proc = 1500;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

int max_threads_hits;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
	&max_threads_hits, 0, "");

#ifdef KSE
int virtual_cpu;
#endif

TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
struct mtx kse_zombie_lock;
MTX_SYSINIT(kse_zombie_lock, &kse_zombie_lock, "kse zombie lock", MTX_SPIN);

#ifdef KSE
static int
sysctl_kse_virtual_cpu(SYSCTL_HANDLER_ARGS)
{
	int error, new_val;
	int def_val;

	def_val = mp_ncpus;
	if (virtual_cpu == 0)
		new_val = def_val;
	else
		new_val = virtual_cpu;
	error = sysctl_handle_int(oidp, &new_val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (new_val < 0)
		return (EINVAL);
	virtual_cpu = new_val;
	return (0);
}

/* DEBUG ONLY */
SYSCTL_PROC(_kern_threads, OID_AUTO, virtual_cpu, CTLTYPE_INT|CTLFLAG_RW,
	0, sizeof(virtual_cpu), sysctl_kse_virtual_cpu, "I",
	"debug virtual cpus");
#endif

struct mtx tid_lock;
static struct unrhdr *tid_unrhdr;

/*
 * Prepare a thread for use.
 */
static int
thread_ctor(void *mem, int size, void *arg, int flags)
{
	struct thread	*td;

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
	td->td_oncpu = NOCPU;

	td->td_tid = alloc_unr(tid_unrhdr);
	td->td_syscalls = 0;

	/*
	 * Note that td_critnest begins life as 1 because the thread is not
	 * running and is thereby implicitly waiting to be on the receiving
	 * end of a context switch.  A context switch must occur inside a
	 * critical section, and in fact, includes hand-off of the sched_lock.
	 * After a context switch to a newly created thread, it will release
	 * sched_lock for the first time, and its td_critnest will hit 0 for
	 * the first time.  This happens on the far end of a context switch,
	 * and when it context switches away from itself, it will in fact go
	 * back into a critical section, and hand off the sched lock to the
	 * next thread.
	 */
	td->td_critnest = 1;

#ifdef AUDIT
	audit_thread_alloc(td);
#endif
	umtx_thread_alloc(td);
	return (0);
}
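
/*
 * Editorial sketch (not compiled): td_critnest counts critical-section
 * nesting, as the comment above describes.  Using the standard
 * critical_enter()/critical_exit() KPI on the current thread:
 */
#if 0
	critical_enter();	/* td_critnest: 0 -> 1; preemption disabled */
	critical_enter();	/* 1 -> 2; nesting is just a count */
	critical_exit();	/* 2 -> 1; still inside a critical section */
	critical_exit();	/* 1 -> 0; preemption is allowed again */
#endif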

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread *td;

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
#ifdef AUDIT
	audit_thread_free(td);
#endif
	free_unr(tid_unrhdr, td->td_tid);
	sched_newthread(td);
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static int
thread_init(void *mem, int size, int flags)
{
	struct thread *td;

	td = (struct thread *)mem;

	vm_thread_new(td, 0);
	cpu_thread_setup(td);
	td->td_sleepqueue = sleepq_alloc();
	td->td_turnstile = turnstile_alloc();
	td->td_sched = (struct td_sched *)&td[1];
	sched_newthread(td);
	umtx_thread_init(td);
	return (0);
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread *td;

	td = (struct thread *)mem;
	turnstile_free(td->td_turnstile);
	sleepq_free(td->td_sleepqueue);
	umtx_thread_fini(td);
	vm_thread_dispose(td);
}

/*
 * For a newly created process,
 * link up all the structures and its initial thread etc.
 * Called from:
 * {arch}/{arch}/machdep.c   ia64_init(), init386() etc.
 * proc_dtor() (should go away)
 * proc_init()
 */
void
proc_linkup(struct proc *p, struct thread *td)
{
	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
	TAILQ_INIT(&p->p_upcalls);	     /* upcall list */
	sigqueue_init(&p->p_sigqueue, p);
	p->p_ksi = ksiginfo_alloc(1);
	if (p->p_ksi != NULL) {
		/* XXX p_ksi may be null if the ksiginfo zone is not ready. */
		p->p_ksi->ksi_flags = KSI_EXT | KSI_INS;
	}
	LIST_INIT(&p->p_mqnotifier);
	p->p_numthreads = 0;
	thread_link(td, p);
}

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

	mtx_init(&tid_lock, "TID lock", NULL, MTX_DEF);
	tid_unrhdr = new_unrhdr(PID_MAX + 1, INT_MAX, &tid_lock);

	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    16 - 1, 0);
#ifdef KSE
	kseinit();	/* Set up KSE-specific stuff, e.g. the upcall zone. */
#endif
}
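
/*
 * Editorial sketch (not compiled): how the four UMA callbacks given to
 * uma_zcreate() above fire.  thread_init()/thread_fini() run once per
 * backing item, so type-stable state (kernel stack, sleep queue,
 * turnstile) survives in the zone cache; thread_ctor()/thread_dtor()
 * run on every allocate/free.
 */
#if 0
	struct thread *td;

	td = uma_zalloc(thread_zone, M_WAITOK);	/* init (first time), then ctor */
	uma_zfree(thread_zone, td);	/* dtor now; fini only when the item
					   is reclaimed from the zone */
#endif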

/*
 * Stash an embarrassingly extra thread into the zombie thread queue.
 * Use the slpq as that must be unused by now.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_slpq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Reap zombie kse resources.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;

	/*
	 * Don't even bother to lock if none at this instant;
	 * we really don't care about the next instant.
	 */
	if (!TAILQ_EMPTY(&zombie_threads)) {
		mtx_lock_spin(&kse_zombie_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		mtx_unlock_spin(&kse_zombie_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_slpq);
			if (td_first->td_ucred)
				crfree(td_first->td_ucred);
			thread_free(td_first);
			td_first = td_next;
		}
	}
}
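
/*
 * Editorial sketch (not compiled, hypothetical names): the general
 * detach-then-walk pattern thread_reap() uses.  The whole list is taken
 * off the shared head under the spin lock, so the subsequent walk and
 * free need no locking at all.
 */
#if 0
	mtx_lock_spin(&some_lock);
	item = TAILQ_FIRST(&shared_list);
	TAILQ_INIT(&shared_list);	/* detach every node at once */
	mtx_unlock_spin(&some_lock);
	/* The 'item' chain is now private; walk and free it unlocked. */
#endif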

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{

	thread_reap(); /* check if any zombies to get */
	return (uma_zalloc(thread_zone, M_WAITOK));
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{

	cpu_thread_clean(td);
	uma_zfree(thread_zone, td);
}
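
/*
 * Editorial sketch (not compiled, hypothetical caller): the expected
 * pairing.  thread_alloc() passes M_WAITOK and so may sleep; it must
 * not be called from a context that cannot sleep.
 */
#if 0
	struct thread *newtd;

	newtd = thread_alloc();	/* reaps zombies, then allocates */
	/* ... set the thread up, or hand it off via thread_link() ... */
	thread_free(newtd);
#endif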

/*
 * Discard the current thread and exit from its context.
 * Always called with scheduler locked.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our CPU's deadthread holder. This means
 * we needn't worry about someone else grabbing our context before we
 * do a cpu_throw().  This may not be needed now as we are under schedlock.
 * Maybe we can just do a thread_stash() as thr_exit1 does.
 */
/*  XXX
 * libthr expects its thread exit to return for the last
 * thread, meaning that the program is back to non-threaded
 * mode I guess. Because we do this (cpu_throw) unconditionally
 * here, they have their own version of it (thr_exit1())
 * that doesn't do it all if this was the last thread.
 * It is also called from thread_suspend_check().
 * Of course in the end, they end up coming here through exit1()
 * anyhow.  After fixing 'thr' to play by the rules we should be able
 * to merge these two functions together.
 *
 * called from:
 * exit1()
 * kse_exit()
 * thr_exit()
 * ifdef KSE
 * thread_user_enter()
 * thread_userret()
 * endif
 * thread_suspend_check()
 */
void
thread_exit(void)
{
	uint64_t new_switchtime;
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td,
	    (long)p->p_pid, p->p_comm);
	KASSERT(TAILQ_EMPTY(&td->td_sigqueue.sq_list), ("signal pending"));

#ifdef AUDIT
	AUDIT_SYSCALL_EXIT(0, td);
#endif

#ifdef KSE
	if (td->td_standin != NULL) {
		/*
		 * Note that we don't need to free the cred here as it
		 * is done in thread_reap().
		 */
		thread_stash(td->td_standin);
		td->td_standin = NULL;
	}
#endif

	umtx_thread_exit(td);

	/*
	 * Drop FPU & debug register state storage, or any other
	 * architecture specific resources that
	 * would not be on a new untouched process.
	 */
	cpu_thread_exit(td);	/* XXXSMP */

#ifdef KSE
	/*
	 * The thread is exiting, so the scheduler can release its stuff
	 * and collect stats etc.
	 * XXX this is not very right, since PROC_UNLOCK may still
	 * need scheduler stuff.
	 */
	sched_thread_exit(td);
#endif

	/* Do the same timestamp bookkeeping that mi_switch() would do. */
	new_switchtime = cpu_ticks();
	p->p_rux.rux_runtime += (new_switchtime - PCPU_GET(switchtime));
	p->p_rux.rux_uticks += td->td_uticks;
	p->p_rux.rux_sticks += td->td_sticks;
	p->p_rux.rux_iticks += td->td_iticks;
	PCPU_SET(switchtime, new_switchtime);
	PCPU_SET(switchticks, ticks);
	cnt.v_swtch++;

	/* Add our usage into the usage of all our children. */
	if (p->p_numthreads == 1)
		ruadd(p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux);

	/*
	 * The last thread is left attached to the process
	 * so that the whole bundle gets recycled. Skip
	 * all this stuff if we never had threads.
	 * EXIT clears all sign of other threads when
	 * it goes to single threading, so the last thread always
	 * takes the short path.
	 */
	if (p->p_flag & P_HADTHREADS) {
		if (p->p_numthreads > 1) {
			thread_unlink(td);

			sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);

			/*
			 * The test below is NOT true if we are the
			 * sole exiting thread. P_STOPPED_SINGLE is unset
			 * in exit1() after it is the only survivor.
			 */
			if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
				if (p->p_numthreads == p->p_suspcount) {
					thread_unsuspend_one(p->p_singlethread);
				}
			}

#ifdef KSE
			/*
			 * Because each upcall structure has an owner thread,
			 * and the owner thread exits only when the process is
			 * in the exiting state, no upcall to userland is
			 * needed any longer, so deleting the upcall structure
			 * is safe here.  Thus when all threads in a group
			 * have exited, all upcalls in the group should be
			 * automatically freed.
			 *  XXXKSE This is a KSE thing and should be exported
			 * there somehow.
			 */
			upcall_remove(td);
#endif

			PROC_UNLOCK(p);
			PCPU_SET(deadthread, td);
		} else {
			/*
			 * The last thread is exiting, but not through exit().
			 * What should we do?
			 * Theoretically this can't happen:
			 * exit1() - clears threading flags before coming here
			 * kse_exit() - treats last thread specially
			 * thr_exit() - treats last thread specially
			 * ifdef KSE
			 * thread_user_enter() - only if more exist
			 * thread_userret() - only if more exist
			 * endif
			 * thread_suspend_check() - only if more exist
			 */
			panic("thread_exit: Last thread exiting on its own");
		}
	} else {
		/*
		 * A non-threaded process comes here.
		 * This includes an ex-threaded process that is coming
		 * here via exit1() (exit1() dethreads the proc first).
		 */
		PROC_UNLOCK(p);
	}
	td->td_state = TDS_INACTIVE;
	CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td);
	cpu_throw(td, choosethread());
	panic("I'm a teapot!");
	/* NOTREACHED */
}
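
/*
 * Editorial sketch (not compiled): the locking protocol a caller of
 * thread_exit() must follow, per the assertions at the top of the
 * function: proc lock and sched_lock held, Giant not held.
 */
#if 0
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	thread_exit();			/* does not return */
#endif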

/*
 * Do any thread-specific cleanups that may be needed in wait().
 * Called with Giant, proc and schedlock not held.
 */
void
thread_wait(struct proc *p)
{
	struct thread *td;

	mtx_assert(&Giant, MA_NOTOWNED);
	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
	FOREACH_THREAD_IN_PROC(p, td) {
#ifdef KSE
		if (td->td_standin != NULL) {
			if (td->td_standin->td_ucred != NULL) {
				crfree(td->td_standin->td_ucred);
				td->td_standin->td_ucred = NULL;
			}
			thread_free(td->td_standin);
			td->td_standin = NULL;
		}
#endif
		cpu_thread_clean(td);
		crfree(td->td_ucred);
	}
	thread_reap();	/* check for zombie threads etc. */
}

/*
 * Link a thread to a process.
 * Set up anything that needs to be initialized for it to
 * be used by the process.
 *
 * Note that we do not link to the proc's ucred here.
 * The thread is linked as if running but with no KSE assigned.
 * Called from:
 *  proc_linkup()
 *  thread_schedule_upcall()
 *  thr_create()
 */
void
thread_link(struct thread *td, struct proc *p)
{

	td->td_state    = TDS_INACTIVE;
	td->td_proc     = p;
	td->td_flags    = 0;

	LIST_INIT(&td->td_contested);
	sigqueue_init(&td->td_sigqueue, p);
	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
	p->p_numthreads++;
}

/*
 * Convert a process with one thread to an unthreaded process.
 * Called from:
 *  thread_single(exit)  (called from execve and exit)
 *  kse_exit()		XXX may need cleaning up wrt KSE stuff
 */
void
thread_unthread(struct thread *td)
{
	struct proc *p = td->td_proc;

	KASSERT((p->p_numthreads == 1), ("Unthreading with >1 threads"));
#ifdef KSE
	upcall_remove(td);
	p->p_flag &= ~(P_SA|P_HADTHREADS);
	td->td_mailbox = NULL;
	td->td_pflags &= ~(TDP_SA | TDP_CAN_UNBIND);
	if (td->td_standin != NULL) {
		thread_stash(td->td_standin);
		td->td_standin = NULL;
	}
	sched_set_concurrency(p, 1);
#else
	p->p_flag &= ~P_HADTHREADS;
#endif
}

/*
 * Called from:
 *  thread_exit()
 */
void
thread_unlink(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_REMOVE(&p->p_threads, td, td_plist);
	p->p_numthreads--;
	/* Could clear a few other things here. */
	/* Must NOT clear links to proc! */
}

/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar). Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single threaded in the suspend mode when
 * there are no threads in user mode. Threads in the kernel must be
 * allowed to continue until they get to the user boundary. They may even
 * copy out their return values and data before suspending. They may however
 * be accelerated in reaching the user boundary as we will wake up
 * any sleeping threads that are interruptible (PCATCH).
 */
int
thread_single(int mode)
{
	struct thread *td;
	struct thread *td2;
	struct proc *p;
	int remaining;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT((td != NULL), ("curthread is NULL"));

	if ((p->p_flag & P_HADTHREADS) == 0)
		return (0);

	/* Is someone already single threading? */
	if (p->p_singlethread != NULL && p->p_singlethread != td)
		return (1);

	if (mode == SINGLE_EXIT) {
		p->p_flag |= P_SINGLE_EXIT;
		p->p_flag &= ~P_SINGLE_BOUNDARY;
	} else {
		p->p_flag &= ~P_SINGLE_EXIT;
		if (mode == SINGLE_BOUNDARY)
			p->p_flag |= P_SINGLE_BOUNDARY;
		else
			p->p_flag &= ~P_SINGLE_BOUNDARY;
	}
	p->p_flag |= P_STOPPED_SINGLE;
	mtx_lock_spin(&sched_lock);
	p->p_singlethread = td;
	if (mode == SINGLE_EXIT)
		remaining = p->p_numthreads;
	else if (mode == SINGLE_BOUNDARY)
		remaining = p->p_numthreads - p->p_boundary_count;
	else
		remaining = p->p_numthreads - p->p_suspcount;
	while (remaining != 1) {
		if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE)
			goto stopme;
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			td2->td_flags |= TDF_ASTPENDING;
			if (TD_IS_INHIBITED(td2)) {
				switch (mode) {
				case SINGLE_EXIT:
					if (td->td_flags & TDF_DBSUSPEND)
						td->td_flags &= ~TDF_DBSUSPEND;
					if (TD_IS_SUSPENDED(td2))
						thread_unsuspend_one(td2);
					if (TD_ON_SLEEPQ(td2) &&
					    (td2->td_flags & TDF_SINTR))
						sleepq_abort(td2, EINTR);
					break;
				case SINGLE_BOUNDARY:
					if (TD_IS_SUSPENDED(td2) &&
					    !(td2->td_flags & TDF_BOUNDARY))
						thread_unsuspend_one(td2);
					if (TD_ON_SLEEPQ(td2) &&
					    (td2->td_flags & TDF_SINTR))
						sleepq_abort(td2, ERESTART);
					break;
				default:
					if (TD_IS_SUSPENDED(td2))
						continue;
					/*
					 * Maybe other inhibited states too?
					 */
					if ((td2->td_flags & TDF_SINTR) &&
					    (td2->td_inhibitors &
					    (TDI_SLEEPING | TDI_SWAPPED)))
						thread_suspend_one(td2);
					break;
				}
			}
#ifdef SMP
			else if (TD_IS_RUNNING(td2) && td != td2) {
				forward_signal(td2);
			}
#endif
		}
		if (mode == SINGLE_EXIT)
			remaining = p->p_numthreads;
		else if (mode == SINGLE_BOUNDARY)
			remaining = p->p_numthreads - p->p_boundary_count;
		else
			remaining = p->p_numthreads - p->p_suspcount;

		/*
		 * Maybe we suspended some threads... was it enough?
		 */
		if (remaining == 1)
			break;

stopme:
		/*
		 * Wake us up when everyone else has suspended.
		 * In the meantime we suspend as well.
		 */
		thread_stopped(p);
		thread_suspend_one(td);
		PROC_UNLOCK(p);
		mi_switch(SW_VOL, NULL);
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		if (mode == SINGLE_EXIT)
			remaining = p->p_numthreads;
		else if (mode == SINGLE_BOUNDARY)
			remaining = p->p_numthreads - p->p_boundary_count;
		else
			remaining = p->p_numthreads - p->p_suspcount;
	}
	if (mode == SINGLE_EXIT) {
		/*
		 * We have gotten rid of all the other threads and we
		 * are about to either exit or exec. In either case,
		 * we try our utmost to revert to being a non-threaded
		 * process.
		 */
		p->p_singlethread = NULL;
		p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT);
		thread_unthread(td);
	}
	mtx_unlock_spin(&sched_lock);
	return (0);
}
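
/*
 * Editorial sketch (not compiled, hypothetical caller): the typical
 * pairing of thread_single() with thread_single_end(), modeled on the
 * exec/exit style of use this file describes.
 */
#if 0
	PROC_LOCK(p);
	if (thread_single(SINGLE_BOUNDARY) != 0) {
		/* Another thread is already single-threading; abort. */
		PROC_UNLOCK(p);
		return (ERESTART);
	}
	/* We are now the only thread running; do the critical work. */
	thread_single_end();		/* let the other threads resume */
	PROC_UNLOCK(p);
#endif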

/*
 * Called in from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is non-zero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          |   returns 0 or 1
 *               | when ST ends       |   immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       |   returns 1
 *               |                    |   immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is under effect, even a single threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
int
thread_suspend_check(int return_instead)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	while (P_SHOULDSTOP(p) ||
	      ((p->p_flag & P_TRACED) && (td->td_flags & TDF_DBSUSPEND))) {
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			KASSERT(p->p_singlethread != NULL,
			    ("singlethread not set"));
			/*
			 * The only suspension in action is a
			 * single-threading. Single threader need not stop.
			 * XXX Should be safe to access unlocked
			 * as it can only be set to be true by us.
			 */
			if (p->p_singlethread == td)
				return (0);	/* Exempt from stopping. */
		}
		if ((p->p_flag & P_SINGLE_EXIT) && return_instead)
			return (EINTR);

		/* Should we goto user boundary if we didn't come from there? */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
		    (p->p_flag & P_SINGLE_BOUNDARY) && return_instead)
			return (ERESTART);

		/* If thread will exit, flush its pending signals. */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td))
			sigqueue_flush(&td->td_sigqueue);

		mtx_lock_spin(&sched_lock);
		thread_stopped(p);
		/*
		 * If the process is waiting for us to exit,
		 * this thread should just suicide.
		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
		 */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td))
			thread_exit();

		/*
		 * When a thread suspends, it just
		 * gets taken off all queues.
		 */
		thread_suspend_one(td);
		if (return_instead == 0) {
			p->p_boundary_count++;
			td->td_flags |= TDF_BOUNDARY;
		}
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount)
				thread_unsuspend_one(p->p_singlethread);
		}
		PROC_UNLOCK(p);
		mi_switch(SW_INVOL, NULL);
		if (return_instead == 0) {
			p->p_boundary_count--;
			td->td_flags &= ~TDF_BOUNDARY;
		}
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
	}
	return (0);
}
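
/*
 * Editorial sketch (not compiled, hypothetical caller): how a path that
 * cannot stop here consults thread_suspend_check().  With
 * return_instead != 0 the call never blocks and hands back the error
 * the caller should propagate, per the table above.
 */
#if 0
	PROC_LOCK(p);
	error = thread_suspend_check(1);	/* never suspends in place */
	PROC_UNLOCK(p);
	if (error != 0)
		return (error);		/* EINTR or ERESTART */
#endif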

void
thread_suspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
	p->p_suspcount++;
	TD_SET_SUSPENDED(td);
}

void
thread_unsuspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended"));
	TD_CLR_SUSPENDED(td);
	p->p_suspcount--;
	setrunnable(td);
}

/*
 * Allow all threads blocked by single threading to continue running.
 */
void
thread_unsuspend(struct proc *p)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!P_SHOULDSTOP(p)) {
		FOREACH_THREAD_IN_PROC(p, td) {
			if (TD_IS_SUSPENDED(td)) {
				thread_unsuspend_one(td);
			}
		}
	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
	    (p->p_numthreads == p->p_suspcount)) {
		/*
		 * Stopping everything also did the job for the single
		 * threading request. Now we've downgraded to single-threaded,
		 * let it continue.
		 */
		thread_unsuspend_one(p->p_singlethread);
	}
}

/*
 * End the single threading mode.
 */
void
thread_single_end(void)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY);
	mtx_lock_spin(&sched_lock);
	p->p_singlethread = NULL;
	/*
	 * If there are other threads they may now run,
	 * unless of course there is a blanket 'stop order'
	 * on the process. The single threader must be allowed
	 * to continue however as this is a bad place to stop.
	 */
	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
		FOREACH_THREAD_IN_PROC(p, td) {
			if (TD_IS_SUSPENDED(td)) {
				thread_unsuspend_one(td);
			}
		}
	}
	mtx_unlock_spin(&sched_lock);
}

struct thread *
thread_find(struct proc *p, lwpid_t tid)
{
	struct thread *td;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_lock_spin(&sched_lock);
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_tid == tid)
			break;
	}
	mtx_unlock_spin(&sched_lock);
	return (td);
}
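
/*
 * Editorial sketch (not compiled, hypothetical caller): looking a
 * thread up by tid.  FOREACH_THREAD_IN_PROC() leaves 'td' NULL when the
 * loop completes without a match, so NULL means "no such thread".
 */
#if 0
	PROC_LOCK(p);
	td = thread_find(p, tid);
	if (td == NULL) {
		PROC_UNLOCK(p);
		return (ESRCH);		/* no thread with that tid */
	}
	/* ... use 'td' while the proc lock is held ... */
	PROC_UNLOCK(p);
#endif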