/*-
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

#include "opt_witness.h"
#include "opt_hwpmc_hooks.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_thread.c 275617 2014-12-08 16:27:43Z kib $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/resourcevar.h>
#include <sys/sdt.h>
#include <sys/smp.h>
#include <sys/sched.h>
#include <sys/sleepqueue.h>
#include <sys/selinfo.h>
#include <sys/turnstile.h>
#include <sys/ktr.h>
#include <sys/rwlock.h>
#include <sys/umtx.h>
#include <sys/cpuset.h>
#ifdef	HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#include <security/audit/audit.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
#include <sys/eventhandler.h>

SDT_PROVIDER_DECLARE(proc);
SDT_PROBE_DEFINE(proc, , , lwp__exit);

/*
 * thread related storage.
 */
static uma_zone_t thread_zone;

TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
static struct mtx zombie_lock;
MTX_SYSINIT(zombie_lock, &zombie_lock, "zombie lock", MTX_SPIN);

static void thread_zombie(struct thread *);

#define TID_BUFFER_SIZE	1024

struct mtx tid_lock;
static struct unrhdr *tid_unrhdr;
static lwpid_t tid_buffer[TID_BUFFER_SIZE];
static int tid_head, tid_tail;
static MALLOC_DEFINE(M_TIDHASH, "tidhash", "thread hash");

struct	tidhashhead *tidhashtbl;
u_long	tidhash;
struct	rwlock tidhash_lock;

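/*
 * Allocate a thread ID.  IDs normally come from the unit number
 * allocator; if that is exhausted, fall back to the ring buffer of
 * recently freed IDs maintained by tid_free().  Returns -1 if no ID
 * is available.
 */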
static lwpid_t
tid_alloc(void)
{
	lwpid_t	tid;

	tid = alloc_unr(tid_unrhdr);
	if (tid != -1)
		return (tid);
	mtx_lock(&tid_lock);
	if (tid_head == tid_tail) {
		mtx_unlock(&tid_lock);
		return (-1);
	}
	tid = tid_buffer[tid_head];
	tid_head = (tid_head + 1) % TID_BUFFER_SIZE;
	mtx_unlock(&tid_lock);
	return (tid);
}

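/*
 * Release a thread ID.  Freed IDs are parked in a small ring buffer
 * and only returned to the unit number allocator once the buffer is
 * full, which delays the reuse of any individual ID.
 */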
static void
tid_free(lwpid_t tid)
{
	lwpid_t tmp_tid = -1;

	mtx_lock(&tid_lock);
	if ((tid_tail + 1) % TID_BUFFER_SIZE == tid_head) {
		tmp_tid = tid_buffer[tid_head];
		tid_head = (tid_head + 1) % TID_BUFFER_SIZE;
	}
	tid_buffer[tid_tail] = tid;
	tid_tail = (tid_tail + 1) % TID_BUFFER_SIZE;
	mtx_unlock(&tid_lock);
	if (tmp_tid != -1)
		free_unr(tid_unrhdr, tmp_tid);
}

/*
 * Prepare a thread for use.
 */
static int
thread_ctor(void *mem, int size, void *arg, int flags)
{
	struct thread	*td;

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
	td->td_oncpu = NOCPU;

	td->td_tid = tid_alloc();

	/*
	 * Note that td_critnest begins life as 1 because the thread is not
	 * running and is thereby implicitly waiting to be on the receiving
	 * end of a context switch.
	 */
	td->td_critnest = 1;
	td->td_lend_user_pri = PRI_MAX;
	EVENTHANDLER_INVOKE(thread_ctor, td);
#ifdef AUDIT
	audit_thread_alloc(td);
#endif
	umtx_thread_alloc(td);
	return (0);
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread *td;

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
#ifdef AUDIT
	audit_thread_free(td);
#endif
	/* Free all OSD associated with this thread. */
	osd_thread_exit(td);

	EVENTHANDLER_INVOKE(thread_dtor, td);
	tid_free(td->td_tid);
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static int
thread_init(void *mem, int size, int flags)
{
	struct thread *td;

	td = (struct thread *)mem;

	td->td_sleepqueue = sleepq_alloc();
	td->td_turnstile = turnstile_alloc();
	td->td_rlqe = NULL;
	EVENTHANDLER_INVOKE(thread_init, td);
	td->td_sched = (struct td_sched *)&td[1];
	umtx_thread_init(td);
	td->td_kstack = 0;
	return (0);
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread *td;

	td = (struct thread *)mem;
	EVENTHANDLER_INVOKE(thread_fini, td);
	rlqentry_free(td->td_rlqe);
	turnstile_free(td->td_turnstile);
	sleepq_free(td->td_sleepqueue);
	umtx_thread_fini(td);
	seltdfini(td);
}

/*
 * For a newly created process, link up all the structures and its
 * initial thread.  Called from:
 * {arch}/{arch}/machdep.c   {arch}_init(), init386() etc.
 * proc_dtor() (should go away)
 * proc_init()
 */
void
proc_linkup0(struct proc *p, struct thread *td)
{
	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
	proc_linkup(p, td);
}

void
proc_linkup(struct proc *p, struct thread *td)
{

	sigqueue_init(&p->p_sigqueue, p);
	p->p_ksi = ksiginfo_alloc(1);
	if (p->p_ksi != NULL) {
		/* XXX p_ksi may be null if ksiginfo zone is not ready */
		p->p_ksi->ksi_flags = KSI_EXT | KSI_INS;
	}
	LIST_INIT(&p->p_mqnotifier);
	p->p_numthreads = 0;
	thread_link(td, p);
}

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

	mtx_init(&tid_lock, "TID lock", NULL, MTX_DEF);

	/*
	 * pid_max cannot be greater than PID_MAX, so thread IDs never
	 * collide with process IDs; one number (PID_MAX + 1) is left
	 * for thread0.
	 */
	tid_unrhdr = new_unrhdr(PID_MAX + 2, INT_MAX, &tid_lock);

	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    16 - 1, 0);
	tidhashtbl = hashinit(maxproc / 2, M_TIDHASH, &tidhash);
	rw_init(&tidhash_lock, "tidhash");
}

/*
 * Place an unused thread on the zombie list.
 * Use the slpq as that must be unused by now.
 */
void
thread_zombie(struct thread *td)
{
	mtx_lock_spin(&zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_slpq);
	mtx_unlock_spin(&zombie_lock);
}

/*
 * Release a thread that has exited after cpu_throw().
 */
void
thread_stash(struct thread *td)
{
	atomic_subtract_rel_int(&td->td_proc->p_exitthreads, 1);
	thread_zombie(td);
}

/*
 * Reap zombie resources.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;

	/*
	 * Don't even bother to lock if none at this instant;
	 * we really don't care about the next instant.
	 */
	if (!TAILQ_EMPTY(&zombie_threads)) {
		mtx_lock_spin(&zombie_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		mtx_unlock_spin(&zombie_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_slpq);
			if (td_first->td_ucred)
				crfree(td_first->td_ucred);
			thread_free(td_first);
			td_first = td_next;
		}
	}
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(int pages)
{
	struct thread *td;

	thread_reap(); /* check if any zombies to get */

	td = (struct thread *)uma_zalloc(thread_zone, M_WAITOK);
	KASSERT(td->td_kstack == 0, ("thread_alloc got thread with kstack"));
	if (!vm_thread_new(td, pages)) {
		uma_zfree(thread_zone, td);
		return (NULL);
	}
	cpu_thread_alloc(td);
	return (td);
}

int
thread_alloc_stack(struct thread *td, int pages)
{

	KASSERT(td->td_kstack == 0,
	    ("thread_alloc_stack called on a thread with kstack"));
	if (!vm_thread_new(td, pages))
		return (0);
	cpu_thread_alloc(td);
	return (1);
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{

	lock_profile_thread_exit(td);
	if (td->td_cpuset)
		cpuset_rel(td->td_cpuset);
	td->td_cpuset = NULL;
	cpu_thread_free(td);
	if (td->td_kstack != 0)
		vm_thread_dispose(td);
	uma_zfree(thread_zone, td);
}

/*
 * Discard the current thread and exit from its context.
 * Always called with scheduler locked.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our CPU's deadthread holder. This means
 * we needn't worry about someone else grabbing our context before we
 * do a cpu_throw().
 */
void
thread_exit(void)
{
	uint64_t runtime, new_switchtime;
	struct thread *td;
	struct thread *td2;
	struct proc *p;
	int wakeup_swapper;

	td = curthread;
	p = td->td_proc;

	PROC_SLOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&Giant, MA_NOTOWNED);

	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td,
	    (long)p->p_pid, td->td_name);
	KASSERT(TAILQ_EMPTY(&td->td_sigqueue.sq_list), ("signal pending"));

#ifdef AUDIT
	AUDIT_SYSCALL_EXIT(0, td);
#endif
	umtx_thread_exit(td);
	/*
	 * Drop FPU & debug register state storage, or any other
	 * architecture-specific resources that would not be on a
	 * new untouched process.
	 */
	cpu_thread_exit(td);	/* XXXSMP */

	/*
	 * The last thread is left attached to the process so that the
	 * whole bundle gets recycled.  Skip all this stuff if we never
	 * had threads.  EXIT clears all signs of other threads when it
	 * goes to single threading, so the last thread always takes
	 * the short path.
	 */
	if (p->p_flag & P_HADTHREADS) {
		if (p->p_numthreads > 1) {
			atomic_add_int(&td->td_proc->p_exitthreads, 1);
			thread_unlink(td);
			td2 = FIRST_THREAD_IN_PROC(p);
			sched_exit_thread(td2, td);

			/*
			 * The test below is NOT true if we are the
			 * sole exiting thread. P_STOPPED_SINGLE is unset
			 * in exit1() after it is the only survivor.
			 */
			if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
				if (p->p_numthreads == p->p_suspcount) {
					thread_lock(p->p_singlethread);
					wakeup_swapper = thread_unsuspend_one(
						p->p_singlethread);
					thread_unlock(p->p_singlethread);
					if (wakeup_swapper)
						kick_proc0();
				}
			}

			PCPU_SET(deadthread, td);
		} else {
			/*
			 * The last thread is exiting, but not through exit().
			 */
			panic("thread_exit: Last thread exiting on its own");
		}
	}
#ifdef	HWPMC_HOOKS
	/*
	 * If this thread is part of a process that is being tracked by hwpmc(4),
	 * inform the module of the thread's impending exit.
	 */
	if (PMC_PROC_IS_USING_PMCS(td->td_proc))
		PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif
	PROC_UNLOCK(p);
	PROC_STATLOCK(p);
	thread_lock(td);
	PROC_SUNLOCK(p);

	/* Do the same timestamp bookkeeping that mi_switch() would do. */
	new_switchtime = cpu_ticks();
	runtime = new_switchtime - PCPU_GET(switchtime);
	td->td_runtime += runtime;
	td->td_incruntime += runtime;
	PCPU_SET(switchtime, new_switchtime);
	PCPU_SET(switchticks, ticks);
	PCPU_INC(cnt.v_swtch);

	/* Save our resource usage in our process. */
	td->td_ru.ru_nvcsw++;
	ruxagg(p, td);
	rucollect(&p->p_ru, &td->td_ru);
	PROC_STATUNLOCK(p);

	td->td_state = TDS_INACTIVE;
#ifdef WITNESS
	witness_thread_exit(td);
#endif
	CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td);
	sched_throw(td);
	panic("I'm a teapot!");
	/* NOTREACHED */
}

/*
 * Do any thread specific cleanups that may be needed in wait().
 * Called with Giant, proc and schedlock not held.
 */
void
thread_wait(struct proc *p)
{
	struct thread *td;

	mtx_assert(&Giant, MA_NOTOWNED);
	KASSERT(p->p_numthreads == 1, ("multiple threads in thread_wait()"));
	KASSERT(p->p_exitthreads == 0, ("p_exitthreads leaking"));
	td = FIRST_THREAD_IN_PROC(p);
	/* Lock the last thread so we spin until it exits cpu_throw(). */
	thread_lock(td);
	thread_unlock(td);
	lock_profile_thread_exit(td);
	cpuset_rel(td->td_cpuset);
	td->td_cpuset = NULL;
	cpu_thread_clean(td);
	crfree(td->td_ucred);
	thread_reap();	/* check for zombie threads etc. */
}

/*
 * Link a thread to a process.
 * Set up anything that needs to be initialized for it to
 * be used by the process.
 */
void
thread_link(struct thread *td, struct proc *p)
{

	/*
	 * XXX This can't be enabled because it's called for proc0 before
	 * its lock has been created.
	 * PROC_LOCK_ASSERT(p, MA_OWNED);
	 */
	td->td_state    = TDS_INACTIVE;
	td->td_proc     = p;
	td->td_flags    = TDF_INMEM;

	LIST_INIT(&td->td_contested);
	LIST_INIT(&td->td_lprof[0]);
	LIST_INIT(&td->td_lprof[1]);
	sigqueue_init(&td->td_sigqueue, p);
	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
	TAILQ_INSERT_TAIL(&p->p_threads, td, td_plist);
	p->p_numthreads++;
}

/*
 * Called from:
 *  thread_exit()
 */
void
thread_unlink(struct thread *td)
{
	struct proc *p = td->td_proc;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	TAILQ_REMOVE(&p->p_threads, td, td_plist);
	p->p_numthreads--;
	/* could clear a few other things here */
	/* Must NOT clear links to proc! */
}

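/*
 * Compute how many threads have not yet reached the state required by
 * the given single-threading mode; the caller itself is included in
 * the count.
 */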
static int
calc_remaining(struct proc *p, int mode)
{
	int remaining;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	if (mode == SINGLE_EXIT)
		remaining = p->p_numthreads;
	else if (mode == SINGLE_BOUNDARY)
		remaining = p->p_numthreads - p->p_boundary_count;
	else if (mode == SINGLE_NO_EXIT)
		remaining = p->p_numthreads - p->p_suspcount;
	else
		panic("calc_remaining: wrong mode %d", mode);
	return (remaining);
}

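/*
 * Number of threads expected to remain once the requested
 * single-threading mode has been reached: just the caller, for all of
 * the modes handled here.
 */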
static int
remain_for_mode(int mode)
{

	return (1);
}

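/*
 * Push an inhibited thread towards the state required by the given
 * single-threading mode: unsuspend it, or abort an interruptible
 * sleep, as appropriate.  Returns non-zero if the swapper needs to be
 * kicked (see kick_proc0()).
 */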
static int
weed_inhib(int mode, struct thread *td2, struct proc *p)
{
	int wakeup_swapper;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	THREAD_LOCK_ASSERT(td2, MA_OWNED);

	wakeup_swapper = 0;
	switch (mode) {
	case SINGLE_EXIT:
		if (TD_IS_SUSPENDED(td2))
			wakeup_swapper |= thread_unsuspend_one(td2);
		if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0)
			wakeup_swapper |= sleepq_abort(td2, EINTR);
		break;
	case SINGLE_BOUNDARY:
		if (TD_IS_SUSPENDED(td2) && (td2->td_flags & TDF_BOUNDARY) == 0)
			wakeup_swapper |= thread_unsuspend_one(td2);
		if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0)
			wakeup_swapper |= sleepq_abort(td2, ERESTART);
		break;
	case SINGLE_NO_EXIT:
		if (TD_IS_SUSPENDED(td2) && (td2->td_flags & TDF_BOUNDARY) == 0)
			wakeup_swapper |= thread_unsuspend_one(td2);
		if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0)
			wakeup_swapper |= sleepq_abort(td2, ERESTART);
		break;
	}
	return (wakeup_swapper);
}

/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar).  Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single threaded in the suspend mode when
 * there are no threads in user mode.  Threads in the kernel must be
 * allowed to continue until they get to the user boundary.  They may even
 * copy out their return values and data before suspending.  They may,
 * however, be accelerated in reaching the user boundary as we will wake
 * up any sleeping threads that are interruptible (PCATCH).
 */
int
thread_single(int mode)
{
	struct thread *td;
	struct thread *td2;
	struct proc *p;
	int remaining, wakeup_swapper;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);

	if ((p->p_flag & P_HADTHREADS) == 0)
		return (0);

	/* Is someone already single threading? */
	if (p->p_singlethread != NULL && p->p_singlethread != td)
		return (1);

	if (mode == SINGLE_EXIT) {
		p->p_flag |= P_SINGLE_EXIT;
		p->p_flag &= ~P_SINGLE_BOUNDARY;
	} else {
		p->p_flag &= ~P_SINGLE_EXIT;
		if (mode == SINGLE_BOUNDARY)
			p->p_flag |= P_SINGLE_BOUNDARY;
		else
			p->p_flag &= ~P_SINGLE_BOUNDARY;
	}
	p->p_flag |= P_STOPPED_SINGLE;
	PROC_SLOCK(p);
	p->p_singlethread = td;
	remaining = calc_remaining(p, mode);
	while (remaining != remain_for_mode(mode)) {
		if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE)
			goto stopme;
		wakeup_swapper = 0;
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			thread_lock(td2);
			td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
			if (TD_IS_INHIBITED(td2))
				wakeup_swapper |= weed_inhib(mode, td2, p);
#ifdef SMP
			else if (TD_IS_RUNNING(td2) && td != td2) {
				forward_signal(td2);
			}
#endif
			thread_unlock(td2);
		}
		if (wakeup_swapper)
			kick_proc0();
		remaining = calc_remaining(p, mode);

		/*
		 * Maybe we suspended some threads... was it enough?
		 */
		if (remaining == remain_for_mode(mode))
			break;

stopme:
		/*
		 * Wake us up when everyone else has suspended.
		 * In the meantime we suspend as well.
		 */
		thread_suspend_switch(td);
		remaining = calc_remaining(p, mode);
	}
	if (mode == SINGLE_EXIT) {
		/*
		 * Convert the process to an unthreaded process.  The
		 * SINGLE_EXIT mode is requested by exit1() or execve();
		 * in both cases the other threads must be retired.
		 */
		KASSERT(p->p_numthreads == 1, ("Unthreading with >1 threads"));
		p->p_singlethread = NULL;
		p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_HADTHREADS);

		/*
		 * Wait for any remaining threads to exit cpu_throw().
		 */
		while (p->p_exitthreads != 0) {
			PROC_SUNLOCK(p);
			PROC_UNLOCK(p);
			sched_relinquish(td);
			PROC_LOCK(p);
			PROC_SLOCK(p);
		}
	}
	PROC_SUNLOCK(p);
	return (0);
}

bool
thread_suspend_check_needed(void)
{
	struct proc *p;
	struct thread *td;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	return (P_SHOULDSTOP(p) || ((p->p_flag & P_TRACED) != 0 &&
	    (td->td_dbgflags & TDB_SUSPEND) != 0));
}

/*
 * Called from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is non-zero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          |   returns 0 or 1
 *               | when ST ends       |   immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       |   returns 1
 *               |                    |  immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is in effect, even a single-threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
int
thread_suspend_check(int return_instead)
{
	struct thread *td;
	struct proc *p;
	int wakeup_swapper;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	while (thread_suspend_check_needed()) {
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			KASSERT(p->p_singlethread != NULL,
			    ("singlethread not set"));
			/*
			 * The only suspension in effect is
			 * single-threading; the single-threading thread
			 * need not stop.
			 * XXX Should be safe to access unlocked
			 * as it can only be set to be true by us.
			 */
			if (p->p_singlethread == td)
				return (0);	/* Exempt from stopping. */
		}
		if ((p->p_flag & P_SINGLE_EXIT) && return_instead)
			return (EINTR);

		/* Should we goto user boundary if we didn't come from there? */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
		    (p->p_flag & P_SINGLE_BOUNDARY) && return_instead)
			return (ERESTART);

		/*
		 * Ignore suspend requests for stop signals if they
		 * are deferred.
		 */
		if (P_SHOULDSTOP(p) == P_STOPPED_SIG &&
		    td->td_flags & TDF_SBDRY) {
			KASSERT(return_instead,
			    ("TDF_SBDRY set for unsafe thread_suspend_check"));
			return (0);
		}

		/*
		 * If the process is waiting for us to exit,
		 * this thread should simply exit.
		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
		 */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
			PROC_UNLOCK(p);
			tidhash_remove(td);
			PROC_LOCK(p);
			tdsigcleanup(td);
			PROC_SLOCK(p);
			thread_stopped(p);
			thread_exit();
		}

		PROC_SLOCK(p);
		thread_stopped(p);
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount + 1) {
				thread_lock(p->p_singlethread);
				wakeup_swapper =
				    thread_unsuspend_one(p->p_singlethread);
				thread_unlock(p->p_singlethread);
				if (wakeup_swapper)
					kick_proc0();
			}
		}
		PROC_UNLOCK(p);
		thread_lock(td);
		/*
		 * When a thread suspends, it just
		 * gets taken off all queues.
		 */
		thread_suspend_one(td);
		if (return_instead == 0) {
			p->p_boundary_count++;
			td->td_flags |= TDF_BOUNDARY;
		}
		PROC_SUNLOCK(p);
		mi_switch(SW_INVOL | SWT_SUSPEND, NULL);
		if (return_instead == 0)
			td->td_flags &= ~TDF_BOUNDARY;
		thread_unlock(td);
		PROC_LOCK(p);
		if (return_instead == 0) {
			PROC_SLOCK(p);
			p->p_boundary_count--;
			PROC_SUNLOCK(p);
		}
	}
	return (0);
}

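/*
 * Suspend the calling thread and switch away, dropping the process
 * lock around the context switch; both the process lock and the
 * process spinlock are reacquired before returning.
 */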
void
thread_suspend_switch(struct thread *td)
{
	struct proc *p;

	p = td->td_proc;
	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	/*
	 * We implement thread_suspend_one in stages here to avoid
	 * dropping the proc lock while the thread lock is owned.
	 */
	thread_stopped(p);
	p->p_suspcount++;
	PROC_UNLOCK(p);
	thread_lock(td);
	td->td_flags &= ~TDF_NEEDSUSPCHK;
	TD_SET_SUSPENDED(td);
	sched_sleep(td, 0);
	PROC_SUNLOCK(p);
	DROP_GIANT();
	mi_switch(SW_VOL | SWT_SUSPEND, NULL);
	thread_unlock(td);
	PICKUP_GIANT();
	PROC_LOCK(p);
	PROC_SLOCK(p);
}

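/*
 * Mark a thread as suspended.  The caller must hold both the process
 * spinlock and the thread lock.
 */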
void
thread_suspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	PROC_SLOCK_ASSERT(p, MA_OWNED);
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
	p->p_suspcount++;
	td->td_flags &= ~TDF_NEEDSUSPCHK;
	TD_SET_SUSPENDED(td);
	sched_sleep(td, 0);
}

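/*
 * Clear a thread's suspension and make it runnable again.  Returns
 * non-zero if the swapper needs to be woken up to swap the thread in.
 */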
int
thread_unsuspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	PROC_SLOCK_ASSERT(p, MA_OWNED);
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended"));
	TD_CLR_SUSPENDED(td);
	p->p_suspcount--;
	return (setrunnable(td));
}

/*
 * Allow all threads blocked by single threading to continue running.
 */
void
thread_unsuspend(struct proc *p)
{
	struct thread *td;
	int wakeup_swapper;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	wakeup_swapper = 0;
	if (!P_SHOULDSTOP(p)) {
		FOREACH_THREAD_IN_PROC(p, td) {
			thread_lock(td);
			if (TD_IS_SUSPENDED(td)) {
				wakeup_swapper |= thread_unsuspend_one(td);
			}
			thread_unlock(td);
		}
	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
	    (p->p_numthreads == p->p_suspcount)) {
		/*
		 * Stopping everything also did the job for the single
		 * threading request.  Now that we've downgraded to
		 * single-threaded, let it continue.
		 */
		thread_lock(p->p_singlethread);
		wakeup_swapper = thread_unsuspend_one(p->p_singlethread);
		thread_unlock(p->p_singlethread);
	}
	if (wakeup_swapper)
		kick_proc0();
}

/*
 * End the single threading mode.
 */
void
thread_single_end(void)
{
	struct thread *td;
	struct proc *p;
	int wakeup_swapper;

	p = curproc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY);
	PROC_SLOCK(p);
	p->p_singlethread = NULL;
	wakeup_swapper = 0;
	/*
	 * If there are other threads they may now run,
	 * unless of course there is a blanket 'stop order'
	 * on the process.  The single threader must be allowed
	 * to continue, however, as this is a bad place to stop.
	 */
	if (p->p_numthreads != remain_for_mode(SINGLE_EXIT) &&
	    !P_SHOULDSTOP(p)) {
		FOREACH_THREAD_IN_PROC(p, td) {
			thread_lock(td);
			if (TD_IS_SUSPENDED(td)) {
				wakeup_swapper |= thread_unsuspend_one(td);
			}
			thread_unlock(td);
		}
	}
	PROC_SUNLOCK(p);
	if (wakeup_swapper)
		kick_proc0();
}

struct thread *
thread_find(struct proc *p, lwpid_t tid)
{
	struct thread *td;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_tid == tid)
			break;
	}
	return (td);
}

/* Locate a thread by number; return with proc lock held. */
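/*
 * Lookups that walk more than RUN_THRESH entries of a hash chain move
 * the thread to the front of its chain (when the hash lock can be
 * upgraded), so frequently searched-for threads are found faster next
 * time.
 */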
struct thread *
tdfind(lwpid_t tid, pid_t pid)
{
#define RUN_THRESH	16
	struct thread *td;
	int run = 0;

	rw_rlock(&tidhash_lock);
	LIST_FOREACH(td, TIDHASH(tid), td_hash) {
		if (td->td_tid == tid) {
			if (pid != -1 && td->td_proc->p_pid != pid) {
				td = NULL;
				break;
			}
			PROC_LOCK(td->td_proc);
			if (td->td_proc->p_state == PRS_NEW) {
				PROC_UNLOCK(td->td_proc);
				td = NULL;
				break;
			}
			if (run > RUN_THRESH) {
				if (rw_try_upgrade(&tidhash_lock)) {
					LIST_REMOVE(td, td_hash);
					LIST_INSERT_HEAD(TIDHASH(td->td_tid),
						td, td_hash);
					rw_wunlock(&tidhash_lock);
					return (td);
				}
			}
			break;
		}
		run++;
	}
	rw_runlock(&tidhash_lock);
	return (td);
}

void
tidhash_add(struct thread *td)
{
	rw_wlock(&tidhash_lock);
	LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash);
	rw_wunlock(&tidhash_lock);
}

void
tidhash_remove(struct thread *td)
{
	rw_wlock(&tidhash_lock);
	LIST_REMOVE(td, td_hash);
	rw_wunlock(&tidhash_lock);
}
1070