/*-
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

#include "opt_witness.h"
#include "opt_hwpmc_hooks.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/kern/kern_thread.c 337242 2018-08-03 14:05:22Z asomers $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/resourcevar.h>
#include <sys/sdt.h>
#include <sys/smp.h>
#include <sys/sched.h>
#include <sys/sleepqueue.h>
#include <sys/selinfo.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/turnstile.h>
#include <sys/ktr.h>
#include <sys/rwlock.h>
#include <sys/umtx.h>
#include <sys/vmmeter.h>
#include <sys/cpuset.h>
#ifdef	HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#include <security/audit/audit.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
#include <vm/vm_domain.h>
#include <sys/eventhandler.h>
/*
 * Asserts below verify the stability of struct thread and struct proc
 * layout, as exposed by KBI to modules.  On head, the KBI is allowed
 * to drift; changes to the structures must be accompanied by matching
 * updates to the asserts.
 *
 * On the stable branches after KBI freeze, the conditions must not be
 * violated.  Typically new fields are moved to the end of the
 * structures.
 */
#ifdef __amd64__
_Static_assert(offsetof(struct thread, td_flags) == 0xe4,
    "struct thread KBI td_flags");
_Static_assert(offsetof(struct thread, td_pflags) == 0xec,
    "struct thread KBI td_pflags");
_Static_assert(offsetof(struct thread, td_frame) == 0x418,
    "struct thread KBI td_frame");
_Static_assert(offsetof(struct thread, td_emuldata) == 0x4c0,
    "struct thread KBI td_emuldata");
_Static_assert(offsetof(struct proc, p_flag) == 0xb0,
    "struct proc KBI p_flag");
_Static_assert(offsetof(struct proc, p_pid) == 0xbc,
    "struct proc KBI p_pid");
_Static_assert(offsetof(struct proc, p_filemon) == 0x3c0,
    "struct proc KBI p_filemon");
_Static_assert(offsetof(struct proc, p_comm) == 0x3d0,
    "struct proc KBI p_comm");
_Static_assert(offsetof(struct proc, p_emuldata) == 0x4a0,
    "struct proc KBI p_emuldata");
#endif
#ifdef __i386__
_Static_assert(offsetof(struct thread, td_flags) == 0x8c,
    "struct thread KBI td_flags");
_Static_assert(offsetof(struct thread, td_pflags) == 0x94,
    "struct thread KBI td_pflags");
_Static_assert(offsetof(struct thread, td_frame) == 0x2c0,
    "struct thread KBI td_frame");
_Static_assert(offsetof(struct thread, td_emuldata) == 0x30c,
    "struct thread KBI td_emuldata");
_Static_assert(offsetof(struct proc, p_flag) == 0x68,
    "struct proc KBI p_flag");
_Static_assert(offsetof(struct proc, p_pid) == 0x74,
    "struct proc KBI p_pid");
_Static_assert(offsetof(struct proc, p_filemon) == 0x268,
    "struct proc KBI p_filemon");
_Static_assert(offsetof(struct proc, p_comm) == 0x274,
    "struct proc KBI p_comm");
_Static_assert(offsetof(struct proc, p_emuldata) == 0x2f4,
    "struct proc KBI p_emuldata");
#endif

SDT_PROVIDER_DECLARE(proc);
SDT_PROBE_DEFINE(proc, , , lwp__exit);

/*
 * Thread related storage.
 */
static uma_zone_t thread_zone;

TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
static struct mtx zombie_lock;
MTX_SYSINIT(zombie_lock, &zombie_lock, "zombie lock", MTX_SPIN);

static void thread_zombie(struct thread *);
static int thread_unsuspend_one(struct thread *td, struct proc *p,
    bool boundary);

#define TID_BUFFER_SIZE	1024

struct mtx tid_lock;
static struct unrhdr *tid_unrhdr;
static lwpid_t tid_buffer[TID_BUFFER_SIZE];
static int tid_head, tid_tail;
static MALLOC_DEFINE(M_TIDHASH, "tidhash", "thread hash");

struct	tidhashhead *tidhashtbl;
u_long	tidhash;
struct	rwlock tidhash_lock;

EVENTHANDLER_LIST_DEFINE(thread_ctor);
EVENTHANDLER_LIST_DEFINE(thread_dtor);
EVENTHANDLER_LIST_DEFINE(thread_init);
EVENTHANDLER_LIST_DEFINE(thread_fini);

static lwpid_t
tid_alloc(void)
{
	lwpid_t	tid;

	tid = alloc_unr(tid_unrhdr);
	if (tid != -1)
		return (tid);
	mtx_lock(&tid_lock);
	if (tid_head == tid_tail) {
		mtx_unlock(&tid_lock);
		return (-1);
	}
	tid = tid_buffer[tid_head];
	tid_head = (tid_head + 1) % TID_BUFFER_SIZE;
	mtx_unlock(&tid_lock);
	return (tid);
}

static void
tid_free(lwpid_t tid)
{
	lwpid_t tmp_tid = -1;

	mtx_lock(&tid_lock);
	if ((tid_tail + 1) % TID_BUFFER_SIZE == tid_head) {
		tmp_tid = tid_buffer[tid_head];
		tid_head = (tid_head + 1) % TID_BUFFER_SIZE;
	}
	tid_buffer[tid_tail] = tid;
	tid_tail = (tid_tail + 1) % TID_BUFFER_SIZE;
	mtx_unlock(&tid_lock);
	if (tmp_tid != -1)
		free_unr(tid_unrhdr, tmp_tid);
}
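
/*
 * A note on the two-level TID recycling above: tid_free() does not
 * return a TID to the unr(9) allocator right away.  It parks the TID
 * in the tid_buffer ring and, only once the ring is full, pushes the
 * oldest buffered TID back via free_unr().  tid_alloc() prefers a
 * fresh id from alloc_unr() and falls back to the oldest buffered
 * entry only when the allocator is exhausted.  A sketch of the intent
 * (illustrative, not an exact trace):
 *
 *	tid_free(t);		// t parked at tid_buffer[tid_tail]
 *	...			// ~TID_BUFFER_SIZE more frees go by
 *	tid_free(u);		// ring full: t is pushed to tid_unrhdr
 *	tid = tid_alloc();	// only from here on can t be reissued
 *
 * The effect is that a freed TID is not observed again immediately,
 * which helps callers that use a TID as a weak identity check.
 */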

/*
 * Prepare a thread for use.
 */
static int
thread_ctor(void *mem, int size, void *arg, int flags)
{
	struct thread	*td;

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
	td->td_oncpu = NOCPU;

	td->td_tid = tid_alloc();

	/*
	 * Note that td_critnest begins life as 1 because the thread is not
	 * running and is thereby implicitly waiting to be on the receiving
	 * end of a context switch.
	 */
	td->td_critnest = 1;
	td->td_lend_user_pri = PRI_MAX;
	EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td);
#ifdef AUDIT
	audit_thread_alloc(td);
#endif
	umtx_thread_alloc(td);
	return (0);
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread *td;

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
#ifdef AUDIT
	audit_thread_free(td);
#endif
	/* Free all OSD associated to this thread. */
	osd_thread_exit(td);
	td_softdep_cleanup(td);
	MPASS(td->td_su == NULL);

	EVENTHANDLER_DIRECT_INVOKE(thread_dtor, td);
	tid_free(td->td_tid);
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static int
thread_init(void *mem, int size, int flags)
{
	struct thread *td;

	td = (struct thread *)mem;

	td->td_sleepqueue = sleepq_alloc();
	td->td_turnstile = turnstile_alloc();
	td->td_rlqe = NULL;
	EVENTHANDLER_DIRECT_INVOKE(thread_init, td);
	umtx_thread_init(td);
	td->td_kstack = 0;
	td->td_sel = NULL;
	return (0);
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread *td;

	td = (struct thread *)mem;
	EVENTHANDLER_DIRECT_INVOKE(thread_fini, td);
	rlqentry_free(td->td_rlqe);
	turnstile_free(td->td_turnstile);
	sleepq_free(td->td_sleepqueue);
	umtx_thread_fini(td);
	seltdfini(td);
}

/*
 * For a newly created process,
 * link up all the structures and its initial threads etc.
 * Called from:
 * {arch}/{arch}/machdep.c   {arch}_init(), init386() etc.
 * proc_dtor() (should go away)
 * proc_init()
 */
void
proc_linkup0(struct proc *p, struct thread *td)
{
	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
	proc_linkup(p, td);
}

void
proc_linkup(struct proc *p, struct thread *td)
{

	sigqueue_init(&p->p_sigqueue, p);
	p->p_ksi = ksiginfo_alloc(1);
	if (p->p_ksi != NULL) {
		/* XXX p_ksi may be null if ksiginfo zone is not ready */
		p->p_ksi->ksi_flags = KSI_EXT | KSI_INS;
	}
	LIST_INIT(&p->p_mqnotifier);
	p->p_numthreads = 0;
	thread_link(td, p);
}

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

	mtx_init(&tid_lock, "TID lock", NULL, MTX_DEF);

	/*
	 * pid_max cannot be greater than PID_MAX.
	 * Leave one number for thread0.
	 */
	tid_unrhdr = new_unrhdr(PID_MAX + 2, INT_MAX, &tid_lock);

	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    32 - 1, UMA_ZONE_NOFREE);
	tidhashtbl = hashinit(maxproc / 2, M_TIDHASH, &tidhash);
	rw_init(&tidhash_lock, "tidhash");
}

/*
 * Place an unused thread on the zombie list.
 * Use the slpq as that must be unused by now.
 */
void
thread_zombie(struct thread *td)
{
	mtx_lock_spin(&zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_slpq);
	mtx_unlock_spin(&zombie_lock);
}

/*
 * Release a thread that has exited after cpu_throw().
 */
void
thread_stash(struct thread *td)
{
	atomic_subtract_rel_int(&td->td_proc->p_exitthreads, 1);
	thread_zombie(td);
}

/*
 * Reap zombie resources.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;

	/*
	 * Don't even bother to lock if none at this instant,
	 * we really don't care about the next instant.
	 */
	if (!TAILQ_EMPTY(&zombie_threads)) {
		mtx_lock_spin(&zombie_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		mtx_unlock_spin(&zombie_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_slpq);
			thread_cow_free(td_first);
			thread_free(td_first);
			td_first = td_next;
		}
	}
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(int pages)
{
	struct thread *td;

	thread_reap(); /* check if any zombies to get */

	td = (struct thread *)uma_zalloc(thread_zone, M_WAITOK);
	KASSERT(td->td_kstack == 0, ("thread_alloc got thread with kstack"));
	if (!vm_thread_new(td, pages)) {
		uma_zfree(thread_zone, td);
		return (NULL);
	}
	cpu_thread_alloc(td);
	vm_domain_policy_init(&td->td_vm_dom_policy);
	return (td);
}

int
thread_alloc_stack(struct thread *td, int pages)
{

	KASSERT(td->td_kstack == 0,
	    ("thread_alloc_stack called on a thread with kstack"));
	if (!vm_thread_new(td, pages))
		return (0);
	cpu_thread_alloc(td);
	return (1);
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{

	lock_profile_thread_exit(td);
	if (td->td_cpuset)
		cpuset_rel(td->td_cpuset);
	td->td_cpuset = NULL;
	cpu_thread_free(td);
	if (td->td_kstack != 0)
		vm_thread_dispose(td);
	vm_domain_policy_cleanup(&td->td_vm_dom_policy);
	callout_drain(&td->td_slpcallout);
	uma_zfree(thread_zone, td);
}

void
thread_cow_get_proc(struct thread *newtd, struct proc *p)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	newtd->td_ucred = crhold(p->p_ucred);
	newtd->td_limit = lim_hold(p->p_limit);
	newtd->td_cowgen = p->p_cowgen;
}

void
thread_cow_get(struct thread *newtd, struct thread *td)
{

	newtd->td_ucred = crhold(td->td_ucred);
	newtd->td_limit = lim_hold(td->td_limit);
	newtd->td_cowgen = td->td_cowgen;
}

void
thread_cow_free(struct thread *td)
{

	if (td->td_ucred != NULL)
		crfree(td->td_ucred);
	if (td->td_limit != NULL)
		lim_free(td->td_limit);
}

void
thread_cow_update(struct thread *td)
{
	struct proc *p;
	struct ucred *oldcred;
	struct plimit *oldlimit;

	p = td->td_proc;
	oldcred = NULL;
	oldlimit = NULL;
	PROC_LOCK(p);
	if (td->td_ucred != p->p_ucred) {
		oldcred = td->td_ucred;
		td->td_ucred = crhold(p->p_ucred);
	}
	if (td->td_limit != p->p_limit) {
		oldlimit = td->td_limit;
		td->td_limit = lim_hold(p->p_limit);
	}
	td->td_cowgen = p->p_cowgen;
	PROC_UNLOCK(p);
	if (oldcred != NULL)
		crfree(oldcred);
	if (oldlimit != NULL)
		lim_free(oldlimit);
}
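
/*
 * Usage note (a sketch, not code from this file): the thread_cow_*()
 * helpers let each thread cache copy-on-write process data (ucred,
 * plimit) so it need not take the proc lock on every access.  Writers
 * install a new proc copy and bump p_cowgen; each thread then notices
 * the stale generation at a convenient moment, e.g. on its way back
 * to user mode, and refreshes its private snapshot:
 *
 *	if (td->td_cowgen != p->p_cowgen)
 *		thread_cow_update(td);
 */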

/*
 * Discard the current thread and exit from its context.
 * Always called with scheduler locked.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our CPU's deadthread holder. This means
 * we needn't worry about someone else grabbing our context before we
 * do a cpu_throw().
 */
void
thread_exit(void)
{
	uint64_t runtime, new_switchtime;
	struct thread *td;
	struct thread *td2;
	struct proc *p;
	int wakeup_swapper;

	td = curthread;
	p = td->td_proc;

	PROC_SLOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&Giant, MA_NOTOWNED);

	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td,
	    (long)p->p_pid, td->td_name);
	SDT_PROBE0(proc, , , lwp__exit);
	KASSERT(TAILQ_EMPTY(&td->td_sigqueue.sq_list), ("signal pending"));

	/*
	 * Drop FPU & debug register state storage, or any other
	 * architecture-specific resources that
	 * would not be present on a new untouched process.
	 */
	cpu_thread_exit(td);

	/*
	 * The last thread is left attached to the process
	 * so that the whole bundle gets recycled. Skip
	 * all this stuff if we never had threads.
	 * EXIT clears all signs of other threads when
	 * it goes to single threading, so the last thread always
	 * takes the short path.
	 */
	if (p->p_flag & P_HADTHREADS) {
		if (p->p_numthreads > 1) {
			atomic_add_int(&td->td_proc->p_exitthreads, 1);
			thread_unlink(td);
			td2 = FIRST_THREAD_IN_PROC(p);
			sched_exit_thread(td2, td);

			/*
			 * The test below is NOT true if we are the
			 * sole exiting thread. P_STOPPED_SINGLE is unset
			 * in exit1() after it is the only survivor.
			 */
			if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
				if (p->p_numthreads == p->p_suspcount) {
					thread_lock(p->p_singlethread);
					wakeup_swapper = thread_unsuspend_one(
						p->p_singlethread, p, false);
					thread_unlock(p->p_singlethread);
					if (wakeup_swapper)
						kick_proc0();
				}
			}

			PCPU_SET(deadthread, td);
		} else {
			/*
			 * The last thread is exiting, but not through exit().
			 */
			panic("thread_exit: Last thread exiting on its own");
		}
	}
#ifdef	HWPMC_HOOKS
	/*
	 * If this thread is part of a process that is being tracked by hwpmc(4),
	 * inform the module of the thread's impending exit.
	 */
	if (PMC_PROC_IS_USING_PMCS(td->td_proc))
		PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif
	PROC_UNLOCK(p);
	PROC_STATLOCK(p);
	thread_lock(td);
	PROC_SUNLOCK(p);

	/* Do the same timestamp bookkeeping that mi_switch() would do. */
	new_switchtime = cpu_ticks();
	runtime = new_switchtime - PCPU_GET(switchtime);
	td->td_runtime += runtime;
	td->td_incruntime += runtime;
	PCPU_SET(switchtime, new_switchtime);
	PCPU_SET(switchticks, ticks);
	PCPU_INC(cnt.v_swtch);

	/* Save our resource usage in our process. */
	td->td_ru.ru_nvcsw++;
	ruxagg(p, td);
	rucollect(&p->p_ru, &td->td_ru);
	PROC_STATUNLOCK(p);

	td->td_state = TDS_INACTIVE;
#ifdef WITNESS
	witness_thread_exit(td);
#endif
	CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td);
	sched_throw(td);
	panic("I'm a teapot!");
	/* NOTREACHED */
}

/*
 * Do any thread specific cleanups that may be needed in wait().
 * Called with Giant, proc and schedlock not held.
 */
void
thread_wait(struct proc *p)
{
	struct thread *td;

	mtx_assert(&Giant, MA_NOTOWNED);
	KASSERT(p->p_numthreads == 1, ("multiple threads in thread_wait()"));
	KASSERT(p->p_exitthreads == 0, ("p_exitthreads leaking"));
	td = FIRST_THREAD_IN_PROC(p);
	/* Lock the last thread so we spin until it exits cpu_throw(). */
	thread_lock(td);
	thread_unlock(td);
	lock_profile_thread_exit(td);
	cpuset_rel(td->td_cpuset);
	td->td_cpuset = NULL;
	cpu_thread_clean(td);
	thread_cow_free(td);
	callout_drain(&td->td_slpcallout);
	thread_reap();	/* check for zombie threads etc. */
}

/*
 * Link a thread to a process.
 * Set up anything that needs to be initialized for it to
 * be used by the process.
 */
void
thread_link(struct thread *td, struct proc *p)
{

	/*
	 * XXX This can't be enabled because it's called for proc0 before
	 * its lock has been created.
	 * PROC_LOCK_ASSERT(p, MA_OWNED);
	 */
	td->td_state    = TDS_INACTIVE;
	td->td_proc     = p;
	td->td_flags    = TDF_INMEM;

	LIST_INIT(&td->td_contested);
	LIST_INIT(&td->td_lprof[0]);
	LIST_INIT(&td->td_lprof[1]);
	sigqueue_init(&td->td_sigqueue, p);
	callout_init(&td->td_slpcallout, 1);
	TAILQ_INSERT_TAIL(&p->p_threads, td, td_plist);
	p->p_numthreads++;
}

/*
 * Called from:
 *  thread_exit()
 */
void
thread_unlink(struct thread *td)
{
	struct proc *p = td->td_proc;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	TAILQ_REMOVE(&p->p_threads, td, td_plist);
	p->p_numthreads--;
	/* could clear a few other things here */
	/* Must NOT clear links to proc! */
}

static int
calc_remaining(struct proc *p, int mode)
{
	int remaining;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	if (mode == SINGLE_EXIT)
		remaining = p->p_numthreads;
	else if (mode == SINGLE_BOUNDARY)
		remaining = p->p_numthreads - p->p_boundary_count;
	else if (mode == SINGLE_NO_EXIT || mode == SINGLE_ALLPROC)
		remaining = p->p_numthreads - p->p_suspcount;
	else
		panic("calc_remaining: wrong mode %d", mode);
	return (remaining);
}

static int
remain_for_mode(int mode)
{

	return (mode == SINGLE_ALLPROC ? 0 : 1);
}

static int
weed_inhib(int mode, struct thread *td2, struct proc *p)
{
	int wakeup_swapper;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	THREAD_LOCK_ASSERT(td2, MA_OWNED);

	wakeup_swapper = 0;
	switch (mode) {
	case SINGLE_EXIT:
		if (TD_IS_SUSPENDED(td2))
			wakeup_swapper |= thread_unsuspend_one(td2, p, true);
		if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0)
			wakeup_swapper |= sleepq_abort(td2, EINTR);
		break;
	case SINGLE_BOUNDARY:
	case SINGLE_NO_EXIT:
		if (TD_IS_SUSPENDED(td2) && (td2->td_flags & TDF_BOUNDARY) == 0)
			wakeup_swapper |= thread_unsuspend_one(td2, p, false);
		if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0)
			wakeup_swapper |= sleepq_abort(td2, ERESTART);
		break;
	case SINGLE_ALLPROC:
		/*
		 * ALLPROC suspend tries to avoid spurious EINTR for
		 * threads sleeping interruptibly, by suspending the
		 * thread directly, similarly to sig_suspend_threads().
		 * Since such sleep is not performed at the user
		 * boundary, TDF_BOUNDARY flag is not set, and TDF_ALLPROCSUSP
		 * is used to avoid immediate un-suspend.
		 */
		if (TD_IS_SUSPENDED(td2) && (td2->td_flags & (TDF_BOUNDARY |
		    TDF_ALLPROCSUSP)) == 0)
			wakeup_swapper |= thread_unsuspend_one(td2, p, false);
		if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0) {
			if ((td2->td_flags & TDF_SBDRY) == 0) {
				thread_suspend_one(td2);
				td2->td_flags |= TDF_ALLPROCSUSP;
			} else {
				wakeup_swapper |= sleepq_abort(td2, ERESTART);
			}
		}
		break;
	}
	return (wakeup_swapper);
}

/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar). Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single threaded in the suspend mode when
 * there are no threads in user mode. Threads in the kernel must be
 * allowed to continue until they get to the user boundary. They may even
 * copy out their return values and data before suspending. They may however be
 * accelerated in reaching the user boundary as we will wake up
 * any sleeping threads that are interruptible (PCATCH).
 */
int
thread_single(struct proc *p, int mode)
{
	struct thread *td;
	struct thread *td2;
	int remaining, wakeup_swapper;

	td = curthread;
	KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY ||
	    mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT,
	    ("invalid mode %d", mode));
	/*
	 * If allowing non-ALLPROC singlethreading for non-curproc
	 * callers, calc_remaining() and remain_for_mode() should be
	 * adjusted to also account for td->td_proc != p.  For now
	 * this is not implemented because it is not used.
	 */
	KASSERT((mode == SINGLE_ALLPROC && td->td_proc != p) ||
	    (mode != SINGLE_ALLPROC && td->td_proc == p),
	    ("mode %d proc %p curproc %p", mode, p, td->td_proc));
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);

	if ((p->p_flag & P_HADTHREADS) == 0 && mode != SINGLE_ALLPROC)
		return (0);

	/* Is someone already single threading? */
	if (p->p_singlethread != NULL && p->p_singlethread != td)
		return (1);

	if (mode == SINGLE_EXIT) {
		p->p_flag |= P_SINGLE_EXIT;
		p->p_flag &= ~P_SINGLE_BOUNDARY;
	} else {
		p->p_flag &= ~P_SINGLE_EXIT;
		if (mode == SINGLE_BOUNDARY)
			p->p_flag |= P_SINGLE_BOUNDARY;
		else
			p->p_flag &= ~P_SINGLE_BOUNDARY;
	}
	if (mode == SINGLE_ALLPROC)
		p->p_flag |= P_TOTAL_STOP;
	p->p_flag |= P_STOPPED_SINGLE;
	PROC_SLOCK(p);
	p->p_singlethread = td;
	remaining = calc_remaining(p, mode);
	while (remaining != remain_for_mode(mode)) {
		if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE)
			goto stopme;
		wakeup_swapper = 0;
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			thread_lock(td2);
			td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
			if (TD_IS_INHIBITED(td2)) {
				wakeup_swapper |= weed_inhib(mode, td2, p);
#ifdef SMP
			} else if (TD_IS_RUNNING(td2) && td != td2) {
				forward_signal(td2);
#endif
			}
			thread_unlock(td2);
		}
		if (wakeup_swapper)
			kick_proc0();
		remaining = calc_remaining(p, mode);

		/*
		 * Maybe we suspended some threads... was it enough?
		 */
		if (remaining == remain_for_mode(mode))
			break;

stopme:
		/*
		 * Wake us up when everyone else has suspended.
		 * In the meantime we suspend as well.
		 */
		thread_suspend_switch(td, p);
		remaining = calc_remaining(p, mode);
	}
	if (mode == SINGLE_EXIT) {
		/*
		 * Convert the process to an unthreaded process.  The
		 * SINGLE_EXIT is called by exit1() or execve(); in
		 * both cases other threads must be retired.
		 */
		KASSERT(p->p_numthreads == 1, ("Unthreading with >1 threads"));
		p->p_singlethread = NULL;
		p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_HADTHREADS);

		/*
		 * Wait for any remaining threads to exit cpu_throw().
		 */
		while (p->p_exitthreads != 0) {
			PROC_SUNLOCK(p);
			PROC_UNLOCK(p);
			sched_relinquish(td);
			PROC_LOCK(p);
			PROC_SLOCK(p);
		}
	} else if (mode == SINGLE_BOUNDARY) {
		/*
		 * Wait until all suspended threads are removed from
		 * the processors.  The thread_suspend_check()
		 * increments p_boundary_count while it is still
		 * running, which makes it possible for the execve()
		 * to destroy vmspace while our other threads are
		 * still using the address space.
		 *
		 * We lock the thread, which is only allowed to
		 * succeed after context switch code finished using
		 * the address space.
		 */
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			thread_lock(td2);
			KASSERT((td2->td_flags & TDF_BOUNDARY) != 0,
			    ("td %p not on boundary", td2));
			KASSERT(TD_IS_SUSPENDED(td2),
			    ("td %p is not suspended", td2));
			thread_unlock(td2);
		}
	}
	PROC_SUNLOCK(p);
	return (0);
}
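
/*
 * Usage sketch for thread_single() (illustrative, condensed from the
 * shape of typical callers such as execve(); the error handling shown
 * is hypothetical).  The request is made with the proc lock held, and
 * a nonzero return means another thread already owns single-threading,
 * so the caller must back out:
 *
 *	PROC_LOCK(p);
 *	if (thread_single(p, SINGLE_BOUNDARY)) {
 *		PROC_UNLOCK(p);
 *		return (ERESTART);
 *	}
 *	... work that requires a single-threaded process ...
 *	thread_single_end(p, SINGLE_BOUNDARY);
 *	PROC_UNLOCK(p);
 */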

bool
thread_suspend_check_needed(void)
{
	struct proc *p;
	struct thread *td;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	return (P_SHOULDSTOP(p) || ((p->p_flag & P_TRACED) != 0 &&
	    (td->td_dbgflags & TDB_SUSPEND) != 0));
}

/*
 * Called from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is non-zero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          |   returns 0 or 1
 *               | when ST ends       |   immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       |   returns 1
 *               |                    |  immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is in effect, even a single threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
int
thread_suspend_check(int return_instead)
{
	struct thread *td;
	struct proc *p;
	int wakeup_swapper;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	while (thread_suspend_check_needed()) {
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			KASSERT(p->p_singlethread != NULL,
			    ("singlethread not set"));
			/*
			 * The only suspension in action is a
			 * single-threading. Single threader need not stop.
			 * It is safe to access p->p_singlethread unlocked
			 * because it can only be set to our address by us.
			 */
			if (p->p_singlethread == td)
				return (0);	/* Exempt from stopping. */
		}
		if ((p->p_flag & P_SINGLE_EXIT) && return_instead)
			return (EINTR);

		/* Should we goto user boundary if we didn't come from there? */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
		    (p->p_flag & P_SINGLE_BOUNDARY) && return_instead)
			return (ERESTART);

		/*
		 * Ignore suspend requests if they are deferred.
		 */
		if ((td->td_flags & TDF_SBDRY) != 0) {
			KASSERT(return_instead,
			    ("TDF_SBDRY set for unsafe thread_suspend_check"));
			KASSERT((td->td_flags & (TDF_SEINTR | TDF_SERESTART)) !=
			    (TDF_SEINTR | TDF_SERESTART),
			    ("both TDF_SEINTR and TDF_SERESTART"));
			return (TD_SBDRY_INTR(td) ? TD_SBDRY_ERRNO(td) : 0);
		}

		/*
		 * If the process is waiting for us to exit,
		 * this thread should just suicide.
		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
		 */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
			PROC_UNLOCK(p);

			/*
			 * Allow Linux emulation layer to do some work
			 * before thread suicide.
			 */
			if (__predict_false(p->p_sysent->sv_thread_detach != NULL))
				(p->p_sysent->sv_thread_detach)(td);
			umtx_thread_exit(td);
			kern_thr_exit(td);
			panic("stopped thread did not exit");
		}

		PROC_SLOCK(p);
		thread_stopped(p);
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount + 1) {
				thread_lock(p->p_singlethread);
				wakeup_swapper = thread_unsuspend_one(
				    p->p_singlethread, p, false);
				thread_unlock(p->p_singlethread);
				if (wakeup_swapper)
					kick_proc0();
			}
		}
		PROC_UNLOCK(p);
		thread_lock(td);
		/*
		 * When a thread suspends, it just
		 * gets taken off all queues.
		 */
		thread_suspend_one(td);
		if (return_instead == 0) {
			p->p_boundary_count++;
			td->td_flags |= TDF_BOUNDARY;
		}
		PROC_SUNLOCK(p);
		mi_switch(SW_INVOL | SWT_SUSPEND, NULL);
		thread_unlock(td);
		PROC_LOCK(p);
	}
	return (0);
}
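
/*
 * A sketch of the two calling styles from the table above (it is
 * illustrative, not copied from a particular caller).  Code on the
 * way back to user mode can let the thread stop or exit in place:
 *
 *	PROC_LOCK(p);
 *	thread_suspend_check(0);	// may suspend; may never return
 *	PROC_UNLOCK(p);
 *
 * Code that still holds kernel state to unwind asks to back out
 * instead and propagates EINTR/ERESTART:
 *
 *	PROC_LOCK(p);
 *	error = thread_suspend_check(1);
 *	PROC_UNLOCK(p);
 *	if (error != 0)
 *		return (error);
 */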

void
thread_suspend_switch(struct thread *td, struct proc *p)
{

	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	/*
	 * We implement thread_suspend_one in stages here to avoid
	 * dropping the proc lock while the thread lock is owned.
	 */
	if (p == td->td_proc) {
		thread_stopped(p);
		p->p_suspcount++;
	}
	PROC_UNLOCK(p);
	thread_lock(td);
	td->td_flags &= ~TDF_NEEDSUSPCHK;
	TD_SET_SUSPENDED(td);
	sched_sleep(td, 0);
	PROC_SUNLOCK(p);
	DROP_GIANT();
	mi_switch(SW_VOL | SWT_SUSPEND, NULL);
	thread_unlock(td);
	PICKUP_GIANT();
	PROC_LOCK(p);
	PROC_SLOCK(p);
}

void
thread_suspend_one(struct thread *td)
{
	struct proc *p;

	p = td->td_proc;
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
	p->p_suspcount++;
	td->td_flags &= ~TDF_NEEDSUSPCHK;
	TD_SET_SUSPENDED(td);
	sched_sleep(td, 0);
}

static int
thread_unsuspend_one(struct thread *td, struct proc *p, bool boundary)
{

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended"));
	TD_CLR_SUSPENDED(td);
	td->td_flags &= ~TDF_ALLPROCSUSP;
	if (td->td_proc == p) {
		PROC_SLOCK_ASSERT(p, MA_OWNED);
		p->p_suspcount--;
		if (boundary && (td->td_flags & TDF_BOUNDARY) != 0) {
			td->td_flags &= ~TDF_BOUNDARY;
			p->p_boundary_count--;
		}
	}
	return (setrunnable(td));
}

/*
 * Allow all threads blocked by single threading to continue running.
 */
void
thread_unsuspend(struct proc *p)
{
	struct thread *td;
	int wakeup_swapper;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	wakeup_swapper = 0;
	if (!P_SHOULDSTOP(p)) {
		FOREACH_THREAD_IN_PROC(p, td) {
			thread_lock(td);
			if (TD_IS_SUSPENDED(td)) {
				wakeup_swapper |= thread_unsuspend_one(td, p,
				    true);
			}
			thread_unlock(td);
		}
	} else if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
	    p->p_numthreads == p->p_suspcount) {
		/*
		 * Stopping everything also did the job for the single
		 * threading request. Now we've downgraded to single-threaded,
		 * let it continue.
		 */
		if (p->p_singlethread->td_proc == p) {
			thread_lock(p->p_singlethread);
			wakeup_swapper = thread_unsuspend_one(
			    p->p_singlethread, p, false);
			thread_unlock(p->p_singlethread);
		}
	}
	if (wakeup_swapper)
		kick_proc0();
}

/*
 * End the single threading mode.
 */
void
thread_single_end(struct proc *p, int mode)
{
	struct thread *td;
	int wakeup_swapper;

	KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY ||
	    mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT,
	    ("invalid mode %d", mode));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT((mode == SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) != 0) ||
	    (mode != SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) == 0),
	    ("mode %d does not match P_TOTAL_STOP", mode));
	KASSERT(mode == SINGLE_ALLPROC || p->p_singlethread == curthread,
	    ("thread_single_end from other thread %p %p",
	    curthread, p->p_singlethread));
	KASSERT(mode != SINGLE_BOUNDARY ||
	    (p->p_flag & P_SINGLE_BOUNDARY) != 0,
	    ("mis-matched SINGLE_BOUNDARY flags %x", p->p_flag));
	p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY |
	    P_TOTAL_STOP);
	PROC_SLOCK(p);
	p->p_singlethread = NULL;
	wakeup_swapper = 0;
	/*
	 * If there are other threads, they may now run,
	 * unless of course there is a blanket 'stop order'
	 * on the process. The single threader must be allowed
	 * to continue however, as this is a bad place to stop.
	 */
	if (p->p_numthreads != remain_for_mode(mode) && !P_SHOULDSTOP(p)) {
		FOREACH_THREAD_IN_PROC(p, td) {
			thread_lock(td);
			if (TD_IS_SUSPENDED(td)) {
				wakeup_swapper |= thread_unsuspend_one(td, p,
				    mode == SINGLE_BOUNDARY);
			}
			thread_unlock(td);
		}
	}
	KASSERT(mode != SINGLE_BOUNDARY || p->p_boundary_count == 0,
	    ("inconsistent boundary count %d", p->p_boundary_count));
	PROC_SUNLOCK(p);
	if (wakeup_swapper)
		kick_proc0();
}

struct thread *
thread_find(struct proc *p, lwpid_t tid)
{
	struct thread *td;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_tid == tid)
			break;
	}
	return (td);
}
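
/*
 * Usage sketch (illustrative; the ESRCH handling is a hypothetical
 * caller, not part of this file): thread_find() is a linear walk of
 * p_threads, so the proc lock must already be held, and NULL means
 * the TID is not in this process:
 *
 *	PROC_LOCK(p);
 *	ttd = thread_find(p, tid);
 *	if (ttd == NULL) {
 *		PROC_UNLOCK(p);
 *		return (ESRCH);
 *	}
 */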

/* Locate a thread by number; return with proc lock held. */
struct thread *
tdfind(lwpid_t tid, pid_t pid)
{
#define RUN_THRESH	16
	struct thread *td;
	int run = 0;

	rw_rlock(&tidhash_lock);
	LIST_FOREACH(td, TIDHASH(tid), td_hash) {
		if (td->td_tid == tid) {
			if (pid != -1 && td->td_proc->p_pid != pid) {
				td = NULL;
				break;
			}
			PROC_LOCK(td->td_proc);
			if (td->td_proc->p_state == PRS_NEW) {
				PROC_UNLOCK(td->td_proc);
				td = NULL;
				break;
			}
			if (run > RUN_THRESH) {
				if (rw_try_upgrade(&tidhash_lock)) {
					LIST_REMOVE(td, td_hash);
					LIST_INSERT_HEAD(TIDHASH(td->td_tid),
						td, td_hash);
					rw_wunlock(&tidhash_lock);
					return (td);
				}
			}
			break;
		}
		run++;
	}
	rw_runlock(&tidhash_lock);
	return (td);
}
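
/*
 * Usage sketch (illustrative caller): tdfind() returns with the
 * containing process locked, so the caller owns the unlock; pid may
 * be -1 to accept a match in any process.  The move-to-front upgrade
 * above keeps frequently looked-up TIDs near their hash chain head:
 *
 *	td = tdfind(tid, pid);
 *	if (td == NULL)
 *		return (ESRCH);		// hypothetical error handling
 *	... use td and td->td_proc ...
 *	PROC_UNLOCK(td->td_proc);
 */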

void
tidhash_add(struct thread *td)
{
	rw_wlock(&tidhash_lock);
	LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash);
	rw_wunlock(&tidhash_lock);
}

void
tidhash_remove(struct thread *td)
{
	rw_wlock(&tidhash_lock);
	LIST_REMOVE(td, td_hash);
	rw_wunlock(&tidhash_lock);
}