kern_thread.c revision 155741
/*-
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_thread.c 155741 2006-02-15 23:52:01Z davidxu $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sched.h>
#include <sys/sleepqueue.h>
#include <sys/turnstile.h>
#include <sys/ktr.h>
#include <sys/umtx.h>

#include <security/audit/audit.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>

/*
 * KSEGRP related storage.
 */
static uma_zone_t ksegrp_zone;
static uma_zone_t thread_zone;

/* DEBUG ONLY */
SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
static int thread_debug = 0;
SYSCTL_INT(_kern_threads, OID_AUTO, debug, CTLFLAG_RW,
	&thread_debug, 0, "thread debug");

int max_threads_per_proc = 1500;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

int max_groups_per_proc = 1500;
SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
	&max_groups_per_proc, 0, "Limit on thread groups per proc");

int max_threads_hits;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
	&max_threads_hits, 0, "");

int virtual_cpu;

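/*
 * Zombie threads and ksegrps await final cleanup in thread_reap().
 * Both lists are protected by the kse_zombie_lock spin mutex.
 */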
TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
struct mtx kse_zombie_lock;
MTX_SYSINIT(kse_zombie_lock, &kse_zombie_lock, "kse zombie lock", MTX_SPIN);

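/*
 * Sysctl handler for kern.threads.virtual_cpu.  A value of 0 reports the
 * default (mp_ncpus); negative values are rejected with EINVAL.
 */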
static int
sysctl_kse_virtual_cpu(SYSCTL_HANDLER_ARGS)
{
	int error, new_val;
	int def_val;

	def_val = mp_ncpus;
	if (virtual_cpu == 0)
		new_val = def_val;
	else
		new_val = virtual_cpu;
	error = sysctl_handle_int(oidp, &new_val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (new_val < 0)
		return (EINVAL);
	virtual_cpu = new_val;
	return (0);
}

/* DEBUG ONLY */
SYSCTL_PROC(_kern_threads, OID_AUTO, virtual_cpu, CTLTYPE_INT|CTLFLAG_RW,
	0, sizeof(virtual_cpu), sysctl_kse_virtual_cpu, "I",
	"debug virtual cpus");

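/*
 * Thread ID (tid) allocation.  TIDs are handed out from a unit-number
 * allocator that starts above PID_MAX so they never collide with pids.
 */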
struct mtx tid_lock;
static struct unrhdr *tid_unrhdr;

/*
 * Prepare a thread for use.
 */
static int
thread_ctor(void *mem, int size, void *arg, int flags)
{
	struct thread	*td;

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
	td->td_oncpu = NOCPU;

	td->td_tid = alloc_unr(tid_unrhdr);

	/*
	 * Note that td_critnest begins life as 1 because the thread is not
	 * running and is thereby implicitly waiting to be on the receiving
	 * end of a context switch.  A context switch must occur inside a
	 * critical section, and in fact, includes hand-off of the sched_lock.
	 * After a context switch to a newly created thread, it will release
	 * sched_lock for the first time, and its td_critnest will hit 0 for
	 * the first time.  This happens on the far end of a context switch,
	 * and when it context switches away from itself, it will in fact go
	 * back into a critical section, and hand off the sched lock to the
	 * next thread.
	 */
	td->td_critnest = 1;

#ifdef AUDIT
	audit_thread_alloc(td);
#endif
	return (0);
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread *td;

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
#ifdef AUDIT
	audit_thread_free(td);
#endif
	free_unr(tid_unrhdr, td->td_tid);
	sched_newthread(td);
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static int
thread_init(void *mem, int size, int flags)
{
	struct thread *td;

	td = (struct thread *)mem;

	vm_thread_new(td, 0);
	cpu_thread_setup(td);
	td->td_sleepqueue = sleepq_alloc();
	td->td_turnstile = turnstile_alloc();
	td->td_umtxq = umtxq_alloc();
	td->td_sched = (struct td_sched *)&td[1];
	sched_newthread(td);
	return (0);
}

203 * Tear down type-stable parts of a thread (just before being discarded).
204 */
205static void
206thread_fini(void *mem, int size)
207{
208	struct thread *td;
209
210	td = (struct thread *)mem;
211	turnstile_free(td->td_turnstile);
212	sleepq_free(td->td_sleepqueue);
213	umtxq_free(td->td_umtxq);
214	vm_thread_dispose(td);
215}
216
217/*
218 * Initialize type-stable parts of a ksegrp (when newly created).
219 */
220static int
221ksegrp_ctor(void *mem, int size, void *arg, int flags)
222{
223	struct ksegrp	*kg;
224
225	kg = (struct ksegrp *)mem;
226	bzero(mem, size);
227	kg->kg_sched = (struct kg_sched *)&kg[1];
228	return (0);
229}
230
231void
232ksegrp_link(struct ksegrp *kg, struct proc *p)
233{
234
235	TAILQ_INIT(&kg->kg_threads);
236	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
237	TAILQ_INIT(&kg->kg_upcalls);	/* all upcall structure in ksegrp */
238	kg->kg_proc = p;
239	/*
240	 * the following counters are in the -zero- section
241	 * and may not need clearing
242	 */
243	kg->kg_numthreads = 0;
244	kg->kg_numupcalls = 0;
245	/* link it in now that it's consistent */
246	p->p_numksegrps++;
247	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
248}
249
250/*
251 * Called from:
252 *   thread-exit()
253 */
254void
255ksegrp_unlink(struct ksegrp *kg)
256{
257	struct proc *p;
258
259	mtx_assert(&sched_lock, MA_OWNED);
260	KASSERT((kg->kg_numthreads == 0), ("ksegrp_unlink: residual threads"));
261	KASSERT((kg->kg_numupcalls == 0), ("ksegrp_unlink: residual upcalls"));
262
263	p = kg->kg_proc;
264	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
265	p->p_numksegrps--;
266	/*
267	 * Aggregate stats from the KSE
268	 */
269	if (p->p_procscopegrp == kg)
270		p->p_procscopegrp = NULL;
271}
272
273/*
274 * For a newly created process,
275 * link up all the structures and its initial threads etc.
276 * called from:
277 * {arch}/{arch}/machdep.c   ia64_init(), init386() etc.
278 * proc_dtor() (should go away)
279 * proc_init()
280 */
281void
282proc_linkup(struct proc *p, struct ksegrp *kg, struct thread *td)
283{
284
285	TAILQ_INIT(&p->p_ksegrps);	     /* all ksegrps in proc */
286	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
287	TAILQ_INIT(&p->p_suspended);	     /* Threads suspended */
288	sigqueue_init(&p->p_sigqueue, p);
289	p->p_ksi = ksiginfo_alloc(1);
290	if (p->p_ksi != NULL) {
291		/* XXX p_ksi may be null if ksiginfo zone is not ready */
292		p->p_ksi->ksi_flags = KSI_EXT | KSI_INS;
293	}
294	LIST_INIT(&p->p_mqnotifier);
295	p->p_numksegrps = 0;
296	p->p_numthreads = 0;
297
298	ksegrp_link(kg, p);
299	thread_link(td, kg);
300}
301
302/*
303 * Initialize global thread allocation resources.
304 */
305void
306threadinit(void)
307{
308
309	mtx_init(&tid_lock, "TID lock", NULL, MTX_DEF);
310	tid_unrhdr = new_unrhdr(PID_MAX + 1, INT_MAX, &tid_lock);
311
312	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
313	    thread_ctor, thread_dtor, thread_init, thread_fini,
314	    UMA_ALIGN_CACHE, 0);
315	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
316	    ksegrp_ctor, NULL, NULL, NULL,
317	    UMA_ALIGN_CACHE, 0);
318	kseinit();	/* set up kse specific stuff  e.g. upcall zone*/
319}
320
/*
 * Stash a surplus thread on the zombie thread queue.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash a surplus ksegrp on the zombie ksegrp queue.
 */
void
ksegrp_stash(struct ksegrp *kg)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Reap zombie KSE resources.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;
	struct ksegrp *kg_first, *kg_next;

	/*
	 * Don't even bother to lock if none at this instant;
	 * we really don't care about the next instant.
	 */
	if ((!TAILQ_EMPTY(&zombie_threads))
	    || (!TAILQ_EMPTY(&zombie_ksegrps))) {
		mtx_lock_spin(&kse_zombie_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		kg_first = TAILQ_FIRST(&zombie_ksegrps);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		if (kg_first)
			TAILQ_INIT(&zombie_ksegrps);
		mtx_unlock_spin(&kse_zombie_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_runq);
			if (td_first->td_ucred)
				crfree(td_first->td_ucred);
			thread_free(td_first);
			td_first = td_next;
		}
		while (kg_first) {
			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
			ksegrp_free(kg_first);
			kg_first = kg_next;
		}
		/*
		 * There will always be a thread on the list if one of these
		 * is there.
		 */
		kse_GC();
	}
}

/*
 * Allocate a ksegrp.
 */
struct ksegrp *
ksegrp_alloc(void)
{
	return (uma_zalloc(ksegrp_zone, M_WAITOK));
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{
	thread_reap(); /* check if any zombies to get */
	return (uma_zalloc(thread_zone, M_WAITOK));
}

/*
 * Deallocate a ksegrp.
 */
void
ksegrp_free(struct ksegrp *kg)
{
	uma_zfree(ksegrp_zone, kg);
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{

	cpu_thread_clean(td);
	uma_zfree(thread_zone, td);
}

/*
 * Discard the current thread and exit from its context.
 * Always called with scheduler locked.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our CPU's deadthread holder.  This means
 * we needn't worry about someone else grabbing our context before we
 * do a cpu_throw().  This may not be needed now as we are under schedlock.
 * Maybe we can just do a thread_stash() as thr_exit1() does.
 */
/*  XXX
 * libthr expects its thread exit to return for the last
 * thread, meaning that the program is back to non-threaded
 * mode.  Because we do this (cpu_throw) unconditionally here,
 * libthr has its own version of it (thr_exit1()) that doesn't
 * do it all if this was the last thread.
 * It is also called from thread_suspend_check().
 * Of course in the end, they end up coming here through exit1()
 * anyhow.  After fixing 'thr' to play by the rules we should be able
 * to merge these two functions together.
 *
 * Called from:
 * exit1()
 * kse_exit()
 * thr_exit()
 * thread_user_enter()
 * thread_userret()
 * thread_suspend_check()
 */
void
thread_exit(void)
{
	struct thread *td;
	struct proc *p;
	struct ksegrp	*kg;

	td = curthread;
	kg = td->td_ksegrp;
	p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	KASSERT(kg != NULL, ("thread exiting without a kse group"));
	CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td,
	    (long)p->p_pid, p->p_comm);
	KASSERT(TAILQ_EMPTY(&td->td_sigqueue.sq_list), ("signal pending"));

#ifdef AUDIT
	AUDIT_SYSCALL_EXIT(0, td);
#endif

	if (td->td_standin != NULL) {
		/*
		 * Note that we don't need to free the cred here as it
		 * is done in thread_reap().
		 */
		thread_stash(td->td_standin);
		td->td_standin = NULL;
	}

	/*
	 * Drop FPU & debug register state storage, or any other
	 * architecture-specific resources that
	 * would not be on a new untouched process.
	 */
	cpu_thread_exit(td);	/* XXXSMP */

	/*
	 * The thread is exiting.  The scheduler can release its stuff
	 * and collect stats, etc.
	 */
	sched_thread_exit(td);

	/*
	 * The last thread is left attached to the process
	 * so that the whole bundle gets recycled.  Skip
	 * all this stuff if we never had threads.
	 * EXIT clears all signs of other threads when
	 * it goes to single threading, so the last thread always
	 * takes the short path.
	 */
	if (p->p_flag & P_HADTHREADS) {
		if (p->p_numthreads > 1) {
			thread_unlink(td);

			/* XXX first arg not used in 4BSD or ULE */
			sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);

			/*
			 * The test below is NOT true if we are the
			 * sole exiting thread.  P_STOPPED_SINGLE is unset
			 * in exit1() after it is the only survivor.
			 */
			if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
				if (p->p_numthreads == p->p_suspcount) {
					thread_unsuspend_one(p->p_singlethread);
				}
			}

			/*
			 * Because each upcall structure has an owner thread,
			 * and the owner thread exits only when the process is
			 * exiting, an upcall to userland is no longer needed
			 * and deleting the upcall structure is safe here.
			 * So when all threads in a group have exited, all
			 * upcalls in the group are automatically freed.
			 *  XXXKSE This is a KSE thing and should be exported
			 * there somehow.
			 */
			upcall_remove(td);

			/*
			 * If the thread we unlinked above was the last one,
			 * then this ksegrp should go away too.
			 */
			if (kg->kg_numthreads == 0) {
				/*
				 * Let the scheduler know about this in case
				 * it needs to recover stats or resources.
				 * Theoretically we could let
				 * sched_exit_ksegrp() do the equivalent of
				 * setting the concurrency to 0,
				 * but don't do it yet to avoid changing
				 * the existing scheduler code until we
				 * are ready.
				 * We supply a random other ksegrp
				 * as the recipient of any built-up
				 * cpu usage, etc. (if the scheduler wants it).
				 * XXXKSE
				 * This is probably not fair, so think of
				 * a better answer.
				 */
				sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), td);
				sched_set_concurrency(kg, 0); /* XXX TEMP */
				ksegrp_unlink(kg);
				ksegrp_stash(kg);
			}
			PROC_UNLOCK(p);
			td->td_ksegrp	= NULL;
			PCPU_SET(deadthread, td);
		} else {
			/*
			 * The last thread is exiting, but not through exit();
			 * what should we do?
			 * Theoretically this can't happen:
			 * exit1() - clears threading flags before coming here
			 * kse_exit() - treats last thread specially
			 * thr_exit() - treats last thread specially
			 * thread_user_enter() - only if more exist
			 * thread_userret() - only if more exist
			 * thread_suspend_check() - only if more exist
			 */
			panic("thread_exit: Last thread exiting on its own");
		}
	} else {
		/*
		 * A non-threaded process comes here.
		 * This includes an ex-threaded process that is coming
		 * here via exit1() (exit1() dethreads the proc first).
		 */
		PROC_UNLOCK(p);
	}
	td->td_state = TDS_INACTIVE;
	CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td);
	cpu_throw(td, choosethread());
	panic("I'm a teapot!");
	/* NOTREACHED */
}

/*
 * Do any thread-specific cleanups that may be needed in wait().
 * Called with Giant, proc and schedlock not held.
 */
void
thread_wait(struct proc *p)
{
	struct thread *td;

	mtx_assert(&Giant, MA_NOTOWNED);
	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
	KASSERT((p->p_numksegrps == 1), ("Multiple ksegrps in wait1()"));
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_standin != NULL) {
			if (td->td_standin->td_ucred != NULL) {
				crfree(td->td_standin->td_ucred);
				td->td_standin->td_ucred = NULL;
			}
			thread_free(td->td_standin);
			td->td_standin = NULL;
		}
		cpu_thread_clean(td);
		crfree(td->td_ucred);
	}
	thread_reap();	/* check for zombie threads etc. */
}

/*
 * Link a thread to a process.
 * Set up anything that needs to be initialized for it to
 * be used by the process.
 *
 * Note that we do not link to the proc's ucred here.
 * The thread is linked as if running but no KSE assigned.
 * Called from:
 *  proc_linkup()
 *  thread_schedule_upcall()
 *  thr_create()
 */
void
thread_link(struct thread *td, struct ksegrp *kg)
{
	struct proc *p;

	p = kg->kg_proc;
	td->td_state    = TDS_INACTIVE;
	td->td_proc     = p;
	td->td_ksegrp   = kg;
	td->td_flags    = 0;
	td->td_kflags	= 0;

	LIST_INIT(&td->td_contested);
	sigqueue_init(&td->td_sigqueue, p);
	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
	p->p_numthreads++;
	kg->kg_numthreads++;
}

/*
 * Convert a process with one thread to an unthreaded process.
 * Called from:
 *  thread_single(exit) (called from execve and exit)
 *  kse_exit()		XXX may need cleaning up wrt KSE stuff
 */
void
thread_unthread(struct thread *td)
{
	struct proc *p = td->td_proc;

	KASSERT((p->p_numthreads == 1), ("Unthreading with >1 threads"));
	upcall_remove(td);
	p->p_flag &= ~(P_SA|P_HADTHREADS);
	td->td_mailbox = NULL;
	td->td_pflags &= ~(TDP_SA | TDP_CAN_UNBIND);
	if (td->td_standin != NULL) {
		thread_stash(td->td_standin);
		td->td_standin = NULL;
	}
	sched_set_concurrency(td->td_ksegrp, 1);
}

/*
 * Called from:
 *  thread_exit()
 */
void
thread_unlink(struct thread *td)
{
	struct proc *p = td->td_proc;
	struct ksegrp *kg = td->td_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_REMOVE(&p->p_threads, td, td_plist);
	p->p_numthreads--;
	TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
	kg->kg_numthreads--;
	/* Could clear a few other things here. */
	/* Must NOT clear links to proc and ksegrp! */
}

/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar).  Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single threaded in the suspend mode when
 * there are no threads in user mode.  Threads in the kernel must be
 * allowed to continue until they get to the user boundary.  They may even
 * copy out their return values and data before suspending.  They may
 * however be accelerated in reaching the user boundary as we will wake up
 * any sleeping threads that are interruptible (PCATCH).
 */
int
thread_single(int mode)
{
	struct thread *td;
	struct thread *td2;
	struct proc *p;
	int remaining;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT((td != NULL), ("curthread is NULL"));

	if ((p->p_flag & P_HADTHREADS) == 0)
		return (0);

	/* Is someone already single threading? */
	if (p->p_singlethread != NULL && p->p_singlethread != td)
		return (1);

	if (mode == SINGLE_EXIT) {
		p->p_flag |= P_SINGLE_EXIT;
		p->p_flag &= ~P_SINGLE_BOUNDARY;
	} else {
		p->p_flag &= ~P_SINGLE_EXIT;
		if (mode == SINGLE_BOUNDARY)
			p->p_flag |= P_SINGLE_BOUNDARY;
		else
			p->p_flag &= ~P_SINGLE_BOUNDARY;
	}
	p->p_flag |= P_STOPPED_SINGLE;
	mtx_lock_spin(&sched_lock);
	p->p_singlethread = td;
	if (mode == SINGLE_EXIT)
		remaining = p->p_numthreads;
	else if (mode == SINGLE_BOUNDARY)
		remaining = p->p_numthreads - p->p_boundary_count;
	else
		remaining = p->p_numthreads - p->p_suspcount;
	while (remaining != 1) {
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			td2->td_flags |= TDF_ASTPENDING;
			if (TD_IS_INHIBITED(td2)) {
				switch (mode) {
				case SINGLE_EXIT:
					if (td->td_flags & TDF_DBSUSPEND)
						td->td_flags &= ~TDF_DBSUSPEND;
					if (TD_IS_SUSPENDED(td2))
						thread_unsuspend_one(td2);
					if (TD_ON_SLEEPQ(td2) &&
					    (td2->td_flags & TDF_SINTR))
						sleepq_abort(td2, EINTR);
					break;
				case SINGLE_BOUNDARY:
					if (TD_IS_SUSPENDED(td2) &&
					    !(td2->td_flags & TDF_BOUNDARY))
						thread_unsuspend_one(td2);
					if (TD_ON_SLEEPQ(td2) &&
					    (td2->td_flags & TDF_SINTR))
						sleepq_abort(td2, ERESTART);
					break;
				default:
					if (TD_IS_SUSPENDED(td2))
						continue;
					/*
					 * Maybe other inhibited states too?
					 */
					if ((td2->td_flags & TDF_SINTR) &&
					    (td2->td_inhibitors &
					    (TDI_SLEEPING | TDI_SWAPPED)))
						thread_suspend_one(td2);
					break;
				}
			}
#ifdef SMP
			else if (TD_IS_RUNNING(td2) && td != td2) {
				forward_signal(td2);
			}
#endif
		}
		if (mode == SINGLE_EXIT)
			remaining = p->p_numthreads;
		else if (mode == SINGLE_BOUNDARY)
			remaining = p->p_numthreads - p->p_boundary_count;
		else
			remaining = p->p_numthreads - p->p_suspcount;

		/*
		 * Maybe we suspended some threads; was it enough?
		 */
		if (remaining == 1)
			break;

		/*
		 * Wake us up when everyone else has suspended.
		 * In the meantime we suspend as well.
		 */
		thread_suspend_one(td);
		PROC_UNLOCK(p);
		mi_switch(SW_VOL, NULL);
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		if (mode == SINGLE_EXIT)
			remaining = p->p_numthreads;
		else if (mode == SINGLE_BOUNDARY)
			remaining = p->p_numthreads - p->p_boundary_count;
		else
			remaining = p->p_numthreads - p->p_suspcount;
	}
	if (mode == SINGLE_EXIT) {
		/*
		 * We have gotten rid of all the other threads and we
		 * are about to either exit or exec.  In either case,
		 * we try our utmost to revert to being a non-threaded
		 * process.
		 */
		p->p_singlethread = NULL;
		p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT);
		thread_unthread(td);
	}
	mtx_unlock_spin(&sched_lock);
	return (0);
}

/*
 * Called from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is non-zero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          |   returns 0 or 1
 *               | when ST ends       |   immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       |   returns 1
 *               |                    |   immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is in effect, even a single threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
int
thread_suspend_check(int return_instead)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	while (P_SHOULDSTOP(p) ||
	      ((p->p_flag & P_TRACED) && (td->td_flags & TDF_DBSUSPEND))) {
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			KASSERT(p->p_singlethread != NULL,
			    ("singlethread not set"));
			/*
			 * The only suspension in action is single-threading.
			 * The single threader need not stop.
			 * XXX Should be safe to access unlocked
			 * as it can only be set to be true by us.
			 */
			if (p->p_singlethread == td)
				return (0);	/* Exempt from stopping. */
		}
		if ((p->p_flag & P_SINGLE_EXIT) && return_instead)
			return (EINTR);

		/* Should we go to the user boundary if we didn't come from there? */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
		    (p->p_flag & P_SINGLE_BOUNDARY) && return_instead)
			return (ERESTART);

		/* If the thread will exit, flush its pending signals. */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td))
			sigqueue_flush(&td->td_sigqueue);

		mtx_lock_spin(&sched_lock);
		thread_stopped(p);
		/*
		 * If the process is waiting for us to exit,
		 * this thread should simply exit.
		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
		 */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td))
			thread_exit();

		/*
		 * When a thread suspends, it just
		 * moves to the process's suspend queue
		 * and stays there.
		 */
		thread_suspend_one(td);
		if (return_instead == 0) {
			p->p_boundary_count++;
			td->td_flags |= TDF_BOUNDARY;
		}
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount)
				thread_unsuspend_one(p->p_singlethread);
		}
		PROC_UNLOCK(p);
		mi_switch(SW_INVOL, NULL);
		if (return_instead == 0) {
			p->p_boundary_count--;
			td->td_flags &= ~TDF_BOUNDARY;
		}
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
	}
	return (0);
}

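/*
 * Suspend a thread: mark it suspended, count it in p_suspcount and put it
 * on the process's suspend queue.  The proc lock and sched_lock must be held.
 */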
void
thread_suspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
	p->p_suspcount++;
	TD_SET_SUSPENDED(td);
	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
}

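/*
 * Resume a single suspended thread: remove it from the suspend queue,
 * clear its suspended state and make it runnable again.
 */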
void
thread_unsuspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
	TD_CLR_SUSPENDED(td);
	p->p_suspcount--;
	setrunnable(td);
}

/*
 * Allow all threads blocked by single threading to continue running.
 */
void
thread_unsuspend(struct proc *p)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!P_SHOULDSTOP(p)) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
	    (p->p_numthreads == p->p_suspcount)) {
		/*
		 * Stopping everything also did the job for the single
		 * threading request.  Now that we've downgraded to
		 * single-threaded, let it continue.
		 */
		thread_unsuspend_one(p->p_singlethread);
	}
}

/*
 * End the single threading mode.
 */
void
thread_single_end(void)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY);
	mtx_lock_spin(&sched_lock);
	p->p_singlethread = NULL;
	p->p_procscopegrp = NULL;
	/*
	 * If there are other threads they may now run,
	 * unless of course there is a blanket 'stop order'
	 * on the process.  The single threader must be allowed
	 * to continue, however, as this is a bad place to stop.
	 */
	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
	}
	mtx_unlock_spin(&sched_lock);
}

/*
 * Called before going into an interruptible sleep to see if we have been
 * interrupted or requested to exit.
 */
int
thread_sleep_check(struct thread *td)
{
	struct proc *p;

	p = td->td_proc;
	mtx_assert(&sched_lock, MA_OWNED);
	if (p->p_flag & P_HADTHREADS) {
		if (p->p_singlethread != td) {
			if (p->p_flag & P_SINGLE_EXIT)
				return (EINTR);
			if (p->p_flag & P_SINGLE_BOUNDARY)
				return (ERESTART);
		}
		if (td->td_flags & TDF_INTERRUPT)
			return (td->td_intrval);
	}
	return (0);
}

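/*
 * Look up a thread in a process by its thread ID.  Returns NULL if no
 * thread with that tid belongs to the process.  The proc lock must be
 * held; sched_lock is taken while walking the thread list.
 */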
struct thread *
thread_find(struct proc *p, lwpid_t tid)
{
	struct thread *td;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_lock_spin(&sched_lock);
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_tid == tid)
			break;
	}
	mtx_unlock_spin(&sched_lock);
	return (td);
}