kern_thread.c revision 130199
/*
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_thread.c 130199 2004-06-07 19:00:57Z julian $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/sched.h>
#include <sys/sleepqueue.h>
#include <sys/turnstile.h>
#include <sys/ktr.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>

/*
 * KSEGRP related storage.
 */
static uma_zone_t ksegrp_zone;
static uma_zone_t kse_zone;
static uma_zone_t thread_zone;

/* DEBUG ONLY */
SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
static int thread_debug = 0;
SYSCTL_INT(_kern_threads, OID_AUTO, debug, CTLFLAG_RW,
	&thread_debug, 0, "thread debug");

int max_threads_per_proc = 1500;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

int max_groups_per_proc = 500;
SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
	&max_groups_per_proc, 0, "Limit on thread groups per proc");

int max_threads_hits;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
	&max_threads_hits, 0, "");

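/*
 * Zombie lists: threads, KSEs and KSE groups that have exited but whose
 * storage has not yet been reclaimed are parked here until thread_reap()
 * frees them.  The lists are protected by the kse_zombie_lock spin mutex.
 */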
TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
struct mtx kse_zombie_lock;
MTX_SYSINIT(kse_zombie_lock, &kse_zombie_lock, "kse zombie lock", MTX_SPIN);

void kse_purge(struct proc *p, struct thread *td);
void kse_purge_group(struct thread *td);

/* move to proc.h */
extern void	kseinit(void);
extern void	kse_GC(void);


/*
 * Thread ID allocator. The allocator keeps track of assigned IDs by
 * using a bitmap. The bitmap is created in parts. The parts are linked
 * together.
 */
typedef u_long tid_bitmap_word;

#define	TID_IDS_PER_PART	1024
#define	TID_IDS_PER_IDX		(sizeof(tid_bitmap_word) << 3)
#define	TID_BITMAP_SIZE		(TID_IDS_PER_PART / TID_IDS_PER_IDX)
#define	TID_MIN			(PID_MAX + 1)

struct tid_bitmap_part {
	STAILQ_ENTRY(tid_bitmap_part) bmp_next;
	tid_bitmap_word	bmp_bitmap[TID_BITMAP_SIZE];
	int		bmp_base;
	int		bmp_free;
};
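
/*
 * Example (illustrative only): with a 64-bit u_long, TID_IDS_PER_IDX is 64
 * and TID_BITMAP_SIZE is 16, so each part tracks 1024 IDs.  A thread ID of
 * bmp_base + 130 then lives at bmp_bitmap[2], bit 2, since 130 / 64 == 2
 * and 130 % 64 == 2.  A set bit means the ID is free, which is what lets
 * thread_new_tid() use ffsl() to find an available ID.
 */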

static STAILQ_HEAD(, tid_bitmap_part) tid_bitmap =
    STAILQ_HEAD_INITIALIZER(tid_bitmap);
static uma_zone_t tid_zone;

struct mtx tid_lock;
MTX_SYSINIT(tid_lock, &tid_lock, "TID lock", MTX_DEF);

/*
 * Prepare a thread for use.
 */
static void
thread_ctor(void *mem, int size, void *arg)
{
	struct thread	*td;

	td = (struct thread *)mem;
	td->td_tid = 0;
	td->td_state = TDS_INACTIVE;
	td->td_oncpu	= NOCPU;
	td->td_critnest = 1;
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread *td;
	struct tid_bitmap_part *bmp;
	int bit, idx, tid;

	td = (struct thread *)mem;

	if (td->td_tid > PID_MAX) {
		STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
			if (td->td_tid >= bmp->bmp_base &&
			    td->td_tid < bmp->bmp_base + TID_IDS_PER_PART)
				break;
		}
		KASSERT(bmp != NULL, ("No TID bitmap?"));
		mtx_lock(&tid_lock);
		tid = td->td_tid - bmp->bmp_base;
		idx = tid / TID_IDS_PER_IDX;
		bit = tid % TID_IDS_PER_IDX;
		bmp->bmp_bitmap[idx] |= 1UL << bit;
		bmp->bmp_free++;
		mtx_unlock(&tid_lock);
	}

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static void
thread_init(void *mem, int size)
{
	struct thread	*td;

	td = (struct thread *)mem;
	vm_thread_new(td, 0);
	cpu_thread_setup(td);
	td->td_sleepqueue = sleepq_alloc();
	td->td_turnstile = turnstile_alloc();
	td->td_sched = (struct td_sched *)&td[1];
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread	*td;

	td = (struct thread *)mem;
	turnstile_free(td->td_turnstile);
	sleepq_free(td->td_sleepqueue);
	vm_thread_dispose(td);
}

/*
 * Initialize type-stable parts of a kse (when newly created).
 */
static void
kse_init(void *mem, int size)
{
	struct kse	*ke;

	ke = (struct kse *)mem;
	ke->ke_sched = (struct ke_sched *)&ke[1];
}

/*
 * Initialize type-stable parts of a ksegrp (when newly created).
 */
static void
ksegrp_init(void *mem, int size)
{
	struct ksegrp	*kg;

	kg = (struct ksegrp *)mem;
	kg->kg_sched = (struct kg_sched *)&kg[1];
}

/*
 * Link a KSE into its KSE group.
 */
void
kse_link(struct kse *ke, struct ksegrp *kg)
{
	struct proc *p = kg->kg_proc;

	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
	kg->kg_kses++;
	ke->ke_state	= KES_UNQUEUED;
	ke->ke_proc	= p;
	ke->ke_ksegrp	= kg;
	ke->ke_thread	= NULL;
	ke->ke_oncpu	= NOCPU;
	ke->ke_flags	= 0;
}

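/*
 * Unlink a KSE from its KSE group (and from the idle queue, if it is idle)
 * and stash it on the zombie list for later reclamation.
 */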
void
kse_unlink(struct kse *ke)
{
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	kg = ke->ke_ksegrp;
	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
	if (ke->ke_state == KES_IDLE) {
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses--;
	}
	--kg->kg_kses;
	/*
	 * Aggregate stats from the KSE
	 */
	kse_stash(ke);
}

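/*
 * Link a KSE group into a process, initializing its queues and counters.
 */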
void
ksegrp_link(struct ksegrp *kg, struct proc *p)
{

	TAILQ_INIT(&kg->kg_threads);
	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_slpq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
	TAILQ_INIT(&kg->kg_iq);		/* all idle kses in ksegrp */
	TAILQ_INIT(&kg->kg_upcalls);	/* all upcall structure in ksegrp */
	kg->kg_proc = p;
	/*
	 * the following counters are in the -zero- section
	 * and may not need clearing
	 */
	kg->kg_numthreads = 0;
	kg->kg_runnable   = 0;
	kg->kg_kses       = 0;
	kg->kg_runq_kses  = 0; /* XXXKSE change name */
	kg->kg_idle_kses  = 0;
	kg->kg_numupcalls = 0;
	/* link it in now that it's consistent */
	p->p_numksegrps++;
	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
}

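/*
 * Unlink an empty KSE group from its process and stash it on the zombie
 * list.  The group must have no remaining threads, KSEs or upcalls.
 */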
void
ksegrp_unlink(struct ksegrp *kg)
{
	struct proc *p;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((kg->kg_numthreads == 0), ("ksegrp_unlink: residual threads"));
	KASSERT((kg->kg_kses == 0), ("ksegrp_unlink: residual kses"));
	KASSERT((kg->kg_numupcalls == 0), ("ksegrp_unlink: residual upcalls"));

	p = kg->kg_proc;
	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
	p->p_numksegrps--;
	/*
	 * Aggregate stats from the KSE
	 */
	ksegrp_stash(kg);
}

/*
 * For a newly created process,
 * link up all the structures and its initial threads etc.
 */
void
proc_linkup(struct proc *p, struct ksegrp *kg,
	    struct kse *ke, struct thread *td)
{

	TAILQ_INIT(&p->p_ksegrps);	     /* all ksegrps in proc */
	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
	TAILQ_INIT(&p->p_suspended);	     /* Threads suspended */
	p->p_numksegrps = 0;
	p->p_numthreads = 0;

	ksegrp_link(kg, p);
	kse_link(ke, kg);
	thread_link(td, kg);
}

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, 0);
	tid_zone = uma_zcreate("TID", sizeof(struct tid_bitmap_part),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
	    NULL, NULL, ksegrp_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	kse_zone = uma_zcreate("KSE", sched_sizeof_kse(),
	    NULL, NULL, kse_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	kseinit();
}

/*
 * Stash an extra thread on the zombie thread queue.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an extra kse on the zombie kse queue.
 */
void
kse_stash(struct kse *ke)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Stash an extra ksegrp on the zombie ksegrp queue.
 */
void
ksegrp_stash(struct ksegrp *kg)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
	mtx_unlock_spin(&kse_zombie_lock);
}

/*
 * Reap zombie resources: threads, KSEs and KSE groups.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;
	struct kse *ke_first, *ke_next;
	struct ksegrp *kg_first, *kg_next;

	/*
	 * Don't even bother to lock if none at this instant,
	 * we really don't care about the next instant..
	 */
	if ((!TAILQ_EMPTY(&zombie_threads))
	    || (!TAILQ_EMPTY(&zombie_kses))
	    || (!TAILQ_EMPTY(&zombie_ksegrps))) {
		mtx_lock_spin(&kse_zombie_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		ke_first = TAILQ_FIRST(&zombie_kses);
		kg_first = TAILQ_FIRST(&zombie_ksegrps);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		if (ke_first)
			TAILQ_INIT(&zombie_kses);
		if (kg_first)
			TAILQ_INIT(&zombie_ksegrps);
		mtx_unlock_spin(&kse_zombie_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_runq);
			if (td_first->td_ucred)
				crfree(td_first->td_ucred);
			thread_free(td_first);
			td_first = td_next;
		}
		while (ke_first) {
			ke_next = TAILQ_NEXT(ke_first, ke_procq);
			kse_free(ke_first);
			ke_first = ke_next;
		}
		while (kg_first) {
			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
			ksegrp_free(kg_first);
			kg_first = kg_next;
		}
	}
	kse_GC();
}

/*
 * Allocate a ksegrp.
 */
struct ksegrp *
ksegrp_alloc(void)
{
	return (uma_zalloc(ksegrp_zone, M_WAITOK));
}

/*
 * Allocate a kse.
 */
struct kse *
kse_alloc(void)
{
	return (uma_zalloc(kse_zone, M_WAITOK));
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{
	thread_reap(); /* check if any zombies to get */
	return (uma_zalloc(thread_zone, M_WAITOK));
}

/*
 * Deallocate a ksegrp.
 */
void
ksegrp_free(struct ksegrp *td)
{
	uma_zfree(ksegrp_zone, td);
}

/*
 * Deallocate a kse.
 */
void
kse_free(struct kse *td)
{
	uma_zfree(kse_zone, td);
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{

	cpu_thread_clean(td);
	uma_zfree(thread_zone, td);
}

/*
 * Assign a thread ID.
 */
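/*
 * The new bitmap part is allocated with tid_lock dropped, because
 * uma_zalloc(..., M_WAITOK) may sleep.  Once the lock is reacquired the
 * list is re-checked, and the freshly allocated part is freed again if it
 * turns out not to be needed.
 */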
int
thread_new_tid(void)
{
	struct tid_bitmap_part *bmp, *new;
	int bit, idx, tid;

	mtx_lock(&tid_lock);
	STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
		if (bmp->bmp_free)
			break;
	}
	/* Create a new bitmap if we run out of free bits. */
	if (bmp == NULL) {
		mtx_unlock(&tid_lock);
		new = uma_zalloc(tid_zone, M_WAITOK);
		mtx_lock(&tid_lock);
		bmp = STAILQ_LAST(&tid_bitmap, tid_bitmap_part, bmp_next);
		if (bmp == NULL || bmp->bmp_free < TID_IDS_PER_PART/2) {
			/* 1=free, 0=assigned. This way we can use ffsl(). */
			memset(new->bmp_bitmap, ~0U, sizeof(new->bmp_bitmap));
			new->bmp_base = (bmp == NULL) ? TID_MIN :
			    bmp->bmp_base + TID_IDS_PER_PART;
			new->bmp_free = TID_IDS_PER_PART;
			STAILQ_INSERT_TAIL(&tid_bitmap, new, bmp_next);
			bmp = new;
			new = NULL;
		}
	} else
		new = NULL;
	/* We have a bitmap with available IDs. */
	idx = 0;
	while (idx < TID_BITMAP_SIZE && bmp->bmp_bitmap[idx] == 0UL)
		idx++;
	bit = ffsl(bmp->bmp_bitmap[idx]) - 1;
	tid = bmp->bmp_base + idx * TID_IDS_PER_IDX + bit;
	bmp->bmp_bitmap[idx] &= ~(1UL << bit);
	bmp->bmp_free--;
	mtx_unlock(&tid_lock);

	if (new != NULL)
		uma_zfree(tid_zone, new);
	return (tid);
}


/*
 * Discard the current thread and exit from its context.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our CPU's deadthread holder. This means
 * we needn't worry about someone else grabbing our context before we
 * do a cpu_throw().
 */
void
thread_exit(void)
{
	struct thread *td;
	struct kse *ke;
	struct proc *p;
	struct ksegrp	*kg;

	td = curthread;
	kg = td->td_ksegrp;
	p = td->td_proc;
	ke = td->td_kse;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	KASSERT(ke != NULL, ("thread exiting without a kse"));
	KASSERT(kg != NULL, ("thread exiting without a kse group"));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	CTR1(KTR_PROC, "thread_exit: thread %p", td);
	mtx_assert(&Giant, MA_NOTOWNED);

	if (td->td_standin != NULL) {
		thread_stash(td->td_standin);
		td->td_standin = NULL;
	}

	cpu_thread_exit(td);	/* XXXSMP */

	/*
	 * The last thread is left attached to the process
	 * so that the whole bundle gets recycled; skip
	 * all of this if we are that last thread.
	 */
	if (p->p_numthreads > 1) {
		thread_unlink(td);
		if (p->p_maxthrwaits)
			wakeup(&p->p_numthreads);
		/*
		 * The test below is NOT true if we are the
		 * sole exiting thread. P_STOPPED_SINGLE is unset
		 * in exit1() once only one thread survives.
		 */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}

		/*
		 * Because each upcall structure has an owner thread,
		 * and the owner thread exits only when the process is
		 * exiting, upcalls to userland are no longer needed;
		 * deleting the upcall structure is safe here.  Thus,
		 * when all threads in a group have exited, all upcalls
		 * in the group are automatically freed as well.
		 */
		if (td->td_upcall)
			upcall_remove(td);

		sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
		sched_exit_kse(FIRST_KSE_IN_PROC(p), ke);
		ke->ke_state = KES_UNQUEUED;
		ke->ke_thread = NULL;
		/*
		 * Decide what to do with the KSE attached to this thread.
		 */
		if (ke->ke_flags & KEF_EXIT) {
			kse_unlink(ke);
			if (kg->kg_kses == 0) {
				sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), kg);
				ksegrp_unlink(kg);
			}
		} else
			kse_reassign(ke);
		PROC_UNLOCK(p);
		td->td_kse	= NULL;
		td->td_state	= TDS_INACTIVE;
#if 0
		td->td_proc	= NULL;
#endif
		td->td_ksegrp	= NULL;
		td->td_last_kse	= NULL;
		PCPU_SET(deadthread, td);
	} else {
		PROC_UNLOCK(p);
	}
	/* XXX Shouldn't cpu_throw() here. */
	mtx_assert(&sched_lock, MA_OWNED);
	cpu_throw(td, choosethread());
	panic("I'm a teapot!");
	/* NOTREACHED */
}

/*
 * Do any thread-specific cleanup that may be needed in wait().
 * Called with Giant, proc and schedlock not held.
 */
void
thread_wait(struct proc *p)
{
	struct thread *td;

	mtx_assert(&Giant, MA_NOTOWNED);
	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
	KASSERT((p->p_numksegrps == 1), ("Multiple ksegrps in wait1()"));
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_standin != NULL) {
			thread_free(td->td_standin);
			td->td_standin = NULL;
		}
		cpu_thread_clean(td);
	}
	thread_reap();	/* check for zombie threads etc. */
}

/*
 * Link a thread to a process and set up anything that needs to be
 * initialized for it to be used by the process.
 *
 * Note that we do not link to the proc's ucred here.
 * The thread is linked as if running but with no KSE assigned.
 */
void
thread_link(struct thread *td, struct ksegrp *kg)
{
	struct proc *p;

	p = kg->kg_proc;
	td->td_state    = TDS_INACTIVE;
	td->td_proc     = p;
	td->td_ksegrp   = kg;
	td->td_last_kse = NULL;
	td->td_flags    = 0;
	td->td_kflags	= 0;
	td->td_kse      = NULL;

	LIST_INIT(&td->td_contested);
	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
	p->p_numthreads++;
	kg->kg_numthreads++;
}

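/*
 * Unlink a thread from its process and KSE group and update their
 * thread counts.  Called with sched_lock held.
 */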
void
thread_unlink(struct thread *td)
{
	struct proc *p = td->td_proc;
	struct ksegrp *kg = td->td_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_REMOVE(&p->p_threads, td, td_plist);
	p->p_numthreads--;
	TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
	kg->kg_numthreads--;
	/* could clear a few other things here */
}

/*
 * Purge a ksegrp resource. When a ksegrp is preparing to
 * exit, it calls this function.
 */
void
kse_purge_group(struct thread *td)
{
	struct ksegrp *kg;
	struct kse *ke;

	kg = td->td_ksegrp;
	KASSERT(kg->kg_numthreads == 1, ("%s: bad thread number", __func__));
	while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
		KASSERT(ke->ke_state == KES_IDLE,
			("%s: wrong idle KSE state", __func__));
		kse_unlink(ke);
	}
	KASSERT((kg->kg_kses == 1),
		("%s: ksegrp still has %d KSEs", __func__, kg->kg_kses));
	KASSERT((kg->kg_numupcalls == 0),
	        ("%s: ksegrp still has %d upcall datas",
		__func__, kg->kg_numupcalls));
}

/*
 * Purge a process's KSE resources. When a process is preparing to
 * exit, it calls kse_purge to release any extra KSE resources in
 * the process.
 */
void
kse_purge(struct proc *p, struct thread *td)
{
	struct ksegrp *kg;
	struct kse *ke;

	KASSERT(p->p_numthreads == 1, ("bad thread number"));
	while ((kg = TAILQ_FIRST(&p->p_ksegrps)) != NULL) {
		TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
		p->p_numksegrps--;
		/*
		 * KSEs have no owner, so after all threads in the
		 * group have exited, some KSEs may have been left on
		 * the idle queue; garbage collect them now.
		 */
		while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
			KASSERT(ke->ke_state == KES_IDLE,
			   ("%s: wrong idle KSE state", __func__));
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			kg->kg_idle_kses--;
			TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
			kg->kg_kses--;
			kse_stash(ke);
		}
		KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
		        ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
		        ("ksegrp has wrong kg_kses: %d", kg->kg_kses));
		KASSERT((kg->kg_numupcalls == 0),
		        ("%s: ksegrp still has %d upcall datas",
			__func__, kg->kg_numupcalls));

		if (kg != td->td_ksegrp)
			ksegrp_stash(kg);
	}
	TAILQ_INSERT_HEAD(&p->p_ksegrps, td->td_ksegrp, kg_ksegrp);
	p->p_numksegrps++;
}

/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar). Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single-threaded in the suspend mode when
 * there are no threads in user mode. Threads in the kernel must be
 * allowed to continue until they get to the user boundary. They may even
 * copy out their return values and data before suspending. They may,
 * however, be accelerated in reaching the user boundary, as we will wake
 * up any sleeping threads that are interruptible (PCATCH).
 */
int
thread_single(int force_exit)
{
	struct thread *td;
	struct thread *td2;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT((td != NULL), ("curthread is NULL"));

	if ((p->p_flag & P_SA) == 0 && p->p_numthreads == 1)
		return (0);

	/* Is someone already single threading? */
	if (p->p_singlethread)
		return (1);

	if (force_exit == SINGLE_EXIT) {
		p->p_flag |= P_SINGLE_EXIT;
	} else
		p->p_flag &= ~P_SINGLE_EXIT;
	p->p_flag |= P_STOPPED_SINGLE;
	mtx_lock_spin(&sched_lock);
	p->p_singlethread = td;
	while ((p->p_numthreads - p->p_suspcount) != 1) {
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			td2->td_flags |= TDF_ASTPENDING;
			if (TD_IS_INHIBITED(td2)) {
				if (force_exit == SINGLE_EXIT) {
					if (TD_IS_SUSPENDED(td2)) {
						thread_unsuspend_one(td2);
					}
					if (TD_ON_SLEEPQ(td2) &&
					    (td2->td_flags & TDF_SINTR)) {
						sleepq_abort(td2);
					}
				} else {
					if (TD_IS_SUSPENDED(td2))
						continue;
					/*
					 * Maybe other inhibited states too?
					 * XXXKSE Is it totally safe to
					 * suspend a non-interruptible thread?
					 */
					if (td2->td_inhibitors &
					    (TDI_SLEEPING | TDI_SWAPPED))
						thread_suspend_one(td2);
				}
			}
		}
		/*
		 * Maybe we suspended some threads.. was it enough?
		 */
		if ((p->p_numthreads - p->p_suspcount) == 1)
			break;

		/*
		 * Wake us up when everyone else has suspended.
		 * In the mean time we suspend as well.
		 */
		thread_suspend_one(td);
		PROC_UNLOCK(p);
		mi_switch(SW_VOL);
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
	}
	if (force_exit == SINGLE_EXIT) {
		if (td->td_upcall)
			upcall_remove(td);
		kse_purge(p, td);
	}
	mtx_unlock_spin(&sched_lock);
	return (0);
}

/*
 * Called in from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is nonzero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          |   returns 0 or 1
 *               | when ST ends       |   immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       |   returns 1
 *               |                    |   immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is in effect, even a single-threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
int
thread_suspend_check(int return_instead)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	while (P_SHOULDSTOP(p)) {
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			KASSERT(p->p_singlethread != NULL,
			    ("singlethread not set"));
			/*
			 * The only suspension in effect is a
			 * single-threading; the single threader need not stop.
			 * XXX Should be safe to access unlocked
			 * as it can only be set to be true by us.
			 */
			if (p->p_singlethread == td)
				return (0);	/* Exempt from stopping. */
		}
		if (return_instead)
			return (1);

		mtx_lock_spin(&sched_lock);
		thread_stopped(p);
		/*
		 * If the process is waiting for us to exit,
		 * this thread should just suicide.
		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
		 */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
			if (p->p_flag & P_SA)
				thread_exit();
			else
				thr_exit1();
		}

		/*
		 * When a thread suspends, it just
		 * moves to the process's suspend queue
		 * and stays there.
		 */
		thread_suspend_one(td);
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}
		PROC_UNLOCK(p);
		mi_switch(SW_INVOL);
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
	}
	return (0);
}

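/*
 * Suspend a single thread: mark it suspended, add it to the process's
 * suspend queue and bump p_suspcount.  Called with sched_lock and the
 * proc lock held.
 */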
void
thread_suspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
	p->p_suspcount++;
	TD_SET_SUSPENDED(td);
	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
	/*
	 * Hack: If we are suspending but are on the sleep queue
	 * then we are in msleep or the cv equivalent. We
	 * want to look like we have two Inhibitors.
	 * May already be set.. doesn't matter.
	 */
	if (TD_ON_SLEEPQ(td))
		TD_SET_SLEEPING(td);
}

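/*
 * Resume a suspended thread: take it off the suspend queue, clear its
 * suspended state and make it runnable again.
 */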
void
thread_unsuspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
	TD_CLR_SUSPENDED(td);
	p->p_suspcount--;
	setrunnable(td);
}

/*
 * Allow all threads blocked by single threading to continue running.
 */
void
thread_unsuspend(struct proc *p)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!P_SHOULDSTOP(p)) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
	    (p->p_numthreads == p->p_suspcount)) {
		/*
		 * Stopping everything also did the job for the single
		 * threading request. Now we've downgraded to single-threaded,
		 * let it continue.
		 */
		thread_unsuspend_one(p->p_singlethread);
	}
}

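/*
 * End a single-threading period started by thread_single(): clear the
 * single-threading flags and let any suspended threads run again unless
 * a blanket stop is still in effect on the process.
 */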
void
thread_single_end(void)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT);
	mtx_lock_spin(&sched_lock);
	p->p_singlethread = NULL;
	/*
	 * If there are other threads they may now run,
	 * unless of course there is a blanket 'stop order'
	 * on the process. The single threader must be allowed
	 * to continue, however, as this is a bad place to stop.
	 */
	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
	}
	mtx_unlock_spin(&sched_lock);
}