thread.c revision 641:057d58d31499
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/signal.h>
#include <sys/stack.h>
#include <sys/pcb.h>
#include <sys/user.h>
#include <sys/systm.h>
#include <sys/sysinfo.h>
#include <sys/var.h>
#include <sys/errno.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/resource.h>
#include <sys/task.h>
#include <sys/project.h>
#include <sys/proc.h>
#include <sys/debug.h>
#include <sys/inline.h>
#include <sys/disp.h>
#include <sys/class.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kp.h>
#include <sys/machlock.h>
#include <sys/kmem.h>
#include <sys/varargs.h>
#include <sys/turnstile.h>
#include <sys/poll.h>
#include <sys/vtrace.h>
#include <sys/callb.h>
#include <c2/audit.h>
#include <sys/tnf.h>
#include <sys/sobject.h>
#include <sys/cpupart.h>
#include <sys/pset.h>
#include <sys/door.h>
#include <sys/spl.h>
#include <sys/copyops.h>
#include <sys/rctl.h>
#include <sys/pool.h>
#include <sys/zone.h>
#include <sys/cpc_impl.h>
#include <sys/sdt.h>
#include <sys/reboot.h>
#include <sys/kdi.h>

struct kmem_cache *thread_cache;	/* cache of free threads */
struct kmem_cache *lwp_cache;		/* cache of free lwps */
struct kmem_cache *turnstile_cache;	/* cache of free turnstiles */

/*
 * allthreads is only for use by kmem_readers.  All kernel loops can use
 * the current thread as a start/end point.
 */
static kthread_t *allthreads = &t0;	/* circular list of all threads */

static kcondvar_t reaper_cv;		/* synchronization var */
kthread_t	*thread_deathrow;	/* circular list of reapable threads */
kthread_t	*lwp_deathrow;		/* circular list of reapable threads */
kmutex_t	reaplock;		/* protects lwp and thread deathrows */
kmutex_t	thread_free_lock;	/* protects clock from reaper */
int	thread_reapcnt = 0;		/* number of threads on deathrow */
int	lwp_reapcnt = 0;		/* number of lwps on deathrow */
int	reaplimit = 16;			/* delay reaping until reaplimit */

extern int nthread;

id_t	syscid;				/* system scheduling class ID */
void	*segkp_thread;			/* cookie for segkp pool */

int lwp_cache_sz = 32;
int t_cache_sz = 8;
static kt_did_t next_t_id = 1;

/*
 * Min/Max stack sizes for stack size parameters
 */
#define	MAX_STKSIZE	(32 * DEFAULTSTKSZ)
#define	MIN_STKSIZE	DEFAULTSTKSZ

/*
 * default_stksize overrides lwp_default_stksize if it is set.
 */
int	default_stksize;
int	lwp_default_stksize;

static zone_key_t zone_thread_key;

/*
 * forward declarations for internal thread specific data (tsd)
 */
static void *tsd_realloc(void *, size_t, size_t);

/*ARGSUSED*/
static int
turnstile_constructor(void *buf, void *cdrarg, int kmflags)
{
	bzero(buf, sizeof (turnstile_t));
	return (0);
}

/*ARGSUSED*/
static void
turnstile_destructor(void *buf, void *cdrarg)
{
	turnstile_t *ts = buf;

	ASSERT(ts->ts_free == NULL);
	ASSERT(ts->ts_waiters == 0);
	ASSERT(ts->ts_inheritor == NULL);
	ASSERT(ts->ts_sleepq[0].sq_first == NULL);
	ASSERT(ts->ts_sleepq[1].sq_first == NULL);
}

void
thread_init(void)
{
	kthread_t *tp;
	extern char sys_name[];
	extern void idle();
	struct cpu *cpu = CPU;

	mutex_init(&reaplock, NULL, MUTEX_SPIN, (void *)ipltospl(DISP_LEVEL));

#if defined(__i386) || defined(__amd64)
	thread_cache = kmem_cache_create("thread_cache", sizeof (kthread_t),
	    PTR24_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);

	/*
	 * "struct _klwp" includes a "struct pcb", which includes a
	 * "struct fpu", which needs to be 16-byte aligned on amd64
	 * (and even on i386 for fxsave/fxrstor).
	 */
	lwp_cache = kmem_cache_create("lwp_cache", sizeof (klwp_t),
	    16, NULL, NULL, NULL, NULL, NULL, 0);
#else
	/*
	 * Allocate thread structures from static_arena.  This prevents
	 * issues where a thread tries to relocate its own thread
	 * structure and touches it after the mapping has been suspended.
	 */
	thread_cache = kmem_cache_create("thread_cache", sizeof (kthread_t),
	    PTR24_ALIGN, NULL, NULL, NULL, NULL, static_arena, 0);

	lwp_cache = kmem_cache_create("lwp_cache", sizeof (klwp_t),
	    0, NULL, NULL, NULL, NULL, NULL, 0);
#endif

	turnstile_cache = kmem_cache_create("turnstile_cache",
	    sizeof (turnstile_t), 0,
	    turnstile_constructor, turnstile_destructor, NULL, NULL, NULL, 0);

	cred_init();

	rctl_init();
	project_init();
	zone_init();
	task_init();
	pool_init();

	curthread->t_ts = kmem_cache_alloc(turnstile_cache, KM_SLEEP);

	/*
	 * Originally, we had two parameters to set default stack
	 * size: one for lwp's (lwp_default_stksize), and one for
	 * kernel-only threads (DEFAULTSTKSZ, a.k.a. _defaultstksz).
	 * Now we have a third parameter that overrides both if it is
	 * set to a legal stack size, called default_stksize.
	 */
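
	/*
	 * Illustrative sketch, not part of the original code: on systems
	 * where these variables are exposed as tunables, they could be
	 * overridden from /etc/system before startup reaches this point,
	 * for example (the 0x6000 value is only an assumed example):
	 *
	 *	set default_stksize = 0x6000
	 *	set lwp_default_stksize = 0x6000
	 *
	 * A value must be a multiple of PAGESIZE and lie between
	 * MIN_STKSIZE and MAX_STKSIZE, or the checks below fall back to
	 * DEFAULTSTKSZ.
	 */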

	if (default_stksize == 0) {
		default_stksize = DEFAULTSTKSZ;
	} else if (default_stksize % PAGESIZE != 0 ||
	    default_stksize > MAX_STKSIZE ||
	    default_stksize < MIN_STKSIZE) {
		cmn_err(CE_WARN, "Illegal stack size. Using %d",
		    (int)DEFAULTSTKSZ);
		default_stksize = DEFAULTSTKSZ;
	} else {
		lwp_default_stksize = default_stksize;
	}

	if (lwp_default_stksize == 0) {
		lwp_default_stksize = default_stksize;
	} else if (lwp_default_stksize % PAGESIZE != 0 ||
	    lwp_default_stksize > MAX_STKSIZE ||
	    lwp_default_stksize < MIN_STKSIZE) {
		cmn_err(CE_WARN, "Illegal stack size. Using %d",
		    default_stksize);
		lwp_default_stksize = default_stksize;
	}

	segkp_lwp = segkp_cache_init(segkp, lwp_cache_sz,
	    lwp_default_stksize,
	    (KPD_NOWAIT | KPD_HASREDZONE | KPD_LOCKED));

	segkp_thread = segkp_cache_init(segkp, t_cache_sz,
	    default_stksize, KPD_HASREDZONE | KPD_LOCKED | KPD_NO_ANON);

	(void) getcid(sys_name, &syscid);
	curthread->t_cid = syscid;	/* current thread is t0 */

	/*
	 * Set up the first CPU's idle thread.
	 * It runs whenever the CPU has nothing worthwhile to do.
	 */
	tp = thread_create(NULL, 0, idle, NULL, 0, &p0, TS_STOPPED, -1);
	cpu->cpu_idle_thread = tp;
	tp->t_preempt = 1;
	tp->t_disp_queue = cpu->cpu_disp;
	ASSERT(tp->t_disp_queue != NULL);
	tp->t_bound_cpu = cpu;
	tp->t_affinitycnt = 1;

	/*
	 * Registering a thread in the callback table is usually
	 * done in the initialization code of the thread.  In this
	 * case, we do it right after thread creation to avoid
	 * blocking the idle thread while it registers itself.  It also
	 * avoids the possibility of reregistration in case a CPU
	 * restarts its idle thread.
	 */
	CALLB_CPR_INIT_SAFE(tp, "idle");

	/*
	 * Finish initializing the kernel memory allocator now that
	 * thread_create() is available.
	 */
	kmem_thread_init();

	if (boothowto & RB_DEBUG)
		kdi_dvec_thravail();
}

/*
 * Create a thread.
 *
 * thread_create() blocks for memory if necessary.  It never fails.
 *
 * If stk is NULL, the thread is created at the base of the stack
 * and cannot be swapped.
 */
kthread_t *
thread_create(
	caddr_t	stk,
	size_t	stksize,
	void	(*proc)(),
	void	*arg,
	size_t	len,
	proc_t	 *pp,
	int	state,
	pri_t	pri)
{
	kthread_t *t;
	extern struct classfuncs sys_classfuncs;
	turnstile_t *ts;

	/*
	 * Every thread keeps a turnstile around in case it needs to block.
	 * The only reason the turnstile is not simply part of the thread
	 * structure is that we may have to break the association whenever
	 * more than one thread blocks on a given synchronization object.
	 * From a memory-management standpoint, turnstiles are like the
	 * "attached mblks" that hang off dblks in the streams allocator.
	 */
	ts = kmem_cache_alloc(turnstile_cache, KM_SLEEP);

	if (stk == NULL) {
		/*
		 * alloc both thread and stack in segkp chunk
		 */

		if (stksize < default_stksize)
			stksize = default_stksize;

		if (stksize == default_stksize) {
			stk = (caddr_t)segkp_cache_get(segkp_thread);
		} else {
			stksize = roundup(stksize, PAGESIZE);
			stk = (caddr_t)segkp_get(segkp, stksize,
			    (KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED));
		}

		ASSERT(stk != NULL);

		/*
		 * The machine-dependent mutex code may require that
		 * thread pointers (since they may be used for mutex owner
		 * fields) have certain alignment requirements.
		 * PTR24_ALIGN is the size of the alignment quanta.
		 * XXX - assumes stack grows toward low addresses.
		 */
		if (stksize <= sizeof (kthread_t) + PTR24_ALIGN)
			cmn_err(CE_PANIC, "thread_create: proposed stack size"
			    " too small to hold thread.");
#ifdef STACK_GROWTH_DOWN
		stksize -= SA(sizeof (kthread_t) + PTR24_ALIGN - 1);
		stksize &= -PTR24_ALIGN;	/* make thread aligned */
		t = (kthread_t *)(stk + stksize);
		bzero(t, sizeof (kthread_t));
#ifdef	C2_AUDIT
		if (audit_active)
			audit_thread_create(t);
#endif
		t->t_stk = stk + stksize;
		t->t_stkbase = stk;
#else	/* stack grows to larger addresses */
		stksize -= SA(sizeof (kthread_t));
		t = (kthread_t *)(stk);
		bzero(t, sizeof (kthread_t));
		t->t_stk = stk + sizeof (kthread_t);
		t->t_stkbase = stk + stksize + sizeof (kthread_t);
#endif	/* STACK_GROWTH_DOWN */
		t->t_flag |= T_TALLOCSTK;
		t->t_swap = stk;
	} else {
		t = kmem_cache_alloc(thread_cache, KM_SLEEP);
		bzero(t, sizeof (kthread_t));
		ASSERT(((uintptr_t)t & (PTR24_ALIGN - 1)) == 0);
#ifdef	C2_AUDIT
		if (audit_active)
			audit_thread_create(t);
#endif
		/*
		 * Initialize t_stk to the kernel stack pointer to use
		 * upon entry to the kernel
		 */
#ifdef STACK_GROWTH_DOWN
		t->t_stk = stk + stksize;
		t->t_stkbase = stk;
#else
		t->t_stk = stk;			/* 3b2-like */
		t->t_stkbase = stk + stksize;
#endif /* STACK_GROWTH_DOWN */
	}

	/* set default stack flag */
	if (stksize == lwp_default_stksize)
		t->t_flag |= T_DFLTSTK;

	t->t_ts = ts;

	/*
	 * p_cred could be NULL if thread_create is called before cred_init
	 * is called in main.
	 */
	mutex_enter(&pp->p_crlock);
	if (pp->p_cred)
		crhold(t->t_cred = pp->p_cred);
	mutex_exit(&pp->p_crlock);
	t->t_start = gethrestime_sec();
	t->t_startpc = proc;
	t->t_procp = pp;
	t->t_clfuncs = &sys_classfuncs.thread;
	t->t_cid = syscid;
	t->t_pri = pri;
	t->t_stime = lbolt;
	t->t_schedflag = TS_LOAD | TS_DONT_SWAP;
	t->t_bind_cpu = PBIND_NONE;
	t->t_bind_pset = PS_NONE;
	t->t_plockp = &pp->p_lock;
	t->t_copyops = NULL;
	t->t_taskq = NULL;
	t->t_anttime = 0;
	t->t_hatdepth = 0;

	t->t_dtrace_vtime = 1;	/* assure vtimestamp is always non-zero */

	CPU_STATS_ADDQ(CPU, sys, nthreads, 1);
#ifndef NPROBE
	/* Kernel probe */
	tnf_thread_create(t);
#endif /* NPROBE */
	LOCK_INIT_CLEAR(&t->t_lock);

	/*
	 * Callers who give us a NULL proc must do their own
	 * stack initialization.  e.g. lwp_create()
	 */
	if (proc != NULL) {
		t->t_stk = thread_stk_init(t->t_stk);
		thread_load(t, proc, arg, len);
	}

	/*
	 * Put a hold on project0. If this thread is actually in a
	 * different project, then t_proj will be changed later in
	 * lwp_create().  All kernel-only threads must be in project 0.
	 */
	t->t_proj = project_hold(proj0p);

	lgrp_affinity_init(&t->t_lgrp_affinity);

	mutex_enter(&pidlock);
	nthread++;
	t->t_did = next_t_id++;
	t->t_prev = curthread->t_prev;
	t->t_next = curthread;

	/*
	 * Add the thread to the list of all threads, and initialize
	 * its t_cpu pointer.  We need to block preemption since
	 * cpu_offline walks the thread list looking for threads
	 * with t_cpu pointing to the CPU being offlined.  We want
	 * to make sure that the list is consistent and that if t_cpu
	 * is set, the thread is on the list.
	 */
	kpreempt_disable();
	curthread->t_prev->t_next = t;
	curthread->t_prev = t;

	/*
	 * Threads should never have a NULL t_cpu pointer so assign it
	 * here.  If the thread is being created with state TS_RUN a
	 * better CPU may be chosen when it is placed on the run queue.
	 *
	 * We need to keep kernel preemption disabled when setting all
	 * three fields to keep them in sync.  Also, always create in
	 * the default partition since that's where kernel threads go
	 * (if this isn't a kernel thread, t_cpupart will be changed
	 * in lwp_create before setting the thread runnable).
	 */
	t->t_cpupart = &cp_default;

	/*
	 * For now, affiliate this thread with the root lgroup.
	 * Since the kernel does not (presently) allocate its memory
	 * in a locality aware fashion, the root is an appropriate home.
	 * If this thread is later associated with an lwp, it will have
	 * its lgroup reassigned at that time.
	 */
	lgrp_move_thread(t, &cp_default.cp_lgrploads[LGRP_ROOTID], 1);

	/*
	 * Inherit the current cpu.  If this cpu isn't part of the chosen
	 * lgroup, a new cpu will be chosen by cpu_choose when the thread
	 * is ready to run.
	 */
	if (CPU->cpu_part == &cp_default)
		t->t_cpu = CPU;
	else
		t->t_cpu = disp_lowpri_cpu(cp_default.cp_cpulist, t->t_lpl,
		    t->t_pri, NULL);

	t->t_disp_queue = t->t_cpu->cpu_disp;
	kpreempt_enable();

	/*
	 * Initialize thread state and the dispatcher lock pointer.
	 * Need to hold onto pidlock to block allthreads walkers until
	 * the state is set.
	 */
	switch (state) {
	case TS_RUN:
		curthread->t_oldspl = splhigh();	/* get dispatcher spl */
		THREAD_SET_STATE(t, TS_STOPPED, &transition_lock);
		CL_SETRUN(t);
		thread_unlock(t);
		break;

	case TS_ONPROC:
		THREAD_ONPROC(t, t->t_cpu);
		break;

	case TS_FREE:
		/*
		 * Free state will be used for intr threads.
		 * The interrupt routine must set the thread dispatcher
		 * lock pointer (t_lockp) if starting on a CPU
		 * other than the current one.
		 */
		THREAD_FREEINTR(t, CPU);
		break;

	case TS_STOPPED:
		THREAD_SET_STATE(t, TS_STOPPED, &stop_lock);
		break;

	default:			/* TS_SLEEP, TS_ZOMB or TS_TRANS */
		cmn_err(CE_PANIC, "thread_create: invalid state %d", state);
	}
	mutex_exit(&pidlock);
	return (t);
}
513
514/*
515 * Move thread to project0 and take care of project reference counters.
516 */
517void
518thread_rele(kthread_t *t)
519{
520	kproject_t *kpj;
521
522	thread_lock(t);
523
524	ASSERT(t == curthread || t->t_state == TS_FREE || t->t_procp == &p0);
525	kpj = ttoproj(t);
526	t->t_proj = proj0p;
527
528	thread_unlock(t);
529
530	if (kpj != proj0p) {
531		project_rele(kpj);
532		(void) project_hold(proj0p);
533	}
534}
535
536
537void	(*ip_cleanup_func)(void);
538
539void
540thread_exit()
541{
542	kthread_t *t = curthread;
543
544	if ((t->t_proc_flag & TP_ZTHREAD) != 0)
545		cmn_err(CE_PANIC, "thread_exit: zthread_exit() not called");
546
547	if (ip_cleanup_func != NULL)
548		(*ip_cleanup_func)();
549
550	tsd_exit();		/* Clean up this thread's TSD */
551
552	kcpc_passivate();	/* clean up performance counter state */
553
554	/*
555	 * No kernel thread should have called poll() without arranging
556	 * calling pollcleanup() here.
557	 */
558	ASSERT(t->t_pollstate == NULL);
559	ASSERT(t->t_schedctl == NULL);
560	if (t->t_door)
561		door_slam();	/* in case thread did an upcall */
562
563#ifndef NPROBE
564	/* Kernel probe */
565	if (t->t_tnf_tpdp)
566		tnf_thread_exit();
567#endif /* NPROBE */
568
569	thread_rele(t);
570	t->t_preempt++;
571
572	/*
573	 * remove thread from the all threads list so that
574	 * death-row can use the same pointers.
575	 */
576	mutex_enter(&pidlock);
577	t->t_next->t_prev = t->t_prev;
578	t->t_prev->t_next = t->t_next;
579	ASSERT(allthreads != t);	/* t0 never exits */
580	cv_broadcast(&t->t_joincv);	/* wake up anyone in thread_join */
581	mutex_exit(&pidlock);
582
583	if (t->t_ctx != NULL)
584		exitctx(t);
585
586	t->t_state = TS_ZOMB;	/* set zombie thread */
587
588	swtch_from_zombie();	/* give up the CPU */
589	/* NOTREACHED */
590}
591
592/*
593 * Check to see if the specified thread is active (defined as being on
594 * the thread list).  This is certainly a slow way to do this; if there's
595 * ever a reason to speed it up, we could maintain a hash table of active
596 * threads indexed by their t_did.
597 */
598static kthread_t *
599did_to_thread(kt_did_t tid)
600{
601	kthread_t *t;
602
603	ASSERT(MUTEX_HELD(&pidlock));
604	for (t = curthread->t_next; t != curthread; t = t->t_next) {
605		if (t->t_did == tid)
606			break;
607	}
608	if (t->t_did == tid)
609		return (t);
610	else
611		return (NULL);
612}
613
614/*
615 * Wait for specified thread to exit.  Returns immediately if the thread
616 * could not be found, meaning that it has either already exited or never
617 * existed.
618 */
619void
620thread_join(kt_did_t tid)
621{
622	kthread_t *t;
623
624	ASSERT(tid != curthread->t_did);
625	ASSERT(tid != t0.t_did);
626
627	mutex_enter(&pidlock);
628	/*
629	 * Make sure we check that the thread is on the thread list
630	 * before blocking on it; otherwise we could end up blocking on
631	 * a cv that's already been freed.  In other words, don't cache
632	 * the thread pointer across calls to cv_wait.
633	 *
634	 * The choice of loop invariant means that whenever a thread
635	 * is taken off the allthreads list, a cv_broadcast must be
636	 * performed on that thread's t_joincv to wake up any waiters.
637	 * The broadcast doesn't have to happen right away, but it
638	 * shouldn't be postponed indefinitely (e.g., by doing it in
639	 * thread_free which may only be executed when the deathrow
640	 * queue is processed.
641	 */
642	while (t = did_to_thread(tid))
643		cv_wait(&t->t_joincv, &pidlock);
644	mutex_exit(&pidlock);
645}
646
647void
648thread_free(kthread_t *t)
649{
650	ASSERT(t != &t0 && t->t_state == TS_FREE);
651	ASSERT(t->t_door == NULL);
652	ASSERT(t->t_schedctl == NULL);
653	ASSERT(t->t_pollstate == NULL);
654
655	t->t_pri = 0;
656	t->t_pc = 0;
657	t->t_sp = 0;
658	t->t_wchan0 = NULL;
659	t->t_wchan = NULL;
660	if (t->t_cred != NULL) {
661		crfree(t->t_cred);
662		t->t_cred = 0;
663	}
664	if (t->t_pdmsg) {
665		kmem_free(t->t_pdmsg, strlen(t->t_pdmsg) + 1);
666		t->t_pdmsg = NULL;
667	}
668#ifdef	C2_AUDIT
669	if (audit_active)
670		audit_thread_free(t);
671#endif
672#ifndef NPROBE
673	if (t->t_tnf_tpdp)
674		tnf_thread_free(t);
675#endif /* NPROBE */
676	if (t->t_cldata) {
677		CL_EXITCLASS(t->t_cid, (caddr_t *)t->t_cldata);
678	}
679	if (t->t_rprof != NULL) {
680		kmem_free(t->t_rprof, sizeof (*t->t_rprof));
681		t->t_rprof = NULL;
682	}
683	t->t_lockp = NULL;	/* nothing should try to lock this thread now */
684	if (t->t_lwp)
685		lwp_freeregs(t->t_lwp, 0);
686	if (t->t_ctx)
687		freectx(t, 0);
688	t->t_stk = NULL;
689	if (t->t_lwp)
690		lwp_stk_fini(t->t_lwp);
691	lock_clear(&t->t_lock);
692
693	if (t->t_ts->ts_waiters > 0)
694		panic("thread_free: turnstile still active");
695
696	kmem_cache_free(turnstile_cache, t->t_ts);
697
698	free_afd(&t->t_activefd);
699
700	/*
701	 * Barrier for clock thread.  The clock holds this lock to
702	 * keep the thread from going away while it's looking at it.
703	 */
704	mutex_enter(&thread_free_lock);
705	mutex_exit(&thread_free_lock);
706
707	ASSERT(ttoproj(t) == proj0p);
708	project_rele(ttoproj(t));
709
710	lgrp_affinity_free(&t->t_lgrp_affinity);
711
712	/*
713	 * Free thread struct and its stack.
714	 */
715	if (t->t_flag & T_TALLOCSTK) {
716		/* thread struct is embedded in stack */
717		segkp_release(segkp, t->t_swap);
718		mutex_enter(&pidlock);
719		nthread--;
720		mutex_exit(&pidlock);
721	} else {
722		if (t->t_swap) {
723			segkp_release(segkp, t->t_swap);
724			t->t_swap = NULL;
725		}
726		if (t->t_lwp) {
727			kmem_cache_free(lwp_cache, t->t_lwp);
728			t->t_lwp = NULL;
729		}
730		mutex_enter(&pidlock);
731		nthread--;
732		mutex_exit(&pidlock);
733		kmem_cache_free(thread_cache, t);
734	}
735}
736
737/*
738 * Removes threads associated with the given zone from a deathrow queue.
739 * tp is a pointer to the head of the deathrow queue, and countp is a
740 * pointer to the current deathrow count.  Returns a linked list of
741 * threads removed from the list.
742 */
743static kthread_t *
744thread_zone_cleanup(kthread_t **tp, int *countp, zoneid_t zoneid)
745{
746	kthread_t *tmp, *list = NULL;
747	cred_t *cr;
748
749	ASSERT(MUTEX_HELD(&reaplock));
750	while (*tp != NULL) {
751		if ((cr = (*tp)->t_cred) != NULL && crgetzoneid(cr) == zoneid) {
752			tmp = *tp;
753			*tp = tmp->t_forw;
754			tmp->t_forw = list;
755			list = tmp;
756			(*countp)--;
757		} else {
758			tp = &(*tp)->t_forw;
759		}
760	}
761	return (list);
762}
763
764static void
765thread_reap_list(kthread_t *t)
766{
767	kthread_t *next;
768
769	while (t != NULL) {
770		next = t->t_forw;
771		thread_free(t);
772		t = next;
773	}
774}
775
776/* ARGSUSED */
777static void
778thread_zone_destroy(zoneid_t zoneid, void *unused)
779{
780	kthread_t *t, *l;
781
782	mutex_enter(&reaplock);
783	/*
784	 * Pull threads and lwps associated with zone off deathrow lists.
785	 */
786	t = thread_zone_cleanup(&thread_deathrow, &thread_reapcnt, zoneid);
787	l = thread_zone_cleanup(&lwp_deathrow, &lwp_reapcnt, zoneid);
788	mutex_exit(&reaplock);
789
790	/*
791	 * Reap threads
792	 */
793	thread_reap_list(t);
794
795	/*
796	 * Reap lwps
797	 */
798	thread_reap_list(l);
799}
800
801/*
802 * cleanup zombie threads that are on deathrow.
803 */
804void
805thread_reaper()
806{
807	kthread_t *t, *l;
808	callb_cpr_t cprinfo;
809
810	/*
811	 * Register callback to clean up threads when zone is destroyed.
812	 */
813	zone_key_create(&zone_thread_key, NULL, NULL, thread_zone_destroy);
814
815	CALLB_CPR_INIT(&cprinfo, &reaplock, callb_generic_cpr, "t_reaper");
816	for (;;) {
817		mutex_enter(&reaplock);
818		while (thread_deathrow == NULL && lwp_deathrow == NULL) {
819			CALLB_CPR_SAFE_BEGIN(&cprinfo);
820			cv_wait(&reaper_cv, &reaplock);
821			CALLB_CPR_SAFE_END(&cprinfo, &reaplock);
822		}
823		t = thread_deathrow;
824		l = lwp_deathrow;
825		thread_deathrow = NULL;
826		lwp_deathrow = NULL;
827		thread_reapcnt = 0;
828		lwp_reapcnt = 0;
829		mutex_exit(&reaplock);
830
831		/*
832		 * Reap threads
833		 */
834		thread_reap_list(t);
835
836		/*
837		 * Reap lwps
838		 */
839		thread_reap_list(l);
840	}
841}
842
843/*
844 * This is called by resume() to put a zombie thread onto deathrow.
845 * The thread's state is changed to TS_FREE to indicate that is reapable.
846 * This is called from the idle thread so it must not block (just spin).
847 */
848void
849reapq_add(kthread_t *t)
850{
851	mutex_enter(&reaplock);
852
853	/*
854	 * lwp_deathrow contains only threads with lwp linkage
855	 * that are of the default stacksize. Anything else goes
856	 * on thread_deathrow.
857	 */
858	if (ttolwp(t) && (t->t_flag & T_DFLTSTK)) {
859		t->t_forw = lwp_deathrow;
860		lwp_deathrow = t;
861		lwp_reapcnt++;
862	} else {
863		t->t_forw = thread_deathrow;
864		thread_deathrow = t;
865		thread_reapcnt++;
866	}
867	if (lwp_reapcnt + thread_reapcnt > reaplimit)
868		cv_signal(&reaper_cv);	/* wake the reaper */
869	t->t_state = TS_FREE;
870	lock_clear(&t->t_lock);
871	mutex_exit(&reaplock);
872}
873
874/*
875 * Install a device context for the current thread
876 */
877void
878installctx(
879	kthread_t *t,
880	void	*arg,
881	void	(*save)(void *),
882	void	(*restore)(void *),
883	void	(*fork)(void *, void *),
884	void	(*lwp_create)(void *, void *),
885	void	(*exit)(void *),
886	void	(*free)(void *, int))
887{
888	struct ctxop *ctx;
889
890	ctx = kmem_alloc(sizeof (struct ctxop), KM_SLEEP);
891	ctx->save_op = save;
892	ctx->restore_op = restore;
893	ctx->fork_op = fork;
894	ctx->lwp_create_op = lwp_create;
895	ctx->exit_op = exit;
896	ctx->free_op = free;
897	ctx->arg = arg;
898	ctx->next = t->t_ctx;
899	t->t_ctx = ctx;
900}
901
902/*
903 * Remove a device context from the current thread
904 * (Or allow the agent thread to remove device context from another
905 * thread in the same, stopped, process)
906 */
907int
908removectx(
909	kthread_t *t,
910	void	*arg,
911	void	(*save)(void *),
912	void	(*restore)(void *),
913	void	(*fork)(void *, void *),
914	void	(*lwp_create)(void *, void *),
915	void	(*exit)(void *),
916	void	(*free)(void *, int))
917{
918	struct ctxop *ctx, *prev_ctx;
919
920	ASSERT(t == curthread || ttoproc(t)->p_stat == SIDL ||
921	    ttoproc(t)->p_agenttp == curthread || t->t_state == TS_STOPPED);
922
923	prev_ctx = NULL;
924	for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next) {
925		if (ctx->save_op == save && ctx->restore_op == restore &&
926		    ctx->fork_op == fork && ctx->lwp_create_op == lwp_create &&
927		    ctx->exit_op == exit && ctx->free_op == free &&
928		    ctx->arg == arg) {
929			if (prev_ctx)
930				prev_ctx->next = ctx->next;
931			else
932				t->t_ctx = ctx->next;
933			if (ctx->free_op != NULL)
934				(ctx->free_op)(ctx->arg, 0);
935			kmem_free(ctx, sizeof (struct ctxop));
936			return (1);
937		}
938		prev_ctx = ctx;
939	}
940	return (0);
941}
942
943void
944savectx(kthread_t *t)
945{
946	struct ctxop *ctx;
947
948	ASSERT(t == curthread);
949	for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
950		if (ctx->save_op != NULL)
951			(ctx->save_op)(ctx->arg);
952}
953
954void
955restorectx(kthread_t *t)
956{
957	struct ctxop *ctx;
958
959	ASSERT(t == curthread);
960	for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
961		if (ctx->restore_op != NULL)
962			(ctx->restore_op)(ctx->arg);
963}
964
965void
966forkctx(kthread_t *t, kthread_t *ct)
967{
968	struct ctxop *ctx;
969
970	for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
971		if (ctx->fork_op != NULL)
972			(ctx->fork_op)(t, ct);
973}
974
975/*
976 * Note that this operator is only invoked via the _lwp_create
977 * system call.  The system may have other reasons to create lwps
978 * e.g. the agent lwp or the doors unreferenced lwp.
979 */
980void
981lwp_createctx(kthread_t *t, kthread_t *ct)
982{
983	struct ctxop *ctx;
984
985	for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
986		if (ctx->lwp_create_op != NULL)
987			(ctx->lwp_create_op)(t, ct);
988}
989
990/*
991 * exitctx is called from thread_exit() and lwp_exit() to perform any actions
992 * needed when the thread/LWP leaves the processor for the last time. This
993 * routine is not intended to deal with freeing memory; freectx() is used for
994 * that purpose during thread_free(). This routine is provided to allow for
995 * clean-up that can't wait until thread_free().
996 */
997void
998exitctx(kthread_t *t)
999{
1000	struct ctxop *ctx;
1001
1002	for (ctx = t->t_ctx; ctx != NULL; ctx = ctx->next)
1003		if (ctx->exit_op != NULL)
1004			(ctx->exit_op)(t);
1005}
1006
1007/*
1008 * freectx is called from thread_free() and exec() to get
1009 * rid of old device context.
1010 */
1011void
1012freectx(kthread_t *t, int isexec)
1013{
1014	struct ctxop *ctx;
1015
1016	while ((ctx = t->t_ctx) != NULL) {
1017		t->t_ctx = ctx->next;
1018		if (ctx->free_op != NULL)
1019			(ctx->free_op)(ctx->arg, isexec);
1020		kmem_free(ctx, sizeof (struct ctxop));
1021	}
1022}
1023
1024/*
1025 * Set the thread running; arrange for it to be swapped in if necessary.
1026 */
1027void
1028setrun_locked(kthread_t *t)
1029{
1030	ASSERT(THREAD_LOCK_HELD(t));
1031	if (t->t_state == TS_SLEEP) {
1032		/*
1033		 * Take off sleep queue.
1034		 */
1035		SOBJ_UNSLEEP(t->t_sobj_ops, t);
1036	} else if (t->t_state & (TS_RUN | TS_ONPROC)) {
1037		/*
1038		 * Already on dispatcher queue.
1039		 */
1040		return;
1041	} else if (t->t_state == TS_STOPPED) {
1042		/*
1043		 * All of the sending of SIGCONT (TC_XSTART) and /proc
1044		 * (TC_PSTART) and lwp_continue() (TC_CSTART) must have
1045		 * requested that the thread be run.
1046		 * Just calling setrun() is not sufficient to set a stopped
1047		 * thread running.  TP_TXSTART is always set if the thread
1048		 * is not stopped by a jobcontrol stop signal.
1049		 * TP_TPSTART is always set if /proc is not controlling it.
1050		 * TP_TCSTART is always set if lwp_suspend() didn't stop it.
1051		 * The thread won't be stopped unless one of these
1052		 * three mechanisms did it.
1053		 *
1054		 * These flags must be set before calling setrun_locked(t).
1055		 * They can't be passed as arguments because the streams
1056		 * code calls setrun() indirectly and the mechanism for
1057		 * doing so admits only one argument.  Note that the
1058		 * thread must be locked in order to change t_schedflags.
1059		 */
1060		if ((t->t_schedflag & TS_ALLSTART) != TS_ALLSTART)
1061			return;
1062		/*
1063		 * Process is no longer stopped (a thread is running).
1064		 */
1065		t->t_whystop = 0;
1066		t->t_whatstop = 0;
1067		/*
1068		 * Strictly speaking, we do not have to clear these
1069		 * flags here; they are cleared on entry to stop().
1070		 * However, they are confusing when doing kernel
1071		 * debugging or when they are revealed by ps(1).
1072		 */
1073		t->t_schedflag &= ~TS_ALLSTART;
1074		THREAD_TRANSITION(t);	/* drop stopped-thread lock */
1075		ASSERT(t->t_lockp == &transition_lock);
1076		ASSERT(t->t_wchan0 == NULL && t->t_wchan == NULL);
1077		/*
1078		 * Let the class put the process on the dispatcher queue.
1079		 */
1080		CL_SETRUN(t);
1081	}
1082
1083
1084}
1085
1086void
1087setrun(kthread_t *t)
1088{
1089	thread_lock(t);
1090	setrun_locked(t);
1091	thread_unlock(t);
1092}
1093
1094/*
1095 * Unpin an interrupted thread.
1096 *	When an interrupt occurs, the interrupt is handled on the stack
1097 *	of an interrupt thread, taken from a pool linked to the CPU structure.
1098 *
1099 *	When swtch() is switching away from an interrupt thread because it
1100 *	blocked or was preempted, this routine is called to complete the
1101 *	saving of the interrupted thread state, and returns the interrupted
1102 *	thread pointer so it may be resumed.
1103 *
1104 *	Called by swtch() only at high spl.
1105 */
1106kthread_t *
1107thread_unpin()
1108{
1109	kthread_t	*t = curthread;	/* current thread */
1110	kthread_t	*itp;		/* interrupted thread */
1111	int		i;		/* interrupt level */
1112	extern int	intr_passivate();
1113
1114	ASSERT(t->t_intr != NULL);
1115
1116	itp = t->t_intr;		/* interrupted thread */
1117	t->t_intr = NULL;		/* clear interrupt ptr */
1118
1119	/*
1120	 * Get state from interrupt thread for the one
1121	 * it interrupted.
1122	 */
1123
1124	i = intr_passivate(t, itp);
1125
1126	TRACE_5(TR_FAC_INTR, TR_INTR_PASSIVATE,
1127		"intr_passivate:level %d curthread %p (%T) ithread %p (%T)",
1128		i, t, t, itp, itp);
1129
1130	/*
1131	 * Dissociate the current thread from the interrupted thread's LWP.
1132	 */
1133	t->t_lwp = NULL;
1134
1135	/*
1136	 * Interrupt handlers above the level that spinlocks block must
1137	 * not block.
1138	 */
1139#if DEBUG
1140	if (i < 0 || i > LOCK_LEVEL)
1141		cmn_err(CE_PANIC, "thread_unpin: ipl out of range %x", i);
1142#endif
1143
1144	/*
1145	 * Compute the CPU's base interrupt level based on the active
1146	 * interrupts.
1147	 */
1148	ASSERT(CPU->cpu_intr_actv & (1 << i));
1149	set_base_spl();
1150
1151	return (itp);
1152}
1153
1154/*
1155 * Create and initialize an interrupt thread.
1156 *	Returns non-zero on error.
1157 *	Called at spl7() or better.
1158 */
1159void
1160thread_create_intr(struct cpu *cp)
1161{
1162	kthread_t *tp;
1163
1164	tp = thread_create(NULL, 0,
1165	    (void (*)())thread_create_intr, NULL, 0, &p0, TS_ONPROC, 0);
1166
1167	/*
1168	 * Set the thread in the TS_FREE state.  The state will change
1169	 * to TS_ONPROC only while the interrupt is active.  Think of these
1170	 * as being on a private free list for the CPU.  Being TS_FREE keeps
1171	 * inactive interrupt threads out of debugger thread lists.
1172	 *
1173	 * We cannot call thread_create with TS_FREE because of the current
1174	 * checks there for ONPROC.  Fix this when thread_create takes flags.
1175	 */
1176	THREAD_FREEINTR(tp, cp);
1177
1178	/*
1179	 * Nobody should ever reference the credentials of an interrupt
1180	 * thread so make it NULL to catch any such references.
1181	 */
1182	tp->t_cred = NULL;
1183	tp->t_flag |= T_INTR_THREAD;
1184	tp->t_cpu = cp;
1185	tp->t_bound_cpu = cp;
1186	tp->t_disp_queue = cp->cpu_disp;
1187	tp->t_affinitycnt = 1;
1188	tp->t_preempt = 1;
1189
1190	/*
1191	 * Don't make a user-requested binding on this thread so that
1192	 * the processor can be offlined.
1193	 */
1194	tp->t_bind_cpu = PBIND_NONE;	/* no USER-requested binding */
1195	tp->t_bind_pset = PS_NONE;
1196
1197#if defined(__i386) || defined(__amd64)
1198	tp->t_stk -= STACK_ALIGN;
1199	*(tp->t_stk) = 0;		/* terminate intr thread stack */
1200#endif
1201
1202	/*
1203	 * Link onto CPU's interrupt pool.
1204	 */
1205	tp->t_link = cp->cpu_intr_thread;
1206	cp->cpu_intr_thread = tp;
1207}
1208
1209/*
1210 * TSD -- THREAD SPECIFIC DATA
1211 */
1212static kmutex_t		tsd_mutex;	 /* linked list spin lock */
1213static uint_t		tsd_nkeys;	 /* size of destructor array */
1214/* per-key destructor funcs */
1215static void 		(**tsd_destructor)(void *);
1216/* list of tsd_thread's */
1217static struct tsd_thread	*tsd_list;
1218
1219/*
1220 * Default destructor
1221 *	Needed because NULL destructor means that the key is unused
1222 */
1223/* ARGSUSED */
1224void
1225tsd_defaultdestructor(void *value)
1226{}
1227
1228/*
1229 * Create a key (index into per thread array)
1230 *	Locks out tsd_create, tsd_destroy, and tsd_exit
1231 *	May allocate memory with lock held
1232 */
1233void
1234tsd_create(uint_t *keyp, void (*destructor)(void *))
1235{
1236	int	i;
1237	uint_t	nkeys;
1238
1239	/*
1240	 * if key is allocated, do nothing
1241	 */
1242	mutex_enter(&tsd_mutex);
1243	if (*keyp) {
1244		mutex_exit(&tsd_mutex);
1245		return;
1246	}
1247	/*
1248	 * find an unused key
1249	 */
1250	if (destructor == NULL)
1251		destructor = tsd_defaultdestructor;
1252
1253	for (i = 0; i < tsd_nkeys; ++i)
1254		if (tsd_destructor[i] == NULL)
1255			break;
1256
1257	/*
1258	 * if no unused keys, increase the size of the destructor array
1259	 */
1260	if (i == tsd_nkeys) {
1261		if ((nkeys = (tsd_nkeys << 1)) == 0)
1262			nkeys = 1;
1263		tsd_destructor =
1264		    (void (**)(void *))tsd_realloc((void *)tsd_destructor,
1265		    (size_t)(tsd_nkeys * sizeof (void (*)(void *))),
1266		    (size_t)(nkeys * sizeof (void (*)(void *))));
1267		tsd_nkeys = nkeys;
1268	}
1269
1270	/*
1271	 * allocate the next available unused key
1272	 */
1273	tsd_destructor[i] = destructor;
1274	*keyp = i + 1;
1275	mutex_exit(&tsd_mutex);
1276}
1277
1278/*
1279 * Destroy a key -- this is for unloadable modules
1280 *
1281 * Assumes that the caller is preventing tsd_set and tsd_get
1282 * Locks out tsd_create, tsd_destroy, and tsd_exit
1283 * May free memory with lock held
1284 */
1285void
1286tsd_destroy(uint_t *keyp)
1287{
1288	uint_t key;
1289	struct tsd_thread *tsd;
1290
1291	/*
1292	 * protect the key namespace and our destructor lists
1293	 */
1294	mutex_enter(&tsd_mutex);
1295	key = *keyp;
1296	*keyp = 0;
1297
1298	ASSERT(key <= tsd_nkeys);
1299
1300	/*
1301	 * if the key is valid
1302	 */
1303	if (key != 0) {
1304		uint_t k = key - 1;
1305		/*
1306		 * for every thread with TSD, call key's destructor
1307		 */
1308		for (tsd = tsd_list; tsd; tsd = tsd->ts_next) {
1309			/*
1310			 * no TSD for key in this thread
1311			 */
1312			if (key > tsd->ts_nkeys)
1313				continue;
1314			/*
1315			 * call destructor for key
1316			 */
1317			if (tsd->ts_value[k] && tsd_destructor[k])
1318				(*tsd_destructor[k])(tsd->ts_value[k]);
1319			/*
1320			 * reset value for key
1321			 */
1322			tsd->ts_value[k] = NULL;
1323		}
1324		/*
1325		 * actually free the key (NULL destructor == unused)
1326		 */
1327		tsd_destructor[k] = NULL;
1328	}
1329
1330	mutex_exit(&tsd_mutex);
1331}
1332
1333/*
1334 * Quickly return the per thread value that was stored with the specified key
1335 * Assumes the caller is protecting key from tsd_create and tsd_destroy
1336 */
1337void *
1338tsd_get(uint_t key)
1339{
1340	return (tsd_agent_get(curthread, key));
1341}
1342
1343/*
1344 * Set a per thread value indexed with the specified key
1345 */
1346int
1347tsd_set(uint_t key, void *value)
1348{
1349	return (tsd_agent_set(curthread, key, value));
1350}
1351
1352/*
1353 * Like tsd_get(), except that the agent lwp can get the tsd of
1354 * another thread in the same process (the agent thread only runs when the
1355 * process is completely stopped by /proc), or syslwp is creating a new lwp.
1356 */
1357void *
1358tsd_agent_get(kthread_t *t, uint_t key)
1359{
1360	struct tsd_thread *tsd = t->t_tsd;
1361
1362	ASSERT(t == curthread ||
1363	    ttoproc(t)->p_agenttp == curthread || t->t_state == TS_STOPPED);
1364
1365	if (key && tsd != NULL && key <= tsd->ts_nkeys)
1366		return (tsd->ts_value[key - 1]);
1367	return (NULL);
1368}
1369
1370/*
1371 * Like tsd_set(), except that the agent lwp can set the tsd of
1372 * another thread in the same process, or syslwp can set the tsd
1373 * of a thread it's in the middle of creating.
1374 *
1375 * Assumes the caller is protecting key from tsd_create and tsd_destroy
1376 * May lock out tsd_destroy (and tsd_create), may allocate memory with
1377 * lock held
1378 */
1379int
1380tsd_agent_set(kthread_t *t, uint_t key, void *value)
1381{
1382	struct tsd_thread *tsd = t->t_tsd;
1383
1384	ASSERT(t == curthread ||
1385	    ttoproc(t)->p_agenttp == curthread || t->t_state == TS_STOPPED);
1386
1387	if (key == 0)
1388		return (EINVAL);
1389	if (tsd == NULL)
1390		tsd = t->t_tsd = kmem_zalloc(sizeof (*tsd), KM_SLEEP);
1391	if (key <= tsd->ts_nkeys) {
1392		tsd->ts_value[key - 1] = value;
1393		return (0);
1394	}
1395
1396	ASSERT(key <= tsd_nkeys);
1397
1398	/*
1399	 * lock out tsd_destroy()
1400	 */
1401	mutex_enter(&tsd_mutex);
1402	if (tsd->ts_nkeys == 0) {
1403		/*
1404		 * Link onto list of threads with TSD
1405		 */
1406		if ((tsd->ts_next = tsd_list) != NULL)
1407			tsd_list->ts_prev = tsd;
1408		tsd_list = tsd;
1409	}
1410
1411	/*
1412	 * Allocate thread local storage and set the value for key
1413	 */
1414	tsd->ts_value = tsd_realloc(tsd->ts_value,
1415	    tsd->ts_nkeys * sizeof (void *),
1416	    key * sizeof (void *));
1417	tsd->ts_nkeys = key;
1418	tsd->ts_value[key - 1] = value;
1419	mutex_exit(&tsd_mutex);
1420
1421	return (0);
1422}
1423
1424
1425/*
1426 * Return the per thread value that was stored with the specified key
1427 *	If necessary, create the key and the value
1428 *	Assumes the caller is protecting *keyp from tsd_destroy
1429 */
1430void *
1431tsd_getcreate(uint_t *keyp, void (*destroy)(void *), void *(*allocate)(void))
1432{
1433	void *value;
1434	uint_t key = *keyp;
1435	struct tsd_thread *tsd = curthread->t_tsd;
1436
1437	if (tsd == NULL)
1438		tsd = curthread->t_tsd = kmem_zalloc(sizeof (*tsd), KM_SLEEP);
1439	if (key && key <= tsd->ts_nkeys && (value = tsd->ts_value[key - 1]))
1440		return (value);
1441	if (key == 0)
1442		tsd_create(keyp, destroy);
1443	(void) tsd_set(*keyp, value = (*allocate)());
1444
1445	return (value);
1446}

/*
 * Called from thread_exit() to run the destructor function for each tsd
 *	Locks out tsd_create and tsd_destroy
 *	Assumes that the destructor *DOES NOT* use tsd
 */
void
tsd_exit(void)
{
	int i;
	struct tsd_thread *tsd = curthread->t_tsd;

	if (tsd == NULL)
		return;

	if (tsd->ts_nkeys == 0) {
		kmem_free(tsd, sizeof (*tsd));
		curthread->t_tsd = NULL;
		return;
	}

	/*
	 * lock out tsd_create and tsd_destroy, call
	 * the destructor, and mark the value as destroyed.
	 */
	mutex_enter(&tsd_mutex);

	for (i = 0; i < tsd->ts_nkeys; i++) {
		if (tsd->ts_value[i] && tsd_destructor[i])
			(*tsd_destructor[i])(tsd->ts_value[i]);
		tsd->ts_value[i] = NULL;
	}

	/*
	 * remove from linked list of threads with TSD
	 */
	if (tsd->ts_next)
		tsd->ts_next->ts_prev = tsd->ts_prev;
	if (tsd->ts_prev)
		tsd->ts_prev->ts_next = tsd->ts_next;
	if (tsd_list == tsd)
		tsd_list = tsd->ts_next;

	mutex_exit(&tsd_mutex);

	/*
	 * free up the TSD
	 */
	kmem_free(tsd->ts_value, tsd->ts_nkeys * sizeof (void *));
	kmem_free(tsd, sizeof (struct tsd_thread));
	curthread->t_tsd = NULL;
}

/*
 * realloc
 */
static void *
tsd_realloc(void *old, size_t osize, size_t nsize)
{
	void *new;

	new = kmem_zalloc(nsize, KM_SLEEP);
	if (old) {
		bcopy(old, new, osize);
		kmem_free(old, osize);
	}
	return (new);
}

/*
 * Check to see if an interrupt thread might be active at a given ipl.
 * If so return true.
 * We must be conservative--it is ok to give a false yes, but a false no
 * will cause disaster.  (But if the situation changes after we check it is
 * ok--the caller is trying to ensure that an interrupt routine has been
 * exited).
 * This is used when trying to remove an interrupt handler from an autovector
 * list in avintr.c.
 */
int
intr_active(struct cpu *cp, int level)
{
	if (level <= LOCK_LEVEL)
		return (cp->cpu_thread != cp->cpu_dispthread);
	else
		return (CPU_ON_INTR(cp));
}

/*
 * Return non-zero if an interrupt is being serviced.
 */
int
servicing_interrupt()
{
	/*
	 * Note: single-OR used on purpose to return non-zero if T_INTR_THREAD
	 * flag set or CPU_ON_INTR(CPU) is non-zero (indicating high-level
	 * interrupt).
	 */
	return ((curthread->t_flag & T_INTR_THREAD) | CPU_ON_INTR(CPU));
}


/*
 * Change the dispatch priority of a thread in the system.
 * Used when raising or lowering a thread's priority.
 * (E.g., priority inheritance)
 *
 * Since threads are queued according to their priority, we
 * must check the thread's state to determine whether it
 * is on a queue somewhere. If it is, we've got to:
 *
 *	o Dequeue the thread.
 *	o Change its effective priority.
 *	o Enqueue the thread.
 *
 * Assumptions: The thread whose priority we wish to change
 * must be locked before we call thread_change_(e)pri().
 * The thread_change(e)pri() function doesn't drop the thread
 * lock--that must be done by its caller.
 */
void
thread_change_epri(kthread_t *t, pri_t disp_pri)
{
	uint_t	state;

	ASSERT(THREAD_LOCK_HELD(t));

	/*
	 * If the inherited priority hasn't actually changed,
	 * just return.
	 */
	if (t->t_epri == disp_pri)
		return;

	state = t->t_state;

	/*
	 * If it's not on a queue, change the priority with
	 * impunity.
	 */
	if ((state & (TS_SLEEP | TS_RUN)) == 0) {
		t->t_epri = disp_pri;

		if (state == TS_ONPROC) {
			cpu_t *cp = t->t_disp_queue->disp_cpu;

			if (t == cp->cpu_dispthread)
				cp->cpu_dispatch_pri = DISP_PRIO(t);
		}
		return;
	}

	/*
	 * It's either on a sleep queue or a run queue.
	 */
	if (state == TS_SLEEP) {

		/*
		 * Take the thread out of its sleep queue.
		 * Change the inherited priority.
		 * Re-enqueue the thread.
		 * Each synchronization object exports a function
		 * to do this in an appropriate manner.
		 */
		SOBJ_CHANGE_EPRI(t->t_sobj_ops, t, disp_pri);
	} else {
		/*
		 * The thread is on a run queue.
		 * Note: setbackdq() may not put the thread
		 * back on the same run queue where it originally
		 * resided.
		 */
		(void) dispdeq(t);
		t->t_epri = disp_pri;
		setbackdq(t);
	}
}	/* end of thread_change_epri */

/*
 * Function: Change the t_pri field of a thread.
 * Side Effects: Adjust the thread ordering on a run queue
 *		 or sleep queue, if necessary.
 * Returns: 1 if the thread was on a run queue, else 0.
 */
int
thread_change_pri(kthread_t *t, pri_t disp_pri, int front)
{
	uint_t	state;
	int	on_rq = 0;

	ASSERT(THREAD_LOCK_HELD(t));

	state = t->t_state;
	THREAD_WILLCHANGE_PRI(t, disp_pri);

	/*
	 * If it's not on a queue, change the priority with
	 * impunity.
	 */
	if ((state & (TS_SLEEP | TS_RUN)) == 0) {
		t->t_pri = disp_pri;

		if (state == TS_ONPROC) {
			cpu_t *cp = t->t_disp_queue->disp_cpu;

			if (t == cp->cpu_dispthread)
				cp->cpu_dispatch_pri = DISP_PRIO(t);
		}
		return (0);
	}

	/*
	 * It's either on a sleep queue or a run queue.
	 */
	if (state == TS_SLEEP) {
		/*
		 * If the priority has changed, take the thread out of
		 * its sleep queue and change the priority.
		 * Re-enqueue the thread.
		 * Each synchronization object exports a function
		 * to do this in an appropriate manner.
		 */
		if (disp_pri != t->t_pri)
			SOBJ_CHANGE_PRI(t->t_sobj_ops, t, disp_pri);
	} else {
		/*
		 * The thread is on a run queue.
		 * Note: setbackdq() may not put the thread
		 * back on the same run queue where it originally
		 * resided.
		 *
		 * We still requeue the thread even if the priority
		 * is unchanged to preserve round-robin (and other)
		 * effects between threads of the same priority.
		 */
		on_rq = dispdeq(t);
		ASSERT(on_rq);
		t->t_pri = disp_pri;
		if (front) {
			setfrontdq(t);
		} else {
			setbackdq(t);
		}
	}
	return (on_rq);
}
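
/*
 * A sketch of how a caller honors the locking contract described above
 * (illustrative only; new_pri is a hypothetical pri_t value):
 *
 *	thread_lock(t);
 *	(void) thread_change_pri(t, new_pri, 0);
 *	thread_unlock(t);
 *
 * The thread lock is taken by the caller, held across the priority change
 * so the dequeue/requeue is atomic with respect to the dispatcher, and
 * only dropped afterwards; thread_change_pri() itself never drops it.
 */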