machdep.c revision 2712:f74a135872bc
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/kstat.h>
#include <sys/param.h>
#include <sys/stack.h>
#include <sys/regset.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/procfs_isa.h>
#include <sys/kmem.h>
#include <sys/cpuvar.h>
#include <sys/systm.h>
#include <sys/machpcb.h>
#include <sys/machasi.h>
#include <sys/vis.h>
#include <sys/fpu/fpusystm.h>
#include <sys/cpu_module.h>
#include <sys/privregs.h>
#include <sys/archsystm.h>
#include <sys/atomic.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/clock.h>
#include <sys/chip.h>
#include <sys/cmp.h>
#include <sys/platform_module.h>
#include <sys/bl.h>
#include <sys/nvpair.h>
#include <sys/kdi_impl.h>
#include <sys/machsystm.h>
#include <sys/sysmacros.h>
#include <sys/promif.h>
#include <sys/pool_pset.h>
#include <vm/seg_kmem.h>

int maxphys = MMU_PAGESIZE * 16;	/* 128k */
int klustsize = MMU_PAGESIZE * 16;	/* 128k */

/*
 * Initialize kernel thread's stack.
 */
caddr_t
thread_stk_init(caddr_t stk)
{
	kfpu_t *fp;
	ulong_t align;

	/* allocate extra space for floating point state */
	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
	align = (uintptr_t)stk & 0x3f;
	stk -= align;		/* force v9_fpu to be 64 byte aligned */
	fp = (kfpu_t *)stk;
	fp->fpu_fprs = 0;

	stk -= SA(MINFRAME);
	return (stk);
}

#define	WIN32_SIZE	(MAXWIN * sizeof (struct rwindow32))
#define	WIN64_SIZE	(MAXWIN * sizeof (struct rwindow64))

kmem_cache_t	*wbuf32_cache;
kmem_cache_t	*wbuf64_cache;

void
lwp_stk_cache_init(void)
{
	/*
	 * Window buffers are allocated from the static arena
	 * because they are accessed at TL>0. We also must use
	 * KMC_NOHASH to prevent them from straddling page
	 * boundaries as they are accessed by physical address.
	 */
	wbuf32_cache = kmem_cache_create("wbuf32_cache", WIN32_SIZE,
	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
	wbuf64_cache = kmem_cache_create("wbuf64_cache", WIN64_SIZE,
	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
}

/*
 * Initialize lwp's kernel stack.
 * Note that now that the floating point register save area (kfpu_t)
 * has been broken out from machpcb and aligned on a 64 byte boundary
 * so that we can do block load/stores to/from it, there are a couple
 * of potential optimizations to save stack space:
 * 1. The floating point register save area could be aligned on a
 *    16 byte boundary, and the floating point code changed to
 *    (a) check the alignment and (b) use different save/restore
 *    macros depending upon the alignment.
 * 2. The lwp_stk_init code below could be changed to calculate
 *    whether less space would be wasted if machpcb was first instead
 *    of second. However, there is a REGOFF macro used in locore,
 *    syscall_trap, machdep and mlsetup that assumes the saved
 *    register area is a fixed distance from the %sp, and it would
 *    have to be changed to a pointer or something similar (as JJ
 *    noted later).
 */
caddr_t
lwp_stk_init(klwp_t *lwp, caddr_t stk)
{
	struct machpcb *mpcb;
	kfpu_t *fp;
	uintptr_t aln;

	stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
	aln = (uintptr_t)stk & 0x3F;
	stk -= aln;
	fp = (kfpu_t *)stk;
	stk -= SA(sizeof (struct machpcb));
	mpcb = (struct machpcb *)stk;
	bzero(mpcb, sizeof (struct machpcb));
	bzero(fp, sizeof (kfpu_t) + GSR_SIZE);
	lwp->lwp_regs = (void *)&mpcb->mpcb_regs;
	lwp->lwp_fpu = (void *)fp;
	mpcb->mpcb_fpu = fp;
	mpcb->mpcb_fpu->fpu_q = mpcb->mpcb_fpu_q;
	mpcb->mpcb_thread = lwp->lwp_thread;
	mpcb->mpcb_wbcnt = 0;
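	/*
	 * Size the window save buffer for the process's data model:
	 * 32-bit processes spill/fill struct rwindow32 frames and
	 * 64-bit processes struct rwindow64 frames.
	 */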
	if (lwp->lwp_procp->p_model == DATAMODEL_ILP32) {
		mpcb->mpcb_wstate = WSTATE_USER32;
		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
	} else {
		mpcb->mpcb_wstate = WSTATE_USER64;
		mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
	}
	ASSERT(((uintptr_t)mpcb->mpcb_wbuf & 7) == 0);
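	/*
	 * Record the physical addresses of the window buffer and mpcb;
	 * trap-level code reaches these buffers by physical address
	 * (see the KMC_NOHASH note in lwp_stk_cache_init() above).
	 */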
	mpcb->mpcb_wbuf_pa = va_to_pa(mpcb->mpcb_wbuf);
	mpcb->mpcb_pa = va_to_pa(mpcb);
	return (stk);
}

void
lwp_stk_fini(klwp_t *lwp)
{
	struct machpcb *mpcb = lwptompcb(lwp);

	/*
	 * There might still be register windows in the wbuf due to an
	 * unmapped stack, a misaligned stack pointer, etc.; just discard
	 * them before freeing the buffer.
	 */
	mpcb->mpcb_wbcnt = 0;
	if (mpcb->mpcb_wstate == WSTATE_USER32)
		kmem_cache_free(wbuf32_cache, mpcb->mpcb_wbuf);
	else
		kmem_cache_free(wbuf64_cache, mpcb->mpcb_wbuf);
	mpcb->mpcb_wbuf = NULL;
	mpcb->mpcb_wbuf_pa = -1;
}


/*
 * Copy regs from parent to child.
 */
void
lwp_forkregs(klwp_t *lwp, klwp_t *clwp)
{
	kthread_t *t, *pt = lwptot(lwp);
	struct machpcb *mpcb = lwptompcb(clwp);
	struct machpcb *pmpcb = lwptompcb(lwp);
	kfpu_t *fp, *pfp = lwptofpu(lwp);
	caddr_t wbuf;
	uint_t wstate;

	t = mpcb->mpcb_thread;
	/*
	 * remember child's fp and wbuf since they will get erased during
	 * the bcopy.
	 */
	fp = mpcb->mpcb_fpu;
	wbuf = mpcb->mpcb_wbuf;
	wstate = mpcb->mpcb_wstate;
	/*
	 * Don't copy mpcb_frame since we hand-crafted it
	 * in thread_load().
	 */
	bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct machpcb) - REGOFF);
	mpcb->mpcb_thread = t;
	mpcb->mpcb_fpu = fp;
	fp->fpu_q = mpcb->mpcb_fpu_q;

	/*
	 * It is theoretically possible for the lwp's wstate to
	 * differ from the value assigned in lwp_stk_init, since
	 * lwp_stk_init assumed the data model of the process.
	 * Here we take on the data model of the cloned lwp.
	 */
	if (mpcb->mpcb_wstate != wstate) {
		if (wstate == WSTATE_USER32) {
			kmem_cache_free(wbuf32_cache, wbuf);
			wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
			wstate = WSTATE_USER64;
		} else {
			kmem_cache_free(wbuf64_cache, wbuf);
			wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
			wstate = WSTATE_USER32;
		}
	}

	mpcb->mpcb_pa = va_to_pa(mpcb);
	mpcb->mpcb_wbuf = wbuf;
	mpcb->mpcb_wbuf_pa = va_to_pa(wbuf);

	ASSERT(mpcb->mpcb_wstate == wstate);

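	/*
	 * Copy any user register windows still buffered in the parent's
	 * mpcb so the child starts with the same saved-window state.
	 */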
	if (mpcb->mpcb_wbcnt != 0) {
		bcopy(pmpcb->mpcb_wbuf, mpcb->mpcb_wbuf,
		    mpcb->mpcb_wbcnt * ((mpcb->mpcb_wstate == WSTATE_USER32) ?
		    sizeof (struct rwindow32) : sizeof (struct rwindow64)));
	}

	if (pt == curthread)
		pfp->fpu_fprs = _fp_read_fprs();
	if ((pfp->fpu_en) || (pfp->fpu_fprs & FPRS_FEF)) {
		if (pt == curthread && fpu_exists) {
			save_gsr(clwp->lwp_fpu);
		} else {
			uint64_t gsr;
			gsr = get_gsr(lwp->lwp_fpu);
			set_gsr(gsr, clwp->lwp_fpu);
		}
		fp_fork(lwp, clwp);
	}
}

/*
 * Free lwp fpu regs.
 */
void
lwp_freeregs(klwp_t *lwp, int isexec)
{
	kfpu_t *fp = lwptofpu(lwp);

	if (lwptot(lwp) == curthread)
		fp->fpu_fprs = _fp_read_fprs();
	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF))
		fp_free(fp, isexec);
}

/*
 * This function is currently unused on sparc.
 */
/*ARGSUSED*/
void
lwp_attach_brand_hdlrs(klwp_t *lwp)
{}

/*
 * Fill in the specified extra register state area with the lwp's
 * platform-dependent non-floating-point extra register state
 * information.
 */
/* ARGSUSED */
void
xregs_getgfiller(klwp_id_t lwp, caddr_t xrp)
{
	/* for sun4u nothing to do here, added for symmetry */
}

/*
 * Fill in the specified extra register state area with the lwp's
 * platform-dependent floating-point extra register state information.
 * NOTE:  'lwp' might not correspond to 'curthread' since this is
 * called from code in /proc to get the registers of another lwp.
 */
void
xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp)
{
	prxregset_t *xregs = (prxregset_t *)xrp;
	kfpu_t *fp = lwptofpu(lwp);
	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
	uint64_t gsr;

	/*
	 * fp_fksave() does not flush the GSR register into
	 * the lwp area, so do it now
	 */
	kpreempt_disable();
	if (ttolwp(curthread) == lwp && fpu_exists) {
		fp->fpu_fprs = _fp_read_fprs();
		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
			_fp_write_fprs(fprs);
			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
		}
		save_gsr(fp);
	}
	gsr = get_gsr(fp);
	kpreempt_enable();
	PRXREG_GSR(xregs) = gsr;
}

/*
 * set the specified lwp's platform-dependent non-floating-point
 * extra register state based on the specified input
 */
/* ARGSUSED */
void
xregs_setgfiller(klwp_id_t lwp, caddr_t xrp)
{
	/* for sun4u nothing to do here, added for symmetry */
}

/*
 * set the specified lwp's platform-dependent floating-point
 * extra register state based on the specified input
 */
void
xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp)
{
	prxregset_t *xregs = (prxregset_t *)xrp;
	kfpu_t *fp = lwptofpu(lwp);
	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
	uint64_t gsr = PRXREG_GSR(xregs);

	kpreempt_disable();
	set_gsr(gsr, lwptofpu(lwp));

	if ((lwp == ttolwp(curthread)) && fpu_exists) {
		fp->fpu_fprs = _fp_read_fprs();
		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
			_fp_write_fprs(fprs);
			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
		}
		restore_gsr(lwptofpu(lwp));
	}
	kpreempt_enable();
}

/*
 * fill in the sun4u asrs, i.e., the lwp's platform-dependent
 * non-floating-point extra register state information
 */
/* ARGSUSED */
void
getasrs(klwp_t *lwp, asrset_t asr)
{
	/* for sun4u nothing to do here, added for symmetry */
}

/*
 * fill in the sun4u asrs, i.e., the lwp's platform-dependent
 * floating-point extra register state information
 */
void
getfpasrs(klwp_t *lwp, asrset_t asr)
{
	kfpu_t *fp = lwptofpu(lwp);
	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);

	kpreempt_disable();
	if (ttolwp(curthread) == lwp)
		fp->fpu_fprs = _fp_read_fprs();
	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
		if (fpu_exists && ttolwp(curthread) == lwp) {
			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
				_fp_write_fprs(fprs);
				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
			}
			save_gsr(fp);
		}
		asr[ASR_GSR] = (int64_t)get_gsr(fp);
	}
	kpreempt_enable();
}

/*
 * set the sun4u asrs, i.e., the lwp's platform-dependent
 * non-floating-point extra register state information
 */
/* ARGSUSED */
void
setasrs(klwp_t *lwp, asrset_t asr)
{
	/* for sun4u nothing to do here, added for symmetry */
}

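/*
 * set the sun4u asrs, i.e., the lwp's platform-dependent
 * floating-point extra register state information
 */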
void
setfpasrs(klwp_t *lwp, asrset_t asr)
{
	kfpu_t *fp = lwptofpu(lwp);
	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);

	kpreempt_disable();
	if (ttolwp(curthread) == lwp)
		fp->fpu_fprs = _fp_read_fprs();
	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
		set_gsr(asr[ASR_GSR], fp);
		if (fpu_exists && ttolwp(curthread) == lwp) {
			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
				_fp_write_fprs(fprs);
				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
			}
			restore_gsr(fp);
		}
	}
	kpreempt_enable();
}

/*
 * Create interrupt kstats for this CPU.
 */
void
cpu_create_intrstat(cpu_t *cp)
{
	int		i;
	kstat_t		*intr_ksp;
	kstat_named_t	*knp;
	char		name[KSTAT_STRLEN];
	zoneid_t	zoneid;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (pool_pset_enabled())
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = ALL_ZONES;

	intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
	    KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);

	/*
	 * Initialize each PIL's named kstat
	 */
	if (intr_ksp != NULL) {
		intr_ksp->ks_update = cpu_kstat_intrstat_update;
		knp = (kstat_named_t *)intr_ksp->ks_data;
		intr_ksp->ks_private = cp;
		for (i = 0; i < PIL_MAX; i++) {
			(void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
			    i + 1);
			kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
			(void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
			    i + 1);
			kstat_named_init(&knp[(i * 2) + 1], name,
			    KSTAT_DATA_UINT64);
		}
		kstat_install(intr_ksp);
	}
}

/*
 * Delete interrupt kstats for this CPU.
 */
void
cpu_delete_intrstat(cpu_t *cp)
{
	kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
}

/*
 * Convert interrupt statistics from CPU ticks to nanoseconds and
 * update kstat.
 */
int
cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
{
	kstat_named_t	*knp = ksp->ks_data;
	cpu_t		*cpup = (cpu_t *)ksp->ks_private;
	int		i;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	/*
	 * We use separate passes to copy and convert the statistics to
	 * nanoseconds. This ensures that the snapshot of the data is as
	 * self-consistent as possible.
	 */

	for (i = 0; i < PIL_MAX; i++) {
		knp[i * 2].value.ui64 = cpup->cpu_m.intrstat[i + 1][0];
		knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
	}

	for (i = 0; i < PIL_MAX; i++) {
		knp[i * 2].value.ui64 =
		    (uint64_t)tick2ns((hrtime_t)knp[i * 2].value.ui64,
		    cpup->cpu_id);
	}

	return (0);
}

/*
 * Called by common/os/cpu.c for psrinfo(1M) kstats
 */
char *
cpu_fru_fmri(cpu_t *cp)
{
	return (cpunodes[cp->cpu_id].fru_fmri);
}

/*
 * An interrupt thread is ending a time slice, so compute the interval it
 * ran for and update the statistic for its PIL.
 */
void
cpu_intr_swtch_enter(kthread_id_t t)
{
	uint64_t	interval;
	uint64_t	start;
	cpu_t		*cpu;

	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);

	/*
	 * We could be here with a zero timestamp. This could happen if an
	 * interrupt thread which no longer has a pinned thread underneath
	 * it (i.e. it blocked at some point in its past) has finished running
	 * its handler. intr_thread() updated the interrupt statistic for its
	 * PIL and zeroed its timestamp. Since there was no pinned thread to
	 * return to, swtch() gets called and we end up here.
	 *
	 * It can also happen if an interrupt thread in intr_thread() calls
	 * preempt. It will have already taken care of updating stats. In
	 * this event, the interrupt thread will be runnable.
	 */
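	/*
	 * Atomically claim the accumulated interval by swapping
	 * t_intr_start to zero, retrying if a higher-level interrupt
	 * updates the timestamp between the load and the cas.
	 */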
	if (t->t_intr_start) {
		do {
			start = t->t_intr_start;
			interval = gettick_counter() - start;
		} while (cas64(&t->t_intr_start, start, 0) != start);
		cpu = CPU;
		if (cpu->cpu_m.divisor > 1)
			interval *= cpu->cpu_m.divisor;
		cpu->cpu_m.intrstat[t->t_pil][0] += interval;

		atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
		    interval);
	} else
		ASSERT(t->t_intr == NULL || t->t_state == TS_RUN);
}


/*
 * An interrupt thread is returning from swtch(). Place a starting timestamp
 * in its thread structure.
 */
void
cpu_intr_swtch_exit(kthread_id_t t)
{
	uint64_t ts;

	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);

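	/*
	 * Atomically install a fresh starting timestamp; the cas loop
	 * guards against a concurrent update (e.g. by a high-level
	 * interrupt) between the load and the swap.
	 */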
	do {
		ts = t->t_intr_start;
	} while (cas64(&t->t_intr_start, ts, gettick_counter()) != ts);
}


int
blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class)
{
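	/*
	 * plat_blacklist() is an optional platform hook; taking its
	 * address tests whether this platform actually provides an
	 * implementation (the symbol is declared weak, so it resolves
	 * to NULL when absent).
	 */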
	if (&plat_blacklist)
		return (plat_blacklist(cmd, scheme, fmri, class));

	return (ENOTSUP);
}

int
kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
{
	extern void kdi_flush_caches(void);
	size_t nread = 0;
	uint32_t word;
	int slop, i;

	kdi_flush_caches();
	membar_enter();

	/* We might not begin on a word boundary. */
	if ((slop = addr & 3) != 0) {
		word = ldphys(addr & ~3);
		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nread++)
			*buf++ = ((uchar_t *)&word)[i];
		addr = roundup(addr, 4);
	}

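	/* Copy the aligned remainder of the range a word at a time. */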
	while (nbytes > 0) {
		word = ldphys(addr);
		for (i = 0; i < 4 && nbytes > 0; i++, nbytes--, nread++, addr++)
			*buf++ = ((uchar_t *)&word)[i];
	}

	kdi_flush_caches();

	*ncopiedp = nread;
	return (0);
}

int
kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
{
	extern void kdi_flush_caches(void);
	size_t nwritten = 0;
	uint32_t word;
	int slop, i;

	kdi_flush_caches();

	/* We might not begin on a word boundary. */
	if ((slop = addr & 3) != 0) {
		word = ldphys(addr & ~3);
		for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nwritten++)
			((uchar_t *)&word)[i] = *buf++;
		stphys(addr & ~3, word);
		addr = roundup(addr, 4);
	}

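	/* Store whole words for as long as at least four bytes remain. */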
	while (nbytes > 3) {
		for (word = 0, i = 0; i < 4; i++, nbytes--, nwritten++)
			((uchar_t *)&word)[i] = *buf++;
		stphys(addr, word);
		addr += 4;
	}

	/* We might not end with a whole word. */
	if (nbytes > 0) {
		word = ldphys(addr);
		for (i = 0; nbytes > 0; i++, nbytes--, nwritten++)
			((uchar_t *)&word)[i] = *buf++;
		stphys(addr, word);
	}

	membar_enter();
	kdi_flush_caches();

	*ncopiedp = nwritten;
	return (0);
}

static void
kdi_kernpanic(struct regs *regs, uint_t tt)
{
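	/*
	 * Stash the register state and trap type where sync_handler()
	 * expects them, then panic the system through the normal
	 * sync callback path.
	 */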
	sync_reg_buf = *regs;
	sync_tt = tt;

	sync_handler();
}

static void
kdi_plat_call(void (*platfn)(void))
{
	if (platfn != NULL) {
		prom_suspend_prepost();
		platfn();
		prom_resume_prepost();
	}
}

void
mach_kdi_init(kdi_t *kdi)
{
	kdi->kdi_plat_call = kdi_plat_call;
	kdi->mkdi_cpu_index = kdi_cpu_index;
	kdi->mkdi_trap_vatotte = kdi_trap_vatotte;
	kdi->mkdi_kernpanic = kdi_kernpanic;
}


/*
 * get_cpu_mstate() is passed an array of timestamps, NCMSTATES
 * long, and it fills in the array with the time spent on cpu in
 * each of the mstates, where time is returned in nsec.
 *
 * No guarantee is made that the returned values in times[] will
 * monotonically increase on sequential calls, although this will
 * be true in the long run. Any such guarantee must be handled by
 * the caller, if needed. This can happen if we fail to account
 * for elapsed time due to a generation counter conflict, yet we
 * did account for it on a prior call (see below).
 *
 * The complication is that the cpu in question may be updating
 * its microstate at the same time that we are reading it.
 * Because the microstate is only updated when the CPU's state
 * changes, the values in cpu_intracct[] can be indefinitely out
 * of date. To determine true current values, it is necessary to
 * compare the current time with cpu_mstate_start, and add the
 * difference to times[cpu_mstate].
 *
 * This can be a problem if those values are changing out from
 * under us. Because the code path in new_cpu_mstate() is
 * performance critical, we have not added a lock to it. Instead,
 * we have added a generation counter. Before beginning
 * modifications, the counter is set to 0. After modifications,
 * it is set to the old value plus one.
 *
 * get_cpu_mstate() will not consider the values of cpu_mstate
 * and cpu_mstate_start to be usable unless the value of
 * cpu_mstate_gen is both non-zero and unchanged, both before and
 * after reading the mstate information. Note that we must
 * protect against out-of-order loads around accesses to the
 * generation counter. Also, this is a best effort approach in
 * that we do not retry should the counter be found to have
 * changed.
 *
 * cpu_intracct[] is used to identify time spent in each CPU
 * mstate while handling interrupts. Such time should be reported
 * against system time, and so is subtracted out from its
 * corresponding cpu_acct[] time and added to
 * cpu_acct[CMS_SYSTEM]. Additionally, intracct time is stored in
 * %ticks, but acct time may be stored as %sticks, thus requiring
 * different conversions before they can be compared.
 */

void
get_cpu_mstate(cpu_t *cpu, hrtime_t *times)
{
	int i;
	hrtime_t now, start;
	uint16_t gen;
	uint16_t state;
	hrtime_t intracct[NCMSTATES];

	/*
	 * Load all volatile state under the protection of membar.
	 * cpu_acct[cpu_mstate] must be loaded to avoid double counting
	 * of (now - cpu_mstate_start) by a change in CPU mstate that
	 * arrives after we make our last check of cpu_mstate_gen.
	 */

	now = gethrtime_unscaled();
	gen = cpu->cpu_mstate_gen;

	membar_consumer();	/* guarantee load ordering */
	start = cpu->cpu_mstate_start;
	state = cpu->cpu_mstate;
	for (i = 0; i < NCMSTATES; i++) {
		intracct[i] = cpu->cpu_intracct[i];
		times[i] = cpu->cpu_acct[i];
	}
	membar_consumer();	/* guarantee load ordering */

	if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start)
		times[state] += now - start;

	for (i = 0; i < NCMSTATES; i++) {
		scalehrtime(&times[i]);
		intracct[i] = tick2ns((hrtime_t)intracct[i], cpu->cpu_id);
	}

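	/*
	 * Interrupt time is reported as system time: subtract each
	 * mstate's interrupt time from its accounted time and credit
	 * it to CMS_SYSTEM, clamping at zero so rounding in the
	 * separate tick conversions above cannot produce negative
	 * values.
	 */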
	for (i = 0; i < NCMSTATES; i++) {
		if (i == CMS_SYSTEM)
			continue;
		times[i] -= intracct[i];
		if (times[i] < 0) {
			intracct[i] += times[i];
			times[i] = 0;
		}
		times[CMS_SYSTEM] += intracct[i];
	}
}