1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * Support for Olympus-C (SPARC64-VI) and Jupiter (SPARC64-VII).
28 */
29
30#include <sys/types.h>
31#include <sys/systm.h>
32#include <sys/ddi.h>
33#include <sys/sysmacros.h>
34#include <sys/archsystm.h>
35#include <sys/vmsystm.h>
36#include <sys/machparam.h>
37#include <sys/machsystm.h>
38#include <sys/machthread.h>
39#include <sys/cpu.h>
40#include <sys/cmp.h>
41#include <sys/elf_SPARC.h>
42#include <vm/vm_dep.h>
43#include <vm/hat_sfmmu.h>
44#include <vm/seg_kpm.h>
45#include <vm/seg_kmem.h>
46#include <sys/cpuvar.h>
47#include <sys/opl_olympus_regs.h>
48#include <sys/opl_module.h>
49#include <sys/async.h>
50#include <sys/cmn_err.h>
51#include <sys/debug.h>
52#include <sys/dditypes.h>
53#include <sys/cpu_module.h>
54#include <sys/sysmacros.h>
55#include <sys/intreg.h>
56#include <sys/clock.h>
57#include <sys/platform_module.h>
58#include <sys/ontrap.h>
59#include <sys/panic.h>
60#include <sys/memlist.h>
61#include <sys/ndifm.h>
62#include <sys/ddifm.h>
63#include <sys/fm/protocol.h>
64#include <sys/fm/util.h>
65#include <sys/fm/cpu/SPARC64-VI.h>
66#include <sys/dtrace.h>
67#include <sys/watchpoint.h>
68#include <sys/promif.h>
69
70/*
71 * Internal functions.
72 */
73static int cpu_sync_log_err(void *flt);
74static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *);
75static void opl_cpu_sync_error(struct regs *, ulong_t, ulong_t, uint_t, uint_t);
76static int  cpu_flt_in_memory(opl_async_flt_t *, uint64_t);
77static int prom_SPARC64VII_support_enabled(void);
78static void opl_ta3();
79static int plat_prom_preserve_kctx_is_supported(void);
80
81/*
82 * Error counters resetting interval.
83 */
84static int opl_async_check_interval = 60;		/* 1 min */
85
86uint_t cpu_impl_dual_pgsz = 1;
87
88/*
89 * PA[22:0] represent Displacement in Jupiter
90 * configuration space.
91 */
92uint_t	root_phys_addr_lo_mask = 0x7fffffu;
93
94/*
95 * set in /etc/system to control logging of user BERR/TO's
96 */
97int cpu_berr_to_verbose = 0;
98
99/*
100 * Set to 1 if booted with all Jupiter cpus (all-Jupiter features enabled).
101 */
102int cpu_alljupiter = 0;
103
104/*
105 * The sfmmu_cext field to be used by processes in a shared context domain.
106 */
107static uchar_t shctx_cext = TAGACCEXT_MKSZPAIR(DEFAULT_ISM_PAGESZC, TTE8K);
108
109static int min_ecache_size;
110static uint_t priv_hcl_1;
111static uint_t priv_hcl_2;
112static uint_t priv_hcl_4;
113static uint_t priv_hcl_8;
114
115/*
116 * Olympus error log
117 */
118static opl_errlog_t	*opl_err_log;
119static int		opl_cpu0_log_setup;
120
121/*
122 * OPL ta 3 save area.
123 */
124char	*opl_ta3_save;
125
126/*
127 * UE is classified into four classes (MEM, CHANNEL, CPU, PATH).
128 * No any other ecc_type_info insertion is allowed in between the following
129 * four UE classess.
130 */
131ecc_type_to_info_t ecc_type_to_info[] = {
132	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
133	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
134	FM_EREPORT_CPU_UE_MEM,
135	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
136	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
137	FM_EREPORT_CPU_UE_CHANNEL,
138	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
139	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
140	FM_EREPORT_CPU_UE_CPU,
141	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
142	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
143	FM_EREPORT_CPU_UE_PATH,
144	SFSR_BERR, "BERR ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
145	"Bus Error",  FM_EREPORT_PAYLOAD_SYNC,
146	FM_EREPORT_CPU_BERR,
147	SFSR_TO, "TO ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
148	"Bus Timeout",  FM_EREPORT_PAYLOAD_SYNC,
149	FM_EREPORT_CPU_BTO,
150	SFSR_TLB_MUL, "TLB_MUL ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
151	"TLB MultiHit",  FM_EREPORT_PAYLOAD_SYNC,
152	FM_EREPORT_CPU_MTLB,
153	SFSR_TLB_PRT, "TLB_PRT ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
154	"TLB Parity",  FM_EREPORT_PAYLOAD_SYNC,
155	FM_EREPORT_CPU_TLBP,
156
157	UGESR_IAUG_CRE, "IAUG_CRE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
158	"IAUG CRE",  FM_EREPORT_PAYLOAD_URGENT,
159	FM_EREPORT_CPU_CRE,
160	UGESR_IAUG_TSBCTXT, "IAUG_TSBCTXT",
161	OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
162	"IAUG TSBCTXT",  FM_EREPORT_PAYLOAD_URGENT,
163	FM_EREPORT_CPU_TSBCTX,
164	UGESR_IUG_TSBP, "IUG_TSBP", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
165	"IUG TSBP",  FM_EREPORT_PAYLOAD_URGENT,
166	FM_EREPORT_CPU_TSBP,
167	UGESR_IUG_PSTATE, "IUG_PSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
168	"IUG PSTATE",  FM_EREPORT_PAYLOAD_URGENT,
169	FM_EREPORT_CPU_PSTATE,
170	UGESR_IUG_TSTATE, "IUG_TSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
171	"IUG TSTATE",  FM_EREPORT_PAYLOAD_URGENT,
172	FM_EREPORT_CPU_TSTATE,
173	UGESR_IUG_F, "IUG_F", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
174	"IUG FREG",  FM_EREPORT_PAYLOAD_URGENT,
175	FM_EREPORT_CPU_IUG_F,
176	UGESR_IUG_R, "IUG_R", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
177	"IUG RREG",  FM_EREPORT_PAYLOAD_URGENT,
178	FM_EREPORT_CPU_IUG_R,
179	UGESR_AUG_SDC, "AUG_SDC", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
180	"AUG SDC",  FM_EREPORT_PAYLOAD_URGENT,
181	FM_EREPORT_CPU_SDC,
182	UGESR_IUG_WDT, "IUG_WDT", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
183	"IUG WDT",  FM_EREPORT_PAYLOAD_URGENT,
184	FM_EREPORT_CPU_WDT,
185	UGESR_IUG_DTLB, "IUG_DTLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
186	"IUG DTLB",  FM_EREPORT_PAYLOAD_URGENT,
187	FM_EREPORT_CPU_DTLB,
188	UGESR_IUG_ITLB, "IUG_ITLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
189	"IUG ITLB",  FM_EREPORT_PAYLOAD_URGENT,
190	FM_EREPORT_CPU_ITLB,
191	UGESR_IUG_COREERR, "IUG_COREERR",
192	OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
193	"IUG COREERR",  FM_EREPORT_PAYLOAD_URGENT,
194	FM_EREPORT_CPU_CORE,
195	UGESR_MULTI_DAE, "MULTI_DAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
196	"MULTI DAE",  FM_EREPORT_PAYLOAD_URGENT,
197	FM_EREPORT_CPU_DAE,
198	UGESR_MULTI_IAE, "MULTI_IAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
199	"MULTI IAE",  FM_EREPORT_PAYLOAD_URGENT,
200	FM_EREPORT_CPU_IAE,
201	UGESR_MULTI_UGE, "MULTI_UGE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
202	"MULTI UGE",  FM_EREPORT_PAYLOAD_URGENT,
203	FM_EREPORT_CPU_UGE,
204	0,		NULL,		0,		0,
205	NULL,  0,	   0,
206};
207
208int (*p2get_mem_info)(int synd_code, uint64_t paddr,
209		uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
210		int *segsp, int *banksp, int *mcidp);
211
212
213/*
214 * Setup trap handlers for 0xA, 0x32, 0x40 trap types
215 * and "ta 3" and "ta 4".
216 */
217void
218cpu_init_trap(void)
219{
220	OPL_SET_TRAP(tt0_iae, opl_serr_instr);
221	OPL_SET_TRAP(tt1_iae, opl_serr_instr);
222	OPL_SET_TRAP(tt0_dae, opl_serr_instr);
223	OPL_SET_TRAP(tt1_dae, opl_serr_instr);
224	OPL_SET_TRAP(tt0_asdat, opl_ugerr_instr);
225	OPL_SET_TRAP(tt1_asdat, opl_ugerr_instr);
226	OPL_SET_TRAP(tt0_flushw, opl_ta3_instr);
227	OPL_PATCH_28(opl_cleanw_patch, opl_ta4_instr);
228}
229
230static int
231getintprop(pnode_t node, char *name, int deflt)
232{
233	int	value;
234
235	switch (prom_getproplen(node, name)) {
236	case sizeof (int):
237		(void) prom_getprop(node, name, (caddr_t)&value);
238		break;
239
240	default:
241		value = deflt;
242		break;
243	}
244
245	return (value);
246}
247
248/*
249 * Set the magic constants of the implementation.
250 */
251/*ARGSUSED*/
252void
253cpu_fiximp(pnode_t dnode)
254{
255	int i, a;
256	extern int vac_size, vac_shift;
257	extern uint_t vac_mask;
258
259	static struct {
260		char	*name;
261		int	*var;
262		int	defval;
263	} prop[] = {
264		"l1-dcache-size", &dcache_size, OPL_DCACHE_SIZE,
265		"l1-dcache-line-size", &dcache_linesize, OPL_DCACHE_LSIZE,
266		"l1-icache-size", &icache_size, OPL_ICACHE_SIZE,
267		"l1-icache-line-size", &icache_linesize, OPL_ICACHE_LSIZE,
268		"l2-cache-size", &ecache_size, OPL_ECACHE_SIZE,
269		"l2-cache-line-size", &ecache_alignsize, OPL_ECACHE_LSIZE,
270		"l2-cache-associativity", &ecache_associativity, OPL_ECACHE_NWAY
271	};
272
273	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++)
274		*prop[i].var = getintprop(dnode, prop[i].name, prop[i].defval);
275
276	ecache_setsize = ecache_size / ecache_associativity;
277
278	vac_size = OPL_VAC_SIZE;
279	vac_mask = MMU_PAGEMASK & (vac_size - 1);
280	i = 0; a = vac_size;
281	while (a >>= 1)
282		++i;
283	vac_shift = i;
284	shm_alignment = vac_size;
285	vac = 1;
286}
287
288/*
289 * Enable features for Jupiter-only domains.
290 */
291void
292cpu_fix_alljupiter(void)
293{
294	if (!prom_SPARC64VII_support_enabled()) {
295		/*
296		 * Do not enable all-Jupiter features and do not turn on
297		 * the cpu_alljupiter flag.
298		 */
299		return;
300	}
301
302	cpu_alljupiter = 1;
303
304	/*
305	 * Enable ima hwcap for Jupiter-only domains.  DR will prevent
306	 * addition of Olympus-C to all-Jupiter domains to preserve ima
307	 * hwcap semantics.
308	 */
309	cpu_hwcap_flags |= AV_SPARC_IMA;
310
311	/*
312	 * Enable shared context support.
313	 */
314	shctx_on = 1;
315}
316
317#ifdef	OLYMPUS_C_REV_B_ERRATA_XCALL
318/*
319 * Quick and dirty way to redefine locally in
320 * OPL the value of IDSR_BN_SETS to 31 instead
321 * of the standard 32 value. This is to workaround
322 * REV_B of Olympus_c processor's problem in handling
323 * more than 31 xcall broadcast.
324 */
325#undef	IDSR_BN_SETS
326#define	IDSR_BN_SETS    31
327#endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */
328
329void
330send_mondo_set(cpuset_t set)
331{
332	int lo, busy, nack, shipped = 0;
333	uint16_t i, cpuids[IDSR_BN_SETS];
334	uint64_t idsr, nackmask = 0, busymask, curnack, curbusy;
335	uint64_t starttick, endtick, tick, lasttick;
336#if (NCPU > IDSR_BN_SETS)
337	int index = 0;
338	int ncpuids = 0;
339#endif
340#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
341	int bn_sets = IDSR_BN_SETS;
342	uint64_t ver;
343
344	ASSERT(NCPU > bn_sets);
345#endif
346
347	ASSERT(!CPUSET_ISNULL(set));
348	starttick = lasttick = gettick();
349
350#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
351	ver = ultra_getver();
352	if (((ULTRA_VER_IMPL(ver)) == OLYMPUS_C_IMPL) &&
353	    ((OLYMPUS_REV_MASK(ver)) == OLYMPUS_C_A))
354		bn_sets = 1;
355#endif
356
357#if (NCPU <= IDSR_BN_SETS)
358	for (i = 0; i < NCPU; i++)
359		if (CPU_IN_SET(set, i)) {
360			shipit(i, shipped);
361			nackmask |= IDSR_NACK_BIT(shipped);
362			cpuids[shipped++] = i;
363			CPUSET_DEL(set, i);
364			if (CPUSET_ISNULL(set))
365				break;
366		}
367	CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
368#else
369	for (i = 0; i < NCPU; i++)
370		if (CPU_IN_SET(set, i)) {
371			ncpuids++;
372
373			/*
374			 * Ship only to the first (IDSR_BN_SETS) CPUs.  If we
375			 * find we have shipped to more than (IDSR_BN_SETS)
376			 * CPUs, set "index" to the highest numbered CPU in
377			 * the set so we can ship to other CPUs a bit later on.
378			 */
379#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
380			if (shipped < bn_sets) {
381#else
382			if (shipped < IDSR_BN_SETS) {
383#endif
384				shipit(i, shipped);
385				nackmask |= IDSR_NACK_BIT(shipped);
386				cpuids[shipped++] = i;
387				CPUSET_DEL(set, i);
388				if (CPUSET_ISNULL(set))
389					break;
390			} else
391				index = (int)i;
392		}
393
394	CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids);
395#endif
396
397	busymask = IDSR_NACK_TO_BUSY(nackmask);
398	busy = nack = 0;
399	endtick = starttick + xc_tick_limit;
400	for (;;) {
401		idsr = getidsr();
402#if (NCPU <= IDSR_BN_SETS)
403		if (idsr == 0)
404			break;
405#else
406		if (idsr == 0 && shipped == ncpuids)
407			break;
408#endif
409		tick = gettick();
410		/*
411		 * If there is a big jump between the current tick
412		 * count and lasttick, we have probably hit a break
413		 * point.  Adjust endtick accordingly to avoid panic.
414		 */
415		if (tick > (lasttick + xc_tick_jump_limit))
416			endtick += (tick - lasttick);
417		lasttick = tick;
418		if (tick > endtick) {
419			if (panic_quiesce)
420				return;
421			cmn_err(CE_CONT, "send mondo timeout [%d NACK %d "
422			    "BUSY]\nIDSR 0x%" PRIx64 "  cpuids:",
423			    nack, busy, idsr);
424#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
425			for (i = 0; i < bn_sets; i++) {
426#else
427			for (i = 0; i < IDSR_BN_SETS; i++) {
428#endif
429				if (idsr & (IDSR_NACK_BIT(i) |
430				    IDSR_BUSY_BIT(i))) {
431					cmn_err(CE_CONT, " 0x%x", cpuids[i]);
432				}
433			}
434			cmn_err(CE_CONT, "\n");
435			cmn_err(CE_PANIC, "send_mondo_set: timeout");
436		}
437		curnack = idsr & nackmask;
438		curbusy = idsr & busymask;
439
440#ifdef OLYMPUS_C_REV_B_ERRATA_XCALL
441		/*
442		 * Only proceed to send more xcalls if all the
443		 * cpus in the previous IDSR_BN_SETS were completed.
444		 */
445		if (curbusy) {
446			busy++;
447			continue;
448		}
449#endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */
450
451#if (NCPU > IDSR_BN_SETS)
452		if (shipped < ncpuids) {
453			uint64_t cpus_left;
454			uint16_t next = (uint16_t)index;
455
456			cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) &
457			    busymask;
458
459			if (cpus_left) {
460				do {
461					/*
462					 * Sequence through and ship to the
463					 * remainder of the CPUs in the system
464					 * (e.g. other than the first
465					 * (IDSR_BN_SETS)) in reverse order.
466					 */
467					lo = lowbit(cpus_left) - 1;
468					i = IDSR_BUSY_IDX(lo);
469					shipit(next, i);
470					shipped++;
471					cpuids[i] = next;
472
473					/*
474					 * If we've processed all the CPUs,
475					 * exit the loop now and save
476					 * instructions.
477					 */
478					if (shipped == ncpuids)
479						break;
480
481					for ((index = ((int)next - 1));
482					    index >= 0; index--)
483						if (CPU_IN_SET(set, index)) {
484							next = (uint16_t)index;
485							break;
486						}
487
488					cpus_left &= ~(1ull << lo);
489				} while (cpus_left);
490				continue;
491			}
492		}
493#endif
494#ifndef	OLYMPUS_C_REV_B_ERRATA_XCALL
495		if (curbusy) {
496			busy++;
497			continue;
498		}
499#endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */
500#ifdef SEND_MONDO_STATS
501		{
502			int n = gettick() - starttick;
503			if (n < 8192)
504				x_nack_stimes[n >> 7]++;
505		}
506#endif
507		while (gettick() < (tick + sys_clock_mhz))
508			;
509		do {
510			lo = lowbit(curnack) - 1;
511			i = IDSR_NACK_IDX(lo);
512			shipit(cpuids[i], i);
513			curnack &= ~(1ull << lo);
514		} while (curnack);
515		nack++;
516		busy = 0;
517	}
518#ifdef SEND_MONDO_STATS
519	{
520		int n = gettick() - starttick;
521		if (n < 8192)
522			x_set_stimes[n >> 7]++;
523		else
524			x_set_ltimes[(n >> 13) & 0xf]++;
525	}
526	x_set_cpus[shipped]++;
527#endif
528}
529
530/*
531 * Cpu private initialization.
532 */
533void
534cpu_init_private(struct cpu *cp)
535{
536	if (!((IS_OLYMPUS_C(cpunodes[cp->cpu_id].implementation)) ||
537	    (IS_JUPITER(cpunodes[cp->cpu_id].implementation)))) {
538		cmn_err(CE_PANIC, "CPU%d Impl %d: Only SPARC64-VI(I) is "
539		    "supported", cp->cpu_id,
540		    cpunodes[cp->cpu_id].implementation);
541	}
542
543	adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size);
544}
545
546void
547cpu_setup(void)
548{
549	extern int at_flags;
550	extern int cpc_has_overflow_intr;
551	uint64_t cpu0_log;
552	extern	 uint64_t opl_cpu0_err_log;
553
554	/*
555	 * Initialize Error log Scratch register for error handling.
556	 */
557
558	cpu0_log = va_to_pa(&opl_cpu0_err_log);
559	opl_error_setup(cpu0_log);
560	opl_cpu0_log_setup = 1;
561
562	/*
563	 * Enable MMU translating multiple page sizes for
564	 * sITLB and sDTLB.
565	 */
566	cpu_early_feature_init();
567
568	/*
569	 * Setup chip-specific trap handlers.
570	 */
571	cpu_init_trap();
572
573	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
574
575	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
576
577	/*
578	 * Due to the number of entries in the fully-associative tlb
579	 * this may have to be tuned lower than in spitfire.
580	 */
581	pp_slots = MIN(8, MAXPP_SLOTS);
582
583	/*
584	 * Block stores do not invalidate all pages of the d$, pagecopy
585	 * et. al. need virtual translations with virtual coloring taken
586	 * into consideration.  prefetch/ldd will pollute the d$ on the
587	 * load side.
588	 */
589	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
590
591	if (use_page_coloring) {
592		do_pg_coloring = 1;
593	}
594
595	isa_list =
596	    "sparcv9+vis2 sparcv9+vis sparcv9 "
597	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
598	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
599
600	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2 |
601	    AV_SPARC_POPC | AV_SPARC_FMAF;
602
603	/*
604	 * On SPARC64-VI, there's no hole in the virtual address space
605	 */
606	hole_start = hole_end = 0;
607
608	/*
609	 * The kpm mapping window.
610	 * kpm_size:
611	 *	The size of a single kpm range.
612	 *	The overall size will be: kpm_size * vac_colors.
613	 * kpm_vbase:
614	 *	The virtual start address of the kpm range within the kernel
615	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
616	 */
617	kpm_size = (size_t)(128ull * 1024 * 1024 * 1024 * 1024); /* 128TB */
618	kpm_size_shift = 47;
619	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
620	kpm_smallpages = 1;
621
622	/*
623	 * The traptrace code uses either %tick or %stick for
624	 * timestamping.  We have %stick so we can use it.
625	 */
626	traptrace_use_stick = 1;
627
628	/*
629	 * SPARC64-VI has a performance counter overflow interrupt
630	 */
631	cpc_has_overflow_intr = 1;
632
633	/*
634	 * Declare that this architecture/cpu combination does not support
635	 * fpRAS.
636	 */
637	fpras_implemented = 0;
638}
639
640/*
641 * Called by setcpudelay
642 */
643void
644cpu_init_tick_freq(void)
645{
646	/*
647	 * For SPARC64-VI we want to use the system clock rate as
648	 * the basis for low level timing, due to support of mixed
649	 * speed CPUs and power managment.
650	 */
651	if (system_clock_freq == 0)
652		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
653
654	sys_tick_freq = system_clock_freq;
655}
656
#ifdef SEND_MONDO_STATS
/*
 * Mondo dispatch statistics, in ticks elapsed since the start of the send.
 *
 * The *_stimes arrays count sends that took fewer than 8192 ticks and are
 * indexed by (ticks >> 7).  The *_ltimes arrays count the longer ones and
 * are indexed by ((ticks >> 13) & 0xf).  x_one_* is updated by
 * send_one_mondo(); x_set_* and x_nack_stimes by send_mondo_set(), which
 * also counts in x_set_cpus[] how many cpus each call shipped to.
 */
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif
665
666/*
667 * Note: A version of this function is used by the debugger via the KDI,
668 * and must be kept in sync with this version.  Any changes made to this
669 * function to support new chips or to accomodate errata must also be included
670 * in the KDI-specific version.  See us3_kdi.c.
671 */
672void
673send_one_mondo(int cpuid)
674{
675	int busy, nack;
676	uint64_t idsr, starttick, endtick, tick, lasttick;
677	uint64_t busymask;
678
679	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
680	starttick = lasttick = gettick();
681	shipit(cpuid, 0);
682	endtick = starttick + xc_tick_limit;
683	busy = nack = 0;
684	busymask = IDSR_BUSY;
685	for (;;) {
686		idsr = getidsr();
687		if (idsr == 0)
688			break;
689
690		tick = gettick();
691		/*
692		 * If there is a big jump between the current tick
693		 * count and lasttick, we have probably hit a break
694		 * point.  Adjust endtick accordingly to avoid panic.
695		 */
696		if (tick > (lasttick + xc_tick_jump_limit))
697			endtick += (tick - lasttick);
698		lasttick = tick;
699		if (tick > endtick) {
700			if (panic_quiesce)
701				return;
702			cmn_err(CE_PANIC, "send mondo timeout (target 0x%x) "
703			    "[%d NACK %d BUSY]", cpuid, nack, busy);
704		}
705
706		if (idsr & busymask) {
707			busy++;
708			continue;
709		}
710		drv_usecwait(1);
711		shipit(cpuid, 0);
712		nack++;
713		busy = 0;
714	}
715#ifdef SEND_MONDO_STATS
716	{
717		int n = gettick() - starttick;
718		if (n < 8192)
719			x_one_stimes[n >> 7]++;
720		else
721			x_one_ltimes[(n >> 13) & 0xf]++;
722	}
723#endif
724}
725
726/*
727 * init_mmu_page_sizes is set to one after the bootup time initialization
728 * via mmu_init_mmu_page_sizes, to indicate that mmu_page_sizes has a
729 * valid value.
730 *
731 * mmu_disable_ism_large_pages and mmu_disable_large_pages are the mmu-specific
732 * versions of disable_ism_large_pages and disable_large_pages, and feed back
733 * into those two hat variables at hat initialization time.
734 *
735 */
736int init_mmu_page_sizes = 0;
737
738static uint_t mmu_disable_large_pages = 0;
739static uint_t mmu_disable_ism_large_pages = ((1 << TTE64K) |
740	(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
741static uint_t mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
742	(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
743static uint_t mmu_disable_auto_text_large_pages = ((1 << TTE64K) |
744	(1 << TTE512K));
745
746/*
747 * Re-initialize mmu_page_sizes and friends, for SPARC64-VI mmu support.
748 * Called during very early bootup from check_cpus_set().
749 * Can be called to verify that mmu_page_sizes are set up correctly.
750 *
751 * Set Olympus defaults. We do not use the function parameter.
752 */
753/*ARGSUSED*/
754void
755mmu_init_scd(sf_scd_t *scdp)
756{
757	scdp->scd_sfmmup->sfmmu_cext = shctx_cext;
758}
759
760/*ARGSUSED*/
761int
762mmu_init_mmu_page_sizes(int32_t not_used)
763{
764	if (!init_mmu_page_sizes) {
765		mmu_page_sizes = MMU_PAGE_SIZES;
766		mmu_hashcnt = MAX_HASHCNT;
767		mmu_ism_pagesize = DEFAULT_ISM_PAGESIZE;
768		mmu_exported_pagesize_mask = (1 << TTE8K) |
769		    (1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) |
770		    (1 << TTE32M) | (1 << TTE256M);
771		init_mmu_page_sizes = 1;
772		return (0);
773	}
774	return (1);
775}
776
777/* SPARC64-VI worst case DTLB parameters */
778#ifndef	LOCKED_DTLB_ENTRIES
779#define	LOCKED_DTLB_ENTRIES	5	/* 2 user TSBs, 2 nucleus, + OBP */
780#endif
781#define	TOTAL_DTLB_ENTRIES	32
782#define	AVAIL_32M_ENTRIES	0
783#define	AVAIL_256M_ENTRIES	0
784#define	AVAIL_DTLB_ENTRIES	(TOTAL_DTLB_ENTRIES - LOCKED_DTLB_ENTRIES)
785static uint64_t ttecnt_threshold[MMU_PAGE_SIZES] = {
786	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES,
787	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES,
788	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES};
789
790/*
791 * The function returns the mmu-specific values for the
792 * hat's disable_large_pages, disable_ism_large_pages, and
793 * disable_auto_data_large_pages and
794 * disable_text_data_large_pages variables.
795 */
796uint_t
797mmu_large_pages_disabled(uint_t flag)
798{
799	uint_t pages_disable = 0;
800	extern int use_text_pgsz64K;
801	extern int use_text_pgsz512K;
802
803	if (flag == HAT_LOAD) {
804		pages_disable =  mmu_disable_large_pages;
805	} else if (flag == HAT_LOAD_SHARE) {
806		pages_disable = mmu_disable_ism_large_pages;
807	} else if (flag == HAT_AUTO_DATA) {
808		pages_disable = mmu_disable_auto_data_large_pages;
809	} else if (flag == HAT_AUTO_TEXT) {
810		pages_disable = mmu_disable_auto_text_large_pages;
811		if (use_text_pgsz512K) {
812			pages_disable &= ~(1 << TTE512K);
813		}
814		if (use_text_pgsz64K) {
815			pages_disable &= ~(1 << TTE64K);
816		}
817	}
818	return (pages_disable);
819}
820
821/*
822 * mmu_init_large_pages is called with the desired ism_pagesize parameter.
823 * It may be called from set_platform_defaults, if some value other than 4M
824 * is desired.  mmu_ism_pagesize is the tunable.  If it has a bad value,
825 * then only warn, since it would be bad form to panic due to a user typo.
826 *
827 * The function re-initializes the mmu_disable_ism_large_pages variable.
828 */
829void
830mmu_init_large_pages(size_t ism_pagesize)
831{
832
833	switch (ism_pagesize) {
834	case MMU_PAGESIZE4M:
835		mmu_disable_ism_large_pages = ((1 << TTE64K) |
836		    (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
837		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
838		    (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
839		shctx_cext = TAGACCEXT_MKSZPAIR(TTE4M, TTE8K);
840		break;
841	case MMU_PAGESIZE32M:
842		mmu_disable_ism_large_pages = ((1 << TTE64K) |
843		    (1 << TTE512K) | (1 << TTE256M));
844		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
845		    (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M));
846		adjust_data_maxlpsize(ism_pagesize);
847		shctx_cext = TAGACCEXT_MKSZPAIR(TTE32M, TTE8K);
848		break;
849	case MMU_PAGESIZE256M:
850		mmu_disable_ism_large_pages = ((1 << TTE64K) |
851		    (1 << TTE512K) | (1 << TTE32M));
852		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
853		    (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M));
854		adjust_data_maxlpsize(ism_pagesize);
855		shctx_cext = TAGACCEXT_MKSZPAIR(TTE256M, TTE8K);
856		break;
857	default:
858		cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx",
859		    ism_pagesize);
860		break;
861	}
862}
863
864/*
865 * Function to reprogram the TLBs when page sizes used
866 * by a process change significantly.
867 */
868static void
869mmu_setup_page_sizes(struct hat *hat, uint64_t *ttecnt, uint8_t *tmp_pgsz)
870{
871	uint8_t pgsz0, pgsz1;
872
873	/*
874	 * Don't program 2nd dtlb for kernel and ism hat
875	 */
876	ASSERT(hat->sfmmu_ismhat == NULL);
877	ASSERT(hat != ksfmmup);
878
879	/*
880	 * hat->sfmmu_pgsz[] is an array whose elements
881	 * contain a sorted order of page sizes.  Element
882	 * 0 is the most commonly used page size, followed
883	 * by element 1, and so on.
884	 *
885	 * ttecnt[] is an array of per-page-size page counts
886	 * mapped into the process.
887	 *
888	 * If the HAT's choice for page sizes is unsuitable,
889	 * we can override it here.  The new values written
890	 * to the array will be handed back to us later to
891	 * do the actual programming of the TLB hardware.
892	 *
893	 */
894	pgsz0 = (uint8_t)MIN(tmp_pgsz[0], tmp_pgsz[1]);
895	pgsz1 = (uint8_t)MAX(tmp_pgsz[0], tmp_pgsz[1]);
896
897	/*
898	 * This implements PAGESIZE programming of the sTLB
899	 * if large TTE counts don't exceed the thresholds.
900	 */
901	if (ttecnt[pgsz0] < ttecnt_threshold[pgsz0])
902		pgsz0 = page_szc(MMU_PAGESIZE);
903	if (ttecnt[pgsz1] < ttecnt_threshold[pgsz1])
904		pgsz1 = page_szc(MMU_PAGESIZE);
905	tmp_pgsz[0] = pgsz0;
906	tmp_pgsz[1] = pgsz1;
907	/* otherwise, accept what the HAT chose for us */
908}
909
910/*
911 * The HAT calls this function when an MMU context is allocated so that we
912 * can reprogram the large TLBs appropriately for the new process using
913 * the context.
914 *
915 * The caller must hold the HAT lock.
916 */
917void
918mmu_set_ctx_page_sizes(struct hat *hat)
919{
920	uint8_t pgsz0, pgsz1;
921	uint8_t new_cext;
922
923	ASSERT(sfmmu_hat_lock_held(hat));
924	/*
925	 * Don't program 2nd dtlb for kernel and ism hat
926	 */
927	if (hat->sfmmu_ismhat || hat == ksfmmup)
928		return;
929
930	/*
931	 * If supported, reprogram the TLBs to a larger pagesize.
932	 */
933	if (hat->sfmmu_scdp != NULL) {
934		new_cext = hat->sfmmu_scdp->scd_sfmmup->sfmmu_cext;
935		ASSERT(new_cext == shctx_cext);
936	} else {
937		pgsz0 = hat->sfmmu_pgsz[0];
938		pgsz1 = hat->sfmmu_pgsz[1];
939		ASSERT(pgsz0 < mmu_page_sizes);
940		ASSERT(pgsz1 < mmu_page_sizes);
941		new_cext = TAGACCEXT_MKSZPAIR(pgsz1, pgsz0);
942	}
943	if (hat->sfmmu_cext != new_cext) {
944#ifdef DEBUG
945		int i;
946		/*
947		 * assert cnum should be invalid, this is because pagesize
948		 * can only be changed after a proc's ctxs are invalidated.
949		 */
950		for (i = 0; i < max_mmu_ctxdoms; i++) {
951			ASSERT(hat->sfmmu_ctxs[i].cnum == INVALID_CONTEXT);
952		}
953#endif /* DEBUG */
954		hat->sfmmu_cext = new_cext;
955	}
956	/*
957	 * sfmmu_setctx_sec() will take care of the
958	 * rest of the dirty work for us.
959	 */
960}
961
962/*
963 * This function assumes that there are either four or six supported page
964 * sizes and at most two programmable TLBs, so we need to decide which
965 * page sizes are most important and then adjust the TLB page sizes
966 * accordingly (if supported).
967 *
968 * If these assumptions change, this function will need to be
969 * updated to support whatever the new limits are.
970 */
971void
972mmu_check_page_sizes(sfmmu_t *sfmmup, uint64_t *ttecnt)
973{
974	uint64_t sortcnt[MMU_PAGE_SIZES];
975	uint8_t tmp_pgsz[MMU_PAGE_SIZES];
976	uint8_t i, j, max;
977	uint16_t oldval, newval;
978
979	/*
980	 * We only consider reprogramming the TLBs if one or more of
981	 * the two most used page sizes changes and we're using
982	 * large pages in this process.
983	 */
984	if (SFMMU_LGPGS_INUSE(sfmmup)) {
985		/* Sort page sizes. */
986		for (i = 0; i < mmu_page_sizes; i++) {
987			sortcnt[i] = ttecnt[i];
988		}
989		for (j = 0; j < mmu_page_sizes; j++) {
990			for (i = mmu_page_sizes - 1, max = 0; i > 0; i--) {
991				if (sortcnt[i] > sortcnt[max])
992					max = i;
993			}
994			tmp_pgsz[j] = max;
995			sortcnt[max] = 0;
996		}
997
998		oldval = sfmmup->sfmmu_pgsz[0] << 8 | sfmmup->sfmmu_pgsz[1];
999
1000		mmu_setup_page_sizes(sfmmup, ttecnt, tmp_pgsz);
1001
1002		/* Check 2 largest values after the sort. */
1003		newval = tmp_pgsz[0] << 8 | tmp_pgsz[1];
1004		if (newval != oldval) {
1005			sfmmu_reprog_pgsz_arr(sfmmup, tmp_pgsz);
1006		}
1007	}
1008}
1009
1010/*
1011 * Return processor specific async error structure
1012 * size used.
1013 */
1014int
1015cpu_aflt_size(void)
1016{
1017	return (sizeof (opl_async_flt_t));
1018}
1019
1020/*
1021 * The cpu_sync_log_err() function is called via the [uc]e_drain() function to
1022 * post-process CPU events that are dequeued.  As such, it can be invoked
1023 * from softint context, from AST processing in the trap() flow, or from the
1024 * panic flow.  We decode the CPU-specific data, and take appropriate actions.
1025 * Historically this entry point was used to log the actual cmn_err(9F) text;
1026 * now with FMA it is used to prepare 'flt' to be converted into an ereport.
1027 * With FMA this function now also returns a flag which indicates to the
1028 * caller whether the ereport should be posted (1) or suppressed (0).
1029 */
1030/*ARGSUSED*/
1031static int
1032cpu_sync_log_err(void *flt)
1033{
1034	opl_async_flt_t *opl_flt = (opl_async_flt_t *)flt;
1035	struct async_flt *aflt = (struct async_flt *)flt;
1036
1037	/*
1038	 * No extra processing of urgent error events.
1039	 * Always generate ereports for these events.
1040	 */
1041	if (aflt->flt_status == OPL_ECC_URGENT_TRAP)
1042		return (1);
1043
1044	/*
1045	 * Additional processing for synchronous errors.
1046	 */
1047	switch (opl_flt->flt_type) {
1048	case OPL_CPU_INV_SFSR:
1049		return (1);
1050
1051	case OPL_CPU_SYNC_UE:
1052		/*
1053		 * The validity: SFSR_MK_UE bit has been checked
1054		 * in opl_cpu_sync_error()
1055		 * No more check is required.
1056		 *
1057		 * opl_flt->flt_eid_mod and flt_eid_sid have been set by H/W,
1058		 * and they have been retrieved in cpu_queue_events()
1059		 */
1060
1061		if (opl_flt->flt_eid_mod == OPL_ERRID_MEM) {
1062			ASSERT(aflt->flt_in_memory);
1063			/*
1064			 * We want to skip logging only if ALL the following
1065			 * conditions are true:
1066			 *
1067			 *	1. We are not panicing already.
1068			 *	2. The error is a memory error.
1069			 *	3. There is only one error.
1070			 *	4. The error is on a retired page.
1071			 *	5. The error occurred under on_trap
1072			 *	protection AFLT_PROT_EC
1073			 */
1074			if (!panicstr && aflt->flt_prot == AFLT_PROT_EC &&
1075			    page_retire_check(aflt->flt_addr, NULL) == 0) {
1076				/*
1077				 * Do not log an error from
1078				 * the retired page
1079				 */
1080				softcall(ecc_page_zero, (void *)aflt->flt_addr);
1081				return (0);
1082			}
1083			if (!panicstr)
1084				cpu_page_retire(opl_flt);
1085		}
1086		return (1);
1087
1088	case OPL_CPU_SYNC_OTHERS:
1089		/*
1090		 * For the following error cases, the processor HW does
1091		 * not set the flt_eid_mod/flt_eid_sid. Instead, SW will attempt
1092		 * to assign appropriate values here to reflect what we
1093		 * think is the most likely cause of the problem w.r.t to
1094		 * the particular error event.  For Buserr and timeout
1095		 * error event, we will assign OPL_ERRID_CHANNEL as the
1096		 * most likely reason.  For TLB parity or multiple hit
1097		 * error events, we will assign the reason as
1098		 * OPL_ERRID_CPU (cpu related problem) and set the
1099		 * flt_eid_sid to point to the cpuid.
1100		 */
1101
1102		if (opl_flt->flt_bit & (SFSR_BERR|SFSR_TO)) {
1103			/*
1104			 * flt_eid_sid will not be used for this case.
1105			 */
1106			opl_flt->flt_eid_mod = OPL_ERRID_CHANNEL;
1107		}
1108		if (opl_flt->flt_bit & (SFSR_TLB_MUL|SFSR_TLB_PRT)) {
1109			opl_flt->flt_eid_mod = OPL_ERRID_CPU;
1110			opl_flt->flt_eid_sid = aflt->flt_inst;
1111		}
1112
1113		/*
1114		 * In case of no effective error bit
1115		 */
1116		if ((opl_flt->flt_bit & SFSR_ERRS) == 0) {
1117			opl_flt->flt_eid_mod = OPL_ERRID_CPU;
1118			opl_flt->flt_eid_sid = aflt->flt_inst;
1119		}
1120		break;
1121
1122		default:
1123			return (1);
1124	}
1125	return (1);
1126}
1127
1128/*
1129 * Retire the bad page that may contain the flushed error.
1130 */
1131void
1132cpu_page_retire(opl_async_flt_t *opl_flt)
1133{
1134	struct async_flt *aflt = (struct async_flt *)opl_flt;
1135	(void) page_retire(aflt->flt_addr, PR_UE);
1136}
1137
1138/*
1139 * Invoked by error_init() early in startup and therefore before
1140 * startup_errorq() is called to drain any error Q -
1141 *
1142 * startup()
1143 *   startup_end()
1144 *     error_init()
1145 *       cpu_error_init()
1146 * errorq_init()
1147 *   errorq_drain()
1148 * start_other_cpus()
1149 *
1150 * The purpose of this routine is to create error-related taskqs.  Taskqs
1151 * are used for this purpose because cpu_lock can't be grabbed from interrupt
1152 * context.
1153 *
1154 */
1155/*ARGSUSED*/
1156void
1157cpu_error_init(int items)
1158{
1159	opl_err_log = (opl_errlog_t *)
1160	    kmem_alloc(ERRLOG_ALLOC_SZ, KM_SLEEP);
1161	if ((uint64_t)opl_err_log & MMU_PAGEOFFSET)
1162		cmn_err(CE_PANIC, "The base address of the error log "
1163		    "is not page aligned");
1164}
1165
1166/*
1167 * We route all errors through a single switch statement.
1168 */
1169void
1170cpu_ue_log_err(struct async_flt *aflt)
1171{
1172	switch (aflt->flt_class) {
1173	case CPU_FAULT:
1174		if (cpu_sync_log_err(aflt))
1175			cpu_ereport_post(aflt);
1176		break;
1177
1178	case BUS_FAULT:
1179		bus_async_log_err(aflt);
1180		break;
1181
1182	default:
1183		cmn_err(CE_WARN, "discarding async error %p with invalid "
1184		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
1185		return;
1186	}
1187}
1188
/*
 * Panic hook callback, called from panic_idle().
 *
 * This CPU module has no work to do at that point, so the body is
 * intentionally empty.
 */
void
cpu_async_panic_callb(void)
{
}
1198
1199/*
1200 * Routine to return a string identifying the physical name
1201 * associated with a memory/cache error.
1202 */
1203/*ARGSUSED*/
1204int
1205cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
1206    uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
1207    ushort_t flt_status, char *buf, int buflen, int *lenp)
1208{
1209	int synd_code;
1210	int ret;
1211
1212	/*
1213	 * An AFSR of -1 defaults to a memory syndrome.
1214	 */
1215	synd_code = (int)flt_synd;
1216
1217	if (&plat_get_mem_unum) {
1218		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
1219		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
1220			buf[0] = '\0';
1221			*lenp = 0;
1222		}
1223		return (ret);
1224	}
1225	buf[0] = '\0';
1226	*lenp = 0;
1227	return (ENOTSUP);
1228}
1229
1230/*
1231 * Wrapper for cpu_get_mem_unum() routine that takes an
1232 * async_flt struct rather than explicit arguments.
1233 */
1234int
1235cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
1236    char *buf, int buflen, int *lenp)
1237{
1238	/*
1239	 * We always pass -1 so that cpu_get_mem_unum will interpret this as a
1240	 * memory error.
1241	 */
1242	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
1243	    (uint64_t)-1,
1244	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
1245	    aflt->flt_status, buf, buflen, lenp));
1246}
1247
1248/*
1249 * This routine is a more generic interface to cpu_get_mem_unum()
1250 * that may be used by other modules (e.g. mm).
1251 */
1252/*ARGSUSED*/
1253int
1254cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
1255    char *buf, int buflen, int *lenp)
1256{
1257	int synd_status, flt_in_memory, ret;
1258	ushort_t flt_status = 0;
1259	char unum[UNUM_NAMLEN];
1260
1261	/*
1262	 * Check for an invalid address.
1263	 */
1264	if (afar == (uint64_t)-1)
1265		return (ENXIO);
1266
1267	if (synd == (uint64_t)-1)
1268		synd_status = AFLT_STAT_INVALID;
1269	else
1270		synd_status = AFLT_STAT_VALID;
1271
1272	flt_in_memory = (*afsr & SFSR_MEMORY) &&
1273	    pf_is_memory(afar >> MMU_PAGESHIFT);
1274
1275	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
1276	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
1277	if (ret != 0)
1278		return (ret);
1279
1280	if (*lenp >= buflen)
1281		return (ENAMETOOLONG);
1282
1283	(void) strncpy(buf, unum, buflen);
1284
1285	return (0);
1286}
1287
1288/*
1289 * Routine to return memory information associated
1290 * with a physical address and syndrome.
1291 */
1292/*ARGSUSED*/
1293int
1294cpu_get_mem_info(uint64_t synd, uint64_t afar,
1295    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1296    int *segsp, int *banksp, int *mcidp)
1297{
1298	int synd_code = (int)synd;
1299
1300	if (afar == (uint64_t)-1)
1301		return (ENXIO);
1302
1303	if (p2get_mem_info != NULL)
1304		return ((p2get_mem_info)(synd_code, afar, mem_sizep, seg_sizep,
1305		    bank_sizep, segsp, banksp, mcidp));
1306	else
1307		return (ENOTSUP);
1308}
1309
1310/*
1311 * Routine to return a string identifying the physical
1312 * name associated with a cpuid.
1313 */
1314int
1315cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
1316{
1317	int ret;
1318	char unum[UNUM_NAMLEN];
1319
1320	if (&plat_get_cpu_unum) {
1321		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN,
1322		    lenp)) != 0)
1323			return (ret);
1324	} else {
1325		return (ENOTSUP);
1326	}
1327
1328	if (*lenp >= buflen)
1329		return (ENAMETOOLONG);
1330
1331	(void) strncpy(buf, unum, *lenp);
1332
1333	return (0);
1334}
1335
1336/*
1337 * This routine exports the name buffer size.
1338 */
1339size_t
1340cpu_get_name_bufsize()
1341{
1342	return (UNUM_NAMLEN);
1343}
1344
1345/*
1346 * Flush the entire ecache by ASI_L2_CNTL.U2_FLUSH
1347 */
1348void
1349cpu_flush_ecache(void)
1350{
1351	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
1352	    cpunodes[CPU->cpu_id].ecache_linesize);
1353}
1354
1355static uint8_t
1356flt_to_trap_type(struct async_flt *aflt)
1357{
1358	if (aflt->flt_status & OPL_ECC_ISYNC_TRAP)
1359		return (TRAP_TYPE_ECC_I);
1360	if (aflt->flt_status & OPL_ECC_DSYNC_TRAP)
1361		return (TRAP_TYPE_ECC_D);
1362	if (aflt->flt_status & OPL_ECC_URGENT_TRAP)
1363		return (TRAP_TYPE_URGENT);
1364	return (TRAP_TYPE_UNKNOWN);
1365}
1366
1367/*
1368 * Encode the data saved in the opl_async_flt_t struct into
1369 * the FM ereport payload.
1370 */
1371/* ARGSUSED */
1372static void
1373cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
1374		nvlist_t *resource)
1375{
1376	opl_async_flt_t *opl_flt = (opl_async_flt_t *)aflt;
1377	char unum[UNUM_NAMLEN];
1378	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
1379	int len;
1380
1381
1382	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFSR) {
1383		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFSR,
1384		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
1385	}
1386	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFAR) {
1387		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFAR,
1388		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
1389	}
1390	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_UGESR) {
1391		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_UGESR,
1392		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
1393	}
1394	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
1395		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
1396		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
1397	}
1398	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
1399		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
1400		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
1401	}
1402	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
1403		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
1404		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
1405	}
1406	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
1407		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
1408		    DATA_TYPE_BOOLEAN_VALUE,
1409		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
1410	}
1411	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_FLT_STATUS) {
1412		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_FLT_STATUS,
1413		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_status, NULL);
1414	}
1415
1416	switch (opl_flt->flt_eid_mod) {
1417	case OPL_ERRID_CPU:
1418		(void) snprintf(sbuf, sizeof (sbuf), "%llX",
1419		    (u_longlong_t)cpunodes[opl_flt->flt_eid_sid].device_id);
1420		(void) fm_fmri_cpu_set(resource, FM_CPU_SCHEME_VERSION,
1421		    NULL, opl_flt->flt_eid_sid,
1422		    (uint8_t *)&cpunodes[opl_flt->flt_eid_sid].version, sbuf);
1423		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
1424		    DATA_TYPE_NVLIST, resource, NULL);
1425		break;
1426
1427	case OPL_ERRID_CHANNEL:
1428		/*
1429		 * No resource is created but the cpumem DE will find
1430		 * the defective path by retreiving EID from SFSR which is
1431		 * included in the payload.
1432		 */
1433		break;
1434
1435	case OPL_ERRID_MEM:
1436		(void) cpu_get_mem_unum_aflt(0, aflt, unum, UNUM_NAMLEN, &len);
1437		(void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, NULL,
1438		    unum, NULL, (uint64_t)-1);
1439		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
1440		    DATA_TYPE_NVLIST, resource, NULL);
1441		break;
1442
1443	case OPL_ERRID_PATH:
1444		/*
1445		 * No resource is created but the cpumem DE will find
1446		 * the defective path by retreiving EID from SFSR which is
1447		 * included in the payload.
1448		 */
1449		break;
1450	}
1451}
1452
1453/*
1454 * Returns whether fault address is valid for this error bit and
1455 * whether the address is "in memory" (i.e. pf_is_memory returns 1).
1456 */
1457/*ARGSUSED*/
1458static int
1459cpu_flt_in_memory(opl_async_flt_t *opl_flt, uint64_t t_afsr_bit)
1460{
1461	struct async_flt *aflt = (struct async_flt *)opl_flt;
1462
1463	if (aflt->flt_status & (OPL_ECC_SYNC_TRAP)) {
1464		return ((t_afsr_bit & SFSR_MEMORY) &&
1465		    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
1466	}
1467	return (0);
1468}
1469
/*
 * Slave side of stick synchronization.  On OPL the SCF does the stick
 * synchronization, so this is intentionally a no-op.
 */
void
sticksync_slave(void)
{
}
1477
/*
 * Master side of stick synchronization.  On OPL the SCF does the stick
 * synchronization, so this is intentionally a no-op.
 */
void
sticksync_master(void)
{
}
1485
1486/*
1487 * Cpu private unitialization.  OPL cpus do not use the private area.
1488 */
1489void
1490cpu_uninit_private(struct cpu *cp)
1491{
1492	cmp_delete_cpu(cp->cpu_id);
1493}
1494
/*
 * Error-path ecache flush.  This always flushes the entire cache by
 * deferring to cpu_flush_ecache().
 */
void
cpu_error_ecache_flush(void)
{
	cpu_flush_ecache();
}
1503
1504void
1505cpu_ereport_post(struct async_flt *aflt)
1506{
1507	char *cpu_type, buf[FM_MAX_CLASS];
1508	nv_alloc_t *nva = NULL;
1509	nvlist_t *ereport, *detector, *resource;
1510	errorq_elem_t *eqep;
1511	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
1512
1513	if (aflt->flt_panic || panicstr) {
1514		eqep = errorq_reserve(ereport_errorq);
1515		if (eqep == NULL)
1516			return;
1517		ereport = errorq_elem_nvl(ereport_errorq, eqep);
1518		nva = errorq_elem_nva(ereport_errorq, eqep);
1519	} else {
1520		ereport = fm_nvlist_create(nva);
1521	}
1522
1523	/*
1524	 * Create the scheme "cpu" FMRI.
1525	 */
1526	detector = fm_nvlist_create(nva);
1527	resource = fm_nvlist_create(nva);
1528	switch (cpunodes[aflt->flt_inst].implementation) {
1529	case OLYMPUS_C_IMPL:
1530		cpu_type = FM_EREPORT_CPU_SPARC64_VI;
1531		break;
1532	case JUPITER_IMPL:
1533		cpu_type = FM_EREPORT_CPU_SPARC64_VII;
1534		break;
1535	default:
1536		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
1537		break;
1538	}
1539	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
1540	    (u_longlong_t)cpunodes[aflt->flt_inst].device_id);
1541	(void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL,
1542	    aflt->flt_inst, (uint8_t *)&cpunodes[aflt->flt_inst].version,
1543	    sbuf);
1544
1545	/*
1546	 * Encode all the common data into the ereport.
1547	 */
1548	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
1549	    FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
1550
1551	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
1552	    fm_ena_generate(aflt->flt_id, FM_ENA_FMT1), detector, NULL);
1553
1554	/*
1555	 * Encode the error specific data that was saved in
1556	 * the async_flt structure into the ereport.
1557	 */
1558	cpu_payload_add_aflt(aflt, ereport, resource);
1559
1560	if (aflt->flt_panic || panicstr) {
1561		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
1562	} else {
1563		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
1564		fm_nvlist_destroy(ereport, FM_NVA_FREE);
1565		fm_nvlist_destroy(detector, FM_NVA_FREE);
1566		fm_nvlist_destroy(resource, FM_NVA_FREE);
1567	}
1568}
1569
1570void
1571cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
1572{
1573	int status;
1574	ddi_fm_error_t de;
1575
1576	bzero(&de, sizeof (ddi_fm_error_t));
1577
1578	de.fme_version = DDI_FME_VERSION;
1579	de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1);
1580	de.fme_flag = expected;
1581	de.fme_bus_specific = (void *)aflt->flt_addr;
1582	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
1583	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
1584		aflt->flt_panic = 1;
1585}
1586
1587void
1588cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
1589    errorq_t *eqp, uint_t flag)
1590{
1591	struct async_flt *aflt = (struct async_flt *)payload;
1592
1593	aflt->flt_erpt_class = error_class;
1594	errorq_dispatch(eqp, payload, payload_sz, flag);
1595}
1596
1597void
1598adjust_hw_copy_limits(int ecache_size)
1599{
1600	/*
1601	 * Set hw copy limits.
1602	 *
1603	 * /etc/system will be parsed later and can override one or more
1604	 * of these settings.
1605	 *
1606	 * At this time, ecache size seems only mildly relevant.
1607	 * We seem to run into issues with the d-cache and stalls
1608	 * we see on misses.
1609	 *
1610	 * Cycle measurement indicates that 2 byte aligned copies fare
1611	 * little better than doing things with VIS at around 512 bytes.
1612	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
1613	 * aligned is faster whenever the source and destination data
1614	 * in cache and the total size is less than 2 Kbytes.  The 2K
1615	 * limit seems to be driven by the 2K write cache.
1616	 * When more than 2K of copies are done in non-VIS mode, stores
1617	 * backup in the write cache.  In VIS mode, the write cache is
1618	 * bypassed, allowing faster cache-line writes aligned on cache
1619	 * boundaries.
1620	 *
1621	 * In addition, in non-VIS mode, there is no prefetching, so
1622	 * for larger copies, the advantage of prefetching to avoid even
1623	 * occasional cache misses is enough to justify using the VIS code.
1624	 *
1625	 * During testing, it was discovered that netbench ran 3% slower
1626	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
1627	 * applications, data is only used once (copied to the output
1628	 * buffer, then copied by the network device off the system).  Using
1629	 * the VIS copy saves more L2 cache state.  Network copies are
1630	 * around 1.3K to 1.5K in size for historical reasons.
1631	 *
1632	 * Therefore, a limit of 1K bytes will be used for the 8 byte
1633	 * aligned copy even for large caches and 8 MB ecache.  The
1634	 * infrastructure to allow different limits for different sized
1635	 * caches is kept to allow further tuning in later releases.
1636	 */
1637
1638	if (min_ecache_size == 0 && use_hw_bcopy) {
1639		/*
1640		 * First time through - should be before /etc/system
1641		 * is read.
1642		 * Could skip the checks for zero but this lets us
1643		 * preserve any debugger rewrites.
1644		 */
1645		if (hw_copy_limit_1 == 0) {
1646			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
1647			priv_hcl_1 = hw_copy_limit_1;
1648		}
1649		if (hw_copy_limit_2 == 0) {
1650			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
1651			priv_hcl_2 = hw_copy_limit_2;
1652		}
1653		if (hw_copy_limit_4 == 0) {
1654			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
1655			priv_hcl_4 = hw_copy_limit_4;
1656		}
1657		if (hw_copy_limit_8 == 0) {
1658			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
1659			priv_hcl_8 = hw_copy_limit_8;
1660		}
1661		min_ecache_size = ecache_size;
1662	} else {
1663		/*
1664		 * MP initialization. Called *after* /etc/system has
1665		 * been parsed. One CPU has already been initialized.
1666		 * Need to cater for /etc/system having scragged one
1667		 * of our values.
1668		 */
1669		if (ecache_size == min_ecache_size) {
1670			/*
1671			 * Same size ecache. We do nothing unless we
1672			 * have a pessimistic ecache setting. In that
1673			 * case we become more optimistic (if the cache is
1674			 * large enough).
1675			 */
1676			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
1677				/*
1678				 * Need to adjust hw_copy_limit* from our
1679				 * pessimistic uniprocessor value to a more
1680				 * optimistic UP value *iff* it hasn't been
1681				 * reset.
1682				 */
1683				if ((ecache_size > 1048576) &&
1684				    (priv_hcl_8 == hw_copy_limit_8)) {
1685					if (ecache_size <= 2097152)
1686						hw_copy_limit_8 = 4 *
1687						    VIS_COPY_THRESHOLD;
1688					else if (ecache_size <= 4194304)
1689						hw_copy_limit_8 = 4 *
1690						    VIS_COPY_THRESHOLD;
1691					else
1692						hw_copy_limit_8 = 4 *
1693						    VIS_COPY_THRESHOLD;
1694					priv_hcl_8 = hw_copy_limit_8;
1695				}
1696			}
1697		} else if (ecache_size < min_ecache_size) {
1698			/*
1699			 * A different ecache size. Can this even happen?
1700			 */
1701			if (priv_hcl_8 == hw_copy_limit_8) {
1702				/*
1703				 * The previous value that we set
1704				 * is unchanged (i.e., it hasn't been
1705				 * scragged by /etc/system). Rewrite it.
1706				 */
1707				if (ecache_size <= 1048576)
1708					hw_copy_limit_8 = 8 *
1709					    VIS_COPY_THRESHOLD;
1710				else if (ecache_size <= 2097152)
1711					hw_copy_limit_8 = 8 *
1712					    VIS_COPY_THRESHOLD;
1713				else if (ecache_size <= 4194304)
1714					hw_copy_limit_8 = 8 *
1715					    VIS_COPY_THRESHOLD;
1716				else
1717					hw_copy_limit_8 = 10 *
1718					    VIS_COPY_THRESHOLD;
1719				priv_hcl_8 = hw_copy_limit_8;
1720				min_ecache_size = ecache_size;
1721			}
1722		}
1723	}
1724}
1725
1726#define	VIS_BLOCKSIZE		64
1727
1728int
1729dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
1730{
1731	int ret, watched;
1732
1733	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
1734	ret = dtrace_blksuword32(addr, data, 0);
1735	if (watched)
1736		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
1737
1738	return (ret);
1739}
1740
1741void
1742opl_cpu_reg_init()
1743{
1744	uint64_t	this_cpu_log;
1745
1746	if (cpu[getprocessorid()] == &cpu0 && opl_cpu0_log_setup == 1) {
1747		/*
1748		 * Support for "ta 3"
1749		 */
1750		opl_ta3();
1751
1752		/*
1753		 * If we are being called at boot time on cpu0 the error
1754		 * log is already set up in cpu_setup. Clear the
1755		 * opl_cpu0_log_setup flag so that a subsequent DR of cpu0 will
1756		 * do the proper initialization.
1757		 */
1758		opl_cpu0_log_setup = 0;
1759		return;
1760	}
1761
1762	/*
1763	 * Initialize Error log Scratch register for error handling.
1764	 */
1765
1766	this_cpu_log = va_to_pa((void*)(((uint64_t)opl_err_log) +
1767	    ERRLOG_BUFSZ * (getprocessorid())));
1768	opl_error_setup(this_cpu_log);
1769}
1770
1771/*
1772 * Queue one event in ue_queue based on ecc_type_to_info entry.
1773 */
1774static void
1775cpu_queue_one_event(opl_async_flt_t *opl_flt, char *reason,
1776    ecc_type_to_info_t *eccp)
1777{
1778	struct async_flt *aflt = (struct async_flt *)opl_flt;
1779
1780	if (reason &&
1781	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
1782		(void) strcat(reason, eccp->ec_reason);
1783	}
1784
1785	opl_flt->flt_bit = eccp->ec_afsr_bit;
1786	opl_flt->flt_type = eccp->ec_flt_type;
1787	aflt->flt_in_memory = cpu_flt_in_memory(opl_flt, opl_flt->flt_bit);
1788	aflt->flt_payload = eccp->ec_err_payload;
1789
1790	ASSERT(aflt->flt_status & (OPL_ECC_SYNC_TRAP|OPL_ECC_URGENT_TRAP));
1791	cpu_errorq_dispatch(eccp->ec_err_class, (void *)opl_flt,
1792	    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
1793}
1794
1795/*
1796 * Queue events on async event queue one event per error bit.
1797 * Return number of events queued.
1798 */
1799int
1800cpu_queue_events(opl_async_flt_t *opl_flt, char *reason, uint64_t t_afsr_errs)
1801{
1802	struct async_flt *aflt = (struct async_flt *)opl_flt;
1803	ecc_type_to_info_t *eccp;
1804	int nevents = 0;
1805
1806	/*
1807	 * Queue expected errors, error bit and fault type must must match
1808	 * in the ecc_type_to_info table.
1809	 */
1810	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
1811	    eccp++) {
1812		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
1813		    (eccp->ec_flags & aflt->flt_status) != 0) {
1814			/*
1815			 * UE error event can be further
1816			 * classified/breakdown into finer granularity
1817			 * based on the flt_eid_mod value set by HW.  We do
1818			 * special handling here so that we can report UE
1819			 * error in finer granularity as ue_mem,
1820			 * ue_channel, ue_cpu or ue_path.
1821			 */
1822			if (eccp->ec_flt_type == OPL_CPU_SYNC_UE) {
1823				opl_flt->flt_eid_mod = (aflt->flt_stat &
1824				    SFSR_EID_MOD) >> SFSR_EID_MOD_SHIFT;
1825				opl_flt->flt_eid_sid = (aflt->flt_stat &
1826				    SFSR_EID_SID) >> SFSR_EID_SID_SHIFT;
1827				/*
1828				 * Need to advance eccp pointer by flt_eid_mod
1829				 * so that we get an appropriate ecc pointer
1830				 *
1831				 * EID			# of advances
1832				 * ----------------------------------
1833				 * OPL_ERRID_MEM	0
1834				 * OPL_ERRID_CHANNEL	1
1835				 * OPL_ERRID_CPU	2
1836				 * OPL_ERRID_PATH	3
1837				 */
1838				eccp += opl_flt->flt_eid_mod;
1839			}
1840			cpu_queue_one_event(opl_flt, reason, eccp);
1841			t_afsr_errs &= ~eccp->ec_afsr_bit;
1842			nevents++;
1843		}
1844	}
1845
1846	return (nevents);
1847}
1848
1849/*
1850 * Sync. error wrapper functions.
1851 * We use these functions in order to transfer here from the
1852 * nucleus trap handler information about trap type (data or
1853 * instruction) and trap level (0 or above 0). This way we
1854 * get rid of using SFSR's reserved bits.
1855 */
1856
/* Values passed as the 'tl' argument of opl_cpu_sync_error(). */
#define	OPL_SYNC_TL0	0
#define	OPL_SYNC_TL1	1
/*
 * Values passed as the 'derr' argument of opl_cpu_sync_error(); a
 * non-zero value selects OPL_ECC_DSYNC_TRAP, zero OPL_ECC_ISYNC_TRAP.
 */
#define	OPL_ISYNC_ERR	0
#define	OPL_DSYNC_ERR	1
1861
1862void
1863opl_cpu_isync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1864{
1865	uint64_t t_sfar = p_sfar;
1866	uint64_t t_sfsr = p_sfsr;
1867
1868	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1869	    OPL_SYNC_TL0, OPL_ISYNC_ERR);
1870}
1871
1872void
1873opl_cpu_isync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1874{
1875	uint64_t t_sfar = p_sfar;
1876	uint64_t t_sfsr = p_sfsr;
1877
1878	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1879	    OPL_SYNC_TL1, OPL_ISYNC_ERR);
1880}
1881
1882void
1883opl_cpu_dsync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1884{
1885	uint64_t t_sfar = p_sfar;
1886	uint64_t t_sfsr = p_sfsr;
1887
1888	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1889	    OPL_SYNC_TL0, OPL_DSYNC_ERR);
1890}
1891
1892void
1893opl_cpu_dsync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1894{
1895	uint64_t t_sfar = p_sfar;
1896	uint64_t t_sfsr = p_sfsr;
1897
1898	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1899	    OPL_SYNC_TL1, OPL_DSYNC_ERR);
1900}
1901
1902/*
1903 * The fj sync err handler transfers control here for UE, BERR, TO, TLB_MUL
1904 * and TLB_PRT.
1905 * This function is designed based on cpu_deferred_error().
1906 */
1907
1908static void
1909opl_cpu_sync_error(struct regs *rp, ulong_t t_sfar, ulong_t t_sfsr,
1910    uint_t tl, uint_t derr)
1911{
1912	opl_async_flt_t opl_flt;
1913	struct async_flt *aflt;
1914	int trampolined = 0;
1915	char pr_reason[MAX_REASON_STRING];
1916	uint64_t log_sfsr;
1917	int expected = DDI_FM_ERR_UNEXPECTED;
1918	ddi_acc_hdl_t *hp;
1919
1920	/*
1921	 * We need to look at p_flag to determine if the thread detected an
1922	 * error while dumping core.  We can't grab p_lock here, but it's ok
1923	 * because we just need a consistent snapshot and we know that everyone
1924	 * else will store a consistent set of bits while holding p_lock.  We
1925	 * don't have to worry about a race because SDOCORE is set once prior
1926	 * to doing i/o from the process's address space and is never cleared.
1927	 */
1928	uint_t pflag = ttoproc(curthread)->p_flag;
1929
1930	pr_reason[0] = '\0';
1931
1932	/*
1933	 * handle the specific error
1934	 */
1935	bzero(&opl_flt, sizeof (opl_async_flt_t));
1936	aflt = (struct async_flt *)&opl_flt;
1937	aflt->flt_id = gethrtime_waitfree();
1938	aflt->flt_bus_id = getprocessorid();
1939	aflt->flt_inst = CPU->cpu_id;
1940	aflt->flt_stat = t_sfsr;
1941	aflt->flt_addr = t_sfar;
1942	aflt->flt_pc = (caddr_t)rp->r_pc;
1943	aflt->flt_prot = (uchar_t)AFLT_PROT_NONE;
1944	aflt->flt_class = (uchar_t)CPU_FAULT;
1945	aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate &
1946	    TSTATE_PRIV) ? 1 : 0));
1947	aflt->flt_tl = (uchar_t)tl;
1948	aflt->flt_panic = (uchar_t)(tl != 0 || aft_testfatal != 0 ||
1949	    (t_sfsr & (SFSR_TLB_MUL|SFSR_TLB_PRT)) != 0);
1950	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1951	aflt->flt_status = (derr) ? OPL_ECC_DSYNC_TRAP : OPL_ECC_ISYNC_TRAP;
1952
1953	/*
1954	 * If SFSR.FV is not set, both SFSR and SFAR/SFPAR values are uncertain.
1955	 * So, clear all error bits to avoid mis-handling and force the system
1956	 * panicked.
1957	 * We skip all the procedures below down to the panic message call.
1958	 */
1959	if (!(t_sfsr & SFSR_FV)) {
1960		opl_flt.flt_type = OPL_CPU_INV_SFSR;
1961		aflt->flt_panic = 1;
1962		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
1963		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt,
1964		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
1965		fm_panic("%sErrors(s)", "invalid SFSR");
1966	}
1967
1968	/*
1969	 * If either UE and MK bit is off, this is not valid UE error.
1970	 * If it is not valid UE error, clear UE & MK_UE bits to prevent
1971	 * mis-handling below.
1972	 * aflt->flt_stat keeps the original bits as a reference.
1973	 */
1974	if ((t_sfsr & (SFSR_MK_UE|SFSR_UE)) !=
1975	    (SFSR_MK_UE|SFSR_UE)) {
1976		t_sfsr &= ~(SFSR_MK_UE|SFSR_UE);
1977	}
1978
1979	/*
1980	 * If the trap occurred in privileged mode at TL=0, we need to check to
1981	 * see if we were executing in the kernel under on_trap() or t_lofault
1982	 * protection.  If so, modify the saved registers so that we return
1983	 * from the trap to the appropriate trampoline routine.
1984	 */
1985	if (!aflt->flt_panic && aflt->flt_priv && tl == 0) {
1986		if (curthread->t_ontrap != NULL) {
1987			on_trap_data_t *otp = curthread->t_ontrap;
1988
1989			if (otp->ot_prot & OT_DATA_EC) {
1990				aflt->flt_prot = (uchar_t)AFLT_PROT_EC;
1991				otp->ot_trap |= (ushort_t)OT_DATA_EC;
1992				rp->r_pc = otp->ot_trampoline;
1993				rp->r_npc = rp->r_pc + 4;
1994				trampolined = 1;
1995			}
1996
1997			if ((t_sfsr & (SFSR_TO | SFSR_BERR)) &&
1998			    (otp->ot_prot & OT_DATA_ACCESS)) {
1999				aflt->flt_prot = (uchar_t)AFLT_PROT_ACCESS;
2000				otp->ot_trap |= (ushort_t)OT_DATA_ACCESS;
2001				rp->r_pc = otp->ot_trampoline;
2002				rp->r_npc = rp->r_pc + 4;
2003				trampolined = 1;
2004				/*
2005				 * for peeks and caut_gets errors are expected
2006				 */
2007				hp = (ddi_acc_hdl_t *)otp->ot_handle;
2008				if (!hp)
2009					expected = DDI_FM_ERR_PEEK;
2010				else if (hp->ah_acc.devacc_attr_access ==
2011				    DDI_CAUTIOUS_ACC)
2012					expected = DDI_FM_ERR_EXPECTED;
2013			}
2014
2015		} else if (curthread->t_lofault) {
2016			aflt->flt_prot = AFLT_PROT_COPY;
2017			rp->r_g1 = EFAULT;
2018			rp->r_pc = curthread->t_lofault;
2019			rp->r_npc = rp->r_pc + 4;
2020			trampolined = 1;
2021		}
2022	}
2023
2024	/*
2025	 * If we're in user mode or we're doing a protected copy, we either
2026	 * want the ASTON code below to send a signal to the user process
2027	 * or we want to panic if aft_panic is set.
2028	 *
2029	 * If we're in privileged mode and we're not doing a copy, then we
2030	 * need to check if we've trampolined.  If we haven't trampolined,
2031	 * we should panic.
2032	 */
2033	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2034		if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO)))
2035			aflt->flt_panic |= aft_panic;
2036	} else if (!trampolined) {
2037		aflt->flt_panic = 1;
2038	}
2039
2040	/*
2041	 * If we've trampolined due to a privileged TO or BERR, or if an
2042	 * unprivileged TO or BERR occurred, we don't want to enqueue an
2043	 * event for that TO or BERR.  Queue all other events (if any) besides
2044	 * the TO/BERR.
2045	 */
2046	log_sfsr = t_sfsr;
2047	if (trampolined) {
2048		log_sfsr &= ~(SFSR_TO | SFSR_BERR);
2049	} else if (!aflt->flt_priv) {
2050		/*
2051		 * User mode, suppress messages if
2052		 * cpu_berr_to_verbose is not set.
2053		 */
2054		if (!cpu_berr_to_verbose)
2055			log_sfsr &= ~(SFSR_TO | SFSR_BERR);
2056	}
2057
2058	if (((log_sfsr & SFSR_ERRS) && (cpu_queue_events(&opl_flt, pr_reason,
2059	    t_sfsr) == 0)) || ((t_sfsr & SFSR_ERRS) == 0)) {
2060		opl_flt.flt_type = OPL_CPU_INV_SFSR;
2061		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
2062		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt,
2063		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
2064	}
2065
2066	if (t_sfsr & (SFSR_UE|SFSR_TO|SFSR_BERR)) {
2067		cpu_run_bus_error_handlers(aflt, expected);
2068	}
2069
2070	/*
2071	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2072	 * be logged as part of the panic flow.
2073	 */
2074	if (aflt->flt_panic) {
2075		if (pr_reason[0] == 0)
2076			strcpy(pr_reason, "invalid SFSR ");
2077
2078		fm_panic("%sErrors(s)", pr_reason);
2079	}
2080
2081	/*
2082	 * If we queued an error and we are going to return from the trap and
2083	 * the error was in user mode or inside of a copy routine, set AST flag
2084	 * so the queue will be drained before returning to user mode.  The
2085	 * AST processing will also act on our failure policy.
2086	 */
2087	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2088		int pcb_flag = 0;
2089
2090		if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO)))
2091			pcb_flag |= ASYNC_HWERR;
2092
2093		if (t_sfsr & SFSR_BERR)
2094			pcb_flag |= ASYNC_BERR;
2095
2096		if (t_sfsr & SFSR_TO)
2097			pcb_flag |= ASYNC_BTO;
2098
2099		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
2100		aston(curthread);
2101	}
2102}
2103
2104/*ARGSUSED*/
2105void
2106opl_cpu_urgent_error(struct regs *rp, ulong_t p_ugesr, ulong_t tl)
2107{
2108	opl_async_flt_t opl_flt;
2109	struct async_flt *aflt;
2110	char pr_reason[MAX_REASON_STRING];
2111
2112	/* normalize tl */
2113	tl = (tl >= 2 ? 1 : 0);
2114	pr_reason[0] = '\0';
2115
2116	bzero(&opl_flt, sizeof (opl_async_flt_t));
2117	aflt = (struct async_flt *)&opl_flt;
2118	aflt->flt_id = gethrtime_waitfree();
2119	aflt->flt_bus_id = getprocessorid();
2120	aflt->flt_inst = CPU->cpu_id;
2121	aflt->flt_stat = p_ugesr;
2122	aflt->flt_pc = (caddr_t)rp->r_pc;
2123	aflt->flt_class = (uchar_t)CPU_FAULT;
2124	aflt->flt_tl = tl;
2125	aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ?
2126	    1 : 0));
2127	aflt->flt_status = OPL_ECC_URGENT_TRAP;
2128	aflt->flt_panic = 1;
2129	/*
2130	 * HW does not set mod/sid in case of urgent error.
2131	 * So we have to set it here.
2132	 */
2133	opl_flt.flt_eid_mod = OPL_ERRID_CPU;
2134	opl_flt.flt_eid_sid = aflt->flt_inst;
2135
2136	if (cpu_queue_events(&opl_flt, pr_reason, p_ugesr) == 0) {
2137		opl_flt.flt_type = OPL_CPU_INV_UGESR;
2138		aflt->flt_payload = FM_EREPORT_PAYLOAD_URGENT;
2139		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_URG, (void *)&opl_flt,
2140		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
2141	}
2142
2143	fm_panic("Urgent Error");
2144}
2145
2146/*
2147 * Initialization error counters resetting.
2148 */
2149/* ARGSUSED */
2150static void
2151opl_ras_online(void *arg, cpu_t *cp, cyc_handler_t *hdlr, cyc_time_t *when)
2152{
2153	hdlr->cyh_func = (cyc_func_t)ras_cntr_reset;
2154	hdlr->cyh_level = CY_LOW_LEVEL;
2155	hdlr->cyh_arg = (void *)(uintptr_t)cp->cpu_id;
2156
2157	when->cyt_when = cp->cpu_id * (((hrtime_t)NANOSEC * 10)/ NCPU);
2158	when->cyt_interval = (hrtime_t)NANOSEC * opl_async_check_interval;
2159}
2160
2161void
2162cpu_mp_init(void)
2163{
2164	cyc_omni_handler_t hdlr;
2165
2166	hdlr.cyo_online = opl_ras_online;
2167	hdlr.cyo_offline = NULL;
2168	hdlr.cyo_arg = NULL;
2169	mutex_enter(&cpu_lock);
2170	(void) cyclic_add_omni(&hdlr);
2171	mutex_exit(&cpu_lock);
2172}
2173
/*
 * Consulted by mmu_init_kernel_pgsz(): when zero, the stlb is not
 * reprogrammed and TTE8K is used.  mmu_init_kernel_pgsz() also resets this
 * to zero when OBP does not support a non-zero primary context.
 */
int heaplp_use_stlb = 0;
2175
2176void
2177mmu_init_kernel_pgsz(struct hat *hat)
2178{
2179	uint_t tte = page_szc(segkmem_lpsize);
2180	uchar_t new_cext_primary, new_cext_nucleus;
2181
2182	if (heaplp_use_stlb == 0) {
2183		/* do not reprogram stlb */
2184		tte = TTE8K;
2185	} else if (!plat_prom_preserve_kctx_is_supported()) {
2186		/* OBP does not support non-zero primary context */
2187		tte = TTE8K;
2188		heaplp_use_stlb = 0;
2189	}
2190
2191	new_cext_nucleus = TAGACCEXT_MKSZPAIR(tte, TTE8K);
2192	new_cext_primary = TAGACCEXT_MKSZPAIR(TTE8K, tte);
2193
2194	hat->sfmmu_cext = new_cext_primary;
2195	kcontextreg = ((uint64_t)new_cext_nucleus << CTXREG_NEXT_SHIFT) |
2196	    ((uint64_t)new_cext_primary << CTXREG_EXT_SHIFT);
2197}
2198
2199size_t
2200mmu_get_kernel_lpsize(size_t lpsize)
2201{
2202	uint_t tte;
2203
2204	if (lpsize == 0) {
2205		/* no setting for segkmem_lpsize in /etc/system: use default */
2206		return (MMU_PAGESIZE4M);
2207	}
2208
2209	for (tte = TTE8K; tte <= TTE4M; tte++) {
2210		if (lpsize == TTEBYTES(tte))
2211			return (lpsize);
2212	}
2213
2214	return (TTEBYTES(TTE8K));
2215}
2216
2217/*
2218 * Support for ta 3.
2219 * We allocate here a buffer for each cpu
2220 * for saving the current register window.
2221 */
2222typedef struct win_regs {
2223	uint64_t l[8];
2224	uint64_t i[8];
2225} win_regs_t;
2226static void
2227opl_ta3(void)
2228{
2229	/*
2230	 * opl_ta3 should only be called once at boot time.
2231	 */
2232	if (opl_ta3_save == NULL)
2233		opl_ta3_save = (char *)kmem_alloc(NCPU * sizeof (win_regs_t),
2234		    KM_SLEEP);
2235}
2236
2237/*
2238 * The following are functions that are unused in
2239 * OPL cpu module. They are defined here to resolve
2240 * dependencies in the "unix" module.
2241 * Unused functions that should never be called in
2242 * OPL are coded with ASSERT(0).
2243 */
2244
/*
 * Unused in the OPL cpu module; does nothing.
 */
void
cpu_disable_errors(void)
{
	/* intentionally empty */
}
2248
/*
 * Unused in the OPL cpu module; asserts if it is ever called.
 */
void
cpu_enable_errors(void)
{
	ASSERT(0);
}
2252
2253/*ARGSUSED*/
2254void
2255cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t t)
2256{ ASSERT(0); }
2257
/*
 * Unused in the OPL cpu module; does nothing.
 */
/*ARGSUSED*/
void
cpu_faulted_enter(struct cpu *cp)
{
	/* intentionally empty */
}
2262
/*
 * Unused in the OPL cpu module; does nothing.
 */
/*ARGSUSED*/
void
cpu_faulted_exit(struct cpu *cp)
{
	/* intentionally empty */
}
2267
/*
 * Unused in the OPL cpu module; does nothing.
 */
/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{
	/* intentionally empty */
}
2272
2273/*ARGSUSED*/
2274void
2275cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *t)
2276{ ASSERT(0); }
2277
2278/*ARGSUSED*/
2279void
2280cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
2281{ ASSERT(0); }
2282
/*
 * Unused in the OPL cpu module; asserts if it is ever called.
 */
/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
	ASSERT(0);
}
2287
/*
 * Unused in the OPL cpu module; does nothing.
 */
/*ARGSUSED*/
void
cpu_busy_ecache_scrub(struct cpu *cp)
{
	/* intentionally empty */
}
2292
/*
 * Unused in the OPL cpu module; does nothing.
 */
/*ARGSUSED*/
void
cpu_idle_ecache_scrub(struct cpu *cp)
{
	/* intentionally empty */
}
2297
/*
 * Unused in the OPL cpu module; asserts if it is ever called.
 */
/* ARGSUSED */
void
cpu_change_speed(uint64_t divisor, uint64_t arg2)
{
	ASSERT(0);
}
2302
/*
 * Unused in the OPL cpu module; does nothing.
 */
void
cpu_init_cache_scrub(void)
{
	/* intentionally empty */
}
2306
2307/* ARGSUSED */
2308int
2309cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
2310{
2311	if (&plat_get_mem_sid) {
2312		return (plat_get_mem_sid(unum, buf, buflen, lenp));
2313	} else {
2314		return (ENOTSUP);
2315	}
2316}
2317
2318/* ARGSUSED */
2319int
2320cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
2321{
2322	if (&plat_get_mem_addr) {
2323		return (plat_get_mem_addr(unum, sid, offset, addrp));
2324	} else {
2325		return (ENOTSUP);
2326	}
2327}
2328
2329/* ARGSUSED */
2330int
2331cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
2332{
2333	if (&plat_get_mem_offset) {
2334		return (plat_get_mem_offset(flt_addr, offp));
2335	} else {
2336		return (ENOTSUP);
2337	}
2338}
2339
2340/*ARGSUSED*/
2341void
2342itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
2343{ ASSERT(0); }
2344
2345/*ARGSUSED*/
2346void
2347dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
2348{ ASSERT(0); }
2349
/*
 * Unused in the OPL cpu module; asserts if it is ever called.
 */
/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{
	ASSERT(0);
}
2354
2355/*ARGSUSED*/
2356int
2357ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
2358    errorq_elem_t *eqep, size_t afltoffset)
2359{
2360	ASSERT(0);
2361	return (0);
2362}
2363
/*
 * Unused in the OPL cpu module; asserts if it is ever called and
 * otherwise returns NULL.
 */
/*ARGSUSED*/
char *
flt_to_error_type(struct async_flt *aflt)
{
	/* should never get here on OPL */
	ASSERT(0);

	return (NULL);
}
2371
2372#define	PROM_SPARC64VII_MODE_PROPNAME	"SPARC64-VII-mode"
2373
2374/*
2375 * Check for existence of OPL OBP property that indicates
2376 * SPARC64-VII support. By default, only enable Jupiter
2377 * features if the property is present.   It will be
2378 * present in all-Jupiter domains by OBP if the domain has
2379 * been selected by the user on the system controller to
2380 * run in Jupiter mode.  Basically, this OBP property must
2381 * be present to turn on the cpu_alljupiter flag.
2382 */
2383static int
2384prom_SPARC64VII_support_enabled(void)
2385{
2386	int val;
2387
2388	return ((prom_getprop(prom_rootnode(), PROM_SPARC64VII_MODE_PROPNAME,
2389	    (caddr_t)&val) == 0) ? 1 : 0);
2390}
2391
2392#define	PROM_KCTX_PRESERVED_PROPNAME	"context0-page-size-preserved"
2393
2394/*
2395 * Check for existence of OPL OBP property that indicates support for
2396 * preserving Solaris kernel page sizes when entering OBP.  We need to
2397 * check the prom tree since the ddi tree is not yet built when the
2398 * platform startup sequence is called.
2399 */
2400static int
2401plat_prom_preserve_kctx_is_supported(void)
2402{
2403	pnode_t		pnode;
2404	int		val;
2405
2406	/*
2407	 * Check for existence of context0-page-size-preserved property
2408	 * in virtual-memory prom node.
2409	 */
2410	pnode = (pnode_t)prom_getphandle(prom_mmu_ihandle());
2411	return ((prom_getprop(pnode, PROM_KCTX_PRESERVED_PROPNAME,
2412	    (caddr_t)&val) == 0) ? 1 : 0);
2413}
2414