1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24/*
25 * All Rights Reserved, Copyright (c) FUJITSU LIMITED 2008
26 */
27
28#include <sys/types.h>
29#include <sys/sysmacros.h>
30#include <sys/conf.h>
31#include <sys/modctl.h>
32#include <sys/stat.h>
33#include <sys/async.h>
34#include <sys/machcpuvar.h>
35#include <sys/machsystm.h>
36#include <sys/promif.h>
37#include <sys/ksynch.h>
38#include <sys/ddi.h>
39#include <sys/sunddi.h>
40#include <sys/sunndi.h>
41#include <sys/ddifm.h>
42#include <sys/fm/protocol.h>
43#include <sys/fm/util.h>
44#include <sys/kmem.h>
45#include <sys/fm/io/opl_mc_fm.h>
46#include <sys/memlist.h>
47#include <sys/param.h>
48#include <sys/disp.h>
49#include <vm/page.h>
50#include <sys/mc-opl.h>
51#include <sys/opl.h>
52#include <sys/opl_dimm.h>
53#include <sys/scfd/scfostoescf.h>
54#include <sys/cpu_module.h>
55#include <vm/seg_kmem.h>
56#include <sys/vmem.h>
57#include <vm/hat_sfmmu.h>
58#include <sys/vmsystm.h>
59#include <sys/membar.h>
60#include <sys/mem.h>
61
62/*
63 * Function prototypes
64 */
65static int mc_open(dev_t *, int, int, cred_t *);
66static int mc_close(dev_t, int, int, cred_t *);
67static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
68static int mc_attach(dev_info_t *, ddi_attach_cmd_t);
69static int mc_detach(dev_info_t *, ddi_detach_cmd_t);
70
71static int mc_poll_init(void);
72static void mc_poll_fini(void);
73static int mc_board_add(mc_opl_t *mcp);
74static int mc_board_del(mc_opl_t *mcp);
75static int mc_suspend(mc_opl_t *mcp, uint32_t flag);
76static int mc_resume(mc_opl_t *mcp, uint32_t flag);
77int opl_mc_suspend(void);
78int opl_mc_resume(void);
79
80static void insert_mcp(mc_opl_t *mcp);
81static void delete_mcp(mc_opl_t *mcp);
82
83static int pa_to_maddr(mc_opl_t *mcp, uint64_t pa, mc_addr_t *maddr);
84
85static int mc_rangecheck_pa(mc_opl_t *mcp, uint64_t pa);
86
87int mc_get_mem_unum(int, uint64_t, char *, int, int *);
88int mc_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *paddr);
89int mc_get_mem_offset(uint64_t paddr, uint64_t *offp);
90int mc_get_mem_sid(char *unum, char *buf, int buflen, int *lenp);
91int mc_get_mem_sid_dimm(mc_opl_t *mcp, char *dname, char *buf,
92    int buflen, int *lenp);
93mc_dimm_info_t *mc_get_dimm_list(mc_opl_t *mcp);
94mc_dimm_info_t *mc_prepare_dimmlist(board_dimm_info_t *bd_dimmp);
95int mc_set_mem_sid(mc_opl_t *mcp, char *buf, int buflen, int lsb, int bank,
96    uint32_t mf_type, uint32_t d_slot);
97static void mc_free_dimm_list(mc_dimm_info_t *d);
98static void mc_get_mlist(mc_opl_t *);
99static void mc_polling(void);
100static int mc_opl_get_physical_board(int);
101
102static void mc_clear_rewrite(mc_opl_t *mcp, int i);
103static void mc_set_rewrite(mc_opl_t *mcp, int bank, uint32_t addr, int state);
104static int mc_scf_log_event(mc_flt_page_t *flt_pag);
105
106#ifdef	DEBUG
107static int mc_ioctl_debug(dev_t, int, intptr_t, int, cred_t *, int *);
108void mc_dump_dimm(char *buf, int dnamesz, int serialsz, int partnumsz);
109void mc_dump_dimm_info(board_dimm_info_t *bd_dimmp);
110#endif
111
112#pragma weak opl_get_physical_board
113extern int opl_get_physical_board(int);
114extern int plat_max_boards(void);
115
116/*
117 * Configuration data structures
118 */
119static struct cb_ops mc_cb_ops = {
120	mc_open,			/* open */
121	mc_close,			/* close */
122	nulldev,			/* strategy */
123	nulldev,			/* print */
124	nodev,				/* dump */
125	nulldev,			/* read */
126	nulldev,			/* write */
127	mc_ioctl,			/* ioctl */
128	nodev,				/* devmap */
129	nodev,				/* mmap */
130	nodev,				/* segmap */
131	nochpoll,			/* poll */
132	ddi_prop_op,			/* cb_prop_op */
133	0,				/* streamtab */
134	D_MP | D_NEW | D_HOTPLUG,	/* Driver compatibility flag */
135	CB_REV,				/* rev */
136	nodev,				/* cb_aread */
137	nodev				/* cb_awrite */
138};
139
140static struct dev_ops mc_ops = {
141	DEVO_REV,			/* rev */
142	0,				/* refcnt  */
143	ddi_getinfo_1to1,		/* getinfo */
144	nulldev,			/* identify */
145	nulldev,			/* probe */
146	mc_attach,			/* attach */
147	mc_detach,			/* detach */
148	nulldev,			/* reset */
149	&mc_cb_ops,			/* cb_ops */
150	(struct bus_ops *)0,		/* bus_ops */
151	nulldev,			/* power */
152	ddi_quiesce_not_needed,			/* quiesce */
153};
154
155/*
156 * Driver globals
157 */
158
159static enum {
160	MODEL_FF1,
161	MODEL_FF2,
162	MODEL_DC,
163	MODEL_IKKAKU
164} plat_model = MODEL_DC;	/* The default behaviour is DC */
165
166static struct plat_model_names {
167	const char *unit_name;
168	const char *mem_name;
169} model_names[] = {
170	{ "MBU_A", "MEMB" },
171	{ "MBU_B", "MEMB" },
172	{ "CMU", "" },
173	{ "MBU_A", "" }
174};
175
176/*
177 * The DIMM Names for DC platform.
178 * The index into this table is made up of (bank, dslot),
179 * Where dslot occupies bits 0-1 and bank occupies 2-4.
180 */
181static char *mc_dc_dimm_unum_table[OPL_MAX_DIMMS] = {
182	/* --------CMUnn----------- */
183	/* --CS0-----|--CS1------ */
184	/* -H-|--L-- | -H- | -L-- */
185	"03A", "02A", "03B", "02B", /* Bank 0 (MAC 0 bank 0) */
186	"13A", "12A", "13B", "12B", /* Bank 1 (MAC 0 bank 1) */
187	"23A", "22A", "23B", "22B", /* Bank 2 (MAC 1 bank 0) */
188	"33A", "32A", "33B", "32B", /* Bank 3 (MAC 1 bank 1) */
189	"01A", "00A", "01B", "00B", /* Bank 4 (MAC 2 bank 0) */
190	"11A", "10A", "11B", "10B", /* Bank 5 (MAC 2 bank 1) */
191	"21A", "20A", "21B", "20B", /* Bank 6 (MAC 3 bank 0) */
192	"31A", "30A", "31B", "30B"  /* Bank 7 (MAC 3 bank 1) */
193};
194
195/*
196 * The DIMM Names for FF1/FF2/IKKAKU platforms.
197 * The index into this table is made up of (board, bank, dslot),
198 * Where dslot occupies bits 0-1, bank occupies 2-4 and
199 * board occupies the bit 5.
200 */
201static char *mc_ff_dimm_unum_table[2 * OPL_MAX_DIMMS] = {
202	/* --------CMU0---------- */
203	/* --CS0-----|--CS1------ */
204	/* -H-|--L-- | -H- | -L-- */
205	"03A", "02A", "03B", "02B", /* Bank 0 (MAC 0 bank 0) */
206	"01A", "00A", "01B", "00B", /* Bank 1 (MAC 0 bank 1) */
207	"13A", "12A", "13B", "12B", /* Bank 2 (MAC 1 bank 0) */
208	"11A", "10A", "11B", "10B", /* Bank 3 (MAC 1 bank 1) */
209	"23A", "22A", "23B", "22B", /* Bank 4 (MAC 2 bank 0) */
210	"21A", "20A", "21B", "20B", /* Bank 5 (MAC 2 bank 1) */
211	"33A", "32A", "33B", "32B", /* Bank 6 (MAC 3 bank 0) */
212	"31A", "30A", "31B", "30B", /* Bank 7 (MAC 3 bank 1) */
213	/* --------CMU1---------- */
214	/* --CS0-----|--CS1------ */
215	/* -H-|--L-- | -H- | -L-- */
216	"43A", "42A", "43B", "42B", /* Bank 0 (MAC 0 bank 0) */
217	"41A", "40A", "41B", "40B", /* Bank 1 (MAC 0 bank 1) */
218	"53A", "52A", "53B", "52B", /* Bank 2 (MAC 1 bank 0) */
219	"51A", "50A", "51B", "50B", /* Bank 3 (MAC 1 bank 1) */
220	"63A", "62A", "63B", "62B", /* Bank 4 (MAC 2 bank 0) */
221	"61A", "60A", "61B", "60B", /* Bank 5 (MAC 2 bank 1) */
222	"73A", "72A", "73B", "72B", /* Bank 6 (MAC 3 bank 0) */
223	"71A", "70A", "71B", "70B"  /* Bank 7 (MAC 3 bank 1) */
224};
225
226#define	BD_BK_SLOT_TO_INDEX(bd, bk, s)			\
227	(((bd & 0x01) << 5) | ((bk & 0x07) << 2) | (s & 0x03))
228
229#define	INDEX_TO_BANK(i)			(((i) & 0x1C) >> 2)
230#define	INDEX_TO_SLOT(i)			((i) & 0x03)
231
232#define	SLOT_TO_CS(slot)	((slot & 0x3) >> 1)
233
234/* Isolation unit size is 64 MB */
235#define	MC_ISOLATION_BSIZE	(64 * 1024 * 1024)
236
237#define	MC_MAX_SPEEDS 7
238
239typedef struct {
240	uint32_t mc_speeds;
241	uint32_t mc_period;
242} mc_scan_speed_t;
243
244#define	MC_CNTL_SPEED_SHIFT 26
245
246/*
247 * In mirror mode, we normalized the bank idx to "even" since
248 * the HW treats them as one unit w.r.t programming.
249 * This bank index will be the "effective" bank index.
250 * All mirrored bank state info on mc_period, mc_speedup_period
251 * will be stored in the even bank structure to avoid code duplication.
252 */
253#define	MIRROR_IDX(bankidx)	(bankidx & ~1)
254
255static mc_scan_speed_t	mc_scan_speeds[MC_MAX_SPEEDS] = {
256	{0x6 << MC_CNTL_SPEED_SHIFT, 0},
257	{0x5 << MC_CNTL_SPEED_SHIFT, 32},
258	{0x4 << MC_CNTL_SPEED_SHIFT, 64},
259	{0x3 << MC_CNTL_SPEED_SHIFT, 128},
260	{0x2 << MC_CNTL_SPEED_SHIFT, 256},
261	{0x1 << MC_CNTL_SPEED_SHIFT, 512},
262	{0x0 << MC_CNTL_SPEED_SHIFT, 1024}
263};
264
265static uint32_t	mc_max_speed = (0x6 << 26);
266
267int mc_isolation_bsize = MC_ISOLATION_BSIZE;
268int mc_patrol_interval_sec = MC_PATROL_INTERVAL_SEC;
269int mc_max_scf_retry = 16;
270int mc_max_scf_logs = 64;
271int mc_max_errlog_processed = BANKNUM_PER_SB*2;
272int mc_scan_period = 12 * 60 * 60;	/* 12 hours period */
273int mc_max_rewrite_loop = 100;
274int mc_rewrite_delay = 10;
275/*
276 * it takes SCF about 300 m.s. to process a requst.  We can bail out
277 * if it is busy.  It does not pay to wait for it too long.
278 */
279int mc_max_scf_loop = 2;
280int mc_scf_delay = 100;
281int mc_pce_dropped = 0;
282int mc_poll_priority = MINCLSYSPRI;
283int mc_max_rewrite_retry = 6 * 60;
284
285
286/*
287 * Mutex hierarchy in mc-opl
288 * If both mcmutex and mc_lock must be held,
289 * mcmutex must be acquired first, and then mc_lock.
290 */
291
292static kmutex_t mcmutex;
293mc_opl_t *mc_instances[OPL_MAX_BOARDS];
294
295static kmutex_t mc_polling_lock;
296static kcondvar_t mc_polling_cv;
297static kcondvar_t mc_poll_exit_cv;
298static int mc_poll_cmd = 0;
299static int mc_pollthr_running = 0;
300int mc_timeout_period = 0; /* this is in m.s. */
301void *mc_statep;
302
303#ifdef	DEBUG
304int oplmc_debug = 0;
305#endif
306
307static int mc_debug_show_all = 0;
308
309extern struct mod_ops mod_driverops;
310
311static struct modldrv modldrv = {
312	&mod_driverops,			/* module type, this one is a driver */
313	"OPL Memory-controller",	/* module name */
314	&mc_ops,			/* driver ops */
315};
316
317static struct modlinkage modlinkage = {
318	MODREV_1,		/* rev */
319	(void *)&modldrv,
320	NULL
321};
322
323#pragma weak opl_get_mem_unum
324#pragma weak opl_get_mem_sid
325#pragma weak opl_get_mem_offset
326#pragma weak opl_get_mem_addr
327
328extern int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *);
329extern int (*opl_get_mem_sid)(char *unum, char *buf, int buflen, int *lenp);
330extern int (*opl_get_mem_offset)(uint64_t paddr, uint64_t *offp);
331extern int (*opl_get_mem_addr)(char *unum, char *sid, uint64_t offset,
332    uint64_t *paddr);
333
334
335/*
336 * pseudo-mc node portid format
337 *
338 *		[10]   = 0
339 *		[9]    = 1
340 *		[8]    = LSB_ID[4] = 0
341 *		[7:4]  = LSB_ID[3:0]
342 *		[3:0]  = 0
343 *
344 */
345
346/*
347 * These are the module initialization routines.
348 */
349int
350_init(void)
351{
352	int	error;
353	int	plen;
354	char	model[20];
355	pnode_t	node;
356
357
358	if ((error = ddi_soft_state_init(&mc_statep,
359	    sizeof (mc_opl_t), 1)) != 0)
360		return (error);
361
362	if ((error = mc_poll_init()) != 0) {
363		ddi_soft_state_fini(&mc_statep);
364		return (error);
365	}
366
367	mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL);
368	if (&opl_get_mem_unum)
369		opl_get_mem_unum = mc_get_mem_unum;
370	if (&opl_get_mem_sid)
371		opl_get_mem_sid = mc_get_mem_sid;
372	if (&opl_get_mem_offset)
373		opl_get_mem_offset = mc_get_mem_offset;
374	if (&opl_get_mem_addr)
375		opl_get_mem_addr = mc_get_mem_addr;
376
377	node = prom_rootnode();
378	plen = prom_getproplen(node, "model");
379
380	if (plen > 0 && plen < sizeof (model)) {
381		(void) prom_getprop(node, "model", model);
382		model[plen] = '\0';
383		if (strcmp(model, "FF1") == 0)
384			plat_model = MODEL_FF1;
385		else if (strcmp(model, "FF2") == 0)
386			plat_model = MODEL_FF2;
387		else if (strncmp(model, "DC", 2) == 0)
388			plat_model = MODEL_DC;
389		else if (strcmp(model, "IKKAKU") == 0)
390			plat_model = MODEL_IKKAKU;
391	}
392
393	error =  mod_install(&modlinkage);
394	if (error != 0) {
395		if (&opl_get_mem_unum)
396			opl_get_mem_unum = NULL;
397		if (&opl_get_mem_sid)
398			opl_get_mem_sid = NULL;
399		if (&opl_get_mem_offset)
400			opl_get_mem_offset = NULL;
401		if (&opl_get_mem_addr)
402			opl_get_mem_addr = NULL;
403		mutex_destroy(&mcmutex);
404		mc_poll_fini();
405		ddi_soft_state_fini(&mc_statep);
406	}
407	return (error);
408}
409
410int
411_fini(void)
412{
413	int error;
414
415	if ((error = mod_remove(&modlinkage)) != 0)
416		return (error);
417
418	if (&opl_get_mem_unum)
419		opl_get_mem_unum = NULL;
420	if (&opl_get_mem_sid)
421		opl_get_mem_sid = NULL;
422	if (&opl_get_mem_offset)
423		opl_get_mem_offset = NULL;
424	if (&opl_get_mem_addr)
425		opl_get_mem_addr = NULL;
426
427	mutex_destroy(&mcmutex);
428	mc_poll_fini();
429	ddi_soft_state_fini(&mc_statep);
430
431	return (0);
432}
433
434int
435_info(struct modinfo *modinfop)
436{
437	return (mod_info(&modlinkage, modinfop));
438}
439
440static void
441mc_polling_thread()
442{
443	mutex_enter(&mc_polling_lock);
444	mc_pollthr_running = 1;
445	while (!(mc_poll_cmd & MC_POLL_EXIT)) {
446		mc_polling();
447		(void) cv_reltimedwait(&mc_polling_cv, &mc_polling_lock,
448		    mc_timeout_period, TR_CLOCK_TICK);
449	}
450	mc_pollthr_running = 0;
451
452	/*
453	 * signal if any one is waiting for this thread to exit.
454	 */
455	cv_signal(&mc_poll_exit_cv);
456	mutex_exit(&mc_polling_lock);
457	thread_exit();
458	/* NOTREACHED */
459}
460
461static int
462mc_poll_init()
463{
464	mutex_init(&mc_polling_lock, NULL, MUTEX_DRIVER, NULL);
465	cv_init(&mc_polling_cv, NULL, CV_DRIVER, NULL);
466	cv_init(&mc_poll_exit_cv, NULL, CV_DRIVER, NULL);
467	return (0);
468}
469
470static void
471mc_poll_fini()
472{
473	mutex_enter(&mc_polling_lock);
474	if (mc_pollthr_running) {
475		mc_poll_cmd = MC_POLL_EXIT;
476		cv_signal(&mc_polling_cv);
477		while (mc_pollthr_running) {
478			cv_wait(&mc_poll_exit_cv, &mc_polling_lock);
479		}
480	}
481	mutex_exit(&mc_polling_lock);
482	mutex_destroy(&mc_polling_lock);
483	cv_destroy(&mc_polling_cv);
484	cv_destroy(&mc_poll_exit_cv);
485}
486
487static int
488mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
489{
490	mc_opl_t *mcp;
491	int instance;
492	int rv;
493
494	/* get the instance of this devi */
495	instance = ddi_get_instance(devi);
496
497	switch (cmd) {
498	case DDI_ATTACH:
499		break;
500	case DDI_RESUME:
501		mcp = ddi_get_soft_state(mc_statep, instance);
502		rv = mc_resume(mcp, MC_DRIVER_SUSPENDED);
503		return (rv);
504	default:
505		return (DDI_FAILURE);
506	}
507
508	if (ddi_soft_state_zalloc(mc_statep, instance) != DDI_SUCCESS)
509		return (DDI_FAILURE);
510
511	if (ddi_create_minor_node(devi, "mc-opl", S_IFCHR, instance,
512	    "ddi_mem_ctrl", 0) != DDI_SUCCESS) {
513		MC_LOG("mc_attach: create_minor_node failed\n");
514		return (DDI_FAILURE);
515	}
516
517	if ((mcp = ddi_get_soft_state(mc_statep, instance)) == NULL) {
518		goto bad;
519	}
520
521	if (mc_timeout_period == 0) {
522		mc_patrol_interval_sec = (int)ddi_getprop(DDI_DEV_T_ANY, devi,
523		    DDI_PROP_DONTPASS, "mc-timeout-interval-sec",
524		    mc_patrol_interval_sec);
525		mc_timeout_period = drv_usectohz(1000000 *
526		    mc_patrol_interval_sec / OPL_MAX_BOARDS);
527	}
528
529	/* set informations in mc state */
530	mcp->mc_dip = devi;
531
532	if (mc_board_add(mcp))
533		goto bad;
534
535	insert_mcp(mcp);
536
537	/*
538	 * Start the polling thread if it is not running already.
539	 */
540	mutex_enter(&mc_polling_lock);
541	if (!mc_pollthr_running) {
542		(void) thread_create(NULL, 0, (void (*)())mc_polling_thread,
543		    NULL, 0, &p0, TS_RUN, mc_poll_priority);
544	}
545	mutex_exit(&mc_polling_lock);
546	ddi_report_dev(devi);
547
548	return (DDI_SUCCESS);
549
550bad:
551	ddi_remove_minor_node(devi, NULL);
552	ddi_soft_state_free(mc_statep, instance);
553	return (DDI_FAILURE);
554}
555
556/* ARGSUSED */
557static int
558mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
559{
560	int rv;
561	int instance;
562	mc_opl_t *mcp;
563
564	/* get the instance of this devi */
565	instance = ddi_get_instance(devi);
566	if ((mcp = ddi_get_soft_state(mc_statep, instance)) == NULL) {
567		return (DDI_FAILURE);
568	}
569
570	switch (cmd) {
571	case DDI_SUSPEND:
572		rv = mc_suspend(mcp, MC_DRIVER_SUSPENDED);
573		return (rv);
574	case DDI_DETACH:
575		break;
576	default:
577		return (DDI_FAILURE);
578	}
579
580	delete_mcp(mcp);
581	if (mc_board_del(mcp) != DDI_SUCCESS) {
582		return (DDI_FAILURE);
583	}
584
585	ddi_remove_minor_node(devi, NULL);
586
587	/* free up the soft state */
588	ddi_soft_state_free(mc_statep, instance);
589
590	return (DDI_SUCCESS);
591}
592
593/* ARGSUSED */
594static int
595mc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
596{
597	return (0);
598}
599
600/* ARGSUSED */
601static int
602mc_close(dev_t devp, int flag, int otyp, cred_t *credp)
603{
604	return (0);
605}
606
607/* ARGSUSED */
608static int
609mc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
610	int *rvalp)
611{
612	mc_flt_page_t flt_page;
613
614	if (cmd == MCIOC_FAULT_PAGE) {
615		if (arg == NULL)
616			return (EINVAL);
617
618		if (ddi_copyin((const void *)arg, (void *)&flt_page,
619		    sizeof (mc_flt_page_t), 0) < 0)
620			return (EFAULT);
621
622		return (mc_scf_log_event(&flt_page));
623	}
624#ifdef DEBUG
625	return (mc_ioctl_debug(dev, cmd, arg, mode, credp, rvalp));
626#else
627	return (ENOTTY);
628#endif
629}
630
631/*
632 * PA validity check:
633 * This function return 1 if the PA is a valid PA
634 * in the running Solaris instance i.e. in physinstall
635 * Otherwise, return 0.
636 */
637
638/* ARGSUSED */
639static int
640pa_is_valid(mc_opl_t *mcp, uint64_t addr)
641{
642	if (mcp->mlist == NULL)
643		mc_get_mlist(mcp);
644
645	if (mcp->mlist && address_in_memlist(mcp->mlist, addr, 0)) {
646		return (1);
647	}
648	return (0);
649}
650
651/*
652 * mac-pa translation routines.
653 *
654 *    Input: mc driver state, (LSB#, Bank#, DIMM address)
655 *    Output: physical address
656 *
657 *    Valid   - return value:  0
658 *    Invalid - return value: -1
659 */
660static int
661mcaddr_to_pa(mc_opl_t *mcp, mc_addr_t *maddr, uint64_t *pa)
662{
663	int i;
664	uint64_t pa_offset = 0;
665	int cs = (maddr->ma_dimm_addr >> CS_SHIFT) & 1;
666	int bank = maddr->ma_bank;
667	mc_addr_t maddr1;
668	int bank0, bank1;
669
670	MC_LOG("mcaddr /LSB%d/B%d/%x\n", maddr->ma_bd, bank,
671	    maddr->ma_dimm_addr);
672
673	/* loc validity check */
674	ASSERT(maddr->ma_bd >= 0 && OPL_BOARD_MAX > maddr->ma_bd);
675	ASSERT(bank >= 0 && OPL_BANK_MAX > bank);
676
677	/* Do translation */
678	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
679		int pa_bit = 0;
680		int mc_bit = mcp->mc_trans_table[cs][i];
681		if (mc_bit < MC_ADDRESS_BITS) {
682			pa_bit = (maddr->ma_dimm_addr >> mc_bit) & 1;
683		} else if (mc_bit == MP_NONE) {
684			pa_bit = 0;
685		} else if (mc_bit == MP_BANK_0) {
686			pa_bit = bank & 1;
687		} else if (mc_bit == MP_BANK_1) {
688			pa_bit = (bank >> 1) & 1;
689		} else if (mc_bit == MP_BANK_2) {
690			pa_bit = (bank >> 2) & 1;
691		}
692		pa_offset |= ((uint64_t)pa_bit) << i;
693	}
694	*pa = mcp->mc_start_address + pa_offset;
695	MC_LOG("pa = %lx\n", *pa);
696
697	if (pa_to_maddr(mcp, *pa, &maddr1) == -1) {
698		cmn_err(CE_WARN, "mcaddr_to_pa: /LSB%d/B%d/%x failed to "
699		    "convert PA %lx\n", maddr->ma_bd, bank,
700		    maddr->ma_dimm_addr, *pa);
701		return (-1);
702	}
703
704	/*
705	 * In mirror mode, PA is always translated to the even bank.
706	 */
707	if (IS_MIRROR(mcp, maddr->ma_bank)) {
708		bank0 = maddr->ma_bank & ~(1);
709		bank1 = maddr1.ma_bank & ~(1);
710	} else {
711		bank0 = maddr->ma_bank;
712		bank1 = maddr1.ma_bank;
713	}
714	/*
715	 * there is no need to check ma_bd because it is generated from
716	 * mcp.  They are the same.
717	 */
718	if ((bank0 == bank1) && (maddr->ma_dimm_addr ==
719	    maddr1.ma_dimm_addr)) {
720		return (0);
721	} else {
722		MC_LOG("Translation error source /LSB%d/B%d/%x, "
723		    "PA %lx, target /LSB%d/B%d/%x\n", maddr->ma_bd, bank,
724		    maddr->ma_dimm_addr, *pa, maddr1.ma_bd, maddr1.ma_bank,
725		    maddr1.ma_dimm_addr);
726		return (-1);
727	}
728}
729
730/*
731 * PA to CS (used by pa_to_maddr).
732 */
733static int
734pa_to_cs(mc_opl_t *mcp, uint64_t pa_offset)
735{
736	int i;
737	int cs = 1;
738
739	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
740		/* MAC address bit<29> is arranged on the same PA bit */
741		/* on both table. So we may use any table. */
742		if (mcp->mc_trans_table[0][i] == CS_SHIFT) {
743			cs = (pa_offset >> i) & 1;
744			break;
745		}
746	}
747	return (cs);
748}
749
750/*
751 * PA to DIMM (used by pa_to_maddr).
752 */
753/* ARGSUSED */
754static uint32_t
755pa_to_dimm(mc_opl_t *mcp, uint64_t pa_offset)
756{
757	int i;
758	int cs = pa_to_cs(mcp, pa_offset);
759	uint32_t dimm_addr = 0;
760
761	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
762		int pa_bit_value = (pa_offset >> i) & 1;
763		int mc_bit = mcp->mc_trans_table[cs][i];
764		if (mc_bit < MC_ADDRESS_BITS) {
765			dimm_addr |= pa_bit_value << mc_bit;
766		}
767	}
768	dimm_addr |= cs << CS_SHIFT;
769	return (dimm_addr);
770}
771
772/*
773 * PA to Bank (used by pa_to_maddr).
774 */
775static int
776pa_to_bank(mc_opl_t *mcp, uint64_t pa_offset)
777{
778	int i;
779	int cs = pa_to_cs(mcp, pa_offset);
780	int bankno = mcp->mc_trans_table[cs][INDEX_OF_BANK_SUPPLEMENT_BIT];
781
782
783	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
784		int pa_bit_value = (pa_offset >> i) & 1;
785		int mc_bit = mcp->mc_trans_table[cs][i];
786		switch (mc_bit) {
787		case MP_BANK_0:
788			bankno |= pa_bit_value;
789			break;
790		case MP_BANK_1:
791			bankno |= pa_bit_value << 1;
792			break;
793		case MP_BANK_2:
794			bankno |= pa_bit_value << 2;
795			break;
796		}
797	}
798
799	return (bankno);
800}
801
802/*
803 * PA to MAC address translation
804 *
805 *   Input: MAC driver state, physicall adress
806 *   Output: LSB#, Bank id, mac address
807 *
808 *    Valid   - return value:  0
809 *    Invalid - return value: -1
810 */
811
812int
813pa_to_maddr(mc_opl_t *mcp, uint64_t pa, mc_addr_t *maddr)
814{
815	uint64_t pa_offset;
816
817	if (!mc_rangecheck_pa(mcp, pa))
818		return (-1);
819
820	/* Do translation */
821	pa_offset = pa - mcp->mc_start_address;
822
823	maddr->ma_bd = mcp->mc_board_num;
824	maddr->ma_phys_bd = mcp->mc_phys_board_num;
825	maddr->ma_bank = pa_to_bank(mcp, pa_offset);
826	maddr->ma_dimm_addr = pa_to_dimm(mcp, pa_offset);
827	MC_LOG("pa %lx -> mcaddr /LSB%d/B%d/%x\n", pa_offset, maddr->ma_bd,
828	    maddr->ma_bank, maddr->ma_dimm_addr);
829	return (0);
830}
831
832/*
833 * UNUM format for DC is "/CMUnn/MEMxyZ", where
834 *	nn = 00..03 for DC1 and 00..07 for DC2 and 00..15 for DC3.
835 *	x = MAC 0..3
836 *	y = 0..3 (slot info).
837 *	Z = 'A' or 'B'
838 *
839 * UNUM format for FF1 is "/MBU_A/MEMBx/MEMyZ", where
840 *	x = 0..3 (MEMB number)
841 *	y = 0..3 (slot info).
842 *	Z = 'A' or 'B'
843 *
844 * UNUM format for FF2 is "/MBU_B/MEMBx/MEMyZ", where
845 *	x = 0..7 (MEMB number)
846 *	y = 0..3 (slot info).
847 *	Z = 'A' or 'B'
848 *
849 * UNUM format for IKKAKU is "/MBU_A/MEMyZ", where
850 *	y = 0..3 (slot info).
851 *	Z = 'A' or 'B'
852 *
853 */
854int
855mc_set_mem_unum(char *buf, int buflen, int sb, int bank,
856    uint32_t mf_type, uint32_t d_slot)
857{
858	char *dimmnm;
859	char memb_num;
860	int cs;
861	int i;
862	int j;
863
864	cs = SLOT_TO_CS(d_slot);
865
866	switch (plat_model) {
867	case MODEL_DC:
868		if (mf_type == FLT_TYPE_INTERMITTENT_CE ||
869		    mf_type == FLT_TYPE_PERMANENT_CE) {
870			i = BD_BK_SLOT_TO_INDEX(0, bank, d_slot);
871			dimmnm = mc_dc_dimm_unum_table[i];
872			(void) snprintf(buf, buflen, "/%s%02d/MEM%s",
873			    model_names[plat_model].unit_name, sb, dimmnm);
874		} else {
875			i = BD_BK_SLOT_TO_INDEX(0, bank, 0);
876			j = (cs == 0) ?  i : i + 2;
877			(void) snprintf(buf, buflen, "/%s%02d/MEM%s MEM%s",
878			    model_names[plat_model].unit_name, sb,
879			    mc_dc_dimm_unum_table[j],
880			    mc_dc_dimm_unum_table[j + 1]);
881		}
882		break;
883	case MODEL_FF1:
884	case MODEL_FF2:
885		if (mf_type == FLT_TYPE_INTERMITTENT_CE ||
886		    mf_type == FLT_TYPE_PERMANENT_CE) {
887			i = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot);
888			dimmnm = mc_ff_dimm_unum_table[i];
889			memb_num = dimmnm[0];
890			(void) snprintf(buf, buflen, "/%s/%s%c/MEM%s",
891			    model_names[plat_model].unit_name,
892			    model_names[plat_model].mem_name,
893			    memb_num, &dimmnm[1]);
894		} else {
895			i = BD_BK_SLOT_TO_INDEX(sb, bank, 0);
896			j = (cs == 0) ?  i : i + 2;
897			memb_num = mc_ff_dimm_unum_table[i][0],
898			    (void) snprintf(buf, buflen, "/%s/%s%c/MEM%s MEM%s",
899			    model_names[plat_model].unit_name,
900			    model_names[plat_model].mem_name, memb_num,
901			    &mc_ff_dimm_unum_table[j][1],
902			    &mc_ff_dimm_unum_table[j + 1][1]);
903		}
904		break;
905	case MODEL_IKKAKU:
906		if (mf_type == FLT_TYPE_INTERMITTENT_CE ||
907		    mf_type == FLT_TYPE_PERMANENT_CE) {
908			i = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot);
909			dimmnm = mc_ff_dimm_unum_table[i];
910			(void) snprintf(buf, buflen, "/%s/MEM%s",
911			    model_names[plat_model].unit_name, &dimmnm[1]);
912		} else {
913			i = BD_BK_SLOT_TO_INDEX(sb, bank, 0);
914			j = (cs == 0) ?  i : i + 2;
915			memb_num = mc_ff_dimm_unum_table[i][0],
916			    (void) snprintf(buf, buflen, "/%s/MEM%s MEM%s",
917			    model_names[plat_model].unit_name,
918			    &mc_ff_dimm_unum_table[j][1],
919			    &mc_ff_dimm_unum_table[j + 1][1]);
920		}
921		break;
922	default:
923		return (-1);
924	}
925	return (0);
926}
927
928static void
929mc_ereport_post(mc_aflt_t *mc_aflt)
930{
931	char buf[FM_MAX_CLASS];
932	char device_path[MAXPATHLEN];
933	char sid[MAXPATHLEN];
934	nv_alloc_t *nva = NULL;
935	nvlist_t *ereport, *detector, *resource;
936	errorq_elem_t *eqep;
937	int nflts;
938	mc_flt_stat_t *flt_stat;
939	int i, n;
940	int blen = MAXPATHLEN;
941	char *p, *s = NULL;
942	uint32_t values[2], synd[2], dslot[2];
943	uint64_t offset = (uint64_t)-1;
944	int ret = -1;
945
946	if (panicstr) {
947		eqep = errorq_reserve(ereport_errorq);
948		if (eqep == NULL)
949			return;
950		ereport = errorq_elem_nvl(ereport_errorq, eqep);
951		nva = errorq_elem_nva(ereport_errorq, eqep);
952	} else {
953		ereport = fm_nvlist_create(nva);
954	}
955
956	/*
957	 * Create the scheme "dev" FMRI.
958	 */
959	detector = fm_nvlist_create(nva);
960	resource = fm_nvlist_create(nva);
961
962	nflts = mc_aflt->mflt_nflts;
963
964	ASSERT(nflts >= 1 && nflts <= 2);
965
966	flt_stat = mc_aflt->mflt_stat[0];
967	(void) ddi_pathname(mc_aflt->mflt_mcp->mc_dip, device_path);
968	(void) fm_fmri_dev_set(detector, FM_DEV_SCHEME_VERSION, NULL,
969	    device_path, NULL, NULL);
970
971	/*
972	 * Encode all the common data into the ereport.
973	 */
974	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s-%s", MC_OPL_ERROR_CLASS,
975	    mc_aflt->mflt_is_ptrl ? MC_OPL_PTRL_SUBCLASS : MC_OPL_MI_SUBCLASS,
976	    mc_aflt->mflt_erpt_class);
977
978	MC_LOG("mc_ereport_post: ereport %s\n", buf);
979
980
981	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
982	    fm_ena_generate(mc_aflt->mflt_id, FM_ENA_FMT1), detector, NULL);
983
984	/*
985	 * Set payload.
986	 */
987	fm_payload_set(ereport, MC_OPL_BOARD, DATA_TYPE_UINT32,
988	    flt_stat->mf_flt_maddr.ma_bd, NULL);
989
990	fm_payload_set(ereport, MC_OPL_PA, DATA_TYPE_UINT64,
991	    flt_stat->mf_flt_paddr, NULL);
992
993	if (flt_stat->mf_type == FLT_TYPE_INTERMITTENT_CE ||
994	    flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) {
995		fm_payload_set(ereport, MC_OPL_FLT_TYPE, DATA_TYPE_UINT8,
996		    ECC_STICKY, NULL);
997	}
998
999	for (i = 0; i < nflts; i++)
1000		values[i] = mc_aflt->mflt_stat[i]->mf_flt_maddr.ma_bank;
1001
1002	fm_payload_set(ereport, MC_OPL_BANK, DATA_TYPE_UINT32_ARRAY, nflts,
1003	    values, NULL);
1004
1005	for (i = 0; i < nflts; i++)
1006		values[i] = mc_aflt->mflt_stat[i]->mf_cntl;
1007
1008	fm_payload_set(ereport, MC_OPL_STATUS, DATA_TYPE_UINT32_ARRAY, nflts,
1009	    values, NULL);
1010
1011	for (i = 0; i < nflts; i++)
1012		values[i] = mc_aflt->mflt_stat[i]->mf_err_add;
1013
1014	/* offset is set only for PCE and ICE */
1015	if (mc_aflt->mflt_stat[0]->mf_type == FLT_TYPE_INTERMITTENT_CE ||
1016	    mc_aflt->mflt_stat[0]->mf_type == FLT_TYPE_PERMANENT_CE) {
1017		offset = values[0];
1018
1019	}
1020	fm_payload_set(ereport, MC_OPL_ERR_ADD, DATA_TYPE_UINT32_ARRAY, nflts,
1021	    values, NULL);
1022
1023	for (i = 0; i < nflts; i++)
1024		values[i] = mc_aflt->mflt_stat[i]->mf_err_log;
1025
1026	fm_payload_set(ereport, MC_OPL_ERR_LOG, DATA_TYPE_UINT32_ARRAY, nflts,
1027	    values, NULL);
1028
1029	for (i = 0; i < nflts; i++) {
1030		flt_stat = mc_aflt->mflt_stat[i];
1031		if (flt_stat->mf_errlog_valid) {
1032			synd[i] = flt_stat->mf_synd;
1033			dslot[i] = flt_stat->mf_dimm_slot;
1034			values[i] = flt_stat->mf_dram_place;
1035		} else {
1036			synd[i] = 0;
1037			dslot[i] = 0;
1038			values[i] = 0;
1039		}
1040	}
1041
1042	fm_payload_set(ereport, MC_OPL_ERR_SYND, DATA_TYPE_UINT32_ARRAY, nflts,
1043	    synd, NULL);
1044
1045	fm_payload_set(ereport, MC_OPL_ERR_DIMMSLOT, DATA_TYPE_UINT32_ARRAY,
1046	    nflts, dslot, NULL);
1047
1048	fm_payload_set(ereport, MC_OPL_ERR_DRAM, DATA_TYPE_UINT32_ARRAY, nflts,
1049	    values, NULL);
1050
1051	device_path[0] = 0;
1052	p = &device_path[0];
1053	sid[0] = 0;
1054	s = &sid[0];
1055	ret = 0;
1056
1057	for (i = 0; i < nflts; i++) {
1058		int bank;
1059
1060		flt_stat = mc_aflt->mflt_stat[i];
1061		bank = flt_stat->mf_flt_maddr.ma_bank;
1062		ret = mc_set_mem_unum(p + strlen(p), blen,
1063		    flt_stat->mf_flt_maddr.ma_phys_bd, bank, flt_stat->mf_type,
1064		    flt_stat->mf_dimm_slot);
1065
1066		if (ret != 0) {
1067			cmn_err(CE_WARN,
1068			    "mc_ereport_post: Failed to determine the unum "
1069			    "for board=%d bank=%d type=0x%x slot=0x%x",
1070			    flt_stat->mf_flt_maddr.ma_bd, bank,
1071			    flt_stat->mf_type, flt_stat->mf_dimm_slot);
1072			continue;
1073		}
1074		n = strlen(device_path);
1075		blen = MAXPATHLEN - n;
1076		p = &device_path[n];
1077		if (i < (nflts - 1)) {
1078			(void) snprintf(p, blen, " ");
1079			blen--;
1080			p++;
1081		}
1082
1083		if (ret == 0) {
1084			ret = mc_set_mem_sid(mc_aflt->mflt_mcp, s + strlen(s),
1085			    blen, flt_stat->mf_flt_maddr.ma_phys_bd, bank,
1086			    flt_stat->mf_type, flt_stat->mf_dimm_slot);
1087
1088		}
1089	}
1090
1091	(void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, NULL,
1092	    device_path, (ret == 0) ? sid : NULL, (ret == 0) ? offset :
1093	    (uint64_t)-1);
1094
1095	fm_payload_set(ereport, MC_OPL_RESOURCE, DATA_TYPE_NVLIST, resource,
1096	    NULL);
1097
1098	if (panicstr) {
1099		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
1100	} else {
1101		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
1102		fm_nvlist_destroy(ereport, FM_NVA_FREE);
1103		fm_nvlist_destroy(detector, FM_NVA_FREE);
1104		fm_nvlist_destroy(resource, FM_NVA_FREE);
1105	}
1106}
1107
1108
1109static void
1110mc_err_drain(mc_aflt_t *mc_aflt)
1111{
1112	int rv;
1113	uint64_t pa = (uint64_t)(-1);
1114	int i;
1115
1116	MC_LOG("mc_err_drain: %s\n", mc_aflt->mflt_erpt_class);
1117	/*
1118	 * we come here only when we have:
1119	 * In mirror mode: MUE, SUE
1120	 * In normal mode: UE, Permanent CE, Intermittent CE
1121	 */
1122	for (i = 0; i < mc_aflt->mflt_nflts; i++) {
1123		rv = mcaddr_to_pa(mc_aflt->mflt_mcp,
1124		    &(mc_aflt->mflt_stat[i]->mf_flt_maddr), &pa);
1125
1126		/* Ensure the pa is valid (not in isolated memory block) */
1127		if (rv == 0 && pa_is_valid(mc_aflt->mflt_mcp, pa))
1128			mc_aflt->mflt_stat[i]->mf_flt_paddr = pa;
1129		else
1130			mc_aflt->mflt_stat[i]->mf_flt_paddr = (uint64_t)-1;
1131	}
1132
1133	MC_LOG("mc_err_drain:pa = %lx\n", pa);
1134
1135	switch (page_retire_check(pa, NULL)) {
1136	case 0:
1137	case EAGAIN:
1138		MC_LOG("Page retired or pending\n");
1139		return;
1140	case EIO:
1141		/*
1142		 * Do page retirement except for the PCE and ICE cases.
1143		 * This is taken care by the OPL DE
1144		 */
1145		if (mc_aflt->mflt_stat[0]->mf_type !=
1146		    FLT_TYPE_INTERMITTENT_CE &&
1147		    mc_aflt->mflt_stat[0]->mf_type != FLT_TYPE_PERMANENT_CE) {
1148			MC_LOG("offline page at pa %lx error %x\n", pa,
1149			    mc_aflt->mflt_pr);
1150			(void) page_retire(pa, mc_aflt->mflt_pr);
1151		}
1152		break;
1153	case EINVAL:
1154	default:
1155		/*
1156		 * Some memory do not have page structure so
1157		 * we keep going in case of EINVAL.
1158		 */
1159		break;
1160	}
1161
1162	for (i = 0; i < mc_aflt->mflt_nflts; i++) {
1163		mc_aflt_t mc_aflt0;
1164		if (mc_aflt->mflt_stat[i]->mf_flt_paddr != (uint64_t)-1) {
1165			mc_aflt0 = *mc_aflt;
1166			mc_aflt0.mflt_nflts = 1;
1167			mc_aflt0.mflt_stat[0] = mc_aflt->mflt_stat[i];
1168			mc_ereport_post(&mc_aflt0);
1169		}
1170	}
1171}
1172
1173/*
1174 * The restart address is actually defined in unit of PA[37:6]
1175 * the mac patrol will convert that to dimm offset.  If the
1176 * address is not in the bank, it will continue to search for
1177 * the next PA that is within the bank.
1178 *
1179 * Also the mac patrol scans the dimms based on PA, not
1180 * dimm offset.
1181 */
1182static int
1183restart_patrol(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr_info)
1184{
1185	uint64_t pa;
1186	int rv;
1187
1188	if (MC_REWRITE_MODE(mcp, bank)) {
1189		return (0);
1190	}
1191	if (rsaddr_info == NULL || (rsaddr_info->mi_valid == 0)) {
1192		MAC_PTRL_START(mcp, bank);
1193		return (0);
1194	}
1195
1196	rv = mcaddr_to_pa(mcp, &rsaddr_info->mi_restartaddr, &pa);
1197	if (rv != 0) {
1198		MC_LOG("cannot convert mcaddr to pa. use auto restart\n");
1199		MAC_PTRL_START(mcp, bank);
1200		return (0);
1201	}
1202
1203	if (!mc_rangecheck_pa(mcp, pa)) {
1204		/* pa is not on this board, just retry */
1205		cmn_err(CE_WARN, "restart_patrol: invalid address %lx "
1206		    "on board %d\n", pa, mcp->mc_board_num);
1207		MAC_PTRL_START(mcp, bank);
1208		return (0);
1209	}
1210
1211	MC_LOG("restart_patrol: pa = %lx\n", pa);
1212
1213	if (!rsaddr_info->mi_injectrestart) {
1214		/*
1215		 * For non-error injection restart we need to
1216		 * determine if the current restart pa/page is
1217		 * a "good" page. A "good" page is a page that
1218		 * has not been page retired. If the current
1219		 * page that contains the pa is "good", we will
1220		 * do a HW auto restart and let HW patrol continue
1221		 * where it last stopped. Most desired scenario.
1222		 *
1223		 * If the current page is not "good", we will advance
1224		 * to the next page to find the next "good" page and
1225		 * restart the patrol from there.
1226		 */
1227		int wrapcount = 0;
1228		uint64_t origpa = pa;
1229		while (wrapcount < 2) {
1230			if (!pa_is_valid(mcp, pa)) {
1231			/*
1232			 * Not in physinstall - advance to the
1233			 * next memory isolation blocksize
1234			 */
1235			MC_LOG("Invalid PA\n");
1236			pa = roundup(pa + 1, mc_isolation_bsize);
1237			} else {
1238			int rv;
1239			if ((rv = page_retire_check(pa, NULL)) != 0 &&
1240			    rv != EAGAIN) {
1241					/*
1242					 * The page is "good" (not retired),
1243					 * we will use automatic HW restart
1244					 * algorithm if this is the original
1245					 * current starting page.
1246					 */
1247				if (pa == origpa) {
1248					MC_LOG("Page has no error. "
1249					    "Auto restart\n");
1250					MAC_PTRL_START(mcp, bank);
1251					return (0);
1252				} else {
1253					/*
1254					 * found a subsequent good page
1255					 */
1256					break;
1257				}
1258			}
1259
1260			/*
1261			 * Skip to the next page
1262			 */
1263			pa = roundup(pa + 1, PAGESIZE);
1264			MC_LOG("Skipping bad page to %lx\n", pa);
1265			}
1266
1267		    /* Check to see if we hit the end of the memory range */
1268			if (pa >= (mcp->mc_start_address + mcp->mc_size)) {
1269			MC_LOG("Wrap around\n");
1270			pa = mcp->mc_start_address;
1271			wrapcount++;
1272			}
1273		}
1274
1275		if (wrapcount > 1) {
1276			MC_LOG("Failed to find a good page. Just restart\n");
1277			MAC_PTRL_START(mcp, bank);
1278			return (0);
1279		}
1280	}
1281
1282	/*
1283	 * We reached here either:
1284	 * 1. We are doing an error injection restart that specify
1285	 *    the exact pa/page to restart. OR
1286	 * 2. We found a subsequent good page different from the
1287	 *    original restart pa/page.
1288	 * Restart MAC patrol: PA[37:6]
1289	 */
1290	MC_LOG("restart at pa = %lx\n", pa);
1291	ST_MAC_REG(MAC_RESTART_ADD(mcp, bank), MAC_RESTART_PA(pa));
1292	MAC_PTRL_START_ADD(mcp, bank);
1293
1294	return (0);
1295}
1296
1297static void
1298mc_retry_info_put(mc_retry_info_t **q, mc_retry_info_t *p)
1299{
1300	ASSERT(p != NULL);
1301	p->ri_next = *q;
1302	*q = p;
1303}
1304
1305static mc_retry_info_t *
1306mc_retry_info_get(mc_retry_info_t **q)
1307{
1308	mc_retry_info_t *p;
1309
1310	if ((p = *q) != NULL) {
1311		*q = p->ri_next;
1312		return (p);
1313	} else {
1314		return (NULL);
1315	}
1316}
1317
1318/*
1319 * Rewriting is used for two purposes.
1320 *  - to correct the error in memory.
1321 *  - to determine whether the error is permanent or intermittent.
1322 * It's done by writing the address in MAC_BANKm_REWRITE_ADD
1323 * and issuing REW_REQ command in MAC_BANKm_PTRL_CNRL. After that,
1324 * REW_END (and REW_CE/REW_UE if some error detected) is set when
1325 * rewrite operation is done. See 4.7.3 and 4.7.11 in Columbus2 PRM.
1326 *
1327 * Note that rewrite operation doesn't change RAW_UE to Marked UE.
1328 * Therefore, we use it only CE case.
1329 */
1330
1331static uint32_t
1332do_rewrite(mc_opl_t *mcp, int bank, uint32_t dimm_addr, int retrying)
1333{
1334	uint32_t cntl;
1335	int count = 0;
1336	int max_count;
1337	int retry_state;
1338
1339	if (retrying)
1340		max_count = 1;
1341	else
1342		max_count = mc_max_rewrite_loop;
1343
1344	retry_state = RETRY_STATE_PENDING;
1345
1346	if (!retrying && MC_REWRITE_MODE(mcp, bank)) {
1347		goto timeout;
1348	}
1349
1350	retry_state = RETRY_STATE_ACTIVE;
1351
1352	/* first wait to make sure PTRL_STATUS is 0 */
1353	while (count++ < max_count) {
1354		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
1355		if (!(cntl & MAC_CNTL_PTRL_STATUS)) {
1356			count = 0;
1357			break;
1358		}
1359		drv_usecwait(mc_rewrite_delay);
1360	}
1361	if (count >= max_count)
1362		goto timeout;
1363
1364	count = 0;
1365
1366	ST_MAC_REG(MAC_REWRITE_ADD(mcp, bank), dimm_addr);
1367	MAC_REW_REQ(mcp, bank);
1368
1369	retry_state = RETRY_STATE_REWRITE;
1370
1371	do {
1372		if (count++ > max_count) {
1373			goto timeout;
1374		} else {
1375			drv_usecwait(mc_rewrite_delay);
1376		}
1377		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
1378	/*
1379	 * If there are other MEMORY or PCI activities, this
1380	 * will be BUSY, else it should be set immediately
1381	 */
1382	} while (!(cntl & MAC_CNTL_REW_END));
1383
1384	MAC_CLEAR_ERRS(mcp, bank, MAC_CNTL_REW_ERRS);
1385	return (cntl);
1386timeout:
1387	mc_set_rewrite(mcp, bank, dimm_addr, retry_state);
1388
1389	return (0);
1390}
1391
1392void
1393mc_clear_rewrite(mc_opl_t *mcp, int bank)
1394{
1395	struct mc_bank *bankp;
1396	mc_retry_info_t *retry;
1397	uint32_t rew_addr;
1398
1399	bankp = &(mcp->mc_bank[bank]);
1400	retry = bankp->mcb_active;
1401	bankp->mcb_active = NULL;
1402	mc_retry_info_put(&bankp->mcb_retry_freelist, retry);
1403
1404again:
1405	bankp->mcb_rewrite_count = 0;
1406
1407	while (retry = mc_retry_info_get(&bankp->mcb_retry_pending)) {
1408		rew_addr = retry->ri_addr;
1409		mc_retry_info_put(&bankp->mcb_retry_freelist, retry);
1410		if (do_rewrite(mcp, bank, rew_addr, 1) == 0)
1411			break;
1412	}
1413
1414	/* we break out if no more pending rewrite or we got timeout again */
1415
1416	if (!bankp->mcb_active && !bankp->mcb_retry_pending) {
1417		if (!IS_MIRROR(mcp, bank)) {
1418			MC_CLEAR_REWRITE_MODE(mcp, bank);
1419		} else {
1420			int mbank = bank ^ 1;
1421			bankp = &(mcp->mc_bank[mbank]);
1422			if (!bankp->mcb_active && !bankp->mcb_retry_pending) {
1423			MC_CLEAR_REWRITE_MODE(mcp, bank);
1424			MC_CLEAR_REWRITE_MODE(mcp, mbank);
1425			} else {
1426			bank = mbank;
1427			goto again;
1428			}
1429		}
1430	}
1431}
1432
1433void
1434mc_set_rewrite(mc_opl_t *mcp, int bank, uint32_t addr, int state)
1435{
1436	mc_retry_info_t *retry;
1437	struct mc_bank *bankp;
1438
1439	bankp = &mcp->mc_bank[bank];
1440
1441	retry = mc_retry_info_get(&bankp->mcb_retry_freelist);
1442
1443	if (retry == NULL) {
1444		mc_addr_t maddr;
1445		uint64_t paddr;
1446		/*
1447		 * previous rewrite request has not completed yet.
1448		 * So we discard this rewrite request.
1449		 */
1450		maddr.ma_bd = mcp->mc_board_num;
1451		maddr.ma_bank =  bank;
1452		maddr.ma_dimm_addr = addr;
1453		if (mcaddr_to_pa(mcp, &maddr, &paddr) == 0) {
1454			cmn_err(CE_WARN, "Discard CE rewrite request"
1455			    " for 0x%lx (/LSB%d/B%d/%x).\n",
1456			    paddr, mcp->mc_board_num, bank, addr);
1457		} else {
1458			cmn_err(CE_WARN, "Discard CE rewrite request"
1459			    " for /LSB%d/B%d/%x.\n",
1460			    mcp->mc_board_num, bank, addr);
1461		}
1462		return;
1463	}
1464
1465	retry->ri_addr = addr;
1466	retry->ri_state = state;
1467
1468	MC_SET_REWRITE_MODE(mcp, bank);
1469
1470	if ((state > RETRY_STATE_PENDING)) {
1471		ASSERT(bankp->mcb_active == NULL);
1472		bankp->mcb_active = retry;
1473	} else {
1474		mc_retry_info_put(&bankp->mcb_retry_pending, retry);
1475	}
1476
1477	if (IS_MIRROR(mcp, bank)) {
1478		int mbank = bank ^1;
1479		MC_SET_REWRITE_MODE(mcp, mbank);
1480	}
1481}
1482
1483void
1484mc_process_scf_log(mc_opl_t *mcp)
1485{
1486	int count;
1487	int n = 0;
1488	scf_log_t *p;
1489	int bank;
1490
1491	for (bank = 0; bank < BANKNUM_PER_SB; bank++) {
1492		while ((p = mcp->mc_scf_log[bank]) != NULL &&
1493		    (n < mc_max_errlog_processed)) {
1494		ASSERT(bank == p->sl_bank);
1495		count = 0;
1496		while ((LD_MAC_REG(MAC_STATIC_ERR_ADD(mcp, p->sl_bank))
1497		    & MAC_STATIC_ERR_VLD)) {
1498			if (count++ >= (mc_max_scf_loop)) {
1499				break;
1500			}
1501			drv_usecwait(mc_scf_delay);
1502		}
1503
1504		if (count < mc_max_scf_loop) {
1505			ST_MAC_REG(MAC_STATIC_ERR_LOG(mcp, p->sl_bank),
1506			    p->sl_err_log);
1507
1508			ST_MAC_REG(MAC_STATIC_ERR_ADD(mcp, p->sl_bank),
1509			    p->sl_err_add|MAC_STATIC_ERR_VLD);
1510			mcp->mc_scf_retry[bank] = 0;
1511		} else {
1512			/*
1513			 * if we try too many times, just drop the req
1514			 */
1515			if (mcp->mc_scf_retry[bank]++ <=
1516			    mc_max_scf_retry) {
1517				return;
1518			} else {
1519				if ((++mc_pce_dropped & 0xff) == 0) {
1520					cmn_err(CE_WARN, "Cannot "
1521					    "report CE to SCF\n");
1522				}
1523			}
1524		}
1525		n++;
1526		mcp->mc_scf_log[bank] = p->sl_next;
1527		mcp->mc_scf_total[bank]--;
1528		ASSERT(mcp->mc_scf_total[bank] >= 0);
1529		kmem_free(p, sizeof (scf_log_t));
1530		}
1531	}
1532}
1533void
1534mc_queue_scf_log(mc_opl_t *mcp, mc_flt_stat_t *flt_stat, int bank)
1535{
1536	scf_log_t *p;
1537
1538	if (mcp->mc_scf_total[bank] >= mc_max_scf_logs) {
1539		if ((++mc_pce_dropped & 0xff) == 0) {
1540			cmn_err(CE_WARN, "Too many CE requests.\n");
1541		}
1542		return;
1543	}
1544	p = kmem_zalloc(sizeof (scf_log_t), KM_SLEEP);
1545	p->sl_next = 0;
1546	p->sl_err_add = flt_stat->mf_err_add;
1547	p->sl_err_log = flt_stat->mf_err_log;
1548	p->sl_bank = bank;
1549
1550	if (mcp->mc_scf_log[bank] == NULL) {
1551		/*
1552		 * we rely on mc_scf_log to detect NULL queue.
1553		 * mc_scf_log_tail is irrelevant is such case.
1554		 */
1555		mcp->mc_scf_log_tail[bank] = mcp->mc_scf_log[bank] = p;
1556	} else {
1557		mcp->mc_scf_log_tail[bank]->sl_next = p;
1558		mcp->mc_scf_log_tail[bank] = p;
1559	}
1560	mcp->mc_scf_total[bank]++;
1561}
1562/*
1563 * This routine determines what kind of CE happens, intermittent
1564 * or permanent as follows. (See 4.7.3 in Columbus2 PRM.)
1565 * - Do rewrite by issuing REW_REQ command to MAC_PTRL_CNTL register.
1566 * - If CE is still detected on the same address even after doing
1567 *   rewrite operation twice, it is determined as permanent error.
1568 * - If error is not detected anymore, it is determined as intermittent
1569 *   error.
1570 * - If UE is detected due to rewrite operation, it should be treated
1571 *   as UE.
1572 */
1573
1574/* ARGSUSED */
1575static void
1576mc_scrub_ce(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat, int ptrl_error)
1577{
1578	uint32_t cntl;
1579	int i;
1580
1581	flt_stat->mf_type = FLT_TYPE_PERMANENT_CE;
1582	/*
1583	 * rewrite request 1st time reads and correct error data
1584	 * and write to DIMM.  2nd rewrite request must be issued
1585	 * after REW_CE/UE/END is 0.  When the 2nd request is completed,
1586	 * if REW_CE = 1, then it is permanent CE.
1587	 */
1588	for (i = 0; i < 2; i++) {
1589		cntl = do_rewrite(mcp, bank, flt_stat->mf_err_add, 0);
1590
1591		if (cntl == 0) {
1592			/* timeout case */
1593			return;
1594		}
1595		/*
1596		 * If the error becomes UE or CMPE
1597		 * we return to the caller immediately.
1598		 */
1599		if (cntl & MAC_CNTL_REW_UE) {
1600			if (ptrl_error)
1601				flt_stat->mf_cntl |= MAC_CNTL_PTRL_UE;
1602			else
1603				flt_stat->mf_cntl |= MAC_CNTL_MI_UE;
1604			flt_stat->mf_type = FLT_TYPE_UE;
1605			return;
1606		}
1607		if (cntl & MAC_CNTL_REW_CMPE) {
1608			if (ptrl_error)
1609				flt_stat->mf_cntl |= MAC_CNTL_PTRL_CMPE;
1610			else
1611				flt_stat->mf_cntl |= MAC_CNTL_MI_CMPE;
1612			flt_stat->mf_type = FLT_TYPE_CMPE;
1613			return;
1614		}
1615	}
1616	if (!(cntl & MAC_CNTL_REW_CE)) {
1617		flt_stat->mf_type = FLT_TYPE_INTERMITTENT_CE;
1618	}
1619
1620	if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) {
1621		/* report PERMANENT_CE to SP via SCF */
1622		if (!(flt_stat->mf_err_log & MAC_ERR_LOG_INVALID)) {
1623			mc_queue_scf_log(mcp, flt_stat, bank);
1624		}
1625	}
1626}
1627
1628#define	IS_CMPE(cntl, f)	((cntl) & ((f) ? MAC_CNTL_PTRL_CMPE :\
1629				MAC_CNTL_MI_CMPE))
1630#define	IS_UE(cntl, f)	((cntl) & ((f) ? MAC_CNTL_PTRL_UE : MAC_CNTL_MI_UE))
1631#define	IS_CE(cntl, f)	((cntl) & ((f) ? MAC_CNTL_PTRL_CE : MAC_CNTL_MI_CE))
1632#define	IS_OK(cntl, f)	(!((cntl) & ((f) ? MAC_CNTL_PTRL_ERRS : \
1633			MAC_CNTL_MI_ERRS)))
1634
1635
1636static int
1637IS_CE_ONLY(uint32_t cntl, int ptrl_error)
1638{
1639	if (ptrl_error) {
1640		return ((cntl & MAC_CNTL_PTRL_ERRS) == MAC_CNTL_PTRL_CE);
1641	} else {
1642		return ((cntl & MAC_CNTL_MI_ERRS) == MAC_CNTL_MI_CE);
1643	}
1644}
1645
1646void
1647mc_write_cntl(mc_opl_t *mcp, int bank, uint32_t value)
1648{
1649	int ebank = (IS_MIRROR(mcp, bank)) ? MIRROR_IDX(bank) : bank;
1650
1651	if (mcp->mc_speedup_period[ebank] > 0)
1652		value |= mc_max_speed;
1653	else
1654		value |= mcp->mc_speed;
1655	ST_MAC_REG(MAC_PTRL_CNTL(mcp, bank), value);
1656}
1657
1658static void
1659mc_read_ptrl_reg(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat)
1660{
1661	flt_stat->mf_cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) &
1662	    MAC_CNTL_PTRL_ERRS;
1663	flt_stat->mf_err_add = LD_MAC_REG(MAC_PTRL_ERR_ADD(mcp, bank));
1664	flt_stat->mf_err_log = LD_MAC_REG(MAC_PTRL_ERR_LOG(mcp, bank));
1665	flt_stat->mf_flt_maddr.ma_bd = mcp->mc_board_num;
1666	flt_stat->mf_flt_maddr.ma_phys_bd = mcp->mc_phys_board_num;
1667	flt_stat->mf_flt_maddr.ma_bank = bank;
1668	flt_stat->mf_flt_maddr.ma_dimm_addr = flt_stat->mf_err_add;
1669}
1670
1671static void
1672mc_read_mi_reg(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat)
1673{
1674	uint32_t status, old_status;
1675
1676	status = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) & MAC_CNTL_MI_ERRS;
1677	old_status = 0;
1678
1679	/* we keep reading until the status is stable */
1680	while (old_status != status) {
1681		old_status = status;
1682		flt_stat->mf_err_add = LD_MAC_REG(MAC_MI_ERR_ADD(mcp, bank));
1683		flt_stat->mf_err_log = LD_MAC_REG(MAC_MI_ERR_LOG(mcp, bank));
1684		status = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) &
1685		    MAC_CNTL_MI_ERRS;
1686		if (status == old_status) {
1687			break;
1688		}
1689	}
1690
1691	flt_stat->mf_cntl = status;
1692	flt_stat->mf_flt_maddr.ma_bd = mcp->mc_board_num;
1693	flt_stat->mf_flt_maddr.ma_phys_bd = mcp->mc_phys_board_num;
1694	flt_stat->mf_flt_maddr.ma_bank = bank;
1695	flt_stat->mf_flt_maddr.ma_dimm_addr = flt_stat->mf_err_add;
1696}
1697
1698
1699/*
1700 * Error philosophy for mirror mode:
1701 *
1702 * PTRL (The error address for both banks are same, since ptrl stops if it
1703 * detects error.)
1704 * - Compare error  log CMPE.
1705 *
1706 * - UE-UE           Report MUE.  No rewrite.
1707 *
1708 * - UE-*	     UE-(CE/OK). Rewrite to scrub UE.  Report SUE.
1709 *
1710 * - CE-*            CE-(CE/OK). Scrub to determine if CE is permanent.
1711 *                   If CE is permanent, inform SCF.  Once for each
1712 *		     Dimm.  If CE becomes UE or CMPE, go back to above.
1713 *
1714 *
1715 * MI (The error addresses for each bank are the same or different.)
1716 * - Compare  error  If addresses are the same.  Just CMPE, so log CMPE.
1717 *		     If addresses are different (this could happen
1718 *		     as a result of scrubbing.  Report each separately.
1719 *		     Only report error info on each side.
1720 *
1721 * - UE-UE           Addresses are the same.  Report MUE.
1722 *		     Addresses are different.  Report SUE on each bank.
1723 *		     Rewrite to clear UE.
1724 *
1725 * - UE-*	     UE-(CE/OK)
1726 *		     Rewrite to clear UE.  Report SUE for the bank.
1727 *
1728 * - CE-*            CE-(CE/OK).  Scrub to determine if CE is permanent.
1729 *                   If CE becomes UE or CMPE, go back to above.
1730 *
1731 */
1732
1733static int
1734mc_process_error_mir(mc_opl_t *mcp, mc_aflt_t *mc_aflt, mc_flt_stat_t *flt_stat)
1735{
1736	int ptrl_error = mc_aflt->mflt_is_ptrl;
1737	int i;
1738	int rv = 0;
1739	int bank;
1740	int rewrite_timeout = 0;
1741
1742	MC_LOG("process mirror errors cntl[0] = %x, cntl[1] = %x\n",
1743	    flt_stat[0].mf_cntl, flt_stat[1].mf_cntl);
1744
1745	if (ptrl_error) {
1746		if (((flt_stat[0].mf_cntl | flt_stat[1].mf_cntl) &
1747		    MAC_CNTL_PTRL_ERRS) == 0)
1748			return (0);
1749	} else {
1750		if (((flt_stat[0].mf_cntl | flt_stat[1].mf_cntl) &
1751		    MAC_CNTL_MI_ERRS) == 0)
1752			return (0);
1753	}
1754
1755	/*
1756	 * First we take care of the case of CE
1757	 * because they can become UE or CMPE
1758	 */
1759	for (i = 0; i < 2; i++) {
1760		if (IS_CE_ONLY(flt_stat[i].mf_cntl, ptrl_error)) {
1761			bank = flt_stat[i].mf_flt_maddr.ma_bank;
1762			MC_LOG("CE detected on bank %d\n", bank);
1763			mc_scrub_ce(mcp, bank, &flt_stat[i], ptrl_error);
1764			if (MC_REWRITE_ACTIVE(mcp, bank)) {
1765				rewrite_timeout = 1;
1766			}
1767			rv = 1;
1768		}
1769	}
1770
1771	if (rewrite_timeout)
1772		return (0);
1773
1774	/* The above scrubbing can turn CE into UE or CMPE */
1775
1776	/*
1777	 * Now we distinguish two cases: same address or not
1778	 * the same address.  It might seem more intuitive to
1779	 * distinguish PTRL v.s. MI error but it is more
1780	 * complicated that way.
1781	 */
1782
1783	if (flt_stat[0].mf_err_add == flt_stat[1].mf_err_add) {
1784
1785		if (IS_CMPE(flt_stat[0].mf_cntl, ptrl_error) ||
1786		    IS_CMPE(flt_stat[1].mf_cntl, ptrl_error)) {
1787			flt_stat[0].mf_type = FLT_TYPE_CMPE;
1788			flt_stat[1].mf_type = FLT_TYPE_CMPE;
1789			mc_aflt->mflt_erpt_class = MC_OPL_CMPE;
1790			mc_aflt->mflt_nflts = 2;
1791			mc_aflt->mflt_stat[0] = &flt_stat[0];
1792			mc_aflt->mflt_stat[1] = &flt_stat[1];
1793			mc_aflt->mflt_pr = PR_UE;
1794			/*
1795			 * Compare error is result of MAC internal error, so
1796			 * simply log it instead of publishing an ereport. SCF
1797			 * diagnoses all the MAC internal and its i/f error.
1798			 */
1799			MC_LOG("cmpe error detected\n");
1800			return (1);
1801		}
1802
1803		if (IS_UE(flt_stat[0].mf_cntl, ptrl_error) &&
1804		    IS_UE(flt_stat[1].mf_cntl, ptrl_error)) {
1805			/* Both side are UE's */
1806
1807			MAC_SET_ERRLOG_INFO(&flt_stat[0]);
1808			MAC_SET_ERRLOG_INFO(&flt_stat[1]);
1809			MC_LOG("MUE detected\n");
1810			flt_stat[0].mf_type = FLT_TYPE_MUE;
1811			flt_stat[1].mf_type = FLT_TYPE_MUE;
1812			mc_aflt->mflt_erpt_class = MC_OPL_MUE;
1813			mc_aflt->mflt_nflts = 2;
1814			mc_aflt->mflt_stat[0] = &flt_stat[0];
1815			mc_aflt->mflt_stat[1] = &flt_stat[1];
1816			mc_aflt->mflt_pr = PR_UE;
1817			mc_err_drain(mc_aflt);
1818			return (1);
1819		}
1820
1821		/* Now the only case is UE/CE, UE/OK, or don't care */
1822		for (i = 0; i < 2; i++) {
1823			if (IS_UE(flt_stat[i].mf_cntl, ptrl_error)) {
1824
1825			/* rewrite can clear the one side UE error */
1826
1827			if (IS_OK(flt_stat[i^1].mf_cntl, ptrl_error)) {
1828				(void) do_rewrite(mcp,
1829				    flt_stat[i].mf_flt_maddr.ma_bank,
1830				    flt_stat[i].mf_flt_maddr.ma_dimm_addr, 0);
1831			}
1832			flt_stat[i].mf_type = FLT_TYPE_UE;
1833			MAC_SET_ERRLOG_INFO(&flt_stat[i]);
1834			mc_aflt->mflt_erpt_class = MC_OPL_SUE;
1835			mc_aflt->mflt_stat[0] = &flt_stat[i];
1836			mc_aflt->mflt_nflts = 1;
1837			mc_aflt->mflt_pr = PR_MCE;
1838			mc_err_drain(mc_aflt);
1839			/* Once we hit a UE/CE or UE/OK case, done */
1840			return (1);
1841			}
1842		}
1843
1844	} else {
1845		/*
1846		 * addresses are different. That means errors
1847		 * on the 2 banks are not related at all.
1848		 */
1849		for (i = 0; i < 2; i++) {
1850			if (IS_CMPE(flt_stat[i].mf_cntl, ptrl_error)) {
1851				flt_stat[i].mf_type = FLT_TYPE_CMPE;
1852				mc_aflt->mflt_erpt_class = MC_OPL_CMPE;
1853				mc_aflt->mflt_nflts = 1;
1854				mc_aflt->mflt_stat[0] = &flt_stat[i];
1855				mc_aflt->mflt_pr = PR_UE;
1856				/*
1857				 * Compare error is result of MAC internal
1858				 * error, so simply log it instead of
1859				 * publishing an ereport. SCF diagnoses all
1860				 * the MAC internal and its interface error.
1861				 */
1862				MC_LOG("cmpe error detected\n");
1863				/* no more report on this bank */
1864				flt_stat[i].mf_cntl = 0;
1865				rv = 1;
1866			}
1867		}
1868
1869		/* rewrite can clear the one side UE error */
1870
1871		for (i = 0; i < 2; i++) {
1872			if (IS_UE(flt_stat[i].mf_cntl, ptrl_error)) {
1873				(void) do_rewrite(mcp,
1874				    flt_stat[i].mf_flt_maddr.ma_bank,
1875				    flt_stat[i].mf_flt_maddr.ma_dimm_addr,
1876				    0);
1877				flt_stat[i].mf_type = FLT_TYPE_UE;
1878				MAC_SET_ERRLOG_INFO(&flt_stat[i]);
1879				mc_aflt->mflt_erpt_class = MC_OPL_SUE;
1880				mc_aflt->mflt_stat[0] = &flt_stat[i];
1881				mc_aflt->mflt_nflts = 1;
1882				mc_aflt->mflt_pr = PR_MCE;
1883				mc_err_drain(mc_aflt);
1884				rv = 1;
1885			}
1886		}
1887	}
1888	return (rv);
1889}
1890static void
1891mc_error_handler_mir(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr)
1892{
1893	mc_aflt_t mc_aflt;
1894	mc_flt_stat_t flt_stat[2], mi_flt_stat[2];
1895	int i;
1896	int mi_valid;
1897
1898	ASSERT(rsaddr);
1899
1900	bzero(&mc_aflt, sizeof (mc_aflt_t));
1901	bzero(&flt_stat, 2 * sizeof (mc_flt_stat_t));
1902	bzero(&mi_flt_stat, 2 * sizeof (mc_flt_stat_t));
1903
1904
1905	mc_aflt.mflt_mcp = mcp;
1906	mc_aflt.mflt_id = gethrtime();
1907
1908	/* Now read all the registers into flt_stat */
1909
1910	for (i = 0; i < 2; i++) {
1911		MC_LOG("Reading registers of bank %d\n", bank);
1912		/* patrol registers */
1913		mc_read_ptrl_reg(mcp, bank, &flt_stat[i]);
1914
1915		/*
1916		 * In mirror mode, it is possible that only one bank
1917		 * may report the error. We need to check for it to
1918		 * ensure we pick the right addr value for patrol restart.
1919		 * Note that if both banks reported errors, we pick the
1920		 * 2nd one. Both banks should reported the same error address.
1921		 */
1922		if (flt_stat[i].mf_cntl & MAC_CNTL_PTRL_ERRS)
1923			rsaddr->mi_restartaddr = flt_stat[i].mf_flt_maddr;
1924
1925		MC_LOG("ptrl registers cntl %x add %x log %x\n",
1926		    flt_stat[i].mf_cntl, flt_stat[i].mf_err_add,
1927		    flt_stat[i].mf_err_log);
1928
1929		/* MI registers */
1930		mc_read_mi_reg(mcp, bank, &mi_flt_stat[i]);
1931
1932		MC_LOG("MI registers cntl %x add %x log %x\n",
1933		    mi_flt_stat[i].mf_cntl, mi_flt_stat[i].mf_err_add,
1934		    mi_flt_stat[i].mf_err_log);
1935
1936		bank = bank^1;
1937	}
1938
1939	/* clear errors once we read all the registers */
1940	MAC_CLEAR_ERRS(mcp, bank, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS));
1941
1942	MAC_CLEAR_ERRS(mcp, bank ^ 1, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS));
1943
1944	/* Process MI errors first */
1945
1946	/* if not error mode, cntl1 is 0 */
1947	if ((mi_flt_stat[0].mf_err_add & MAC_ERR_ADD_INVALID) ||
1948	    (mi_flt_stat[0].mf_err_log & MAC_ERR_LOG_INVALID))
1949		mi_flt_stat[0].mf_cntl = 0;
1950
1951	if ((mi_flt_stat[1].mf_err_add & MAC_ERR_ADD_INVALID) ||
1952	    (mi_flt_stat[1].mf_err_log & MAC_ERR_LOG_INVALID))
1953		mi_flt_stat[1].mf_cntl = 0;
1954
1955	mc_aflt.mflt_is_ptrl = 0;
1956	mi_valid = mc_process_error_mir(mcp, &mc_aflt, &mi_flt_stat[0]);
1957
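	/*
	 * If, for both banks, the PTRL error bits match the MI error
	 * bits and the PTRL error address equals the MI error address
	 * rounded down to MC_BOUND_BYTE, the PTRL error is just a
	 * duplicate of the MI error handled above, so report only the
	 * MI result.
	 */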
1958	if ((((flt_stat[0].mf_cntl & MAC_CNTL_PTRL_ERRS) >>
1959	    MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat[0].mf_cntl &
1960	    MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) &&
1961	    (flt_stat[0].mf_err_add ==
1962	    ROUNDDOWN(mi_flt_stat[0].mf_err_add, MC_BOUND_BYTE)) &&
1963	    (((flt_stat[1].mf_cntl & MAC_CNTL_PTRL_ERRS) >>
1964	    MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat[1].mf_cntl &
1965	    MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) &&
1966	    (flt_stat[1].mf_err_add ==
1967	    ROUNDDOWN(mi_flt_stat[1].mf_err_add, MC_BOUND_BYTE))) {
1968#ifdef DEBUG
1969		MC_LOG("discarding PTRL error because "
1970		    "it is the same as MI\n");
1971#endif
1972		rsaddr->mi_valid = mi_valid;
1973		return;
1974	}
	/* if not in error mode (err add/log invalid), treat cntl as 0 */
1976	if ((flt_stat[0].mf_err_add & MAC_ERR_ADD_INVALID) ||
1977	    (flt_stat[0].mf_err_log & MAC_ERR_LOG_INVALID))
1978		flt_stat[0].mf_cntl = 0;
1979
1980	if ((flt_stat[1].mf_err_add & MAC_ERR_ADD_INVALID) ||
1981	    (flt_stat[1].mf_err_log & MAC_ERR_LOG_INVALID))
1982		flt_stat[1].mf_cntl = 0;
1983
1984	mc_aflt.mflt_is_ptrl = 1;
1985	rsaddr->mi_valid = mc_process_error_mir(mcp, &mc_aflt, &flt_stat[0]);
1986}

static int
1988mc_process_error(mc_opl_t *mcp, int bank, mc_aflt_t *mc_aflt,
1989	mc_flt_stat_t *flt_stat)
1990{
1991	int ptrl_error = mc_aflt->mflt_is_ptrl;
1992	int rv = 0;
1993
1994	mc_aflt->mflt_erpt_class = NULL;
1995	if (IS_UE(flt_stat->mf_cntl, ptrl_error)) {
1996		MC_LOG("UE detected\n");
1997		flt_stat->mf_type = FLT_TYPE_UE;
1998		mc_aflt->mflt_erpt_class = MC_OPL_UE;
1999		mc_aflt->mflt_pr = PR_UE;
2000		MAC_SET_ERRLOG_INFO(flt_stat);
2001		rv = 1;
2002	} else if (IS_CE(flt_stat->mf_cntl, ptrl_error)) {
2003		MC_LOG("CE detected\n");
2004		MAC_SET_ERRLOG_INFO(flt_stat);
2005
2006		/* Error type can change after scrubbing */
2007		mc_scrub_ce(mcp, bank, flt_stat, ptrl_error);
2008		if (MC_REWRITE_ACTIVE(mcp, bank)) {
2009			return (0);
2010		}
2011
2012		if (flt_stat->mf_type == FLT_TYPE_INTERMITTENT_CE) {
2013			mc_aflt->mflt_erpt_class = MC_OPL_ICE;
2014			mc_aflt->mflt_pr = PR_MCE;
2015		} else if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) {
2016			mc_aflt->mflt_erpt_class = MC_OPL_CE;
2017			mc_aflt->mflt_pr = PR_MCE;
2018		} else if (flt_stat->mf_type == FLT_TYPE_UE) {
2019			mc_aflt->mflt_erpt_class = MC_OPL_UE;
2020			mc_aflt->mflt_pr = PR_UE;
2021		}
2022		rv = 1;
2023	}
2024	MC_LOG("mc_process_error: fault type %x erpt %s\n", flt_stat->mf_type,
2025	    mc_aflt->mflt_erpt_class);
2026	if (mc_aflt->mflt_erpt_class) {
2027		mc_aflt->mflt_stat[0] = flt_stat;
2028		mc_aflt->mflt_nflts = 1;
2029		mc_err_drain(mc_aflt);
2030	}
2031	return (rv);
2032}
2033
2034static void
2035mc_error_handler(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr)
2036{
2037	mc_aflt_t mc_aflt;
2038	mc_flt_stat_t flt_stat, mi_flt_stat;
	int mi_valid = 0;
2040
2041	bzero(&mc_aflt, sizeof (mc_aflt_t));
2042	bzero(&flt_stat, sizeof (mc_flt_stat_t));
2043	bzero(&mi_flt_stat, sizeof (mc_flt_stat_t));
2044
2045	mc_aflt.mflt_mcp = mcp;
2046	mc_aflt.mflt_id = gethrtime();
2047
2048	/* patrol registers */
2049	mc_read_ptrl_reg(mcp, bank, &flt_stat);
2050
2051	ASSERT(rsaddr);
2052	rsaddr->mi_restartaddr = flt_stat.mf_flt_maddr;
2053
2054	MC_LOG("ptrl registers cntl %x add %x log %x\n", flt_stat.mf_cntl,
2055	    flt_stat.mf_err_add, flt_stat.mf_err_log);
2056
2057	/* MI registers */
	mc_read_mi_reg(mcp, bank, &mi_flt_stat);

2061	MC_LOG("MI registers cntl %x add %x log %x\n", mi_flt_stat.mf_cntl,
2062	    mi_flt_stat.mf_err_add, mi_flt_stat.mf_err_log);
2063
2064	/* clear errors once we read all the registers */
2065	MAC_CLEAR_ERRS(mcp, bank, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS));
2066
2067	mc_aflt.mflt_is_ptrl = 0;
2068	if ((mi_flt_stat.mf_cntl & MAC_CNTL_MI_ERRS) &&
2069	    ((mi_flt_stat.mf_err_add & MAC_ERR_ADD_INVALID) == 0) &&
2070	    ((mi_flt_stat.mf_err_log & MAC_ERR_LOG_INVALID) == 0)) {
2071		mi_valid = mc_process_error(mcp, bank, &mc_aflt, &mi_flt_stat);
2072	}
2073
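	/*
	 * As in the mirror case above, discard the PTRL error when it
	 * duplicates the MI error (same error bits, address equal to the
	 * MI address rounded down to MC_BOUND_BYTE).
	 */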
2074	if ((((flt_stat.mf_cntl & MAC_CNTL_PTRL_ERRS) >>
2075	    MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat.mf_cntl &
2076	    MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) &&
2077	    (flt_stat.mf_err_add ==
2078	    ROUNDDOWN(mi_flt_stat.mf_err_add, MC_BOUND_BYTE))) {
2079#ifdef DEBUG
2080		MC_LOG("discarding PTRL error because "
2081		    "it is the same as MI\n");
2082#endif
2083		rsaddr->mi_valid = mi_valid;
2084		return;
2085	}
2086
2087	mc_aflt.mflt_is_ptrl = 1;
2088	if ((flt_stat.mf_cntl & MAC_CNTL_PTRL_ERRS) &&
2089	    ((flt_stat.mf_err_add & MAC_ERR_ADD_INVALID) == 0) &&
2090	    ((flt_stat.mf_err_log & MAC_ERR_LOG_INVALID) == 0)) {
2091		rsaddr->mi_valid = mc_process_error(mcp, bank, &mc_aflt,
2092		    &flt_stat);
2093	}
2094}
2095/*
2096 *	memory patrol error handling algorithm:
2097 *	timeout() is used to do periodic polling
2098 *	This is the flow chart.
2099 *	timeout ->
2100 *	mc_check_errors()
2101 *	    if memory bank is installed, read the status register
2102 *	    if any error bit is set,
2103 *	    -> mc_error_handler()
2104 *		-> read all error registers
2105 *	        -> mc_process_error()
2106 *	            determine error type
2107 *	            rewrite to clear error or scrub to determine CE type
2108 *	            inform SCF on permanent CE
2109 *	        -> mc_err_drain
2110 *	            page offline processing
2111 *	            -> mc_ereport_post()
2112 */
2113
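/*
 * mc_process_rewrite -- drive the active rewrite request for a bank.
 * Once the patrol is idle, issue the rewrite at the saved address, then
 * poll for MAC_CNTL_REW_END.  On completion the rewrite state is cleared;
 * otherwise a counter is bumped and a warning is printed once it reaches
 * mc_max_rewrite_retry.
 */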
2114static void
2115mc_process_rewrite(mc_opl_t *mcp, int bank)
2116{
2117	uint32_t rew_addr, cntl;
2118	mc_retry_info_t *retry;
2119	struct mc_bank *bankp;
2120
2121	bankp = &(mcp->mc_bank[bank]);
2122	retry = bankp->mcb_active;
2123	if (retry == NULL)
2124		return;
2125
2126	if (retry->ri_state <= RETRY_STATE_ACTIVE) {
2127		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
2128		if (cntl & MAC_CNTL_PTRL_STATUS)
2129			return;
2130		rew_addr = retry->ri_addr;
2131		ST_MAC_REG(MAC_REWRITE_ADD(mcp, bank), rew_addr);
2132		MAC_REW_REQ(mcp, bank);
2133
2134		retry->ri_state = RETRY_STATE_REWRITE;
2135	}
2136
2137	cntl = ldphysio(MAC_PTRL_CNTL(mcp, bank));
2138
2139	if (cntl & MAC_CNTL_REW_END) {
2140		MAC_CLEAR_ERRS(mcp, bank,
2141		    MAC_CNTL_REW_ERRS);
2142		mc_clear_rewrite(mcp, bank);
2143	} else {
2144		/*
		 * If the rewrite does not complete in
		 * 1 hour, we have to consider this a HW
		 * failure.  However, there is no recovery
		 * mechanism.  The only thing we can do
		 * is to print a warning message to the
		 * console.  We continue to increment the
		 * counter but we only print the message
		 * once.  It would take the counter a long
		 * time to wrap around before the user might
		 * see a second message.  In practice,
		 * we have never hit this condition but
		 * we have to keep the code here just in case.
2157		 */
2158		if (++mcp->mc_bank[bank].mcb_rewrite_count
2159		    == mc_max_rewrite_retry) {
2160			cmn_err(CE_WARN, "Memory patrol feature is"
2161			" partly suspended on /LSB%d/B%d"
2162			" due to heavy memory load,"
2163			" and it will restart"
2164			" automatically.\n", mcp->mc_board_num,
2165			    bank);
2166		}
2167	}
2168}
2169
2170static void
2171mc_check_errors_func(mc_opl_t *mcp)
2172{
2173	mc_rsaddr_info_t rsaddr_info;
2174	int i, error_count = 0;
2175	uint32_t stat, cntl;
2176	int running;
2177	int wrapped;
2178	int ebk;
2179
2180	/*
2181	 * scan errors.
2182	 */
2183	if (mcp->mc_status & MC_MEMORYLESS)
2184		return;
2185
2186	for (i = 0; i < BANKNUM_PER_SB; i++) {
2187		if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) {
2188			if (MC_REWRITE_ACTIVE(mcp, i)) {
2189				mc_process_rewrite(mcp, i);
2190			}
2191			stat = ldphysio(MAC_PTRL_STAT(mcp, i));
2192			cntl = ldphysio(MAC_PTRL_CNTL(mcp, i));
2193			running = cntl & MAC_CNTL_PTRL_START;
2194			wrapped = cntl & MAC_CNTL_PTRL_ADD_MAX;
2195
2196			/* Compute the effective bank idx */
2197			ebk = (IS_MIRROR(mcp, i)) ? MIRROR_IDX(i) : i;
2198
2199			if (mc_debug_show_all || stat) {
2200				MC_LOG("/LSB%d/B%d stat %x cntl %x\n",
2201				    mcp->mc_board_num, i, stat, cntl);
2202			}
2203
2204			/*
2205			 * Update stats and reset flag if the HW patrol
2206			 * wrapped around in its scan.
2207			 */
2208			if (wrapped) {
2209				MAC_CLEAR_MAX(mcp, i);
2210				mcp->mc_period[ebk]++;
2211				if (IS_MIRROR(mcp, i)) {
2212					MC_LOG("mirror mc period %ld on "
2213					    "/LSB%d/B%d\n", mcp->mc_period[ebk],
2214					    mcp->mc_board_num, i);
2215				} else {
2216					MC_LOG("mc period %ld on "
2217					    "/LSB%d/B%d\n", mcp->mc_period[ebk],
2218					    mcp->mc_board_num, i);
2219				}
2220			}
2221
2222			if (running) {
2223				/*
				 * The MAC patrol HW is still running.
				 * Normally when an error is detected,
				 * the HW patrol will stop so that we
				 * can collect error data for reporting.
				 * Certain errors (MI errors) may not
				 * cause the HW patrol to stop, which is a
				 * problem since we cannot read error data while
				 * the HW patrol is running.  SW is not allowed
				 * to stop the HW patrol while it is running
				 * as it may cause HW inconsistency.  This is
				 * described in a HW errata.
				 * So when we detect errors that may not
				 * cause the HW patrol to stop, we speed up
				 * the HW patrol scanning in the hope that it
				 * will find the 'real' PTRL errors associated
				 * with the previous errors, causing the HW
				 * to finally stop so that we can do the
				 * reporting.
2242				 */
2243				/*
2244				 * Check to see if we did speed up
2245				 * the HW patrol due to previous errors
2246				 * detected that did not cause the patrol
				 * to stop.  We only count down the speed-up
				 * period if the HW patrol scan wrapped
				 * (counted as completing a 'period').
2249				 */
2250				if (mcp->mc_speedup_period[ebk] > 0) {
2251					if (wrapped &&
2252					    (--mcp->mc_speedup_period[ebk] ==
2253					    0)) {
2254						/*
2255						 * We did try to speed up.
2256						 * The speed up period has
2257						 * expired and the HW patrol
2258						 * is still running.  The
2259						 * errors must be intermittent.
2260						 * We have no choice but to
2261						 * ignore them, reset the scan
2262						 * speed to normal and clear
2263						 * the MI error bits. For
2264						 * mirror mode, we need to
2265						 * clear errors on both banks.
2266						 */
2267						MC_LOG("Clearing MI errors\n");
2268						MAC_CLEAR_ERRS(mcp, i,
2269						    MAC_CNTL_MI_ERRS);
2270
2271						if (IS_MIRROR(mcp, i)) {
2272							MC_LOG("Clearing "
2273							    "Mirror MI errs\n");
2274							MAC_CLEAR_ERRS(mcp,
2275							    i^1,
2276							    MAC_CNTL_MI_ERRS);
2277						}
2278					}
2279				} else if (stat & MAC_STAT_MI_ERRS) {
2280					/*
2281					 * MI errors detected but we cannot
2282					 * report them since the HW patrol
2283					 * is still running.
2284					 * We will attempt to speed up the
2285					 * scanning and hopefully the HW
					 * can detect PTRL errors at the
					 * same location, which will cause
					 * the HW patrol to stop.
2289					 */
2290					mcp->mc_speedup_period[ebk] = 2;
2291					MAC_CMD(mcp, i, 0);
2292				}
2293			} else if (stat & (MAC_STAT_PTRL_ERRS |
2294			    MAC_STAT_MI_ERRS)) {
2295				/*
2296				 * HW Patrol has stopped and we found errors.
2297				 * Proceed to collect and report error info.
2298				 */
2299				mcp->mc_speedup_period[ebk] = 0;
2300				rsaddr_info.mi_valid = 0;
2301				rsaddr_info.mi_injectrestart = 0;
2302				if (IS_MIRROR(mcp, i)) {
2303					mc_error_handler_mir(mcp, i,
2304					    &rsaddr_info);
2305				} else {
2306					mc_error_handler(mcp, i, &rsaddr_info);
2307				}
2308
2309				error_count++;
2310				(void) restart_patrol(mcp, i, &rsaddr_info);
2311			} else {
2312				/*
2313				 * HW patrol scan has apparently stopped
2314				 * but no errors detected/flagged.
2315				 * Restart the HW patrol just to be sure.
2316				 * In mirror mode, the odd bank might have
2317				 * reported errors that caused the patrol to
2318				 * stop. We'll defer the restart to the odd
2319				 * bank in this case.
2320				 */
2321				if (!IS_MIRROR(mcp, i) || (i & 0x1))
2322					(void) restart_patrol(mcp, i, NULL);
2323			}
2324		}
2325	}
2326	if (error_count > 0)
2327		mcp->mc_last_error += error_count;
2328	else
2329		mcp->mc_last_error = 0;
2330}
2331
2332/*
 * mc_polling -- Check errors on only one instance per pass, but process
 * the SCF logs for all instances to make sure we drain the errors faster
 * than they can accumulate.
 *
 * Each board should be polled only once per mc_patrol_interval_sec.
 * This is equivalent to setting mc_tick_left to OPL_MAX_BOARDS and
 * decrementing it by 1 on each timeout.  Once mc_tick_left drops to zero
 * or below, the board becomes a candidate for polling because it has
 * waited at least mc_patrol_interval_sec seconds.  If mc_timeout_period
 * is calculated differently, this has to be updated accordingly.
2344 */
2345
2346static void
2347mc_polling(void)
2348{
2349	int i, scan_error;
	mc_opl_t *mcp;

2353	scan_error = 1;
2354	for (i = 0; i < OPL_MAX_BOARDS; i++) {
2355		mutex_enter(&mcmutex);
2356		if ((mcp = mc_instances[i]) == NULL) {
2357			mutex_exit(&mcmutex);
2358			continue;
2359		}
2360		mutex_enter(&mcp->mc_lock);
2361		mutex_exit(&mcmutex);
2362		if (!(mcp->mc_status & MC_POLL_RUNNING)) {
2363			mutex_exit(&mcp->mc_lock);
2364			continue;
2365		}
2366		if (scan_error && mcp->mc_tick_left <= 0) {
2367			mc_check_errors_func((void *)mcp);
2368			mcp->mc_tick_left = OPL_MAX_BOARDS;
2369			scan_error = 0;
2370		} else {
2371			mcp->mc_tick_left--;
2372		}
2373		mc_process_scf_log(mcp);
2374		mutex_exit(&mcp->mc_lock);
2375	}
2376}
2377
2378static void
2379get_ptrl_start_address(mc_opl_t *mcp, int bank, mc_addr_t *maddr)
2380{
2381	maddr->ma_bd = mcp->mc_board_num;
2382	maddr->ma_bank = bank;
2383	maddr->ma_dimm_addr = 0;
2384}
2385
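/*
 * One entry of the "sb-mem-ranges" property consumed by get_base_address()
 * below: the base physical address and the size of the memory configured
 * on this system board.
 */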
2386typedef struct mc_mem_range {
2387	uint64_t	addr;
2388	uint64_t	size;
2389} mc_mem_range_t;
2390
2391static int
2392get_base_address(mc_opl_t *mcp)
2393{
2394	mc_mem_range_t *mem_range;
2395	int len;
2396
2397	if (ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS,
2398	    "sb-mem-ranges", (caddr_t)&mem_range, &len) != DDI_SUCCESS) {
2399		return (DDI_FAILURE);
2400	}
2401
2402	mcp->mc_start_address = mem_range->addr;
2403	mcp->mc_size = mem_range->size;
2404
2405	kmem_free(mem_range, len);
2406	return (DDI_SUCCESS);
2407}
2408
2409struct mc_addr_spec {
2410	uint32_t bank;
2411	uint32_t phys_hi;
2412	uint32_t phys_lo;
2413};
2414
2415#define	REGS_PA(m, i) ((((uint64_t)m[i].phys_hi)<<32) | m[i].phys_lo)
2416
2417static char *mc_tbl_name[] = {
2418	"cs0-mc-pa-trans-table",
2419	"cs1-mc-pa-trans-table"
2420};
2421
2422/*
 * This routine performs a range check for a given PA
 * to see if it belongs to the memory range for this board.
 * Return 1 if it is valid (within the range) and 0 otherwise.
2426 */
2427static int
2428mc_rangecheck_pa(mc_opl_t *mcp, uint64_t pa)
2429{
2430	if ((pa < mcp->mc_start_address) || (mcp->mc_start_address +
2431	    mcp->mc_size <= pa))
2432		return (0);
2433	else
2434		return (1);
2435}
2436
2437static void
2438mc_memlist_delete(struct memlist *mlist)
2439{
2440	struct memlist *ml;
2441
2442	for (ml = mlist; ml; ml = mlist) {
2443		mlist = ml->ml_next;
2444		kmem_free(ml, sizeof (struct memlist));
2445	}
2446}
2447
2448static struct memlist *
2449mc_memlist_dup(struct memlist *mlist)
2450{
2451	struct memlist *hl = NULL, *tl, **mlp;
2452
2453	if (mlist == NULL)
2454		return (NULL);
2455
2456	mlp = &hl;
2457	tl = *mlp;
2458	for (; mlist; mlist = mlist->ml_next) {
2459		*mlp = kmem_alloc(sizeof (struct memlist), KM_SLEEP);
2460		(*mlp)->ml_address = mlist->ml_address;
2461		(*mlp)->ml_size = mlist->ml_size;
2462		(*mlp)->ml_prev = tl;
2463		tl = *mlp;
2464		mlp = &((*mlp)->ml_next);
2465	}
2466	*mlp = NULL;
2467
2468	return (hl);
2469}
2470
2471
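/*
 * mc_memlist_del_span -- remove the range [base, base + len) from mlist,
 * trimming entries that partially overlap it and splitting an entry in
 * two when the range falls strictly inside it.  Returns the (possibly
 * new) head of the list.
 */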
2472static struct memlist *
2473mc_memlist_del_span(struct memlist *mlist, uint64_t base, uint64_t len)
2474{
2475	uint64_t	end;
2476	struct memlist	*ml, *tl, *nlp;
2477
2478	if (mlist == NULL)
2479		return (NULL);
2480
2481	end = base + len;
2482	if ((end <= mlist->ml_address) || (base == end))
2483		return (mlist);
2484
2485	for (tl = ml = mlist; ml; tl = ml, ml = nlp) {
2486		uint64_t	mend;
2487
2488		nlp = ml->ml_next;
2489
2490		if (end <= ml->ml_address)
2491			break;
2492
2493		mend = ml->ml_address + ml->ml_size;
2494		if (base < mend) {
2495			if (base <= ml->ml_address) {
2496				ml->ml_address = end;
2497				if (end >= mend)
2498					ml->ml_size = 0ull;
2499				else
2500					ml->ml_size = mend - ml->ml_address;
2501			} else {
2502				ml->ml_size = base - ml->ml_address;
2503				if (end < mend) {
2504					struct memlist	*nl;
2505					/*
					 * splitting a memlist entry.
2507					 */
2508					nl = kmem_alloc(sizeof (struct memlist),
2509					    KM_SLEEP);
2510					nl->ml_address = end;
2511					nl->ml_size = mend - nl->ml_address;
2512					if ((nl->ml_next = nlp) != NULL)
2513						nlp->ml_prev = nl;
2514					nl->ml_prev = ml;
2515					ml->ml_next = nl;
2516					nlp = nl;
2517				}
2518			}
2519			if (ml->ml_size == 0ull) {
2520				if (ml == mlist) {
2521					if ((mlist = nlp) != NULL)
2522						nlp->ml_prev = NULL;
2523					kmem_free(ml, sizeof (struct memlist));
2524					if (mlist == NULL)
2525						break;
2526					ml = nlp;
2527				} else {
2528					if ((tl->ml_next = nlp) != NULL)
2529						nlp->ml_prev = tl;
2530					kmem_free(ml, sizeof (struct memlist));
2531					ml = tl;
2532				}
2533			}
2534		}
2535	}
2536
2537	return (mlist);
2538}
2539
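/*
 * mc_get_mlist -- build mcp->mlist by duplicating phys_install and
 * stripping everything outside this board's
 * [mc_start_address, mc_start_address + mc_size) range.
 */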
2540static void
2541mc_get_mlist(mc_opl_t *mcp)
2542{
2543	struct memlist *mlist;
2544
2545	memlist_read_lock();
2546	mlist = mc_memlist_dup(phys_install);
2547	memlist_read_unlock();
2548
2549	if (mlist) {
2550		mlist = mc_memlist_del_span(mlist, 0ull, mcp->mc_start_address);
2551	}
2552
2553	if (mlist) {
2554		uint64_t startpa, endpa;
2555
2556		startpa = mcp->mc_start_address + mcp->mc_size;
2557		endpa = ptob(physmax + 1);
2558		if (endpa > startpa) {
2559			mlist = mc_memlist_del_span(mlist, startpa,
2560			    endpa - startpa);
2561		}
2562	}
2563
2564	if (mlist) {
2565		mcp->mlist = mlist;
2566	}
2567}
2568
2569int
2570mc_board_add(mc_opl_t *mcp)
2571{
2572	struct mc_addr_spec *macaddr;
2573	cs_status_t *cs_status;
2574	int len, len1, i, bk, cc;
2575	mc_rsaddr_info_t rsaddr;
2576	uint32_t mirr;
2577	int nbanks = 0;
2578	uint64_t nbytes = 0;
2579	int mirror_mode = 0;
2580	int ret;
2581
2582	/*
	 * Get the configuration from the "pseudo-mc" node, which includes:
2584	 * board# : LSB number
2585	 * mac-addr : physical base address of MAC registers
2586	 * csX-mac-pa-trans-table: translation table from DIMM address
2587	 *			to physical address or vice versa.
2588	 */
2589	mcp->mc_board_num = (int)ddi_getprop(DDI_DEV_T_ANY, mcp->mc_dip,
2590	    DDI_PROP_DONTPASS, "board#", -1);
2591
2592	if (mcp->mc_board_num == -1) {
2593		return (DDI_FAILURE);
2594	}
2595
2596	/*
	 * Get the start address of this CAB.  It comes from the
	 * "sb-mem-ranges" property.
2599	 */
2600
2601	if (get_base_address(mcp) == DDI_FAILURE) {
2602		return (DDI_FAILURE);
2603	}
2604	/* get mac-pa trans tables */
2605	for (i = 0; i < MC_TT_CS; i++) {
2606		len = MC_TT_ENTRIES;
2607		cc = ddi_getlongprop_buf(DDI_DEV_T_ANY, mcp->mc_dip,
2608		    DDI_PROP_DONTPASS, mc_tbl_name[i],
2609		    (caddr_t)mcp->mc_trans_table[i], &len);
2610
2611		if (cc != DDI_SUCCESS) {
2612			bzero(mcp->mc_trans_table[i], MC_TT_ENTRIES);
2613		}
2614	}
2615	mcp->mlist = NULL;
2616
2617	mc_get_mlist(mcp);
2618
	/* initialize bank information */
2620	cc = ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS,
2621	    "mc-addr", (caddr_t)&macaddr, &len);
2622	if (cc != DDI_SUCCESS) {
2623		cmn_err(CE_WARN, "Cannot get mc-addr. err=%d\n", cc);
2624		return (DDI_FAILURE);
2625	}
2626
2627	cc = ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS,
2628	    "cs-status", (caddr_t)&cs_status, &len1);
2629
2630	if (cc != DDI_SUCCESS) {
2631		if (len > 0)
2632			kmem_free(macaddr, len);
2633		cmn_err(CE_WARN, "Cannot get cs-status. err=%d\n", cc);
2634		return (DDI_FAILURE);
2635	}
2636	/* get the physical board number for a given logical board number */
2637	mcp->mc_phys_board_num = mc_opl_get_physical_board(mcp->mc_board_num);
2638
2639	if (mcp->mc_phys_board_num < 0) {
2640		if (len > 0)
2641			kmem_free(macaddr, len);
2642		cmn_err(CE_WARN, "Unable to obtain the physical board number");
2643		return (DDI_FAILURE);
2644	}
2645
2646	mutex_init(&mcp->mc_lock, NULL, MUTEX_DRIVER, NULL);
2647
2648	for (i = 0; i < len1 / sizeof (cs_status_t); i++) {
2649		nbytes += ((uint64_t)cs_status[i].cs_avail_hi << 32) |
2650		    ((uint64_t)cs_status[i].cs_avail_low);
2651	}
2652	if (len1 > 0)
2653		kmem_free(cs_status, len1);
2654	nbanks = len / sizeof (struct mc_addr_spec);
2655
2656	if (nbanks > 0)
2657		nbytes /= nbanks;
2658	else {
2659		/* No need to free macaddr because len must be 0 */
2660		mcp->mc_status |= MC_MEMORYLESS;
2661		return (DDI_SUCCESS);
2662	}
2663
2664	for (i = 0; i < BANKNUM_PER_SB; i++) {
2665		mcp->mc_scf_retry[i] = 0;
2666		mcp->mc_period[i] = 0;
2667		mcp->mc_speedup_period[i] = 0;
2668	}
2669
2670	/*
2671	 * Get the memory size here. Let it be B (bytes).
2672	 * Let T be the time in u.s. to scan 64 bytes.
2673	 * If we want to complete 1 round of scanning in P seconds.
2674	 *
2675	 *	B * T * 10^(-6)	= P
2676	 *	---------------
2677	 *		64
2678	 *
2679	 *	T = P * 64 * 10^6
2680	 *	    -------------
2681	 *		B
2682	 *
2683	 *	  = P * 64 * 10^6
2684	 *	    -------------
2685	 *		B
2686	 *
2687	 *	The timing bits are set in PTRL_CNTL[28:26] where
2688	 *
2689	 *	0	- 1 m.s
2690	 *	1	- 512 u.s.
2691	 *	10	- 256 u.s.
2692	 *	11	- 128 u.s.
2693	 *	100	- 64 u.s.
2694	 *	101	- 32 u.s.
2695	 *	110	- 0 u.s.
2696	 *	111	- reserved.
2697	 *
2698	 *
2699	 *	a[0] = 110, a[1] = 101, ... a[6] = 0
2700	 *
2701	 *	cs-status property is int x 7
2702	 *	0 - cs#
2703	 *	1 - cs-status
2704	 *	2 - cs-avail.hi
2705	 *	3 - cs-avail.lo
2706	 *	4 - dimm-capa.hi
2707	 *	5 - dimm-capa.lo
2708	 *	6 - #of dimms
2709	 */
2710
2711	if (nbytes > 0) {
2712		int i;
2713		uint64_t ms;
2714		ms = ((uint64_t)mc_scan_period * 64 * 1000000)/nbytes;
2715		mcp->mc_speed = mc_scan_speeds[MC_MAX_SPEEDS - 1].mc_speeds;
2716		for (i = 0; i < MC_MAX_SPEEDS - 1; i++) {
2717			if (ms < mc_scan_speeds[i + 1].mc_period) {
2718				mcp->mc_speed = mc_scan_speeds[i].mc_speeds;
2719				break;
2720			}
2721		}
2722	} else
		mcp->mc_speed = 0;

2726	for (i = 0; i < len / sizeof (struct mc_addr_spec); i++) {
2727		struct mc_bank *bankp;
2728		mc_retry_info_t *retry;
2729		uint32_t reg;
2730		int k;
2731
2732		/*
2733		 * setup bank
2734		 */
2735		bk = macaddr[i].bank;
2736		bankp = &(mcp->mc_bank[bk]);
2737		bankp->mcb_status = BANK_INSTALLED;
2738		bankp->mcb_reg_base = REGS_PA(macaddr, i);
2739
2740		bankp->mcb_retry_freelist = NULL;
2741		bankp->mcb_retry_pending = NULL;
2742		bankp->mcb_active = NULL;
2743		retry = &bankp->mcb_retry_infos[0];
2744		for (k = 0; k < MC_RETRY_COUNT; k++, retry++) {
2745			mc_retry_info_put(&bankp->mcb_retry_freelist, retry);
2746		}
2747
2748		reg = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bk));
2749		bankp->mcb_ptrl_cntl = (reg & MAC_CNTL_PTRL_PRESERVE_BITS);
2750
2751		/*
2752		 * check if mirror mode
2753		 */
2754		mirr = LD_MAC_REG(MAC_MIRR(mcp, bk));
2755
2756		if (mirr & MAC_MIRR_MIRROR_MODE) {
2757			MC_LOG("Mirror -> /LSB%d/B%d\n", mcp->mc_board_num,
2758			    bk);
2759			bankp->mcb_status |= BANK_MIRROR_MODE;
2760			mirror_mode = 1;
2761			/*
2762			 * The following bit is only used for
			 * error injection.  We should clear it.
2764			 */
2765			if (mirr & MAC_MIRR_BANK_EXCLUSIVE)
2766				ST_MAC_REG(MAC_MIRR(mcp, bk), 0);
2767		}
2768
2769		/*
2770		 * restart if not mirror mode or the other bank
2771		 * of the mirror is not running
2772		 */
2773		if (!(mirr & MAC_MIRR_MIRROR_MODE) ||
2774		    !(mcp->mc_bank[bk^1].mcb_status & BANK_PTRL_RUNNING)) {
2775			MC_LOG("Starting up /LSB%d/B%d\n", mcp->mc_board_num,
2776			    bk);
2777			get_ptrl_start_address(mcp, bk, &rsaddr.mi_restartaddr);
2778			rsaddr.mi_valid = 0;
2779			rsaddr.mi_injectrestart = 0;
2780			(void) restart_patrol(mcp, bk, &rsaddr);
2781		} else {
2782			MC_LOG("Not starting up /LSB%d/B%d\n",
2783			    mcp->mc_board_num, bk);
2784		}
2785		bankp->mcb_status |= BANK_PTRL_RUNNING;
2786	}
2787	if (len > 0)
2788		kmem_free(macaddr, len);
2789
2790	ret = ndi_prop_update_int(DDI_DEV_T_NONE, mcp->mc_dip, "mirror-mode",
2791	    mirror_mode);
2792	if (ret != DDI_PROP_SUCCESS) {
2793		cmn_err(CE_WARN, "Unable to update mirror-mode property");
2794	}
2795
2796	mcp->mc_dimm_list = mc_get_dimm_list(mcp);
2797
2798	/*
2799	 * set interval in HZ.
2800	 */
2801	mcp->mc_last_error = 0;
2802
2803	/* restart memory patrol checking */
2804	mcp->mc_status |= MC_POLL_RUNNING;
2805
2806	return (DDI_SUCCESS);
2807}
2808
2809int
2810mc_board_del(mc_opl_t *mcp)
2811{
2812	int i;
2813	scf_log_t *p;
2814
2815	/*
2816	 * cleanup mac state
2817	 */
2818	mutex_enter(&mcp->mc_lock);
2819	if (mcp->mc_status & MC_MEMORYLESS) {
2820		mutex_exit(&mcp->mc_lock);
2821		mutex_destroy(&mcp->mc_lock);
2822		return (DDI_SUCCESS);
2823	}
2824	for (i = 0; i < BANKNUM_PER_SB; i++) {
2825		if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) {
2826			mcp->mc_bank[i].mcb_status &= ~BANK_INSTALLED;
2827		}
2828	}
2829
2830	/* stop memory patrol checking */
2831	mcp->mc_status &= ~MC_POLL_RUNNING;
2832
2833	/* just throw away all the scf logs */
2834	for (i = 0; i < BANKNUM_PER_SB; i++) {
2835		while ((p = mcp->mc_scf_log[i]) != NULL) {
2836			mcp->mc_scf_log[i] = p->sl_next;
2837			mcp->mc_scf_total[i]--;
2838			kmem_free(p, sizeof (scf_log_t));
2839		}
2840	}
2841
2842	if (mcp->mlist)
2843		mc_memlist_delete(mcp->mlist);
2844
2845	if (mcp->mc_dimm_list)
2846		mc_free_dimm_list(mcp->mc_dimm_list);
2847
2848	mutex_exit(&mcp->mc_lock);
2849
2850	mutex_destroy(&mcp->mc_lock);
2851	return (DDI_SUCCESS);
2852}
2853
2854int
2855mc_suspend(mc_opl_t *mcp, uint32_t flag)
2856{
2857	/* stop memory patrol checking */
2858	mutex_enter(&mcp->mc_lock);
2859	if (mcp->mc_status & MC_MEMORYLESS) {
2860		mutex_exit(&mcp->mc_lock);
2861		return (DDI_SUCCESS);
2862	}
2863
2864	mcp->mc_status &= ~MC_POLL_RUNNING;
2865
2866	mcp->mc_status |= flag;
2867	mutex_exit(&mcp->mc_lock);
2868
2869	return (DDI_SUCCESS);
2870}
2871
2872void
2873opl_mc_update_mlist(void)
2874{
2875	int i;
2876	mc_opl_t *mcp;
2877
2878	/*
2879	 * memory information is not updated until
2880	 * the post attach/detach stage during DR.
2881	 * This interface is used by dr_mem to inform
2882	 * mc-opl to update the mlist.
2883	 */
2884
2885	mutex_enter(&mcmutex);
2886	for (i = 0; i < OPL_MAX_BOARDS; i++) {
2887		if ((mcp = mc_instances[i]) == NULL)
2888			continue;
2889		mutex_enter(&mcp->mc_lock);
2890		if (mcp->mlist)
2891			mc_memlist_delete(mcp->mlist);
2892		mcp->mlist = NULL;
2893		mc_get_mlist(mcp);
2894		mutex_exit(&mcp->mc_lock);
2895	}
2896	mutex_exit(&mcmutex);
2897}
2898
2899/* caller must clear the SUSPEND bits or this will do nothing */
2900
2901int
2902mc_resume(mc_opl_t *mcp, uint32_t flag)
2903{
2904	int i;
2905	uint64_t basepa;
2906
2907	mutex_enter(&mcp->mc_lock);
2908	if (mcp->mc_status & MC_MEMORYLESS) {
2909		mutex_exit(&mcp->mc_lock);
2910		return (DDI_SUCCESS);
2911	}
2912	basepa = mcp->mc_start_address;
2913	if (get_base_address(mcp) == DDI_FAILURE) {
2914		mutex_exit(&mcp->mc_lock);
2915		return (DDI_FAILURE);
2916	}
2917
2918	if (basepa != mcp->mc_start_address) {
2919		if (mcp->mlist)
2920			mc_memlist_delete(mcp->mlist);
2921		mcp->mlist = NULL;
2922		mc_get_mlist(mcp);
2923	}
2924
2925	mcp->mc_status &= ~flag;
2926
2927	if (mcp->mc_status & (MC_SOFT_SUSPENDED | MC_DRIVER_SUSPENDED)) {
2928		mutex_exit(&mcp->mc_lock);
2929		return (DDI_SUCCESS);
2930	}
2931
2932	if (!(mcp->mc_status & MC_POLL_RUNNING)) {
2933		/* restart memory patrol checking */
2934		mcp->mc_status |= MC_POLL_RUNNING;
2935		for (i = 0; i < BANKNUM_PER_SB; i++) {
2936			if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) {
2937				mc_check_errors_func(mcp);
2938			}
2939		}
2940	}
2941	mutex_exit(&mcp->mc_lock);
2942
2943	return (DDI_SUCCESS);
2944}
2945
2946static mc_opl_t *
2947mc_pa_to_mcp(uint64_t pa)
2948{
2949	mc_opl_t *mcp;
2950	int i;
2951
2952	ASSERT(MUTEX_HELD(&mcmutex));
2953	for (i = 0; i < OPL_MAX_BOARDS; i++) {
2954		if ((mcp = mc_instances[i]) == NULL)
2955			continue;
2956		/* if mac patrol is suspended, we cannot rely on it */
2957		if (!(mcp->mc_status & MC_POLL_RUNNING) ||
2958		    (mcp->mc_status & MC_SOFT_SUSPENDED))
2959			continue;
2960		if (mc_rangecheck_pa(mcp, pa)) {
2961			return (mcp);
2962		}
2963	}
2964	return (NULL);
2965}
2966
2967/*
 * Get the physical board number from the logical board number.
2969 */
2970static int
2971mc_opl_get_physical_board(int sb)
2972{
2973	if (&opl_get_physical_board) {
2974		return (opl_get_physical_board(sb));
2975	}
2976
2977	cmn_err(CE_NOTE, "!opl_get_physical_board() not loaded\n");
2978	return (-1);
2979}
2980
2981/* ARGSUSED */
2982int
2983mc_get_mem_unum(int synd_code, uint64_t flt_addr, char *buf, int buflen,
2984	int *lenp)
2985{
2986	int i;
2987	int j;
2988	int sb;
2989	int bank;
2990	int cs;
2991	int rv = 0;
2992	mc_opl_t *mcp;
2993	char memb_num;
2994
2995	mutex_enter(&mcmutex);
2996
2997	if (((mcp = mc_pa_to_mcp(flt_addr)) == NULL) ||
2998	    (!pa_is_valid(mcp, flt_addr))) {
2999		mutex_exit(&mcmutex);
3000		if (snprintf(buf, buflen, "UNKNOWN") >= buflen) {
3001			return (ENOSPC);
3002		} else {
3003			if (lenp)
3004				*lenp = strlen(buf);
3005		}
3006		return (0);
3007	}
3008
3009	bank = pa_to_bank(mcp, flt_addr - mcp->mc_start_address);
3010	sb = mcp->mc_phys_board_num;
3011	cs = pa_to_cs(mcp, flt_addr - mcp->mc_start_address);
3012
3013	if (sb == -1) {
3014		mutex_exit(&mcmutex);
3015		return (ENXIO);
3016	}
3017
3018	switch (plat_model) {
3019	case MODEL_DC:
3020		i = BD_BK_SLOT_TO_INDEX(0, bank, 0);
3021		j = (cs == 0) ? i : i + 2;
3022		(void) snprintf(buf, buflen, "/%s%02d/MEM%s MEM%s",
3023		    model_names[plat_model].unit_name, sb,
3024		    mc_dc_dimm_unum_table[j],
3025		    mc_dc_dimm_unum_table[j + 1]);
3026		break;
3027	case MODEL_FF2:
3028	case MODEL_FF1:
3029		i = BD_BK_SLOT_TO_INDEX(sb, bank, 0);
3030		j = (cs == 0) ? i : i + 2;
3031		memb_num = mc_ff_dimm_unum_table[i][0];
3032		(void) snprintf(buf, buflen, "/%s/%s%c/MEM%s MEM%s",
3033		    model_names[plat_model].unit_name,
3034		    model_names[plat_model].mem_name, memb_num,
3035		    &mc_ff_dimm_unum_table[j][1],
3036		    &mc_ff_dimm_unum_table[j + 1][1]);
3037		break;
3038	case MODEL_IKKAKU:
3039		i = BD_BK_SLOT_TO_INDEX(sb, bank, 0);
3040		j = (cs == 0) ? i : i + 2;
3041		(void) snprintf(buf, buflen, "/%s/MEM%s MEM%s",
3042		    model_names[plat_model].unit_name,
3043		    &mc_ff_dimm_unum_table[j][1],
3044		    &mc_ff_dimm_unum_table[j + 1][1]);
3045		break;
3046	default:
3047		rv = ENXIO;
3048	}
3049	if (lenp) {
3050		*lenp = strlen(buf);
3051	}
3052	mutex_exit(&mcmutex);
3053	return (rv);
3054}
3055
3056int
3057opl_mc_suspend(void)
3058{
3059	mc_opl_t *mcp;
3060	int i;
3061
3062	mutex_enter(&mcmutex);
3063	for (i = 0; i < OPL_MAX_BOARDS; i++) {
3064		if ((mcp = mc_instances[i]) == NULL)
3065			continue;
3066		(void) mc_suspend(mcp, MC_SOFT_SUSPENDED);
3067	}
3068	mutex_exit(&mcmutex);
3069
3070	return (0);
3071}
3072
3073int
3074opl_mc_resume(void)
3075{
3076	mc_opl_t *mcp;
3077	int i;
3078
3079	mutex_enter(&mcmutex);
3080	for (i = 0; i < OPL_MAX_BOARDS; i++) {
3081		if ((mcp = mc_instances[i]) == NULL)
3082			continue;
3083		(void) mc_resume(mcp, MC_SOFT_SUSPENDED);
3084	}
3085	mutex_exit(&mcmutex);
3086
3087	return (0);
3088}

static void
3090insert_mcp(mc_opl_t *mcp)
3091{
3092	mutex_enter(&mcmutex);
3093	if (mc_instances[mcp->mc_board_num] != NULL) {
3094		MC_LOG("mc-opl instance for board# %d already exists\n",
3095		    mcp->mc_board_num);
3096	}
3097	mc_instances[mcp->mc_board_num] = mcp;
3098	mutex_exit(&mcmutex);
3099}
3100
3101static void
3102delete_mcp(mc_opl_t *mcp)
3103{
3104	mutex_enter(&mcmutex);
3105	mc_instances[mcp->mc_board_num] = 0;
3106	mutex_exit(&mcmutex);
3107}
3108
3109/* Error injection interface */
3110
3111static void
3112mc_lock_va(uint64_t pa, caddr_t new_va)
3113{
3114	tte_t tte;
3115
3116	vtag_flushpage(new_va, (uint64_t)ksfmmup);
3117	sfmmu_memtte(&tte, pa >> PAGESHIFT, PROC_DATA|HAT_NOSYNC, TTE8K);
3118	tte.tte_intlo |= TTE_LCK_INT;
3119	sfmmu_dtlb_ld_kva(new_va, &tte);
3120}
3121
3122static void
3123mc_unlock_va(caddr_t va)
3124{
3125	vtag_flushpage(va, (uint64_t)ksfmmup);
3126}
3127
3128/* ARGSUSED */
3129int
3130mc_inject_error(int error_type, uint64_t pa, uint32_t flags)
3131{
3132	mc_opl_t *mcp;
3133	int bank;
3134	uint32_t dimm_addr;
3135	uint32_t cntl;
3136	mc_rsaddr_info_t rsaddr;
3137	uint32_t data, stat;
3138	int both_sides = 0;
3139	uint64_t pa0;
3140	int extra_injection_needed = 0;
3141	extern void cpu_flush_ecache(void);
3142
3143	MC_LOG("HW mc_inject_error(%x, %lx, %x)\n", error_type, pa, flags);
3144
3145	mutex_enter(&mcmutex);
3146	if ((mcp = mc_pa_to_mcp(pa)) == NULL) {
3147		mutex_exit(&mcmutex);
3148		MC_LOG("mc_inject_error: invalid pa\n");
3149		return (ENOTSUP);
3150	}
3151
3152	mutex_enter(&mcp->mc_lock);
3153	mutex_exit(&mcmutex);
3154
3155	if (mcp->mc_status & (MC_SOFT_SUSPENDED | MC_DRIVER_SUSPENDED)) {
3156		mutex_exit(&mcp->mc_lock);
3157		MC_LOG("mc-opl has been suspended.  No error injection.\n");
3158		return (EBUSY);
3159	}
3160
3161	/* convert pa to offset within the board */
3162	MC_LOG("pa %lx, offset %lx\n", pa, pa - mcp->mc_start_address);
3163
3164	if (!pa_is_valid(mcp, pa)) {
3165		mutex_exit(&mcp->mc_lock);
3166		return (EINVAL);
3167	}
3168
3169	pa0 = pa - mcp->mc_start_address;
3170
3171	bank = pa_to_bank(mcp, pa0);
3172
3173	if (flags & MC_INJECT_FLAG_OTHER)
3174		bank = bank ^ 1;
3175
3176	if (MC_INJECT_MIRROR(error_type) && !IS_MIRROR(mcp, bank)) {
3177		mutex_exit(&mcp->mc_lock);
3178		MC_LOG("Not mirror mode\n");
3179		return (EINVAL);
3180	}
3181
3182	dimm_addr = pa_to_dimm(mcp, pa0);
3183
3184	MC_LOG("injecting error to /LSB%d/B%d/%x\n", mcp->mc_board_num, bank,
	    dimm_addr);

3188	switch (error_type) {
3189	case MC_INJECT_INTERMITTENT_MCE:
3190	case MC_INJECT_PERMANENT_MCE:
3191	case MC_INJECT_MUE:
3192		both_sides = 1;
3193	}
3194
3195	if (flags & MC_INJECT_FLAG_RESET)
3196		ST_MAC_REG(MAC_EG_CNTL(mcp, bank), 0);
3197
3198	ST_MAC_REG(MAC_EG_ADD(mcp, bank), dimm_addr & MAC_EG_ADD_MASK);
3199
3200	if (both_sides) {
3201		ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), 0);
3202		ST_MAC_REG(MAC_EG_ADD(mcp, bank^1), dimm_addr &
3203		    MAC_EG_ADD_MASK);
3204	}
3205
3206	switch (error_type) {
3207	case MC_INJECT_SUE:
3208		extra_injection_needed = 1;
3209		/*FALLTHROUGH*/
3210	case MC_INJECT_UE:
3211	case MC_INJECT_MUE:
3212		if (flags & MC_INJECT_FLAG_PATH) {
3213			cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_READ00 |
3214			    MAC_EG_FORCE_READ16 | MAC_EG_RDERR_ONCE;
3215		} else {
3216			cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR00 |
3217			    MAC_EG_FORCE_DERR16 | MAC_EG_DERR_ONCE;
3218		}
3219		flags |= MC_INJECT_FLAG_ST;
3220		break;
3221	case MC_INJECT_INTERMITTENT_CE:
3222	case MC_INJECT_INTERMITTENT_MCE:
3223		if (flags & MC_INJECT_FLAG_PATH) {
3224			cntl = MAC_EG_ADD_FIX |MAC_EG_FORCE_READ00 |
3225			    MAC_EG_RDERR_ONCE;
3226		} else {
3227			cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR16 |
3228			    MAC_EG_DERR_ONCE;
3229		}
3230		extra_injection_needed = 1;
3231		flags |= MC_INJECT_FLAG_ST;
3232		break;
3233	case MC_INJECT_PERMANENT_CE:
3234	case MC_INJECT_PERMANENT_MCE:
3235		if (flags & MC_INJECT_FLAG_PATH) {
3236			cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_READ00 |
3237			    MAC_EG_RDERR_ALWAYS;
3238		} else {
3239			cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR16 |
3240			    MAC_EG_DERR_ALWAYS;
3241		}
3242		flags |= MC_INJECT_FLAG_ST;
3243		break;
3244	case MC_INJECT_CMPE:
3245		data = 0xabcdefab;
3246		stphys(pa, data);
3247		cpu_flush_ecache();
3248		MC_LOG("CMPE: writing data %x to %lx\n", data, pa);
3249		ST_MAC_REG(MAC_MIRR(mcp, bank), MAC_MIRR_BANK_EXCLUSIVE);
3250		stphys(pa, data ^ 0xffffffff);
3251		membar_sync();
3252		cpu_flush_ecache();
3253		ST_MAC_REG(MAC_MIRR(mcp, bank), 0);
		MC_LOG("CMPE: writing new data %x to %lx\n",
		    data ^ 0xffffffff, pa);
3255		cntl = 0;
3256		break;
3257	case MC_INJECT_NOP:
3258		cntl = 0;
3259		break;
3260	default:
3261		MC_LOG("mc_inject_error: invalid option\n");
3262		cntl = 0;
3263	}
3264
3265	if (cntl) {
3266		ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl & MAC_EG_SETUP_MASK);
3267		ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl);
3268
3269		if (both_sides) {
3270			ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl &
3271			    MAC_EG_SETUP_MASK);
3272			ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl);
3273		}
3274	}
3275
3276	/*
3277	 * For all injection cases except compare error, we
3278	 * must write to the PA to trigger the error.
3279	 */
3280
3281	if (flags & MC_INJECT_FLAG_ST) {
3282		data = 0xf0e0d0c0;
3283		MC_LOG("Writing %x to %lx\n", data, pa);
3284		stphys(pa, data);
3285		cpu_flush_ecache();
3286	}
3287
3288
3289	if (flags & MC_INJECT_FLAG_LD) {
3290		if (flags & MC_INJECT_FLAG_PREFETCH) {
3291			/*
3292			 * Use strong prefetch operation to
3293			 * inject MI errors.
3294			 */
3295			page_t *pp;
3296			extern void mc_prefetch(caddr_t);
3297
3298			MC_LOG("prefetch\n");
3299
3300			pp = page_numtopp_nolock(pa >> PAGESHIFT);
3301			if (pp != NULL) {
3302				caddr_t	va, va1;
3303
3304				va = ppmapin(pp, PROT_READ|PROT_WRITE,
3305				    (caddr_t)-1);
3306				kpreempt_disable();
3307				mc_lock_va((uint64_t)pa, va);
3308				va1 = va + (pa & (PAGESIZE - 1));
3309				mc_prefetch(va1);
3310				mc_unlock_va(va);
3311				kpreempt_enable();
3312				ppmapout(va);
3313
3314				/*
3315				 * For MI errors, we need one extra
3316				 * injection for HW patrol to stop.
3317				 */
3318				extra_injection_needed = 1;
3319			} else {
3320				cmn_err(CE_WARN, "Cannot find page structure"
3321				    " for PA %lx\n", pa);
3322			}
3323		} else {
3324			MC_LOG("Reading from %lx\n", pa);
3325			data = ldphys(pa);
3326			MC_LOG("data = %x\n", data);
3327		}
3328
3329		if (extra_injection_needed) {
3330			/*
3331			 * These are the injection cases where the
3332			 * requested injected errors will not cause the HW
3333			 * patrol to stop. For these cases, we need to inject
3334			 * an extra 'real' PTRL error to force the
3335			 * HW patrol to stop so that we can report the
3336			 * errors injected. Note that we cannot read
3337			 * and report error status while the HW patrol
3338			 * is running.
3339			 */
3340			ST_MAC_REG(MAC_EG_CNTL(mcp, bank),
3341			    cntl & MAC_EG_SETUP_MASK);
3342			ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl);
3343
3344			if (both_sides) {
3345				ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl &
3346				    MAC_EG_SETUP_MASK);
3347				ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl);
3348			}
3349			data = 0xf0e0d0c0;
3350			MC_LOG("Writing %x to %lx\n", data, pa);
3351			stphys(pa, data);
3352			cpu_flush_ecache();
3353		}
3354	}
3355
3356	if (flags & MC_INJECT_FLAG_RESTART) {
3357		MC_LOG("Restart patrol\n");
3358		rsaddr.mi_restartaddr.ma_bd = mcp->mc_board_num;
3359		rsaddr.mi_restartaddr.ma_bank = bank;
3360		rsaddr.mi_restartaddr.ma_dimm_addr = dimm_addr;
3361		rsaddr.mi_valid = 1;
3362		rsaddr.mi_injectrestart = 1;
3363		(void) restart_patrol(mcp, bank, &rsaddr);
3364	}
3365
3366	if (flags & MC_INJECT_FLAG_POLL) {
3367		int running;
3368		int ebank = (IS_MIRROR(mcp, bank)) ? MIRROR_IDX(bank) : bank;
3369
3370		MC_LOG("Poll patrol error\n");
3371		stat = LD_MAC_REG(MAC_PTRL_STAT(mcp, bank));
3372		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
3373		running = cntl & MAC_CNTL_PTRL_START;
3374
3375		if (!running &&
3376		    (stat & (MAC_STAT_PTRL_ERRS|MAC_STAT_MI_ERRS))) {
3377			/*
3378			 * HW patrol stopped and we have errors to
3379			 * report. Do it.
3380			 */
3381			mcp->mc_speedup_period[ebank] = 0;
3382			rsaddr.mi_valid = 0;
3383			rsaddr.mi_injectrestart = 0;
3384			if (IS_MIRROR(mcp, bank)) {
3385				mc_error_handler_mir(mcp, bank, &rsaddr);
3386			} else {
3387				mc_error_handler(mcp, bank, &rsaddr);
3388			}
3389
3390			(void) restart_patrol(mcp, bank, &rsaddr);
3391		} else {
3392			/*
3393			 * We are expecting to report injected
3394			 * errors but the HW patrol is still running.
3395			 * Speed up the scanning
3396			 */
3397			mcp->mc_speedup_period[ebank] = 2;
3398			MAC_CMD(mcp, bank, 0);
3399			(void) restart_patrol(mcp, bank, NULL);
3400		}
3401	}
3402
3403	mutex_exit(&mcp->mc_lock);
3404	return (0);
3405}
3406
3407void
3408mc_stphysio(uint64_t pa, uint32_t data)
3409{
3410	MC_LOG("0x%x -> pa(%lx)\n", data, pa);
3411	stphysio(pa, data);
3412
3413	/* force the above write to be processed by mac patrol */
3414	data = ldphysio(pa);
3415	MC_LOG("pa(%lx) = 0x%x\n", pa, data);
3416}
3417
3418uint32_t
3419mc_ldphysio(uint64_t pa)
3420{
3421	uint32_t rv;
3422
3423	rv = ldphysio(pa);
3424	MC_LOG("pa(%lx) = 0x%x\n", pa, rv);
3425	return (rv);
3426}
3427
3428#define	isdigit(ch)	((ch) >= '0' && (ch) <= '9')
3429
3430/*
3431 * parse_unum_memory -- extract the board number and the DIMM name from
3432 * the unum.
3433 *
3434 * Return 0 for success and non-zero for a failure.
3435 */
3436int
3437parse_unum_memory(char *unum, int *board, char *dname)
3438{
3439	char *c;
3440	char x, y, z;
3441
3442	if ((c = strstr(unum, "CMU")) != NULL) {
3443		/* DC Model */
3444		c += 3;
3445		*board = (uint8_t)stoi(&c);
3446		if ((c = strstr(c, "MEM")) == NULL) {
3447			return (1);
3448		}
3449		c += 3;
3450		if (strlen(c) < 3) {
3451			return (2);
3452		}
3453		if ((!isdigit(c[0])) || (!(isdigit(c[1]))) ||
3454		    ((c[2] != 'A') && (c[2] != 'B'))) {
3455			return (3);
3456		}
3457		x = c[0];
3458		y = c[1];
3459		z = c[2];
3460	} else if ((c = strstr(unum, "MBU_")) != NULL) {
3461		/*  FF1/FF2/Ikkaku Model */
3462		c += 4;
3463		if ((c[0] != 'A') && (c[0] != 'B')) {
3464			return (4);
3465		}
3466		if (plat_model == MODEL_IKKAKU) {
3467			/* Ikkaku Model */
3468			x = '0';
3469			*board = 0;
3470		} else {
3471			/* FF1/FF2 Model */
3472			if ((c = strstr(c, "MEMB")) == NULL) {
3473				return (5);
3474			}
3475			c += 4;
3476
3477			x = c[0];
3478			*board =  ((uint8_t)stoi(&c)) / 4;
3479		}
3480
3481		if ((c = strstr(c, "MEM")) == NULL) {
3482			return (6);
3483		}
3484		c += 3;
3485		if (strlen(c) < 2) {
3486			return (7);
3487		}
3488		if ((!isdigit(c[0])) || ((c[1] != 'A') && (c[1] != 'B'))) {
3489			return (8);
3490		}
3491		y = c[0];
3492		z = c[1];
3493	} else {
3494		return (9);
3495	}
3496	if (*board < 0) {
3497		return (10);
3498	}
3499	dname[0] = x;
3500	dname[1] = y;
3501	dname[2] = z;
3502	dname[3] = '\0';
3503	return (0);
3504}
3505
3506/*
3507 * mc_get_mem_sid_dimm -- Get the serial-ID for a given board and
3508 * the DIMM name.
3509 */
3510int
3511mc_get_mem_sid_dimm(mc_opl_t *mcp, char *dname, char *buf,
3512    int buflen, int *lenp)
3513{
3514	int		ret = ENODEV;
3515	mc_dimm_info_t	*d = NULL;
3516
3517	if ((d = mcp->mc_dimm_list) == NULL) {
3518		MC_LOG("mc_get_mem_sid_dimm: mc_dimm_list is NULL\n");
3519		return (EINVAL);
3520		}
3521
3522	for (; d != NULL; d = d->md_next) {
3523		if (strcmp(d->md_dimmname, dname) == 0) {
3524			break;
3525		}
3526	}
3527	if (d != NULL) {
3528		*lenp = strlen(d->md_serial) + strlen(d->md_partnum);
		if (buflen <= *lenp) {
3530			cmn_err(CE_WARN, "mc_get_mem_sid_dimm: "
3531			    "buflen is smaller than %d\n", *lenp);
3532			ret = ENOSPC;
3533		} else {
3534			(void) snprintf(buf, buflen, "%s:%s",
3535			    d->md_serial, d->md_partnum);
3536			ret = 0;
3537		}
3538	}
3539	MC_LOG("mc_get_mem_sid_dimm: Ret=%d Name=%s Serial-ID=%s\n",
3540	    ret, dname, (ret == 0) ? buf : "");
3541	return (ret);
3542}
3543
3544int
3545mc_set_mem_sid(mc_opl_t *mcp, char *buf, int buflen, int sb,
3546    int bank, uint32_t mf_type, uint32_t d_slot)
3547{
3548	int	lenp = buflen;
3549	int	id;
3550	int	ret;
3551	char	*dimmnm;
3552
3553	if (mf_type == FLT_TYPE_INTERMITTENT_CE ||
3554	    mf_type == FLT_TYPE_PERMANENT_CE) {
3555		if (plat_model == MODEL_DC) {
3556			/*
3557			 * All DC models
3558			 */
3559			id = BD_BK_SLOT_TO_INDEX(0, bank, d_slot);
3560			dimmnm = mc_dc_dimm_unum_table[id];
3561		} else {
3562			/*
3563			 * All FF and Ikkaku models
3564			 */
3565			id = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot);
3566			dimmnm = mc_ff_dimm_unum_table[id];
3567		}
3568		if ((ret = mc_get_mem_sid_dimm(mcp, dimmnm, buf, buflen,
3569		    &lenp)) != 0) {
3570			return (ret);
3571		}
3572	} else {
3573		return (1);
3574	}
3575
3576	return (0);
3577}
3578
3579/*
3580 * mc_get_mem_sid -- get the DIMM serial-ID corresponding to the unum.
3581 */
3582int
3583mc_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
3584{
3585	int	i;
3586	int	ret = ENODEV;
3587	int	board;
3588	char	dname[MCOPL_MAX_DIMMNAME + 1];
3589	mc_opl_t *mcp;
3590
3591	MC_LOG("mc_get_mem_sid: unum=%s buflen=%d\n", unum, buflen);
3592	if ((ret = parse_unum_memory(unum, &board, dname)) != 0) {
3593		MC_LOG("mc_get_mem_sid: unum(%s) parsing failed ret=%d\n",
3594		    unum, ret);
3595		return (EINVAL);
3596	}
3597
3598	if (board < 0) {
3599		MC_LOG("mc_get_mem_sid: Invalid board=%d dimm=%s\n",
3600		    board, dname);
3601		return (EINVAL);
3602	}
3603
3604	mutex_enter(&mcmutex);
3605	/*
	 * return ENOENT if we cannot find a matching board.
3607	 */
3608	ret = ENOENT;
3609	for (i = 0; i < OPL_MAX_BOARDS; i++) {
3610		if ((mcp = mc_instances[i]) == NULL)
3611			continue;
3612		mutex_enter(&mcp->mc_lock);
3613		if (mcp->mc_phys_board_num != board) {
3614			mutex_exit(&mcp->mc_lock);
3615			continue;
3616		}
3617		ret = mc_get_mem_sid_dimm(mcp, dname, buf, buflen, lenp);
3618		if (ret == 0) {
3619			mutex_exit(&mcp->mc_lock);
3620			break;
3621		}
3622		mutex_exit(&mcp->mc_lock);
3623	}
3624	mutex_exit(&mcmutex);
3625	return (ret);
3626}
3627
3628/*
3629 * mc_get_mem_offset -- get the offset in a DIMM for a given physical address.
3630 */
3631int
3632mc_get_mem_offset(uint64_t paddr, uint64_t *offp)
3633{
3634	int		i;
3635	int		ret = ENODEV;
3636	mc_addr_t	maddr;
3637	mc_opl_t	*mcp;
3638
3639	mutex_enter(&mcmutex);
3640	for (i = 0; ((i < OPL_MAX_BOARDS) && (ret != 0)); i++) {
3641		if ((mcp = mc_instances[i]) == NULL)
3642			continue;
3643		mutex_enter(&mcp->mc_lock);
3644		if (!pa_is_valid(mcp, paddr)) {
3645			mutex_exit(&mcp->mc_lock);
3646			continue;
3647		}
3648		if (pa_to_maddr(mcp, paddr, &maddr) == 0) {
3649			*offp = maddr.ma_dimm_addr;
3650			ret = 0;
3651		}
3652		mutex_exit(&mcp->mc_lock);
3653	}
3654	mutex_exit(&mcmutex);
3655	MC_LOG("mc_get_mem_offset: Ret=%d paddr=0x%lx offset=0x%lx\n",
3656	    ret, paddr, *offp);
3657	return (ret);
3658}
3659
3660/*
3661 * dname_to_bankslot - Get the bank and slot number from the DIMM name.
3662 */
3663int
3664dname_to_bankslot(char *dname, int *bank, int *slot)
3665{
3666	int i;
3667	int tsz;
3668	char **tbl;
3669
3670	if (plat_model == MODEL_DC) {
3671		/*
3672		 * All DC models
3673		 */
3674		tbl = mc_dc_dimm_unum_table;
3675		tsz = OPL_MAX_DIMMS;
3676	} else {
3677		/*
3678		 * All FF and Ikkaku models
3679		 */
3680		tbl = mc_ff_dimm_unum_table;
3681		tsz = 2 * OPL_MAX_DIMMS;
3682	}
3683
3684	for (i = 0; i < tsz; i++) {
3685		if (strcmp(dname,  tbl[i]) == 0) {
3686			break;
3687		}
3688	}
3689	if (i == tsz) {
3690		return (1);
3691	}
3692	*bank = INDEX_TO_BANK(i);
3693	*slot = INDEX_TO_SLOT(i);
3694	return (0);
3695}
3696
3697/*
3698 * mc_get_mem_addr -- get the physical address of a DIMM corresponding
3699 * to the unum and sid.
3700 */
3701int
3702mc_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *paddr)
3703{
3704	int	board;
3705	int	bank;
3706	int	slot;
3707	int	i;
3708	int	ret = ENODEV;
3709	char	dname[MCOPL_MAX_DIMMNAME + 1];
3710	mc_addr_t maddr;
3711	mc_opl_t *mcp;
3712
3713	MC_LOG("mc_get_mem_addr: unum=%s sid=%s offset=0x%lx\n",
3714	    unum, sid, offset);
	if ((ret = parse_unum_memory(unum, &board, dname)) != 0) {
		MC_LOG("mc_get_mem_addr: unum(%s) parsing failed ret=%d\n",
		    unum, ret);
3718		return (EINVAL);
3719	}
3720
3721	if (board < 0) {
3722		MC_LOG("mc_get_mem_addr: Invalid board=%d dimm=%s\n",
3723		    board, dname);
3724		return (EINVAL);
3725	}
3726
3727	mutex_enter(&mcmutex);
3728	for (i = 0; i < OPL_MAX_BOARDS; i++) {
3729		if ((mcp = mc_instances[i]) == NULL)
3730			continue;
3731		mutex_enter(&mcp->mc_lock);
3732		if (mcp->mc_phys_board_num != board) {
3733			mutex_exit(&mcp->mc_lock);
3734			continue;
3735		}
3736
3737		ret = dname_to_bankslot(dname, &bank, &slot);
3738		MC_LOG("mc_get_mem_addr: bank=%d slot=%d\n", bank, slot);
3739		if (ret != 0) {
3740			MC_LOG("mc_get_mem_addr: dname_to_bankslot failed\n");
3741			ret = ENODEV;
3742		} else {
3743			maddr.ma_bd = mcp->mc_board_num;
3744			maddr.ma_bank =  bank;
3745			maddr.ma_dimm_addr = offset;
3746			ret = mcaddr_to_pa(mcp, &maddr, paddr);
3747			if (ret != 0) {
3748				MC_LOG("mc_get_mem_addr: "
3749				    "mcaddr_to_pa failed\n");
3750				ret = ENODEV;
3751				mutex_exit(&mcp->mc_lock);
3752				continue;
3753			}
3754			mutex_exit(&mcp->mc_lock);
3755			break;
3756		}
3757		mutex_exit(&mcp->mc_lock);
3758	}
3759	mutex_exit(&mcmutex);
3760	MC_LOG("mc_get_mem_addr: Ret=%d, Paddr=0x%lx\n", ret, *paddr);
3761	return (ret);
3762}
3763
3764static void
3765mc_free_dimm_list(mc_dimm_info_t *d)
3766{
3767	mc_dimm_info_t *next;
3768
3769	while (d != NULL) {
3770		next = d->md_next;
3771		kmem_free(d, sizeof (mc_dimm_info_t));
3772		d = next;
3773	}
3774}
3775
3776/*
3777 * mc_get_dimm_list -- get the list of dimms with serial-id info
3778 * from the SP.
3779 */
3780mc_dimm_info_t *
3781mc_get_dimm_list(mc_opl_t *mcp)
3782{
3783	uint32_t	bufsz;
3784	uint32_t	maxbufsz;
3785	int		ret;
3786	int		sexp;
3787	board_dimm_info_t *bd_dimmp;
3788	mc_dimm_info_t	*dimm_list = NULL;
3789
3790	maxbufsz = bufsz = sizeof (board_dimm_info_t) +
3791	    ((MCOPL_MAX_DIMMNAME +  MCOPL_MAX_SERIAL +
3792	    MCOPL_MAX_PARTNUM) * OPL_MAX_DIMMS);
3793
3794	bd_dimmp = (board_dimm_info_t *)kmem_alloc(bufsz, KM_SLEEP);
3795	ret = scf_get_dimminfo(mcp->mc_board_num, (void *)bd_dimmp, &bufsz);
3796
	MC_LOG("mc_get_dimm_list: scf_get_dimminfo returned %d\n", ret);
3798	if (ret == 0) {
3799		sexp = sizeof (board_dimm_info_t) +
3800		    ((bd_dimmp->bd_dnamesz +  bd_dimmp->bd_serialsz +
3801		    bd_dimmp->bd_partnumsz) * bd_dimmp->bd_numdimms);
3802
3803		if ((bd_dimmp->bd_version == OPL_DIMM_INFO_VERSION) &&
3804		    (bd_dimmp->bd_dnamesz <= MCOPL_MAX_DIMMNAME) &&
3805		    (bd_dimmp->bd_serialsz <= MCOPL_MAX_SERIAL) &&
3806		    (bd_dimmp->bd_partnumsz <= MCOPL_MAX_PARTNUM) &&
3807		    (sexp <= bufsz)) {
3808
3809#ifdef DEBUG
3810			if (oplmc_debug)
3811				mc_dump_dimm_info(bd_dimmp);
3812#endif
3813			dimm_list = mc_prepare_dimmlist(bd_dimmp);
3814
3815		} else {
3816			cmn_err(CE_WARN, "DIMM info version mismatch\n");
3817		}
3818	}
3819	kmem_free(bd_dimmp, maxbufsz);
3820	MC_LOG("mc_get_dimm_list: dimmlist=0x%p\n", (void *)dimm_list);
3821	return (dimm_list);
3822}
3823
3824/*
3825 * mc_prepare_dimmlist - Prepare the dimm list from the information
3826 * received from the SP.
3827 */
3828mc_dimm_info_t *
3829mc_prepare_dimmlist(board_dimm_info_t *bd_dimmp)
3830{
3831	char	*dimm_name;
3832	char	*serial;
3833	char	*part;
3834	int	dimm;
3835	int	dnamesz = bd_dimmp->bd_dnamesz;
3836	int	sersz = bd_dimmp->bd_serialsz;
3837	int	partsz = bd_dimmp->bd_partnumsz;
3838	mc_dimm_info_t	*dimm_list = NULL;
3839	mc_dimm_info_t	*d;
3840
3841	dimm_name = (char *)(bd_dimmp + 1);
3842	for (dimm = 0; dimm < bd_dimmp->bd_numdimms; dimm++) {
3843
3844		d = (mc_dimm_info_t *)kmem_alloc(sizeof (mc_dimm_info_t),
3845		    KM_SLEEP);
3846
3847		bcopy(dimm_name, d->md_dimmname, dnamesz);
3848		d->md_dimmname[dnamesz] = 0;
3849
3850		serial = dimm_name + dnamesz;
3851		bcopy(serial, d->md_serial, sersz);
3852		d->md_serial[sersz] = 0;
3853
3854		part = serial + sersz;
3855		bcopy(part, d->md_partnum, partsz);
3856		d->md_partnum[partsz] = 0;
3857
3858		d->md_next = dimm_list;
3859		dimm_list = d;
3860		dimm_name = part + partsz;
3861	}
3862	return (dimm_list);
3863}
3864
3865static int
3866mc_get_mem_fmri(mc_flt_page_t *fpag, char **unum)
3867{
3868	if (fpag->fmri_addr == 0 || fpag->fmri_sz > MEM_FMRI_MAX_BUFSIZE)
3869		return (EINVAL);
3870
3871	*unum = kmem_alloc(fpag->fmri_sz, KM_SLEEP);
3872	if (copyin((void *)fpag->fmri_addr, *unum, fpag->fmri_sz) != 0) {
3873		kmem_free(*unum, fpag->fmri_sz);
3874		return (EFAULT);
3875	}
3876	return (0);
3877}
3878
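/*
 * mc_scf_log_event -- translate a fault page reported from userland
 * (via its FMRI/unum) back to a physical address and queue it on the
 * per-bank SCF log list, which is drained by mc_process_scf_log().
 */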
3879static int
3880mc_scf_log_event(mc_flt_page_t *flt_pag)
3881{
3882	mc_opl_t *mcp;
3883	int board, bank, slot;
3884	int len, rv = 0;
3885	char *unum, *sid;
3886	char dname[MCOPL_MAX_DIMMNAME + 1];
3887	size_t sid_sz;
3888	uint64_t pa;
3889	mc_flt_stat_t flt_stat;
3890
3891	if ((sid_sz = cpu_get_name_bufsize()) == 0)
3892		return (ENOTSUP);
3893
3894	if ((rv = mc_get_mem_fmri(flt_pag, &unum)) != 0) {
3895		MC_LOG("mc_scf_log_event: mc_get_mem_fmri failed\n");
3896		return (rv);
3897	}
3898
3899	sid = kmem_zalloc(sid_sz, KM_SLEEP);
3900
3901	if ((rv = mc_get_mem_sid(unum, sid, sid_sz, &len)) != 0) {
3902		MC_LOG("mc_scf_log_event: mc_get_mem_sid failed\n");
3903		goto out;
3904	}
3905
3906	if ((rv = mc_get_mem_addr(unum, sid, (uint64_t)flt_pag->err_add,
3907	    &pa)) != 0) {
3908		MC_LOG("mc_scf_log_event: mc_get_mem_addr failed\n");
3909		goto out;
3910	}
3911
3912	if (parse_unum_memory(unum, &board, dname) != 0) {
3913		MC_LOG("mc_scf_log_event: parse_unum_memory failed\n");
3914		rv = EINVAL;
3915		goto out;
3916	}
3917
3918	if (board < 0) {
3919		MC_LOG("mc_scf_log_event: Invalid board=%d dimm=%s\n",
3920		    board, dname);
3921		rv = EINVAL;
3922		goto out;
3923	}
3924
3925	if (dname_to_bankslot(dname, &bank, &slot) != 0) {
3926		MC_LOG("mc_scf_log_event: dname_to_bankslot failed\n");
3927		rv = EINVAL;
3928		goto out;
3929	}
3930
3931	mutex_enter(&mcmutex);
3932
3933	flt_stat.mf_err_add = flt_pag->err_add;
3934	flt_stat.mf_err_log = flt_pag->err_log;
3935	flt_stat.mf_flt_paddr = pa;
3936
3937	if ((mcp = mc_pa_to_mcp(pa)) == NULL) {
3938		mutex_exit(&mcmutex);
3939		MC_LOG("mc_scf_log_event: invalid pa\n");
3940		rv = EINVAL;
3941		goto out;
3942	}
3943
3944	MC_LOG("mc_scf_log_event: DIMM%s, /LSB%d/B%d/%x, pa %lx elog %x\n",
3945	    unum, mcp->mc_board_num, bank, flt_pag->err_add, pa,
3946	    flt_pag->err_log);
3947
3948	mutex_enter(&mcp->mc_lock);
3949
3950	if (!pa_is_valid(mcp, pa)) {
3951		mutex_exit(&mcp->mc_lock);
3952		mutex_exit(&mcmutex);
3953		rv = EINVAL;
3954		goto out;
3955	}
3956
3957	rv = 0;
3958
3959	mc_queue_scf_log(mcp, &flt_stat, bank);
3960
3961	mutex_exit(&mcp->mc_lock);
3962	mutex_exit(&mcmutex);
3963
3964out:
3965	kmem_free(unum, flt_pag->fmri_sz);
3966	kmem_free(sid, sid_sz);
3967
3968	return (rv);
3969}
3970
3971#ifdef DEBUG
3972void
3973mc_dump_dimm(char *buf, int dnamesz, int serialsz, int partnumsz)
3974{
3975	char dname[MCOPL_MAX_DIMMNAME + 1];
3976	char serial[MCOPL_MAX_SERIAL + 1];
	char part[MCOPL_MAX_PARTNUM + 1];
3978	char *b;
3979
3980	b = buf;
3981	bcopy(b, dname, dnamesz);
3982	dname[dnamesz] = 0;
3983
3984	b += dnamesz;
3985	bcopy(b, serial, serialsz);
3986	serial[serialsz] = 0;
3987
3988	b += serialsz;
3989	bcopy(b, part, partnumsz);
3990	part[partnumsz] = 0;
3991
3992	printf("DIMM=%s  Serial=%s PartNum=%s\n", dname, serial, part);
3993}
3994
3995void
3996mc_dump_dimm_info(board_dimm_info_t *bd_dimmp)
3997{
3998	int	dimm;
3999	int	dnamesz = bd_dimmp->bd_dnamesz;
4000	int	sersz = bd_dimmp->bd_serialsz;
4001	int	partsz = bd_dimmp->bd_partnumsz;
4002	char	*buf;
4003
4004	printf("Version=%d Board=%02d DIMMs=%d NameSize=%d "
4005	    "SerialSize=%d PartnumSize=%d\n", bd_dimmp->bd_version,
4006	    bd_dimmp->bd_boardnum, bd_dimmp->bd_numdimms, bd_dimmp->bd_dnamesz,
4007	    bd_dimmp->bd_serialsz, bd_dimmp->bd_partnumsz);
4008	printf("======================================================\n");
4009
4010	buf = (char *)(bd_dimmp + 1);
4011	for (dimm = 0; dimm < bd_dimmp->bd_numdimms; dimm++) {
4012		mc_dump_dimm(buf, dnamesz, sersz, partsz);
4013		buf += dnamesz + sersz + partsz;
4014	}
4015	printf("======================================================\n");
4016}
4017
4018
4019/* ARGSUSED */
4020static int
4021mc_ioctl_debug(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
4022	int *rvalp)
4023{
4024	caddr_t	buf, kbuf;
4025	uint64_t pa;
4026	int rv = 0;
4027	int i;
4028	uint32_t flags;
	static uint32_t offset = 0;
4031
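	/*
	 * The debug ioctl packs its arguments into cmd: the low 4 bits
	 * select the injection operation (MCI_*) and the bits above them
	 * are passed to mc_inject_error() as MC_INJECT_FLAG_* flags.
	 */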
4032	flags = (cmd >> 4) & 0xfffffff;
4033
4034	cmd &= 0xf;
4035
4036	MC_LOG("mc_ioctl(cmd = %x, flags = %x)\n", cmd, flags);
4037
4038	if (arg != NULL) {
4039		if (ddi_copyin((const void *)arg, (void *)&pa,
4040		    sizeof (uint64_t), 0) < 0) {
4041			rv = EFAULT;
4042			return (rv);
4043		}
4044		buf = NULL;
4045	} else {
4046		buf = (caddr_t)kmem_alloc(PAGESIZE, KM_SLEEP);
4047
4048		pa = va_to_pa(buf);
4049		pa += offset;
4050
4051		offset += 64;
4052		if (offset >= PAGESIZE)
4053			offset = 0;
4054	}
4055
4056	switch (cmd) {
4057	case MCI_CE:
4058		(void) mc_inject_error(MC_INJECT_INTERMITTENT_CE, pa, flags);
4059		break;
4060	case MCI_PERM_CE:
4061		(void) mc_inject_error(MC_INJECT_PERMANENT_CE, pa, flags);
4062		break;
4063	case MCI_UE:
4064		(void) mc_inject_error(MC_INJECT_UE, pa, flags);
4065		break;
4066	case MCI_M_CE:
4067		(void) mc_inject_error(MC_INJECT_INTERMITTENT_MCE, pa, flags);
4068		break;
4069	case MCI_M_PCE:
4070		(void) mc_inject_error(MC_INJECT_PERMANENT_MCE, pa, flags);
4071		break;
4072	case MCI_M_UE:
4073		(void) mc_inject_error(MC_INJECT_MUE, pa, flags);
4074		break;
4075	case MCI_CMP:
4076		(void) mc_inject_error(MC_INJECT_CMPE, pa, flags);
4077		break;
4078	case MCI_NOP:
4079		(void) mc_inject_error(MC_INJECT_NOP, pa, flags); break;
4080	case MCI_SHOW_ALL:
4081		mc_debug_show_all = 1;
4082		break;
4083	case MCI_SHOW_NONE:
4084		mc_debug_show_all = 0;
4085		break;
4086	case MCI_ALLOC:
4087		/*
		 * Just allocate some kernel memory and never free it.
		 * 512 MB seems to be the maximum size supported.
4090		 */
4091		cmn_err(CE_NOTE, "Allocating kmem %d MB\n", flags * 512);
4092		for (i = 0; i < flags; i++) {
4093			kbuf = kmem_alloc(512 * 1024 * 1024, KM_SLEEP);
4094			cmn_err(CE_NOTE, "kmem buf %llx PA %llx\n",
4095			    (u_longlong_t)kbuf, (u_longlong_t)va_to_pa(kbuf));
4096		}
4097		break;
4098	case MCI_SUSPEND:
4099		(void) opl_mc_suspend();
4100		break;
4101	case MCI_RESUME:
4102		(void) opl_mc_resume();
4103		break;
4104	default:
4105		rv = ENXIO;
4106	}
4107	if (buf)
4108		kmem_free(buf, PAGESIZE);
4109
4110	return (rv);
4111}
4112
4113#endif /* DEBUG */
4114