mc-us3.c revision 7656:2621e50fdf4a
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <sys/types.h>
27#include <sys/conf.h>
28#include <sys/ddi.h>
29#include <sys/stat.h>
30#include <sys/sunddi.h>
31#include <sys/ddi_impldefs.h>
32#include <sys/obpdefs.h>
33#include <sys/cmn_err.h>
34#include <sys/errno.h>
35#include <sys/kmem.h>
36#include <sys/open.h>
37#include <sys/thread.h>
38#include <sys/cpuvar.h>
39#include <sys/x_call.h>
40#include <sys/debug.h>
41#include <sys/sysmacros.h>
42#include <sys/ivintr.h>
43#include <sys/intr.h>
44#include <sys/intreg.h>
45#include <sys/autoconf.h>
46#include <sys/modctl.h>
47#include <sys/spl.h>
48#include <sys/async.h>
49#include <sys/mc.h>
50#include <sys/mc-us3.h>
51#include <sys/cpu_module.h>
52#include <sys/platform_module.h>
53
54/*
55 * Function prototypes
56 */
57
58static int mc_open(dev_t *, int, int, cred_t *);
59static int mc_close(dev_t, int, int, cred_t *);
60static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
61static int mc_attach(dev_info_t *, ddi_attach_cmd_t);
62static int mc_detach(dev_info_t *, ddi_detach_cmd_t);
63
64/*
65 * Configuration data structures
66 */
67static struct cb_ops mc_cb_ops = {
68	mc_open,			/* open */
69	mc_close,			/* close */
70	nulldev,			/* strategy */
71	nulldev,			/* print */
72	nodev,				/* dump */
73	nulldev,			/* read */
74	nulldev,			/* write */
75	mc_ioctl,			/* ioctl */
76	nodev,				/* devmap */
77	nodev,				/* mmap */
78	nodev,				/* segmap */
79	nochpoll,			/* poll */
80	ddi_prop_op,			/* cb_prop_op */
81	0,				/* streamtab */
82	D_MP | D_NEW | D_HOTPLUG,	/* Driver compatibility flag */
83	CB_REV,				/* rev */
84	nodev,				/* cb_aread */
85	nodev				/* cb_awrite */
86};
87
88static struct dev_ops mc_ops = {
89	DEVO_REV,			/* rev */
90	0,				/* refcnt  */
91	ddi_getinfo_1to1,		/* getinfo */
92	nulldev,			/* identify */
93	nulldev,			/* probe */
94	mc_attach,			/* attach */
95	mc_detach,			/* detach */
96	nulldev,			/* reset */
97	&mc_cb_ops,			/* cb_ops */
98	(struct bus_ops *)0,		/* bus_ops */
99	nulldev,			/* power */
100	ddi_quiesce_not_needed,			/* quiesce */
101};
102
103/*
104 * Driver globals
105 */
106static void *mcp;
107static int nmcs = 0;
108static int seg_id = 0;
109static int nsegments = 0;
110static uint64_t memsize = 0;
111static int maxbanks = 0;
112
113static mc_dlist_t *seg_head, *seg_tail, *bank_head, *bank_tail;
114static mc_dlist_t *mctrl_head, *mctrl_tail, *dgrp_head, *dgrp_tail;
115static mc_dlist_t *device_head, *device_tail;
116
117static kmutex_t	mcmutex;
118static kmutex_t	mcdatamutex;
119
120static krwlock_t mcdimmsids_rw;
121
122/* pointer to cache of DIMM serial ids */
123static dimm_sid_cache_t	*mc_dimm_sids;
124static int		max_entries;
125
126extern struct mod_ops mod_driverops;
127
128static struct modldrv modldrv = {
129	&mod_driverops,			/* module type, this one is a driver */
130	"Memory-controller",		/* module name */
131	&mc_ops,			/* driver ops */
132};
133
134static struct modlinkage modlinkage = {
135	MODREV_1,		/* rev */
136	(void *)&modldrv,
137	NULL
138};
139
140static int mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf,
141    int buflen, int *lenp);
142static int mc_get_mem_info(int synd_code, uint64_t paddr,
143    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
144    int *segsp, int *banksp, int *mcidp);
145static int mc_get_mem_sid(int mcid, int dimm, char *buf, int buflen, int *lenp);
146static int mc_get_mem_offset(uint64_t paddr, uint64_t *offp);
147static int mc_get_mem_addr(int mcid, char *sid, uint64_t off, uint64_t *paddr);
148static int mc_init_sid_cache(void);
149static int mc_get_mcregs(struct mc_soft_state *);
150static void mc_construct(int mc_id, void *dimminfop);
151static int mlayout_add(int mc_id, int bank_no, uint64_t reg, void *dimminfop);
152static void mlayout_del(int mc_id, int delete);
153static struct seg_info *seg_match_base(u_longlong_t base);
154static void mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
155static void mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
156static mc_dlist_t *mc_node_get(int id, mc_dlist_t *head);
157static void mc_add_mem_unum_label(char *buf, int mcid, int bank, int dimm);
158static int mc_populate_sid_cache(void);
159static int mc_get_sid_cache_index(int mcid);
160static void mc_update_bank(struct bank_info *bank);
161
162#pragma weak p2get_mem_unum
163#pragma weak p2get_mem_info
164#pragma weak p2get_mem_sid
165#pragma weak p2get_mem_offset
166#pragma weak p2get_mem_addr
167#pragma weak p2init_sid_cache
168#pragma weak plat_add_mem_unum_label
169#pragma weak plat_alloc_sid_cache
170#pragma weak plat_populate_sid_cache
171
172#define	QWORD_SIZE		144
173#define	QWORD_SIZE_BYTES	(QWORD_SIZE / 8)
174
175/*
176 * These are the module initialization routines.
177 */
178
179int
180_init(void)
181{
182	int error;
183
184	if ((error = ddi_soft_state_init(&mcp,
185	    sizeof (struct mc_soft_state), 1)) != 0)
186		return (error);
187
188	error = mod_install(&modlinkage);
189	if (error == 0) {
190		mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL);
191		mutex_init(&mcdatamutex, NULL, MUTEX_DRIVER, NULL);
192		rw_init(&mcdimmsids_rw, NULL, RW_DRIVER, NULL);
193	}
194
195	return (error);
196}
197
198int
199_fini(void)
200{
201	int error;
202
203	if ((error = mod_remove(&modlinkage)) != 0)
204		return (error);
205
206	ddi_soft_state_fini(&mcp);
207	mutex_destroy(&mcmutex);
208	mutex_destroy(&mcdatamutex);
209	rw_destroy(&mcdimmsids_rw);
210
211	if (mc_dimm_sids)
212		kmem_free(mc_dimm_sids, sizeof (dimm_sid_cache_t) *
213		    max_entries);
214
215	return (0);
216}
217
218int
219_info(struct modinfo *modinfop)
220{
221	return (mod_info(&modlinkage, modinfop));
222}
223
224static int
225mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
226{
227	struct mc_soft_state *softsp;
228	struct dimm_info *dimminfop;
229	int instance, len, err;
230
231	/* get the instance of this devi */
232	instance = ddi_get_instance(devi);
233
234	switch (cmd) {
235	case DDI_ATTACH:
236		break;
237
238	case DDI_RESUME:
239		/* get the soft state pointer for this device node */
240		softsp = ddi_get_soft_state(mcp, instance);
241		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: DDI_RESUME: updating MADRs\n",
242		    instance));
243		/*
244		 * During resume, the source and target board's bank_infos
245		 * need to be updated with the new mc MADR values.  This is
246		 * implemented with existing functionality by first removing
247		 * the props and allocated data structs, and then adding them
248		 * back in.
249		 */
250		if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
251		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
252		    MEM_CFG_PROP_NAME) == 1) {
253			(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
254			    MEM_CFG_PROP_NAME);
255		}
256		mlayout_del(softsp->portid, 0);
257		if (mc_get_mcregs(softsp) == -1) {
258			cmn_err(CE_WARN, "mc_attach: mc%d DDI_RESUME failure\n",
259			    instance);
260		}
261		return (DDI_SUCCESS);
262
263	default:
264		return (DDI_FAILURE);
265	}
266
267	if (ddi_soft_state_zalloc(mcp, instance) != DDI_SUCCESS)
268		return (DDI_FAILURE);
269
270	softsp = ddi_get_soft_state(mcp, instance);
271
272	/* Set the dip in the soft state */
273	softsp->dip = devi;
274
275	if ((softsp->portid = (int)ddi_getprop(DDI_DEV_T_ANY, softsp->dip,
276	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
277		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to get %s property",
278		    instance, "portid"));
279		goto bad;
280	}
281
282	DPRINTF(MC_ATTACH_DEBUG, ("mc%d ATTACH: portid %d, cpuid %d\n",
283	    instance, softsp->portid, CPU->cpu_id));
284
285	/* map in the registers for this device. */
286	if (ddi_map_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0)) {
287		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to map registers",
288		    instance));
289		goto bad;
290	}
291
292	/*
293	 * Get the dimm labels and pin routing information from the memory-layout
294	 * property if the memory controller is enabled.
295	 *
296	 * Basically every memory-controller node on every machine should
297	 * have one of these properties unless the memory controller is
298	 * physically not capable of having memory attached to it, e.g.
299	 * Excalibur's slave processor.
300	 */
301	err = ddi_getlongprop(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_DONTPASS,
302	    "memory-layout", (caddr_t)&dimminfop, &len);
303	if (err == DDI_PROP_SUCCESS) {
304		/*
305		 * Set the pointer and size of property in the soft state
306		 */
307		softsp->memlayoutp = dimminfop;
308		softsp->size = len;
309	} else if (err == DDI_PROP_NOT_FOUND) {
310		/*
311		 * This is a disabled MC. Clear out the pointer and size
312		 * of the property in the soft state.
313		 */
314		softsp->memlayoutp = NULL;
315		softsp->size = 0;
316	} else {
317		DPRINTF(MC_ATTACH_DEBUG, ("mc%d is disabled: dimminfop %p\n",
318		    instance, (void *)dimminfop));
319		goto bad2;
320	}
321
322	DPRINTF(MC_ATTACH_DEBUG, ("mc%d: dimminfop=0x%p data=0x%lx len=%d\n",
323	    instance, (void *)dimminfop, *(uint64_t *)dimminfop, len));
324
325	/* Get MC registers and construct all needed data structure */
326	if (mc_get_mcregs(softsp) == -1)
327		goto bad1;
328
329	mutex_enter(&mcmutex);
330	if (nmcs == 1) {
331		if (&p2get_mem_unum)
332			p2get_mem_unum = mc_get_mem_unum;
333		if (&p2get_mem_info)
334			p2get_mem_info = mc_get_mem_info;
335		if (&p2get_mem_sid)
336			p2get_mem_sid = mc_get_mem_sid;
337		if (&p2get_mem_offset)
338			p2get_mem_offset = mc_get_mem_offset;
339		if (&p2get_mem_addr)
340			p2get_mem_addr = mc_get_mem_addr;
341		if (&p2init_sid_cache)
342			p2init_sid_cache = mc_init_sid_cache;
343	}
344
345	mutex_exit(&mcmutex);
346
347	/*
348	 * Update DIMM serial id information if the DIMM serial id
349	 * cache has already been initialized.
350	 */
351	if (mc_dimm_sids) {
352		rw_enter(&mcdimmsids_rw, RW_WRITER);
353		(void) mc_populate_sid_cache();
354		rw_exit(&mcdimmsids_rw);
355	}
356
357	if (ddi_create_minor_node(devi, "mc-us3", S_IFCHR, instance,
358	    "ddi_mem_ctrl", 0) != DDI_SUCCESS) {
359		DPRINTF(MC_ATTACH_DEBUG, ("mc_attach: create_minor_node"
360		    " failed \n"));
361		goto bad1;
362	}
363
364	ddi_report_dev(devi);
365	return (DDI_SUCCESS);
366
367bad1:
368	/* release all allocated data structures for this MC */
369	mlayout_del(softsp->portid, 0);
370	if (softsp->memlayoutp != NULL)
371		kmem_free(softsp->memlayoutp, softsp->size);
372
373	/* remove the libdevinfo property */
374	if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
375	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
376	    MEM_CFG_PROP_NAME) == 1) {
377		(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
378		    MEM_CFG_PROP_NAME);
379	}
380
381bad2:
382	/* unmap the registers for this device. */
383	ddi_unmap_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0);
384
385bad:
386	ddi_soft_state_free(mcp, instance);
387	return (DDI_FAILURE);
388}
389
390/* ARGSUSED */
391static int
392mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
393{
394	int instance;
395	struct mc_soft_state *softsp;
396
397	/* get the instance of this devi */
398	instance = ddi_get_instance(devi);
399
400	/* get the soft state pointer for this device node */
401	softsp = ddi_get_soft_state(mcp, instance);
402
403	switch (cmd) {
404	case DDI_SUSPEND:
405		return (DDI_SUCCESS);
406
407	case DDI_DETACH:
408		break;
409
410	default:
411		return (DDI_FAILURE);
412	}
413
414	DPRINTF(MC_DETACH_DEBUG, ("mc%d DETACH: portid= %d, table 0x%p\n",
415	    instance, softsp->portid, softsp->memlayoutp));
416
417	/* remove the libdevinfo property */
418	if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
419	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
420	    MEM_CFG_PROP_NAME) == 1) {
421		(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
422		    MEM_CFG_PROP_NAME);
423	}
424
425	/* release all allocated data structures for this MC */
426	mlayout_del(softsp->portid, 1);
427	if (softsp->memlayoutp != NULL)
428		kmem_free(softsp->memlayoutp, softsp->size);
429
430	/* unmap the registers */
431	ddi_unmap_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0);
432
433	mutex_enter(&mcmutex);
434	if (nmcs == 0) {
435		if (&p2get_mem_unum)
436			p2get_mem_unum = NULL;
437		if (&p2get_mem_info)
438			p2get_mem_info = NULL;
439		if (&p2get_mem_sid)
440			p2get_mem_sid = NULL;
441		if (&p2get_mem_offset)
442			p2get_mem_offset = NULL;
443		if (&p2get_mem_addr)
444			p2get_mem_addr = NULL;
445		if (&p2init_sid_cache)
446			p2init_sid_cache = NULL;
447	}
448
449	mutex_exit(&mcmutex);
450
451	ddi_remove_minor_node(devi, NULL);
452
453	/* free up the soft state */
454	ddi_soft_state_free(mcp, instance);
455
456	return (DDI_SUCCESS);
457}
458
459/* ARGSUSED */
460static int
461mc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
462{
463
464	/* verify that otyp is appropriate */
465	if (otyp != OTYP_CHR) {
466		return (EINVAL);
467	}
468
469	return (0);
470}
471
472/* ARGSUSED */
473static int
474mc_close(dev_t devp, int flag, int otyp, cred_t *credp)
475{
476	return (0);
477}
478
479/*
480 * cmd includes MCIOC_MEMCONF, MCIOC_MEM, MCIOC_SEG, MCIOC_BANK, MCIOC_DEVGRP,
481 * MCIOC_CTRLCONF, MCIOC_CONTROL.
482 *
483 * MCIOC_MEM, MCIOC_SEG, MCIOC_CTRLCONF, and MCIOC_CONTROL are
484 * associated with variable-length structs. If the given count is less than
485 * the count in the kernel, update the count and return EINVAL so that the
486 * user can allocate enough space.
487 *
488 */
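/*
 * A minimal sketch (illustrative only, not an interface defined in this
 * file) of the retry pattern a user-level caller of the variable-length
 * ioctls is expected to follow, shown here for MCIOC_MEM:
 *
 *	struct mc_memory *m = malloc(sizeof (*m));
 *	m->nsegments = 1;
 *	while (ioctl(fd, MCIOC_MEM, m) == -1 && errno == EINVAL) {
 *		size_t sz = sizeof (*m) + (m->nsegments - 1) *
 *		    sizeof (m->segmentids[0]);
 *		m = realloc(m, sz);
 *	}
 *
 * On EINVAL the driver has already copied out the updated count, so the
 * caller simply grows the buffer and retries.
 */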
489
490/* ARGSUSED */
491static int
492mc_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p,
493	int *rval_p)
494{
495	size_t	size;
496	struct mc_memconf mcmconf;
497	struct mc_memory *mcmem, mcmem_in;
498	struct mc_segment *mcseg, mcseg_in;
499	struct mc_bank mcbank;
500	struct mc_devgrp mcdevgrp;
501	struct mc_ctrlconf *mcctrlconf, mcctrlconf_in;
502	struct mc_control *mccontrol, mccontrol_in;
503	struct seg_info *seg = NULL;
504	struct bank_info *bank = NULL;
505	struct dgrp_info *dgrp = NULL;
506	struct mctrl_info *mcport;
507	mc_dlist_t *mctrl;
508	int i, status = 0;
509	cpu_t *cpu;
510
511	switch (cmd) {
512	case MCIOC_MEMCONF:
513		mutex_enter(&mcdatamutex);
514
515		mcmconf.nmcs = nmcs;
516		mcmconf.nsegments = nsegments;
517		mcmconf.nbanks = maxbanks;
518		mcmconf.ndevgrps = NDGRPS;
519		mcmconf.ndevs = NDIMMS;
520		mcmconf.len_dev = MAX_DEVLEN;
521		mcmconf.xfer_size = TRANSFER_SIZE;
522
523		mutex_exit(&mcdatamutex);
524
525		if (copyout(&mcmconf, (void *)arg, sizeof (struct mc_memconf)))
526			return (EFAULT);
527		return (0);
528
529	/*
530	 * input: nsegments, with space allocated for a variable number of segmentids
531	 *
532	 * return    0: size, number of segments, and all segment ids,
533	 *		where global and local ids are identical.
534	 *	EINVAL: if the given nsegments is less than that in the kernel;
535	 *		the nsegments field of the struct will be updated.
536	 *	EFAULT: if other errors occur in the kernel.
537	 */
538	case MCIOC_MEM:
539		if (copyin((void *)arg, &mcmem_in,
540		    sizeof (struct mc_memory)) != 0)
541			return (EFAULT);
542
543		mutex_enter(&mcdatamutex);
544		if (mcmem_in.nsegments < nsegments) {
545			mcmem_in.nsegments = nsegments;
546			if (copyout(&mcmem_in, (void *)arg,
547			    sizeof (struct mc_memory)))
548				status = EFAULT;
549			else
550				status = EINVAL;
551
552			mutex_exit(&mcdatamutex);
553			return (status);
554		}
555
556		size = sizeof (struct mc_memory) + (nsegments - 1) *
557		    sizeof (mcmem->segmentids[0]);
558		mcmem = kmem_zalloc(size, KM_SLEEP);
559
560		mcmem->size = memsize;
561		mcmem->nsegments = nsegments;
562		seg = (struct seg_info *)seg_head;
563		for (i = 0; i < nsegments; i++) {
564			ASSERT(seg != NULL);
565			mcmem->segmentids[i].globalid = seg->seg_node.id;
566			mcmem->segmentids[i].localid = seg->seg_node.id;
567			seg = (struct seg_info *)seg->seg_node.next;
568		}
569		mutex_exit(&mcdatamutex);
570
571		if (copyout(mcmem, (void *)arg, size))
572			status = EFAULT;
573
574		kmem_free(mcmem, size);
575		return (status);
576
577	/*
578	 * input: id, nbanks, with space allocated for a variable number of bankids
579	 *
580	 * return    0: base, size, number of banks, and all bank ids,
581	 *		where the global id is unique across all banks and the
582	 *		local id is only unique per mc.
583	 *	EINVAL: either the id isn't found, or the given nbanks is less
584	 *		than that in the kernel; nbanks of the struct will be updated.
585	 *	EFAULT: if other errors occur in the kernel.
586	 */
587	case MCIOC_SEG:
588
589		if (copyin((void *)arg, &mcseg_in,
590		    sizeof (struct mc_segment)) != 0)
591			return (EFAULT);
592
593		mutex_enter(&mcdatamutex);
594		if ((seg = (struct seg_info *)mc_node_get(mcseg_in.id,
595		    seg_head)) == NULL) {
596			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG: seg not match, "
597			    "id %d\n", mcseg_in.id));
598			mutex_exit(&mcdatamutex);
599			return (EFAULT);
600		}
601
602		if (mcseg_in.nbanks < seg->nbanks) {
603			mcseg_in.nbanks = seg->nbanks;
604			if (copyout(&mcseg_in, (void *)arg,
605			    sizeof (struct mc_segment)))
606				status = EFAULT;
607			else
608				status = EINVAL;
609
610			mutex_exit(&mcdatamutex);
611			return (status);
612		}
613
614		size = sizeof (struct mc_segment) + (seg->nbanks - 1) *
615		    sizeof (mcseg->bankids[0]);
616		mcseg = kmem_zalloc(size, KM_SLEEP);
617
618		mcseg->id = seg->seg_node.id;
619		mcseg->ifactor = seg->ifactor;
620		mcseg->base = seg->base;
621		mcseg->size = seg->size;
622		mcseg->nbanks = seg->nbanks;
623
624		bank = seg->hb_inseg;
625
626		DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:nbanks %d seg 0x%p bank %p\n",
627		    seg->nbanks, (void *)seg, (void *)bank));
628
629		i = 0;
630		while (bank != NULL) {
631			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:idx %d bank_id %d\n",
632			    i, bank->bank_node.id));
633			mcseg->bankids[i].globalid = bank->bank_node.id;
634			mcseg->bankids[i++].localid =
635			    bank->local_id;
636			bank = bank->n_inseg;
637		}
638		ASSERT(i == seg->nbanks);
639		mutex_exit(&mcdatamutex);
640
641		if (copyout(mcseg, (void *)arg, size))
642			status = EFAULT;
643
644		kmem_free(mcseg, size);
645		return (status);
646
647	/*
648	 * input: id
649	 *
650	 * return    0: mask, match, size, and devgrpid,
651	 *		where the global id is unique across all devgrps and the
652	 *		local id is only unique per mc.
653	 *	EINVAL: if the id isn't found
654	 *	EFAULT: if other errors occur in the kernel.
655	 */
656	case MCIOC_BANK:
657		if (copyin((void *)arg, &mcbank, sizeof (struct mc_bank)) != 0)
658			return (EFAULT);
659
660		DPRINTF(MC_CMD_DEBUG, ("MCIOC_BANK: bank id %d\n", mcbank.id));
661
662		mutex_enter(&mcdatamutex);
663
664		if ((bank = (struct bank_info *)mc_node_get(mcbank.id,
665		    bank_head)) == NULL) {
666			mutex_exit(&mcdatamutex);
667			return (EINVAL);
668		}
669
670		DPRINTF(MC_CMD_DEBUG, ("MCIOC_BANK: bank %d (0x%p) valid %hu\n",
671		    bank->bank_node.id, (void *)bank, bank->valid));
672
673		/*
674		 * If (Physical Address & MASK) == MATCH, the physical address
675		 * is located at this bank. The lower physical address bits
676		 * are at [9-6].
677		 */
678		mcbank.mask = (~(bank->lk | ~(MADR_LK_MASK >>
679		    MADR_LK_SHIFT))) << MADR_LPA_SHIFT;
680		mcbank.match = bank->lm << MADR_LPA_SHIFT;
681		mcbank.size = bank->size;
682		mcbank.devgrpid.globalid = bank->devgrp_id;
683		mcbank.devgrpid.localid = bank->devgrp_id % NDGRPS;
684
685		mutex_exit(&mcdatamutex);
686
687		if (copyout(&mcbank, (void *)arg, sizeof (struct mc_bank)))
688			return (EFAULT);
689		return (0);
690
691	/*
692	 * input: id, with space allocated for a variable number of deviceids
693	 *
694	 * return    0: size and number of devices.
695	 *	EINVAL: if the id isn't found
696	 *	EFAULT: if other errors occur in the kernel.
697	 */
698	case MCIOC_DEVGRP:
699
700		if (copyin((void *)arg, &mcdevgrp,
701		    sizeof (struct mc_devgrp)) != 0)
702			return (EFAULT);
703
704		mutex_enter(&mcdatamutex);
705		if ((dgrp = (struct dgrp_info *)mc_node_get(mcdevgrp.id,
706		    dgrp_head)) == NULL) {
707			DPRINTF(MC_CMD_DEBUG, ("MCIOC_DEVGRP: not match, id "
708			    "%d\n", mcdevgrp.id));
709			mutex_exit(&mcdatamutex);
710			return (EINVAL);
711		}
712
713		mcdevgrp.ndevices = dgrp->ndevices;
714		mcdevgrp.size = dgrp->size;
715
716		mutex_exit(&mcdatamutex);
717
718		if (copyout(&mcdevgrp, (void *)arg, sizeof (struct mc_devgrp)))
719			status = EFAULT;
720
721		return (status);
722
723	/*
724	 * input: nmcs, with space allocated for a variable number of mcids
725	 *
726	 * return    0: number of mcs, and all mcids,
727	 *		where global and local ids are identical.
728	 *	EINVAL: if the given nmcs is less than that in the kernel;
729	 *		nmcs of the struct will be updated.
730	 *	EFAULT: if other errors occur in the kernel.
731	 */
732	case MCIOC_CTRLCONF:
733		if (copyin((void *)arg, &mcctrlconf_in,
734		    sizeof (struct mc_ctrlconf)) != 0)
735			return (EFAULT);
736
737		mutex_enter(&mcdatamutex);
738		if (mcctrlconf_in.nmcs < nmcs) {
739			mcctrlconf_in.nmcs = nmcs;
740			if (copyout(&mcctrlconf_in, (void *)arg,
741			    sizeof (struct mc_ctrlconf)))
742				status = EFAULT;
743			else
744				status = EINVAL;
745
746			mutex_exit(&mcdatamutex);
747			return (status);
748		}
749
750		/*
751		 * Cannot just use the size of the struct because it is a
752		 * variable-length struct.
753		 */
754		size = sizeof (struct mc_ctrlconf) + ((nmcs - 1) *
755		    sizeof (mcctrlconf->mcids[0]));
756		mcctrlconf = kmem_zalloc(size, KM_SLEEP);
757
758		mcctrlconf->nmcs = nmcs;
759
760		/* Get all MC ids and add to mcctrlconf */
761		mctrl = mctrl_head;
762		i = 0;
763		while (mctrl != NULL) {
764			mcctrlconf->mcids[i].globalid = mctrl->id;
765			mcctrlconf->mcids[i].localid = mctrl->id;
766			i++;
767			mctrl = mctrl->next;
768		}
769		ASSERT(i == nmcs);
770
771		mutex_exit(&mcdatamutex);
772
773		if (copyout(mcctrlconf, (void *)arg, size))
774			status = EFAULT;
775
776		kmem_free(mcctrlconf, size);
777		return (status);
778
779	/*
780	 * input:id, ndevgrps and allocate space for various length of devgrpids
781	 * input: id, ndevgrps, with space allocated for a variable number of devgrpids
782	 *
783	 * return    0: number of devgrps, and all devgrpids, where the global
784	 *		id is unique across all devgrps and the local id is only
785	 *		unique per mc.
786	 *	EINVAL: either the id isn't found, or the given ndevgrps is
787	 *		less than that in the kernel; ndevgrps of the struct will
788	 *		be updated.
789	 *	EFAULT: if other errors occur in the kernel.
790	case MCIOC_CONTROL:
791		if (copyin((void *)arg, &mccontrol_in,
792		    sizeof (struct mc_control)) != 0)
793			return (EFAULT);
794
795		mutex_enter(&mcdatamutex);
796		if ((mcport = (struct mctrl_info *)mc_node_get(mccontrol_in.id,
797		    mctrl_head)) == NULL) {
798			mutex_exit(&mcdatamutex);
799			return (EINVAL);
800		}
801
802		/*
803		 * mcport->ndevgrps of zero means the Memory Controller is disabled.
804		 */
805		if ((mccontrol_in.ndevgrps < mcport->ndevgrps) ||
806		    (mcport->ndevgrps == 0)) {
807			mccontrol_in.ndevgrps = mcport->ndevgrps;
808			if (copyout(&mccontrol_in, (void *)arg,
809			    sizeof (struct mc_control)))
810				status = EFAULT;
811			else if (mcport->ndevgrps != 0)
812				status = EINVAL;
813
814			mutex_exit(&mcdatamutex);
815			return (status);
816		}
817
818		size = sizeof (struct mc_control) + (mcport->ndevgrps - 1) *
819		    sizeof (mccontrol->devgrpids[0]);
820		mccontrol = kmem_zalloc(size, KM_SLEEP);
821
822		mccontrol->id = mcport->mctrl_node.id;
823		mccontrol->ndevgrps = mcport->ndevgrps;
824		for (i = 0; i < mcport->ndevgrps; i++) {
825			mccontrol->devgrpids[i].globalid = mcport->devgrpids[i];
826			mccontrol->devgrpids[i].localid =
827			    mcport->devgrpids[i] % NDGRPS;
828			DPRINTF(MC_CMD_DEBUG, ("MCIOC_CONTROL: devgrp id %lu\n",
829			    *(uint64_t *)&mccontrol->devgrpids[i]));
830		}
831		mutex_exit(&mcdatamutex);
832
833		if (copyout(mccontrol, (void *)arg, size))
834			status = EFAULT;
835
836		kmem_free(mccontrol, size);
837		return (status);
838
839	/*
840	 * input: id
841	 *
842	 * return    0: CPU flushed successfully.
843	 *	EINVAL: the id wasn't found
844	 */
845	case MCIOC_ECFLUSH:
846		mutex_enter(&cpu_lock);
847		cpu = cpu_get((processorid_t)arg);
848		mutex_exit(&cpu_lock);
849		if (cpu == NULL)
850			return (EINVAL);
851
852		xc_one(arg, (xcfunc_t *)cpu_flush_ecache, 0, 0);
853
854		return (0);
855
856	default:
857		DPRINTF(MC_CMD_DEBUG, ("DEFAULT: cmd is wrong\n"));
858		return (EFAULT);
859	}
860}
861
862/*
863 * Get the Memory Address Decoding Registers and construct the lists.
864 * The port id check works around Cheetah's restriction that the register
865 * cannot be mapped if port id (MC the registers are on) == cpu id (CPU the
866 * process is running on).
866 */
867static int
868mc_get_mcregs(struct mc_soft_state *softsp)
869{
870	int i;
871	int err = 0;
872	uint64_t madreg;
873	uint64_t ma_reg_array[NBANKS];	/* there are NBANKS of madrs */
874
875	/* Construct lists for MC, mctrl_info, dgrp_info, and device_info */
876	mc_construct(softsp->portid, softsp->memlayoutp);
877
878	/*
879	 * If memlayoutp is NULL, the Memory Controller is disabled, and
880	 * no banks or segments need to be created.
881	 */
882	if (softsp->memlayoutp == NULL)
883		goto exit;
884
885	/*
886	 * Get the content of 4 Memory Address Decoding Registers, and
887	 * construct lists of logical banks and segments.
888	 */
889	for (i = 0; i < NBANKS; i++) {
890		DPRINTF(MC_REG_DEBUG, ("get_mcregs: mapreg=0x%p portid=%d "
891		    "cpu=%d\n", (void *)softsp->mc_base, softsp->portid,
892		    CPU->cpu_id));
893
894		kpreempt_disable();
895		if (softsp->portid == (cpunodes[CPU->cpu_id].portid))
896			madreg = get_mcr(MADR0OFFSET + (i * REGOFFSET));
897		else
898			madreg = *((uint64_t *)(softsp->mc_base + MADR0OFFSET +
899			    (i * REGOFFSET)));
900		kpreempt_enable();
901
902		DPRINTF(MC_REG_DEBUG, ("get_mcregs 2: memlayoutp=0x%p madreg "
903		    "reg=0x%lx\n", softsp->memlayoutp, madreg));
904
905		ma_reg_array[i] = madreg;
906
907		if ((err = mlayout_add(softsp->portid, i, madreg,
908		    softsp->memlayoutp)) == -1)
909			break;
910	}
911
912	/*
913	 * Create the logical bank property for this mc node. This
914	 * property is an encoded array of the madr for each logical
915	 * bank (there are NBANKS of these).
916	 */
917	if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
918	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
919	    MEM_CFG_PROP_NAME) != 1) {
920		(void) ddi_prop_create(DDI_DEV_T_NONE, softsp->dip,
921		    DDI_PROP_CANSLEEP, MEM_CFG_PROP_NAME,
922		    (caddr_t)&ma_reg_array, sizeof (ma_reg_array));
923	}
924
925exit:
926	if (!err) {
927		mutex_enter(&mcdatamutex);
928		nmcs++;
929		mutex_exit(&mcdatamutex);
930	}
931	return (err);
932}
933
934/*
935 * Translate a <DIMM, offset> pair to a physical address.
936 */
937static int
938mc_offset_to_addr(struct seg_info *seg,
939    struct bank_info *bank, uint64_t off, uint64_t *addr)
940{
941	uint64_t base, size, line, remainder;
942	uint32_t ifactor;
943
944	/*
945	 * Compute the half-dimm size in bytes.
946	 * Note that bank->size represents the number of data bytes,
947	 * and does not include the additional bits used for ecc, mtag,
948	 * and mtag ecc information in each 144-bit checkword.
949	 * For calculating the offset to a checkword we need the size
950	 * including the additional 8 bytes for each 64 data bytes of
951	 * a cache line.
952	 */
953	size = ((bank->size / 4) / 64) * 72;
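	/*
	 * Illustrative arithmetic (hypothetical bank size, not taken from
	 * the PRM): for a 512 MB logical bank, each half-dimm holds
	 * 512 MB / 4 = 128 MB of data, which occupies 128 MB * 72 / 64 =
	 * 144 MB once the check bits are counted, so the computation above
	 * yields 0x9000000.
	 */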
954
955	/*
956	 * Check if the offset is within this bank. This depends on the position
957	 * of the bank, i.e., whether it is the front bank or the back bank.
958	 */
959	base = size * bank->pos;
960
961	if ((off < base) || (off >= (base + size)))
962		return (-1);
963
964	/*
965	 * Compute the offset within the half-dimm.
966	 */
967	off -= base;
968
969	/*
970	 * Compute the line within the half-dimm. This is the same as the line
971	 * within the bank since each DIMM in a bank contributes uniformly
972	 * 144 bits (18 bytes) to a cache line.
973	 */
974	line = off / QWORD_SIZE_BYTES;
975
976	remainder = off % QWORD_SIZE_BYTES;
977
978	/*
979	 * Compute the line within the segment.
980	 * The bank->lm field indicates the order in which cache lines are
981	 * distributed across the banks of a segment (See the Cheetah PRM).
982	 * The interleave factor the bank is programmed with is used instead
983	 * of the segment interleave factor since a segment can be composed
984	 * of banks with different interleave factors if the banks are not
985	 * uniform in size.
986	 */
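	/*
	 * For reference (derived from the lk encoding used below, not a
	 * quote from the PRM): lk 0xF gives an interleave factor of 1,
	 * 0xE gives 2, 0xC gives 4, 0x8 gives 8 and 0x0 gives 16.
	 */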
987	ifactor = (bank->lk ^ 0xF) + 1;
988	line = (line * ifactor) + bank->lm;
989
990	/*
991	 * Compute the physical address assuming that there are 64 data bytes
992	 * in a cache line.
993	 */
994	*addr = (line << 6) + seg->base;
995	*addr += remainder * 16;
996
997	return (0);
998}
999
1000/*
1001 * Translate a physical address to a <DIMM, offset> pair.
1002 */
1003static void
1004mc_addr_to_offset(struct seg_info *seg,
1005    struct bank_info *bank, uint64_t addr, uint64_t *off)
1006{
1007	uint64_t base, size, line, remainder;
1008	uint32_t ifactor;
1009
1010	/*
1011	 * Compute the line within the segment assuming that there are 64 data
1012	 * bytes in a cache line.
1013	 */
1014	line = (addr - seg->base) / 64;
1015
1016	/*
1017	 * The lm (lower match) field from the Memory Address Decoding Register
1018	 * for this bank determines which lines within a memory segment this
1019	 * bank should respond to.  These are the actual address bits the
1020	 * interleave is done over (See the Cheetah PRM).
1021	 * In other words, the lm field indicates the order in which the cache
1022	 * lines are distributed across the banks of a segment, and thusly it
1023	 * lines are distributed across the banks of a segment, and thus it
1024	 * the line within the half-dimm. This is because each DIMM in a bank
1025	 * contributes uniformly to every cache line.
1026	 */
1027	ifactor = (bank->lk ^ 0xF) + 1;
1028	line = (line - bank->lm)/ifactor;
1029
1030	/*
1031	 * Compute the offset within the half-dimm. This depends on whether
1032	 * the bank is a front logical bank or a back logical bank.
1033	 */
1034	*off = line * QWORD_SIZE_BYTES;
1035
1036	/*
1037	 * Compute the half-dimm size in bytes.
1038	 * Note that bank->size represents the number of data bytes,
1039	 * and does not include the additional bits used for ecc, mtag,
1040	 * and mtag ecc information in each 144-bit quadword.
1041	 * For calculating the offset to a checkword we need the size
1042	 * including the additional 8 bytes for each 64 data bytes of
1043	 * a cache line.
1044	 */
1045	size = ((bank->size / 4) / 64) * 72;
1046
1047	/*
1048	 * Compute the offset within the dimm to the nearest line. This depends
1049	 * on whether the bank is a front logical bank or a back logical
1050	 * bank.
1051	 */
1052	base = size * bank->pos;
1053	*off += base;
1054
1055	remainder = (addr - seg->base) % 64;
1056	remainder /= 16;
1057	*off += remainder;
1058}
1059
1060/*
1061 * A cache line is composed of four quadwords with their associated ECC, and
1062 * the MTag along with its associated ECC. This is depicted below:
1063 *
1064 * |                    Data                    |   ECC   | Mtag |MTag ECC|
1065 *  127                                         0 8       0 2    0 3      0
1066 *
1067 * synd_code is mapped onto the bit order below when passed to mc_get_mem_unum.
1068 *  143                                         16        7      4        0
1069 *
1070 * |  Quadword  0  |  Quadword  1  |  Quadword  2  |  Quadword  3  |
1071 *  575         432 431         288 287         144 143		   0
1072 *
1073 * dimm table: each bit of a cache line needs two bits to represent one of
1074 *      four dimms, so the table needs 144 bytes (576 * 2 / 8). The content is
1075 *      in big-endian order, i.e. dimm_table[0] represents bits 572 to 575.
1076 *
1077 * pin table: each bit of a cache line needs one byte to represent its pin
1078 *      position, where the maximum is 230, so the table needs 576 bytes. The
1079 *      table index follows the bit position within a cache line, i.e.
1080 *      pin_table[0] represents bit 0, Mtag ECC 0 of Quadword 3.
1081 *
1082 * This is a mapping from syndrome code to QuadWord Logical layout at Safari.
1083 * Referring to Figure 3-4, Excalibur Architecture Manual.
1084 * This table could be moved to cheetah.c if other platform teams agree with
1085 * the bit layout at QuadWord.
1086 */
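/*
 * Worked example of the table indexing (hypothetical inputs, shown only to
 * illustrate the arithmetic in mc_get_mem_unum() below): for synd_code 0 in
 * quadword 0, pos_cacheline = (3 - 0) * 144 + qwordmap[0] = 448, so
 * position = 575 - 448 = 127, index = 127 * 2 / 8 = 31 and offset =
 * 127 % 4 = 3; the two-bit dimm selector is then bits [1:0] of
 * dimmtable[31].
 */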
1087
1088static uint8_t qwordmap[] =
1089{
109016,   17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
109132,   33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
109248,   49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
109364,   65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
109480,   81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
109596,   97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
1096112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
1097128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
10987,    8,   9,  10,  11,  12,  13,  14,  15,   4,   5,   6,   0,   1,   2,   3,
1099};
1100
1101
1102/* ARGSUSED */
1103static int
1104mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf, int buflen, int *lenp)
1105{
1106	int i, upper_pa, lower_pa, dimmoffset;
1107	int quadword, pos_cacheline, position, index, idx4dimm;
1108	int qwlayout = synd_code;
1109	short offset, data;
1110	char unum[UNUM_NAMLEN];
1111	struct dimm_info *dimmp;
1112	struct pin_info *pinp;
1113	struct bank_info *bank;
1114
1115	/*
1116	 * Enforce old Openboot requirement for synd code, either a single-bit
1117	 * code from 0..QWORD_SIZE-1 or -1 (multi-bit error).
1118	 */
1119	if (qwlayout < -1 || qwlayout >= QWORD_SIZE)
1120		return (EINVAL);
1121
1122	unum[0] = '\0';
1123
1124	upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT;
1125	lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT;
1126
1127	DPRINTF(MC_GUNUM_DEBUG, ("qwlayout %d\n", qwlayout));
1128
1129	/*
1130	 * Scan all logical banks to get one responding to the physical
1131	 * address. Then compute the index to look up dimm and pin tables
1132	 * to generate the unum.
1133	 */
1134	mutex_enter(&mcdatamutex);
1135	bank = (struct bank_info *)bank_head;
1136	while (bank != NULL) {
1137		int bankid, mcid, bankno_permc;
1138
1139		bankid = bank->bank_node.id;
1140		bankno_permc = bankid % NBANKS;
1141		mcid = bankid / NBANKS;
1142
1143		/*
1144		 * The Address Decoding logic decodes the different fields
1145		 * in the Memory Address Decoding register to determine
1146		 * whether a particular logical bank should respond to a
1147		 * physical address.
1148		 */
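		/*
		 * Hypothetical illustration of the match test below (the
		 * values are not from any real MADR): with um = 0x00100
		 * and uk = 0x0007f, every upper_pa from 0x00100 through
		 * 0x0017f matches, because only the bits not covered by
		 * the uk mask have to agree with um.
		 */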
1149		if ((!bank->valid) || ((~(~(upper_pa ^ bank->um) |
1150		    bank->uk)) || (~(~(lower_pa ^ bank->lm) | bank->lk)))) {
1151			bank = (struct bank_info *)bank->bank_node.next;
1152			continue;
1153		}
1154
1155		dimmoffset = (bankno_permc % NDGRPS) * NDIMMS;
1156
1157		dimmp = (struct dimm_info *)bank->dimminfop;
1158		ASSERT(dimmp != NULL);
1159
1160		if ((qwlayout >= 0) && (qwlayout < QWORD_SIZE)) {
1161			/*
1162			 * single-bit error handling; we can identify the
1163			 * specific DIMM.
1164			 */
1165
1166			pinp = (struct pin_info *)&dimmp->data[0];
1167
1168			if (!dimmp->sym_flag)
1169				pinp++;
1170
1171			quadword = (paddr & 0x3f) / 16;
1172			/* or quadword = (paddr >> 4) % 4; */
1173			pos_cacheline = ((3 - quadword) * QWORD_SIZE) +
1174			    qwordmap[qwlayout];
1175			position = 575 - pos_cacheline;
1176			index = position * 2 / 8;
1177			offset = position % 4;
1178
1179			/*
1180			 * Trade-off: we don't add the pin number to the
1181			 * unum string because error statistics are
1182			 * accumulated per dimm, not per pin.
1183			 * (void) sprintf(unum, "Pin %1u ", (uint_t)
1184			 * pinp->pintable[pos_cacheline]);
1185			 */
1186			DPRINTF(MC_GUNUM_DEBUG, ("Pin number %1u\n",
1187			    (uint_t)pinp->pintable[pos_cacheline]));
1188			data = pinp->dimmtable[index];
1189			idx4dimm = (data >> ((3 - offset) * 2)) & 3;
1190
1191			(void) strncpy(unum,
1192			    (char *)dimmp->label[dimmoffset + idx4dimm],
1193			    UNUM_NAMLEN);
1194			DPRINTF(MC_GUNUM_DEBUG, ("unum %s\n", unum));
1195			/*
1196			 * platform hook for adding label information to unum.
1197			 */
1198			mc_add_mem_unum_label(unum, mcid, bankno_permc,
1199			    idx4dimm);
1200		} else {
1201			char *p = unum;
1202			size_t res = UNUM_NAMLEN;
1203
1204			/*
1205			 * multi-bit error handling; we can only identify
1206			 * the bank of DIMMs.
1207			 */
1208
1209			for (i = 0; (i < NDIMMS) && (res > 0); i++) {
1210				(void) snprintf(p, res, "%s%s",
1211				    i == 0 ? "" : " ",
1212				    (char *)dimmp->label[dimmoffset + i]);
1213				res -= strlen(p);
1214				p += strlen(p);
1215			}
1216
1217			/*
1218			 * platform hook for adding label information
1219			 * to unum.
1220			 */
1221			mc_add_mem_unum_label(unum, mcid, bankno_permc, -1);
1222		}
1223		mutex_exit(&mcdatamutex);
1224		if ((strlen(unum) >= UNUM_NAMLEN) ||
1225		    (strlen(unum) >= buflen)) {
1226			return (ENAMETOOLONG);
1227		} else {
1228			(void) strncpy(buf, unum, buflen);
1229			*lenp = strlen(buf);
1230			return (0);
1231		}
1232	}	/* end of while loop for logical bank list */
1233
1234	mutex_exit(&mcdatamutex);
1235	return (ENXIO);
1236}
1237
1238/* ARGSUSED */
1239static int
1240mc_get_mem_offset(uint64_t paddr, uint64_t *offp)
1241{
1242	int upper_pa, lower_pa;
1243	struct bank_info *bank;
1244	struct seg_info *seg;
1245
1246	upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT;
1247	lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT;
1248
1249	/*
1250	 * Scan all logical banks to get one responding to the physical
1251	 * address.
1252	 */
1253	mutex_enter(&mcdatamutex);
1254	bank = (struct bank_info *)bank_head;
1255	while (bank != NULL) {
1256		/*
1257		 * The Address Decoding logic decodes the different fields
1258		 * in the Memory Address Decoding register to determine
1259		 * whether a particular logical bank should respond to a
1260		 * physical address.
1261		 */
1262		if ((!bank->valid) || ((~(~(upper_pa ^ bank->um) |
1263		    bank->uk)) || (~(~(lower_pa ^ bank->lm) | bank->lk)))) {
1264			bank = (struct bank_info *)bank->bank_node.next;
1265			continue;
1266		}
1267
1268		seg = (struct seg_info *)mc_node_get(bank->seg_id, seg_head);
1269		ASSERT(seg != NULL);
1270		ASSERT(paddr >= seg->base);
1271
1272		mc_addr_to_offset(seg, bank, paddr, offp);
1273
1274		mutex_exit(&mcdatamutex);
1275		return (0);
1276	}
1277
1278	mutex_exit(&mcdatamutex);
1279	return (ENXIO);
1280}
1281
1282/*
1283 * Translate a DIMM <id, offset> pair to a physical address.
1284 */
1285static int
1286mc_get_mem_addr(int mcid, char *sid, uint64_t off, uint64_t *paddr)
1287{
1288	struct seg_info *seg;
1289	struct bank_info *bank;
1290	int first_seg_id;
1291	int i, found;
1292
1293	ASSERT(sid != NULL);
1294
1295	mutex_enter(&mcdatamutex);
1296
1297	rw_enter(&mcdimmsids_rw, RW_READER);
1298
1299	/*
1300	 * If DIMM serial ids have not been cached yet, tell the
1301	 * caller to try again.
1302	 */
1303	if (mc_dimm_sids == NULL) {
1304		rw_exit(&mcdimmsids_rw);
		mutex_exit(&mcdatamutex);
1305		return (EAGAIN);
1306	}
1307
1308	for (i = 0; i < max_entries; i++) {
1309		if (mc_dimm_sids[i].mcid == mcid)
1310			break;
1311	}
1312
1313	if (i == max_entries) {
1314		rw_exit(&mcdimmsids_rw);
1315		mutex_exit(&mcdatamutex);
1316		return (ENODEV);
1317	}
1318
1319	first_seg_id = mc_dimm_sids[i].seg_id;
1320
1321	seg = (struct seg_info *)mc_node_get(first_seg_id, seg_head);
1322
1323	rw_exit(&mcdimmsids_rw);
1324
1325	if (seg == NULL) {
1326		mutex_exit(&mcdatamutex);
1327		return (ENODEV);
1328	}
1329
1330	found = 0;
1331
1332	for (bank = seg->hb_inseg; bank; bank = bank->n_inseg) {
1333		ASSERT(bank->valid);
1334
1335		for (i = 0; i < NDIMMS; i++) {
1336			if (strncmp((char *)bank->dimmsidp[i], sid,
1337			    DIMM_SERIAL_ID_LEN)  == 0)
1338				break;
1339		}
1340
1341		if (i == NDIMMS)
1342			continue;
1343
1344		if (mc_offset_to_addr(seg, bank, off, paddr) == -1)
1345			continue;
1346		found = 1;
1347		break;
1348	}
1349
1350	if (found) {
1351		mutex_exit(&mcdatamutex);
1352		return (0);
1353	}
1354
1355	/*
1356	 * If a bank wasn't found, it may be in another segment.
1357	 * This can happen if the different logical banks of an MC
1358	 * have different interleave factors.  To deal with this
1359	 * possibility, we'll do a brute-force search for banks
1360	 * for this MC with a different seg id than above.
1361	 */
1362	bank = (struct bank_info *)bank_head;
1363	while (bank != NULL) {
1364
1365		if (!bank->valid) {
1366			bank = (struct bank_info *)bank->bank_node.next;
1367			continue;
1368		}
1369
1370		if (bank->bank_node.id / NBANKS != mcid) {
1371			bank = (struct bank_info *)bank->bank_node.next;
1372			continue;
1373		}
1374
1375		/* Ignore banks in the segment we looked in above. */
1376		if (bank->seg_id == first_seg_id) {
1377			bank = (struct bank_info *)bank->bank_node.next;
1378			continue;
1379		}
1380
1381		for (i = 0; i < NDIMMS; i++) {
1382			if (strncmp((char *)bank->dimmsidp[i], sid,
1383			    DIMM_SERIAL_ID_LEN)  == 0)
1384				break;
1385		}
1386
1387		if (i == NDIMMS) {
1388			bank = (struct bank_info *)bank->bank_node.next;
1389			continue;
1390		}
1391
1392		seg = (struct seg_info *)mc_node_get(bank->seg_id, seg_head);
1393
1394		if (mc_offset_to_addr(seg, bank, off, paddr) == -1) {
1395			bank = (struct bank_info *)bank->bank_node.next;
1396			continue;
1397		}
1398
1399		found = 1;
1400		break;
1401	}
1402
1403	mutex_exit(&mcdatamutex);
1404
1405	if (found)
1406		return (0);
1407	else
1408		return (ENOENT);
1409}
1410
1411static int
1412mc_get_mem_info(int synd_code, uint64_t paddr,
1413    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1414    int *segsp, int *banksp, int *mcidp)
1415{
1416	int upper_pa, lower_pa;
1417	struct bank_info *bankp;
1418
1419	if (synd_code < -1 || synd_code >= QWORD_SIZE)
1420		return (EINVAL);
1421
1422	upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT;
1423	lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT;
1424
1425	/*
1426	 * Scan all logical banks to get one responding to the physical
1427	 * address.
1428	 */
1429	mutex_enter(&mcdatamutex);
1430	bankp = (struct bank_info *)bank_head;
1431	while (bankp != NULL) {
1432		struct seg_info *segp;
1433		int bankid, mcid;
1434
1435		bankid = bankp->bank_node.id;
1436		mcid = bankid / NBANKS;
1437
1438		/*
1439		 * The Address Decoding logic decodes the different fields
1440		 * in the Memory Address Decoding register to determine
1441		 * whether a particular logical bank should respond to a
1442		 * physical address.
1443		 */
1444		if ((!bankp->valid) || ((~(~(upper_pa ^ bankp->um) |
1445		    bankp->uk)) || (~(~(lower_pa ^ bankp->lm) | bankp->lk)))) {
1446			bankp = (struct bank_info *)bankp->bank_node.next;
1447			continue;
1448		}
1449
1450		/*
1451		 * Get the corresponding segment.
1452		 */
1453		if ((segp = (struct seg_info *)mc_node_get(bankp->seg_id,
1454		    seg_head)) == NULL) {
1455			mutex_exit(&mcdatamutex);
1456			return (EFAULT);
1457		}
1458
1459		*mem_sizep = memsize;
1460		*seg_sizep = segp->size;
1461		*bank_sizep = bankp->size;
1462		*segsp = nsegments;
1463		*banksp = segp->nbanks;
1464		*mcidp = mcid;
1465
1466		mutex_exit(&mcdatamutex);
1467
1468		return (0);
1469
1470	}	/* end of while loop for logical bank list */
1471
1472	mutex_exit(&mcdatamutex);
1473	return (ENXIO);
1474}
1475
1476/*
1477 * Construct the lists for an enabled MC, with memory sizes initialized to 0.
1478 * The lists are connected as follows:
1479 * Attached MC -> device group list -> device list (per devgrp).
1480 */
1481static void
1482mc_construct(int mc_id, void *dimminfop)
1483{
1484	int i, j, idx, dmidx;
1485	struct mctrl_info *mctrl;
1486	struct dgrp_info *dgrp;
1487	struct device_info *dev;
1488	struct	dimm_info *dimmp = (struct  dimm_info *)dimminfop;
1489
1490	mutex_enter(&mcdatamutex);
1491	/* allocate for mctrl_info and bank_info */
1492	if ((mctrl = (struct mctrl_info *)mc_node_get(mc_id,
1493	    mctrl_head)) != NULL) {
1494		cmn_err(CE_WARN, "mc_construct: mctrl %d exists\n", mc_id);
1495		mutex_exit(&mcdatamutex);
1496		return;
1497	}
1498
1499	mctrl = kmem_zalloc(sizeof (struct mctrl_info), KM_SLEEP);
1500
1501	/*
1502	 * If dimminfop is NULL, the Memory Controller is disabled, and
1503	 * the number of device groups will be zero.
1504	 */
1505	if (dimminfop == NULL) {
1506		mctrl->mctrl_node.id = mc_id;
1507		mctrl->ndevgrps = 0;
1508		mc_node_add((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1509		mutex_exit(&mcdatamutex);
1510		return;
1511	}
1512
1513	/* add the entry on dgrp_info list */
1514	for (i = 0; i < NDGRPS; i++) {
1515		idx = mc_id * NDGRPS + i;
1516		mctrl->devgrpids[i] = idx;
1517		if ((dgrp = (struct dgrp_info *)mc_node_get(idx, dgrp_head))
1518		    != NULL) {
1519			cmn_err(CE_WARN, "mc_construct: devgrp %d exists\n",
1520			    idx);
1521			continue;
1522		}
1523
1524		dgrp = kmem_zalloc(sizeof (struct dgrp_info), KM_SLEEP);
1525
1526		/* add the entry on device_info list */
1527		for (j = 0; j < NDIMMS; j++) {
1528			dmidx = idx * NDIMMS + j;
1529			dgrp->deviceids[j] = dmidx;
1530			if ((dev = (struct device_info *)
1531			    mc_node_get(dmidx, device_head)) != NULL) {
1532				cmn_err(CE_WARN, "mc_construct: device %d "
1533				    "exists\n", dmidx);
1534				continue;
1535			}
1536			dev = kmem_zalloc(sizeof (struct device_info),
1537			    KM_SLEEP);
1538			dev->dev_node.id = dmidx;
1539			dev->size = 0;
1540			(void) strncpy(dev->label, (char *)
1541			    dimmp->label[i * NDIMMS + j], MAX_DEVLEN);
1542
1543			mc_node_add((mc_dlist_t *)dev, &device_head,
1544			    &device_tail);
1545		}	/* for loop for constructing device_info */
1546
1547		dgrp->dgrp_node.id = idx;
1548		dgrp->ndevices = NDIMMS;
1549		dgrp->size = 0;
1550		mc_node_add((mc_dlist_t *)dgrp, &dgrp_head, &dgrp_tail);
1551
1552	}	/* end of for loop for constructing dgrp_info list */
1553
1554	mctrl->mctrl_node.id = mc_id;
1555	mctrl->ndevgrps = NDGRPS;
1556	mc_node_add((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1557	mutex_exit(&mcdatamutex);
1558}
1559
1560/*
1561 * Construct lists for the Memory Configuration from a logical viewpoint.
1562 *
1563 * Retrieve information from the Memory Address Decoding Register and set up
1564 * the bank and segment lists. Link each bank to its device group, update the
1565 * sizes of the device group and its devices, and connect the bank to its segment.
1566 *
1567 * Memory Address Decoding Register
1568 * -------------------------------------------------------------------------
1569 * |63|62    53|52      41|40  37|36     20|19 18|17  14|13 12|11  8|7     0|
1570 * |-----------|----------|------|---------|-----|------|-----|-----|-------|
1571 * |V |    -   |    UK    |   -  |    UM   |  -  |  LK  |  -  | LM  |   -   |
1572 * -------------------------------------------------------------------------
1573 *
1574 */
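/*
 * Purely illustrative decoding (hypothetical field values, not taken from a
 * PRM example): with uk = 0x7, um = 0x8, lk = 0xE and lm = 0x0, mlayout_add()
 * below computes ifactor = (0xE ^ 0xF) + 1 = 2 and
 * size = ((0x7 & 0x3FF) + 1) * 0x4000000 / 2 = 0x10000000 (256 MB), i.e. this
 * logical bank supplies every second cache line of a 512 MB segment whose
 * base is (um & ~uk) << MADR_UPA_SHIFT.
 */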
1575
1576static int
1577mlayout_add(int mc_id, int bank_no, uint64_t reg, void *dimminfop)
1578{
1579	int i, dmidx, idx;
1580	uint32_t ifactor;
1581	int status = 0;
1582	uint64_t size, base;
1583	struct seg_info *seg_curr;
1584	struct bank_info *bank_curr;
1585	struct dgrp_info *dgrp;
1586	struct device_info *dev;
1587	union {
1588		struct {
1589			uint64_t valid	: 1;
1590			uint64_t resrv1	: 10;
1591			uint64_t uk	: 12;
1592			uint64_t resrv2	: 4;
1593			uint64_t um	: 17;
1594			uint64_t resrv3	: 2;
1595			uint64_t lk	: 4;
1596			uint64_t resrv4	: 2;
1597			uint64_t lm	: 4;
1598			uint64_t resrv5	: 8;
1599		} _s;
1600		uint64_t madreg;
1601	} mcreg;
1602
1603	mcreg.madreg = reg;
1604
1605	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add: mc_id %d, bank num "
1606	    "%d, reg 0x%lx\n", mc_id, bank_no, reg));
1607
1608	/* add the entry on bank_info list */
1609	idx = mc_id * NBANKS + bank_no;
1610
1611	mutex_enter(&mcdatamutex);
1612	if ((bank_curr = (struct bank_info *)mc_node_get(idx, bank_head))
1613	    != NULL) {
1614		cmn_err(CE_WARN, "mlayout_add: bank %d exists\n", bank_no);
1615		goto exit;
1616	}
1617
1618	bank_curr = kmem_zalloc(sizeof (struct bank_info), KM_SLEEP);
1619	bank_curr->bank_node.id = idx;
1620	bank_curr->valid = mcreg._s.valid;
1621	bank_curr->dimminfop = dimminfop;
1622
1623	if (!mcreg._s.valid) {
1624		mc_node_add((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
1625		goto exit;
1626	}
1627
1628	/*
1629	 * size of a logical bank = size of segment / interleave factor
1630	 * This formula works not only for the regular configuration,
1631	 * i.e. the number of banks in a segment equals the max
1632	 * interleave factor, but also for special cases, say a 3-bank
1633	 * interleave. One bank is 2-way interleaved and the other two are
1634	 * 4-way, so the sizes of the banks are size of segment/2 and /4
1635	 * respectively.
1636	 */
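	/*
	 * Numeric illustration of the special case above (hypothetical
	 * sizes): in a 1 GB segment built from one 2-way bank and two
	 * 4-way banks, the banks are 512 MB, 256 MB and 256 MB, which
	 * still sum to the full 1 GB segment.
	 */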
1637	ifactor = (mcreg._s.lk ^ 0xF) + 1;
1638	size = (((mcreg._s.uk & 0x3FF) + 1) * 0x4000000) / ifactor;
1639	base = mcreg._s.um & ~mcreg._s.uk;
1640	base <<= MADR_UPA_SHIFT;
1641
1642	bank_curr->uk = mcreg._s.uk;
1643	bank_curr->um = mcreg._s.um;
1644	bank_curr->lk = mcreg._s.lk;
1645	bank_curr->lm = mcreg._s.lm;
1646	bank_curr->size = size;
1647
1648	/*
1649	 * The bank's position depends on which halves of the DIMMs it consists
1650	 * of. The front-side halves of the 4 DIMMs constitute the front bank
1651	 * and the back-side halves constitute the back bank. Bank numbers
1652	 * 0 and 1 are front-side banks and bank numbers 2 and 3 are back side
1653	 * banks.
1654	 */
1655	bank_curr->pos = bank_no >> 1;
1656	ASSERT((bank_curr->pos == 0) || (bank_curr->pos == 1));
1657
1658	/*
1659	 * Workaround to keep gcc and SS12 lint happy.
1660	 * Lint expects lk, uk and um in the format statement below
1661	 * to use %lx, but this produces a warning when compiled with
1662	 * gcc.
1663	 */
1664
1665#if defined(lint)
1666	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add 3: logical bank num %d, "
1667	    "lk 0x%lx uk 0x%lx um 0x%lx ifactor 0x%x size 0x%lx base 0x%lx\n",
1668	    idx, mcreg._s.lk, mcreg._s.uk, mcreg._s.um, ifactor, size, base));
1669#else /* lint */
1670	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add 3: logical bank num %d, "
1671	    "lk 0x%x uk 0x%x um 0x%x ifactor 0x%x size 0x%lx base 0x%lx\n",
1672	    idx, mcreg._s.lk, mcreg._s.uk, mcreg._s.um, ifactor, size, base));
1673#endif /* lint */
1674
1675	/* connect the entry and update the size on dgrp_info list */
1676	idx = mc_id * NDGRPS + (bank_no % NDGRPS);
1677	if ((dgrp = (struct dgrp_info *)mc_node_get(idx, dgrp_head)) == NULL) {
1678		/* all available dgrps should be linked at mc_construct */
1679		cmn_err(CE_WARN, "mlayout_add: dgrp %d doesn't exist\n", idx);
1680		kmem_free(bank_curr, sizeof (struct bank_info));
1681		status = -1;
1682		goto exit;
1683	}
1684
1685	bank_curr->devgrp_id = idx;
1686	dgrp->size += size;
1687
1688	/* Update the size of entry on device_info list */
1689	for (i = 0; i < NDIMMS; i++) {
1690		dmidx = dgrp->dgrp_node.id * NDIMMS + i;
1691		dgrp->deviceids[i] = dmidx;
1692
1693		/* available devices should be linked at mc_construct */
1694		if ((dev = (struct device_info *)mc_node_get(dmidx,
1695		    device_head)) == NULL) {
1696			cmn_err(CE_WARN, "mlayout_add:dev %d doesn't exist\n",
1697			    dmidx);
1698			kmem_free(bank_curr, sizeof (struct bank_info));
1699			status = -1;
1700			goto exit;
1701		}
1702
1703		dev->size += (size / NDIMMS);
1704
1705		DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add DIMM:id %d, size %lu\n",
1706		    dmidx, size));
1707	}
1708
1709	/*
1710	 * Get the segment by matching the base address, link this bank
1711	 * to the segment. If not matched, allocate a new segment and
1712	 * add it to the segment list.
1713	 */
1714	if (seg_curr = seg_match_base(base)) {
1715		seg_curr->nbanks++;
1716		seg_curr->size += size;
1717		if (ifactor > seg_curr->ifactor)
1718			seg_curr->ifactor = ifactor;
1719		bank_curr->seg_id = seg_curr->seg_node.id;
1720	} else {
1721		seg_curr = (struct seg_info *)
1722		    kmem_zalloc(sizeof (struct seg_info), KM_SLEEP);
1723		bank_curr->seg_id = seg_id;
1724		seg_curr->seg_node.id = seg_id++;
1725		seg_curr->base = base;
1726		seg_curr->size = size;
1727		seg_curr->nbanks = 1;
1728		seg_curr->ifactor = ifactor;
1729		mc_node_add((mc_dlist_t *)seg_curr, &seg_head, &seg_tail);
1730
1731		nsegments++;
1732	}
1733
1734	/* Get the local id of bank which is only unique per segment. */
1735	bank_curr->local_id = seg_curr->nbanks - 1;
1736
1737	/* add bank at the end of the list; not sorted by bankid */
1738	if (seg_curr->hb_inseg != NULL) {
1739		bank_curr->p_inseg = seg_curr->tb_inseg;
1740		bank_curr->n_inseg = seg_curr->tb_inseg->n_inseg;
1741		seg_curr->tb_inseg->n_inseg = bank_curr;
1742		seg_curr->tb_inseg = bank_curr;
1743	} else {
1744		bank_curr->n_inseg = bank_curr->p_inseg = NULL;
1745		seg_curr->hb_inseg = seg_curr->tb_inseg = bank_curr;
1746	}
1747	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add: + bank to seg, id %d\n",
1748	    seg_curr->seg_node.id));
1749
1750	if (mc_dimm_sids) {
1751		rw_enter(&mcdimmsids_rw, RW_WRITER);
1752		mc_update_bank(bank_curr);
1753		rw_exit(&mcdimmsids_rw);
1754	}
1755	mc_node_add((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
1756
1757	memsize += size;
1758	if (seg_curr->nbanks > maxbanks)
1759		maxbanks = seg_curr->nbanks;
1760
1761exit:
1762	mutex_exit(&mcdatamutex);
1763	return (status);
1764}
1765
1766/*
1767 * Delete nodes related to the given MC from the mc, device group, device,
1768 * and bank lists. Moreover, delete the corresponding segment if all of its
1769 * connected banks have been removed.
1770 *
1771 * The "delete" argument is 1 if this is called as a result of DDI_DETACH. In
1772 * this case, the DIMM data structures need to be deleted. The argument is
1773 * 0 if this is called as a result of DDI_SUSPEND/DDI_RESUME. In this case,
1774 * the DIMM data structures are left alone.
1775 */
1776static void
1777mlayout_del(int mc_id, int delete)
1778{
1779	int i, j, dgrpid, devid, bankid, ndevgrps;
1780	struct seg_info *seg;
1781	struct bank_info *bank_curr;
1782	struct mctrl_info *mctrl;
1783	mc_dlist_t *dgrp_ptr;
1784	mc_dlist_t *dev_ptr;
1785	uint64_t base;
1786
1787	mutex_enter(&mcdatamutex);
1788
1789	/* delete mctrl_info */
1790	if ((mctrl = (struct mctrl_info *)mc_node_get(mc_id, mctrl_head)) !=
1791	    NULL) {
1792		ndevgrps = mctrl->ndevgrps;
1793		mc_node_del((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1794		kmem_free(mctrl, sizeof (struct mctrl_info));
1795		nmcs--;
1796
1797		/*
1798		 * There are no other lists left for a disabled MC.
1799		 */
1800		if (ndevgrps == 0) {
1801			mutex_exit(&mcdatamutex);
1802			return;
1803		}
1804	} else
1805		cmn_err(CE_WARN, "MC mlayout_del: mctrl is not found\n");
1806
1807	/* Delete device groups and devices of the detached MC */
1808	for (i = 0; i < NDGRPS; i++) {
1809		dgrpid = mc_id * NDGRPS + i;
1810		if (!(dgrp_ptr = mc_node_get(dgrpid, dgrp_head))) {
1811			cmn_err(CE_WARN, "mlayout_del: no devgrp %d\n", dgrpid);
1812			continue;
1813		}
1814
1815		for (j = 0; j < NDIMMS; j++) {
1816			devid = dgrpid * NDIMMS + j;
1817			if (dev_ptr = mc_node_get(devid, device_head)) {
1818				mc_node_del(dev_ptr, &device_head,
1819				    &device_tail);
1820				kmem_free(dev_ptr, sizeof (struct device_info));
1821			} else {
1822				cmn_err(CE_WARN, "mlayout_del: no dev %d\n",
1823				    devid);
1824			}
1825		}
1826
1827		mc_node_del(dgrp_ptr, &dgrp_head, &dgrp_tail);
1828		kmem_free(dgrp_ptr, sizeof (struct dgrp_info));
1829	}
1830
1831	/* Delete the banks, and each segment that has no banks left */
1832	for (i = 0; i < NBANKS; i++) {
1833		bankid = mc_id * NBANKS + i;
1834		DPRINTF(MC_DESTRC_DEBUG, ("bank id %d\n", bankid));
1835		if (!(bank_curr = (struct bank_info *)mc_node_get(bankid,
1836		    bank_head))) {
1837			cmn_err(CE_WARN, "mlayout_del: no bank %d\n", bankid);
1838			continue;
1839		}
1840
1841		if (bank_curr->valid) {
1842			base = bank_curr->um & ~bank_curr->uk;
1843			base <<= MADR_UPA_SHIFT;
1844			bank_curr->valid = 0;
1845			memsize -= bank_curr->size;
1846
1847			/* Delete bank from segment; delete segment if no banks left */
1848			if (!(seg = seg_match_base(base))) {
1849				cmn_err(CE_WARN, "mlayout_del: no seg\n");
1850				mc_node_del((mc_dlist_t *)bank_curr, &bank_head,
1851				    &bank_tail);
1852				kmem_free(bank_curr, sizeof (struct bank_info));
1853				continue;
1854			}
1855
1856			/* unlink the bank from the segment's bank list */
1857			if (bank_curr->n_inseg == NULL) {
1858				/* node is at the tail of list */
1859				seg->tb_inseg = bank_curr->p_inseg;
1860			} else {
1861				bank_curr->n_inseg->p_inseg =
1862				    bank_curr->p_inseg;
1863			}
1864
1865			if (bank_curr->p_inseg == NULL) {
1866				/* node is at the head of list */
1867				seg->hb_inseg = bank_curr->n_inseg;
1868			} else {
1869				bank_curr->p_inseg->n_inseg =
1870				    bank_curr->n_inseg;
1871			}
1872
1873			seg->nbanks--;
1874			seg->size -= bank_curr->size;
1875
1876			if (seg->nbanks == 0) {
1877				mc_node_del((mc_dlist_t *)seg, &seg_head,
1878				    &seg_tail);
1879				kmem_free(seg, sizeof (struct seg_info));
1880				nsegments--;
1881			}
1882
1883		}
1884		mc_node_del((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
1885		kmem_free(bank_curr, sizeof (struct bank_info));
1886	}	/* end of for loop for four banks */
1887
1888	if (mc_dimm_sids && delete) {
1889		rw_enter(&mcdimmsids_rw, RW_WRITER);
1890		i = mc_get_sid_cache_index(mc_id);
1891		if (i >= 0) {
1892			mc_dimm_sids[i].state = MC_DIMM_SIDS_INVALID;
1893			if (mc_dimm_sids[i].sids) {
1894				kmem_free(mc_dimm_sids[i].sids,
1895				    sizeof (dimm_sid_t) * (NDGRPS * NDIMMS));
1896				mc_dimm_sids[i].sids = NULL;
1897			}
1898		}
1899		rw_exit(&mcdimmsids_rw);
1900	}
1901
1902	mutex_exit(&mcdatamutex);
1903}
1904
1905/*
1906 * Search the segment list, starting at seg_head, by base address.
1907 * Input: base address
1908 * Return: pointer to the matching segment, or NULL if not found.
1909 */
1910static struct seg_info *
1911seg_match_base(u_longlong_t base)
1912{
1913	struct seg_info *seg_ptr;
1914
1915	seg_ptr = (struct seg_info *)seg_head;
1916	while (seg_ptr != NULL) {
1917		DPRINTF(MC_LIST_DEBUG, ("seg_match: base %lu,given base %llu\n",
1918		    seg_ptr->base, base));
1919		if (seg_ptr->base == base)
1920			break;
1921		seg_ptr = (struct seg_info *)seg_ptr->seg_node.next;
1922	}
1923	return (seg_ptr);
1924}
1925
1926/*
1927 * mc_dlist_t is a doubly linked list node holding a unique id and pointers
1928 * to the next and previous nodes. seg_info, bank_info, dgrp_info,
1929 * device_info, and mctrl_info each embed it as their first member so they
1930 * can share the add, del, and get operations. New nodes are appended at
1931 * the tail; the list is not kept sorted.
1932 *
1933 * Input: pointer to the node to be added, and the head and tail of the list
1934 */
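/*
 * A minimal usage sketch: since the mc_dlist_t sits at the top of each
 * *_info structure, nodes go in and out of the shared list routines with
 * simple casts, for example:
 *
 *	mc_node_add((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
 *	bank_curr = (struct bank_info *)mc_node_get(bankid, bank_head);
 */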
1935
1936static void
1937mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
1938{
1939	DPRINTF(MC_LIST_DEBUG, ("mc_node_add: node->id %d head %p tail %p\n",
1940	    node->id, (void *)*head, (void *)*tail));
1941
1942	if (*head != NULL) {
1943		node->prev = *tail;
1944		node->next = (*tail)->next;
1945		(*tail)->next = node;
1946		*tail = node;
1947	} else {
1948		node->next = node->prev = NULL;
1949		*head = *tail = node;
1950	}
1951}
1952
1953/*
1954 * Input: pointer to the node to be deleted, and the list's head and tail
1955 *
1956 * The node being deleted may be in any of the following positions:
1957 * 1. at the tail of the list
1958 * 2. at the head of the list
1959 * 3. at both the head and the tail, i.e. the only node left
1960 * 4. in the middle of the list
1961 */
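/*
 * The two independent NULL checks below cover all four cases: each check
 * either fixes up the corresponding head/tail pointer or splices the
 * neighboring node's link.
 */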
1962
1963static void
1964mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
1965{
1966	if (node->next == NULL) {
1967		/* deleted node is at the tail of list */
1968		*tail = node->prev;
1969	} else {
1970		node->next->prev = node->prev;
1971	}
1972
1973	if (node->prev == NULL) {
1974		/* deleted node is at the head of list */
1975		*head = node->next;
1976	} else {
1977		node->prev->next = node->next;
1978	}
1979}
1980
1981/*
1982 * Search the list from its head for a node that matches the given id.
1983 * Input: id and the head of the list
1984 * Return: pointer to the matching node, or NULL if not found
1985 */
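/*
 * Callers cast the result back to the embedding structure, e.g.
 * (struct mctrl_info *)mc_node_get(mc_id, mctrl_head) in mlayout_del().
 */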
1986static mc_dlist_t *
1987mc_node_get(int id, mc_dlist_t *head)
1988{
1989	mc_dlist_t *node;
1990
1991	node = head;
1992	while (node != NULL) {
1993		DPRINTF(MC_LIST_DEBUG, ("mc_node_get: id %d, given id %d\n",
1994		    node->id, id));
1995		if (node->id == id)
1996			break;
1997		node = node->next;
1998	}
1999	return (node);
2000}
2001
2002/*
2003 * The mc-us3 driver allows a platform to add extra label
2004 * information to the unum string. If the platform implements a
2005 * kernel function called plat_add_mem_unum_label(), it is called
2006 * here. Such a function is typically implemented in the platmod.
2007 */
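/*
 * The address test below relies on plat_add_mem_unum_label() being a
 * weak symbol: if the platform does not supply an implementation, the
 * symbol's address is 0 and the call is skipped.  The other plat_*()
 * tests in this file use the same idiom.
 */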
2008static void
2009mc_add_mem_unum_label(char *buf, int mcid, int bank, int dimm)
2010{
2011	if (&plat_add_mem_unum_label)
2012		plat_add_mem_unum_label(buf, mcid, bank, dimm);
2013}
2014
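/*
 * Return the index of the DIMM serial id cache entry belonging to the
 * given MC id, or -1 if there is no matching entry.
 */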
2015static int
2016mc_get_sid_cache_index(int mcid)
2017{
2018	int	i;
2019
2020	for (i = 0; i < max_entries; i++) {
2021		if (mcid == mc_dimm_sids[i].mcid)
2022			return (i);
2023	}
2024
2025	return (-1);
2026}
2027
2028static void
2029mc_update_bank(struct bank_info *bank)
2030{
2031	int i, j;
2032	int bankid, mcid, dgrp_no;
2033
2034	/*
2035	 * If the DIMM serial ids for this MC are not yet available, mark
2036	 * them as requested.  Record which segment the DIMMs belong to,
2037	 * allocate space for the serial ids (filled in later by the
2038	 * platform layer), and point the bank_info structure at its
2039	 * serial id slots.
2040	 */
2041	bankid = bank->bank_node.id;
2042	mcid = bankid / NBANKS;
2043	i = mc_get_sid_cache_index(mcid);
2044	if (mc_dimm_sids[i].state == MC_DIMM_SIDS_INVALID)
2045		mc_dimm_sids[i].state = MC_DIMM_SIDS_REQUESTED;
2046
2047	mc_dimm_sids[i].seg_id = bank->seg_id;
2048
2049	if (mc_dimm_sids[i].sids == NULL) {
2050		mc_dimm_sids[i].sids = (dimm_sid_t *)kmem_zalloc(
2051		    sizeof (dimm_sid_t) * (NDGRPS * NDIMMS), KM_SLEEP);
2052	}
2053
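	/*
	 * Each MC owns NDGRPS * NDIMMS serial id slots.  The DIMMs of
	 * device group dgrp_no occupy the NDIMMS consecutive slots
	 * starting at index NDIMMS * dgrp_no, so point dimmsidp[] there.
	 */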
2054	dgrp_no = bank->devgrp_id % NDGRPS;
2055
2056	for (j = 0; j < NDIMMS; j++) {
2057		bank->dimmsidp[j] =
2058		    &mc_dimm_sids[i].sids[j + (NDIMMS * dgrp_no)];
2059	}
2060}
2061
2062static int
2063mc_populate_sid_cache(void)
2064{
2065	struct bank_info	*bank;
2066
2067	if (&plat_populate_sid_cache == 0)
2068		return (ENOTSUP);
2069
2070	ASSERT(RW_WRITE_HELD(&mcdimmsids_rw));
2071
2072	bank = (struct bank_info *)bank_head;
2073	while (bank != NULL) {
2074		if (!bank->valid) {
2075			bank = (struct bank_info *)bank->bank_node.next;
2076			continue;
2077		}
2078
2079		mc_update_bank(bank);
2080
2081		bank = (struct bank_info *)bank->bank_node.next;
2082	}
2083
2085	/*
2086	 * Call to the platform layer to populate the cache
2087	 * with DIMM serial ids.
2088	 */
2089	return (plat_populate_sid_cache(mc_dimm_sids, max_entries));
2090}
2091
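/*
 * Runs in the kernel thread created by mc_init_sid_cache(): asks the
 * platform layer for the DIMM serial id cache and populates it.
 */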
2092static void
2093mc_init_sid_cache_thr(void)
2094{
2095	ASSERT(mc_dimm_sids == NULL);
2096
2097	mutex_enter(&mcdatamutex);
2098	rw_enter(&mcdimmsids_rw, RW_WRITER);
2099
2100	mc_dimm_sids = plat_alloc_sid_cache(&max_entries);
2101	(void) mc_populate_sid_cache();
2102
2103	rw_exit(&mcdimmsids_rw);
2104	mutex_exit(&mcdatamutex);
2105}
2106
2107static int
2108mc_init_sid_cache(void)
2109{
2110	if (&plat_alloc_sid_cache) {
2111		(void) thread_create(NULL, 0, mc_init_sid_cache_thr, NULL, 0,
2112		    &p0, TS_RUN, minclsyspri);
2113		return (0);
2114	} else
2115		return (ENOTSUP);
2116}
2117
2118static int
2119mc_get_mem_sid(int mcid, int dimm, char *buf, int buflen, int *lenp)
2120{
2121	int	i;
2122
2123	if (buflen < DIMM_SERIAL_ID_LEN)
2124		return (ENOSPC);
2125
2126	/*
2127	 * If DIMM serial ids have not been cached yet, tell the
2128	 * caller to try again.
2129	 */
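	/*
	 * Use a non-blocking acquire so the caller never waits on
	 * mc_init_sid_cache_thr(), which may hold the lock as writer
	 * while it fills the cache; EAGAIN tells the caller to retry.
	 */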
2130	if (!rw_tryenter(&mcdimmsids_rw, RW_READER))
2131		return (EAGAIN);
2132
2133	if (mc_dimm_sids == NULL) {
2134		rw_exit(&mcdimmsids_rw);
2135		return (EAGAIN);
2136	}
2137
2138	/*
2139	 * Find dimm serial id using mcid and dimm #
2140	 */
2141	for (i = 0; i < max_entries; i++) {
2142		if (mc_dimm_sids[i].mcid == mcid)
2143			break;
2144	}
2145	if ((i == max_entries) || (!mc_dimm_sids[i].sids)) {
2146		rw_exit(&mcdimmsids_rw);
2147		return (ENOENT);
2148	}
2149
2150	(void) strlcpy(buf, mc_dimm_sids[i].sids[dimm],
2151	    DIMM_SERIAL_ID_LEN);
2152	*lenp = strlen(buf);
2153
2154	rw_exit(&mcdimmsids_rw);
2155	return (0);
2156}
2157