mem_disc.c revision 6198:3e08c35b97c5
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29/*
30 * DIMM unum/device map construction
31 *
32 * The map is constructed from PICL configuration files, which contain a map
33 * between a form of the unum and the device to be used for serial number
34 * retrieval.  We massage the PICL unum into a form that matches the one used
35 * by mem FMRIs, creating a map entry from the munged version.  As described
36 * below, two configuration files must be correlated to determine the correct
37 * device path, and thus to build the mem_dimm_map_t list.  While platforms
38 * without PICL configuration files are acceptable (some platforms, like
39 * Serengeti and Starcat, don't have configuration files as of this writing),
40 * platforms with only one or the other aren't.
41 *
42 * On Sun4v platforms, we read the 'mdesc' machine description file in order
43 * to obtain the mapping between dimm unum+jnum strings (which denote slot
44 * names) and the serial numbers of the dimms occupying those slots.
45 */
46
47#include <sys/param.h>
48#include <sys/mdesc.h>
49
50#include <mem.h>
51#include <fm/fmd_fmri.h>
52
53#include <fcntl.h>
54#include <unistd.h>
55#include <stdio.h>
56#include <stdlib.h>
57#include <string.h>
58#include <strings.h>
59#include <errno.h>
60#include <time.h>
61#include <sys/mem.h>
62#include <sys/fm/ldom.h>
63
64extern ldom_hdl_t *mem_scheme_lhp;
65
/*
 * printf-style patterns naming the two PICL configuration files.  The first
 * %s receives the root directory and the second the platform name.
 */
#define	PICL_FRUTREE_PATH	\
	"%s/usr/platform/%s/lib/picl/plugins/piclfrutree.conf"

#define	PICL_FRUDATA_PATH	\
	"%s/usr/platform/%s/lib/picl/plugins/libpiclfrudata.conf"
71
/*
 * One entry of the frudata path map, kept as a singly-linked list.  Each
 * entry pairs a generic (minor-less) device path with the full device path
 * to use for that device.
 */
typedef struct mem_path_map {
	struct mem_path_map *pm_next;	/* next entry; NULL ends the list */
	char *pm_path;			/* generic path (lookup key) */
	char *pm_fullpath;		/* full path, including minor name */
} mem_path_map_t;
77
78typedef struct label_xlators {
79	const char *lx_infmt;
80	uint_t lx_matches;
81	const char *lx_outfmt;
82} label_xlators_t;
83
84/*
85 * PICL configuration files use a different format for the DIMM name (unum)
86 * than that used in mem FMRIs.  The following patterns and routine are used
87 * to convert between the PICL and unum formats.
88 */
89static const label_xlators_t label_xlators[] = {
90	{ "/system-board/mem-slot?Label=J%4d%5$n", 1,
91	    "J%04d" },
92	{ "/system-board/mem-slot?Label=DIMM%1d%5$n", 1,
93	    "DIMM%d" },
94	{ "/system-board/cpu-mem-slot?Label=%4$c/mem-slot?Label=J%1$4d%5$n", 2,
95	    "Slot %4$c: J%1$4d" },
96	{ "/MB/system-board/mem-slot?Label=DIMM%1d%5$n", 1,
97	    "DIMM%d" },
98	{ "/MB/system-board/P%1d/cpu/B%1d/bank/D%1d%5$n", 3,
99	    "MB/P%d/B%d/D%d" },
100	{ "/MB/system-board/C%1d/cpu-module/P0/cpu/B%1d/bank/D%1d%5$n", 3,
101	    "MB/C%d/P0/B%d/D%d" },
102	{ "/MB/system-board/DIMM%1d%5$n", 1,
103	    "MB/DIMM%d" },
104	{ "/C%1d/system-board/P0/cpu/B%1d/bank/D%1d%5$n", 3,
105	    "C%d/P0/B%d/D%d" },
106	{ NULL }
107};
108
109static int
110label_xlate(char *buf)
111{
112	const label_xlators_t *xlator;
113
114	if (strncmp(buf, "/frutree/chassis", 16) != 0)
115		return (0);
116
117	for (xlator = label_xlators; xlator->lx_infmt != NULL; xlator++) {
118		uint_t len, a1, a2, a3;
119		char a4;
120
121		if (sscanf(buf + 16, xlator->lx_infmt, &a1, &a2, &a3, &a4,
122		    &len) == xlator->lx_matches && len == strlen(buf + 16)) {
123			(void) sprintf(buf, xlator->lx_outfmt, a1, a2, a3, a4);
124			return (0);
125		}
126	}
127
128	return (fmd_fmri_set_errno(EINVAL));
129}
130
/*
 * Compare two paths taken from PICL files.  The comparison is character by
 * character, except that a unit-address suffix ("@...") on a component of one
 * path is ignored when the same component of the other path carries no
 * suffix at all: `foo' equals `foo@1,2', but `foo@1,2' does not equal
 * `foo@3,4'.
 *
 * Returns 1 if the paths are considered equal, 0 otherwise.
 */
static int
picl_path_eq(const char *p1, const char *p2)
{
	/* Stop only once both paths have reached their terminator together. */
	while (*p1 != *p2 || *p1 != '\0') {
		if (*p1 == *p2) {
			p1++;
			p2++;
		} else if (*p1 == '@' && (*p2 == '/' || *p2 == '\0')) {
			/* p1 has a unit address here and p2 doesn't; skip it */
			p1 += strcspn(p1, "/");
		} else if (*p2 == '@' && (*p1 == '/' || *p1 == '\0')) {
			/* mirror case: skip p2's unit address */
			p2 += strcspn(p2, "/");
		} else {
			return (0);
		}
	}

	return (1);
}
165
/*
 * Rewrite a PICL path in place:
 *
 *  - a leading "name:" tag is removed;
 *  - every `?UnitAddress=a,b' construct, where a and b are hexadecimal
 *    numbers, is replaced by the `@a,b' form.  Occurrences that are not
 *    followed by two comma-separated hex numbers are left alone.
 *
 * The result is never longer than the input, so no extra space is needed.
 */
static void
path_depicl(char *path)
{
	static const char tag[] = "name:";
	static const char ua[] = "?UnitAddress=";
	const size_t taglen = sizeof (tag) - 1;
	const size_t ualen = sizeof (ua) - 1;
	char *c;

	/*
	 * Compare the whole tag, colon included.  Checking only "name" would
	 * strip five characters from unrelated paths such as "namex/..." and
	 * would step past the terminator of a bare "name".
	 */
	if (strncmp(path, tag, taglen) == 0)
		(void) memmove(path, path + taglen, strlen(path + taglen) + 1);

	for (c = path; (c = strstr(c, ua)) != NULL; c++) {
		int len = 0;

		/* %n is only reached if both hex numbers were present. */
		(void) sscanf(c + ualen, "%*x,%*x%n", &len);
		if (len == 0)
			continue;

		*c = '@';
		/* regions overlap, hence memmove rather than memcpy */
		(void) memmove(c + 1, c + ualen, strlen(c + ualen) + 1);
	}
}
193
194/*
195 * The libpiclfrudata configuration file contains a map between the generic
196 * (minor-less) device and the specific device to be used for SPD/SEEPROM
197 * data access.
198 *
199 * Entries are of the form:
200 *
201 * name:/platform/generic-path
202 * PROP FRUDevicePath string r 0 "full-path"
203 *
204 * Where `generic-path' is the path, sans minor name, to be used for DIMM
205 * data access, and `full-path' is the path with the minor name.
206 */
207static int
208picl_frudata_parse(char *buf, char *path, void *arg)
209{
210	mem_path_map_t **mapp = arg;
211	mem_path_map_t *pm = NULL;
212	char fullpath[BUFSIZ];
213	uint_t len;
214
215	if (sscanf(buf, " PROP FRUDevicePath string r 0 \"%[^\"]\" \n%n",
216	    fullpath, &len) != 1 || fullpath[0] == '\0' || len != strlen(buf))
217		return (0);
218
219	path_depicl(path);
220
221	pm = fmd_fmri_alloc(sizeof (mem_path_map_t));
222	pm->pm_path = fmd_fmri_strdup(path);
223	pm->pm_fullpath = fmd_fmri_strdup(fullpath);
224
225	pm->pm_next = *mapp;
226	*mapp = pm;
227
228	return (1);
229}
230
231/*
232 * The piclfrutree configuration file contains a map between a form of the
233 * DIMM's unum and the generic (minor-less) device used for SPD/SEEPROM data
234 * access.
235 *
236 * Entries are of the form:
237 *
238 * name:/frutree/chassis/picl-unum
239 * REFNODE mem-module fru WITH /platform/generic-path
240 *
241 * Where `picl-unum' is the PICL form of the unum, which we'll massage into
242 * the form compatible with FMRIs (see label_xlate), and `generic-path' is
243 * the minor-less path into the PICL tree for the device used to access the
244 * DIMM.  It is this path that will be used as the key in the frudata
245 * configuration file to determine the proper /devices path.
246 */
247typedef struct dimm_map_arg {
248	mem_path_map_t *dma_pm;
249	mem_dimm_map_t *dma_dm;
250} dimm_map_arg_t;
251
252static int
253picl_frutree_parse(char *buf, char *label, void *arg)
254{
255	dimm_map_arg_t *dma = arg;
256	mem_dimm_map_t *dm = NULL;
257	mem_path_map_t *pm;
258	char path[BUFSIZ];
259	uint_t len;
260
261	/* LINTED - sscanf cannot exceed sizeof (path) */
262	if (sscanf(buf, " REFNODE mem-module fru WITH %s \n%n",
263	    path, &len) != 1 || path[0] == '\0' || len != strlen(buf))
264		return (0);
265
266	if (label_xlate(label) < 0)
267		return (-1); /* errno is set for us */
268
269	path_depicl(path);
270
271	for (pm = dma->dma_pm; pm != NULL; pm = pm->pm_next) {
272		if (picl_path_eq(pm->pm_path, path)) {
273			(void) strcpy(path, pm->pm_fullpath);
274			break;
275		}
276	}
277
278	dm = fmd_fmri_zalloc(sizeof (mem_dimm_map_t));
279	dm->dm_label = fmd_fmri_strdup(label);
280	dm->dm_device = fmd_fmri_strdup(path);
281
282	dm->dm_next = dma->dma_dm;
283	dma->dma_dm = dm;
284
285	return (1);
286}
287
/*
 * Shared reader for both PICL configuration files, which use the same layout.
 *
 * pathpat is a printf pattern taking the root directory and platform name.
 * The file is read line by line:
 *
 *  - lines starting with '#' are skipped;
 *  - an empty line forgets the current label;
 *  - a "name:label" line sets the current label;
 *  - any other line, while a label is current, is passed to func along with
 *    the label and arg.  A positive return from func forgets the label; a
 *    negative return aborts the parse.
 *
 * Returns 0 on success.  Returns -1 with errno set if the file cannot be
 * opened or if func fails.
 */
static int
picl_conf_parse(const char *pathpat, int (*func)(char *, char *, void *),
    void *arg)
{
	char confpath[MAXPATHLEN];
	char linebuf[BUFSIZ], label[BUFSIZ];
	FILE *fp;

	(void) snprintf(confpath, sizeof (confpath), pathpat,
	    fmd_fmri_get_rootdir(), fmd_fmri_get_platform());

	fp = fopen(confpath, "r");
	if (fp == NULL)
		return (-1); /* errno is set for us */

	label[0] = '\0';
	while (fgets(linebuf, sizeof (linebuf), fp) != NULL) {
		int consumed, rc;

		if (linebuf[0] == '#')
			continue;

		if (linebuf[0] == '\n') {
			label[0] = '\0';
			continue;
		}

		/* LINTED - label length cannot exceed length of linebuf */
		if (sscanf(linebuf, " name:%s \n%n", label, &consumed) == 1 &&
		    label[0] != '\0' && consumed == strlen(linebuf))
			continue;

		if (label[0] == '\0')
			continue;

		rc = func(linebuf, label, arg);
		if (rc < 0) {
			/* fclose() may disturb errno; keep the handler's */
			int err = errno;

			(void) fclose(fp);
			return (fmd_fmri_set_errno(err));
		}
		if (rc != 0)
			label[0] = '\0';
	}

	(void) fclose(fp);
	return (0);
}
336
337static void
338path_map_destroy(mem_path_map_t *pm)
339{
340	mem_path_map_t *next;
341
342	for (/* */; pm != NULL; pm = next) {
343		next = pm->pm_next;
344
345		fmd_fmri_strfree(pm->pm_path);
346		fmd_fmri_strfree(pm->pm_fullpath);
347		fmd_fmri_free(pm, sizeof (mem_path_map_t));
348	}
349}
350
/*
 * Return the integer base-2 logarithm of v, rounded down: the number of
 * times v can be halved before it is no larger than 1.  Both 0 and 1 give 0.
 */
uint16_t
mem_log2(uint64_t v)
{
	uint16_t shifts = 0;

	while (v > 1) {
		v >>= 1;
		shifts++;
	}

	return (shifts);
}
360
361static mem_dimm_map_t *
362get_dimm_by_sn(char *sn)
363{
364	mem_dimm_map_t *dp;
365
366	for (dp = mem.mem_dm; dp != NULL; dp = dp->dm_next) {
367		if (strcmp(sn, dp->dm_serid) == 0)
368			return (dp);
369	}
370
371	return (NULL);
372}
373
374#define	MEM_BYTES_PER_CACHELINE	64
375
376static void
377mdesc_init_n1(md_t *mdp, mde_cookie_t *listp)
378{
379	int idx, mdesc_dimm_count;
380	mem_dimm_map_t *dm, *d;
381	uint64_t sysmem_size, i, drgen = fmd_fmri_get_drgen();
382	int dimms, min_chan, max_chan, min_rank, max_rank;
383	int chan, rank, dimm, chans, chan_step;
384	uint64_t mask, chan_mask, chan_value;
385	uint64_t rank_mask, rank_value;
386	char *unum, *serial, *part;
387	mem_seg_map_t *seg;
388	char s[20];
389
390	/*
391	 * Find first 'memory' node -- there should only be one.
392	 * Extract 'memory-generation-id#' value from it.
393	 */
394	mdesc_dimm_count = md_scan_dag(mdp,
395	    MDE_INVAL_ELEM_COOKIE, md_find_name(mdp, "memory"),
396	    md_find_name(mdp, "fwd"), listp);
397
398	if (md_get_prop_val(mdp, listp[0], "memory-generation-id#",
399	    &mem.mem_memconfig))
400		mem.mem_memconfig = 0;
401
402	mdesc_dimm_count = md_scan_dag(mdp,
403	    MDE_INVAL_ELEM_COOKIE, md_find_name(mdp, "dimm_data"),
404	    md_find_name(mdp, "fwd"), listp);
405
406	for (idx = 0; idx < mdesc_dimm_count; idx++) {
407
408		if (md_get_prop_str(mdp, listp[idx], "nac", &unum) < 0)
409			unum = "";
410		if (md_get_prop_str(mdp, listp[idx], "serial#",
411		    &serial) < 0)
412			serial = "";
413		if (md_get_prop_str(mdp, listp[idx], "part#",
414		    &part) < 0)
415			part = "";
416
417		dm = fmd_fmri_zalloc(sizeof (mem_dimm_map_t));
418		dm->dm_label = fmd_fmri_strdup(unum);
419		(void) strncpy(dm->dm_serid, serial,
420		    MEM_SERID_MAXLEN - 1);
421		dm->dm_part = fmd_fmri_strdup(part);
422		dm->dm_drgen = drgen;
423
424		dm->dm_next = mem.mem_dm;
425		mem.mem_dm = dm;
426	}
427	/* N1 (MD) specific segment initialization */
428
429	dimms = 0;
430	min_chan = 99;
431	max_chan = -1;
432	min_rank = 99;
433	max_rank = -1;
434
435	for (d = mem.mem_dm; d != NULL; d = d->dm_next) {
436		if (sscanf(d->dm_label, "MB/CMP0/CH%d/R%d/D%d",
437		    &chan, &rank, &dimm) != 3) /* didn't scan all 3 values */
438			return;
439		min_chan = MIN(min_chan, chan);
440		max_chan = MAX(max_chan, chan);
441		min_rank = MIN(min_rank, rank);
442		max_rank = MAX(max_rank, rank);
443		dimms++;
444	}
445
446	mdesc_dimm_count = md_scan_dag(mdp,
447	    MDE_INVAL_ELEM_COOKIE,
448	    md_find_name(mdp, "mblock"),
449	    md_find_name(mdp, "fwd"),
450	    listp);
451	sysmem_size = 0;
452	for (idx = 0; idx < mdesc_dimm_count; idx++) {
453		uint64_t size = 0;
454		if (md_get_prop_val(mdp, listp[idx], "size", &size) == 0)
455			sysmem_size += size;
456	}
457
458	for (i = 1 << 30; i < sysmem_size; i = i << 1)
459		;
460	if (max_rank > min_rank) {
461		chans = dimms/4;
462		rank_mask = i >> 1;
463	} else {
464		chans = dimms/2;
465		rank_mask = 0;
466	}
467
468	chan_mask = (uint64_t)((chans - 1) * MEM_BYTES_PER_CACHELINE);
469	mask = rank_mask | chan_mask;
470
471	if (chans > 2)
472		chan_step = 1;
473	else
474		chan_step = max_chan - min_chan;
475
476	for (rank = min_rank, rank_value = 0;
477	    rank <= max_rank;
478	    rank++, rank_value += rank_mask) {
479		for (chan = min_chan, chan_value = 0;
480		    chan <= max_chan;
481		    chan += chan_step,
482		    chan_value += MEM_BYTES_PER_CACHELINE) {
483			seg = fmd_fmri_zalloc(sizeof (mem_seg_map_t));
484			seg->sm_next = mem.mem_seg;
485			mem.mem_seg = seg;
486			seg->sm_base = 0;
487			seg->sm_size = sysmem_size;
488			seg->sm_mask = mask;
489			seg->sm_match = chan_value | rank_value;
490			seg->sm_shift = 1;
491			(void) sprintf(s, "MB/CMP0/CH%1d/R%1d", chan, rank);
492			for (d = mem.mem_dm; d != NULL; d = d->dm_next) {
493				if (strncmp(s, d->dm_label, strlen(s)) == 0)
494					d->dm_seg = seg;
495			}
496		}
497	}
498}
499
500static void
501mdesc_init_n2(md_t *mdp, mde_cookie_t *listp, int num_comps)
502{
503	mde_cookie_t *dl, t;
504	int idx, mdesc_dimm_count, mdesc_bank_count;
505	mem_dimm_map_t *dm, *dp;
506	uint64_t i, drgen = fmd_fmri_get_drgen();
507	int n;
508	uint64_t mask, match, base, size;
509	char *unum, *serial, *part, *dash;
510	mem_seg_map_t *smp;
511	char *type, *sp, *jnum, *nac;
512	size_t ss;
513
514	mdesc_dimm_count = 0;
515	for (idx = 0; idx < num_comps; idx++) {
516		if (md_get_prop_str(mdp, listp[idx], "type", &type) < 0)
517			continue;
518		if (strcmp(type, "dimm") == 0) {
519			mdesc_dimm_count++;
520			if (md_get_prop_str(mdp, listp[idx], "nac",
521			    &nac) < 0)
522				nac = "";
523			if (md_get_prop_str(mdp, listp[idx], "label",
524			    &jnum) < 0)
525				jnum = "";
526			if (md_get_prop_str(mdp, listp[idx],
527			    "serial_number", &serial) < 0)
528				serial = "";
529			if (md_get_prop_str(mdp, listp[idx],
530			    "part_number", &part) < 0)
531				part = "";
532			if (md_get_prop_str(mdp, listp[idx],
533			    "dash_number", &dash) < 0)
534				dash = "";
535
536			ss = strlen(part) + strlen(dash) + 1;
537			sp = fmd_fmri_alloc(ss);
538			sp = strcpy(sp, part);
539			sp = strncat(sp, dash, strlen(dash) + 1);
540
541			dm = fmd_fmri_zalloc(sizeof (mem_dimm_map_t));
542
543			if ((strcmp(nac, "") != 0) &&
544			    (strcmp(jnum, "") != 0)) {
545				ss = strlen(nac) + strlen(jnum) + 2;
546				unum = fmd_fmri_alloc(ss);
547				(void) snprintf(unum, ss, "%s/%s", nac,
548				    jnum);
549				dm->dm_label = unum;
550			} else {
551				unum = "";
552				dm->dm_label = fmd_fmri_strdup(unum);
553			}
554
555			(void) strncpy(dm->dm_serid, serial,
556			    MEM_SERID_MAXLEN - 1);
557			dm->dm_part = sp;
558			dm->dm_drgen = drgen;
559
560			dm->dm_next = mem.mem_dm;
561			mem.mem_dm = dm;
562		}
563	}
564
565	/* N2 (PRI) specific segment initialization occurs here */
566
567	mdesc_bank_count = md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE,
568	    md_find_name(mdp, "memory-bank"),
569	    md_find_name(mdp, "fwd"),
570	    listp);
571
572	dl = fmd_fmri_zalloc(mdesc_dimm_count * sizeof (mde_cookie_t));
573
574	for (idx = 0; idx < mdesc_bank_count; idx++) {
575		if (md_get_prop_val(mdp, listp[idx], "mask", &mask) < 0)
576			mask = 0;
577		if (md_get_prop_val(mdp, listp[idx], "match", &match) < 0)
578			match = 0;
579		n = md_scan_dag(mdp, listp[idx],
580		    md_find_name(mdp, "memory-segment"),
581		    md_find_name(mdp, "back"),
582		    &t); /* only 1 "back" arc, so n must equal 1 here */
583		if (md_get_prop_val(mdp, t, "base", &base) < 0)
584			base = 0;
585		if (md_get_prop_val(mdp, t, "size", &size) < 0)
586			size = 0;
587		smp = fmd_fmri_zalloc(sizeof (mem_seg_map_t));
588		smp->sm_next = mem.mem_seg;
589		mem.mem_seg = smp;
590		smp->sm_base = base;
591		smp->sm_size = size;
592		smp->sm_mask = mask;
593		smp->sm_match = match;
594
595		n = md_scan_dag(mdp, listp[idx],
596		    md_find_name(mdp, "component"),
597		    md_find_name(mdp, "fwd"),
598		    dl);
599		smp->sm_shift = mem_log2(n);
600
601		for (i = 0; i < n; i++) {
602			if (md_get_prop_str(mdp, dl[i],
603			    "serial_number", &serial) < 0)
604				continue;
605			if ((dp = get_dimm_by_sn(serial)) == NULL)
606				continue;
607			dp->dm_seg = smp;
608		}
609	}
610	fmd_fmri_free(dl, mdesc_dimm_count * sizeof (mde_cookie_t));
611}
612
613int
614mem_discover_mdesc(md_t *mdp, size_t mdbufsz)
615{
616	mde_cookie_t *listp;
617	int num_nodes;
618	int num_comps = 0;
619
620	num_nodes = md_node_count(mdp);
621	listp = fmd_fmri_alloc(sizeof (mde_cookie_t) * num_nodes);
622
623	num_comps = md_scan_dag(mdp,
624	    MDE_INVAL_ELEM_COOKIE,
625	    md_find_name(mdp, "component"),
626	    md_find_name(mdp, "fwd"),
627	    listp);
628	if (num_comps == 0)
629		mdesc_init_n1(mdp, listp);
630	else
631		mdesc_init_n2(mdp, listp, num_comps);
632
633	fmd_fmri_free(listp, sizeof (mde_cookie_t) * num_nodes);
634	fmd_fmri_free(*mdp, mdbufsz);
635
636	(void) md_fini(mdp);
637	return (0);
638}
639
640int
641mem_discover_picl(void)
642{
643	mem_path_map_t *path_map = NULL;
644	dimm_map_arg_t dma;
645	int rc;
646
647	if (picl_conf_parse(PICL_FRUDATA_PATH, picl_frudata_parse,
648	    &path_map) < 0 && errno != ENOENT)
649		return (-1); /* errno is set for us */
650
651	dma.dma_pm = path_map;
652	dma.dma_dm = NULL;
653
654	if ((rc = picl_conf_parse(PICL_FRUTREE_PATH, picl_frutree_parse,
655	    &dma)) < 0 && errno == ENOENT && path_map == NULL) {
656		/*
657		 * This platform doesn't support serial number retrieval via
658		 * PICL mapping files.  Unfortunate, but not an error.
659		 */
660		return (0);
661	}
662
663	path_map_destroy(path_map);
664
665	if (rc < 0)
666		return (-1); /* errno is set for us */
667
668	if (dma.dma_dm == NULL) {
669		/*
670		 * This platform should support DIMM serial numbers, but we
671		 * weren't able to derive the paths.  Return an error.
672		 */
673		return (fmd_fmri_set_errno(EIO));
674	}
675
676	mem.mem_dm = dma.dma_dm;
677	return (0);
678}
679
680/*
681 * Initialize sun4v machine descriptor file for subsequent use.
682 * If the open fails (most likely because file doesn't exist), or if
683 * initialization fails, return NULL.
684 *
685 * If the open succeeds and initialization also succeeds, the returned value is
686 * a pointer to an md_impl_t, whose 1st element points to the buffer where
687 * the full mdesc has been read in.  The size of this buffer is returned
688 * as 'bufsiz'.  Caller is responsible for deallocating BOTH of these objects.
689 */
690static md_t *
691mdesc_devinit(size_t *bufsiz)
692{
693	uint64_t *bufp;
694	ssize_t size;
695
696	if ((size = ldom_get_core_md(mem_scheme_lhp, &bufp)) > 0) {
697		*bufsiz = (size_t)size;
698		return (md_init_intern(bufp, fmd_fmri_alloc, fmd_fmri_free));
699	}
700
701	return (NULL);
702}
703
704/*
705 * Sun4v: if a valid 'mdesc' machine description file exists,
706 * read the mapping of dimm unum+jnum to serial number from it.
707 */
708int
709mem_discover(void)
710{
711	size_t mdbufsz = 0;
712	md_t *mdp = mdesc_devinit(&mdbufsz);
713
714	if (mdp == NULL)
715		return (mem_discover_picl());
716	else
717		return (mem_discover_mdesc(mdp, mdbufsz));
718}
719
720int
721mem_update_mdesc(void)
722{
723	size_t mdbufsz = 0;
724	md_t *mdp = mdesc_devinit(&mdbufsz);
725
726	if (mdp == NULL) {
727		return (1);
728	} else {
729		mem_dimm_map_t *dm, *next;
730
731		for (dm = mem.mem_dm; dm != NULL; dm = next) {
732			next = dm->dm_next;
733			fmd_fmri_strfree(dm->dm_label);
734			fmd_fmri_strfree(dm->dm_part);
735			fmd_fmri_free(dm, sizeof (mem_dimm_map_t));
736		}
737		mem.mem_dm = NULL;
738
739		return (mem_discover_mdesc(mdp, mdbufsz));
740	}
741}
742
/*
 * Retry policy for the case where the kernel is not yet ready to provide
 * DIMM serial ids.  Some platforms obtain that information from their System
 * Controller through a mailbox interface, and a mailbox request may take up
 * to roughly 30 seconds to time out; 10 retries spaced 3 seconds apart
 * approximate that window.
 */
#define	MAX_MEM_SID_RETRIES	10	/* number of retries */
#define	MEM_SID_RETRY_WAIT	3	/* seconds between retries */
752
753/*
754 * The comparison is asymmetric. It compares up to the length of the
755 * argument unum.
756 */
757static mem_dimm_map_t *
758dm_lookup(const char *name)
759{
760	mem_dimm_map_t *dm;
761
762	for (dm = mem.mem_dm; dm != NULL; dm = dm->dm_next) {
763		if (strncmp(name, dm->dm_label, strlen(name)) == 0)
764			return (dm);
765	}
766
767	return (NULL);
768}
769
770/*
771 * Returns 0 with serial numbers if found, -1 (with errno set) for errors.  If
772 * the unum (or a component of same) wasn't found, -1 is returned with errno
773 * set to ENOENT.  If the kernel doesn't have support for serial numbers,
774 * -1 is returned with errno set to ENOTSUP.
775 */
776static int
777mem_get_serids_from_kernel(const char *unum, char ***seridsp, size_t *nseridsp)
778{
779	char **dimms, **serids;
780	size_t ndimms, nserids;
781	int i, rc = 0;
782	int fd;
783	int retries = MAX_MEM_SID_RETRIES;
784	mem_name_t mn;
785	struct timespec rqt;
786
787	if ((fd = open("/dev/mem", O_RDONLY)) < 0)
788		return (-1);
789
790	if (mem_unum_burst(unum, &dimms, &ndimms) < 0) {
791		(void) close(fd);
792		return (-1); /* errno is set for us */
793	}
794
795	serids = fmd_fmri_zalloc(sizeof (char *) * ndimms);
796	nserids = ndimms;
797
798	bzero(&mn, sizeof (mn));
799
800	for (i = 0; i < ndimms; i++) {
801		mn.m_namelen = strlen(dimms[i]) + 1;
802		mn.m_sidlen = MEM_SERID_MAXLEN;
803
804		mn.m_name = fmd_fmri_alloc(mn.m_namelen);
805		mn.m_sid = fmd_fmri_alloc(mn.m_sidlen);
806
807		(void) strcpy(mn.m_name, dimms[i]);
808
809		do {
810			rc = ioctl(fd, MEM_SID, &mn);
811
812			if (rc >= 0 || errno != EAGAIN)
813				break;
814
815			if (retries == 0) {
816				errno = ETIMEDOUT;
817				break;
818			}
819
820			/*
821			 * EAGAIN indicates the kernel is
822			 * not ready to provide DIMM serial
823			 * ids.  Sleep MEM_SID_RETRY_WAIT seconds
824			 * and try again.
825			 * nanosleep() is used instead of sleep()
826			 * to avoid interfering with fmd timers.
827			 */
828			rqt.tv_sec = MEM_SID_RETRY_WAIT;
829			rqt.tv_nsec = 0;
830			(void) nanosleep(&rqt, NULL);
831
832		} while (retries--);
833
834		if (rc < 0) {
835			/*
836			 * ENXIO can happen if the kernel memory driver
837			 * doesn't have the MEM_SID ioctl (e.g. if the
838			 * kernel hasn't been patched to provide the
839			 * support).
840			 *
841			 * If the MEM_SID ioctl is available but the
842			 * particular platform doesn't support providing
843			 * serial ids, ENOTSUP will be returned by the ioctl.
844			 */
845			if (errno == ENXIO)
846				errno = ENOTSUP;
847			fmd_fmri_free(mn.m_name, mn.m_namelen);
848			fmd_fmri_free(mn.m_sid, mn.m_sidlen);
849			mem_strarray_free(serids, nserids);
850			mem_strarray_free(dimms, ndimms);
851			(void) close(fd);
852			return (-1);
853		}
854
855		serids[i] = fmd_fmri_strdup(mn.m_sid);
856
857		fmd_fmri_free(mn.m_name, mn.m_namelen);
858		fmd_fmri_free(mn.m_sid, mn.m_sidlen);
859	}
860
861	mem_strarray_free(dimms, ndimms);
862
863	(void) close(fd);
864
865	*seridsp = serids;
866	*nseridsp = nserids;
867
868	return (0);
869}
870
871/*
872 * Returns 0 with serial numbers if found, -1 (with errno set) for errors.  If
873 * the unum (or a component of same) wasn't found, -1 is returned with errno
874 * set to ENOENT.
875 */
876static int
877mem_get_serids_from_cache(const char *unum, char ***seridsp, size_t *nseridsp)
878{
879	uint64_t drgen = fmd_fmri_get_drgen();
880	char **dimms, **serids;
881	size_t ndimms, nserids;
882	mem_dimm_map_t *dm;
883	int i, rc = 0;
884
885	if (mem_unum_burst(unum, &dimms, &ndimms) < 0)
886		return (-1); /* errno is set for us */
887
888	serids = fmd_fmri_zalloc(sizeof (char *) * ndimms);
889	nserids = ndimms;
890
891	for (i = 0; i < ndimms; i++) {
892		if ((dm = dm_lookup(dimms[i])) == NULL) {
893			rc = fmd_fmri_set_errno(EINVAL);
894			break;
895		}
896
897		if (*dm->dm_serid == '\0' || dm->dm_drgen != drgen) {
898			/*
899			 * We don't have a cached copy, or the copy we've got is
900			 * out of date.  Look it up again.
901			 */
902			if (mem_get_serid(dm->dm_device, dm->dm_serid,
903			    sizeof (dm->dm_serid)) < 0) {
904				rc = -1; /* errno is set for us */
905				break;
906			}
907
908			dm->dm_drgen = drgen;
909		}
910
911		serids[i] = fmd_fmri_strdup(dm->dm_serid);
912	}
913
914	mem_strarray_free(dimms, ndimms);
915
916	if (rc == 0) {
917		*seridsp = serids;
918		*nseridsp = nserids;
919	} else {
920		mem_strarray_free(serids, nserids);
921	}
922
923	return (rc);
924}
925
926/*
927 * Returns 0 with serial numbers if found, -1 (with errno set) for errors.  If
928 * the unum (or a component of same) wasn't found, -1 is returned with errno
929 * set to ENOENT.
930 */
931static int
932mem_get_serids_from_mdesc(const char *unum, char ***seridsp, size_t *nseridsp)
933{
934	uint64_t drgen = fmd_fmri_get_drgen();
935	char **dimms, **serids;
936	size_t ndimms, nserids;
937	mem_dimm_map_t *dm;
938	int i, rc = 0;
939
940	if (mem_unum_burst(unum, &dimms, &ndimms) < 0)
941		return (-1); /* errno is set for us */
942
943	serids = fmd_fmri_zalloc(sizeof (char *) * ndimms);
944	nserids = ndimms;
945
946	/*
947	 * first go through dimms and see if dm_drgen entries are outdated
948	 */
949	for (i = 0; i < ndimms; i++) {
950		if ((dm = dm_lookup(dimms[i])) == NULL ||
951		    dm->dm_drgen != drgen)
952			break;
953	}
954
955	if (i < ndimms && mem_update_mdesc() != 0) {
956		mem_strarray_free(dimms, ndimms);
957		return (-1);
958	}
959
960	/*
961	 * get to this point if an up-to-date mdesc (and corresponding
962	 * entries in the global mem list) exists
963	 */
964	for (i = 0; i < ndimms; i++) {
965		if ((dm = dm_lookup(dimms[i])) == NULL) {
966			rc = fmd_fmri_set_errno(EINVAL);
967			break;
968		}
969
970		if (dm->dm_drgen != drgen)
971			dm->dm_drgen = drgen;
972
973		/*
974		 * mdesc and dm entry was updated by an earlier call to
975		 * mem_update_mdesc, so we go ahead and dup the serid
976		 */
977		serids[i] = fmd_fmri_strdup(dm->dm_serid);
978	}
979
980	mem_strarray_free(dimms, ndimms);
981
982	if (rc == 0) {
983		*seridsp = serids;
984		*nseridsp = nserids;
985	} else {
986		mem_strarray_free(serids, nserids);
987	}
988
989	return (rc);
990}
991
992/*
993 * Returns 0 with part numbers if found, returns -1 for errors.
994 */
995static int
996mem_get_parts_from_mdesc(const char *unum, char ***partsp, uint_t *npartsp)
997{
998	uint64_t drgen = fmd_fmri_get_drgen();
999	char **dimms, **parts;
1000	size_t ndimms, nparts;
1001	mem_dimm_map_t *dm;
1002	int i, rc = 0;
1003
1004	if (mem_unum_burst(unum, &dimms, &ndimms) < 0)
1005		return (-1); /* errno is set for us */
1006
1007	parts = fmd_fmri_zalloc(sizeof (char *) * ndimms);
1008	nparts = ndimms;
1009
1010	/*
1011	 * first go through dimms and see if dm_drgen entries are outdated
1012	 */
1013	for (i = 0; i < ndimms; i++) {
1014		if ((dm = dm_lookup(dimms[i])) == NULL ||
1015		    dm->dm_drgen != drgen)
1016			break;
1017	}
1018
1019	if (i < ndimms && mem_update_mdesc() != 0) {
1020		mem_strarray_free(dimms, ndimms);
1021		mem_strarray_free(parts, nparts);
1022		return (-1);
1023	}
1024
1025	/*
1026	 * get to this point if an up-to-date mdesc (and corresponding
1027	 * entries in the global mem list) exists
1028	 */
1029	for (i = 0; i < ndimms; i++) {
1030		if ((dm = dm_lookup(dimms[i])) == NULL) {
1031			rc = fmd_fmri_set_errno(EINVAL);
1032			break;
1033		}
1034
1035		if (dm->dm_drgen != drgen)
1036			dm->dm_drgen = drgen;
1037
1038		/*
1039		 * mdesc and dm entry was updated by an earlier call to
1040		 * mem_update_mdesc, so we go ahead and dup the part
1041		 */
1042		if (dm->dm_part == NULL) {
1043			rc = -1;
1044			break;
1045		}
1046		parts[i] = fmd_fmri_strdup(dm->dm_part);
1047	}
1048
1049	mem_strarray_free(dimms, ndimms);
1050
1051	if (rc == 0) {
1052		*partsp = parts;
1053		*npartsp = nparts;
1054	} else {
1055		mem_strarray_free(parts, nparts);
1056	}
1057
1058	return (rc);
1059}
1060
1061static int
1062mem_get_parts_by_unum(const char *unum, char ***partp, uint_t *npartp)
1063{
1064	if (mem.mem_dm == NULL)
1065		return (-1);
1066	else
1067		return (mem_get_parts_from_mdesc(unum, partp, npartp));
1068}
1069
1070static int
1071get_seg_by_sn(char *sn, mem_seg_map_t **segmap)
1072{
1073	mem_dimm_map_t *dm;
1074
1075	for (dm = mem.mem_dm; dm != NULL; dm = dm->dm_next) {
1076		if (strcmp(sn, dm->dm_serid) == 0) {
1077			*segmap = dm->dm_seg;
1078			return (0);
1079		}
1080	}
1081	return (-1);
1082}
1083
/*
 * Niagara-1, Niagara-2, and Victoria Falls all have physical address
 * spaces of 40 bits.  The bit loops below run up to and including this
 * value.
 */

#define	MEM_PHYS_ADDRESS_LIMIT	0x10000000000ULL

/*
 * Remove from paddr the bit positions selected by mask and close the gaps.
 *
 * mask has 1s in the physical address bits that select the DIMM (or set of
 * DIMMs) holding the address.  Dropping those bits and packing the remaining
 * ones together, low bits first, gives the 'address' within that DIMM or set
 * of DIMMs.
 */

static uint64_t
extract_bits(uint64_t paddr, uint64_t mask)
{
	uint64_t src = 1;	/* bit currently examined in paddr */
	uint64_t dst = 1;	/* next free bit in the packed result */
	uint64_t packed = 0;

	while (src <= MEM_PHYS_ADDRESS_LIMIT) {
		if (!(src & mask)) {
			if (src & paddr)
				packed |= dst;
			dst <<= 1;
		}
		src <<= 1;
	}

	return (packed);
}

/*
 * The inverse of extract_bits: rebuild a physical address from an offset.
 *
 * Bit positions where mask is 0 are filled, low bits first, with successive
 * bits of offset.  Bit positions where mask is 1 take the corresponding bit
 * of value.
 */
static uint64_t
insert_bits(uint64_t offset, uint64_t mask, uint64_t value)
{
	uint64_t src = 1;	/* next bit to take from offset */
	uint64_t dst = 1;	/* bit currently produced in the result */
	uint64_t paddr = 0;

	while (dst <= MEM_PHYS_ADDRESS_LIMIT) {
		if (dst & mask) {
			paddr |= dst & value;
		} else {
			if (offset & src)
				paddr |= dst;
			src <<= 1;
		}
		dst <<= 1;
	}

	return (paddr);
}
1141
1142int
1143mem_get_serids_by_unum(const char *unum, char ***seridsp, size_t *nseridsp)
1144{
1145	/*
1146	 * Some platforms do not support the caching of serial ids by the
1147	 * mem scheme plugin but instead support making serial ids available
1148	 * via the kernel.
1149	 */
1150	if (mem.mem_dm == NULL)
1151		return (mem_get_serids_from_kernel(unum, seridsp, nseridsp));
1152	else if (mem_get_serids_from_mdesc(unum, seridsp, nseridsp) == 0)
1153		return (0);
1154	else
1155		return (mem_get_serids_from_cache(unum, seridsp, nseridsp));
1156}
1157
1158void
1159mem_expand_opt(nvlist_t *nvl, char *unum, char **serids)
1160{
1161	mem_seg_map_t *seg;
1162	uint64_t offset, physaddr;
1163	char **parts;
1164	uint_t nparts;
1165
1166	/*
1167	 * The following additional expansions are all optional.
1168	 * Failure to retrieve a data value, or failure to add it
1169	 * successfully to the FMRI, does NOT cause a failure of
1170	 * fmd_fmri_expand.  All optional expansions will be attempted
1171	 * once expand_opt is entered.
1172	 */
1173
1174	if ((mem.mem_seg != NULL) &&
1175	    (get_seg_by_sn(*serids, &seg) == 0) &&
1176	    (seg != NULL)) { /* seg can be NULL if segment missing from PRI */
1177
1178		if (nvlist_lookup_uint64(nvl,
1179		    FM_FMRI_MEM_OFFSET, &offset) == 0) {
1180			physaddr = insert_bits((offset<<seg->sm_shift),
1181			    seg->sm_mask, seg->sm_match);
1182			(void) nvlist_add_uint64(nvl, FM_FMRI_MEM_PHYSADDR,
1183			    physaddr); /* displaces any previous physaddr */
1184		} else if (nvlist_lookup_uint64(nvl,
1185		    FM_FMRI_MEM_PHYSADDR, &physaddr) == 0) {
1186			offset = extract_bits(physaddr,
1187			    seg->sm_mask) >> seg->sm_shift;
1188			(void) (nvlist_add_uint64(nvl, FM_FMRI_MEM_OFFSET,
1189			    offset));
1190		}
1191	}
1192
1193	if (nvlist_lookup_string_array(nvl, FM_FMRI_HC_PART,
1194	    &parts, &nparts) != 0) {
1195		if (mem_get_parts_by_unum(unum, &parts, &nparts) == 0) {
1196			(void) nvlist_add_string_array(nvl,
1197			    FM_FMRI_HC_PART, parts, nparts);
1198			mem_strarray_free(parts, nparts);
1199		}
1200	}
1201}
1202