/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Ereport-handling routines for memory errors
 */

#include <gmem_mem.h>
#include <gmem_dimm.h>
#include <gmem_page.h>
#include <gmem.h>

#include <strings.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <fm/fmd_api.h>
#include <fm/libtopo.h>
#include <sys/fm/protocol.h>
#include <sys/async.h>
#include <sys/errclassify.h>

#define	OFFBIT		0xFFFFFFFFFFFC07FFULL
#define	BIT28_32	0x00000001F0000000ULL
#define	BIT13_17	0x000000000003E000ULL
#define	BIT18_19	0x00000000000C0000ULL
#define	BIT11_12	0x0000000000001800ULL

struct ce_name2type {
	const char *name;
	ce_dispact_t type;
};

nvlist_t *fru_nvl;

static ce_dispact_t
gmem_mem_name2type(const char *name)
{
	static const struct ce_name2type new[] = {
		{ "mem-unk",		CE_DISP_UNKNOWN },
		{ "mem-is",		CE_DISP_INTERMITTENT },
		{ "mem-cs",		CE_DISP_PERS },
		{ "mem-ss",		CE_DISP_STICKY },
		{ NULL }
	};
	const struct ce_name2type *names = &new[0];
	const struct ce_name2type *tp;

	for (tp = names; tp->name != NULL; tp++) {
		if (strcasecmp(name, tp->name) == 0)
			return (tp->type);
	}

	return (CE_DISP_UNKNOWN);
}
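
/*
 * For reference: gmem_ce() below calls this with strstr(class, "mem"),
 * so an ereport class ending in, e.g., "mem-ss" maps to CE_DISP_STICKY
 * and anything unrecognized falls back to CE_DISP_UNKNOWN.
 */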

/*ARGSUSED*/
static int
find_fault_fru(topo_hdl_t *thp, tnode_t *node, void *arg)
{
	nvlist_t *nvl = (nvlist_t *)arg;
	nvlist_t *rsc = NULL, *fru = NULL;
	nvlist_t **hcl, **topo_hcl;
	uint_t n1, n2;
	char *name, *name1, *name2;
	char *id1, *id2;
	int err, i;

	if (topo_node_resource(node, &rsc, &err) < 0)
		return (TOPO_WALK_NEXT);

	err = nvlist_lookup_nvlist_array(rsc, FM_FMRI_HC_LIST, &topo_hcl, &n1);

	if (err != 0) {
		nvlist_free(rsc);
		return (TOPO_WALK_NEXT);
	}

	(void) nvlist_lookup_string(topo_hcl[n1 - 1], FM_FMRI_HC_NAME, &name);
	if (strcmp(name, "chip") != 0) {
		nvlist_free(rsc);
		return (TOPO_WALK_NEXT);
	}

	(void) nvlist_lookup_nvlist_array(nvl, FM_FMRI_HC_LIST, &hcl, &n2);

	if (n1 != n2) {
		nvlist_free(rsc);
		return (TOPO_WALK_NEXT);
	}

	for (i = 0; i < n1; i++) {
		(void) nvlist_lookup_string(topo_hcl[i], FM_FMRI_HC_NAME,
		    &name1);
		(void) nvlist_lookup_string(topo_hcl[i], FM_FMRI_HC_ID, &id1);
		(void) nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME, &name2);
		(void) nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &id2);
		if (strcmp(name1, name2) != 0 || strcmp(id1, id2) != 0) {
			nvlist_free(rsc);
			return (TOPO_WALK_NEXT);
		}
	}

	(void) topo_node_fru(node, &fru, NULL, &err);
	if (fru != NULL) {
		(void) nvlist_dup(fru, &fru_nvl, NV_UNIQUE_NAME);
		nvlist_free(fru);
	}
	nvlist_free(rsc);
	return (TOPO_WALK_TERMINATE);
}

nvlist_t *
gmem_find_fault_fru(fmd_hdl_t *hdl, nvlist_t *nvl)
{
	topo_hdl_t *thp;
	topo_walk_t *twp;
	int err;

	fru_nvl = NULL;

	if ((thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION)) == NULL)
		return (NULL);

	if ((twp = topo_walk_init(thp, FM_FMRI_SCHEME_HC,
	    find_fault_fru, nvl, &err)) == NULL) {
		fmd_hdl_topo_rele(hdl, thp);
		return (NULL);
	}

	(void) topo_walk_step(twp, TOPO_WALK_CHILD);
	topo_walk_fini(twp);
	fmd_hdl_topo_rele(hdl, thp);
	return (fru_nvl);
}
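
/*
 * For reference: gmem_gen_datapath_fault() below shows the intended use.
 * The caller builds an hc-scheme FMRI whose hc-list ends at the "chip"
 * node and passes it in; the walk stops at the topo node whose resource
 * matches element-by-element, and a duplicate of that node's FRU (or
 * NULL) is returned via fru_nvl.
 */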

/*
 * fault the FRU of the common detector between two DIMMs
 */
void
gmem_gen_datapath_fault(fmd_hdl_t *hdl, nvlist_t *det)
{
	char *name, *id;
	nvlist_t **hcl1, **hcl;
	uint_t n;
	int i, j;
	fmd_case_t *cp;
	nvlist_t *fltlist, *rsrc;
	nvlist_t *fru = NULL;

	if (nvlist_lookup_nvlist_array(det, FM_FMRI_HC_LIST, &hcl1, &n) < 0)
		return;

	for (i = 0; i < n; i++) {
		(void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_NAME, &name);
		if (strcmp(name, "chip") == 0)
			break;
	}

	n = i + 1;
	hcl = fmd_hdl_zalloc(hdl, sizeof (nvlist_t *) * n, FMD_SLEEP);
	if (hcl == NULL)
		return;

	for (i = 0; i < n; i++) {
		(void) nvlist_alloc(&hcl[i],
		    NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0);
	}

	for (i = 0, j = 0; i < n; i++) {
		(void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_NAME, &name);
		(void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_ID, &id);
		(void) nvlist_add_string(hcl[j], FM_FMRI_HC_NAME, name);
		(void) nvlist_add_string(hcl[j], FM_FMRI_HC_ID, id);
		j++;
		if (strcmp(name, "chip") == 0)
			break;
	}

	if (nvlist_alloc(&rsrc, NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0) != 0) {
		for (i = 0; i < n; i++) {
			if (hcl[i] != NULL)
				nvlist_free(hcl[i]);
		}
		fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
		return;
	}

	if (nvlist_add_uint8(rsrc, FM_VERSION, FM_HC_SCHEME_VERSION) != 0 ||
	    nvlist_add_string(rsrc, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0 ||
	    nvlist_add_string(rsrc, FM_FMRI_HC_ROOT, "") != 0 ||
	    nvlist_add_uint32(rsrc, FM_FMRI_HC_LIST_SZ, n) != 0 ||
	    nvlist_add_nvlist_array(rsrc, FM_FMRI_HC_LIST, hcl, n) != 0) {
		for (i = 0; i < n; i++) {
			if (hcl[i] != NULL)
				nvlist_free(hcl[i]);
		}
		fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
		nvlist_free(rsrc);
		return;
	}

	fru = gmem_find_fault_fru(hdl, rsrc);
	if (fru != NULL) {
		cp = fmd_case_open(hdl, NULL);
		fltlist = fmd_nvl_create_fault(hdl, "fault.memory.datapath",
		    100, fru, fru, fru);
		fmd_case_add_suspect(hdl, cp, fltlist);
		fmd_case_solve(hdl, cp);
		nvlist_free(fru);
	}

	for (i = 0; i < n; i++) {
		if (hcl[i] != NULL)
			nvlist_free(hcl[i]);
	}

	fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
	nvlist_free(rsrc);
}

/*
 * Formula to convert an unhashed address to a hashed address:
 * PA[17:11] = (PA[32:28] xor PA[17:13]) :: (PA[19:18] xor PA[12:11])
 */
static void
gmem_to_hashed_addr(uint64_t *addr, uint64_t afar)
{
	*addr = (afar & OFFBIT) | ((afar & BIT28_32) >> 15) ^ (afar & BIT13_17)
	    | ((afar & BIT18_19) >> 7) ^ (afar & BIT11_12);
}
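
/*
 * Worked example, for illustration only (not part of the original code):
 * with the masks above, an unhashed AFAR of 0x100002000 (bits 32 and 13
 * set) hashes as follows.  (afar & BIT28_32) >> 15 is 0x20000, which is
 * XORed with (afar & BIT13_17) = 0x2000 to give 0x22000; bits 19:18 and
 * 12:11 are clear, so the second term is 0.  ORed with the bits kept by
 * OFFBIT (0x100000000), the hashed address is 0x100022000.
 */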

/*
 * check if a dimm has n CEs that have the same symbol-in-error
 */
int
upos_thresh_check(gmem_dimm_t *dimm, uint16_t upos, uint32_t threshold)
{
	int i;
	gmem_mq_t *ip, *next;
	int count = 0;

	for (i = 0; i < GMEM_MAX_CKWDS; i++) {
		for (ip = gmem_list_next(&dimm->mq_root[i]); ip != NULL;
		    ip = next) {
			next = gmem_list_next(ip);
			if (ip->mq_unit_position == upos) {
				count++;
				if (count >= threshold)
					return (1);
			}
		}
	}
	return (0);
}

/*
 * check if smaller number of retired pages > 1/16 of larger number of
 * retired pages
 */
int
check_bad_rw_retired_pages(fmd_hdl_t *hdl, gmem_dimm_t *d1, gmem_dimm_t *d2)
{
	uint_t sret, lret;
	double ratio;

	sret = lret = 0;

	if (d2->dimm_nretired < d1->dimm_nretired) {
		sret = d2->dimm_nretired;
		lret = d1->dimm_nretired;
	} else if (d2->dimm_nretired > d1->dimm_nretired) {
		sret = d1->dimm_nretired;
		lret = d2->dimm_nretired;
	} else
		return (0);

	ratio = lret * GMEM_MQ_RATIO;

	if (sret > ratio) {
		fmd_hdl_debug(hdl, "sret=%d lret=%d ratio=%.3f",
		    sret, lret, ratio);
		return (1);
	}
	return (0);
}
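
/*
 * Illustration only, assuming GMEM_MQ_RATIO encodes the 1/16 ratio named
 * in the comment above: if one DIMM has retired 512 pages and the other
 * 40, the threshold is 512 * 1/16 = 32, and 40 > 32 makes the check above
 * return 1; a DIMM with only 20 retired pages against the same 512 would
 * return 0.
 */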

/*
 * check bad r/w between any two DIMMs. The check succeeds if
 * - each DIMM has n CEs which have the same symbol-in-error,
 * - the smaller number of retired pages > 1/16 of the larger number of
 *   retired pages
 */
static int
check_bad_rw_between_dimms(fmd_hdl_t *hdl, gmem_dimm_t *d1, gmem_dimm_t *d2,
    uint16_t *rupos)
{
	int i;
	gmem_mq_t *ip, *next;
	uint16_t upos;

	for (i = 0; i < GMEM_MAX_CKWDS; i++) {
		for (ip = gmem_list_next(&d1->mq_root[i]); ip != NULL;
		    ip = next) {
			next = gmem_list_next(ip);
			upos = ip->mq_unit_position;
			if (upos_thresh_check(d1, upos, gmem.gm_nupos)) {
				if (upos_thresh_check(d2, upos,
				    gmem.gm_nupos)) {
					if (check_bad_rw_retired_pages(hdl,
					    d1, d2)) {
						*rupos = upos;
						return (1);
					}
				}
			}
		}
	}

	return (0);
}

static void
bad_reader_writer_check(fmd_hdl_t *hdl, nvlist_t *det, gmem_dimm_t *ce_dimm)
{
	gmem_dimm_t *d, *next;
	uint16_t upos;

	for (d = gmem_list_next(&gmem.gm_dimms); d != NULL; d = next) {
		next = gmem_list_next(d);
		if (d == ce_dimm)
			continue;
		if (!gmem_same_datapath_dimms(hdl, ce_dimm, d))
			continue;
		if (check_bad_rw_between_dimms(hdl, ce_dimm, d, &upos)) {
			gmem_gen_datapath_fault(hdl, det);
			gmem_save_symbol_error(hdl, ce_dimm, upos);
			fmd_hdl_debug(hdl,
			    "check_bad_rw_dimms succeeded: %s %s\n",
			    ce_dimm->dimm_serial, d->dimm_serial);
			return;
		}
	}
}

/*
 * rule 5a checking. The check succeeds if
 * - nretired >= 512
 * - nretired >= 128 and (addr_hi - addr_low) / (nretired - 1) > 512KB
 */
static void
ce_thresh_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
{
	nvlist_t *flt, *rsrc;
	fmd_case_t *cp;
	uint_t nret;
	uint64_t delta_addr = 0;

	if (dimm->dimm_flags & GMEM_F_FAULTING)
		return;

	nret = dimm->dimm_nretired;

	if (nret < gmem.gm_low_ce_thresh)
		return;

	if (dimm->dimm_phys_addr_hi >= dimm->dimm_phys_addr_low)
		delta_addr =
		    (dimm->dimm_phys_addr_hi - dimm->dimm_phys_addr_low) /
		    (nret - 1);

	if (nret >= gmem.gm_max_retired_pages || delta_addr > GMEM_MQ_512KB) {
		fmd_hdl_debug(hdl, "ce_thresh_check succeeded nret=%d", nret);
		dimm->dimm_flags |= GMEM_F_FAULTING;
		gmem_dimm_dirty(hdl, dimm);

		cp = fmd_case_open(hdl, NULL);
		rsrc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
		flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_PAGES,
		    GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsrc);
		fmd_case_add_suspect(hdl, cp, flt);
		fmd_case_solve(hdl, cp);
		if (rsrc != NULL)
			nvlist_free(rsrc);
	}
}
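
/*
 * Illustration only, assuming gm_low_ce_thresh is 128 and
 * gm_max_retired_pages is 512 as in the rule 5a comment above: a DIMM
 * with 512 retired pages is faulted outright; a DIMM with 200 retired
 * pages whose hashed retired addresses span 0x10000000 bytes is also
 * faulted, since 0x10000000 / (200 - 1) is about 1.3MB, which exceeds
 * the 512KB spacing limit (GMEM_MQ_512KB).
 */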

/*
 * rule 5b checking. The check succeeds if more than 120
 * non-intermittent CEs are reported against one symbol
 * position of one afar in 72 hours
 */
static void
mq_5b_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
{
	nvlist_t *flt, *rsrc;
	fmd_case_t *cp;
	gmem_mq_t *ip, *next;
	int cw;

	for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
		for (ip = gmem_list_next(&dimm->mq_root[cw]);
		    ip != NULL; ip = next) {
			next = gmem_list_next(ip);
			if (ip->mq_dupce_count >= gmem.gm_dupce) {
				fmd_hdl_debug(hdl,
				    "mq_5b_check succeeded: duplicate CE=%d",
				    ip->mq_dupce_count);
				cp = fmd_case_open(hdl, NULL);
				rsrc = gmem_find_dimm_rsc(hdl,
				    dimm->dimm_serial);
				flt = fmd_nvl_create_fault(hdl,
				    GMEM_FAULT_DIMM_PAGES, GMEM_FLTMAXCONF,
				    NULL, gmem_dimm_fru(dimm), rsrc);
				dimm->dimm_flags |= GMEM_F_FAULTING;
				gmem_dimm_dirty(hdl, dimm);
				fmd_case_add_suspect(hdl, cp, flt);
				fmd_case_solve(hdl, cp);
				if (rsrc != NULL)
					nvlist_free(rsrc);
				return;
			}
		}
	}
}
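
/*
 * For reference: mq_dupce_count is set to 1 by mq_create() below,
 * incremented by mq_update() each time a CE repeats at the same checkword,
 * symbol position and AFAR, and decremented by mq_prune_dup() as time
 * stamps older than GMEM_MQ_TIMELIM expire, so the comparison above asks
 * whether gm_dupce or more duplicate CEs were seen within the time limit.
 */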

/*
 * delete the expired duplicate CE time stamps
 */
static void
mq_prune_dup(fmd_hdl_t *hdl, gmem_mq_t *ip, uint64_t now)
{
	tstamp_t *tsp, *next;

	for (tsp = gmem_list_next(&ip->mq_dupce_tstamp); tsp != NULL;
	    tsp = next) {
		next = gmem_list_next(tsp);
		if (tsp->tstamp < now - GMEM_MQ_TIMELIM) {
			gmem_list_delete(&ip->mq_dupce_tstamp, &tsp->ts_l);
			fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
			ip->mq_dupce_count--;
		}
	}
}

static void
mq_update(fmd_hdl_t *hdl, fmd_event_t *ep, gmem_mq_t *ip, uint64_t now)
{
	tstamp_t *tsp;

	ip->mq_tstamp = now;
	ip->mq_ep = ep;
	if (fmd_serd_exists(hdl, ip->mq_serdnm))
		fmd_serd_destroy(hdl, ip->mq_serdnm);

	fmd_serd_create(hdl, ip->mq_serdnm, GMEM_MQ_SERDN, GMEM_MQ_SERDT);
	(void) fmd_serd_record(hdl, ip->mq_serdnm, ep);

	tsp = fmd_hdl_zalloc(hdl, sizeof (tstamp_t), FMD_SLEEP);
	tsp->tstamp = now;
	gmem_list_append(&ip->mq_dupce_tstamp, tsp);
	ip->mq_dupce_count++;
}

/*
 * Create a fresh index block for MQSC CE correlation.
 */
gmem_mq_t *
mq_create(fmd_hdl_t *hdl, fmd_event_t *ep,
    uint64_t afar, uint16_t upos, uint16_t ckwd, uint64_t now)
{
	gmem_mq_t *cp;
	tstamp_t *tsp;

	cp = fmd_hdl_zalloc(hdl, sizeof (gmem_mq_t), FMD_SLEEP);
	cp->mq_tstamp = now;
	cp->mq_ckwd = ckwd;
	cp->mq_phys_addr = afar;
	cp->mq_unit_position = upos;
	cp->mq_ep = ep;
	cp->mq_serdnm =
	    gmem_mq_serdnm_create(hdl, "mq", afar, ckwd, upos);

	tsp = fmd_hdl_zalloc(hdl, sizeof (tstamp_t), FMD_SLEEP);
	tsp->tstamp = now;
	gmem_list_append(&cp->mq_dupce_tstamp, tsp);
	cp->mq_dupce_count = 1;

	/*
	 * Create SERD to keep this event from being removed
	 * by fmd which may not know there is an event pointer
	 * saved here. This SERD is *never* meant to fire.
	 */
	if (fmd_serd_exists(hdl, cp->mq_serdnm))
		fmd_serd_destroy(hdl, cp->mq_serdnm);

	fmd_serd_create(hdl, cp->mq_serdnm, GMEM_MQ_SERDN, GMEM_MQ_SERDT);
	(void) fmd_serd_record(hdl, cp->mq_serdnm, ep);

	return (cp);
}

gmem_mq_t *
mq_destroy(fmd_hdl_t *hdl, gmem_list_t *lp, gmem_mq_t *ip)
{
	gmem_mq_t *jp = gmem_list_next(ip);
	tstamp_t *tsp, *next;

	if (ip->mq_serdnm != NULL) {
		if (fmd_serd_exists(hdl, ip->mq_serdnm))
			fmd_serd_destroy(hdl, ip->mq_serdnm);
		fmd_hdl_strfree(hdl, ip->mq_serdnm);
		ip->mq_serdnm = NULL;
	}

	for (tsp = gmem_list_next(&ip->mq_dupce_tstamp); tsp != NULL;
	    tsp = next) {
		next = gmem_list_next(tsp);
		gmem_list_delete(&ip->mq_dupce_tstamp, &tsp->ts_l);
		fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
	}

	gmem_list_delete(lp, &ip->mq_l);
	fmd_hdl_free(hdl, ip, sizeof (gmem_mq_t));

	return (jp);
}

/*
 * Add an index block for a new CE, sorted
 * a) by ascending unit position
 * b) order of arrival (~= time order)
 */
void
mq_add(fmd_hdl_t *hdl, gmem_dimm_t *dimm, fmd_event_t *ep,
    uint64_t afar, uint16_t unit_position, uint16_t ckwd,
    uint64_t now)
{
	gmem_mq_t *ip, *jp;
	int cw = (int)ckwd;

	for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL; ) {
		if (ip->mq_unit_position > unit_position) {
			/* list is in unit position order */
			break;
		} else if (ip->mq_unit_position == unit_position &&
		    ip->mq_phys_addr == afar) {
			/*
			 * Found a duplicate cw, unit_position, and afar.
			 * Reuse this node: update the mq_t structure and
			 * its duplicate-CE bookkeeping in place.
			 */
			mq_update(hdl, ep, ip, now);
			return;
		} else {
			ip = gmem_list_next(ip);
		}
	}

	jp = mq_create(hdl, ep, afar, unit_position, cw, now);
	if (ip == NULL)
		gmem_list_append(&dimm->mq_root[cw], jp);
	else
		gmem_list_insert_before(&dimm->mq_root[cw], ip, jp);
}
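
/*
 * For reference: with existing index blocks at unit positions 2 and 5 on
 * a checkword list, a new CE at unit position 4 is inserted between them,
 * keeping the list sorted; a CE that matches an existing (ckwd, unit
 * position, afar) triple is folded into that block via mq_update() above
 * instead of creating a new node.
 */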

/*
 * Prune the MQSC index lists (one for each checkword), by deleting
 * outdated index blocks from each list.
 */
void
mq_prune(fmd_hdl_t *hdl, gmem_dimm_t *dimm, uint64_t now)
{
	gmem_mq_t *ip;
	int cw;

	for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
		for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL; ) {
			if (ip->mq_tstamp < now - GMEM_MQ_TIMELIM) {
				/*
				 * This event has timed out - delete the
				 * mq block as well as serd for the event.
				 */
				ip = mq_destroy(hdl, &dimm->mq_root[cw], ip);
			} else {
				mq_prune_dup(hdl, ip, now);
				/* tstamp < now - ce_t */
				ip = gmem_list_next(ip);
			}
		} /* per checkword */
	} /* for each checkword list */
}

/*
 * Check the MQSC index lists (one for each checkword) by making a
 * complete pass through each list, checking if the criteria for
 * Rule 4A have been met.  Rule 4A checking is done for each checkword.
 *
 * Rule 4A: fault a DIMM "whenever Solaris reports two or more CEs from
 * two or more different physical addresses on each of two or more different
 * bit positions from the same DIMM within 72 hours of each other, and all
 * the addresses are in the same relative checkword (that is, the AFARs
 * are all the same modulo 64).  [Note: This means at least 4 CEs; two
 * from one bit position, with unique addresses, and two from another,
 * also with unique addresses, and the lower 6 bits of all the addresses
 * are the same.]"
 */
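
/*
 * Illustrative example only: the smallest pattern that trips Rule 4A in
 * mq_check() below is four CEs in one checkword within the time limit,
 * e.g. two CEs at unit position 3 from AFARs 0x1000 and 0x2040 plus two
 * CEs at unit position 9 from AFARs 0x3080 and 0x40c0.  Each pair shares
 * a symbol but not an address, all four AFARs are equal modulo 64, and
 * the two pairs make (i - upos_pairs) reach 2 for that checkword.
 */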

void
mq_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
{
	int upos_pairs, curr_upos, cw, i, j;
	nvlist_t *flt, *rsc;
	typedef struct upos_pair {
		int upos;
		gmem_mq_t *mq1;
		gmem_mq_t *mq2;
	} upos_pair_t;
	upos_pair_t upos_array[16]; /* max per cw = 2, * 8 cw's */
	gmem_mq_t *ip;

	/*
	 * Each upos_array[] member represents a pair of CEs for the same
	 * unit position (symbol) which is a 4 bit nibble.
	 * MQSC rule 4 requires pairs of CEs from the same symbol (same DIMM
	 * for rule 4A, and same DRAM for rule 4B) for a violation - this
	 * is why CE pairs are tracked.
	 */
	upos_pairs = 0;
	upos_array[0].mq1 = NULL;

	for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
		i = upos_pairs;
		curr_upos = -1;

		/*
		 * mq_root[] is an array of cumulative lists of CEs
		 * indexed by checkword where the list is in unit position
		 * order. Loop through checking for duplicate unit position
		 * entries (filled in at mq_create()).
		 * The upos_array[] is filled in each time a duplicate
		 * unit position is found; the first time through the loop
		 * of a unit position sets curr_upos but does not fill in
		 * upos_array[] until the second symbol is found.
		 */
		for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL;
		    ip = gmem_list_next(ip)) {
			if (curr_upos != ip->mq_unit_position) {
				/* Set initial current position */
				curr_upos = ip->mq_unit_position;
			} else if (i > upos_pairs &&
			    curr_upos == upos_array[i-1].upos) {
				/*
				 * Only keep track of CE pairs; skip
				 * triples, quads, etc...
				 */
				continue;
			} else if (upos_array[i].mq1 == NULL) {
				/* Have a pair. Add to upos_array[] */
				fmd_hdl_debug(hdl, "pair:upos=%d",
				    curr_upos);
				upos_array[i].upos = curr_upos;
				upos_array[i].mq1 = gmem_list_prev(ip);
				upos_array[i].mq2 = ip;
				upos_array[++i].mq1 = NULL;
			}
		}
		if (i - upos_pairs >= 2) {
			/* Rule 4A violation */
			rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
			flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_4A,
			    GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsc);
			for (j = upos_pairs; j < i; j++) {
				fmd_case_add_ereport(hdl,
				    dimm->dimm_case.cc_cp,
				    upos_array[j].mq1->mq_ep);
				fmd_case_add_ereport(hdl,
				    dimm->dimm_case.cc_cp,
				    upos_array[j].mq2->mq_ep);
			}
			dimm->dimm_flags |= GMEM_F_FAULTING;
			gmem_dimm_dirty(hdl, dimm);
			fmd_case_add_suspect(hdl, dimm->dimm_case.cc_cp, flt);
			fmd_case_solve(hdl, dimm->dimm_case.cc_cp);
			if (rsc != NULL)
				nvlist_free(rsc);
			return;
		}
		upos_pairs = i;
		assert(upos_pairs < 16);
	}
}

/*ARGSUSED*/
gmem_evdisp_t
gmem_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
{
	uint16_t symbol_pos, cw;
	uint64_t phyaddr, offset, addr;
	uint32_t filter_ratio = 0;
	gmem_dimm_t *dimm;
	gmem_page_t *page;
	nvlist_t *fru = NULL;
	nvlist_t *topo_rsc = NULL;
	nvlist_t *rsrc, *det;
	const char *uuid;
	ce_dispact_t type;
	boolean_t diagnose;
	char *sn;
	int err, rc;
	uint64_t *now;
	uint_t nelem;
	int skip_error = 0;

	err = nvlist_lookup_boolean_value(nvl, GMEM_ERPT_PAYLOAD_DIAGNOSE,
	    &diagnose);
	if (err != 0 || diagnose == 0)
		return (GMEM_EVD_UNUSED);

	if ((nvlist_lookup_uint64(nvl, GMEM_ERPT_PAYLOAD_PHYSADDR,
	    &phyaddr) != 0) ||
	    (nvlist_lookup_uint64(nvl, GMEM_ERPT_PAYLOAD_OFFSET,
	    &offset) != 0)) {
		fmd_hdl_debug(hdl, "Can't get page phyaddr or offset");
		return (GMEM_EVD_BAD);
	}

	fmd_hdl_debug(hdl, "phyaddr %llx offset %llx", phyaddr, offset);

	if ((page = gmem_page_lookup(phyaddr)) != NULL &&
	    page->page_case.cc_cp != NULL &&
	    fmd_case_solved(hdl, page->page_case.cc_cp))
		return (GMEM_EVD_REDUND);

	if (nvlist_lookup_nvlist(nvl, GMEM_ERPT_PAYLOAD_RESOURCE,
	    &rsrc) != 0 ||
	    nvlist_lookup_string(rsrc, FM_FMRI_HC_SERIAL_ID, &sn) != 0) {
		fmd_hdl_debug(hdl, "Can't get dimm serial\n");
		return (GMEM_EVD_BAD);
	}

	fmd_hdl_debug(hdl, "serial %s", sn);

	if (nvlist_lookup_nvlist(nvl, GMEM_ERPT_PAYLOAD_DETECTOR, &det) != 0)
		return (GMEM_EVD_BAD);

	/*
	 * Find dimm fru by serial number.
	 */
	fru = gmem_find_dimm_fru(hdl, sn);

	if (fru == NULL) {
		fmd_hdl_debug(hdl, "Dimm is not present\n");
		return (GMEM_EVD_UNUSED);
	}

	if ((dimm = gmem_dimm_lookup(hdl, fru)) == NULL &&
	    (dimm = gmem_dimm_create(hdl, fru, det)) == NULL) {
		nvlist_free(fru);
		return (GMEM_EVD_UNUSED);
	}

	if (dimm->dimm_case.cc_cp == NULL) {
		dimm->dimm_case.cc_cp = gmem_case_create(hdl,
		    &dimm->dimm_header, GMEM_PTR_DIMM_CASE, &uuid);
	}

	/*
	 * Add to MQSC correlation lists all CEs which pass validity
	 * checks above. If there is no symbol_pos & relative ckword
	 * in the ereport, skip rule 4A checking.
	 */

	err = nvlist_lookup_uint16(nvl, GMEM_ERPT_PAYLOAD_SYMBOLPOS,
	    &symbol_pos);
	err |= nvlist_lookup_uint16(nvl, GMEM_ERPT_PAYLOAD_CKW, &cw);

	if (err == 0) {
		fmd_hdl_debug(hdl, "symbol_pos=%d cw=%d", symbol_pos, cw);

		if (nvlist_lookup_uint64_array(nvl,
		    "__tod", &now, &nelem) == 0) {
			skip_error = gmem_check_symbol_error(hdl, dimm,
			    symbol_pos);

			if (!skip_error ||
			    !(dimm->dimm_flags & GMEM_F_FAULTING))
				mq_add(hdl, dimm, ep, phyaddr, symbol_pos,
				    cw, *now);

			mq_prune(hdl, dimm, *now);

			if (!skip_error)
				bad_reader_writer_check(hdl, det, dimm);
			if (!(dimm->dimm_flags & GMEM_F_FAULTING)) {
				mq_check(hdl, dimm);
				mq_5b_check(hdl, dimm);
			}
		}
	}

	type = gmem_mem_name2type(strstr(class, "mem"));

	switch (type) {
	case CE_DISP_UNKNOWN:
		GMEM_STAT_BUMP(ce_unknown);
		nvlist_free(fru);
		return (GMEM_EVD_UNUSED);
	case CE_DISP_INTERMITTENT:
		GMEM_STAT_BUMP(ce_interm);
		nvlist_free(fru);
		return (GMEM_EVD_UNUSED);
	case CE_DISP_PERS:
		GMEM_STAT_BUMP(ce_clearable_persis);
		break;
	case CE_DISP_STICKY:
		GMEM_STAT_BUMP(ce_sticky);
		break;
	default:
		nvlist_free(fru);
		return (GMEM_EVD_BAD);
	}

	if (gmem_check_symbol_error(hdl, dimm, symbol_pos)) {
		nvlist_free(fru);
		return (GMEM_EVD_REDUND);
	}

	if (page == NULL) {
		page = gmem_page_create(hdl, fru, phyaddr, offset);
		if (page == NULL) {
			nvlist_free(fru);
			return (GMEM_EVD_UNUSED);
		}
	}

	nvlist_free(fru);

	if (page->page_case.cc_cp == NULL) {
		page->page_case.cc_cp = gmem_case_create(hdl,
		    &page->page_header, GMEM_PTR_PAGE_CASE, &uuid);
	}

	switch (type) {
	case CE_DISP_PERS:
		fmd_hdl_debug(hdl, "adding persistent event to CE serd");
		if (page->page_case.cc_serdnm == NULL)
			gmem_page_serd_create(hdl, page, nvl);

		filter_ratio = gmem_get_serd_filter_ratio(nvl);

		fmd_hdl_debug(hdl, "filter_ratio %d\n", filter_ratio);

		if (gmem_serd_record(hdl, page->page_case.cc_serdnm,
		    filter_ratio, ep) == FMD_B_FALSE) {
			return (GMEM_EVD_OK); /* engine hasn't fired */
		}

		fmd_hdl_debug(hdl, "ce page serd fired\n");
		fmd_case_add_serd(hdl, page->page_case.cc_cp,
		    page->page_case.cc_serdnm);
		fmd_serd_reset(hdl, page->page_case.cc_serdnm);
		break;	/* to retire */

	case CE_DISP_STICKY:
		fmd_case_add_ereport(hdl, page->page_case.cc_cp, ep);
		break;	/* to retire */
	}

	topo_rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
	rc = gmem_page_fault(hdl, gmem_dimm_fru(dimm), topo_rsc,
	    ep, phyaddr, offset);

	if (rc) {
		gmem_to_hashed_addr(&addr, phyaddr);

		if (addr > dimm->dimm_phys_addr_hi)
			dimm->dimm_phys_addr_hi = addr;
		if (addr < dimm->dimm_phys_addr_low)
			dimm->dimm_phys_addr_low = addr;

		dimm->dimm_nretired++;
		dimm->dimm_retstat.fmds_value.ui64++;
		gmem_dimm_dirty(hdl, dimm);
		ce_thresh_check(hdl, dimm);
	}
	return (GMEM_EVD_OK);
}

void
gmem_dimm_close(fmd_hdl_t *hdl, void *arg)
{
	gmem_dimm_destroy(hdl, arg);
}