cmd_dp_page.c revision 1283:d3e5610e2d1c
113240Sprr/*
213240Sprr * CDDL HEADER START
313240Sprr *
413240Sprr * The contents of this file are subject to the terms of the
513240Sprr * Common Development and Distribution License, Version 1.0 only
613240Sprr * (the "License").  You may not use this file except in compliance
713240Sprr * with the License.
813240Sprr *
913240Sprr * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
1013240Sprr * or http://www.opensolaris.org/os/licensing.
1113240Sprr * See the License for the specific language governing permissions
1213240Sprr * and limitations under the License.
1313240Sprr *
1413240Sprr * When distributing Covered Code, include this CDDL HEADER in each
1513240Sprr * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1613240Sprr * If applicable, add the following below this CDDL HEADER, with the
1713240Sprr * fields enclosed by brackets "[]" replaced with your own identifying
1813240Sprr * information: Portions Copyright [yyyy] [name of copyright owner]
1913240Sprr *
2013240Sprr * CDDL HEADER END
2113240Sprr */
2213240Sprr/*
2313240Sprr * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
2413240Sprr * Use is subject to license terms.
2513240Sprr */
2613240Sprr
2713240Sprr#pragma ident	"%Z%%M%	%I%	%E% SMI"
2813240Sprr
/*
 * Support routines for managing potential page and bank faults that have
 * been deferred due to a datapath error.  Currently deferment only occurs
 * if a memory UE occurs while a datapath error is active.  When this happens
 * a page case is created with a special subtype of CMD_PTR_DP_PAGE_DEFER.  An
 * entry (a cmd_dp_defer_t) is added to a list of deferred pages.  The entry
 * links to the cmd_page_t in the cmd_pages list and also keeps track of what
 * memory controller ids are associated with the first AFAR and any more that
 * are seen while the page is deferred.  This information is used to determine
 * whether the page should be faulted or whether the fault should be skipped
 * because an intervening datapath fault has occurred.  If a page is faulted
 * when it is replayed, the corresponding bank is faulted, too, since the
 * original error was a UE.  Note that no action is taken to undo any action
 * taken by the kernel when the UE was detected.  Currently the kernel will
 * attempt to immediately retire the page where a UE is detected, and the
 * retire may or may not have completed by the time FMA receives an ereport.
 * The possibility of a datapath fault resulting in memory UEs is very small,
 * so the likelihood of encountering this scenario is also very small.
 */
4813240Sprr
4913240Sprr#include <cmd.h>
5013240Sprr#include <cmd_dp.h>
5113240Sprr#include <cmd_dp_page.h>
5213240Sprr#include <cmd_bank.h>
5313240Sprr#include <cmd_page.h>
5413240Sprr
5513240Sprr#include <fm/fmd_api.h>
5613240Sprr#include <sys/nvpair.h>
5713240Sprr
5813240Sprrextern void cmd_bank_fault(fmd_hdl_t *, cmd_bank_t *);
5913240Sprr
6013240Sprrstatic void
6113240Sprrdp_page_defer_data_write(fmd_hdl_t *hdl, cmd_dp_defer_t *dpage)
6213240Sprr{
6313240Sprr	fmd_buf_write(hdl, dpage->dp_defer_page->page_case, "mcids",
6413240Sprr	    &dpage->dp_defer_mcids, sizeof (dpage->dp_defer_mcids));
6513240Sprr}
6613240Sprr
6713240Sprrstatic void
6813240Sprrdp_page_defer_data_restore(fmd_hdl_t *hdl, cmd_dp_defer_t *dpage)
6913240Sprr{
7013240Sprr	fmd_buf_read(hdl, dpage->dp_defer_page->page_case, "mcids",
7113240Sprr	    &dpage->dp_defer_mcids, sizeof (dpage->dp_defer_mcids));
7213240Sprr}
7313240Sprr
7413240Sprrstatic void
7513240Sprrdp_page_defer_add_data(fmd_hdl_t *hdl, cmd_dp_defer_t *dpage, uint64_t afar)
7613240Sprr{
7713240Sprr	int mcid;
7813240Sprr	int i;
7913240Sprr
8013240Sprr	if (cmd_dp_get_mcid(afar, &mcid) < 0)
8113240Sprr		fmd_hdl_abort(hdl, "cmd_dp_get_mcid failed");
8213240Sprr
8313240Sprr	for (i = 0; i < DP_MAX_MCS; i++) {
8413240Sprr		if (dpage->dp_defer_mcids[i] == -1) {
8513240Sprr			dpage->dp_defer_mcids[i] = mcid;
8613240Sprr			break;
8713240Sprr		}
8813240Sprr		if (dpage->dp_defer_mcids[i] == mcid)
8913240Sprr			break;
9013240Sprr	}
9113240Sprr
9213240Sprr	if (i == DP_MAX_MCS)
9313240Sprr		fmd_hdl_abort(hdl, "too many mcids for deferred page");
9413240Sprr
9513240Sprr	dp_page_defer_data_write(hdl, dpage);
9613240Sprr}
9713240Sprr
9813240Sprrstatic cmd_dp_defer_t *
9913240Sprrdp_page_defer_create(fmd_hdl_t *hdl, cmd_page_t *page, uint64_t afar)
10013240Sprr{
10113240Sprr	cmd_dp_defer_t *dpage;
10213240Sprr	int i;
10313240Sprr
10413240Sprr	dpage = fmd_hdl_zalloc(hdl, sizeof (cmd_dp_defer_t), FMD_SLEEP);
10513240Sprr
10613240Sprr	dpage->dp_defer_page = page;
10713240Sprr
10813240Sprr	for (i = 0; i < DP_MAX_MCS; i++)
10913240Sprr		dpage->dp_defer_mcids[i] = -1;
11013240Sprr
11113240Sprr	dp_page_defer_add_data(hdl, dpage, afar);
11213240Sprr
11313240Sprr	cmd_list_append(&cmd.cmd_deferred_pages, dpage);
11413240Sprr
11513240Sprr	return (dpage);
11613240Sprr}
11713240Sprr
11813240Sprrstatic cmd_dp_defer_t *
11913240Sprrdp_page_defer_lookup(cmd_page_t *page)
12013240Sprr{
12113240Sprr	cmd_dp_defer_t *dpage;
12213240Sprr
12313240Sprr	for (dpage = cmd_list_next(&cmd.cmd_deferred_pages); dpage != NULL;
12413240Sprr	    dpage = cmd_list_next(dpage)) {
12513240Sprr		if (dpage->dp_defer_page == page)
12613240Sprr			return (dpage);
12713240Sprr	}
12813240Sprr
12913240Sprr	return (NULL);
13013240Sprr}
13113240Sprr
13213240Sprrvoid
13313240Sprrcmd_dp_page_defer(fmd_hdl_t *hdl, nvlist_t *modasru, fmd_event_t *ep,
13413240Sprr    uint64_t afar)
13513240Sprr{
13613240Sprr	cmd_dp_defer_t *dpage;
13713240Sprr	cmd_page_t *page = cmd_page_lookup(afar);
13813240Sprr	const char *uuid;
13913240Sprr
14013240Sprr	if (page == NULL) {
14113240Sprr		page = cmd_page_create(hdl, modasru, afar);
14213240Sprr		dpage = dp_page_defer_create(hdl, page, afar);
14313240Sprr		page->page_case = cmd_case_create(hdl, &page->page_header,
14413240Sprr		    CMD_PTR_DP_PAGE_DEFER, &uuid);
14513240Sprr		fmd_case_setprincipal(hdl, page->page_case, ep);
14613240Sprr	} else {
14713240Sprr		dpage = dp_page_defer_lookup(page);
14813240Sprr		if (dpage == NULL)
14913240Sprr			fmd_hdl_abort(hdl, "deferred page with no defer data");
15013240Sprr		fmd_case_add_ereport(hdl, page->page_case, ep);
15113240Sprr	}
15213240Sprr
15313240Sprr	dp_page_defer_add_data(hdl, dpage, afar);
15413240Sprr}
15513240Sprr
15613240Sprrint
15713240Sprrcmd_dp_page_check(fmd_hdl_t *hdl, cmd_dp_defer_t *dpage)
15813240Sprr{
15913240Sprr	int i;
16013240Sprr
16113240Sprr	for (i = 0; i < DP_MAX_MCS; i++) {
16213240Sprr		if (dpage->dp_defer_mcids[i] == -1)
16313240Sprr			break;
16413240Sprr		/*
16513240Sprr		 * If there's no datapath fault corresponding to
16613240Sprr		 * an mcid, that means the page incurred an error
16713240Sprr		 * not attributable to a datapath fault.
16813240Sprr		 */
16913240Sprr		if (cmd_dp_lookup_fault(hdl, dpage->dp_defer_mcids[i]) == 0)
17013240Sprr			return (0);
17113240Sprr	}
17213240Sprr
17313240Sprr	return (1);
17413240Sprr}
17513240Sprr
17613240Sprrvoid
17713240Sprrcmd_dp_page_replay(fmd_hdl_t *hdl)
17813240Sprr{
17913240Sprr	fmd_event_t *ep;
18013240Sprr	cmd_page_t *page;
18113240Sprr	cmd_bank_t *bank;
18213240Sprr	cmd_dp_defer_t *dpage;
18313240Sprr	nvlist_t *nvl;
18413240Sprr
18513240Sprr	while ((dpage = cmd_list_next(&cmd.cmd_deferred_pages)) != NULL) {
18613240Sprr		fmd_hdl_debug(hdl, "replaying deferred page, "
18713240Sprr		    "pa=%llx\n", dpage->dp_defer_page->page_physbase);
18813240Sprr
18913240Sprr		page = dpage->dp_defer_page;
19013240Sprr
19113240Sprr		if (cmd_dp_page_check(hdl, dpage)) {
19213240Sprr			fmd_hdl_debug(hdl, "deferred memory UE  overtaken by "
19313240Sprr			    "dp fault");
19413240Sprr			CMD_STAT_BUMP(dp_ignored_ue);
19513240Sprr			fmd_case_close(hdl, page->page_case);
19613240Sprr			cmd_list_delete(&cmd.cmd_deferred_pages, dpage);
19713240Sprr			fmd_hdl_free(hdl, dpage, sizeof (cmd_dp_defer_t));
19813240Sprr			cmd_page_destroy(hdl, page);
19913240Sprr			continue;
20013240Sprr		}
20113240Sprr
20213240Sprr		nvl = page->page_asru_nvl;
20313240Sprr
20413240Sprr		bank = cmd_bank_lookup(hdl, nvl);
20513240Sprr
20613240Sprr		ep = fmd_case_getprincipal(hdl, page->page_case);
20713240Sprr		fmd_case_add_ereport(hdl, bank->bank_case.cc_cp, ep);
20813240Sprr
20913240Sprr		bank->bank_nretired++;
21013240Sprr		bank->bank_retstat.fmds_value.ui64++;
21113240Sprr		cmd_bank_dirty(hdl, bank);
21213240Sprr
21313240Sprr		fmd_case_reset(hdl, page->page_case);
21413240Sprr		cmd_case_fini(hdl, page->page_case, FMD_B_TRUE);
21513240Sprr
21613240Sprr		page->page_case = NULL;
21713240Sprr		cmd_page_fault(hdl, nvl, nvl, ep, page->page_physbase);
21813240Sprr		cmd_bank_fault(hdl, bank);
21913240Sprr
22013240Sprr		cmd_list_delete(&cmd.cmd_deferred_pages, dpage);
22115400Sprr		fmd_hdl_free(hdl, dpage, sizeof (cmd_dp_defer_t));
22213240Sprr	}
22313240Sprr
22413240Sprr	fmd_hdl_debug(hdl, "cmd_page_defer_replay() complete\n");
22513240Sprr}
22613240Sprr
22713240Sprrvoid
22813240Sprrcmd_dp_page_restore(fmd_hdl_t *hdl, cmd_page_t *page)
22913240Sprr{
23013240Sprr	cmd_dp_defer_t *dpage;
23113240Sprr
23213240Sprr	dpage = fmd_hdl_zalloc(hdl, sizeof (cmd_dp_defer_t), FMD_SLEEP);
23313240Sprr
23413240Sprr	dpage->dp_defer_page = page;
235
236	dp_page_defer_data_restore(hdl, dpage);
237
238	cmd_list_append(&cmd.cmd_deferred_pages, dpage);
239}
240
241void
242cmd_dp_page_validate(fmd_hdl_t *hdl)
243{
244	cmd_dp_defer_t *dpage, *next;
245	cmd_page_t *page;
246
247	for (dpage = cmd_list_next(&cmd.cmd_deferred_pages); dpage != NULL;
248	    dpage = next) {
249		next = cmd_list_next(dpage);
250
251		page = dpage->dp_defer_page;
252
253		if (!fmd_nvl_fmri_present(hdl, page->page_asru_nvl)) {
254			cmd_page_destroy(hdl, page);
255			cmd_list_delete(&cmd.cmd_deferred_pages, dpage);
256			fmd_hdl_free(hdl, dpage, sizeof (cmd_dp_defer_t));
257		}
258	}
259}
260
261/*ARGSUSED*/
262int
263cmd_dp_page_isdeferred(fmd_hdl_t *hdl, cmd_page_t *page)
264{
265	cmd_dp_defer_t *dpage, *next;
266
267	for (dpage = cmd_list_next(&cmd.cmd_deferred_pages); dpage != NULL;
268	    dpage = next) {
269		next = cmd_list_next(dpage);
270
271		if (dpage->dp_defer_page == page) {
272			return (1);
273		}
274	}
275
276	return (0);
277}
278