/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" /* * Ereport-handling routines for Datapath errors * - receive datapath ereports and open datapath case * - solve datapath case when datapath fault ereports are received * - maintain state of datapath error flag * - close datapath case when timeout occurs (w/o fault) */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Member Name Data Type Comments * ----------- --------- ----------- * version uint8 0 * class string "asic" * ENA uint64 ENA Format 1 * detector fmri aggregated ID data for SC-DE * * Datapath ereport subclasses and data payloads: * There will be two types of ereports (error and fault) which will be * identified by the "type" member. * * ereport.asic.*.cds.cds-dp * ereport.asic.*.dx.dx-dp * ereport.asic.*.sdi.sdi-dp * ereport.asic.*.cp.cp-dp * ereport.asic.*.rp.rp-dp // serengeti doesn't use "cp" term * * Member Name Data Type Comments * ----------- --------- ----------- * erptype uint16 derived from message type: error or * fault * t-value uint32 SC's datapath SERD timeout threshold * dp-list-sz uint8 number of dp-list array elements * dp-list array of uint16 Safari IDs of affected cpus */ static char *dperrtype[] = { DP_ERROR_CDS, /* Starcat types */ DP_ERROR_DX, DP_ERROR_EX, DP_ERROR_CP, DP_ERROR_CDS, /* Serengeti types */ DP_ERROR_DX, DP_ERROR_RP }; /* * Construct the ASRU(s)/FRU(s) associated with a data path fault, * construct the fault(s), and add the suspect(s) to the case * */ void cmd_dp_add_suspects(fmd_hdl_t *hdl, cmd_dp_t *dp) { const char *funcname = "cmd_dp_add_suspects()"; char class[DP_MAX_CLASS]; char frustr[3][DP_MAX_FRU]; int cpuid, numfru, sgpos, xcpos, i, err; nvlist_t *asru, *fru = NULL, *flt, *hcel; /* build ASRU, fault event class */ asru = cmd_dp_setasru(hdl, dp); (void) snprintf(class, DP_MAX_CLASS, "fault.asic.%s.%s", dperrtype[dp->dp_err], FM_ERROR_DATAPATH); cpuid = dp->dp_cpuid_list[0]; /* extract fru position */ sgpos = ((cpuid & 0x1f) / 4); xcpos = ((cpuid >> 5) & 0x1f); /* build FRU(s) for the particular error */ numfru = 0; switch (dp->dp_err) { case SC_DP_CDS_TYPE: case SC_DP_DX_TYPE: /* check for slot 1 (maxcat) */ if ((cpuid >> 3) & 0x1) (void) snprintf(frustr[0], DP_MAX_FRU, "IO%d", xcpos); else (void) snprintf(frustr[0], DP_MAX_FRU, "SB%d", xcpos); numfru = 1; break; case SC_DP_EX_TYPE: /* check for slot 1 (maxcat) */ if ((cpuid >> 3) & 0x1) (void) snprintf(frustr[0], DP_MAX_FRU, "IO%d", xcpos); else (void) snprintf(frustr[0], DP_MAX_FRU, "SB%d", xcpos); (void) snprintf(frustr[1], DP_MAX_FRU, "EX%d", xcpos); numfru = 2; break; case SC_DP_CP_TYPE: /* no way to know which CP half, be generic */ (void) snprintf(frustr[0], DP_MAX_FRU, "EX%d", xcpos); (void) snprintf(frustr[1], DP_MAX_FRU, "CP"); (void) snprintf(frustr[2], DP_MAX_FRU, "CS"); numfru = 3; break; case SG_DP_CDS_TYPE: case SG_DP_DX_TYPE: (void) snprintf(frustr[0], DP_MAX_FRU, "/N0/SB%d", sgpos); numfru = 1; break; case SG_DP_RP_TYPE: /* no way to know which RP, be generic */ (void) snprintf(frustr[0], DP_MAX_FRU, "/N0/SB%d", sgpos); (void) snprintf(frustr[1], DP_MAX_FRU, "RP"); numfru = 2; break; default: fmd_hdl_debug(hdl, "%s: invalid DP error type %d", funcname, dp->dp_err); nvlist_free(asru); return; } /* For each FRU, build an FMRI, create fault, add as suspect */ for (i = 0; i < numfru; i++) { /* build a FRU FMRI */ if (nvlist_alloc(&hcel, NV_UNIQUE_NAME, 0) != 0) { nvlist_free(asru); return; } err = nvlist_add_string(hcel, FM_FMRI_HC_NAME, FM_FMRI_LEGACY_HC); err |= nvlist_add_string(hcel, FM_FMRI_HC_ID, frustr[i]); if (err != 0) { nvlist_free(hcel); nvlist_free(asru); return; } /* put it in an HC scheme */ if (nvlist_alloc(&fru, NV_UNIQUE_NAME, 0) != 0) { nvlist_free(hcel); nvlist_free(asru); return; } err = nvlist_add_uint8(fru, FM_VERSION, FM_HC_SCHEME_VERSION); err |= nvlist_add_string(fru, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC); err |= nvlist_add_string(fru, FM_FMRI_HC_ROOT, ""); err |= nvlist_add_uint32(fru, FM_FMRI_HC_LIST_SZ, 1); err |= nvlist_add_nvlist_array(fru, FM_FMRI_HC_LIST, &hcel, 1); if (err != 0) { nvlist_free(fru); nvlist_free(hcel); nvlist_free(asru); return; } /* create the fault, add to case. */ flt = cmd_nvl_create_fault(hdl, class, 100/numfru, asru, fru, NULL); fmd_case_add_suspect(hdl, dp->dp_case, flt); /* free up memory */ nvlist_free(fru); nvlist_free(hcel); } /* free up ASRU */ nvlist_free(asru); } /*ARGSUSED*/ cmd_evdisp_t cmd_dp_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, cmd_errcl_t clcode, uint8_t dperr) { const char *funcname = "cmd_dp_common()"; const char *uuidp; cmd_dp_t *dpt, *ept; int err, i, fltflg; uint16_t *cpuid_list; uint64_t *serid_list; uint32_t ncpuids; /* extract common ereport contents */ dpt = fmd_hdl_zalloc(hdl, sizeof (cmd_dp_t), FMD_SLEEP); dpt->dp_nodetype = CMD_NT_DP; dpt->dp_version = CMD_DP_VERSION; dpt->dp_err = dperr; err = nvlist_lookup_pairs(nvl, 0, DP_EREPORT_TYPE, DATA_TYPE_UINT16, &dpt->dp_erpt_type, DP_TVALUE, DATA_TYPE_UINT32, &dpt->dp_t_value, DP_LIST_SIZE, DATA_TYPE_UINT32, &ncpuids, NULL); if (err != 0) { fmd_hdl_debug(hdl, "%s: unable to verify ereport contents " "(erptype, ena, t_value, dp_list_sz)", funcname); fmd_hdl_free(hdl, dpt, sizeof (cmd_dp_t)); return (CMD_EVD_UNUSED); } /* extract cpuid list from ereport */ err = nvlist_lookup_uint16_array(nvl, DP_LIST, &cpuid_list, &ncpuids); err |= nvlist_lookup_uint64_array(nvl, SN_LIST, &serid_list, &ncpuids); if (err != 0) { fmd_hdl_debug(hdl, "%s: unable to verify ereport contents " "(dp_list, sn_list)", funcname); fmd_hdl_free(hdl, dpt, sizeof (cmd_dp_t)); return (CMD_EVD_UNUSED); } for (i = 0; i < ncpuids; i++) { dpt->dp_cpuid_list[i] = cpuid_list[i]; dpt->dp_serid_list[i] = serid_list[i]; } dpt->dp_ncpus = ncpuids; switch (dpt->dp_erpt_type) { case DP_ERROR: /* * Scan existing faults on cmd.cmd_datapaths. If each * cpuid in the current datapath event already has an * associated DP fault, this is an uninteresting event. */ fltflg = 0; for (i = 0; i < ncpuids; i++) if (cmd_dp_lookup_fault(hdl, cpuid_list[i]) != NULL) fltflg++; if (fltflg == ncpuids) { fmd_hdl_debug(hdl, "%s: datapath fault(s) already " "experienced, event uninteresting\n", funcname); fmd_hdl_free(hdl, dpt, sizeof (cmd_dp_t)); return (CMD_EVD_UNUSED); } /* * Check for an existing datapath error, and if found * add this event to the existing case */ ept = cmd_dp_lookup_error(dpt); if (ept != NULL && !fmd_case_closed(hdl, ept->dp_case)) { fmd_hdl_debug(hdl, "%s: found existing datapath error, " "adding event to case\n", funcname); fmd_case_add_ereport(hdl, ept->dp_case, ep); /* check for t-value change */ if (dpt->dp_t_value != ept->dp_t_value) { fmd_event_t *ep; fmd_timer_remove(hdl, ept->dp_id); ep = fmd_case_getprincipal(hdl, ept->dp_case); ept->dp_id = fmd_timer_install(hdl, (void *)CMD_TIMERTYPE_DP, ep, (hrtime_t)NANOSEC * (dpt->dp_t_value + 120)); } fmd_hdl_free(hdl, dpt, sizeof (cmd_dp_t)); return (CMD_EVD_OK); } /* * Didn't find an existing datapath error. Create a new * case, add the event. Also, stash the datapath event on the * cmd.cmd_datapaths list */ fmd_hdl_debug(hdl, "%s: new datapath error, create case and " "add to cmd.cmd_datapaths\n", funcname); ++cmd.cmd_dp_flag; cmd_bufname(dpt->dp_bufname, sizeof (dpt->dp_bufname), "dp_err_%d_%s", dpt->dp_cpuid_list[0], dperrtype[dpt->dp_err]); dp_buf_write(hdl, dpt); dpt->dp_case = cmd_case_create(hdl, &dpt->dp_header, CMD_PTR_DP_CASE, &uuidp); fmd_case_setprincipal(hdl, dpt->dp_case, ep); dpt->dp_id = fmd_timer_install(hdl, (void *)CMD_TIMERTYPE_DP, ep, (hrtime_t)NANOSEC * (dpt->dp_t_value + 120)); cmd_list_append(&cmd.cmd_datapaths, dpt); break; case DP_FAULT: ++cmd.cmd_dp_flag; dpt->dp_erpt_type = DP_FAULT; dpt->dp_id = 0; cmd_bufname(dpt->dp_bufname, sizeof (dpt->dp_bufname), "dp_flt_%d_%s", dpt->dp_cpuid_list[0], dperrtype[dpt->dp_err]); dp_buf_write(hdl, dpt); /* * Check for an existing DP_ERROR on cmd.cmd_datapaths, and * if found, remove the DP_ERROR and close the case before * creating the DP_FAULT case. */ ept = cmd_dp_lookup_error(dpt); if (ept != NULL && !fmd_case_closed(hdl, ept->dp_case)) { fmd_hdl_debug(hdl, "%s: existing datapath error " "overtaken by datapath fault\n", funcname); fmd_timer_remove(hdl, ept->dp_id); cmd_dp_destroy(hdl, ept); } dpt->dp_case = cmd_case_create(hdl, &dpt->dp_header, CMD_PTR_DP_CASE, &uuidp); fmd_case_setprincipal(hdl, dpt->dp_case, ep); /* Add suspect(s) and solve the case. */ cmd_dp_add_suspects(hdl, dpt); fmd_case_solve(hdl, dpt->dp_case); /* add it to cmd.cmd_datapaths */ cmd_list_append(&cmd.cmd_datapaths, dpt); --cmd.cmd_dp_flag; if (cmd.cmd_dp_flag == 0) cmd_dp_page_replay(hdl); break; default: fmd_hdl_debug(hdl, "%s: unknown ereport type", funcname); fmd_hdl_free(hdl, dpt, sizeof (cmd_dp_t)); return (CMD_EVD_UNUSED); } return (CMD_EVD_OK); } cmd_evdisp_t cmd_dp_cds(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, cmd_errcl_t clcode) { if (fmd_nvl_class_match(hdl, nvl, "ereport.asic.starcat.*")) { return (cmd_dp_common(hdl, ep, nvl, class, clcode, SC_DP_CDS_TYPE)); } else return (cmd_dp_common(hdl, ep, nvl, class, clcode, SG_DP_CDS_TYPE)); } cmd_evdisp_t cmd_dp_dx(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, cmd_errcl_t clcode) { if (fmd_nvl_class_match(hdl, nvl, "ereport.asic.starcat.*")) { return (cmd_dp_common(hdl, ep, nvl, class, clcode, SC_DP_DX_TYPE)); } else return (cmd_dp_common(hdl, ep, nvl, class, clcode, SG_DP_DX_TYPE)); } cmd_evdisp_t cmd_dp_ex(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, cmd_errcl_t clcode) { return (cmd_dp_common(hdl, ep, nvl, class, clcode, SC_DP_EX_TYPE)); } cmd_evdisp_t cmd_dp_cp(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, cmd_errcl_t clcode) { if (fmd_nvl_class_match(hdl, nvl, "ereport.asic.starcat.*")) { return (cmd_dp_common(hdl, ep, nvl, class, clcode, SC_DP_CP_TYPE)); } else return (cmd_dp_common(hdl, ep, nvl, class, clcode, SG_DP_RP_TYPE)); }