1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26#pragma ident "%Z%%M% %I% %E% SMI" 27 28#include <strings.h> 29#include <string.h> 30#include <errno.h> 31#include <fm/fmd_api.h> 32#include <sys/fm/protocol.h> 33#include <sys/async.h> 34#include <sys/time.h> 35#include <cmd.h> 36#include <cmd_state.h> 37#include <cmd_mem.h> 38#include <cmd_dp.h> 39#include <cmd_dp_page.h> 40#include <libnvpair.h> 41#include <fcntl.h> 42#include <unistd.h> 43#include <sys/mem.h> 44#include <sys/plat_datapath.h> 45 46/*ARGSUSED*/ 47static nvlist_t * 48dp_cpu_fmri(fmd_hdl_t *hdl, uint32_t cpuid, uint64_t serial_id) 49{ 50 nvlist_t *nvl = NULL; 51 int err; 52 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ 53 54 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) 55 return (NULL); 56 57 err = nvlist_add_string(nvl, FM_FMRI_SCHEME, FM_FMRI_SCHEME_CPU); 58 err |= nvlist_add_uint8(nvl, FM_VERSION, FM_CPU_SCHEME_VERSION); 59 err |= nvlist_add_uint32(nvl, FM_FMRI_CPU_ID, cpuid); 60 61 /* 62 * Version 1 calls for a string-based serial number 63 */ 64 (void) snprintf(sbuf, sizeof (sbuf), "%llX", (u_longlong_t)serial_id); 65 err |= nvlist_add_string(nvl, FM_FMRI_CPU_SERIAL_ID, sbuf); 66 if (err != 0) { 67 nvlist_free(nvl); 68 return (NULL); 69 } 70 return (nvl); 71} 72 73cmd_dp_t * 74cmd_dp_lookup_fault(fmd_hdl_t *hdl, uint32_t cpuid) 75{ 76 cmd_dp_t *ptr; 77 int i, found = 0; 78 79 /* 80 * Scan the cmd.cmd_datapaths list to see if there is 81 * a fault event present that impacts 'cpuid' 82 */ 83 for (ptr = cmd_list_next(&cmd.cmd_datapaths); ptr != NULL; 84 ptr = cmd_list_next(ptr)) { 85 if (ptr->dp_erpt_type == DP_FAULT) { 86 for (i = 0; i < ptr->dp_ncpus; i++) { 87 if (ptr->dp_cpuid_list[i] == cpuid) { 88 found = 1; 89 break; 90 } 91 } 92 } 93 if (found) 94 break; 95 } 96 97 /* 98 * Check if the FMRI for the found cpuid exists in the domain. 99 * If it does not, it implies a DR has been done and this DP_FAULT 100 * is no longer needed. 101 */ 102 if (ptr != NULL) { 103 nvlist_t *nvl; 104 105 nvl = dp_cpu_fmri(hdl, ptr->dp_cpuid_list[i], 106 ptr->dp_serid_list[i]); 107 108 if (nvl != NULL) { 109 if (!fmd_nvl_fmri_present(hdl, nvl)) { 110 cmd_dp_destroy(hdl, ptr); 111 ptr = NULL; 112 } 113 nvlist_free(nvl); 114 } 115 } 116 return (ptr); 117} 118 119cmd_dp_t * 120cmd_dp_lookup_error(cmd_dp_t *dp) 121{ 122 cmd_dp_t *ptr; 123 124 /* 125 * Scan the cmd.cmd_datapaths list to see if there is 126 * an existing error that matches 'dp'. A match is if 127 * both dp_err and the base cpuid are identical 128 */ 129 for (ptr = cmd_list_next(&cmd.cmd_datapaths); ptr != NULL; 130 ptr = cmd_list_next(ptr)) { 131 if (ptr->dp_erpt_type == DP_ERROR) { 132 if ((ptr->dp_err == dp->dp_err) && 133 (ptr->dp_cpuid_list[0] == dp->dp_cpuid_list[0])) 134 return (ptr); 135 } 136 } 137 return (NULL); 138} 139 140/* 141 * Allocates an nvlist_t, and sets ASRU information according to 142 * the cmd_dp_t provided. 143 */ 144/*ARGSUSED*/ 145nvlist_t * 146cmd_dp_setasru(fmd_hdl_t *hdl, cmd_dp_t *dpt) 147{ 148 nvlist_t *asru, *hcelem[DP_MAX_ASRUS]; 149 int i, j, sz, err; 150 char buf[DP_MAX_BUF]; 151 152 sz = dpt->dp_ncpus; 153 154 /* put ASRUs in an nvlist */ 155 for (i = 0; i < sz; i++) { 156 (void) snprintf(buf, DP_MAX_BUF, "%d", dpt->dp_cpuid_list[i]); 157 if (nvlist_alloc(&hcelem[i], NV_UNIQUE_NAME, 0) != 0) 158 return (NULL); 159 160 err = nvlist_add_string(hcelem[i], FM_FMRI_HC_NAME, 161 FM_FMRI_CPU_ID); 162 err |= nvlist_add_string(hcelem[i], FM_FMRI_HC_ID, buf); 163 if (err != 0) { 164 for (j = 0; j < i + 1; j++) 165 nvlist_free(hcelem[j]); 166 return (NULL); 167 } 168 } 169 170 /* put it in an HC scheme */ 171 if (nvlist_alloc(&asru, NV_UNIQUE_NAME, 0) != 0) { 172 for (j = 0; j < sz; j++) 173 nvlist_free(hcelem[j]); 174 return (NULL); 175 } 176 err = nvlist_add_uint8(asru, FM_VERSION, FM_HC_SCHEME_VERSION); 177 err |= nvlist_add_string(asru, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC); 178 err |= nvlist_add_string(asru, FM_FMRI_HC_ROOT, ""); 179 err |= nvlist_add_uint32(asru, FM_FMRI_HC_LIST_SZ, sz); 180 err |= nvlist_add_nvlist_array(asru, FM_FMRI_HC_LIST, &hcelem[0], 181 dpt->dp_ncpus); 182 if (err != 0) { 183 for (j = 0; j < sz; j++) 184 nvlist_free(hcelem[j]); 185 nvlist_free(asru); 186 return (NULL); 187 } 188 189 /* free up memory */ 190 for (j = 0; j < sz; j++) 191 nvlist_free(hcelem[j]); 192 193 /* return the ASRU */ 194 return (asru); 195} 196 197void 198dp_buf_write(fmd_hdl_t *hdl, cmd_dp_t *dp) 199{ 200 size_t sz; 201 202 if ((sz = fmd_buf_size(hdl, NULL, dp->dp_bufname)) != 0 && 203 sz != sizeof (cmd_dp_pers_t)) 204 fmd_buf_destroy(hdl, NULL, dp->dp_bufname); 205 206 fmd_buf_write(hdl, NULL, dp->dp_bufname, &dp->dp_pers, 207 sizeof (cmd_dp_pers_t)); 208} 209 210static cmd_dp_t * 211dp_wrapv0(fmd_hdl_t *hdl, cmd_dp_pers_t *pers, size_t psz) 212{ 213 cmd_dp_t *dp; 214 215 if (psz != sizeof (cmd_dp_pers_t)) { 216 fmd_hdl_abort(hdl, "size of state doesn't match size of " 217 "version 1 state (%u bytes).\n", sizeof (cmd_dp_pers_t)); 218 } 219 220 dp = fmd_hdl_zalloc(hdl, sizeof (cmd_dp_t), FMD_SLEEP); 221 bcopy(pers, dp, sizeof (cmd_dp_pers_t)); 222 fmd_hdl_free(hdl, pers, psz); 223 return (dp); 224} 225 226void * 227cmd_dp_restore(fmd_hdl_t *hdl, fmd_case_t *cp, cmd_case_ptr_t *ptr) 228{ 229 cmd_dp_t *dp; 230 231 for (dp = cmd_list_next(&cmd.cmd_datapaths); dp != NULL; 232 dp = cmd_list_next(dp)) { 233 if (dp->dp_case == cp) 234 break; 235 } 236 237 if (dp == NULL) { 238 size_t dpsz; 239 240 fmd_hdl_debug(hdl, "restoring dp from %s\n", ptr->ptr_name); 241 242 if ((dpsz = fmd_buf_size(hdl, NULL, ptr->ptr_name)) == 0) { 243 if (fmd_case_solved(hdl, cp) || 244 fmd_case_closed(hdl, cp)) { 245 fmd_hdl_debug(hdl, "dp %s from case %s not " 246 "found. Case is already solved or closed\n", 247 ptr->ptr_name, fmd_case_uuid(hdl, cp)); 248 return (NULL); 249 } else { 250 fmd_hdl_abort(hdl, "dp referenced by case %s " 251 "does not exist in saved state\n", 252 fmd_case_uuid(hdl, cp)); 253 } 254 } else if (dpsz > CMD_DP_MAXSIZE || 255 dpsz < CMD_DP_MINSIZE) { 256 fmd_hdl_abort(hdl, "dp buffer referenced by " 257 "case %s is out of bounds (is %u bytes, " 258 "max %u, min %u)\n", fmd_case_uuid(hdl, cp), 259 dpsz, CMD_DP_MAXSIZE, CMD_DP_MINSIZE); 260 } 261 262 if ((dp = cmd_buf_read(hdl, NULL, ptr->ptr_name, dpsz)) == NULL) 263 fmd_hdl_abort(hdl, "failed to read dp buf %s", 264 ptr->ptr_name); 265 266 switch (dp->dp_version) { 267 case CMD_DP_VERSION_0: 268 dp = dp_wrapv0(hdl, (cmd_dp_pers_t *)dp, dpsz); 269 break; 270 default: 271 fmd_hdl_abort(hdl, "unknown version (found %d) " 272 "for dp state referenced by case %s.\n", 273 dp->dp_version, fmd_case_uuid(hdl, cp)); 274 break; 275 } 276 277 dp->dp_case = cp; 278 279 if (dp->dp_erpt_type == DP_ERROR) { 280 fmd_event_t *ep = fmd_case_getprincipal(hdl, cp); 281 282 ++cmd.cmd_dp_flag; 283 284 dp->dp_id = fmd_timer_install(hdl, 285 (void *)CMD_TIMERTYPE_DP, ep, 286 (hrtime_t)NANOSEC * (dp->dp_t_value + 120)); 287 } 288 289 cmd_list_append(&cmd.cmd_datapaths, dp); 290 } 291 292 return (dp); 293} 294 295void 296cmd_dp_close(fmd_hdl_t *hdl, void *arg) 297{ 298 cmd_dp_destroy(hdl, arg); 299} 300 301void 302cmd_dp_timeout(fmd_hdl_t *hdl, id_t id) 303{ 304 cmd_dp_t *dp; 305 306 /* close case associated with the timer */ 307 for (dp = cmd_list_next(&cmd.cmd_datapaths); dp != NULL; 308 dp = cmd_list_next(dp)) { 309 if (dp->dp_id == id) { 310 cmd_dp_destroy(hdl, dp); 311 break; 312 } 313 } 314 315 fmd_hdl_debug(hdl, "cmd_dp_timeout() complete\n"); 316} 317 318/* 319 * Validate by matching each cmd_dp_t cpu and serial id to what is 320 * installed and active on this machine or domain. Delete the cmd_dp_t 321 * if no match is made. 322 */ 323void 324cmd_dp_validate(fmd_hdl_t *hdl) 325{ 326 cmd_dp_t *dp, *next; 327 nvlist_t *nvl; 328 int i, no_match; 329 330 for (dp = cmd_list_next(&cmd.cmd_datapaths); dp != NULL; dp = next) { 331 next = cmd_list_next(dp); 332 333 for (i = 0, no_match = 0; i < dp->dp_ncpus; i++) { 334 nvl = dp_cpu_fmri(hdl, dp->dp_cpuid_list[i], 335 dp->dp_serid_list[i]); 336 337 if (nvl == NULL) 338 fmd_hdl_abort(hdl, "could not make CPU fmri"); 339 340 if (!fmd_nvl_fmri_present(hdl, nvl)) 341 no_match = 1; 342 343 nvlist_free(nvl); 344 345 if (no_match) { 346 cmd_dp_destroy(hdl, dp); 347 break; 348 } 349 } 350 } 351} 352 353static void 354cmd_dp_free(fmd_hdl_t *hdl, cmd_dp_t *dp, int destroy) 355{ 356 if (dp->dp_case != NULL) 357 cmd_case_fini(hdl, dp->dp_case, destroy); 358 359 if (destroy && dp->dp_erpt_type == DP_ERROR) { 360 --cmd.cmd_dp_flag; 361 /* 362 * If there are no active datapath events, replay any 363 * pages that were deferred. 364 */ 365 if (cmd.cmd_dp_flag == 0) 366 cmd_dp_page_replay(hdl); 367 } 368 369 if (destroy) 370 fmd_buf_destroy(hdl, NULL, dp->dp_bufname); 371 372 cmd_list_delete(&cmd.cmd_datapaths, dp); 373 fmd_hdl_free(hdl, dp, sizeof (cmd_dp_t)); 374} 375 376void 377cmd_dp_destroy(fmd_hdl_t *hdl, cmd_dp_t *dp) 378{ 379 cmd_dp_free(hdl, dp, FMD_B_TRUE); 380} 381 382/*ARGSUSED*/ 383int 384cmd_dp_error(fmd_hdl_t *hdl) 385{ 386 if (cmd.cmd_dp_flag) 387 return (1); 388 else 389 return (0); 390} 391 392int 393cmd_dp_get_mcid(uint64_t addr, int *mcid) 394{ 395 int fd, rc; 396 mem_info_t data; 397 398 if ((fd = open("/dev/mem", O_RDONLY)) < 0) 399 return (-1); 400 401 data.m_addr = addr; 402 data.m_synd = 0; 403 if ((rc = ioctl(fd, MEM_INFO, &data)) < 0) { 404 (void) close(fd); 405 return (rc); 406 } 407 408 (void) close(fd); 409 *mcid = data.m_mcid; 410 411 return (0); 412} 413 414/*ARGSUSED*/ 415int 416cmd_dp_fault(fmd_hdl_t *hdl, uint64_t addr) 417{ 418 int mcid; 419 420 if (cmd_dp_get_mcid(addr, &mcid) < 0) 421 fmd_hdl_abort(hdl, "cmd_dp_get_mcid failed"); 422 423 if (cmd_dp_lookup_fault(hdl, mcid) != NULL) 424 return (1); 425 else 426 return (0); 427} 428 429void 430cmd_dp_fini(fmd_hdl_t *hdl) 431{ 432 cmd_dp_t *dp; 433 cmd_dp_defer_t *dpage; 434 435 while ((dp = cmd_list_next(&cmd.cmd_datapaths)) != NULL) 436 cmd_dp_free(hdl, dp, FMD_B_FALSE); 437 438 while ((dpage = cmd_list_next(&cmd.cmd_deferred_pages)) != NULL) { 439 cmd_list_delete(&cmd.cmd_deferred_pages, dpage); 440 fmd_hdl_free(hdl, dpage, sizeof (cmd_dp_defer_t)); 441 } 442} 443