1139790Simp/*- 2206570Smarcel * Copyright (c) 2002-2010 Marcel Moolenaar 395517Smarcel * All rights reserved. 495517Smarcel * 595517Smarcel * Redistribution and use in source and binary forms, with or without 695517Smarcel * modification, are permitted provided that the following conditions 795517Smarcel * are met: 895517Smarcel * 995517Smarcel * 1. Redistributions of source code must retain the above copyright 1095517Smarcel * notice, this list of conditions and the following disclaimer. 1195517Smarcel * 2. Redistributions in binary form must reproduce the above copyright 1295517Smarcel * notice, this list of conditions and the following disclaimer in the 1395517Smarcel * documentation and/or other materials provided with the distribution. 1495517Smarcel * 1595517Smarcel * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 1695517Smarcel * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1795517Smarcel * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 1895517Smarcel * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 1995517Smarcel * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2095517Smarcel * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2195517Smarcel * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2295517Smarcel * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2395517Smarcel * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2495517Smarcel * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 2595517Smarcel * 2695517Smarcel * $FreeBSD: releng/10.3/sys/ia64/ia64/mca.c 253559 2013-07-23 02:38:23Z marcel $ 2795517Smarcel */ 2895517Smarcel 2995517Smarcel#include <sys/param.h> 3095517Smarcel#include <sys/systm.h> 31209671Smarcel#include <sys/bus.h> 3295517Smarcel#include <sys/kernel.h> 3395892Smarcel#include <sys/lock.h> 3495517Smarcel#include <sys/malloc.h> 3595892Smarcel#include <sys/mutex.h> 3695517Smarcel#include <sys/sysctl.h> 3797443Smarcel#include <sys/uuid.h> 3895517Smarcel#include <vm/vm.h> 3995517Smarcel#include <vm/vm_kern.h> 40209671Smarcel#include <machine/intr.h> 4195517Smarcel#include <machine/mca.h> 42206570Smarcel#include <machine/pal.h> 4395517Smarcel#include <machine/sal.h> 4495517Smarcel#include <machine/smp.h> 4595517Smarcel 46227293Sedstatic MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Architecture"); 4795517Smarcel 48188119Sjhbstruct mca_info { 49188119Sjhb STAILQ_ENTRY(mca_info) mi_link; 50206570Smarcel u_long mi_seqnr; 51206570Smarcel u_int mi_cpuid; 52188119Sjhb size_t mi_recsz; 53188119Sjhb char mi_record[0]; 54188119Sjhb}; 55188119Sjhb 56206570SmarcelSTAILQ_HEAD(mca_info_list, mca_info); 57188119Sjhb 58206570Smarcelstatic int64_t mca_info_size[SAL_INFO_TYPES]; 59206570Smarcelstatic vm_offset_t mca_info_block; 60206570Smarcelstatic struct mtx mca_info_block_lock; 6195517Smarcel 62227309Sedstatic SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RW, NULL, "MCA container"); 6395517Smarcel 6495517Smarcelstatic int mca_count; /* Number of records stored. */ 6595517Smarcelstatic int mca_first; /* First (lowest) record ID. */ 6695517Smarcelstatic int mca_last; /* Last (highest) record ID. */ 6795517Smarcel 6895517SmarcelSYSCTL_INT(_hw_mca, OID_AUTO, count, CTLFLAG_RD, &mca_count, 0, 6995517Smarcel "Record count"); 7095517SmarcelSYSCTL_INT(_hw_mca, OID_AUTO, first, CTLFLAG_RD, &mca_first, 0, 7195517Smarcel "First record id"); 7295517SmarcelSYSCTL_INT(_hw_mca, OID_AUTO, last, CTLFLAG_RD, &mca_last, 0, 7395517Smarcel "Last record id"); 7495517Smarcel 75206570Smarcelstatic struct mtx mca_sysctl_lock; 76206570Smarcel 77209671Smarcelstatic u_int mca_xiv_cmc; 78209671Smarcel 7995517Smarcelstatic int 80206570Smarcelmca_sysctl_inject(SYSCTL_HANDLER_ARGS) 81206570Smarcel{ 82206570Smarcel struct ia64_pal_result res; 83206570Smarcel u_int val; 84206570Smarcel int error; 85206570Smarcel 86206570Smarcel val = 0; 87206570Smarcel error = sysctl_wire_old_buffer(req, sizeof(u_int)); 88206570Smarcel if (!error) 89206570Smarcel error = sysctl_handle_int(oidp, &val, 0, req); 90206570Smarcel 91206570Smarcel if (error != 0 || req->newptr == NULL) 92206570Smarcel return (error); 93206570Smarcel 94209749Smarcel /* 95209749Smarcel * Example values for injecting PAL determined machine checks: 96209749Smarcel * corrected 9 97209749Smarcel * recoverable 73 98209749Smarcel * fatal 137 99209749Smarcel */ 100206570Smarcel res = ia64_call_pal_stacked(PAL_MC_ERROR_INJECT, val, 0, 0); 101206570Smarcel printf("%s: %#lx, %#lx, %#lx, %#lx\n", __func__, res.pal_status, 102206570Smarcel res.pal_result[0], res.pal_result[1], res.pal_result[2]); 103206570Smarcel return (0); 104206570Smarcel} 105206570SmarcelSYSCTL_PROC(_hw_mca, OID_AUTO, inject, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, 106206570Smarcel mca_sysctl_inject, "I", "set to trigger a MCA"); 107206570Smarcel 108206570Smarcelstatic int 10995517Smarcelmca_sysctl_handler(SYSCTL_HANDLER_ARGS) 11095517Smarcel{ 11195517Smarcel int error = 0; 11295517Smarcel 11395517Smarcel if (!arg1) 11495517Smarcel return (EINVAL); 11595517Smarcel error = SYSCTL_OUT(req, arg1, arg2); 11695517Smarcel 11795517Smarcel if (error || !req->newptr) 11895517Smarcel return (error); 11995517Smarcel 12095517Smarcel error = SYSCTL_IN(req, arg1, arg2); 12195517Smarcel return (error); 12295517Smarcel} 12395517Smarcel 124206570Smarcelstatic void 125206570Smarcelia64_mca_collect_state(int type, struct mca_info_list *reclst) 126188119Sjhb{ 12795517Smarcel struct ia64_sal_result result; 12895517Smarcel struct mca_record_header *hdr; 129188119Sjhb struct mca_info *rec; 13095892Smarcel uint64_t seqnr; 131188119Sjhb size_t recsz; 13295517Smarcel 13395517Smarcel /* 13495517Smarcel * Don't try to get the state if we couldn't get the size of 13595517Smarcel * the state information previously. 13695517Smarcel */ 13795517Smarcel if (mca_info_size[type] == -1) 13895517Smarcel return; 13995517Smarcel 140169757Smarcel if (mca_info_block == 0) 141169757Smarcel return; 142169757Smarcel 14395517Smarcel while (1) { 144206570Smarcel mtx_lock_spin(&mca_info_block_lock); 14595517Smarcel result = ia64_sal_entry(SAL_GET_STATE_INFO, type, 0, 14695517Smarcel mca_info_block, 0, 0, 0, 0); 14795892Smarcel if (result.sal_status < 0) { 14895892Smarcel mtx_unlock_spin(&mca_info_block_lock); 149206570Smarcel break; 15095892Smarcel } 15195517Smarcel 15295517Smarcel hdr = (struct mca_record_header *)mca_info_block; 15395517Smarcel recsz = hdr->rh_length; 15495892Smarcel seqnr = hdr->rh_seqnr; 15595517Smarcel 15695892Smarcel mtx_unlock_spin(&mca_info_block_lock); 15795892Smarcel 158188119Sjhb rec = malloc(sizeof(struct mca_info) + recsz, M_MCA, 159188119Sjhb M_NOWAIT | M_ZERO); 160188119Sjhb if (rec == NULL) 161188119Sjhb /* XXX: Not sure what to do. */ 162206570Smarcel break; 16395517Smarcel 164206570Smarcel rec->mi_seqnr = seqnr; 165206570Smarcel rec->mi_cpuid = PCPU_GET(cpuid); 166188119Sjhb 16795892Smarcel mtx_lock_spin(&mca_info_block_lock); 16895892Smarcel 16995892Smarcel /* 17095892Smarcel * If the info block doesn't have our record anymore because 17195892Smarcel * we temporarily unlocked it, get it again from SAL. I assume 17295892Smarcel * that it's possible that we could get a different record. 17395892Smarcel * I expect this to happen in a SMP configuration where the 17495892Smarcel * record has been cleared by a different processor. So, if 17595892Smarcel * we get a different record we simply abort with this record 17695892Smarcel * and start over. 17795892Smarcel */ 17895892Smarcel if (seqnr != hdr->rh_seqnr) { 17995892Smarcel result = ia64_sal_entry(SAL_GET_STATE_INFO, type, 0, 18095892Smarcel mca_info_block, 0, 0, 0, 0); 18195892Smarcel if (seqnr != hdr->rh_seqnr) { 18295892Smarcel mtx_unlock_spin(&mca_info_block_lock); 183188119Sjhb free(rec, M_MCA); 18495892Smarcel continue; 18595892Smarcel } 18695892Smarcel } 18795892Smarcel 188188119Sjhb rec->mi_recsz = recsz; 189188119Sjhb bcopy((char*)mca_info_block, rec->mi_record, recsz); 19095517Smarcel 19195892Smarcel /* 19295892Smarcel * Clear the state so that we get any other records when 19395892Smarcel * they exist. 19495892Smarcel */ 19595517Smarcel result = ia64_sal_entry(SAL_CLEAR_STATE_INFO, type, 0, 0, 0, 19695517Smarcel 0, 0, 0); 197206570Smarcel 198206570Smarcel mtx_unlock_spin(&mca_info_block_lock); 199206570Smarcel 200206570Smarcel STAILQ_INSERT_TAIL(reclst, rec, mi_link); 20195517Smarcel } 20295517Smarcel} 20395517Smarcel 20495517Smarcelvoid 205206570Smarcelia64_mca_save_state(int type) 206206570Smarcel{ 207206570Smarcel char name[64]; 208206570Smarcel struct mca_info_list reclst = STAILQ_HEAD_INITIALIZER(reclst); 209206570Smarcel struct mca_info *rec; 210206570Smarcel struct sysctl_oid *oid; 211206570Smarcel 212206570Smarcel ia64_mca_collect_state(type, &reclst); 213206570Smarcel 214206570Smarcel STAILQ_FOREACH(rec, &reclst, mi_link) { 215206570Smarcel sprintf(name, "%lu", rec->mi_seqnr); 216206570Smarcel oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), 217206570Smarcel OID_AUTO, name, CTLFLAG_RW, NULL, name); 218206570Smarcel if (oid == NULL) 219206570Smarcel continue; 220206570Smarcel 221206570Smarcel mtx_lock(&mca_sysctl_lock); 222206570Smarcel if (mca_count > 0) { 223206570Smarcel if (rec->mi_seqnr < mca_first) 224206570Smarcel mca_first = rec->mi_seqnr; 225206570Smarcel else if (rec->mi_seqnr > mca_last) 226206570Smarcel mca_last = rec->mi_seqnr; 227206570Smarcel } else 228206570Smarcel mca_first = mca_last = rec->mi_seqnr; 229206570Smarcel mca_count++; 230206570Smarcel mtx_unlock(&mca_sysctl_lock); 231206570Smarcel 232206570Smarcel sprintf(name, "%u", rec->mi_cpuid); 233206570Smarcel SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), rec->mi_cpuid, 234206570Smarcel name, CTLTYPE_OPAQUE | CTLFLAG_RD, rec->mi_record, 235206570Smarcel rec->mi_recsz, mca_sysctl_handler, "S,MCA", "MCA record"); 236206570Smarcel } 237206570Smarcel} 238206570Smarcel 239209671Smarcelstatic u_int 240209671Smarcelia64_mca_intr(struct thread *td, u_int xiv, struct trapframe *tf) 241209671Smarcel{ 242209671Smarcel 243209671Smarcel if (xiv == mca_xiv_cmc) { 244209671Smarcel printf("MCA: corrected machine check (CMC) interrupt\n"); 245209671Smarcel return (0); 246209671Smarcel } 247209671Smarcel 248209671Smarcel return (0); 249209671Smarcel} 250209671Smarcel 251206570Smarcelvoid 252209671Smarcelia64_mca_init_ap(void) 253209671Smarcel{ 254209671Smarcel 255209671Smarcel if (mca_xiv_cmc != 0) 256209671Smarcel ia64_set_cmcv(mca_xiv_cmc); 257209671Smarcel} 258209671Smarcel 259209671Smarcelvoid 26095517Smarcelia64_mca_init(void) 26195517Smarcel{ 26295517Smarcel struct ia64_sal_result result; 26395517Smarcel uint64_t max_size; 26495517Smarcel char *p; 26595517Smarcel int i; 26695517Smarcel 26795517Smarcel /* 26895517Smarcel * Get the sizes of the state information we can get from SAL and 26995517Smarcel * allocate a common block (forgive me my Fortran :-) for use by 27095517Smarcel * support functions. We create a region 7 address to make it 27195517Smarcel * easy on the OS_MCA or OS_INIT handlers to get the state info 27295517Smarcel * under unreliable conditions. 27395517Smarcel */ 27495517Smarcel max_size = 0; 275107205Smarcel for (i = 0; i < SAL_INFO_TYPES; i++) { 27695517Smarcel result = ia64_sal_entry(SAL_GET_STATE_INFO_SIZE, i, 0, 0, 0, 27795517Smarcel 0, 0, 0); 27895517Smarcel if (result.sal_status == 0) { 27995517Smarcel mca_info_size[i] = result.sal_result[0]; 28095517Smarcel if (mca_info_size[i] > max_size) 28195517Smarcel max_size = mca_info_size[i]; 28295517Smarcel } else 28395517Smarcel mca_info_size[i] = -1; 28495517Smarcel } 28595517Smarcel max_size = round_page(max_size); 28695517Smarcel 287253559Smarcel p = (max_size) ? contigmalloc(max_size, M_TEMP, M_NOWAIT, 0ul, ~0ul, 288253559Smarcel PAGE_SIZE, 256*1024*1024) : NULL; 289169757Smarcel if (p != NULL) { 290110211Smarcel mca_info_block = IA64_PHYS_TO_RR7(ia64_tpa((u_int64_t)p)); 29195517Smarcel 292110211Smarcel if (bootverbose) 293110211Smarcel printf("MCA: allocated %ld bytes for state info.\n", 294110211Smarcel max_size); 295110211Smarcel } 29695517Smarcel 29795517Smarcel /* 29895892Smarcel * Initialize the spin lock used to protect the info block. When APs 29995892Smarcel * get launched, there's a short moment of contention, but in all other 30095892Smarcel * cases it's not a hot spot. I think it's possible to have the MCA 30195892Smarcel * handler be called on multiple processors at the same time, but that 30295892Smarcel * should be rare. On top of that, performance is not an issue when 30395892Smarcel * dealing with machine checks... 30495892Smarcel */ 305206570Smarcel mtx_init(&mca_info_block_lock, "MCA info lock", NULL, MTX_SPIN); 30695892Smarcel 30795892Smarcel /* 308206570Smarcel * Serialize sysctl operations with a sleep lock. Note that this 309206570Smarcel * implies that we update the sysctl tree in a context that allows 310206570Smarcel * sleeping. 311206570Smarcel */ 312206570Smarcel mtx_init(&mca_sysctl_lock, "MCA sysctl lock", NULL, MTX_DEF); 313206570Smarcel 314206570Smarcel /* 31595517Smarcel * Get and save any processor and platfom error records. Note that in 31695517Smarcel * a SMP configuration the processor records are for the BSP only. We 31795517Smarcel * let the APs get and save their own records when we wake them up. 31895517Smarcel */ 31995517Smarcel for (i = 0; i < SAL_INFO_TYPES; i++) 32095517Smarcel ia64_mca_save_state(i); 321209671Smarcel 322209671Smarcel /* 323209671Smarcel * Allocate a XIV for CMC interrupts, so that we can collect and save 324209671Smarcel * the corrected processor checks. 325209671Smarcel */ 326209671Smarcel mca_xiv_cmc = ia64_xiv_alloc(PI_SOFT, IA64_XIV_PLAT, ia64_mca_intr); 327209671Smarcel if (mca_xiv_cmc != 0) 328209671Smarcel ia64_set_cmcv(mca_xiv_cmc); 329209671Smarcel else 330209671Smarcel printf("MCA: CMC vector could not be allocated\n"); 33195517Smarcel} 332