1/* $NetBSD: ecc_plb.c,v 1.13 2011/06/17 19:03:01 matt Exp $ */ 2 3/* 4 * Copyright 2001 Wasabi Systems, Inc. 5 * All rights reserved. 6 * 7 * Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project by 20 * Wasabi Systems, Inc. 21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 22 * or promote products derived from this software without specific prior 23 * written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38#include <sys/cdefs.h> 39__KERNEL_RCSID(0, "$NetBSD: ecc_plb.c,v 1.13 2011/06/17 19:03:01 matt Exp $"); 40 41#include "locators.h" 42 43#include <sys/param.h> 44#include <sys/systm.h> 45#include <sys/device.h> 46#include <sys/cpu.h> 47 48#include <prop/proplib.h> 49 50#include <powerpc/ibm4xx/cpu.h> 51#include <powerpc/ibm4xx/dcr4xx.h> 52#include <powerpc/ibm4xx/dev/plbvar.h> 53 54 55struct ecc_plb_softc { 56 device_t sc_dev; 57 uint64_t sc_ecc_tb; 58 uint64_t sc_ecc_iv; /* Interval */ 59 uint32_t sc_ecc_cnt; 60 u_int sc_memsize; 61 int sc_irq; 62}; 63 64static int ecc_plbmatch(device_t, cfdata_t, void *); 65static void ecc_plbattach(device_t, device_t, void *); 66static void ecc_plb_deferred(device_t); 67static int ecc_plb_intr(void *); 68 69CFATTACH_DECL_NEW(ecc_plb, sizeof(struct ecc_plb_softc), 70 ecc_plbmatch, ecc_plbattach, NULL, NULL); 71 72static int ecc_plb_found; 73 74static int 75ecc_plbmatch(device_t parent, cfdata_t cf, void *aux) 76{ 77 struct plb_attach_args *paa = aux; 78 79 if (strcmp(paa->plb_name, cf->cf_name) != 0) 80 return (0); 81 82 if (cf->cf_loc[PLBCF_IRQ] == PLBCF_IRQ_DEFAULT) 83 panic("ecc_plbmatch: wildcard IRQ not allowed"); 84 85 paa->plb_irq = cf->cf_loc[PLBCF_IRQ]; 86 87 return (!ecc_plb_found); 88} 89 90static void 91ecc_plbattach(device_t parent, device_t self, void *aux) 92{ 93 struct ecc_plb_softc *sc = device_private(self); 94 struct plb_attach_args *paa = aux; 95 unsigned int processor_freq; 96 unsigned int memsiz; 97 prop_number_t pn; 98 99 ecc_plb_found++; 100 101 pn = prop_dictionary_get(board_properties, "processor-frequency"); 102 KASSERT(pn != NULL); 103 processor_freq = (unsigned int) prop_number_integer_value(pn); 104 105 pn = prop_dictionary_get(board_properties, "mem-size"); 106 KASSERT(pn != NULL); 107 memsiz = (unsigned int) prop_number_integer_value(pn); 108 109 aprint_normal(": ECC controller\n"); 110 111 sc->sc_dev = self; 112 sc->sc_ecc_tb = 0; 113 sc->sc_ecc_cnt = 0; 114 sc->sc_ecc_iv = processor_freq; /* Set interval */ 115 sc->sc_memsize = memsiz; 116 sc->sc_irq = paa->plb_irq; 117 118 /* 119 * Defer hooking the interrupt until all PLB devices have attached 120 * since the interrupt controller may well be one of those devices... 121 */ 122 config_defer(self, ecc_plb_deferred); 123} 124 125static void 126ecc_plb_deferred(device_t self) 127{ 128 struct ecc_plb_softc *sc = device_private(self); 129 130 intr_establish(sc->sc_irq, IST_LEVEL, IPL_SERIAL, ecc_plb_intr, sc); 131} 132 133/* 134 * ECC fault handler. 135 */ 136static int 137ecc_plb_intr(void *arg) 138{ 139 struct ecc_plb_softc *sc = arg; 140 u_int32_t esr, ear; 141 int ce, ue; 142 u_quad_t tb; 143 u_long tmp, msr, dat; 144 145 /* This code needs to be improved to handle double-bit errors */ 146 /* in some intelligent fashion. */ 147 148 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR); 149 esr = mfdcr(DCR_SDRAM0_CFGDATA); 150 151 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_BEAR); 152 ear = mfdcr(DCR_SDRAM0_CFGDATA); 153 154 /* Always clear the error to stop the intr ASAP. */ 155 156 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR); 157 mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff); 158 159 if (esr == 0x00) { 160 /* No current error. Could happen due to intr. nesting */ 161 return(1); 162 } 163 164 /* 165 * Only report errors every once per second max. Do this using the TB, 166 * because the system time (via microtime) may be adjusted when the 167 * date is set and can't reliably be used to measure intervals. 168 */ 169 170 __asm ("1: mftbu %0; mftb %0+1; mftbu %1; cmpw %0,%1; bne 1b" 171 : "=r"(tb), "=r"(tmp)); 172 sc->sc_ecc_cnt++; 173 174 if ((tb - sc->sc_ecc_tb) < sc->sc_ecc_iv) 175 return(1); 176 177 ce = (esr & SDRAM0_ECCESR_CE) != 0x00; 178 ue = (esr & SDRAM0_ECCESR_UE) != 0x00; 179 180 printf("ECC: Error CNT=%d ESR=%x EAR=%x %s BKNE=%d%d%d%d " 181 "BLCE=%d%d%d%d CBE=%d%d.\n", 182 sc->sc_ecc_cnt, esr, ear, 183 (ue) ? "Uncorrectable" : "Correctable", 184 ((esr & SDRAM0_ECCESR_BKEN(0)) != 0x00), 185 ((esr & SDRAM0_ECCESR_BKEN(1)) != 0x00), 186 ((esr & SDRAM0_ECCESR_BKEN(2)) != 0x00), 187 ((esr & SDRAM0_ECCESR_BKEN(3)) != 0x00), 188 ((esr & SDRAM0_ECCESR_BLCEN(0)) != 0x00), 189 ((esr & SDRAM0_ECCESR_BLCEN(1)) != 0x00), 190 ((esr & SDRAM0_ECCESR_BLCEN(2)) != 0x00), 191 ((esr & SDRAM0_ECCESR_BLCEN(3)) != 0x00), 192 ((esr & SDRAM0_ECCESR_CBEN(0)) != 0x00), 193 ((esr & SDRAM0_ECCESR_CBEN(1)) != 0x00)); 194 195 /* Should check for uncorrectable errors and panic... */ 196 197 if (sc->sc_ecc_cnt > 1000) { 198 printf("ECC: Too many errors, recycling entire " 199 "SDRAM (size = %d).\n", sc->sc_memsize); 200 201 /* 202 * Can this code be changed to run without disabling data MMU 203 * and disabling intrs? 204 * Does kernel always map all of physical RAM VA=PA? If so, 205 * just loop over lowmem. 206 */ 207 __asm volatile( 208 "mfmsr %0;" 209 "li %1, 0x00;" 210 "ori %1, %1, 0x8010;" 211 "andc %1, %0, %1;" 212 "mtmsr %1;" 213 "sync;isync;" 214 "li %1, 0x00;" 215 "1:" 216 "dcbt 0, %1;" 217 "sync;isync;" 218 "lwz %2, 0(%1);" 219 "stw %2, 0(%1);" 220 "sync;isync;" 221 "dcbf 0, %1;" 222 "sync;isync;" 223 "addi %1, %1, 0x20;" 224 "addic. %3, %3, -0x20;" 225 "bge 1b;" 226 "mtmsr %0;" 227 "sync;isync;" 228 : "=&r" (msr), "=&r" (tmp), "=&r" (dat) 229 : "r" (sc->sc_memsize) : "0" ); 230 231 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR); 232 esr = mfdcr(DCR_SDRAM0_CFGDATA); 233 234 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR); 235 mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff); 236 237 /* 238 * Correctable errors here are OK, mem should be clean now. 239 * 240 * Should check for uncorrectable errors and panic... 241 */ 242 printf("ECC: Recycling complete, ESR=%x. " 243 "Checking for persistent errors.\n", esr); 244 245 __asm volatile( 246 "mfmsr %0;" 247 "li %1, 0x00;" 248 "ori %1, %1, 0x8010;" 249 "andc %1, %0, %1;" 250 "mtmsr %1;" 251 "sync;isync;" 252 "li %1, 0x00;" 253 "1:" 254 "dcbt 0, %1;" 255 "sync;isync;" 256 "lwz %2, 0(%1);" 257 "stw %2, 0(%1);" 258 "sync;isync;" 259 "dcbf 0, %1;" 260 "sync;isync;" 261 "addi %1, %1, 0x20;" 262 "addic. %3, %3, -0x20;" 263 "bge 1b;" 264 "mtmsr %0;" 265 "sync;isync;" 266 : "=&r" (msr), "=&r" (tmp), "=&r" (dat) 267 : "r" (sc->sc_memsize) : "0" ); 268 269 mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR); 270 esr = mfdcr(DCR_SDRAM0_CFGDATA); 271 272 /* 273 * If esr is non zero here, we're screwed. 274 * Should check this and panic. 275 */ 276 printf("ECC: Persistent error check complete, " 277 "final ESR=%x.\n", esr); 278 } 279 280 sc->sc_ecc_tb = tb; 281 sc->sc_ecc_cnt = 0; 282 283 return(1); 284} 285