1/*	$NetBSD: ecc_plb.c,v 1.13 2011/06/17 19:03:01 matt Exp $	*/
2
3/*
4 * Copyright 2001 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *      This product includes software developed for the NetBSD Project by
20 *      Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 *    or promote products derived from this software without specific prior
23 *    written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include <sys/cdefs.h>
39__KERNEL_RCSID(0, "$NetBSD: ecc_plb.c,v 1.13 2011/06/17 19:03:01 matt Exp $");
40
41#include "locators.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/device.h>
46#include <sys/cpu.h>
47
48#include <prop/proplib.h>
49
50#include <powerpc/ibm4xx/cpu.h>
51#include <powerpc/ibm4xx/dcr4xx.h>
52#include <powerpc/ibm4xx/dev/plbvar.h>
53
54
/*
 * Per-instance driver state, filled in by ecc_plbattach() and consulted
 * by the ECC fault interrupt handler.
 */
struct ecc_plb_softc {
	device_t sc_dev;	/* our own device handle */
	uint64_t sc_ecc_tb;	/* timebase value at the last printed report */
	uint64_t sc_ecc_iv;	 /* Minimum timebase ticks between reports */
	uint32_t sc_ecc_cnt;	/* ECC interrupts seen since the last report */
	u_int sc_memsize;	/* "mem-size" board property (presumably bytes) */
	int sc_irq;		/* interrupt line taken from the PLB attach args */
};
63
/* Autoconfiguration hooks, the deferred interrupt setup, and the handler. */
static int	ecc_plbmatch(device_t, cfdata_t, void *);
static void	ecc_plbattach(device_t, device_t, void *);
static void	ecc_plb_deferred(device_t);
static int	ecc_plb_intr(void *);

/*
 * Attachment glue: softc size plus the match and attach routines.
 * The two trailing NULLs are presumably the optional detach/activate
 * hooks, which this driver does not provide.
 */
CFATTACH_DECL_NEW(ecc_plb, sizeof(struct ecc_plb_softc),
    ecc_plbmatch, ecc_plbattach, NULL, NULL);

/*
 * Incremented by ecc_plbattach().  Once it is non-zero ecc_plbmatch()
 * returns 0, so no further instance is matched.
 */
static int ecc_plb_found;
73
74static int
75ecc_plbmatch(device_t parent, cfdata_t cf, void *aux)
76{
77	struct plb_attach_args *paa = aux;
78
79	if (strcmp(paa->plb_name, cf->cf_name) != 0)
80		return (0);
81
82	if (cf->cf_loc[PLBCF_IRQ] == PLBCF_IRQ_DEFAULT)
83		panic("ecc_plbmatch: wildcard IRQ not allowed");
84
85	paa->plb_irq = cf->cf_loc[PLBCF_IRQ];
86
87	return (!ecc_plb_found);
88}
89
90static void
91ecc_plbattach(device_t parent, device_t self, void *aux)
92{
93	struct ecc_plb_softc *sc = device_private(self);
94	struct plb_attach_args *paa = aux;
95	unsigned int processor_freq;
96	unsigned int memsiz;
97	prop_number_t pn;
98
99	ecc_plb_found++;
100
101	pn = prop_dictionary_get(board_properties, "processor-frequency");
102	KASSERT(pn != NULL);
103	processor_freq = (unsigned int) prop_number_integer_value(pn);
104
105	pn = prop_dictionary_get(board_properties, "mem-size");
106	KASSERT(pn != NULL);
107	memsiz = (unsigned int) prop_number_integer_value(pn);
108
109	aprint_normal(": ECC controller\n");
110
111	sc->sc_dev = self;
112	sc->sc_ecc_tb = 0;
113	sc->sc_ecc_cnt = 0;
114	sc->sc_ecc_iv = processor_freq; /* Set interval */
115	sc->sc_memsize = memsiz;
116	sc->sc_irq = paa->plb_irq;
117
118	/*
119	 * Defer hooking the interrupt until all PLB devices have attached
120	 * since the interrupt controller may well be one of those devices...
121	 */
122	config_defer(self, ecc_plb_deferred);
123}
124
125static void
126ecc_plb_deferred(device_t self)
127{
128	struct ecc_plb_softc *sc = device_private(self);
129
130	intr_establish(sc->sc_irq, IST_LEVEL, IPL_SERIAL, ecc_plb_intr, sc);
131}
132
133/*
134 * ECC fault handler.
135 */
136static int
137ecc_plb_intr(void *arg)
138{
139	struct ecc_plb_softc *sc = arg;
140	u_int32_t		esr, ear;
141	int			ce, ue;
142	u_quad_t		tb;
143	u_long			tmp, msr, dat;
144
145	/* This code needs to be improved to handle double-bit errors */
146	/* in some intelligent fashion. */
147
148	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
149	esr = mfdcr(DCR_SDRAM0_CFGDATA);
150
151	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_BEAR);
152	ear = mfdcr(DCR_SDRAM0_CFGDATA);
153
154	/* Always clear the error to stop the intr ASAP. */
155
156	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
157	mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff);
158
159	if (esr == 0x00) {
160		/* No current error.  Could happen due to intr. nesting */
161		return(1);
162	}
163
164	/*
165	 * Only report errors every once per second max. Do this using the TB,
166	 * because the system time (via microtime) may be adjusted when the
167	 * date is set and can't reliably be used to measure intervals.
168	 */
169
170	__asm ("1: mftbu %0; mftb %0+1; mftbu %1; cmpw %0,%1; bne 1b"
171		: "=r"(tb), "=r"(tmp));
172	sc->sc_ecc_cnt++;
173
174	if ((tb - sc->sc_ecc_tb) < sc->sc_ecc_iv)
175		return(1);
176
177	ce = (esr & SDRAM0_ECCESR_CE) != 0x00;
178	ue = (esr & SDRAM0_ECCESR_UE) != 0x00;
179
180	printf("ECC: Error CNT=%d ESR=%x EAR=%x %s BKNE=%d%d%d%d "
181		"BLCE=%d%d%d%d CBE=%d%d.\n",
182		sc->sc_ecc_cnt, esr, ear,
183		(ue) ? "Uncorrectable" : "Correctable",
184		((esr & SDRAM0_ECCESR_BKEN(0)) != 0x00),
185		((esr & SDRAM0_ECCESR_BKEN(1)) != 0x00),
186		((esr & SDRAM0_ECCESR_BKEN(2)) != 0x00),
187		((esr & SDRAM0_ECCESR_BKEN(3)) != 0x00),
188		((esr & SDRAM0_ECCESR_BLCEN(0)) != 0x00),
189		((esr & SDRAM0_ECCESR_BLCEN(1)) != 0x00),
190		((esr & SDRAM0_ECCESR_BLCEN(2)) != 0x00),
191		((esr & SDRAM0_ECCESR_BLCEN(3)) != 0x00),
192		((esr & SDRAM0_ECCESR_CBEN(0)) != 0x00),
193		((esr & SDRAM0_ECCESR_CBEN(1)) != 0x00));
194
195	/* Should check for uncorrectable errors and panic... */
196
197	if (sc->sc_ecc_cnt > 1000) {
198		printf("ECC: Too many errors, recycling entire "
199			"SDRAM (size = %d).\n", sc->sc_memsize);
200
201		/*
202		 * Can this code be changed to run without disabling data MMU
203		 * and disabling intrs?
204		 * Does kernel always map all of physical RAM VA=PA? If so,
205		 * just loop over lowmem.
206		 */
207		__asm volatile(
208			"mfmsr 	%0;"
209			"li	%1, 0x00;"
210			"ori	%1, %1, 0x8010;"
211			"andc	%1, %0, %1;"
212			"mtmsr	%1;"
213			"sync;isync;"
214			"li	%1, 0x00;"
215			"1:"
216			"dcbt	0, %1;"
217			"sync;isync;"
218			"lwz	%2, 0(%1);"
219			"stw	%2, 0(%1);"
220			"sync;isync;"
221			"dcbf	0, %1;"
222			"sync;isync;"
223			"addi	%1, %1, 0x20;"
224			"addic.	%3, %3, -0x20;"
225			"bge 	1b;"
226			"mtmsr %0;"
227			"sync;isync;"
228		: "=&r" (msr), "=&r" (tmp), "=&r" (dat)
229		: "r" (sc->sc_memsize) : "0" );
230
231		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
232		esr = mfdcr(DCR_SDRAM0_CFGDATA);
233
234		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
235		mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff);
236
237		/*
238		 * Correctable errors here are OK, mem should be clean now.
239		 *
240		 * Should check for uncorrectable errors and panic...
241		 */
242		printf("ECC: Recycling complete, ESR=%x. "
243			"Checking for persistent errors.\n", esr);
244
245		__asm volatile(
246			"mfmsr 	%0;"
247			"li	%1, 0x00;"
248			"ori	%1, %1, 0x8010;"
249			"andc	%1, %0, %1;"
250			"mtmsr	%1;"
251			"sync;isync;"
252			"li	%1, 0x00;"
253			"1:"
254			"dcbt	0, %1;"
255			"sync;isync;"
256			"lwz	%2, 0(%1);"
257			"stw	%2, 0(%1);"
258			"sync;isync;"
259			"dcbf	0, %1;"
260			"sync;isync;"
261			"addi	%1, %1, 0x20;"
262			"addic.	%3, %3, -0x20;"
263			"bge 	1b;"
264			"mtmsr %0;"
265			"sync;isync;"
266		: "=&r" (msr), "=&r" (tmp), "=&r" (dat)
267		: "r" (sc->sc_memsize) : "0" );
268
269		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
270		esr = mfdcr(DCR_SDRAM0_CFGDATA);
271
272		/*
273		 * If esr is non zero here, we're screwed.
274		 * Should check this and panic.
275		 */
276		printf("ECC: Persistent error check complete, "
277			"final ESR=%x.\n", esr);
278	}
279
280	sc->sc_ecc_tb = tb;
281	sc->sc_ecc_cnt = 0;
282
283	return(1);
284}
285