1/* $NetBSD: dec_kn8ae.c,v 1.40 2011/06/14 15:34:22 matt Exp $ */
2
3/*
4 * Copyright (c) 1997 by Matthew Jacob
5 * NASA AMES Research Center.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice immediately at the beginning of the file, without modification,
13 *    this list of conditions, and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. The name of the author may not be used to endorse or promote products
18 *    derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
24 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>			/* RCS ID & Copyright macro defns */
34
35__KERNEL_RCSID(0, "$NetBSD: dec_kn8ae.c,v 1.40 2011/06/14 15:34:22 matt Exp $");
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/device.h>
40#include <sys/termios.h>
41#include <sys/conf.h>
42#include <dev/cons.h>
43
44#include <machine/rpb.h>
45#include <machine/autoconf.h>
46#include <machine/cpuconf.h>
47#include <machine/frame.h>
48#include <machine/alpha.h>
49#include <machine/logout.h>
50
51#include <dev/ic/comreg.h>
52#include <dev/ic/comvar.h>
53
54#include <dev/isa/isavar.h>
55#include <dev/pci/pcireg.h>
56#include <dev/pci/pcivar.h>
57
58#include <dev/scsipi/scsi_all.h>
59#include <dev/scsipi/scsipi_all.h>
60#include <dev/scsipi/scsiconf.h>
61
62#include <alpha/tlsb/tlsbreg.h>
63#include <alpha/tlsb/tlsbvar.h>
64#include <alpha/tlsb/kftxxreg.h>
65#include <alpha/tlsb/kftxxvar.h>
/*
 * KV(_addr): run _addr through ALPHA_PHYS_TO_K0SEG() and yield the result
 * as a generic (void *) pointer.
 *
 * NOTE(review): nothing in this file's visible code names KV directly;
 * presumably the TLSB register-access macros from the headers above expand
 * to it -- confirm before removing or renaming.
 */
#define	KV(_addr)	((void *)ALPHA_PHYS_TO_K0SEG((_addr)))
67
68
/*
 * Platform entry points.  dec_kn8ae_init() stores the other three in the
 * global "platform" structure as its cons_init, device_register and
 * mcheck_handler hooks.
 */
void dec_kn8ae_init(void);
void dec_kn8ae_cons_init(void);
static void dec_kn8ae_device_register(device_t, void *);

/* Arguments, in order: mces, trap frame, vector, param. */
static void dec_kn8ae_mcheck_handler
(unsigned long, struct trapframe *, unsigned long, unsigned long);
75
/*
 * Variation-code to model-name table handed to alpha_variation_name() by
 * dec_kn8ae_init().  The last entry carries a NULL name; presumably that is
 * the end-of-table marker -- confirm against alpha_variation_name().
 */
const struct alpha_variation_table dec_kn8ae_variations[] = {
	{ 0, "AlphaServer 8400" },
	{ 0, NULL },
};
80
81void
82dec_kn8ae_init(void)
83{
84	uint64_t variation;
85
86	platform.family = "AlphaServer 8400";
87
88	if ((platform.model = alpha_dsr_sysname()) == NULL) {
89		variation = hwrpb->rpb_variation & SV_ST_MASK;
90		if ((platform.model = alpha_variation_name(variation,
91		    dec_kn8ae_variations)) == NULL)
92			platform.model = alpha_unknown_sysname();
93	}
94
95	platform.iobus = "tlsb";
96	platform.cons_init = dec_kn8ae_cons_init;
97	platform.device_register = dec_kn8ae_device_register;
98	platform.mcheck_handler = dec_kn8ae_mcheck_handler;
99}
100
/*
 * dec_kn8ae_cons_init --
 *	Console initialization hook for this platform.  It currently
 *	performs no work at all.
 */
void
dec_kn8ae_cons_init(void)
{
	/*
	 * Nothing is configured here yet.  Information worth keeping for
	 * whoever fills this in:
	 *
	 *	On the AXP 8X00 the console type appears to be encoded in
	 *	the ctb_type field rather than in ctb_term_type.
	 *
	 *	XXX Not Type 4 CTB?
	 */
}
115
116/* #define	BDEBUG	1 */
117static void
118dec_kn8ae_device_register(device_t dev, void *aux)
119{
120	static int found, initted, diskboot, netboot;
121	static device_t primarydev, pcidev, ctrlrdev;
122	struct bootdev_data *b = bootdev_data;
123	device_t parent = device_parent(dev);
124
125	if (found)
126		return;
127
128	if (!initted) {
129		diskboot = (strcasecmp(b->protocol, "SCSI") == 0);
130		netboot = (strcasecmp(b->protocol, "BOOTP") == 0) ||
131		    (strcasecmp(b->protocol, "MOP") == 0);
132#if	BDEBUG
133		printf("proto:%s bus:%d slot:%d chan:%d", b->protocol,
134		    b->bus, b->slot, b->channel);
135		if (b->remote_address)
136			printf(" remote_addr:%s", b->remote_address);
137		printf(" un:%d bdt:%d", b->unit, b->boot_dev_type);
138		if (b->ctrl_dev_type)
139			printf(" cdt:%s\n", b->ctrl_dev_type);
140		else
141			printf("\n");
142		printf("diskboot = %d, netboot = %d\n", diskboot, netboot);
143#endif
144		initted = 1;
145	}
146
147	if (primarydev == NULL) {
148		if (!device_is_a(dev, "dwlpx"))
149			return;
150		else {
151			struct kft_dev_attach_args *ka = aux;
152
153			if (b->bus != ka->ka_hosenum)
154				return;
155			primarydev = dev;
156#ifdef BDEBUG
157			printf("\nprimarydev = %s\n", device_xname(dev));
158#endif
159			return;
160		}
161	}
162
163	if (pcidev == NULL) {
164		if (!device_is_a(dev, "pci"))
165			return;
166		/*
167		 * Try to find primarydev anywhere in the ancestry.  This is
168		 * necessary if the PCI bus is hidden behind a bridge.
169		 */
170		while (parent) {
171			if (parent == primarydev)
172				break;
173			parent = device_parent(parent);
174		}
175		if (!parent)
176			return;
177		else {
178			struct pcibus_attach_args *pba = aux;
179
180			if ((b->slot / 1000) != pba->pba_bus)
181				return;
182
183			pcidev = dev;
184#if	BDEBUG
185			printf("\npcidev = %s\n", device_xname(dev));
186#endif
187			return;
188		}
189	}
190
191	if (ctrlrdev == NULL) {
192		if (parent != pcidev)
193			return;
194		else {
195			struct pci_attach_args *pa = aux;
196			int slot;
197
198			slot = pa->pa_bus * 1000 + pa->pa_function * 100 +
199			    pa->pa_device;
200			if (b->slot != slot)
201				return;
202
203			if (netboot) {
204				booted_device = dev;
205#ifdef BDEBUG
206				printf("\nbooted_device = %s\n", device_xname(dev));
207#endif
208				found = 1;
209			} else {
210				ctrlrdev = dev;
211#if	BDEBUG
212				printf("\nctrlrdev = %s\n", device_xname(dev));
213#endif
214			}
215			return;
216		}
217	}
218
219	if (!diskboot)
220		return;
221
222	if (device_is_a(dev, "sd") ||
223	    device_is_a(dev, "st") ||
224	    device_is_a(dev, "cd")) {
225		struct scsipibus_attach_args *sa = aux;
226		struct scsipi_periph *periph = sa->sa_periph;
227		int unit;
228
229		if (device_parent(parent) != ctrlrdev)
230			return;
231
232		unit = periph->periph_target * 100 + periph->periph_lun;
233		if (b->unit != unit)
234			return;
235		if (b->channel != periph->periph_channel->chan_channel)
236			return;
237
238		/* we've found it! */
239		booted_device = dev;
240#if	BDEBUG
241		printf("\nbooted_device = %s\n", device_xname(dev));
242#endif
243		found = 1;
244	}
245}
246
247/*
248 * KN8AE Machine Check Handlers.
249 */
/*
 * The three handlers share one signature; the arguments are, in order:
 * mces, type (the vector), logout (address of the logout frame) and the
 * trap frame.  dec_kn8ae_mcheck_handler() dispatches to them by vector.
 */

/* System correctable error (vector ALPHA_SYS_ERROR). */
void kn8ae_harderr(unsigned long, unsigned long,
    unsigned long, struct trapframe *);

/* Processor correctable error (vector ALPHA_PROC_ERROR). */
static void kn8ae_softerr(unsigned long, unsigned long,
    unsigned long, struct trapframe *);

/* System or processor machine check (ALPHA_SYS_MCHECK / ALPHA_PROC_MCHECK). */
void kn8ae_mcheck(unsigned long, unsigned long,
    unsigned long, struct trapframe *);
258
259/*
260 * Support routine for clearing errors
261 */
262static void clear_tlsb_ebits(int);
263
264static void
265clear_tlsb_ebits(int cpuonly)
266{
267	int node;
268	uint32_t tldev;
269
270	for (node = 0; node <= TLSB_NODE_MAX; ++node) {
271		if ((tlsb_found & (1 << node)) == 0)
272			continue;
273		tldev = TLSB_GET_NODEREG(node, TLDEV);
274		if (tldev == 0) {
275			/* "cannot happen" */
276			continue;
277		}
278		/*
279		 * Registers to clear for all nodes.
280		 */
281		if (TLSB_GET_NODEREG(node, TLBER) &
282		    (TLBER_UDE|TLBER_CWDE|TLBER_CRDE)) {
283			TLSB_PUT_NODEREG(node, TLESR0,
284			    TLSB_GET_NODEREG(node, TLESR0));
285			TLSB_PUT_NODEREG(node, TLESR1,
286			    TLSB_GET_NODEREG(node, TLESR1));
287			TLSB_PUT_NODEREG(node, TLESR2,
288			    TLSB_GET_NODEREG(node, TLESR2));
289			TLSB_PUT_NODEREG(node, TLESR3,
290			    TLSB_GET_NODEREG(node, TLESR3));
291		}
292		TLSB_PUT_NODEREG(node, TLBER,
293		    TLSB_GET_NODEREG(node, TLBER));
294		TLSB_PUT_NODEREG(node, TLFADR0,
295		    TLSB_GET_NODEREG(node, TLFADR0));
296		TLSB_PUT_NODEREG(node, TLFADR1,
297		    TLSB_GET_NODEREG(node, TLFADR1));
298
299		if (TLDEV_ISCPU(tldev)) {
300			TLSB_PUT_NODEREG(node, TLEPAERR,
301			    TLSB_GET_NODEREG(node, TLEPAERR));
302			TLSB_PUT_NODEREG(node, TLEPDERR,
303			    TLSB_GET_NODEREG(node, TLEPDERR));
304			TLSB_PUT_NODEREG(node, TLEPMERR,
305			    TLSB_GET_NODEREG(node, TLEPMERR));
306			continue;
307		}
308		/*
309		 * If we're only doing CPU nodes, or this was a memory
310		 * node, we're done. Onwards.
311		 */
312		if (cpuonly || TLDEV_ISMEM(tldev)) {
313			continue;
314		}
315
316		TLSB_PUT_NODEREG(node, KFT_ICCNSE,
317		    TLSB_GET_NODEREG(node, KFT_ICCNSE));
318		TLSB_PUT_NODEREG(node, KFT_IDPNSE0,
319		    TLSB_GET_NODEREG(node, KFT_IDPNSE0));
320		TLSB_PUT_NODEREG(node, KFT_IDPNSE1,
321		    TLSB_GET_NODEREG(node, KFT_IDPNSE1));
322		if (TLDEV_DTYPE(tldev) == TLDEV_DTYPE_KFTHA) {
323			TLSB_PUT_NODEREG(node, KFT_IDPNSE2,
324			    TLSB_GET_NODEREG(node, KFT_IDPNSE2));
325			TLSB_PUT_NODEREG(node, KFT_IDPNSE3,
326			    TLSB_GET_NODEREG(node, KFT_IDPNSE3));
327		}
328		/*
329		 * Digital Unix cleares the Mailbox Transaction Register
330		 * here. I don't think we should because we aren't using
331		 * mailboxes yet, and the tech manual makes dire warnings
332		 * about *not* rewriting this register.
333		 */
334	}
335}
336
337/*
338 * System Corrected Errors.
339 */
/*
 * Shared printf format for the register dumps in this file: eight spaces,
 * a label left-justified in 25 columns, then the value as 16 zero-padded
 * hexadecimal digits.  The value argument must be long-sized.
 *
 * The '0' flag and the width have to precede the 'l' length modifier for
 * this to be a valid conversion specification.
 */
static const char *fmt1 = "        %-25s = 0x%016lx\n";
341
342void
343kn8ae_harderr(unsigned long mces, unsigned long type, unsigned long logout, struct trapframe *framep)
344{
345	int whami, cpuwerr, dof_cnt;
346	mc_hdr_ev5 *hdr;
347	mc_cc_ev5 *mptr;
348	struct tlsb_mchk_fatal *ptr;
349
350	hdr = (mc_hdr_ev5 *) logout;
351	mptr = (mc_cc_ev5 *) (logout + sizeof (*hdr));
352	ptr = (struct tlsb_mchk_fatal *)
353		(logout + sizeof (*hdr) + sizeof (*mptr));
354	whami = alpha_pal_whami();
355
356	printf("kn8ae: CPU ID %d system correctable error\n", whami);
357
358	printf("    Machine Check Code 0x%lx\n", hdr->mcheck_code);
359	printf(fmt1, "EI Status", mptr->ei_stat);
360	printf(fmt1, "EI Address", mptr->ei_addr);
361	printf(fmt1, "Fill Syndrome", mptr->fill_syndrome);
362	printf(fmt1, "Interrupt Status Reg.", mptr->isr);
363	printf("\n");
364	dof_cnt = (ptr->rsvdheader & 0xffffffff00000000) >> 32;
365	cpuwerr = ptr->rsvdheader & 0xffff;
366
367	printf(fmt1, "CPU W/Error.", cpuwerr);
368	printf(fmt1, "DOF Count.", dof_cnt);
369	printf(fmt1, "TLDEV", ptr->tldev);
370	printf(fmt1, "TLSB Bus Error", ptr->tlber);
371	printf(fmt1, "TLSB CNR", ptr->tlcnr);
372	printf(fmt1, "TLSB VID", ptr->tlvid);
373	printf(fmt1, "TLSB Error Syndrome 0", ptr->tlesr0);
374	printf(fmt1, "TLSB Error Syndrome 1", ptr->tlesr1);
375	printf(fmt1, "TLSB Error Syndrome 2", ptr->tlesr2);
376	printf(fmt1, "TLSB Error Syndrome 3", ptr->tlesr3);
377	printf(fmt1, "TLSB LEP_AERR", ptr->tlepaerr);
378	printf(fmt1, "TLSB MODCONF", ptr->tlmodconfig);
379	printf(fmt1, "TLSB LEP_MERR", ptr->tlepmerr);
380	printf(fmt1, "TLSB LEP_DERR", ptr->tlepderr);
381	printf(fmt1, "TLSB INTRMASK0", ptr->tlintrmask0);
382	printf(fmt1, "TLSB INTRMASK1", ptr->tlintrmask1);
383	printf(fmt1, "TLSB INTRSUM0", ptr->tlintrsum0);
384	printf(fmt1, "TLSB INTRSUM1", ptr->tlintrsum1);
385	printf(fmt1, "TLSB VMG", ptr->tlep_vmg);
386
387	/* CLEAN UP */
388	/*
389	 * Here's what Digital Unix says to do-
390	 *
391	 * 1. Log the ECC error that got us here
392	 *
393	 * 2. Turn off error reporting
394	 *
395	 * 3. Attempt to have CPU read bad memory location (specified by the
396	 *    tlfadr reg of the TIOP or TMEM (depending on type of error,
397	 *    see upcoming code branches) and write data back to location.
398	 *
399	 * 4. When the CPU attempts to read the location, another 620 interrupt
400	 *    should occur for the CPU at which instant PAL will scrub the
401	 *    location. Then the o.s. scrub routine finishes. If the PAL scrubs
402	 *    the location then the scrubbed flag should be 0 (this is what we
403	 *    expect).
404	 *
405	 *    If it's a 1 then the alpha_scrub_long routine did the scrub.
406	 *
407	 * 5. We renable correctable error logging and continue
408	 */
409	printf("WARNING THIS IS NOT DONE YET YOU MAY GET DATA CORRUPTION");
410	clear_tlsb_ebits(0);
411	/*
412	 * Clear error by rewriting register.
413	 */
414	alpha_pal_wrmces(mces);
415}
416
417/*
418 *  Processor Corrected Errors- BCACHE ECC errors.
419 */
420
421static void
422kn8ae_softerr(unsigned long mces, unsigned long type, unsigned long logout, struct trapframe *framep)
423{
424	int whami, cpuwerr, dof_cnt;
425	mc_hdr_ev5 *hdr;
426	mc_cc_ev5 *mptr;
427	struct tlsb_mchk_soft *ptr;
428
429	hdr = (mc_hdr_ev5 *) logout;
430	mptr = (mc_cc_ev5 *) (logout + sizeof (*hdr));
431	ptr = (struct tlsb_mchk_soft *)
432		(logout + sizeof (*hdr) + sizeof (*mptr));
433	whami = alpha_pal_whami();
434
435	printf("kn8ae: CPU ID %d processor correctable error\n", whami);
436	printf("    Machine Check Code 0x%lx\n", hdr->mcheck_code);
437	printf(fmt1, "EI Status", mptr->ei_stat);
438	printf(fmt1, "EI Address", mptr->ei_addr);
439	printf(fmt1, "Fill Syndrome", mptr->fill_syndrome);
440	printf(fmt1, "Interrupt Status Reg.", mptr->isr);
441	printf("\n");
442	dof_cnt = (ptr->rsvdheader & 0xffffffff00000000) >> 32;
443	cpuwerr = ptr->rsvdheader & 0xffff;
444
445	printf(fmt1, "CPU W/Error.", cpuwerr);
446	printf(fmt1, "DOF Count.", dof_cnt);
447	printf(fmt1, "TLDEV", ptr->tldev);
448	printf(fmt1, "TLSB Bus Error", ptr->tlber);
449	printf(fmt1, "TLSB Error Syndrome 0", ptr->tlesr0);
450	printf(fmt1, "TLSB Error Syndrome 1", ptr->tlesr1);
451	printf(fmt1, "TLSB Error Syndrome 2", ptr->tlesr2);
452	printf(fmt1, "TLSB Error Syndrome 3", ptr->tlesr3);
453
454	/*
455	 * Clear TLSB bits on all CPU TLSB nodes.
456	 */
457	clear_tlsb_ebits(1);
458
459	/*
460	 * Clear error by rewriting register.
461	 */
462	alpha_pal_wrmces(mces);
463}
464
465/*
466 * KN8AE specific machine check handler
467 */
468
469void
470kn8ae_mcheck(unsigned long mces, unsigned long type, unsigned long logout, struct trapframe *framep)
471{
472	struct mchkinfo *mcp;
473	int get_dwlpx_regs;
474	struct tlsb_mchk_fatal mcs[TLSB_NODE_MAX+1], *ptr;
475	mc_hdr_ev5 *hdr;
476	mc_uc_ev5 *mptr;
477
478	/*
479	 * If we expected a machine check, just go handle it in common code.
480	 */
481	mcp = &curcpu()->ci_mcinfo;
482	if (mcp->mc_expected) {
483		machine_check(mces, framep, type, logout);
484		return;
485	}
486
487	get_dwlpx_regs = 0;
488	ptr = NULL;
489	memset(mcs, 0, sizeof (mcs));
490
491	hdr = (mc_hdr_ev5 *) logout;
492	mptr = (mc_uc_ev5 *) (logout + sizeof (*hdr));
493
494	/*
495	 * If detected by the system, we print out some TLASER registers.
496	 */
497	if (type == ALPHA_SYS_MCHECK) {
498#if	0
499		int get_lsb_regs = 0;
500		int get_dwlpx_regs = 0;
501#endif
502
503		ptr = (struct tlsb_mchk_fatal *)
504		    (logout + sizeof (*hdr) + sizeof (*mptr));
505
506#if	0
507		if (ptr->tlepaerr & TLEPAERR_WSPC_RD) {
508			get_dwlpx_regs++;
509		}
510		if ((ptr->tlepaerr & TLEPAERR_IBOX_TMO) &&
511		    (mptr->ic_perr_stat & EV5_IC_PERR_IBOXTMO) &&
512		    (ptr->tlepderr & TLEPDERR_GBTMO)) {
513			get_dwlpx_regs++;
514		}
515#endif
516	} else {
517		/*
518		 * We have a processor machine check- which doesn't
519		 * have information with it about any TLSB related
520		 * failures.
521		 */
522	}
523
524	/*
525	 * Now we can finally print some stuff...
526	 */
527	ev5_logout_print(hdr, mptr);
528	if (type == ALPHA_SYS_MCHECK) {
529		if (ptr->tlepaerr & TLEPAERR_WSPC_RD) {
530			printf("\tWSPC READ error\n");
531		}
532		if ((ptr->tlepaerr & TLEPAERR_IBOX_TMO) &&
533		    (mptr->ic_perr_stat & EV5_IC_PERR_IBOXTMO) &&
534		    (ptr->tlepderr & TLEPDERR_GBTMO)) {
535			printf ("\tWSPC IBOX timeout detected\n");
536		}
537#ifdef	DIAGNOSTIC
538		printf(fmt1, "TLDEV", ptr->tldev);
539		printf(fmt1, "TLSB Bus Error", ptr->tlber);
540		printf(fmt1, "TLSB CNR", ptr->tlcnr);
541		printf(fmt1, "TLSB VID", ptr->tlvid);
542		printf(fmt1, "TLSB Error Syndrome 0", ptr->tlesr0);
543		printf(fmt1, "TLSB Error Syndrome 1", ptr->tlesr1);
544		printf(fmt1, "TLSB Error Syndrome 2", ptr->tlesr2);
545		printf(fmt1, "TLSB Error Syndrome 3", ptr->tlesr3);
546		printf(fmt1, "TLSB LEP_AERR", ptr->tlepaerr);
547		printf(fmt1, "TLSB MODCONF", ptr->tlmodconfig);
548		printf(fmt1, "TLSB LEP_MERR", ptr->tlepmerr);
549		printf(fmt1, "TLSB LEP_DERR", ptr->tlepderr);
550		printf(fmt1, "TLSB INTRMASK0", ptr->tlintrmask0);
551		printf(fmt1, "TLSB INTRMASK1", ptr->tlintrmask1);
552		printf(fmt1, "TLSB INTRSUM0", ptr->tlintrsum0);
553		printf(fmt1, "TLSB INTRSUM1", ptr->tlintrsum1);
554		printf(fmt1, "TLSB VMG", ptr->tlep_vmg);
555#endif
556	} else {
557	}
558
559	/*
560	 * Now that we've printed all sorts of useful information
561	 * and have decided that we really can't do any more to
562	 * respond to the error, go on to the common code for
563	 * final disposition. Usually this means that we die.
564	 */
565	clear_tlsb_ebits(0);
566
567	machine_check(mces, framep, type, logout);
568}
569
570static void
571dec_kn8ae_mcheck_handler(unsigned long mces, struct trapframe *framep, unsigned long vector, unsigned long param)
572{
573	switch (vector) {
574	case ALPHA_SYS_ERROR:
575		kn8ae_harderr(mces, vector, param, framep);
576		break;
577
578	case ALPHA_PROC_ERROR:
579		kn8ae_softerr(mces, vector, param, framep);
580		break;
581
582	case ALPHA_SYS_MCHECK:
583	case ALPHA_PROC_MCHECK:
584		kn8ae_mcheck(mces, vector, param, framep);
585		break;
586	default:
587		printf("KN8AE_MCHECK: unknown check vector 0x%lx\n", vector);
588		machine_check(mces, framep, vector, param);
589		break;
590	}
591}
592