1/*-
2 * Copyright (c) 2002-2010 Marcel Moolenaar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 * $FreeBSD$
27 */
28
29#include <sys/param.h>
30#include <sys/systm.h>
31#include <sys/bus.h>
32#include <sys/kernel.h>
33#include <sys/lock.h>
34#include <sys/malloc.h>
35#include <sys/mutex.h>
36#include <sys/sysctl.h>
37#include <sys/uuid.h>
38#include <vm/vm.h>
39#include <vm/vm_kern.h>
40#include <machine/intr.h>
41#include <machine/mca.h>
42#include <machine/pal.h>
43#include <machine/sal.h>
44#include <machine/smp.h>
45
46static MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Architecture");
47
48struct mca_info {
49	STAILQ_ENTRY(mca_info) mi_link;
50	u_long	mi_seqnr;
51	u_int	mi_cpuid;
52	size_t	mi_recsz;
53	char	mi_record[0];
54};
55
56STAILQ_HEAD(mca_info_list, mca_info);
57
/* Per-type state-info size reported by SAL; -1 if the query failed. */
static int64_t		mca_info_size[SAL_INFO_TYPES];
/* Region-7 address of the common SAL state-info buffer; 0 if unallocated. */
static vm_offset_t	mca_info_block;
/* Spin lock protecting mca_info_block (usable from the MCA handler). */
static struct mtx	mca_info_block_lock;

static SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RW, NULL, "MCA container");

static int mca_count;		/* Number of records stored. */
static int mca_first;		/* First (lowest) record ID. */
static int mca_last;		/* Last (highest) record ID. */

SYSCTL_INT(_hw_mca, OID_AUTO, count, CTLFLAG_RD, &mca_count, 0,
    "Record count");
SYSCTL_INT(_hw_mca, OID_AUTO, first, CTLFLAG_RD, &mca_first, 0,
    "First record id");
SYSCTL_INT(_hw_mca, OID_AUTO, last, CTLFLAG_RD, &mca_last, 0,
    "Last record id");

/* Sleep lock serializing updates to the record counters above. */
static struct mtx mca_sysctl_lock;

/* XIV allocated for corrected machine check (CMC) interrupts; 0 if none. */
static u_int mca_xiv_cmc;
78
79static int
80mca_sysctl_inject(SYSCTL_HANDLER_ARGS)
81{
82	struct ia64_pal_result res;
83	u_int val;
84	int error;
85
86	val = 0;
87	error = sysctl_wire_old_buffer(req, sizeof(u_int));
88	if (!error)
89		error = sysctl_handle_int(oidp, &val, 0, req);
90
91	if (error != 0 || req->newptr == NULL)
92		return (error);
93
94	/*
95	 * Example values for injecting PAL determined machine checks:
96	 *	corrected	9
97	 *	recoverable	73
98	 *	fatal		137
99	 */
100	res = ia64_call_pal_stacked(PAL_MC_ERROR_INJECT, val, 0, 0);
101	printf("%s: %#lx, %#lx, %#lx, %#lx\n", __func__, res.pal_status,
102	    res.pal_result[0], res.pal_result[1], res.pal_result[2]);
103	return (0);
104}
105SYSCTL_PROC(_hw_mca, OID_AUTO, inject, CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
106    mca_sysctl_inject, "I", "set to trigger a MCA");
107
108static int
109mca_sysctl_handler(SYSCTL_HANDLER_ARGS)
110{
111	int error = 0;
112
113	if (!arg1)
114		return (EINVAL);
115	error = SYSCTL_OUT(req, arg1, arg2);
116
117	if (error || !req->newptr)
118		return (error);
119
120	error = SYSCTL_IN(req, arg1, arg2);
121	return (error);
122}
123
/*
 * Drain all pending SAL error records of the given type into reclst.
 * Each record is read from the shared info block (under the spin lock),
 * copied into a freshly malloc'ed mca_info, cleared in SAL, and queued.
 * The loop ends when SAL reports no more records or allocation fails.
 */
static void
ia64_mca_collect_state(int type, struct mca_info_list *reclst)
{
	struct ia64_sal_result result;
	struct mca_record_header *hdr;
	struct mca_info *rec;
	uint64_t seqnr;
	size_t recsz;

	/*
	 * Don't try to get the state if we couldn't get the size of
	 * the state information previously.
	 */
	if (mca_info_size[type] == -1)
		return;

	/* No shared buffer was allocated at init time; nothing to do. */
	if (mca_info_block == 0)
		return;

	while (1) {
		mtx_lock_spin(&mca_info_block_lock);
		result = ia64_sal_entry(SAL_GET_STATE_INFO, type, 0,
		    mca_info_block, 0, 0, 0, 0);
		if (result.sal_status < 0) {
			/* Negative status: no (more) records of this type. */
			mtx_unlock_spin(&mca_info_block_lock);
			break;
		}

		/* Snapshot size and sequence number before dropping the lock. */
		hdr = (struct mca_record_header *)mca_info_block;
		recsz = hdr->rh_length;
		seqnr = hdr->rh_seqnr;

		/* Drop the spin lock: malloc(9) must not be called under it. */
		mtx_unlock_spin(&mca_info_block_lock);

		rec = malloc(sizeof(struct mca_info) + recsz, M_MCA,
		    M_NOWAIT | M_ZERO);
		if (rec == NULL)
			/* XXX: Not sure what to do. */
			break;

		rec->mi_seqnr = seqnr;
		rec->mi_cpuid = PCPU_GET(cpuid);

		mtx_lock_spin(&mca_info_block_lock);

		/*
		 * If the info block doesn't have our record anymore because
		 * we temporarily unlocked it, get it again from SAL. I assume
		 * that it's possible that we could get a different record.
		 * I expect this to happen in a SMP configuration where the
		 * record has been cleared by a different processor. So, if
		 * we get a different record we simply abort with this record
		 * and start over.
		 *
		 * NOTE(review): the re-fetch's result.sal_status is not
		 * checked before re-reading the header — if the second
		 * SAL_GET_STATE_INFO fails, hdr may hold stale data.
		 * Presumably benign (the seqnr re-check catches mismatches),
		 * but worth confirming against the SAL spec.
		 */
		if (seqnr != hdr->rh_seqnr) {
			result = ia64_sal_entry(SAL_GET_STATE_INFO, type, 0,
			    mca_info_block, 0, 0, 0, 0);
			if (seqnr != hdr->rh_seqnr) {
				mtx_unlock_spin(&mca_info_block_lock);
				free(rec, M_MCA);
				continue;
			}
		}

		rec->mi_recsz = recsz;
		bcopy((char*)mca_info_block, rec->mi_record, recsz);

		/*
		 * Clear the state so that we get any other records when
		 * they exist.
		 */
		result = ia64_sal_entry(SAL_CLEAR_STATE_INFO, type, 0, 0, 0,
		    0, 0, 0);

		mtx_unlock_spin(&mca_info_block_lock);

		STAILQ_INSERT_TAIL(reclst, rec, mi_link);
	}
}
203
204void
205ia64_mca_save_state(int type)
206{
207	char name[64];
208	struct mca_info_list reclst = STAILQ_HEAD_INITIALIZER(reclst);
209	struct mca_info *rec;
210	struct sysctl_oid *oid;
211
212	ia64_mca_collect_state(type, &reclst);
213
214	STAILQ_FOREACH(rec, &reclst, mi_link) {
215		sprintf(name, "%lu", rec->mi_seqnr);
216		oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca),
217		    OID_AUTO, name, CTLFLAG_RW, NULL, name);
218		if (oid == NULL)
219			continue;
220
221		mtx_lock(&mca_sysctl_lock);
222		if (mca_count > 0) {
223			if (rec->mi_seqnr < mca_first)
224				mca_first = rec->mi_seqnr;
225			else if (rec->mi_seqnr > mca_last)
226				mca_last = rec->mi_seqnr;
227		} else
228			mca_first = mca_last = rec->mi_seqnr;
229		mca_count++;
230		mtx_unlock(&mca_sysctl_lock);
231
232		sprintf(name, "%u", rec->mi_cpuid);
233		SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), rec->mi_cpuid,
234		    name, CTLTYPE_OPAQUE | CTLFLAG_RD, rec->mi_record,
235		    rec->mi_recsz, mca_sysctl_handler, "S,MCA", "MCA record");
236	}
237}
238
239static u_int
240ia64_mca_intr(struct thread *td, u_int xiv, struct trapframe *tf)
241{
242
243	if (xiv == mca_xiv_cmc) {
244		printf("MCA: corrected machine check (CMC) interrupt\n");
245		return (0);
246	}
247
248	return (0);
249}
250
251void
252ia64_mca_init_ap(void)
253{
254
255	if (mca_xiv_cmc != 0)
256		ia64_set_cmcv(mca_xiv_cmc);
257}
258
259void
260ia64_mca_init(void)
261{
262	struct ia64_sal_result result;
263	uint64_t max_size;
264	char *p;
265	int i;
266
267	/*
268	 * Get the sizes of the state information we can get from SAL and
269	 * allocate a common block (forgive me my Fortran :-) for use by
270	 * support functions. We create a region 7 address to make it
271	 * easy on the OS_MCA or OS_INIT handlers to get the state info
272	 * under unreliable conditions.
273	 */
274	max_size = 0;
275	for (i = 0; i < SAL_INFO_TYPES; i++) {
276		result = ia64_sal_entry(SAL_GET_STATE_INFO_SIZE, i, 0, 0, 0,
277		    0, 0, 0);
278		if (result.sal_status == 0) {
279			mca_info_size[i] = result.sal_result[0];
280			if (mca_info_size[i] > max_size)
281				max_size = mca_info_size[i];
282		} else
283			mca_info_size[i] = -1;
284	}
285	max_size = round_page(max_size);
286
287	p = (max_size) ? contigmalloc(max_size, M_TEMP, 0, 0ul,
288	    256*1024*1024 - 1, PAGE_SIZE, 256*1024*1024) : NULL;
289	if (p != NULL) {
290		mca_info_block = IA64_PHYS_TO_RR7(ia64_tpa((u_int64_t)p));
291
292		if (bootverbose)
293			printf("MCA: allocated %ld bytes for state info.\n",
294			    max_size);
295	}
296
297	/*
298	 * Initialize the spin lock used to protect the info block. When APs
299	 * get launched, there's a short moment of contention, but in all other
300	 * cases it's not a hot spot. I think it's possible to have the MCA
301	 * handler be called on multiple processors at the same time, but that
302	 * should be rare. On top of that, performance is not an issue when
303	 * dealing with machine checks...
304	 */
305	mtx_init(&mca_info_block_lock, "MCA info lock", NULL, MTX_SPIN);
306
307	/*
308	 * Serialize sysctl operations with a sleep lock. Note that this
309	 * implies that we update the sysctl tree in a context that allows
310	 * sleeping.
311	 */
312	mtx_init(&mca_sysctl_lock, "MCA sysctl lock", NULL, MTX_DEF);
313
314	/*
315	 * Get and save any processor and platfom error records. Note that in
316	 * a SMP configuration the processor records are for the BSP only. We
317	 * let the APs get and save their own records when we wake them up.
318	 */
319	for (i = 0; i < SAL_INFO_TYPES; i++)
320		ia64_mca_save_state(i);
321
322	/*
323	 * Allocate a XIV for CMC interrupts, so that we can collect and save
324	 * the corrected processor checks.
325	 */
326	mca_xiv_cmc = ia64_xiv_alloc(PI_SOFT, IA64_XIV_PLAT, ia64_mca_intr);
327	if (mca_xiv_cmc != 0)
328		ia64_set_cmcv(mca_xiv_cmc);
329	else
330		printf("MCA: CMC vector could not be allocated\n");
331}
332