mca.c revision 206571
1/*
2 * Copyright (c) 2002 Marcel Moolenaar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sbin/mca/mca.c 206571 2010-04-13 22:27:39Z marcel $");
29
30#include <sys/types.h>
31#include <sys/mman.h>
32#include <sys/sysctl.h>
33#include <sys/uuid.h>
34
35/*
36 * Hack to make this compile on non-ia64 machines.
37 */
38#ifdef __ia64__
39#include <machine/mca.h>
40#else
41#include "../../sys/ia64/include/mca.h"
42#endif
43
44#include <err.h>
45#include <errno.h>
46#include <fcntl.h>
47#include <stdarg.h>
48#include <stdio.h>
49#include <stdlib.h>
50#include <string.h>
51#include <unistd.h>
52#include <uuid.h>
53
/*
 * Convert one packed-BCD byte (two decimal digits, high nibble first) to
 * its integer value.  The argument is fully parenthesized so that compound
 * expressions expand correctly; note that it is evaluated twice.
 */
#define	BCD(x)	((((x) >> 4) * 10) + ((x) & 15))
55
/* Highest CPU id that main() probes for a given record sequence number. */
#define	HW_MCA_MAX_CPUID	255

/*
 * sysctl(3) names read by main().  hw_mca_recid is a printf-style format
 * that main() expands with a sequence number and a CPU id.  The strings
 * are never modified, so they are const-qualified.
 */
static const char hw_mca_count[] = "hw.mca.count";
static const char hw_mca_first[] = "hw.mca.first";
static const char hw_mca_last[] = "hw.mca.last";
static const char hw_mca_recid[] = "hw.mca.%lu.%u";

/* File that dump() appends to when no -f argument was given. */
static const char default_dumpfile[] = "/var/log/mca.log";

int fl_dump;		/* Set to 1 by the -d option. */
char *file;		/* strdup()'ed argument of the -f option, or NULL. */
67
68static const char *
69severity(int error)
70{
71
72	switch (error) {
73	case MCA_RH_ERROR_RECOVERABLE:
74		return ("recoverable");
75	case MCA_RH_ERROR_FATAL:
76		return ("fatal");
77	case MCA_RH_ERROR_CORRECTED:
78		return ("corrected");
79	}
80
81	return ("unknown");
82}
83
84static const char *
85uuid(uuid_t *id)
86{
87	static char buffer[64];
88	char *s;
89
90	uuid_to_string(id, &s, NULL);
91	strcpy(buffer, s);
92	free(s);
93	return (buffer);
94}
95
/*
 * Print one element of the form "<var>value</var>\n" on stdout, preceded
 * by `indent' spaces.  The value is produced from the printf-style format
 * `fmt' and the variadic arguments that follow it.
 *
 * A non-positive indent prints no leading spaces.
 *
 * Returns the sum of the number of spaces written and the values returned
 * by the printf() calls.
 */
static int
show_value(int indent, const char *var, const char *fmt, ...)
{
	va_list ap;
	int i, len;

	len = 0;
	for (i = 0; i < indent; i++) {
		putchar(' ');
		len++;
	}
	len += printf("<%s>", var);
	va_start(ap, fmt);
	len += vprintf(fmt, ap);
	va_end(ap);		/* Every va_start() needs a matching va_end(). */
	len += printf("</%s>\n", var);
	return (len);
}
111
112static size_t
113show_header(struct mca_record_header *rh)
114{
115
116	printf("  <header>\n");
117	show_value(4, "seqnr", "%lld", (long long)rh->rh_seqnr);
118	show_value(4, "revision", "%d.%d", BCD(rh->rh_major),
119	    BCD(rh->rh_minor));
120	show_value(4, "severity", "%s", severity(rh->rh_error));
121	show_value(4, "length", "%lld", (long long)rh->rh_length);
122	show_value(4, "date", "%d%02d/%02d/%02d",
123	    BCD(rh->rh_time[MCA_RH_TIME_CENT]),
124	    BCD(rh->rh_time[MCA_RH_TIME_YEAR]),
125	    BCD(rh->rh_time[MCA_RH_TIME_MON]),
126	    BCD(rh->rh_time[MCA_RH_TIME_MDAY]));
127	show_value(4, "time", "%02d:%02d:%02d",
128	    BCD(rh->rh_time[MCA_RH_TIME_HOUR]),
129	    BCD(rh->rh_time[MCA_RH_TIME_MIN]),
130	    BCD(rh->rh_time[MCA_RH_TIME_SEC]));
131	if (rh->rh_flags & MCA_RH_FLAGS_PLATFORM_ID)
132		show_value(4, "platform", "%s", uuid(&rh->rh_platform));
133	printf("  </header>\n");
134	return (rh->rh_length);
135}
136
137static void
138show_cpu_mod(const char *what, int idx, struct mca_cpu_mod *cpu_mod)
139{
140	printf("      <%s-%d>\n", what, idx);
141	if (cpu_mod->cpu_mod_flags & MCA_CPU_MOD_FLAGS_INFO)
142		show_value(8, "info", "0x%016llx",
143		    (long long)cpu_mod->cpu_mod_info);
144	if (cpu_mod->cpu_mod_flags & MCA_CPU_MOD_FLAGS_REQID)
145		show_value(8, "requester", "0x%016llx",
146		    (long long)cpu_mod->cpu_mod_reqid);
147	if (cpu_mod->cpu_mod_flags & MCA_CPU_MOD_FLAGS_RSPID)
148		show_value(8, "responder", "0x%016llx",
149		    (long long)cpu_mod->cpu_mod_rspid);
150	if (cpu_mod->cpu_mod_flags & MCA_CPU_MOD_FLAGS_TGTID)
151		show_value(8, "target", "0x%016llx",
152		    (long long)cpu_mod->cpu_mod_tgtid);
153	if (cpu_mod->cpu_mod_flags & MCA_CPU_MOD_FLAGS_IP)
154		show_value(8, "ip", "0x%016llx",
155		    (long long)cpu_mod->cpu_mod_ip);
156	printf("      </%s-%d>\n", what, idx);
157}
158
159static void
160show_cpu(struct mca_cpu_record *cpu)
161{
162	char var[16];
163	struct mca_cpu_mod *mod;
164	struct mca_cpu_cpuid *cpuid;
165	struct mca_cpu_psi *psi;
166	int i, n;
167
168	printf("    <cpu>\n");
169
170	if (cpu->cpu_flags & MCA_CPU_FLAGS_ERRMAP)
171		show_value(6, "errmap", "0x%016llx",
172		    (long long)cpu->cpu_errmap);
173	if (cpu->cpu_flags & MCA_CPU_FLAGS_STATE)
174		show_value(6, "state", "0x%016llx",
175		    (long long)cpu->cpu_state);
176	if (cpu->cpu_flags & MCA_CPU_FLAGS_CR_LID)
177		show_value(6, "cr_lid", "0x%016llx",
178		    (long long)cpu->cpu_cr_lid);
179
180	mod = (struct mca_cpu_mod*)(cpu + 1);
181	n = MCA_CPU_FLAGS_CACHE(cpu->cpu_flags);
182	for (i = 0; i < n; i++)
183		show_cpu_mod("cache", i, mod++);
184	n = MCA_CPU_FLAGS_TLB(cpu->cpu_flags);
185	for (i = 0; i < n; i++)
186		show_cpu_mod("tlb", i, mod++);
187	n = MCA_CPU_FLAGS_BUS(cpu->cpu_flags);
188	for (i = 0; i < n; i++)
189		show_cpu_mod("bus", i, mod++);
190	n = MCA_CPU_FLAGS_REG(cpu->cpu_flags);
191	for (i = 0; i < n; i++)
192		show_cpu_mod("reg", i, mod++);
193	n = MCA_CPU_FLAGS_MS(cpu->cpu_flags);
194	for (i = 0; i < n; i++)
195		show_cpu_mod("ms", i, mod++);
196
197	cpuid = (struct mca_cpu_cpuid*)mod;
198	for (i = 0; i < 6; i++) {
199		sprintf(var, "cpuid-%d", i);
200		show_value(6, var, "0x%016llx", (long long)cpuid->cpuid[i]);
201	}
202
203	psi = (struct mca_cpu_psi*)(cpuid + 1);
204	/* TODO: Dump PSI */
205
206	printf("    </cpu>\n");
207}
208
209static void
210show_memory(struct mca_mem_record *mem)
211{
212	printf("    <memory>\n");
213
214	if (mem->mem_flags & MCA_MEM_FLAGS_STATUS)
215		show_value(6, "status", "0x%016llx",
216		    (long long)mem->mem_status);
217	if (mem->mem_flags & MCA_MEM_FLAGS_ADDR)
218		show_value(6, "address", "0x%016llx",
219		    (long long)mem->mem_addr);
220	if (mem->mem_flags & MCA_MEM_FLAGS_ADDRMASK)
221		show_value(6, "mask", "0x%016llx",
222		    (long long)mem->mem_addrmask);
223	if (mem->mem_flags & MCA_MEM_FLAGS_NODE)
224		show_value(6, "node", "0x%04x", mem->mem_node);
225	if (mem->mem_flags & MCA_MEM_FLAGS_CARD)
226		show_value(6, "card", "0x%04x", mem->mem_card);
227	if (mem->mem_flags & MCA_MEM_FLAGS_MODULE)
228		show_value(6, "module", "0x%04x", mem->mem_module);
229	if (mem->mem_flags & MCA_MEM_FLAGS_BANK)
230		show_value(6, "bank", "0x%04x", mem->mem_bank);
231	if (mem->mem_flags & MCA_MEM_FLAGS_DEVICE)
232		show_value(6, "device", "0x%04x", mem->mem_device);
233	if (mem->mem_flags & MCA_MEM_FLAGS_ROW)
234		show_value(6, "row", "0x%04x", mem->mem_row);
235	if (mem->mem_flags & MCA_MEM_FLAGS_COLUMN)
236		show_value(6, "column", "0x%04x", mem->mem_column);
237	if (mem->mem_flags & MCA_MEM_FLAGS_BITPOS)
238		show_value(6, "bit", "0x%04x", mem->mem_bitpos);
239	if (mem->mem_flags & MCA_MEM_FLAGS_REQID)
240		show_value(6, "requester", "0x%016llx",
241		    (long long)mem->mem_reqid);
242	if (mem->mem_flags & MCA_MEM_FLAGS_RSPID)
243		show_value(6, "responder", "0x%016llx",
244		    (long long)mem->mem_rspid);
245	if (mem->mem_flags & MCA_MEM_FLAGS_TGTID)
246		show_value(6, "target", "0x%016llx",
247		    (long long)mem->mem_tgtid);
248	if (mem->mem_flags & MCA_MEM_FLAGS_BUSDATA)
249		show_value(6, "status", "0x%016llx",
250		    (long long)mem->mem_busdata);
251	if (mem->mem_flags & MCA_MEM_FLAGS_OEM_ID)
252		show_value(6, "oem", "%s", uuid(&mem->mem_oem_id));
253	/* TODO: Dump OEM data */
254
255	printf("    </memory>\n");
256}
257
/* Placeholder for SEL sections: only a marker line is printed. */
static void
show_sel(void)
{

	fputs("    # SEL\n", stdout);
}
263
264static void
265show_pci_bus(struct mca_pcibus_record *pcibus)
266{
267	printf("    <pci-bus>\n");
268
269	if (pcibus->pcibus_flags & MCA_PCIBUS_FLAGS_STATUS)
270		show_value(6, "status", "0x%016llx",
271		    (long long)pcibus->pcibus_status);
272	if (pcibus->pcibus_flags & MCA_PCIBUS_FLAGS_ERROR)
273		show_value(6, "error", "0x%04x", pcibus->pcibus_error);
274	if (pcibus->pcibus_flags & MCA_PCIBUS_FLAGS_BUS)
275		show_value(6, "bus", "0x%04x", pcibus->pcibus_bus);
276	if (pcibus->pcibus_flags & MCA_PCIBUS_FLAGS_ADDR)
277		show_value(6, "address", "0x%016llx",
278		    (long long)pcibus->pcibus_addr);
279	if (pcibus->pcibus_flags & MCA_PCIBUS_FLAGS_DATA)
280		show_value(6, "data", "0x%016llx",
281		    (long long)pcibus->pcibus_data);
282	if (pcibus->pcibus_flags & MCA_PCIBUS_FLAGS_CMD)
283		show_value(6, "cmd", "0x%016llx",
284		    (long long)pcibus->pcibus_cmd);
285	if (pcibus->pcibus_flags & MCA_PCIBUS_FLAGS_REQID)
286		show_value(6, "requester", "0x%016llx",
287		    (long long)pcibus->pcibus_reqid);
288	if (pcibus->pcibus_flags & MCA_PCIBUS_FLAGS_RSPID)
289		show_value(6, "responder", "0x%016llx",
290		    (long long)pcibus->pcibus_rspid);
291	if (pcibus->pcibus_flags & MCA_PCIBUS_FLAGS_TGTID)
292		show_value(6, "target", "0x%016llx",
293		    (long long)pcibus->pcibus_tgtid);
294	if (pcibus->pcibus_flags & MCA_PCIBUS_FLAGS_OEM_ID)
295		show_value(6, "oem", "%s", uuid(&pcibus->pcibus_oem_id));
296	/* TODO: Dump OEM data */
297
298	printf("    </pci-bus>\n");
299}
300
/* Placeholder for SMBIOS sections: only a marker line is printed. */
static void
show_smbios(void)
{

	fputs("    # SMBIOS\n", stdout);
}
306
307static void
308show_pci_dev(struct mca_pcidev_record *pcidev)
309{
310	printf("    <pci-dev>\n");
311
312	if (pcidev->pcidev_flags & MCA_PCIDEV_FLAGS_STATUS)
313		show_value(6, "status", "0x%016llx",
314		    (long long)pcidev->pcidev_status);
315	if (pcidev->pcidev_flags & MCA_PCIDEV_FLAGS_INFO) {
316		show_value(6, "vendor", "0x%04x",
317		    pcidev->pcidev_info.info_vendor);
318		show_value(6, "device", "0x%04x",
319		    pcidev->pcidev_info.info_device);
320		show_value(6, "class", "0x%06x",
321		    MCA_PCIDEV_INFO_CLASS(pcidev->pcidev_info.info_ccfn));
322		show_value(6, "function", "0x%02x",
323		    MCA_PCIDEV_INFO_FUNCTION(pcidev->pcidev_info.info_ccfn));
324		show_value(6, "slot", "0x%02x", pcidev->pcidev_info.info_slot);
325		show_value(6, "bus", "0x%04x", pcidev->pcidev_info.info_bus);
326		show_value(6, "segment", "0x%04x",
327		    pcidev->pcidev_info.info_segment);
328	}
329	/* TODO: dump registers */
330	/* TODO: Dump OEM data */
331
332	printf("    </pci-dev>\n");
333}
334
/* Placeholder for generic sections: only a marker line is printed. */
static void
show_generic(void)
{

	fputs("    # GENERIC\n", stdout);
}
340
341static size_t
342show_section(struct mca_section_header *sh)
343{
344	static uuid_t uuid_cpu = MCA_UUID_CPU;
345	static uuid_t uuid_memory = MCA_UUID_MEMORY;
346	static uuid_t uuid_sel = MCA_UUID_SEL;
347	static uuid_t uuid_pci_bus = MCA_UUID_PCI_BUS;
348	static uuid_t uuid_smbios = MCA_UUID_SMBIOS;
349	static uuid_t uuid_pci_dev = MCA_UUID_PCI_DEV;
350	static uuid_t uuid_generic = MCA_UUID_GENERIC;
351
352	printf("  <section>\n");
353	show_value(4, "uuid", "%s", uuid(&sh->sh_uuid));
354	show_value(4, "revision", "%d.%d", BCD(sh->sh_major),
355	    BCD(sh->sh_minor));
356
357	if (uuid_equal(&sh->sh_uuid, &uuid_cpu, NULL))
358		show_cpu((void*)(sh + 1));
359	else if (uuid_equal(&sh->sh_uuid, &uuid_memory, NULL))
360		show_memory((void*)(sh + 1));
361	else if (uuid_equal(&sh->sh_uuid, &uuid_sel, NULL))
362		show_sel();
363	else if (uuid_equal(&sh->sh_uuid, &uuid_pci_bus, NULL))
364		show_pci_bus((void*)(sh + 1));
365	else if (uuid_equal(&sh->sh_uuid, &uuid_smbios, NULL))
366		show_smbios();
367	else if (uuid_equal(&sh->sh_uuid, &uuid_pci_dev, NULL))
368		show_pci_dev((void*)(sh + 1));
369	else if (uuid_equal(&sh->sh_uuid, &uuid_generic, NULL))
370		show_generic();
371
372	printf("  </section>\n");
373	return (sh->sh_length);
374}
375
376static void
377show(char *data, const char *mib)
378{
379	size_t reclen, seclen;
380
381	if (mib != NULL)
382		printf("<!-- MIB: %s -->\n", mib);
383
384	printf("<record>\n");
385	reclen = show_header((void*)data) - sizeof(struct mca_record_header);
386	data += sizeof(struct mca_record_header);
387	while (reclen > sizeof(struct mca_section_header)) {
388		seclen = show_section((void*)data);
389		reclen -= seclen;
390		data += seclen;
391	}
392	printf("</record>\n");
393}
394
395static void
396showall(char *buf, size_t buflen)
397{
398	struct mca_record_header *rh;
399	size_t reclen;
400
401	do {
402		if (buflen < sizeof(struct mca_record_header))
403			return;
404
405		rh = (void*)buf;
406		reclen = rh->rh_length;
407		if (buflen < reclen)
408			return;
409
410		show(buf, NULL);
411
412		buf += reclen;
413		buflen -= reclen;
414	}
415	while (1);
416}
417
418static void
419dump(char *data)
420{
421	struct mca_record_header *rh;
422	const char *fn;
423	int fd;
424
425	rh = (void*)data;
426	fn = (file) ? file : default_dumpfile;
427	fd = open(fn, O_WRONLY|O_CREAT|O_APPEND, 0660);
428	if (fd == -1)
429		err(2, "open(%s)", fn);
430	if (write(fd, (void*)rh, rh->rh_length) == -1)
431		err(2, "write(%s)", fn);
432	close(fd);
433}
434
/*
 * Print the command synopsis on stderr and exit with status 1.
 * The -f option takes a file name argument (getopt string "df:").
 */
static void
usage(void)
{

	fprintf(stderr, "usage: mca [-d] [-f file]\n");
	exit(1);
}
442
443int
444main(int argc, char **argv)
445{
446	char mib[32];
447	char *buf;
448	size_t len;
449	int ch, error, fd;
450	int count, first, last, cpuid;
451
452	while ((ch = getopt(argc, argv, "df:")) != -1) {
453		switch(ch) {
454		case 'd':	/* dump */
455			fl_dump = 1;
456			break;
457		case 'f':
458			if (file)
459				free(file);		/* XXX complain! */
460			file = strdup(optarg);
461			break;
462		default:
463			usage();
464		}
465	}
466
467	argc -= optind;
468	argv += optind;
469
470	if (file == NULL || fl_dump) {
471		len = sizeof(count);
472		error = sysctlbyname(hw_mca_count, &count, &len, NULL, 0);
473		if (error)
474			err(1, hw_mca_count);
475
476		if (count == 0)
477			errx(0, "no error records found");
478
479		len = sizeof(first);
480		error = sysctlbyname(hw_mca_first, &first, &len, NULL, 0);
481		if (error)
482			err(1, hw_mca_first);
483
484		len = sizeof(last);
485		error = sysctlbyname(hw_mca_last, &last, &len, NULL, 0);
486		if (error)
487			err(1, hw_mca_last);
488
489		cpuid = 0;
490		while (count && first <= last) {
491			do {
492				sprintf(mib, hw_mca_recid, first, cpuid);
493				len = 0;
494				error = sysctlbyname(mib, NULL, &len, NULL, 0);
495				if (error != ENOENT)
496					break;
497				cpuid++;
498			} while (cpuid <= HW_MCA_MAX_CPUID);
499			if (error == ENOENT && cpuid > HW_MCA_MAX_CPUID) {
500				first++;
501				cpuid = 0;
502				continue;
503			}
504			if (error)
505				err(1, "%s(1)", mib);
506
507			buf = malloc(len);
508			if (buf == NULL)
509				err(1, "buffer");
510
511			error = sysctlbyname(mib, buf, &len, NULL, 0);
512			if (error)
513				err(1, "%s(2)", mib);
514
515			if (fl_dump)
516				dump(buf);
517			else
518				show(buf, mib);
519
520			free(buf);
521			count--;
522			if (cpuid == HW_MCA_MAX_CPUID) {
523				first++;
524				cpuid = 0;
525			} else
526				cpuid++;
527		}
528	} else {
529		fd = open(file, O_RDONLY);
530		if (fd == -1)
531			err(1, "open(%s)", file);
532
533		len = lseek(fd, 0LL, SEEK_END);
534		buf = mmap(NULL, len, PROT_READ, 0U, fd, 0LL);
535		if (buf == MAP_FAILED)
536			err(1, "mmap(%s)", file);
537
538		showall(buf, len);
539
540		munmap(buf, len);
541		close(fd);
542	}
543
544	return (0);
545}
546