1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26/*
27 * Panic software-diagnosis subsidiary
28 *
 * We model a system panic as a defect diagnosis in FMA. When a system
 * panics, savecore publishes events which we subscribe to here.
31 *
32 * Our driving events are all raised by savecore, run either from
33 * startup of the dumpadm service or interactively at the command line.
34 * The following describes the logic for the handling of these events.
35 *
36 * On reboot after panic we will run savecore as part of the dumpadm
37 * service startup; we run savecore even if savecore is otherwise
38 * disabled (ie dumpadm -n in effect) - we run savecore -c to check for
39 * a valid dump and raise the initial event.
40 *
41 * If savecore (or savecore -c) observes a valid dump pending on the
42 * device, it raises a "dump_pending_on_device" event provided this
43 * was not an FMA-initiated panic (for those we will replay ereports
44 * from the dump device as usual and make a diagnosis from those; we do
45 * not need to open a case for the panic).  We subscribe to the
46 * "dump_pending_on_device" event and use that to open a case;  we
47 * open a case requesting the same case uuid as the panic dump image
48 * has for the OS instance uuid - if that fails because of a duplicate
49 * uuid then we have already opened a case for this panic so no need
50 * to open another.
51 *
52 * Included in the "dump_pending_on_device" event is an indication of
53 * whether or not dumpadm is enabled.  If not (dumpadm -n in effect)
54 * then we do not expect any further events regarding this panic
55 * until such time as the admin runs savecore manually (if ever).
56 * So in this case we solve the case immediately after open.  If/when
57 * subsequent events arrive when savecore is run manually, we will toss
58 * them.
59 *
60 * If dumpadm is enabled then savecore, run from dumpadm service startup,
61 * will attempt to process the dump - either to copy it off the dump
62 * device (if saving compressed) or to uncompress it off the dump device.
63 * If this succeeds savecore raises a "dump_available" event which
64 * includes information on the directory it was saved in, the instance
65 * number, image uuid, compressed form or not, and whether the dump
66 * was complete (as per the dumphdr).  If the savecore fails for
67 * some reason then it exits and raises a "savecore_failure" event.
68 * These two events are raised even for FMA-initiated panics.
69 *
 * We subscribe to both the "dump_available" and "savecore_failure" events,
 * and in the handling thereof we will close the case opened earlier (if
 * this is not an FMA-initiated panic).  On receipt of the initial
 * "dump_pending_on_device" event we also arm a timer for +10 minutes if
 * dumpadm is enabled - if no "dump_available" or "savecore_failure" arrives
 * in that time we will solve the case on timeout.
76 *
77 * When the timer fires we check whether the initial event for each panic
78 * case was received more than 30 minutes ago; if it was we solve the case
79 * with what we have.  If we're still within the waiting period we rearm
80 * for a further 10 minutes.  The timer is shared by all cases that we
81 * create, which is why the fire interval is shorter than the maximum time
82 * we are prepared to wait.
83 */
84
85#include <strings.h>
86#include <sys/panic.h>
87#include <alloca.h>
88#include <zone.h>
89
90#include "../../common/sw.h"
91#include "panic.h"
92
93#define	MAX_STRING_LEN 160
94
95static id_t myid;
96
97static id_t mytimerid;
98
/*
 * Our serialization structure type.
 *
 * The data attached to each panic case is one of these headers followed
 * immediately by scd_nvlbufsz bytes that hold the packed attribute
 * nvlist of the event that opened the case.
 */
#define	SWDE_PANIC_CASEDATA_VERS	1

typedef struct swde_panic_casedata {
	/* SWDE_PANIC_CASEDATA_VERS when written; must be first member */
	uint32_t scd_vers;
	/* time(NULL) at which we first learned of this panic */
	uint64_t scd_receive_time;
	/* size in bytes of the packed nvlist buffer that follows */
	size_t scd_nvlbufsz;
	/* packed attr nvlist follows */
} swde_panic_casedata_t;
110
111static struct {
112	fmd_stat_t swde_panic_diagnosed;
113	fmd_stat_t swde_panic_badclass;
114	fmd_stat_t swde_panic_noattr;
115	fmd_stat_t swde_panic_unexpected_fm_panic;
116	fmd_stat_t swde_panic_badattr;
117	fmd_stat_t swde_panic_badfmri;
118	fmd_stat_t swde_panic_noinstance;
119	fmd_stat_t swde_panic_nouuid;
120	fmd_stat_t swde_panic_dupuuid;
121	fmd_stat_t swde_panic_nocase;
122	fmd_stat_t swde_panic_notime;
123	fmd_stat_t swde_panic_nopanicstr;
124	fmd_stat_t swde_panic_nodumpdir;
125	fmd_stat_t swde_panic_nostack;
126	fmd_stat_t swde_panic_incomplete;
127	fmd_stat_t swde_panic_failed;
128	fmd_stat_t swde_panic_basecasedata;
129	fmd_stat_t swde_panic_failsrlz;
130} swde_panic_stats = {
131	{ "swde_panic_diagnosed", FMD_TYPE_UINT64,
132	    "panic defects published" },
133	{ "swde_panic_badclass", FMD_TYPE_UINT64,
134	    "incorrect event class received" },
135	{ "swde_panic_noattr", FMD_TYPE_UINT64,
136	    "malformed event - missing attr nvlist" },
137	{ "swde_panic_unexpected_fm_panic", FMD_TYPE_UINT64,
138	    "dump available for an fm_panic()" },
139	{ "swde_panic_badattr", FMD_TYPE_UINT64,
140	    "malformed event - invalid attr list" },
141	{ "swde_panic_badfmri", FMD_TYPE_UINT64,
142	    "malformed event - fmri2str fails" },
143	{ "swde_panic_noinstance", FMD_TYPE_UINT64,
144	    "malformed event - no instance number" },
145	{ "swde_panic_nouuid", FMD_TYPE_UINT64,
146	    "malformed event - missing uuid" },
147	{ "swde_panic_dupuuid", FMD_TYPE_UINT64,
148	    "duplicate events received" },
149	{ "swde_panic_nocase", FMD_TYPE_UINT64,
150	    "case missing for uuid" },
151	{ "swde_panic_notime", FMD_TYPE_UINT64,
152	    "missing crash dump time" },
153	{ "swde_panic_nopanicstr", FMD_TYPE_UINT64,
154	    "missing panic string" },
155	{ "swde_panic_nodumpdir", FMD_TYPE_UINT64,
156	    "missing crashdump save directory" },
157	{ "swde_panic_nostack", FMD_TYPE_UINT64,
158	    "missing panic stack" },
159	{ "swde_panic_incomplete", FMD_TYPE_UINT64,
160	    "missing panic incomplete" },
161	{ "swde_panic_failed", FMD_TYPE_UINT64,
162	    "missing panic failed" },
163	{ "swde_panic_badcasedata", FMD_TYPE_UINT64,
164	    "bad case data during timeout" },
165	{ "swde_panic_failsrlz", FMD_TYPE_UINT64,
166	    "failures to serialize case data" },
167};
168
169#define	BUMPSTAT(stat)		swde_panic_stats.stat.fmds_value.ui64++
170
171static nvlist_t *
172panic_sw_fmri(fmd_hdl_t *hdl, char *object)
173{
174	nvlist_t *fmri;
175	nvlist_t *sw_obj;
176	int err = 0;
177
178	fmri = fmd_nvl_alloc(hdl, FMD_SLEEP);
179	err |= nvlist_add_uint8(fmri, FM_VERSION, FM_SW_SCHEME_VERSION);
180	err |= nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_SW);
181
182	sw_obj = fmd_nvl_alloc(hdl, FMD_SLEEP);
183	err |= nvlist_add_string(sw_obj, FM_FMRI_SW_OBJ_PATH, object);
184	err |= nvlist_add_nvlist(fmri, FM_FMRI_SW_OBJ, sw_obj);
185	if (sw_obj)
186		nvlist_free(sw_obj);
187	if (!err)
188		return (fmri);
189	else
190		return (0);
191}
192
193static const char *dumpfiles[2] = { "unix.%lld", "vmcore.%lld" };
194static const char *dumpfiles_comp[2] = { "vmdump.%lld", NULL};
195
196static void
197swde_panic_solve(fmd_hdl_t *hdl, fmd_case_t *cp,
198    nvlist_t *attr, fmd_event_t *ep, boolean_t savecore_success)
199{
200	char *dumpdir, *path, *uuid;
201	nvlist_t *defect, *rsrc;
202	nvpair_t *nvp;
203	int i;
204
205	/*
206	 * Attribute members to include in event-specific defect
207	 * payload.  Some attributes will not be present for some
208	 * cases - e.g., if we timed out and solved the case without
209	 * a "dump_available" report.
210	 */
211	const char *toadd[] = {
212		"os-instance-uuid",	/* same as case uuid */
213		"panicstr",		/* for initial classification work */
214		"panicstack",		/* for initial classification work */
215		"crashtime",		/* in epoch time */
216		"panic-time",		/* Formatted crash time */
217	};
218
219	if (ep != NULL)
220		fmd_case_add_ereport(hdl, cp, ep);
221	/*
222	 * As a temporary solution we create and fmri in the sw scheme
223	 * in panic_sw_fmri. This should become a generic fmri constructor
224	 *
225	 * We need to user a resource FMRI which will have a sufficiently
226	 * unique string representation such that fmd will not see
227	 * repeated panic diagnoses (all using the same defect class)
228	 * as duplicates and discard later cases.  We can't actually diagnose
229	 * the panic to anything specific (e.g., a path to a module and
230	 * function/line etc therein).  We could pick on a generic
231	 * representative such as /kernel/genunix but that could lead
232	 * to misunderstanding.  So we choose a path based on <dumpdir>
233	 * and the OS instance UUID - "<dumpdir>/.<os-instance-uuid>".
234	 * There's no file at that path (*) but no matter.  We can't use
235	 * <dumpdir>/vmdump.N or similar because if savecore is disabled
236	 * or failed we don't have any file or instance number.
237	 *
238	 * (*) Some day it would seem tidier to keep all files to do
239	 * with a single crash (unix/vmcore/vmdump, analysis output etc)
240	 * in a distinct directory, and <dumpdir>/.<uuid> seems like a good
241	 * choice.  For compatability we'd symlink into it.  So that is
242	 * another reason for this choice - some day it may exist!
243	 */
244	(void) nvlist_lookup_string(attr, "dumpdir", &dumpdir);
245	(void) nvlist_lookup_string(attr, "os-instance-uuid", &uuid);
246	path = alloca(strlen(dumpdir) + 1 + 1 + 36 + 1);
247	/* LINTED: E_SEC_SPRINTF_UNBOUNDED_COPY */
248	(void) sprintf(path, "%s/.%s", dumpdir, uuid);
249	rsrc = panic_sw_fmri(hdl, path);
250
251	defect = fmd_nvl_create_defect(hdl, SW_SUNOS_PANIC_DEFECT,
252	    100, rsrc, NULL, rsrc);
253	nvlist_free(rsrc);
254
255	(void) nvlist_add_boolean_value(defect, "savecore-succcess",
256	    savecore_success);
257
258	if (savecore_success) {
259		boolean_t compressed;
260		int64_t instance;
261		const char **pathfmts;
262		char buf[2][32];
263		int files = 0;
264		char *arr[2];
265		int i;
266
267		(void) nvlist_lookup_int64(attr, "instance", &instance);
268		(void) nvlist_lookup_boolean_value(attr, "compressed",
269		    &compressed);
270
271		pathfmts = compressed ? &dumpfiles_comp[0] : &dumpfiles[0];
272
273		for (i = 0; i < 2; i++) {
274			if (pathfmts[i] == NULL) {
275				arr[i] = NULL;
276				continue;
277			}
278
279			(void) snprintf(buf[i], 32, pathfmts[i], instance);
280			arr[i] = buf[i];
281			files++;
282		}
283
284		(void) nvlist_add_string(defect, "dump-dir", dumpdir);
285		(void) nvlist_add_string_array(defect, "dump-files", arr,
286		    files);
287	} else {
288		char *rsn;
289
290		if (nvlist_lookup_string(attr, "failure-reason", &rsn) == 0)
291			(void) nvlist_add_string(defect, "failure-reason", rsn);
292	}
293
294	/*
295	 * Not all attributes will necessarily be available - eg if
296	 * dumpadm was not enabled there'll be no instance and dumpdir.
297	 */
298	for (i = 0; i < sizeof (toadd) / sizeof (toadd[0]); i++) {
299		if (nvlist_lookup_nvpair(attr, toadd[i], &nvp) == 0)
300			(void) nvlist_add_nvpair(defect, nvp);
301	}
302
303	fmd_case_add_suspect(hdl, cp, defect);
304	fmd_case_solve(hdl, cp);
305
306	/*
307	 * Close the case.  Do no free casedata - framework does that for us
308	 * on closure callback.
309	 */
310	fmd_case_close(hdl, cp);
311	BUMPSTAT(swde_panic_diagnosed);
312}
313
314/*ARGSUSED*/
315static void
316swde_panic_timeout(fmd_hdl_t *hdl, id_t timerid, void *data)
317{
318	fmd_case_t *cp = swde_case_first(hdl, myid);
319	swde_panic_casedata_t *cdp;
320	time_t now = time(NULL);
321	nvlist_t *attr;
322	int remain = 0;
323	uint32_t vers;
324
325	while (cp != NULL) {
326		cdp = swde_case_data(hdl, cp, &vers);
327		if (vers != SWDE_PANIC_CASEDATA_VERS)
328			fmd_hdl_abort(hdl, "case data version confused\n");
329
330		if (now > cdp->scd_receive_time + 30 * 60) {
331			if (nvlist_unpack((char *)cdp + sizeof (*cdp),
332			    cdp->scd_nvlbufsz, &attr, 0) == 0) {
333				swde_panic_solve(hdl, cp, attr, NULL, B_FALSE);
334				nvlist_free(attr);
335			} else {
336				BUMPSTAT(swde_panic_basecasedata);
337				fmd_case_close(hdl, cp);
338			}
339		} else {
340			remain++;
341		}
342
343
344		cp = swde_case_next(hdl, cp);
345	}
346
347	if (remain) {
348		mytimerid = sw_timer_install(hdl, myid, NULL, NULL,
349		    10ULL * NANOSEC * 60);
350	}
351}
352
353/*
354 * Our verify entry point is called for each of our open cases during
355 * module load.  We must return 0 for the case to be closed by our caller,
356 * or 1 to keep it (or if we have already closed it during this call).
357 */
358static int
359swde_panic_vrfy(fmd_hdl_t *hdl, fmd_case_t *cp)
360{
361	swde_panic_casedata_t *cdp;
362	time_t now = time(NULL);
363	nvlist_t *attr;
364	uint32_t vers;
365
366	cdp = swde_case_data(hdl, cp, &vers);
367
368	if (vers != SWDE_PANIC_CASEDATA_VERS)
369		return (0);	/* case will be closed */
370
371	if (now > cdp->scd_receive_time + 30 * 60) {
372		if (nvlist_unpack((char *)cdp + sizeof (*cdp),
373		    cdp->scd_nvlbufsz, &attr, 0) == 0) {
374			swde_panic_solve(hdl, cp, attr, NULL, B_FALSE);
375			nvlist_free(attr);
376			return (1);	/* case already closed */
377		} else {
378			return (0);	/* close case */
379		}
380	}
381
382	if (mytimerid != 0)
383		mytimerid = sw_timer_install(hdl, myid,
384		    NULL, NULL, 10ULL * NANOSEC * 60);
385
386	return (1);	/* retain case */
387}
388
389/*
390 * Handler for ireport.os.sunos.panic.dump_pending_on_device.
391 *
392 * A future RFE should try adding a means of avoiding diagnosing repeated
393 * defects on panic loops, which would just add to the mayhem and potentially
394 * log lots of calls through ASR.  Panics with similar enough panic
395 * strings and/or stacks should not diagnose to new defects with some
396 * period of time, for example.
397 */
398
399/*ARGSUSED*/
400void
401swde_panic_detected(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
402    const char *class, void *arg)
403{
404	boolean_t fm_panic, expect_savecore;
405	swde_panic_casedata_t *cdp;
406	nvlist_t *attr;
407	fmd_case_t *cp;
408	char *fmribuf;
409	char *uuid;
410	size_t sz;
411
412	fmd_hdl_debug(hdl, "swde_panic_detected\n");
413
414	if (nvlist_lookup_nvlist(nvl, FM_IREPORT_ATTRIBUTES, &attr) != 0) {
415		BUMPSTAT(swde_panic_noattr);
416		return;
417	}
418
419	if (nvlist_lookup_string(attr, "os-instance-uuid", &uuid) != 0) {
420		BUMPSTAT(swde_panic_nouuid);
421		return;
422	}
423
424	fmd_hdl_debug(hdl, "swde_panic_detected: OS instance %s\n", uuid);
425
426	if (nvlist_lookup_boolean_value(attr, "fm-panic", &fm_panic) != 0 ||
427	    fm_panic == B_TRUE) {
428		BUMPSTAT(swde_panic_unexpected_fm_panic);
429		return;
430	}
431
432	/*
433	 * Prepare serialization data to be associated with a new
434	 * case.  Our serialization data consists of a swde_panic_casedata_t
435	 * structure followed by a packed nvlist of the attributes of
436	 * the initial event.
437	 */
438	if (nvlist_size(attr, &sz, NV_ENCODE_NATIVE) != 0) {
439		BUMPSTAT(swde_panic_failsrlz);
440		return;
441	}
442
443	cdp = fmd_hdl_zalloc(hdl, sizeof (*cdp) + sz, FMD_SLEEP);
444	fmribuf = (char *)cdp + sizeof (*cdp);
445	cdp->scd_vers = SWDE_PANIC_CASEDATA_VERS;
446	cdp->scd_receive_time = time(NULL);
447	cdp->scd_nvlbufsz = sz;
448
449	/*
450	 * Open a case with UUID matching the the panicking kernel, add this
451	 * event to the case.
452	 */
453	if ((cp = swde_case_open(hdl, myid, uuid, SWDE_PANIC_CASEDATA_VERS,
454	    cdp, sizeof (*cdp) + sz)) == NULL) {
455		BUMPSTAT(swde_panic_dupuuid);
456		fmd_hdl_debug(hdl, "swde_case_open returned NULL - dup?\n");
457		fmd_hdl_free(hdl, cdp, sizeof (*cdp) + sz);
458		return;
459	}
460
461	fmd_case_setprincipal(hdl, cp, ep);
462
463	if (nvlist_lookup_boolean_value(attr, "will-attempt-savecore",
464	    &expect_savecore) != 0 || expect_savecore == B_FALSE) {
465		fmd_hdl_debug(hdl, "savecore not being attempted - "
466		    "solve now\n");
467		swde_panic_solve(hdl, cp, attr, ep, B_FALSE);
468		return;
469	}
470
471	/*
472	 * We expect to see either a "dump_available" or a "savecore_failed"
473	 * event before too long.  In case that never shows up, for whatever
474	 * reason, we want to be able to solve the case anyway.
475	 */
476	fmd_case_add_ereport(hdl, cp, ep);
477	(void) nvlist_pack(attr, &fmribuf, &sz, NV_ENCODE_NATIVE, 0);
478	swde_case_data_write(hdl, cp);
479
480	if (mytimerid == 0) {
481		mytimerid = sw_timer_install(hdl, myid, NULL, ep,
482		    10ULL * NANOSEC * 60);
483		fmd_hdl_debug(hdl, "armed timer\n");
484	} else {
485		fmd_hdl_debug(hdl, "timer already armed\n");
486	}
487}
488
489/*
490 * savecore has now run and saved a crash dump to the filesystem. It is
491 * either a compressed dump (vmdump.n) or uncompressed {unix.n, vmcore.n}
492 * Savecore has raised an ireport to say the dump is there.
493 */
494
495/*ARGSUSED*/
496void
497swde_panic_savecore_done(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
498    const char *class, void *arg)
499{
500	boolean_t savecore_success = (arg != NULL);
501	boolean_t fm_panic;
502	nvlist_t *attr;
503	fmd_case_t *cp;
504	char *uuid;
505
506	fmd_hdl_debug(hdl, "savecore_done (%s)\n", savecore_success ?
507	    "success" : "fail");
508
509	if (nvlist_lookup_nvlist(nvl, FM_IREPORT_ATTRIBUTES, &attr) != 0) {
510		BUMPSTAT(swde_panic_noattr);
511		return;
512	}
513
514	if (nvlist_lookup_boolean_value(attr, "fm-panic", &fm_panic) != 0 ||
515	    fm_panic == B_TRUE) {
516		return;		/* not expected, but just in case */
517	}
518
519	if (nvlist_lookup_string(attr, "os-instance-uuid", &uuid) != 0) {
520		BUMPSTAT(swde_panic_nouuid);
521		return;
522	}
523
524	/*
525	 * Find the case related to the panicking kernel; our cases have
526	 * the same uuid as the crashed OS image.
527	 */
528	cp = fmd_case_uulookup(hdl, uuid);
529	if (!cp) {
530		/* Unable to find the case. */
531		fmd_hdl_debug(hdl, "savecore_done: can't find case for "
532		    "image %s\n", uuid);
533		BUMPSTAT(swde_panic_nocase);
534		return;
535	}
536
537	fmd_hdl_debug(hdl, "savecore_done: solving case %s\n", uuid);
538	swde_panic_solve(hdl, cp, attr, ep, savecore_success);
539}
540
541const struct sw_disp swde_panic_disp[] = {
542	{ SW_SUNOS_PANIC_DETECTED, swde_panic_detected, NULL },
543	{ SW_SUNOS_PANIC_AVAIL, swde_panic_savecore_done, (void *)1 },
544	{ SW_SUNOS_PANIC_FAILURE, swde_panic_savecore_done, NULL },
545	/*
546	 * Something has to subscribe to every fault
547	 * or defect diagnosed in fmd.  We do that here, but throw it away.
548	 */
549	{ SW_SUNOS_PANIC_DEFECT, NULL, NULL },
550	{ NULL, NULL, NULL }
551};
552
553/*ARGSUSED*/
554int
555swde_panic_init(fmd_hdl_t *hdl, id_t id, const struct sw_disp **dpp,
556    int *nelemp)
557{
558	myid = id;
559
560	if (getzoneid() != GLOBAL_ZONEID)
561		return (SW_SUB_INIT_FAIL_VOLUNTARY);
562
563	(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
564	    sizeof (swde_panic_stats) / sizeof (fmd_stat_t),
565	    (fmd_stat_t *)&swde_panic_stats);
566
567	fmd_hdl_subscribe(hdl, SW_SUNOS_PANIC_DETECTED);
568	fmd_hdl_subscribe(hdl, SW_SUNOS_PANIC_FAILURE);
569	fmd_hdl_subscribe(hdl, SW_SUNOS_PANIC_AVAIL);
570
571	*dpp = &swde_panic_disp[0];
572	*nelemp = sizeof (swde_panic_disp) / sizeof (swde_panic_disp[0]);
573	return (SW_SUB_INIT_SUCCESS);
574}
575
576void
577swde_panic_fini(fmd_hdl_t *hdl)
578{
579	if (mytimerid)
580		sw_timer_remove(hdl, myid, mytimerid);
581}
582
583const struct sw_subinfo panic_diag_info = {
584	"panic diagnosis",		/* swsub_name */
585	SW_CASE_PANIC,			/* swsub_casetype */
586	swde_panic_init,		/* swsub_init */
587	swde_panic_fini,		/* swsub_fini */
588	swde_panic_timeout,		/* swsub_timeout */
589	NULL,				/* swsub_case_close */
590	swde_panic_vrfy,		/* swsub_case_vrfy */
591};
592