savecore.c revision 1.24
1/*	$NetBSD: savecore.c,v 1.24 1996/03/08 08:52:12 mycroft Exp $	*/
2
3/*-
4 * Copyright (c) 1986, 1992, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 *    must display the following acknowledgement:
17 *	This product includes software developed by the University of
18 *	California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
/*
 * Identification strings compiled into the object file.  They are left
 * out entirely when "lint" is defined.
 */
#ifndef lint
static char copyright[] =
    "@(#) Copyright (c) 1986, 1992, 1993\n"
    "\tThe Regents of the University of California.  All rights reserved.\n";

#if 0
static char sccsid[] = "@(#)savecore.c	8.3 (Berkeley) 1/2/94";
#else
static char rcsid[] =
    "$NetBSD: savecore.c,v 1.24 1996/03/08 08:52:12 mycroft Exp $";
#endif
#endif /* not lint */
49
50#include <sys/param.h>
51#include <sys/stat.h>
52#include <sys/mount.h>
53#include <sys/syslog.h>
54#include <sys/time.h>
55
56#include <dirent.h>
57#include <errno.h>
58#include <fcntl.h>
59#include <nlist.h>
60#include <paths.h>
61#include <stdio.h>
62#include <stdlib.h>
63#include <string.h>
64#include <tzfile.h>
65#include <unistd.h>
66
/* Stream opener used for compressed output; not defined in this file. */
extern FILE *zopen __P((const char *path, const char *mode, int bits));

/*
 * Subtract KERNBASE from a kernel address.  The result is added to
 * dumplo wherever a kernel symbol has to be located inside the dump.
 */
#define ok(kva) ((kva) - KERNBASE)
70
/*
 * Indices into current_nl[] and dump_nl[].  Both tables list the same
 * symbols in the same order, so one set of indices serves both.
 */
#define X_DUMPDEV	0
#define X_DUMPLO	1
#define X_TIME		2
#define	X_DUMPSIZE	3
#define X_VERSION	4
#define X_PANICSTR	5
#define	X_DUMPMAG	6

/* Namelist for currently running system. */
struct nlist current_nl[] = {
	{ "_dumpdev" },
	{ "_dumplo" },
	{ "_time" },
	{ "_dumpsize" },
	{ "_version" },
	{ "_panicstr" },
	{ "_dumpmag" },
	{ "" },
};

/*
 * Symbols that must resolve in current_nl[] and in dump_nl[]
 * respectively; each list is terminated by -1.
 */
int cursyms[] = { X_DUMPDEV, X_DUMPLO, X_VERSION, X_DUMPMAG, -1 };
int dumpsyms[] = { X_TIME, X_DUMPSIZE, X_VERSION, X_PANICSTR, X_DUMPMAG, -1 };

/*
 * Name list for dumped system.
 * Entries MUST be the same as those in current_nl[].
 */
struct nlist dump_nl[] = {
	{ "_dumpdev" },
	{ "_dumplo" },
	{ "_time" },
	{ "_dumpsize" },
	{ "_version" },
	{ "_panicstr" },
	{ "_dumpmag" },
	{ "" },
};
101
/* Types match kernel declarations. */
int	dumpmag;			/* magic number in dump */
int	dumpsize;			/* amount of memory dumped */
long	dumplo;				/* where dump starts on dumpdev */

/* Dump device. */
dev_t	dumpdev;			/* dump device */
char	*ddname;			/* name of dump device */
int	dumpfd;				/* read/write descriptor on block dev */

/* Information recovered about the crash. */
int	panicstr;			/* panicstr value read from the dump */
char	panic_mesg[1024];		/* panic message read from the dump */
char	vers[1024];			/* version string of running kernel */
time_t	now;				/* current date */

/* Operands. */
char	*kernel;			/* -N argument or second operand */
char	*dirname;			/* directory to save dumps in */

/* Flags. */
int	clear;				/* -c */
int	compress;			/* -z */
int	force;				/* -f */
int	verbose;			/* -v (also -d) */
118
/* Stages of the save, in roughly the order main() runs them. */
void	 kmem_setup __P((void));
int	 dump_exists __P((void));
void	 check_kmem __P((void));
int	 get_crashtime __P((void));
int	 check_space __P((void));
void	 save_core __P((void));
void	 clear_dump __P((void));

/* Device name helpers. */
char	*find_dev __P((dev_t, int));
char	*rawname __P((char *s));

/* System call wrappers that log and exit on failure. */
int	 Create __P((char *, int));
void	 Lseek __P((int, off_t, int));
int	 Open __P((char *, int rw));
int	 Read __P((int, void *, int));
void	 Write __P((int, void *, int));

/* Miscellaneous. */
/* NOTE(review): no definition or caller of log() is visible here -- confirm it is still needed. */
void	 log __P((int, char *, ...));
void	 usage __P((void));
135
136int
137main(argc, argv)
138	int argc;
139	char *argv[];
140{
141	int ch;
142
143	openlog("savecore", LOG_PERROR, LOG_DAEMON);
144
145	while ((ch = getopt(argc, argv, "cdfN:vz")) != -1)
146		switch(ch) {
147		case 'c':
148			clear = 1;
149			break;
150		case 'd':		/* Not documented. */
151		case 'v':
152			verbose = 1;
153			break;
154		case 'f':
155			force = 1;
156			break;
157		case 'N':
158			kernel = optarg;
159			break;
160		case 'z':
161			compress = 1;
162			break;
163		case '?':
164		default:
165			usage();
166		}
167	argc -= optind;
168	argv += optind;
169
170	if (!clear) {
171		if (argc != 1 && argc != 2)
172			usage();
173		dirname = argv[0];
174	}
175	if (argc == 2)
176		kernel = argv[1];
177
178	(void)time(&now);
179	kmem_setup();
180
181	if (clear) {
182		clear_dump();
183		exit(0);
184	}
185
186	if (!dump_exists() && !force)
187		exit(1);
188
189	check_kmem();
190
191	if (panicstr)
192		syslog(LOG_ALERT, "reboot after panic: %s", panic_mesg);
193	else
194		syslog(LOG_ALERT, "reboot");
195
196	if ((!get_crashtime() || !check_space()) && !force)
197		exit(1);
198
199	save_core();
200
201	clear_dump();
202	exit(0);
203}
204
/*
 * kmem_setup --
 *	Look up the kernel symbols savecore needs, read the dump parameters
 *	of the running system out of _PATH_KMEM and open the dump device.
 *
 *	Sets dumpdev, dumplo (converted to a byte offset), dumpmag, ddname
 *	and dumpfd.  vers is filled in only when no kernel was named on the
 *	command line.  A missing symbol or a missing dump device ends the
 *	program with status 1.
 */
void
kmem_setup()
{
	FILE *fp;
	int kmem, i;
	char *dump_sys;

	/*
	 * Some names we need for the currently running system, others for
	 * the system that was running when the dump was made.  The values
	 * obtained from the current system are used to look for things in
	 * /dev/kmem that cannot be found in the dump_sys namelist, but are
	 * presumed to be the same (since the disk partitions are probably
	 * the same!)
	 */
	if ((nlist(_PATH_UNIX, current_nl)) == -1)
		syslog(LOG_ERR, "%s: nlist: %s", _PATH_UNIX, strerror(errno));
	for (i = 0; cursyms[i] != -1; i++)
		if (current_nl[cursyms[i]].n_value == 0) {
			syslog(LOG_ERR, "%s: %s not in namelist",
			    _PATH_UNIX, current_nl[cursyms[i]].n_name);
			exit(1);
		}

	dump_sys = kernel ? kernel : _PATH_UNIX;
	if ((nlist(dump_sys, dump_nl)) == -1)
		syslog(LOG_ERR, "%s: nlist: %s", dump_sys, strerror(errno));
	for (i = 0; dumpsyms[i] != -1; i++)
		if (dump_nl[dumpsyms[i]].n_value == 0) {
			syslog(LOG_ERR, "%s: %s not in namelist",
			    dump_sys, dump_nl[dumpsyms[i]].n_name);
			exit(1);
		}

	kmem = Open(_PATH_KMEM, O_RDONLY);
	Lseek(kmem, (off_t)current_nl[X_DUMPDEV].n_value, L_SET);
	(void)Read(kmem, &dumpdev, sizeof(dumpdev));
	if (dumpdev == NODEV) {
		syslog(LOG_WARNING, "no core dump (no dumpdev)");
		exit(1);
	}
	Lseek(kmem, (off_t)current_nl[X_DUMPLO].n_value, L_SET);
	(void)Read(kmem, &dumplo, sizeof(dumplo));
	dumplo *= DEV_BSIZE;		/* blocks -> bytes */
	if (verbose)
		/* dumplo is a long; the conversions must say so. */
		(void)printf("dumplo = %ld (%ld * %d)\n",
		    dumplo, dumplo / DEV_BSIZE, (int)DEV_BSIZE);
	Lseek(kmem, (off_t)current_nl[X_DUMPMAG].n_value, L_SET);
	(void)Read(kmem, &dumpmag, sizeof(dumpmag));
	ddname = find_dev(dumpdev, S_IFBLK);
	dumpfd = Open(ddname, O_RDWR);

	/*
	 * The running kernel's version string is only compared against the
	 * dump when no kernel was named, so skip reading it otherwise.
	 * kmem is not needed after this point either way.
	 */
	if (kernel) {
		(void)close(kmem);
		return;
	}
	fp = fdopen(kmem, "r");
	if (fp == NULL) {
		syslog(LOG_ERR, "%s: fdopen: %m", _PATH_KMEM);
		exit(1);
	}
	(void)fseek(fp, (off_t)current_nl[X_VERSION].n_value, L_SET);
	(void)fgets(vers, sizeof(vers), fp);

	/* Closing the stream also releases the kmem descriptor. */
	(void)fclose(fp);
}
268
/*
 * check_kmem --
 *	Read the version string and the panic string out of the dump.
 *
 *	Logs a warning when the dump's version string differs from vers
 *	and no kernel was named on the command line.  Sets panicstr and,
 *	when it is non-zero, fills panic_mesg with the NUL-terminated
 *	message it points to (truncated to fit the buffer).
 */
void
check_kmem()
{
	register char *cp;
	register int ch;
	FILE *fp;
	char core_vers[1024];

	fp = fdopen(dumpfd, "r");
	if (fp == NULL) {
		syslog(LOG_ERR, "%s: fdopen: %m", ddname);
		exit(1);
	}
	fseek(fp, (off_t)(dumplo + ok(dump_nl[X_VERSION].n_value)), L_SET);
	/* A failed read must not leave core_vers as stack garbage. */
	if (fgets(core_vers, sizeof(core_vers), fp) == NULL)
		core_vers[0] = '\0';
	if (strcmp(vers, core_vers) && kernel == 0)
		syslog(LOG_WARNING,
		    "warning: %s version mismatch:\n\t%s\nand\t%s\n",
		    _PATH_UNIX, vers, core_vers);
	(void)fseek(fp,
	    (off_t)(dumplo + ok(dump_nl[X_PANICSTR].n_value)), L_SET);
	(void)fread(&panicstr, sizeof(panicstr), 1, fp);
	if (panicstr) {
		(void)fseek(fp, dumplo + ok(panicstr), L_SET);
		/*
		 * Copy up to the terminating NUL, stopping early at EOF or
		 * one byte short of the buffer so that the result is always
		 * a valid string for the syslog("%s") in main().
		 */
		cp = panic_mesg;
		while (cp < &panic_mesg[sizeof(panic_mesg) - 1] &&
		    (ch = getc(fp)) != EOF && ch != '\0')
			*cp++ = ch;
		*cp = '\0';
	}
	/* Don't fclose(fp), we use dumpfd later. */
}
299
/*
 * clear_dump --
 *	Invalidate the dump by overwriting the magic number stored in it
 *	with zero.
 *
 *	The value written is an int, the same type as dumpmag and as the
 *	value dump_exists() reads back, so exactly the magic number and
 *	nothing after it is overwritten.
 */
void
clear_dump()
{
	int newdumpmag;

	newdumpmag = 0;
	Lseek(dumpfd, (off_t)(dumplo + ok(dump_nl[X_DUMPMAG].n_value)), L_SET);
	Write(dumpfd, &newdumpmag, sizeof(newdumpmag));
}
309
/*
 * dump_exists --
 *	Compare the magic number stored in the dump with dumpmag, the one
 *	read from the running kernel, and read the dump size.
 *
 *	Returns 1 when the magic numbers agree and 0 (after logging)
 *	when they do not.  dumpsize is set, in bytes, in both cases.
 */
int
dump_exists()
{
	int stored_magic;

	/* Magic number as recorded in the dump. */
	Lseek(dumpfd, (off_t)(dumplo + ok(dump_nl[X_DUMPMAG].n_value)), L_SET);
	(void)Read(dumpfd, &stored_magic, sizeof(stored_magic));

	/*
	 * The dump size is read before the magic number is judged, so
	 * that dumpsize is set whether or not the core is valid (for -f).
	 * It is stored in pages; convert to bytes.
	 */
	Lseek(dumpfd, (off_t)(dumplo + ok(dump_nl[X_DUMPSIZE].n_value)), L_SET);
	(void)Read(dumpfd, &dumpsize, sizeof(dumpsize));
	dumpsize = dumpsize * getpagesize();

	if (stored_magic == dumpmag)
		return (1);

	/* The core dump doesn't seem to be there; note it for syslog. */
	if (verbose)
		syslog(LOG_WARNING, "magic number mismatch (%x != %x)",
		    stored_magic, dumpmag);
	syslog(LOG_WARNING, "no core dump");
	return (0);
}
337
338char buf[1024 * 1024];
339
340void
341save_core()
342{
343	register FILE *fp;
344	register int bounds, ifd, nr, nw, ofd;
345	char *rawp, path[MAXPATHLEN];
346
347	/*
348	 * Get the current number and update the bounds file.  Do the update
349	 * now, because may fail later and don't want to overwrite anything.
350	 */
351	(void)snprintf(path, sizeof(path), "%s/bounds", dirname);
352	if ((fp = fopen(path, "r")) == NULL)
353		goto err1;
354	if (fgets(buf, sizeof(buf), fp) == NULL) {
355		if (ferror(fp))
356err1:			syslog(LOG_WARNING, "%s: %s", path, strerror(errno));
357		bounds = 0;
358	} else
359		bounds = atoi(buf);
360	if (fp != NULL)
361		(void)fclose(fp);
362	if ((fp = fopen(path, "w")) == NULL)
363		syslog(LOG_ERR, "%s: %m", path);
364	else {
365		(void)fprintf(fp, "%d\n", bounds + 1);
366		(void)fclose(fp);
367	}
368	(void)fclose(fp);
369
370	/* Create the core file. */
371	(void)snprintf(path, sizeof(path), "%s/netbsd.%d.core%s",
372	    dirname, bounds, compress ? ".Z" : "");
373	if (compress) {
374		if ((fp = zopen(path, "w", 0)) == NULL) {
375			syslog(LOG_ERR, "%s: %s", path, strerror(errno));
376			exit(1);
377		}
378	} else
379		ofd = Create(path, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
380
381	/* Open the raw device. */
382	rawp = rawname(ddname);
383	if ((ifd = open(rawp, O_RDONLY)) == -1) {
384		syslog(LOG_WARNING, "%s: %m; using block device", rawp);
385		ifd = dumpfd;
386	}
387
388	/* Seek to the start of the core. */
389	Lseek(ifd, (off_t)dumplo, L_SET);
390
391	/* Copy the core file. */
392	syslog(LOG_NOTICE, "writing %score to %s",
393	    compress ? "compressed " : "", path);
394	for (; dumpsize > 0; dumpsize -= nr) {
395		(void)printf("%6dK\r", dumpsize / 1024);
396		(void)fflush(stdout);
397		nr = read(ifd, buf, MIN(dumpsize, sizeof(buf)));
398		if (nr <= 0) {
399			if (nr == 0)
400				syslog(LOG_WARNING,
401				    "WARNING: EOF on dump device");
402			else
403				syslog(LOG_ERR, "%s: %m", rawp);
404			goto err2;
405		}
406		if (compress)
407			nw = fwrite(buf, 1, nr, fp);
408		else
409			nw = write(ofd, buf, nr);
410		if (nw != nr) {
411			syslog(LOG_ERR, "%s: %s",
412			    path, strerror(nw == 0 ? EIO : errno));
413err2:			syslog(LOG_WARNING,
414			    "WARNING: core may be incomplete");
415			(void)printf("\n");
416			exit(1);
417		}
418	}
419	(void)close(ifd);
420	if (compress)
421		(void)fclose(fp);
422	else
423		(void)close(ofd);
424
425	/* Copy the kernel. */
426	ifd = Open(kernel ? kernel : _PATH_UNIX, O_RDONLY);
427	(void)snprintf(path, sizeof(path), "%s/netbsd.%d%s",
428	    dirname, bounds, compress ? ".Z" : "");
429	if (compress) {
430		if ((fp = zopen(path, "w", 0)) == NULL) {
431			syslog(LOG_ERR, "%s: %s", path, strerror(errno));
432			exit(1);
433		}
434	} else
435		ofd = Create(path, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
436	syslog(LOG_NOTICE, "writing %skernel to %s",
437	    compress ? "compressed " : "", path);
438	while ((nr = read(ifd, buf, sizeof(buf))) > 0) {
439		if (compress)
440			nw = fwrite(buf, 1, nr, fp);
441		else
442			nw = write(ofd, buf, nr);
443		if (nw != nr) {
444			syslog(LOG_ERR, "%s: %s",
445			    path, strerror(nw == 0 ? EIO : errno));
446			syslog(LOG_WARNING,
447			    "WARNING: kernel may be incomplete");
448			exit(1);
449		}
450	}
451	if (nr < 0) {
452		syslog(LOG_ERR, "%s: %s",
453		    kernel ? kernel : _PATH_UNIX, strerror(errno));
454		syslog(LOG_WARNING,
455		    "WARNING: kernel may be incomplete");
456		exit(1);
457	}
458	if (compress)
459		(void)fclose(fp);
460	else
461		(void)close(ofd);
462}
463
464char *
465find_dev(dev, type)
466	register dev_t dev;
467	register int type;
468{
469	register DIR *dfd;
470	struct dirent *dir;
471	struct stat sb;
472	char *dp, devname[MAXPATHLEN + 1];
473
474	if ((dfd = opendir(_PATH_DEV)) == NULL) {
475		syslog(LOG_ERR, "%s: %s", _PATH_DEV, strerror(errno));
476		exit(1);
477	}
478	(void)strcpy(devname, _PATH_DEV);
479	while ((dir = readdir(dfd))) {
480		(void)strcpy(devname + sizeof(_PATH_DEV) - 1, dir->d_name);
481		if (lstat(devname, &sb)) {
482			syslog(LOG_ERR, "%s: %s", devname, strerror(errno));
483			continue;
484		}
485		if ((sb.st_mode & S_IFMT) != type)
486			continue;
487		if (dev == sb.st_rdev) {
488			closedir(dfd);
489			if ((dp = strdup(devname)) == NULL) {
490				syslog(LOG_ERR, "%s", strerror(errno));
491				exit(1);
492			}
493			return (dp);
494		}
495	}
496	closedir(dfd);
497	syslog(LOG_ERR, "can't find device %d/%d", major(dev), minor(dev));
498	exit(1);
499}
500
/*
 * rawname --
 *	Build the raw-device name for s by inserting an 'r' in front of
 *	the last path component ("/dev/sd0b" becomes "/dev/rsd0b").
 *
 *	Returns a newly allocated string.  When s has no '/' or ends in
 *	one, the problem is logged and s itself is returned.  Exits with
 *	status 1 if memory cannot be allocated.
 */
char *
rawname(s)
	char *s;
{
	char *sl, *name;
	size_t dirlen, len;

	/* Nothing to prefix when there is no component after the last '/'. */
	if ((sl = strrchr(s, '/')) == NULL || sl[1] == '\0') {
		syslog(LOG_ERR,
		    "can't make raw dump device name from %s", s);
		return (s);
	}

	dirlen = (size_t)(sl - s) + 1;	/* directory part, '/' included */
	len = strlen(s) + 2;		/* the added 'r' and the NUL */
	if ((name = malloc(len)) == NULL) {
		syslog(LOG_ERR, "%s", strerror(errno));
		exit(1);
	}
	(void)memcpy(name, s, dirlen);
	name[dirlen] = 'r';
	(void)strcpy(name + dirlen + 1, sl + 1);
	return (name);
}
519
/* How far from the current time a dump time may be and still be accepted. */
#define	LEEWAY	(7 * SECSPERDAY)

/*
 * get_crashtime --
 *	Read the time recorded in the dump and report it on stdout.
 *
 *	Returns 1 when the time is non-zero and within LEEWAY of now,
 *	0 otherwise.
 */
int
get_crashtime()
{
	time_t crashed;				/* Time the dump was taken. */

	Lseek(dumpfd, (off_t)(dumplo + ok(dump_nl[X_TIME].n_value)), L_SET);
	(void)Read(dumpfd, &crashed, sizeof(crashed));

	if (crashed == 0) {
		if (verbose)
			syslog(LOG_ERR, "dump time is zero");
		return (0);
	}

	(void)printf("savecore: system went down at %s", ctime(&crashed));
	if (crashed >= now - LEEWAY && crashed <= now + LEEWAY)
		return (1);

	(void)printf("dump time is unreasonable\n");
	return (0);
}
540
541int
542check_space()
543{
544	register FILE *fp;
545	char *tkernel;
546	off_t minfree, spacefree, kernelsize, needed;
547	struct stat st;
548	struct statfs fsbuf;
549	char buf[100], path[MAXPATHLEN];
550
551	tkernel = kernel ? kernel : _PATH_UNIX;
552	if (stat(tkernel, &st) < 0) {
553		syslog(LOG_ERR, "%s: %m", tkernel);
554		exit(1);
555	}
556	kernelsize = st.st_blocks * S_BLKSIZE;
557	if (statfs(dirname, &fsbuf) < 0) {
558		syslog(LOG_ERR, "%s: %m", dirname);
559		exit(1);
560	}
561 	spacefree = (fsbuf.f_bavail * fsbuf.f_bsize) / 1024;
562
563	(void)snprintf(path, sizeof(path), "%s/minfree", dirname);
564	if ((fp = fopen(path, "r")) == NULL)
565		minfree = 0;
566	else {
567		if (fgets(buf, sizeof(buf), fp) == NULL)
568			minfree = 0;
569		else
570			minfree = atoi(buf);
571		(void)fclose(fp);
572	}
573
574	needed = (dumpsize + kernelsize) / 1024;
575 	if (minfree > 0 && spacefree - needed < minfree) {
576		syslog(LOG_WARNING,
577		    "no dump, not enough free space on device");
578		return (0);
579	}
580	if (spacefree - needed < minfree)
581		syslog(LOG_WARNING,
582		    "dump performed, but free space threshold crossed");
583	return (1);
584}
585
/*
 * Open --
 *	open(2) the named file with the given flags and return the
 *	descriptor; log the error and exit with status 1 on failure.
 */
int
Open(name, rw)
	char *name;
	int rw;
{
	int fd;

	fd = open(name, rw, 0);
	if (fd >= 0)
		return (fd);

	syslog(LOG_ERR, "%s: %m", name);
	exit(1);
}
599
/*
 * Read --
 *	Read exactly size bytes from fd into bp and return size.
 *
 *	Anything else is fatal: the error is logged and the program exits
 *	with status 1.  A short read is reported as EIO, as Write() does
 *	for a short write, because errno is only meaningful when read(2)
 *	returned -1.
 */
int
Read(fd, bp, size)
	int fd, size;
	void *bp;
{
	int nr;

	nr = read(fd, bp, size);
	if (nr != size) {
		syslog(LOG_ERR, "read: %s", strerror(nr == -1 ? errno : EIO));
		exit(1);
	}
	return (nr);
}
614
/*
 * Lseek --
 *	Reposition fd with lseek(2); log the error and exit with status 1
 *	if the call fails.
 */
void
Lseek(fd, off, flag)
	int fd, flag;
	off_t off;
{
	if (lseek(fd, off, flag) != -1)
		return;

	syslog(LOG_ERR, "lseek: %m");
	exit(1);
}
628
/*
 * Create --
 *	Open file for writing, creating it with the given mode if needed
 *	and truncating it otherwise.  Returns the descriptor; logs the
 *	error and exits with status 1 on failure.
 */
int
Create(file, mode)
	char *file;
	int mode;
{
	register int fd;

	fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, mode);
	if (fd >= 0)
		return (fd);

	syslog(LOG_ERR, "%s: %m", file);
	exit(1);
}
643
/*
 * Write --
 *	Write size bytes from bp to fd.  A failed or short write is
 *	logged (a short write as EIO) and ends the program with status 1.
 */
void
Write(fd, bp, size)
	int fd, size;
	void *bp;
{
	int written;

	written = write(fd, bp, size);
	if (written >= size)
		return;

	syslog(LOG_ERR, "write: %s", strerror(written == -1 ? errno : EIO));
	exit(1);
}
656
/*
 * usage --
 *	Log the command synopsis and exit with status 1.
 */
void
usage()
{
	static char synopsis[] =
	    "usage: savecore [-cfvz] [-N system] directory";

	syslog(LOG_ERR, "%s", synopsis);
	exit(1);
}
663