savecore.c revision 1.22
1/*	$NetBSD: savecore.c,v 1.22 1995/06/25 06:28:13 cgd Exp $	*/
2
3/*-
4 * Copyright (c) 1986, 1992, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 *    must display the following acknowledgement:
17 *	This product includes software developed by the University of
18 *	California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#ifndef lint
37static char copyright[] =
38"@(#) Copyright (c) 1986, 1992, 1993\n\
39	The Regents of the University of California.  All rights reserved.\n";
40#endif /* not lint */
41
42#ifndef lint
43#if 0
44static char sccsid[] = "@(#)savecore.c	8.3 (Berkeley) 1/2/94";
45#else
46static char rcsid[] = "$NetBSD: savecore.c,v 1.22 1995/06/25 06:28:13 cgd Exp $";
47#endif
48#endif /* not lint */
49
50#include <sys/param.h>
51#include <sys/stat.h>
52#include <sys/mount.h>
53#include <sys/syslog.h>
54#include <sys/time.h>
55
56#include <dirent.h>
57#include <errno.h>
58#include <fcntl.h>
59#include <nlist.h>
60#include <paths.h>
61#include <stdio.h>
62#include <stdlib.h>
63#include <string.h>
64#include <tzfile.h>
65#include <unistd.h>
66
/*
 * Stream opener defined outside this file; save_core() uses it to write
 * its output when compression (-z) was requested.
 */
extern FILE *zopen __P((const char *path, const char *type, int maxbits));

/*
 * Subtract KERNBASE from a kernel symbol value.  Callers add dumplo to the
 * result to obtain an offset on the dump device.
 */
#define ok(kaddr) ((kaddr) - KERNBASE)
70
/*
 * Indices into current_nl[] and dump_nl[].  Both tables list the same
 * symbols in the same order, so one set of indices serves both.
 */
#define X_DUMPDEV	0
#define X_DUMPLO	1
#define X_TIME		2
#define X_DUMPSIZE	3
#define X_VERSION	4
#define X_PANICSTR	5
#define X_DUMPMAG	6

/* Namelist for currently running system. */
struct nlist current_nl[] = {
	{ "_dumpdev" },		/* X_DUMPDEV */
	{ "_dumplo" },		/* X_DUMPLO */
	{ "_time" },		/* X_TIME */
	{ "_dumpsize" },	/* X_DUMPSIZE */
	{ "_version" },		/* X_VERSION */
	{ "_panicstr" },	/* X_PANICSTR */
	{ "_dumpmag" },		/* X_DUMPMAG */
	{ "" },
};

/*
 * Symbols kmem_setup() insists on finding: cursyms[] in the running
 * kernel's namelist, dumpsyms[] in the dumped kernel's.  Each list is
 * terminated by -1.
 */
int cursyms[] = {
	X_DUMPDEV, X_DUMPLO, X_VERSION, X_DUMPMAG, -1
};
int dumpsyms[] = {
	X_TIME, X_DUMPSIZE, X_VERSION, X_PANICSTR, X_DUMPMAG, -1
};

/*
 * Name list for dumped system.
 * Entries MUST be the same as those in current_nl[].
 */
struct nlist dump_nl[] = {
	{ "_dumpdev" },
	{ "_dumplo" },
	{ "_time" },
	{ "_dumpsize" },
	{ "_version" },
	{ "_panicstr" },
	{ "_dumpmag" },
	{ "" },
};
101
/* Types match kernel declarations. */
long	dumplo;				/* where dump starts on dumpdev */
int	dumpmag;			/* magic number in dump */
int	dumpsize;			/* amount of memory dumped */

char	*kernel;			/* kernel given with -N or as 2nd arg */
char	*dirname;			/* directory to save dumps in */
char	*ddname;			/* name of dump device */
dev_t	dumpdev;			/* dump device */
int	dumpfd;				/* read/write descriptor on block dev */
time_t	now;				/* current date */
char	panic_mesg[1024];		/* panic message copied from the dump */
/*
 * Kernel address of the panic message as read from the dump; zero when
 * the system did not panic.  Kept in a long rather than an int so that a
 * pointer-sized value is not truncated on 64-bit machines; every user
 * goes through sizeof(panicstr), so the width change is transparent.
 */
long	panicstr;
char	vers[1024];			/* version string of running kernel */

/* flags: -c, -z, -f and -v (or -d) respectively */
int	clear, compress, force, verbose;
118
/*
 * Prototypes for the functions defined in this file.  The capitalised
 * names (Create, Lseek, Open, Read, Write) wrap the corresponding system
 * calls and log and exit(1) on failure instead of returning an error.
 *
 * A former prototype for a function named log() has been dropped: no such
 * function is defined or called here, and the name belongs to the C
 * library's log().
 */
void	 check_kmem __P((void));
int	 check_space __P((void));
void	 clear_dump __P((void));
int	 Create __P((char *file, int mode));
int	 dump_exists __P((void));
char	*find_dev __P((dev_t dev, int type));
int	 get_crashtime __P((void));
void	 kmem_setup __P((void));
void	 Lseek __P((int fd, off_t off, int flag));
int	 Open __P((char *name, int rw));
int	 Read __P((int fd, void *bp, int size));
char	*rawname __P((char *s));
void	 save_core __P((void));
void	 usage __P((void));
void	 Write __P((int fd, void *bp, int size));
135
136int
137main(argc, argv)
138	int argc;
139	char *argv[];
140{
141	int ch;
142
143	openlog("savecore", LOG_PERROR, LOG_DAEMON);
144
145	while ((ch = getopt(argc, argv, "cdfN:vz")) != -1)
146		switch(ch) {
147		case 'c':
148			clear = 1;
149			break;
150		case 'd':		/* Not documented. */
151		case 'v':
152			verbose = 1;
153			break;
154		case 'f':
155			force = 1;
156			break;
157		case 'N':
158			kernel = optarg;
159			break;
160		case 'z':
161			compress = 1;
162			break;
163		case '?':
164		default:
165			usage();
166		}
167	argc -= optind;
168	argv += optind;
169
170	if (!clear) {
171		if (argc != 1 && argc != 2)
172			usage();
173		dirname = argv[0];
174	}
175	if (argc == 2)
176		kernel = argv[1];
177
178	(void)time(&now);
179	kmem_setup();
180
181	if (clear) {
182		clear_dump();
183		exit(0);
184	}
185
186	if (!dump_exists() && !force)
187		exit(1);
188
189	check_kmem();
190
191	if (panicstr)
192		syslog(LOG_ALERT, "reboot after panic: %s", panic_mesg);
193	else
194		syslog(LOG_ALERT, "reboot");
195
196	if ((!get_crashtime() || !check_space()) && !force)
197		exit(1);
198
199	save_core();
200
201	clear_dump();
202	exit(0);
203}
204
/*
 * Resolve the kernel symbols savecore needs, read the running kernel's
 * dump parameters out of _PATH_KMEM and open the dump device.
 *
 * Sets dumpdev, dumplo (converted to a byte offset), dumpmag, ddname and
 * dumpfd.  vers receives the running kernel's version string unless a
 * kernel was named on the command line.  A required symbol missing from a
 * namelist, or a failing open, seek or read, is logged and ends the
 * program with exit(1).
 */
void
kmem_setup()
{
	FILE *fp;
	int kmem, i;
	char *dump_sys;

	/*
	 * Some names we need for the currently running system, others for
	 * the system that was running when the dump was made.  The values
	 * obtained from the current system are used to look for things in
	 * /dev/kmem that cannot be found in the dump_sys namelist, but are
	 * presumed to be the same (since the disk partitions are probably
	 * the same!)
	 */
	if ((nlist(_PATH_UNIX, current_nl)) == -1)
		syslog(LOG_ERR, "%s: nlist: %s", _PATH_UNIX, strerror(errno));
	for (i = 0; cursyms[i] != -1; i++)
		if (current_nl[cursyms[i]].n_value == 0) {
			syslog(LOG_ERR, "%s: %s not in namelist",
			    _PATH_UNIX, current_nl[cursyms[i]].n_name);
			exit(1);
		}

	dump_sys = kernel ? kernel : _PATH_UNIX;
	if ((nlist(dump_sys, dump_nl)) == -1)
		syslog(LOG_ERR, "%s: nlist: %s", dump_sys, strerror(errno));
	for (i = 0; dumpsyms[i] != -1; i++)
		if (dump_nl[dumpsyms[i]].n_value == 0) {
			syslog(LOG_ERR, "%s: %s not in namelist",
			    dump_sys, dump_nl[dumpsyms[i]].n_name);
			exit(1);
		}

	kmem = Open(_PATH_KMEM, O_RDONLY);
	Lseek(kmem, (off_t)current_nl[X_DUMPDEV].n_value, L_SET);
	(void)Read(kmem, &dumpdev, sizeof(dumpdev));
	if (dumpdev == NODEV) {
		syslog(LOG_WARNING, "no core dump (no dumpdev)");
		exit(1);
	}
	Lseek(kmem, (off_t)current_nl[X_DUMPLO].n_value, L_SET);
	(void)Read(kmem, &dumplo, sizeof(dumplo));
	dumplo *= DEV_BSIZE;		/* blocks -> bytes */
	if (verbose)
		/* dumplo is a long: print it with %ld, not %d. */
		(void)printf("dumplo = %ld (%ld * %d)\n",
		    dumplo, dumplo / DEV_BSIZE, (int)DEV_BSIZE);
	Lseek(kmem, (off_t)current_nl[X_DUMPMAG].n_value, L_SET);
	(void)Read(kmem, &dumpmag, sizeof(dumpmag));
	ddname = find_dev(dumpdev, S_IFBLK);
	dumpfd = Open(ddname, O_RDWR);

	fp = fdopen(kmem, "r");
	if (fp == NULL) {
		syslog(LOG_ERR, "%s: fdopen: %m", _PATH_KMEM);
		exit(1);
	}
	/*
	 * The running kernel's version string is only compared against the
	 * dump when no kernel was named, so only fetch it in that case.
	 */
	if (kernel == NULL) {
		(void)fseek(fp, (long)current_nl[X_VERSION].n_value, L_SET);
		(void)fgets(vers, sizeof(vers), fp);
	}

	/*
	 * fp wraps the kmem descriptor, which is local to this function and
	 * not needed again; closing the stream releases both.
	 */
	(void)fclose(fp);
}
268
269void
270check_kmem()
271{
272	register char *cp;
273	FILE *fp;
274	char core_vers[1024];
275
276	fp = fdopen(dumpfd, "r");
277	if (fp == NULL) {
278		syslog(LOG_ERR, "%s: fdopen: %m", ddname);
279		exit(1);
280	}
281	fseek(fp, (off_t)(dumplo + ok(dump_nl[X_VERSION].n_value)), L_SET);
282	fgets(core_vers, sizeof(core_vers), fp);
283	if (strcmp(vers, core_vers) && kernel == 0)
284		syslog(LOG_WARNING,
285		    "warning: %s version mismatch:\n\t%s\nand\t%s\n",
286		    _PATH_UNIX, vers, core_vers);
287	(void)fseek(fp,
288	    (off_t)(dumplo + ok(dump_nl[X_PANICSTR].n_value)), L_SET);
289	(void)fread(&panicstr, sizeof(panicstr), 1, fp);
290	if (panicstr) {
291		(void)fseek(fp, dumplo + ok(panicstr), L_SET);
292		cp = panic_mesg;
293		do
294			*cp = getc(fp);
295		while (*cp++ && cp < &panic_mesg[sizeof(panic_mesg)]);
296	}
297	/* Don't fclose(fp), we use dumpfd later. */
298}
299
300void
301clear_dump()
302{
303	long newdumplo;
304
305	newdumplo = 0;
306	Lseek(dumpfd, (off_t)(dumplo + ok(dump_nl[X_DUMPMAG].n_value)), L_SET);
307	Write(dumpfd, &newdumplo, sizeof(newdumplo));
308}
309
310int
311dump_exists()
312{
313	int newdumpmag;
314
315	Lseek(dumpfd, (off_t)(dumplo + ok(dump_nl[X_DUMPMAG].n_value)), L_SET);
316	(void)Read(dumpfd, &newdumpmag, sizeof(newdumpmag));
317	if (newdumpmag != dumpmag) {
318		if (verbose)
319			syslog(LOG_WARNING, "magic number mismatch (%x != %x)",
320			    newdumpmag, dumpmag);
321		syslog(LOG_WARNING, "no core dump");
322		return (0);
323	}
324	return (1);
325}
326
327char buf[1024 * 1024];
328
329void
330save_core()
331{
332	register FILE *fp;
333	register int bounds, ifd, nr, nw, ofd;
334	char *rawp, path[MAXPATHLEN];
335
336	/*
337	 * Get the current number and update the bounds file.  Do the update
338	 * now, because may fail later and don't want to overwrite anything.
339	 */
340	(void)snprintf(path, sizeof(path), "%s/bounds", dirname);
341	if ((fp = fopen(path, "r")) == NULL)
342		goto err1;
343	if (fgets(buf, sizeof(buf), fp) == NULL) {
344		if (ferror(fp))
345err1:			syslog(LOG_WARNING, "%s: %s", path, strerror(errno));
346		bounds = 0;
347	} else
348		bounds = atoi(buf);
349	if (fp != NULL)
350		(void)fclose(fp);
351	if ((fp = fopen(path, "w")) == NULL)
352		syslog(LOG_ERR, "%s: %m", path);
353	else {
354		(void)fprintf(fp, "%d\n", bounds + 1);
355		(void)fclose(fp);
356	}
357	(void)fclose(fp);
358
359	/* Create the core file. */
360	(void)snprintf(path, sizeof(path), "%s/netbsd.%d%s.core",
361	    dirname, bounds, compress ? ".Z" : "");
362	if (compress) {
363		if ((fp = zopen(path, "w", 0)) == NULL) {
364			syslog(LOG_ERR, "%s: %s", path, strerror(errno));
365			exit(1);
366		}
367	} else
368		ofd = Create(path, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
369
370	/* Open the raw device. */
371	rawp = rawname(ddname);
372	if ((ifd = open(rawp, O_RDONLY)) == -1) {
373		syslog(LOG_WARNING, "%s: %m; using block device", rawp);
374		ifd = dumpfd;
375	}
376
377	/* Read the dump size. */
378	Lseek(dumpfd, (off_t)(dumplo + ok(dump_nl[X_DUMPSIZE].n_value)), L_SET);
379	(void)Read(dumpfd, &dumpsize, sizeof(dumpsize));
380
381	/* Seek to the start of the core. */
382	Lseek(ifd, (off_t)dumplo, L_SET);
383
384	/* Copy the core file. */
385	dumpsize *= getpagesize();
386	syslog(LOG_NOTICE, "writing %score to %s",
387	    compress ? "compressed " : "", path);
388	for (; dumpsize > 0; dumpsize -= nr) {
389		(void)printf("%6dK\r", dumpsize / 1024);
390		(void)fflush(stdout);
391		nr = read(ifd, buf, MIN(dumpsize, sizeof(buf)));
392		if (nr <= 0) {
393			if (nr == 0)
394				syslog(LOG_WARNING,
395				    "WARNING: EOF on dump device");
396			else
397				syslog(LOG_ERR, "%s: %m", rawp);
398			goto err2;
399		}
400		if (compress)
401			nw = fwrite(buf, 1, nr, fp);
402		else
403			nw = write(ofd, buf, nr);
404		if (nw != nr) {
405			syslog(LOG_ERR, "%s: %s",
406			    path, strerror(nw == 0 ? EIO : errno));
407err2:			syslog(LOG_WARNING,
408			    "WARNING: core may be incomplete");
409			(void)printf("\n");
410			exit(1);
411		}
412	}
413	(void)close(ifd);
414	if (compress)
415		(void)fclose(fp);
416	else
417		(void)close(ofd);
418
419	/* Copy the kernel. */
420	ifd = Open(kernel ? kernel : _PATH_UNIX, O_RDONLY);
421	(void)snprintf(path, sizeof(path), "%s/netbsd.%d%s",
422	    dirname, bounds, compress ? ".Z" : "");
423	if (compress) {
424		if ((fp = zopen(path, "w", 0)) == NULL) {
425			syslog(LOG_ERR, "%s: %s", path, strerror(errno));
426			exit(1);
427		}
428	} else
429		ofd = Create(path, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
430	syslog(LOG_NOTICE, "writing %skernel to %s",
431	    compress ? "compressed " : "", path);
432	while ((nr = read(ifd, buf, sizeof(buf))) > 0) {
433		if (compress)
434			nw = fwrite(buf, 1, nr, fp);
435		else
436			nw = write(ofd, buf, nr);
437		if (nw != nr) {
438			syslog(LOG_ERR, "%s: %s",
439			    path, strerror(nw == 0 ? EIO : errno));
440			syslog(LOG_WARNING,
441			    "WARNING: kernel may be incomplete");
442			exit(1);
443		}
444	}
445	if (nr < 0) {
446		syslog(LOG_ERR, "%s: %s",
447		    kernel ? kernel : _PATH_UNIX, strerror(errno));
448		syslog(LOG_WARNING,
449		    "WARNING: kernel may be incomplete");
450		exit(1);
451	}
452	if (compress)
453		(void)fclose(fp);
454	else
455		(void)close(ofd);
456}
457
458char *
459find_dev(dev, type)
460	register dev_t dev;
461	register int type;
462{
463	register DIR *dfd;
464	struct dirent *dir;
465	struct stat sb;
466	char *dp, devname[MAXPATHLEN + 1];
467
468	if ((dfd = opendir(_PATH_DEV)) == NULL) {
469		syslog(LOG_ERR, "%s: %s", _PATH_DEV, strerror(errno));
470		exit(1);
471	}
472	(void)strcpy(devname, _PATH_DEV);
473	while ((dir = readdir(dfd))) {
474		(void)strcpy(devname + sizeof(_PATH_DEV) - 1, dir->d_name);
475		if (lstat(devname, &sb)) {
476			syslog(LOG_ERR, "%s: %s", devname, strerror(errno));
477			continue;
478		}
479		if ((sb.st_mode & S_IFMT) != type)
480			continue;
481		if (dev == sb.st_rdev) {
482			closedir(dfd);
483			if ((dp = strdup(devname)) == NULL) {
484				syslog(LOG_ERR, "%s", strerror(errno));
485				exit(1);
486			}
487			return (dp);
488		}
489	}
490	closedir(dfd);
491	syslog(LOG_ERR, "can't find device %d/%d", major(dev), minor(dev));
492	exit(1);
493}
494
/*
 * Build the raw-device name for block device path `s' by inserting an
 * 'r' in front of the last path component: "/dev/sd0b" -> "/dev/rsd0b".
 *
 * Returns the new name in storage obtained from strdup().  When `s' has
 * no '/' or nothing follows the last '/', an error is logged and `s'
 * itself is returned.  Running out of memory is logged and fatal.
 */
char *
rawname(s)
	char *s;
{
	char *sl, name[MAXPATHLEN];

	/* An empty last component is marked by the NUL, not the digit '0'. */
	if ((sl = strrchr(s, '/')) == NULL || sl[1] == '\0') {
		syslog(LOG_ERR,
		    "can't make raw dump device name from %s", s);
		return (s);
	}
	/* The precision consumed by "%.*s" must be an int. */
	(void)snprintf(name, sizeof(name), "%.*s/r%s",
	    (int)(sl - s), s, sl + 1);
	if ((sl = strdup(name)) == NULL) {
		syslog(LOG_ERR, "%s", strerror(errno));
		exit(1);
	}
	return (sl);
}
513
514int
515get_crashtime()
516{
517	time_t dumptime;			/* Time the dump was taken. */
518
519	Lseek(dumpfd, (off_t)(dumplo + ok(dump_nl[X_TIME].n_value)), L_SET);
520	(void)Read(dumpfd, &dumptime, sizeof(dumptime));
521	if (dumptime == 0) {
522		if (verbose)
523			syslog(LOG_ERR, "dump time is zero");
524		return (0);
525	}
526	(void)printf("savecore: system went down at %s", ctime(&dumptime));
527#define	LEEWAY	(7 * SECSPERDAY)
528	if (dumptime < now - LEEWAY || dumptime > now + LEEWAY) {
529		(void)printf("dump time is unreasonable\n");
530		return (0);
531	}
532	return (1);
533}
534
535int
536check_space()
537{
538	register FILE *fp;
539	char *tkernel;
540	off_t minfree, spacefree, kernelsize, needed;
541	struct stat st;
542	struct statfs fsbuf;
543	char buf[100], path[MAXPATHLEN];
544
545	tkernel = kernel ? kernel : _PATH_UNIX;
546	if (stat(tkernel, &st) < 0) {
547		syslog(LOG_ERR, "%s: %m", tkernel);
548		exit(1);
549	}
550	kernelsize = st.st_blocks * S_BLKSIZE;
551	if (statfs(dirname, &fsbuf) < 0) {
552		syslog(LOG_ERR, "%s: %m", dirname);
553		exit(1);
554	}
555 	spacefree = (fsbuf.f_bavail * fsbuf.f_bsize) / 1024;
556
557	(void)snprintf(path, sizeof(path), "%s/minfree", dirname);
558	if ((fp = fopen(path, "r")) == NULL)
559		minfree = 0;
560	else {
561		if (fgets(buf, sizeof(buf), fp) == NULL)
562			minfree = 0;
563		else
564			minfree = atoi(buf);
565		(void)fclose(fp);
566	}
567
568	needed = (dumpsize + kernelsize) / 1024;
569 	if (minfree > 0 && spacefree - needed < minfree) {
570		syslog(LOG_WARNING,
571		    "no dump, not enough free space on device");
572		return (0);
573	}
574	if (spacefree - needed < minfree)
575		syslog(LOG_WARNING,
576		    "dump performed, but free space threshold crossed");
577	return (1);
578}
579
/*
 * open(2) that cannot fail: returns a descriptor for `name' opened with
 * flags `rw', or logs the error and exits with status 1.
 */
int
Open(name, rw)
	char *name;
	int rw;
{
	int fd;

	fd = open(name, rw, 0);
	if (fd >= 0)
		return (fd);

	syslog(LOG_ERR, "%s: %m", name);
	exit(1);
}
593
/*
 * read(2) that must transfer exactly `size' bytes into `bp'.  Returns
 * `size' on success; anything else is logged and ends the program with
 * exit(1).
 *
 * errno is only meaningful when read() returned -1, so a short read is
 * reported as EIO, the same way Write() reports a short write.
 */
int
Read(fd, bp, size)
	int fd, size;
	void *bp;
{
	int nr;

	nr = read(fd, bp, size);
	if (nr != size) {
		syslog(LOG_ERR, "read: %s", strerror(nr == -1 ? errno : EIO));
		exit(1);
	}
	return (nr);
}
608
/*
 * lseek(2) that cannot fail: reposition `fd' to `off' as interpreted by
 * `flag', or log the error and exit with status 1.
 */
void
Lseek(fd, off, flag)
	int fd, flag;
	off_t off;
{
	if (lseek(fd, off, flag) != -1)
		return;

	syslog(LOG_ERR, "lseek: %m");
	exit(1);
}
622
/*
 * Create `file' for writing with permission bits `mode', truncating it
 * if it already exists.  Returns the descriptor, or logs the error and
 * exits with status 1.
 */
int
Create(file, mode)
	char *file;
	int mode;
{
	register int fd;

	if ((fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, mode)) >= 0)
		return (fd);

	syslog(LOG_ERR, "%s: %m", file);
	exit(1);
}
637
/*
 * write(2) that must transfer all `size' bytes from `bp'.  A failed or
 * short write is logged (a short write is reported as EIO) and ends the
 * program with exit(1).
 */
void
Write(fd, bp, size)
	int fd, size;
	void *bp;
{
	int written;

	written = write(fd, bp, size);
	if (written >= size)
		return;

	syslog(LOG_ERR, "write: %s", strerror(written == -1 ? errno : EIO));
	exit(1);
}
650
/*
 * Log the command synopsis at LOG_ERR priority and exit with status 1.
 * Never returns.
 */
void
usage()
{
	syslog(LOG_ERR, "usage: savecore [-cfvz] [-N system] directory");
	exit(1);
}
657