/* savecore.c revision 1.21 */
1/*	$NetBSD: savecore.c,v 1.21 1995/03/18 15:01:02 cgd Exp $	*/
2
3/*-
4 * Copyright (c) 1986, 1992, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 *    must display the following acknowledgement:
17 *	This product includes software developed by the University of
18 *	California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#ifndef lint
37static char copyright[] =
38"@(#) Copyright (c) 1986, 1992, 1993\n\
39	The Regents of the University of California.  All rights reserved.\n";
40#endif /* not lint */
41
42#ifndef lint
43#if 0
44static char sccsid[] = "@(#)savecore.c	8.3 (Berkeley) 1/2/94";
45#else
46static char rcsid[] = "$NetBSD: savecore.c,v 1.21 1995/03/18 15:01:02 cgd Exp $";
47#endif
48#endif /* not lint */
49
50#include <sys/param.h>
51#include <sys/stat.h>
52#include <sys/mount.h>
53#include <sys/syslog.h>
54#include <sys/time.h>
55
56#include <dirent.h>
57#include <errno.h>
58#include <fcntl.h>
59#include <nlist.h>
60#include <paths.h>
61#include <stdio.h>
62#include <stdlib.h>
63#include <string.h>
64#include <tzfile.h>
65#include <unistd.h>
66
/*
 * External declaration of zopen(), which save_core() uses to open its
 * output files when compression was requested.
 */
extern FILE *zopen __P((const char *fname, const char *mode, int bits));

/*
 * Subtract KERNBASE from a value.  The callers in this file apply it to
 * symbol values from the dump namelist and add dumplo to the result to
 * obtain a seek offset on the dump device.
 */
#define ok(addr) ((addr) - KERNBASE)
70
/*
 * Indices into current_nl[] and dump_nl[].  Both tables list the same
 * symbols in the same order, so one set of indices serves both.
 */
#define X_DUMPDEV	0
#define X_DUMPLO	1
#define X_TIME		2
#define X_DUMPSIZE	3
#define X_VERSION	4
#define X_PANICSTR	5
#define X_DUMPMAG	6

/* Namelist for the currently running system. */
struct nlist current_nl[] = {
	{ "_dumpdev" },		/* X_DUMPDEV */
	{ "_dumplo" },		/* X_DUMPLO */
	{ "_time" },		/* X_TIME */
	{ "_dumpsize" },	/* X_DUMPSIZE */
	{ "_version" },		/* X_VERSION */
	{ "_panicstr" },	/* X_PANICSTR */
	{ "_dumpmag" },		/* X_DUMPMAG */
	{ "" },			/* end of list */
};

/* Symbols kmem_setup() requires in current_nl[]; -1 ends the list. */
int cursyms[] = { X_DUMPDEV, X_DUMPLO, X_VERSION, X_DUMPMAG, -1 };

/* Symbols kmem_setup() requires in dump_nl[]; -1 ends the list. */
int dumpsyms[] = { X_TIME, X_DUMPSIZE, X_VERSION, X_PANICSTR, X_DUMPMAG, -1 };

/*
 * Name list for the dumped system.  Entries MUST be the same as those in
 * current_nl[], because the X_* indices are shared.
 */
struct nlist dump_nl[] = {
	{ "_dumpdev" },
	{ "_dumplo" },
	{ "_time" },
	{ "_dumpsize" },
	{ "_version" },
	{ "_panicstr" },
	{ "_dumpmag" },
	{ "" },
};
101
/*
 * Values read from the running kernel and from the dump.
 * Types match kernel declarations.
 */
long	dumplo;			/* where dump starts on dumpdev */
int	dumpmag;		/* magic number in dump */
int	dumpsize;		/* amount of memory dumped */

/* Names and descriptors established by main() and kmem_setup(). */
char	*kernel;		/* kernel given with -N or as 2nd operand */
char	*dirname;		/* directory to save dumps in */
char	*ddname;		/* name of dump device */
dev_t	dumpdev;		/* dump device */
int	dumpfd;			/* read/write descriptor on block dev */
time_t	now;			/* current date */

/* Panic information and version string. */
char	panic_mesg[1024];	/* panic message copied out of the dump */
int	panicstr;		/* value of _panicstr read from the dump */
char	vers[1024];		/* version string of the running kernel */

/* Command line flags. */
int	clear;			/* -c */
int	compress;		/* -z */
int	force;			/* -f */
int	verbose;		/* -v (and undocumented -d) */
118
/*
 * Forward declarations for the functions defined in this file.
 *
 * The capitalised helpers (Create, Lseek, Open, Read, Write) wrap the
 * corresponding system calls; they log through syslog() and exit(1) on
 * failure, so callers never see an error return.
 *
 * A prototype for log() used to be listed here.  No such function is
 * defined or called in this file, and the name collides with the math
 * library's log(), so it has been dropped.
 */
void	 check_kmem __P((void));
int	 check_space __P((void));
void	 clear_dump __P((void));
int	 Create __P((char *file, int mode));
int	 dump_exists __P((void));
char	*find_dev __P((dev_t dev, int type));
int	 get_crashtime __P((void));
void	 kmem_setup __P((void));
void	 Lseek __P((int fd, off_t off, int flag));
int	 Open __P((char *name, int rw));
int	 Read __P((int fd, void *bp, int size));
char	*rawname __P((char *s));
void	 save_core __P((void));
void	 usage __P((void));
void	 Write __P((int fd, void *bp, int size));
135
136int
137main(argc, argv)
138	int argc;
139	char *argv[];
140{
141	int ch;
142
143	openlog("savecore", LOG_PERROR, LOG_DAEMON);
144
145	while ((ch = getopt(argc, argv, "cdfN:vz")) != -1)
146		switch(ch) {
147		case 'c':
148			clear = 1;
149			break;
150		case 'd':		/* Not documented. */
151		case 'v':
152			verbose = 1;
153			break;
154		case 'f':
155			force = 1;
156			break;
157		case 'N':
158			kernel = optarg;
159			break;
160		case 'z':
161			compress = 1;
162			break;
163		case '?':
164		default:
165			usage();
166		}
167	argc -= optind;
168	argv += optind;
169
170	if (!clear) {
171		if (argc != 1 && argc != 2)
172			usage();
173		dirname = argv[0];
174	}
175	if (argc == 2)
176		kernel = argv[1];
177
178	(void)time(&now);
179	kmem_setup();
180
181	if (clear) {
182		clear_dump();
183		exit(0);
184	}
185
186	if (!dump_exists() && !force)
187		exit(1);
188
189	check_kmem();
190
191	if (panicstr)
192		syslog(LOG_ALERT, "reboot after panic: %s", panic_mesg);
193	else
194		syslog(LOG_ALERT, "reboot");
195
196	if ((!get_crashtime() || !check_space()) && !force)
197		exit(1);
198
199	save_core();
200
201	clear_dump();
202	exit(0);
203}
204
/*
 * Resolve the kernel symbols this program needs and read the dump
 * parameters from the running kernel.
 *
 * The cursyms[] symbols are looked up in _PATH_UNIX and the dumpsyms[]
 * symbols in the kernel that produced the dump (the global `kernel' if
 * set, otherwise _PATH_UNIX); a missing symbol is fatal.  dumpdev,
 * dumplo and dumpmag are then read from _PATH_KMEM, dumplo is converted
 * from DEV_BSIZE blocks to bytes, the block dump device is opened
 * read/write as dumpfd, and, when no alternate kernel was named, the
 * running kernel's version string is read into vers.
 */
void
kmem_setup()
{
	FILE *fp;
	int kmem, i;
	char *dump_sys;

	/*
	 * Some names we need for the currently running system, others for
	 * the system that was running when the dump was made.  The values
	 * obtained from the current system are used to look for things in
	 * /dev/kmem that cannot be found in the dump_sys namelist, but are
	 * presumed to be the same (since the disk partitions are probably
	 * the same!)
	 */
	if ((nlist(_PATH_UNIX, current_nl)) == -1)
		syslog(LOG_ERR, "%s: nlist: %s", _PATH_UNIX, strerror(errno));
	for (i = 0; cursyms[i] != -1; i++)
		if (current_nl[cursyms[i]].n_value == 0) {
			syslog(LOG_ERR, "%s: %s not in namelist",
			    _PATH_UNIX, current_nl[cursyms[i]].n_name);
			exit(1);
		}

	dump_sys = kernel ? kernel : _PATH_UNIX;
	if ((nlist(dump_sys, dump_nl)) == -1)
		syslog(LOG_ERR, "%s: nlist: %s", dump_sys, strerror(errno));
	for (i = 0; dumpsyms[i] != -1; i++)
		if (dump_nl[dumpsyms[i]].n_value == 0) {
			syslog(LOG_ERR, "%s: %s not in namelist",
			    dump_sys, dump_nl[dumpsyms[i]].n_name);
			exit(1);
		}

	kmem = Open(_PATH_KMEM, O_RDONLY);
	Lseek(kmem, (off_t)current_nl[X_DUMPDEV].n_value, L_SET);
	(void)Read(kmem, &dumpdev, sizeof(dumpdev));
	if (dumpdev == NODEV) {
		syslog(LOG_WARNING, "no core dump (no dumpdev)");
		exit(1);
	}
	Lseek(kmem, (off_t)current_nl[X_DUMPLO].n_value, L_SET);
	(void)Read(kmem, &dumplo, sizeof(dumplo));

	/*
	 * Convert to bytes before reporting, so that the message reads
	 * "bytes (blocks * block size)".  dumplo is a long, hence %ld.
	 */
	dumplo *= DEV_BSIZE;
	if (verbose)
		(void)printf("dumplo = %ld (%ld * %ld)\n",
		    (long)dumplo, (long)(dumplo / DEV_BSIZE), (long)DEV_BSIZE);

	Lseek(kmem, (off_t)current_nl[X_DUMPMAG].n_value, L_SET);
	(void)Read(kmem, &dumpmag, sizeof(dumpmag));

	ddname = find_dev(dumpdev, S_IFBLK);
	dumpfd = Open(ddname, O_RDWR);

	/*
	 * With an alternate kernel there is no running-kernel version to
	 * compare against, so we are finished with kmem.
	 */
	if (kernel) {
		(void)close(kmem);
		return;
	}

	fp = fdopen(kmem, "r");
	if (fp == NULL) {
		syslog(LOG_ERR, "%s: fdopen: %m", _PATH_KMEM);
		exit(1);
	}
	(void)fseek(fp, (off_t)current_nl[X_VERSION].n_value, L_SET);
	if (fgets(vers, sizeof(vers), fp) == NULL)
		vers[0] = '\0';

	/* kmem is local to this function; closing the stream closes it too. */
	(void)fclose(fp);
}
268
269void
270check_kmem()
271{
272	register char *cp;
273	FILE *fp;
274	char core_vers[1024];
275
276	fp = fdopen(dumpfd, "r");
277	if (fp == NULL) {
278		syslog(LOG_ERR, "%s: fdopen: %m", ddname);
279		exit(1);
280	}
281	fseek(fp, (off_t)(dumplo + ok(dump_nl[X_VERSION].n_value)), L_SET);
282	fgets(core_vers, sizeof(core_vers), fp);
283	if (strcmp(vers, core_vers) && kernel == 0)
284		syslog(LOG_WARNING,
285		    "warning: %s version mismatch:\n\t%s\nand\t%s\n",
286		    _PATH_UNIX, vers, core_vers);
287	(void)fseek(fp,
288	    (off_t)(dumplo + ok(dump_nl[X_PANICSTR].n_value)), L_SET);
289	(void)fread(&panicstr, sizeof(panicstr), 1, fp);
290	if (panicstr) {
291		(void)fseek(fp, dumplo + ok(panicstr), L_SET);
292		cp = panic_mesg;
293		do
294			*cp = getc(fp);
295		while (*cp++ && cp < &panic_mesg[sizeof(panic_mesg)]);
296	}
297	/* Don't fclose(fp), we use dumpfd later. */
298}
299
300void
301clear_dump()
302{
303	long newdumplo;
304
305	newdumplo = 0;
306	Lseek(dumpfd, (off_t)(dumplo + ok(dump_nl[X_DUMPMAG].n_value)), L_SET);
307	Write(dumpfd, &newdumplo, sizeof(newdumplo));
308}
309
310int
311dump_exists()
312{
313	int newdumpmag;
314
315	Lseek(dumpfd, (off_t)(dumplo + ok(dump_nl[X_DUMPMAG].n_value)), L_SET);
316	(void)Read(dumpfd, &newdumpmag, sizeof(newdumpmag));
317	if (newdumpmag != dumpmag) {
318		if (verbose)
319			syslog(LOG_WARNING, "magic number mismatch (%x != %x)",
320			    newdumpmag, dumpmag);
321		syslog(LOG_WARNING, "no core dump");
322		return (0);
323	}
324	return (1);
325}
326
327char buf[1024 * 1024];
328
329void
330save_core()
331{
332	register FILE *fp;
333	register int bounds, ifd, nr, nw, ofd;
334	char *rawp, path[MAXPATHLEN];
335
336	/*
337	 * Get the current number and update the bounds file.  Do the update
338	 * now, because may fail later and don't want to overwrite anything.
339	 */
340	(void)snprintf(path, sizeof(path), "%s/bounds", dirname);
341	if ((fp = fopen(path, "r")) == NULL)
342		goto err1;
343	if (fgets(buf, sizeof(buf), fp) == NULL) {
344		if (ferror(fp))
345err1:			syslog(LOG_WARNING, "%s: %s", path, strerror(errno));
346		bounds = 0;
347	} else
348		bounds = atoi(buf);
349	if (fp != NULL)
350		(void)fclose(fp);
351	if ((fp = fopen(path, "w")) == NULL)
352		syslog(LOG_ERR, "%s: %m", path);
353	else {
354		(void)fprintf(fp, "%d\n", bounds + 1);
355		(void)fclose(fp);
356	}
357	(void)fclose(fp);
358
359	/* Create the core file. */
360	(void)snprintf(path, sizeof(path), "%s/netbsd.%d%s.core",
361	    dirname, bounds, compress ? ".Z" : "");
362	if (compress) {
363		if ((fp = zopen(path, "w", 0)) == NULL) {
364			syslog(LOG_ERR, "%s: %s", path, strerror(errno));
365			exit(1);
366		}
367	} else
368		ofd = Create(path, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
369
370	/* Open the raw device. */
371	rawp = rawname(ddname);
372	if ((ifd = open(rawp, O_RDONLY)) == -1) {
373		syslog(LOG_WARNING, "%s: %m; using block device", rawp);
374		ifd = dumpfd;
375	}
376
377	/* Read the dump size. */
378	Lseek(dumpfd, (off_t)(dumplo + ok(dump_nl[X_DUMPSIZE].n_value)), L_SET);
379	(void)Read(dumpfd, &dumpsize, sizeof(dumpsize));
380
381	/* Seek to the start of the core. */
382	Lseek(ifd, (off_t)dumplo, L_SET);
383
384	/* Copy the core file. */
385	dumpsize *= getpagesize();
386	syslog(LOG_NOTICE, "writing %score to %s",
387	    compress ? "compressed " : "", path);
388	for (; dumpsize > 0; dumpsize -= nr) {
389		(void)printf("%6dK\r", dumpsize / 1024);
390		(void)fflush(stdout);
391		nr = read(ifd, buf, MIN(dumpsize, sizeof(buf)));
392		if (nr <= 0) {
393			if (nr == 0)
394				syslog(LOG_WARNING,
395				    "WARNING: EOF on dump device");
396			else
397				syslog(LOG_ERR, "%s: %m", rawp);
398			goto err2;
399		}
400		if (compress)
401			nw = fwrite(buf, 1, nr, fp);
402		else
403			nw = write(ofd, buf, nr);
404		if (nw != nr) {
405			syslog(LOG_ERR, "%s: %s",
406			    path, strerror(nw == 0 ? EIO : errno));
407err2:			syslog(LOG_WARNING,
408			    "WARNING: core may be incomplete");
409			(void)printf("\n");
410			exit(1);
411		}
412	}
413	(void)printf("\n");
414	(void)close(ifd);
415	if (compress)
416		(void)fclose(fp);
417	else
418		(void)close(ofd);
419
420	/* Copy the kernel. */
421	ifd = Open(kernel ? kernel : _PATH_UNIX, O_RDONLY);
422	(void)snprintf(path, sizeof(path), "%s/netbsd.%d%s",
423	    dirname, bounds, compress ? ".Z" : "");
424	if (compress) {
425		if ((fp = zopen(path, "w", 0)) == NULL) {
426			syslog(LOG_ERR, "%s: %s", path, strerror(errno));
427			exit(1);
428		}
429	} else
430		ofd = Create(path, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
431	syslog(LOG_NOTICE, "writing %skernel to %s",
432	    compress ? "compressed " : "", path);
433	while ((nr = read(ifd, buf, sizeof(buf))) > 0) {
434		if (compress)
435			nw = fwrite(buf, 1, nr, fp);
436		else
437			nw = write(ofd, buf, nr);
438		if (nw != nr) {
439			syslog(LOG_ERR, "%s: %s",
440			    path, strerror(nw == 0 ? EIO : errno));
441			syslog(LOG_WARNING,
442			    "WARNING: kernel may be incomplete");
443			exit(1);
444		}
445	}
446	if (nr < 0) {
447		syslog(LOG_ERR, "%s: %s",
448		    kernel ? kernel : _PATH_UNIX, strerror(errno));
449		syslog(LOG_WARNING,
450		    "WARNING: kernel may be incomplete");
451		exit(1);
452	}
453	if (compress)
454		(void)fclose(fp);
455	else
456		(void)close(ofd);
457}
458
459char *
460find_dev(dev, type)
461	register dev_t dev;
462	register int type;
463{
464	register DIR *dfd;
465	struct dirent *dir;
466	struct stat sb;
467	char *dp, devname[MAXPATHLEN + 1];
468
469	if ((dfd = opendir(_PATH_DEV)) == NULL) {
470		syslog(LOG_ERR, "%s: %s", _PATH_DEV, strerror(errno));
471		exit(1);
472	}
473	(void)strcpy(devname, _PATH_DEV);
474	while ((dir = readdir(dfd))) {
475		(void)strcpy(devname + sizeof(_PATH_DEV) - 1, dir->d_name);
476		if (lstat(devname, &sb)) {
477			syslog(LOG_ERR, "%s: %s", devname, strerror(errno));
478			continue;
479		}
480		if ((sb.st_mode & S_IFMT) != type)
481			continue;
482		if (dev == sb.st_rdev) {
483			closedir(dfd);
484			if ((dp = strdup(devname)) == NULL) {
485				syslog(LOG_ERR, "%s", strerror(errno));
486				exit(1);
487			}
488			return (dp);
489		}
490	}
491	closedir(dfd);
492	syslog(LOG_ERR, "can't find device %d/%d", major(dev), minor(dev));
493	exit(1);
494}
495
/*
 * Build the raw-device name for device path s by inserting an 'r' in
 * front of the last path component: "/dev/sd0b" becomes "/dev/rsd0b".
 *
 * Returns a newly allocated string.  When s contains no '/' or ends in
 * '/', an error is logged and s itself is returned.  Exits if memory
 * cannot be allocated.
 */
char *
rawname(s)
	char *s;
{
	char *sl, *raw;
	size_t dirlen, baselen;

	/* The last component must exist, i.e. must not be the empty string. */
	if ((sl = strrchr(s, '/')) == NULL || sl[1] == '\0') {
		syslog(LOG_ERR,
		    "can't make raw dump device name from %s", s);
		return (s);
	}

	/*
	 * Assemble the name in a buffer of exactly the right size, so a
	 * long path can never be silently truncated.
	 */
	dirlen = (size_t)(sl - s) + 1;		/* up to and including '/' */
	baselen = strlen(sl + 1);
	if ((raw = malloc(dirlen + 1 + baselen + 1)) == NULL) {
		syslog(LOG_ERR, "%s", strerror(errno));
		exit(1);
	}
	(void)memcpy(raw, s, dirlen);
	raw[dirlen] = 'r';
	(void)memcpy(raw + dirlen + 1, sl + 1, baselen + 1);	/* with NUL */
	return (raw);
}
514
515int
516get_crashtime()
517{
518	time_t dumptime;			/* Time the dump was taken. */
519
520	Lseek(dumpfd, (off_t)(dumplo + ok(dump_nl[X_TIME].n_value)), L_SET);
521	(void)Read(dumpfd, &dumptime, sizeof(dumptime));
522	if (dumptime == 0) {
523		if (verbose)
524			syslog(LOG_ERR, "dump time is zero");
525		return (0);
526	}
527	(void)printf("savecore: system went down at %s", ctime(&dumptime));
528#define	LEEWAY	(7 * SECSPERDAY)
529	if (dumptime < now - LEEWAY || dumptime > now + LEEWAY) {
530		(void)printf("dump time is unreasonable\n");
531		return (0);
532	}
533	return (1);
534}
535
536int
537check_space()
538{
539	register FILE *fp;
540	char *tkernel;
541	off_t minfree, spacefree, kernelsize, needed;
542	struct stat st;
543	struct statfs fsbuf;
544	char buf[100], path[MAXPATHLEN];
545
546	tkernel = kernel ? kernel : _PATH_UNIX;
547	if (stat(tkernel, &st) < 0) {
548		syslog(LOG_ERR, "%s: %m", tkernel);
549		exit(1);
550	}
551	kernelsize = st.st_blocks * S_BLKSIZE;
552	if (statfs(dirname, &fsbuf) < 0) {
553		syslog(LOG_ERR, "%s: %m", dirname);
554		exit(1);
555	}
556 	spacefree = (fsbuf.f_bavail * fsbuf.f_bsize) / 1024;
557
558	(void)snprintf(path, sizeof(path), "%s/minfree", dirname);
559	if ((fp = fopen(path, "r")) == NULL)
560		minfree = 0;
561	else {
562		if (fgets(buf, sizeof(buf), fp) == NULL)
563			minfree = 0;
564		else
565			minfree = atoi(buf);
566		(void)fclose(fp);
567	}
568
569	needed = (dumpsize + kernelsize) / 1024;
570 	if (minfree > 0 && spacefree - needed < minfree) {
571		syslog(LOG_WARNING,
572		    "no dump, not enough free space on device");
573		return (0);
574	}
575	if (spacefree - needed < minfree)
576		syslog(LOG_WARNING,
577		    "dump performed, but free space threshold crossed");
578	return (1);
579}
580
/*
 * open(2) wrapper: open `name' with flags `rw' and return the
 * descriptor.  On failure the error is logged and the program exits
 * with status 1, so callers never see a negative return.
 */
int
Open(name, rw)
	char *name;
	int rw;
{
	int fd;

	fd = open(name, rw, 0);
	if (fd >= 0)
		return (fd);

	syslog(LOG_ERR, "%s: %m", name);
	exit(1);
}
594
/*
 * read(2) wrapper that insists on a complete transfer: reads exactly
 * `size' bytes from fd into bp and returns that count.
 *
 * A failed or short read is logged and the program exits with status 1.
 * As in Write(), errno is only reported when the call actually failed;
 * a short read leaves errno stale, so EIO is reported instead.
 */
int
Read(fd, bp, size)
	int fd, size;
	void *bp;
{
	int nr;

	nr = read(fd, bp, size);
	if (nr != size) {
		syslog(LOG_ERR, "read: %s", strerror(nr == -1 ? errno : EIO));
		exit(1);
	}
	return (nr);
}
609
/*
 * lseek(2) wrapper: reposition fd to offset `off' according to `flag'.
 * On failure the error is logged and the program exits with status 1.
 */
void
Lseek(fd, off, flag)
	int fd, flag;
	off_t off;
{
	if (lseek(fd, off, flag) != -1)
		return;

	syslog(LOG_ERR, "lseek: %m");
	exit(1);
}
623
/*
 * Create (or truncate) `file' for writing with permission bits `mode'
 * and return the descriptor.  On failure the error is logged and the
 * program exits with status 1.
 */
int
Create(file, mode)
	char *file;
	int mode;
{
	register int fd;

	if ((fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, mode)) >= 0)
		return (fd);

	syslog(LOG_ERR, "%s: %m", file);
	exit(1);
}
638
/*
 * write(2) wrapper that insists on a complete transfer of `size' bytes
 * from bp to fd.  A failed or short write is logged (errno for a
 * failure, EIO for a short write) and the program exits with status 1.
 */
void
Write(fd, bp, size)
	int fd, size;
	void *bp;
{
	int n;

	n = write(fd, bp, size);
	if (n >= size)
		return;

	syslog(LOG_ERR, "write: %s", strerror(n == -1 ? errno : EIO));
	exit(1);
}
651
/*
 * Log the command synopsis at LOG_ERR and terminate with status 1.
 * Never returns.
 */
void
usage()
{
	syslog(LOG_ERR, "usage: savecore [-cfvz] [-N system] directory");
	exit(1);
}
658