minidump_machdep.c revision 225194
1/*-
2 * Copyright (c) 2006 Peter Wemm
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/amd64/amd64/minidump_machdep.c 225194 2011-08-26 17:08:22Z jhb $");
29
30#include "opt_pmap.h"
31#include "opt_watchdog.h"
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/conf.h>
36#include <sys/cons.h>
37#include <sys/kernel.h>
38#include <sys/kerneldump.h>
39#include <sys/msgbuf.h>
40#ifdef SW_WATCHDOG
41#include <sys/watchdog.h>
42#endif
43#include <vm/vm.h>
44#include <vm/pmap.h>
45#include <machine/atomic.h>
46#include <machine/elf.h>
47#include <machine/md_var.h>
48#include <machine/vmparam.h>
49#include <machine/minidump.h>
50
51CTASSERT(sizeof(struct kerneldumpheader) == 512);
52
53/*
54 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
55 * is to protect us from metadata and to protect metadata from us.
56 */
57#define	SIZEOF_METADATA		(64*1024)
58
59#define	MD_ALIGN(x)	(((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
60#define	DEV_ALIGN(x)	(((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))
61
62uint64_t *vm_page_dump;
63int vm_page_dump_size;
64
65static struct kerneldumpheader kdh;
66static off_t dumplo;
67
68/* Handle chunked writes. */
69static size_t fragsz;
70static void *dump_va;
71static size_t counter, progress, dumpsize;
72
73CTASSERT(sizeof(*vm_page_dump) == 8);
74
75static int
76is_dumpable(vm_paddr_t pa)
77{
78	int i;
79
80	for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
81		if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
82			return (1);
83	}
84	return (0);
85}
86
/* Convert a count of 4KB pages to megabytes (256 pages each), rounding up. */
#define PG2MB(pgs) (((pgs) + 0xff) >> 8)
88
89static int
90blk_flush(struct dumperinfo *di)
91{
92	int error;
93
94	if (fragsz == 0)
95		return (0);
96
97	error = dump_write(di, dump_va, 0, dumplo, fragsz);
98	dumplo += fragsz;
99	fragsz = 0;
100	return (error);
101}
102
/*
 * Progress milestones.  Each entry covers an inclusive percentage range
 * and remembers whether a progress message has already been printed for
 * that range.
 */
static struct {
	int min_per;
	int max_per;
	int visited;
} progress_track[10] = {
	{  0,  10, 0},
	{ 10,  20, 0},
	{ 20,  30, 0},
	{ 30,  40, 0},
	{ 40,  50, 0},
	{ 50,  60, 0},
	{ 60,  70, 0},
	{ 70,  80, 0},
	{ 80,  90, 0},
	{ 90, 100, 0}
};

/*
 * Print a "..N%" marker the first time the completed percentage falls
 * into a given progress_track[] range.
 *
 * progress is the number of bytes still to be written and dumpsize the
 * total, so the completed percentage is 100 - progress * 100 / dumpsize.
 * Ranges share their end points; the first matching entry wins.
 */
static void
report_progress(size_t progress, size_t dumpsize)
{
	int pct, slot;

	pct = 100 - ((progress * 100) / dumpsize);
	for (slot = 0; slot < 10; slot++) {
		if (pct >= progress_track[slot].min_per &&
		    pct <= progress_track[slot].max_per) {
			/* Only the first hit in a range prints anything. */
			if (!progress_track[slot].visited) {
				progress_track[slot].visited = 1;
				printf("..%d%%", pct);
			}
			return;
		}
	}
}
136
137static int
138blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
139{
140	size_t len;
141	int error, i, c;
142	u_int maxdumpsz;
143
144	maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE);
145	if (maxdumpsz == 0)	/* seatbelt */
146		maxdumpsz = PAGE_SIZE;
147	error = 0;
148	if ((sz % PAGE_SIZE) != 0) {
149		printf("size not page aligned\n");
150		return (EINVAL);
151	}
152	if (ptr != NULL && pa != 0) {
153		printf("cant have both va and pa!\n");
154		return (EINVAL);
155	}
156	if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) {
157		printf("address not page aligned\n");
158		return (EINVAL);
159	}
160	if (ptr != NULL) {
161		/* If we're doing a virtual dump, flush any pre-existing pa pages */
162		error = blk_flush(di);
163		if (error)
164			return (error);
165	}
166	while (sz) {
167		len = maxdumpsz - fragsz;
168		if (len > sz)
169			len = sz;
170		counter += len;
171		progress -= len;
172		if (counter >> 24) {
173			report_progress(progress, dumpsize);
174			counter &= (1<<24) - 1;
175		}
176#ifdef SW_WATCHDOG
177		wdog_kern_pat(WD_LASTVAL);
178#endif
179		if (ptr) {
180			error = dump_write(di, ptr, 0, dumplo, len);
181			if (error)
182				return (error);
183			dumplo += len;
184			ptr += len;
185			sz -= len;
186		} else {
187			for (i = 0; i < len; i += PAGE_SIZE)
188				dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT);
189			fragsz += len;
190			pa += len;
191			sz -= len;
192			if (fragsz == maxdumpsz) {
193				error = blk_flush(di);
194				if (error)
195					return (error);
196			}
197		}
198
199		/* Check for user abort. */
200		c = cncheckc();
201		if (c == 0x03)
202			return (ECANCELED);
203		if (c != -1)
204			printf(" (CTRL-C to abort) ");
205	}
206
207	return (0);
208}
209
210/* A fake page table page, to avoid having to handle both 4K and 2M pages */
211static pd_entry_t fakepd[NPDEPG];
212
213void
214minidumpsys(struct dumperinfo *di)
215{
216	uint32_t pmapsize;
217	vm_offset_t va;
218	int error;
219	uint64_t bits;
220	uint64_t *pdp, *pd, *pt, pa;
221	int i, j, k, n, bit;
222	int retry_count;
223	struct minidumphdr mdhdr;
224
225	retry_count = 0;
226 retry:
227	retry_count++;
228	counter = 0;
229	/* Walk page table pages, set bits in vm_page_dump */
230	pmapsize = 0;
231	pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
232	for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NBPDR,
233	    kernel_vm_end); ) {
234		/*
235		 * We always write a page, even if it is zero. Each
236		 * page written corresponds to 1GB of space
237		 */
238		pmapsize += PAGE_SIZE;
239		i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
240		if ((pdp[i] & PG_V) == 0) {
241			va += NBPDP;
242			continue;
243		}
244
245		/*
246		 * 1GB page is represented as 512 2MB pages in a dump.
247		 */
248		if ((pdp[i] & PG_PS) != 0) {
249			va += NBPDP;
250			pa = pdp[i] & PG_PS_FRAME;
251			for (n = 0; n < NPDEPG * NPTEPG; n++) {
252				if (is_dumpable(pa))
253					dump_add_page(pa);
254				pa += PAGE_SIZE;
255			}
256			continue;
257		}
258
259		pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
260		for (n = 0; n < NPDEPG; n++, va += NBPDR) {
261			j = (va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1);
262
263			if ((pd[j] & PG_V) == 0)
264				continue;
265
266			if ((pd[j] & PG_PS) != 0) {
267				/* This is an entire 2M page. */
268				pa = pd[j] & PG_PS_FRAME;
269				for (k = 0; k < NPTEPG; k++) {
270					if (is_dumpable(pa))
271						dump_add_page(pa);
272					pa += PAGE_SIZE;
273				}
274				continue;
275			}
276
277			pa = pd[j] & PG_FRAME;
278			/* set bit for this PTE page */
279			if (is_dumpable(pa))
280				dump_add_page(pa);
281			/* and for each valid page in this 2MB block */
282			pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
283			for (k = 0; k < NPTEPG; k++) {
284				if ((pt[k] & PG_V) == 0)
285					continue;
286				pa = pt[k] & PG_FRAME;
287				if (is_dumpable(pa))
288					dump_add_page(pa);
289			}
290		}
291	}
292
293	/* Calculate dump size. */
294	dumpsize = pmapsize;
295	dumpsize += round_page(msgbufp->msg_size);
296	dumpsize += round_page(vm_page_dump_size);
297	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
298		bits = vm_page_dump[i];
299		while (bits) {
300			bit = bsfq(bits);
301			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
302			/* Clear out undumpable pages now if needed */
303			if (is_dumpable(pa)) {
304				dumpsize += PAGE_SIZE;
305			} else {
306				dump_drop_page(pa);
307			}
308			bits &= ~(1ul << bit);
309		}
310	}
311	dumpsize += PAGE_SIZE;
312
313	/* Determine dump offset on device. */
314	if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
315		error = E2BIG;
316		goto fail;
317	}
318	dumplo = di->mediaoffset + di->mediasize - dumpsize;
319	dumplo -= sizeof(kdh) * 2;
320	progress = dumpsize;
321
322	/* Initialize mdhdr */
323	bzero(&mdhdr, sizeof(mdhdr));
324	strcpy(mdhdr.magic, MINIDUMP_MAGIC);
325	mdhdr.version = MINIDUMP_VERSION;
326	mdhdr.msgbufsize = msgbufp->msg_size;
327	mdhdr.bitmapsize = vm_page_dump_size;
328	mdhdr.pmapsize = pmapsize;
329	mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS;
330	mdhdr.dmapbase = DMAP_MIN_ADDRESS;
331	mdhdr.dmapend = DMAP_MAX_ADDRESS;
332
333	mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, dumpsize, di->blocksize);
334
335	printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20,
336	    ptoa((uintmax_t)physmem) / 1048576);
337
338	/* Dump leader */
339	error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
340	if (error)
341		goto fail;
342	dumplo += sizeof(kdh);
343
344	/* Dump my header */
345	bzero(&fakepd, sizeof(fakepd));
346	bcopy(&mdhdr, &fakepd, sizeof(mdhdr));
347	error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
348	if (error)
349		goto fail;
350
351	/* Dump msgbuf up front */
352	error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
353	if (error)
354		goto fail;
355
356	/* Dump bitmap */
357	error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
358	if (error)
359		goto fail;
360
361	/* Dump kernel page directory pages */
362	bzero(fakepd, sizeof(fakepd));
363	pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
364	for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NBPDR,
365	    kernel_vm_end); va += NBPDP) {
366		i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
367
368		/* We always write a page, even if it is zero */
369		if ((pdp[i] & PG_V) == 0) {
370			error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
371			if (error)
372				goto fail;
373			/* flush, in case we reuse fakepd in the same block */
374			error = blk_flush(di);
375			if (error)
376				goto fail;
377			continue;
378		}
379
380		/* 1GB page is represented as 512 2MB pages in a dump */
381		if ((pdp[i] & PG_PS) != 0) {
382			/* PDPE and PDP have identical layout in this case */
383			fakepd[0] = pdp[i];
384			for (j = 1; j < NPDEPG; j++)
385				fakepd[j] = fakepd[j - 1] + NBPDR;
386			error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
387			if (error)
388				goto fail;
389			/* flush, in case we reuse fakepd in the same block */
390			error = blk_flush(di);
391			if (error)
392				goto fail;
393			bzero(fakepd, sizeof(fakepd));
394			continue;
395		}
396
397		pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
398		error = blk_write(di, (char *)pd, 0, PAGE_SIZE);
399		if (error)
400			goto fail;
401		error = blk_flush(di);
402		if (error)
403			goto fail;
404	}
405
406	/* Dump memory chunks */
407	/* XXX cluster it up and use blk_dump() */
408	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
409		bits = vm_page_dump[i];
410		while (bits) {
411			bit = bsfq(bits);
412			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
413			error = blk_write(di, 0, pa, PAGE_SIZE);
414			if (error)
415				goto fail;
416			bits &= ~(1ul << bit);
417		}
418	}
419
420	error = blk_flush(di);
421	if (error)
422		goto fail;
423
424	/* Dump trailer */
425	error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
426	if (error)
427		goto fail;
428	dumplo += sizeof(kdh);
429
430	/* Signal completion, signoff and exit stage left. */
431	dump_write(di, NULL, 0, 0, 0);
432	printf("\nDump complete\n");
433	return;
434
435 fail:
436	if (error < 0)
437		error = -error;
438
439	printf("\n");
440	if (error == ENOSPC) {
441		printf("Dump map grown while dumping. ");
442		if (retry_count < 5) {
443			printf("Retrying...\n");
444			goto retry;
445		}
446		printf("Dump failed.\n");
447	}
448	else if (error == ECANCELED)
449		printf("Dump aborted\n");
450	else if (error == E2BIG)
451		printf("Dump failed. Partition too small.\n");
452	else
453		printf("** DUMP FAILED (ERROR %d) **\n", error);
454}
455
456void
457dump_add_page(vm_paddr_t pa)
458{
459	int idx, bit;
460
461	pa >>= PAGE_SHIFT;
462	idx = pa >> 6;		/* 2^6 = 64 */
463	bit = pa & 63;
464	atomic_set_long(&vm_page_dump[idx], 1ul << bit);
465}
466
467void
468dump_drop_page(vm_paddr_t pa)
469{
470	int idx, bit;
471
472	pa >>= PAGE_SHIFT;
473	idx = pa >> 6;		/* 2^6 = 64 */
474	bit = pa & 63;
475	atomic_clear_long(&vm_page_dump[idx], 1ul << bit);
476}
477