/* minidump_machdep.c revision 221069 */
/*-
 * Copyright (c) 2006 Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/amd64/amd64/minidump_machdep.c 221069 2011-04-26 16:14:55Z sobomax $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/conf.h>
33#include <sys/cons.h>
34#include <sys/kernel.h>
35#include <sys/kerneldump.h>
36#include <sys/msgbuf.h>
37#include <vm/vm.h>
38#include <vm/pmap.h>
39#include <machine/atomic.h>
40#include <machine/elf.h>
41#include <machine/md_var.h>
42#include <machine/vmparam.h>
43#include <machine/minidump.h>
44
45CTASSERT(sizeof(struct kerneldumpheader) == 512);
46
47/*
48 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
49 * is to protect us from metadata and to protect metadata from us.
50 */
51#define	SIZEOF_METADATA		(64*1024)
52
53#define	MD_ALIGN(x)	(((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
54#define	DEV_ALIGN(x)	(((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))
55
56uint64_t *vm_page_dump;
57int vm_page_dump_size;
58
59static struct kerneldumpheader kdh;
60static off_t dumplo;
61
62/* Handle chunked writes. */
63static size_t fragsz;
64static void *dump_va;
65static size_t counter, progress, dumpsize;
66
67CTASSERT(sizeof(*vm_page_dump) == 8);
68
69static int
70is_dumpable(vm_paddr_t pa)
71{
72	int i;
73
74	for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
75		if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
76			return (1);
77	}
78	return (0);
79}
80
/* Convert a page count to megabytes, rounding up (2^8 pages per MB). */
#define PG2MB(pgs) (((pgs) + ((1 << 8) - 1)) >> 8)
82
83static int
84blk_flush(struct dumperinfo *di)
85{
86	int error;
87
88	if (fragsz == 0)
89		return (0);
90
91	error = dump_write(di, dump_va, 0, dumplo, fragsz);
92	dumplo += fragsz;
93	fragsz = 0;
94	return (error);
95}
96
/*
 * Progress buckets, one per decile.  "visited" is set once a percentage
 * falling in [min_per, max_per] has been printed, so each bucket is
 * reported at most once.
 */
static struct {
	int min_per;
	int max_per;
	int visited;
} progress_track[10] = {
	{  0,  10, 0},
	{ 10,  20, 0},
	{ 20,  30, 0},
	{ 30,  40, 0},
	{ 40,  50, 0},
	{ 50,  60, 0},
	{ 60,  70, 0},
	{ 70,  80, 0},
	{ 80,  90, 0},
	{ 90, 100, 0}
};

/*
 * Print the completed percentage the first time it enters a new bucket.
 *
 * progress is the number of bytes still to be written and dumpsize the
 * total, so the percentage done is 100 - progress * 100 / dumpsize.
 * Bucket bounds are inclusive on both ends; a value on a shared boundary
 * belongs to the lower bucket because that one is found first.
 */
static void
report_progress(size_t progress, size_t dumpsize)
{
	int pct, slot;

	pct = 100 - ((progress * 100) / dumpsize);

	/* Locate the first bucket containing pct. */
	for (slot = 0; slot < 10; slot++) {
		if (pct >= progress_track[slot].min_per &&
		    pct <= progress_track[slot].max_per)
			break;
	}

	/* Nothing to do if no bucket matched or it was already reported. */
	if (slot == 10 || progress_track[slot].visited)
		return;

	progress_track[slot].visited = 1;
	printf("..%d%%", pct);
}
130
131static int
132blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
133{
134	size_t len;
135	int error, i, c;
136	u_int maxdumpsz;
137
138	maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE);
139	if (maxdumpsz == 0)	/* seatbelt */
140		maxdumpsz = PAGE_SIZE;
141	error = 0;
142	if ((sz % PAGE_SIZE) != 0) {
143		printf("size not page aligned\n");
144		return (EINVAL);
145	}
146	if (ptr != NULL && pa != 0) {
147		printf("cant have both va and pa!\n");
148		return (EINVAL);
149	}
150	if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) {
151		printf("address not page aligned\n");
152		return (EINVAL);
153	}
154	if (ptr != NULL) {
155		/* If we're doing a virtual dump, flush any pre-existing pa pages */
156		error = blk_flush(di);
157		if (error)
158			return (error);
159	}
160	while (sz) {
161		len = maxdumpsz - fragsz;
162		if (len > sz)
163			len = sz;
164		counter += len;
165		progress -= len;
166		if (counter >> 24) {
167			report_progress(progress, dumpsize);
168			counter &= (1<<24) - 1;
169		}
170		if (ptr) {
171			error = dump_write(di, ptr, 0, dumplo, len);
172			if (error)
173				return (error);
174			dumplo += len;
175			ptr += len;
176			sz -= len;
177		} else {
178			for (i = 0; i < len; i += PAGE_SIZE)
179				dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT);
180			fragsz += len;
181			pa += len;
182			sz -= len;
183			if (fragsz == maxdumpsz) {
184				error = blk_flush(di);
185				if (error)
186					return (error);
187			}
188		}
189
190		/* Check for user abort. */
191		c = cncheckc();
192		if (c == 0x03)
193			return (ECANCELED);
194		if (c != -1)
195			printf(" (CTRL-C to abort) ");
196	}
197
198	return (0);
199}
200
201/* A fake page table page, to avoid having to handle both 4K and 2M pages */
202static pd_entry_t fakepd[NPDEPG];
203
204void
205minidumpsys(struct dumperinfo *di)
206{
207	uint32_t pmapsize;
208	vm_offset_t va;
209	int error;
210	uint64_t bits;
211	uint64_t *pdp, *pd, *pt, pa;
212	int i, j, k, n, bit;
213	int retry_count;
214	struct minidumphdr mdhdr;
215
216	retry_count = 0;
217 retry:
218	retry_count++;
219	counter = 0;
220	/* Walk page table pages, set bits in vm_page_dump */
221	pmapsize = 0;
222	pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
223	for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NBPDR,
224	    kernel_vm_end); ) {
225		/*
226		 * We always write a page, even if it is zero. Each
227		 * page written corresponds to 1GB of space
228		 */
229		pmapsize += PAGE_SIZE;
230		i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
231		if ((pdp[i] & PG_V) == 0) {
232			va += NBPDP;
233			continue;
234		}
235
236		/*
237		 * 1GB page is represented as 512 2MB pages in a dump.
238		 */
239		if ((pdp[i] & PG_PS) != 0) {
240			va += NBPDP;
241			pa = pdp[i] & PG_PS_FRAME;
242			for (n = 0; n < NPDEPG * NPTEPG; n++) {
243				if (is_dumpable(pa))
244					dump_add_page(pa);
245				pa += PAGE_SIZE;
246			}
247			continue;
248		}
249
250		pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
251		for (n = 0; n < NPDEPG; n++, va += NBPDR) {
252			j = (va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1);
253
254			if ((pd[j] & PG_V) == 0)
255				continue;
256
257			if ((pd[j] & PG_PS) != 0) {
258				/* This is an entire 2M page. */
259				pa = pd[j] & PG_PS_FRAME;
260				for (k = 0; k < NPTEPG; k++) {
261					if (is_dumpable(pa))
262						dump_add_page(pa);
263					pa += PAGE_SIZE;
264				}
265				continue;
266			}
267
268			pa = pd[j] & PG_FRAME;
269			/* set bit for this PTE page */
270			if (is_dumpable(pa))
271				dump_add_page(pa);
272			/* and for each valid page in this 2MB block */
273			pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
274			for (k = 0; k < NPTEPG; k++) {
275				if ((pt[k] & PG_V) == 0)
276					continue;
277				pa = pt[k] & PG_FRAME;
278				if (is_dumpable(pa))
279					dump_add_page(pa);
280			}
281		}
282	}
283
284	/* Calculate dump size. */
285	dumpsize = pmapsize;
286	dumpsize += round_page(msgbufp->msg_size);
287	dumpsize += round_page(vm_page_dump_size);
288	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
289		bits = vm_page_dump[i];
290		while (bits) {
291			bit = bsfq(bits);
292			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
293			/* Clear out undumpable pages now if needed */
294			if (is_dumpable(pa)) {
295				dumpsize += PAGE_SIZE;
296			} else {
297				dump_drop_page(pa);
298			}
299			bits &= ~(1ul << bit);
300		}
301	}
302	dumpsize += PAGE_SIZE;
303
304	/* Determine dump offset on device. */
305	if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
306		error = E2BIG;
307		goto fail;
308	}
309	dumplo = di->mediaoffset + di->mediasize - dumpsize;
310	dumplo -= sizeof(kdh) * 2;
311	progress = dumpsize;
312
313	/* Initialize mdhdr */
314	bzero(&mdhdr, sizeof(mdhdr));
315	strcpy(mdhdr.magic, MINIDUMP_MAGIC);
316	mdhdr.version = MINIDUMP_VERSION;
317	mdhdr.msgbufsize = msgbufp->msg_size;
318	mdhdr.bitmapsize = vm_page_dump_size;
319	mdhdr.pmapsize = pmapsize;
320	mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS;
321	mdhdr.dmapbase = DMAP_MIN_ADDRESS;
322	mdhdr.dmapend = DMAP_MAX_ADDRESS;
323
324	mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, dumpsize, di->blocksize);
325
326	printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20,
327	    ptoa((uintmax_t)physmem) / 1048576);
328
329	/* Dump leader */
330	error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
331	if (error)
332		goto fail;
333	dumplo += sizeof(kdh);
334
335	/* Dump my header */
336	bzero(&fakepd, sizeof(fakepd));
337	bcopy(&mdhdr, &fakepd, sizeof(mdhdr));
338	error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
339	if (error)
340		goto fail;
341
342	/* Dump msgbuf up front */
343	error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
344	if (error)
345		goto fail;
346
347	/* Dump bitmap */
348	error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
349	if (error)
350		goto fail;
351
352	/* Dump kernel page directory pages */
353	bzero(fakepd, sizeof(fakepd));
354	pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
355	for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NBPDR,
356	    kernel_vm_end); va += NBPDP) {
357		i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
358
359		/* We always write a page, even if it is zero */
360		if ((pdp[i] & PG_V) == 0) {
361			error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
362			if (error)
363				goto fail;
364			/* flush, in case we reuse fakepd in the same block */
365			error = blk_flush(di);
366			if (error)
367				goto fail;
368			continue;
369		}
370
371		/* 1GB page is represented as 512 2MB pages in a dump */
372		if ((pdp[i] & PG_PS) != 0) {
373			/* PDPE and PDP have identical layout in this case */
374			fakepd[0] = pdp[i];
375			for (j = 1; j < NPDEPG; j++)
376				fakepd[j] = fakepd[j - 1] + NBPDR;
377			error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
378			if (error)
379				goto fail;
380			/* flush, in case we reuse fakepd in the same block */
381			error = blk_flush(di);
382			if (error)
383				goto fail;
384			bzero(fakepd, sizeof(fakepd));
385			continue;
386		}
387
388		pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
389		error = blk_write(di, (char *)pd, 0, PAGE_SIZE);
390		if (error)
391			goto fail;
392		error = blk_flush(di);
393		if (error)
394			goto fail;
395	}
396
397	/* Dump memory chunks */
398	/* XXX cluster it up and use blk_dump() */
399	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
400		bits = vm_page_dump[i];
401		while (bits) {
402			bit = bsfq(bits);
403			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
404			error = blk_write(di, 0, pa, PAGE_SIZE);
405			if (error)
406				goto fail;
407			bits &= ~(1ul << bit);
408		}
409	}
410
411	error = blk_flush(di);
412	if (error)
413		goto fail;
414
415	/* Dump trailer */
416	error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
417	if (error)
418		goto fail;
419	dumplo += sizeof(kdh);
420
421	/* Signal completion, signoff and exit stage left. */
422	dump_write(di, NULL, 0, 0, 0);
423	printf("\nDump complete\n");
424	return;
425
426 fail:
427	if (error < 0)
428		error = -error;
429
430	printf("\n");
431	if (error == ENOSPC) {
432		printf("Dump map grown while dumping. ");
433		if (retry_count < 5) {
434			printf("Retrying...\n");
435			goto retry;
436		}
437		printf("Dump failed.\n");
438	}
439	else if (error == ECANCELED)
440		printf("Dump aborted\n");
441	else if (error == E2BIG)
442		printf("Dump failed. Partition too small.\n");
443	else
444		printf("** DUMP FAILED (ERROR %d) **\n", error);
445}
446
447void
448dump_add_page(vm_paddr_t pa)
449{
450	int idx, bit;
451
452	pa >>= PAGE_SHIFT;
453	idx = pa >> 6;		/* 2^6 = 64 */
454	bit = pa & 63;
455	atomic_set_long(&vm_page_dump[idx], 1ul << bit);
456}
457
458void
459dump_drop_page(vm_paddr_t pa)
460{
461	int idx, bit;
462
463	pa >>= PAGE_SHIFT;
464	idx = pa >> 6;		/* 2^6 = 64 */
465	bit = pa & 63;
466	atomic_clear_long(&vm_page_dump[idx], 1ul << bit);
467}
468