/* minidump_machdep.c revision 193066 */
/*-
 * Copyright (c) 2006 Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
26256905Sray
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/amd64/amd64/minidump_machdep.c 193066 2009-05-29 21:27:12Z jamie $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/cons.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/msgbuf.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/atomic.h>
#include <machine/elf.h>
#include <machine/md_var.h>
#include <machine/vmparam.h>
#include <machine/minidump.h>

/* Compile-time check: the kernel dump header must be exactly 512 bytes. */
CTASSERT(sizeof(struct kerneldumpheader) == 512);

/*
 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
 * is to protect us from metadata and to protect metadata from us.
 */
#define	SIZEOF_METADATA		(64*1024)

/* Round x up to the next multiple of (PAGE_MASK + 1), as an off_t. */
#define	MD_ALIGN(x)	(((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
/* Round x up to the next multiple of DEV_BSIZE, as an off_t. */
#define	DEV_ALIGN(x)	(((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))

/*
 * Defined elsewhere; minidumpsys() maps it through PHYS_TO_DMAP() and walks
 * the result as the kernel's page-directory-pointer table.
 */
extern uint64_t KPDPphys;

/*
 * Bitmap of physical pages to include in the dump: one bit per page frame,
 * 64 frames per word (see dump_add_page()/dump_drop_page()).
 * vm_page_dump_size is the bitmap size in bytes.
 */
uint64_t *vm_page_dump;
int vm_page_dump_size;

/* Kernel dump header, written both as the leader and the trailer. */
static struct kerneldumpheader kdh;
/* Current write offset on the dump device. */
static off_t dumplo;

/* Handle chunked writes. */
/* Bytes of physical pages currently mapped at dump_va and not yet written. */
static size_t fragsz;
/* Address returned by pmap_kenter_temporary() for the pending pages. */
static void *dump_va;
/*
 * counter: bytes written since the last progress message.
 * progress: bytes of the dump still to be written.
 */
static size_t counter, progress;

/* dump_add_page()/dump_drop_page() hard-code 64 bits per bitmap word. */
CTASSERT(sizeof(*vm_page_dump) == 8);
70257438Sray
71257438Sraystatic int
72256905Srayis_dumpable(vm_paddr_t pa)
73256905Sray{
74256905Sray	int i;
75256905Sray
76256905Sray	for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
77256905Sray		if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
78256905Sray			return (1);
79256905Sray	}
80256905Sray	return (0);
81256905Sray}
82256905Sray
/*
 * Convert a page count to units of 256 pages, rounding up.
 * NOTE(review): this equals megabytes only if pages are 4 KB -- confirm.
 */
#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8)
84256905Sray
85256905Sraystatic int
86256905Srayblk_flush(struct dumperinfo *di)
87256905Sray{
88256905Sray	int error;
89256905Sray
90256905Sray	if (fragsz == 0)
91256905Sray		return (0);
92256905Sray
93256905Sray	error = dump_write(di, dump_va, 0, dumplo, fragsz);
94256905Sray	dumplo += fragsz;
95256905Sray	fragsz = 0;
96256905Sray	return (error);
97256905Sray}
98256905Sray
99256905Sraystatic int
100256905Srayblk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
101256905Sray{
102256905Sray	size_t len;
103256905Sray	int error, i, c;
104256905Sray	u_int maxdumpsz;
105256905Sray
106256905Sray	maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE);
107256905Sray	if (maxdumpsz == 0)	/* seatbelt */
108256905Sray		maxdumpsz = PAGE_SIZE;
109256905Sray	error = 0;
110256905Sray	if ((sz % PAGE_SIZE) != 0) {
111256905Sray		printf("size not page aligned\n");
112256905Sray		return (EINVAL);
113256905Sray	}
114256905Sray	if (ptr != NULL && pa != 0) {
115258491Sray		printf("cant have both va and pa!\n");
116258491Sray		return (EINVAL);
117256905Sray	}
118258491Sray	if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) {
119258491Sray		printf("address not page aligned\n");
120258491Sray		return (EINVAL);
121258491Sray	}
122258491Sray	if (ptr != NULL) {
123258491Sray		/* If we're doing a virtual dump, flush any pre-existing pa pages */
124258491Sray		error = blk_flush(di);
125258491Sray		if (error)
126258491Sray			return (error);
127258491Sray	}
128258491Sray	while (sz) {
129258491Sray		len = maxdumpsz - fragsz;
130258491Sray		if (len > sz)
131258491Sray			len = sz;
132258491Sray		counter += len;
133258491Sray		progress -= len;
134258491Sray		if (counter >> 24) {
135258491Sray			printf(" %ld", PG2MB(progress >> PAGE_SHIFT));
136258491Sray			counter &= (1<<24) - 1;
137258491Sray		}
138258491Sray		if (ptr) {
139258491Sray			error = dump_write(di, ptr, 0, dumplo, len);
140278846Shselasky			if (error)
141278846Shselasky				return (error);
142258491Sray			dumplo += len;
143258491Sray			ptr += len;
144258491Sray			sz -= len;
145258491Sray		} else {
146258491Sray			for (i = 0; i < len; i += PAGE_SIZE)
147258491Sray				dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT);
148258491Sray			fragsz += len;
149256905Sray			pa += len;
150256905Sray			sz -= len;
151256905Sray			if (fragsz == maxdumpsz) {
152256905Sray				error = blk_flush(di);
153256905Sray				if (error)
154256905Sray					return (error);
155256905Sray			}
156256905Sray		}
157256905Sray
158256905Sray		/* Check for user abort. */
159256905Sray		c = cncheckc();
160256905Sray		if (c == 0x03)
161256905Sray			return (ECANCELED);
162256905Sray		if (c != -1)
163256905Sray			printf(" (CTRL-C to abort) ");
164256905Sray	}
165256905Sray
166256905Sray	return (0);
167256905Sray}
168256905Sray
/*
 * A fake page table page, to avoid having to handle both 4K and 2M pages.
 * minidumpsys() fills it with synthesized 4K entries for a 2M mapping, or
 * zeroes it for an unmapped region, and also uses it as a page-sized staging
 * buffer for the minidump header.
 */
static pt_entry_t fakept[NPTEPG];
171256905Sray
172269620Snwhitehornvoid
173279752Shselaskyminidumpsys(struct dumperinfo *di)
174269620Snwhitehorn{
175269620Snwhitehorn	uint64_t dumpsize;
176279752Shselasky	uint32_t ptesize;
177279752Shselasky	vm_offset_t va;
178279752Shselasky	int error;
179279752Shselasky	uint64_t bits;
180279752Shselasky	uint64_t *pdp, *pd, *pt, pa;
181307589Sgonzo	int i, j, k, bit;
182307589Sgonzo	struct minidumphdr mdhdr;
183256905Sray
184256905Sray	counter = 0;
185256905Sray	/* Walk page table pages, set bits in vm_page_dump */
186256905Sray	ptesize = 0;
187256905Sray	pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
188256905Sray	for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NBPDR,
189256905Sray	    kernel_vm_end); va += NBPDR) {
190256905Sray		i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
191256905Sray		/*
192256905Sray		 * We always write a page, even if it is zero. Each
193256905Sray		 * page written corresponds to 2MB of space
194256905Sray		 */
195256905Sray		ptesize += PAGE_SIZE;
196256905Sray		if ((pdp[i] & PG_V) == 0)
197259777Sray			continue;
198256905Sray		pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
199256905Sray		j = ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
200256905Sray		if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V))  {
201256905Sray			/* This is an entire 2M page. */
202256905Sray			pa = pd[j] & PG_PS_FRAME;
203256905Sray			for (k = 0; k < NPTEPG; k++) {
204256905Sray				if (is_dumpable(pa))
205256905Sray					dump_add_page(pa);
206256905Sray				pa += PAGE_SIZE;
207256905Sray			}
208256905Sray			continue;
209256905Sray		}
210256905Sray		if ((pd[j] & PG_V) == PG_V) {
211256905Sray			/* set bit for each valid page in this 2MB block */
212256905Sray			pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
213256905Sray			for (k = 0; k < NPTEPG; k++) {
214256905Sray				if ((pt[k] & PG_V) == PG_V) {
215256905Sray					pa = pt[k] & PG_FRAME;
216256905Sray					if (is_dumpable(pa))
217256905Sray						dump_add_page(pa);
218256905Sray				}
219256905Sray			}
220256905Sray		} else {
221256905Sray			/* nothing, we're going to dump a null page */
222256905Sray		}
223256905Sray	}
224256905Sray
225256905Sray	/* Calculate dump size. */
226256905Sray	dumpsize = ptesize;
227256905Sray	dumpsize += round_page(msgbufp->msg_size);
228256905Sray	dumpsize += round_page(vm_page_dump_size);
229256905Sray	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
230256905Sray		bits = vm_page_dump[i];
231256905Sray		while (bits) {
232256905Sray			bit = bsfq(bits);
233256905Sray			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
234256905Sray			/* Clear out undumpable pages now if needed */
235256905Sray			if (is_dumpable(pa)) {
236256905Sray				dumpsize += PAGE_SIZE;
237256905Sray			} else {
238256905Sray				dump_drop_page(pa);
239256905Sray			}
240256905Sray			bits &= ~(1ul << bit);
241256905Sray		}
242256905Sray	}
243256905Sray	dumpsize += PAGE_SIZE;
244256905Sray
245256905Sray	/* Determine dump offset on device. */
246256905Sray	if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
247256905Sray		error = ENOSPC;
248256905Sray		goto fail;
249256905Sray	}
250256905Sray	dumplo = di->mediaoffset + di->mediasize - dumpsize;
251256905Sray	dumplo -= sizeof(kdh) * 2;
252256905Sray	progress = dumpsize;
253256905Sray
254256905Sray	/* Initialize mdhdr */
255256905Sray	bzero(&mdhdr, sizeof(mdhdr));
256256905Sray	strcpy(mdhdr.magic, MINIDUMP_MAGIC);
257269620Snwhitehorn	mdhdr.version = MINIDUMP_VERSION;
258269779Sdumbbell	mdhdr.msgbufsize = msgbufp->msg_size;
259269779Sdumbbell	mdhdr.bitmapsize = vm_page_dump_size;
260269779Sdumbbell	mdhdr.ptesize = ptesize;
261269620Snwhitehorn	mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS;
262256905Sray	mdhdr.dmapbase = DMAP_MIN_ADDRESS;
263256905Sray	mdhdr.dmapend = DMAP_MAX_ADDRESS;
264256905Sray
265256905Sray	mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, dumpsize, di->blocksize);
266256905Sray
267256905Sray	printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576);
268256905Sray	printf("Dumping %llu MB:", (long long)dumpsize >> 20);
269256905Sray
270256905Sray	/* Dump leader */
271256905Sray	error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
272256905Sray	if (error)
273256905Sray		goto fail;
274279488Sdumbbell	dumplo += sizeof(kdh);
275279488Sdumbbell
276256905Sray	/* Dump my header */
277256905Sray	bzero(&fakept, sizeof(fakept));
278256905Sray	bcopy(&mdhdr, &fakept, sizeof(mdhdr));
279256905Sray	error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
280256905Sray	if (error)
281256905Sray		goto fail;
282256905Sray
283256905Sray	/* Dump msgbuf up front */
284256905Sray	error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
285256905Sray	if (error)
286256905Sray		goto fail;
287256905Sray
288256905Sray	/* Dump bitmap */
289256905Sray	error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
290256905Sray	if (error)
291256905Sray		goto fail;
292256905Sray
293256905Sray	/* Dump kernel page table pages */
294256905Sray	pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
295256905Sray	for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NBPDR,
296256905Sray	    kernel_vm_end); va += NBPDR) {
297256905Sray		i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
298256905Sray		/* We always write a page, even if it is zero */
299256905Sray		if ((pdp[i] & PG_V) == 0) {
300256905Sray			bzero(fakept, sizeof(fakept));
301256905Sray			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
302256905Sray			if (error)
303256905Sray				goto fail;
304256905Sray			/* flush, in case we reuse fakept in the same block */
305256905Sray			error = blk_flush(di);
306256905Sray			if (error)
307257438Sray				goto fail;
308257438Sray			continue;
309257438Sray		}
310257438Sray		pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
311257438Sray		j = ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
312257438Sray		if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V))  {
313257438Sray			/* This is a single 2M block. Generate a fake PTP */
314257438Sray			pa = pd[j] & PG_PS_FRAME;
315257438Sray			for (k = 0; k < NPTEPG; k++) {
316257438Sray				fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M;
317257438Sray			}
318257438Sray			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
319257438Sray			if (error)
320257438Sray				goto fail;
321257438Sray			/* flush, in case we reuse fakept in the same block */
322257438Sray			error = blk_flush(di);
323257438Sray			if (error)
324257438Sray				goto fail;
325257438Sray			continue;
326257546Sray		}
327257546Sray		if ((pd[j] & PG_V) == PG_V) {
328257438Sray			pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
329257438Sray			error = blk_write(di, (char *)pt, 0, PAGE_SIZE);
330257438Sray			if (error)
331257438Sray				goto fail;
332257438Sray		} else {
333257438Sray			bzero(fakept, sizeof(fakept));
334257438Sray			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
335257438Sray			if (error)
336257438Sray				goto fail;
337257438Sray			/* flush, in case we reuse fakept in the same block */
338257438Sray			error = blk_flush(di);
339257438Sray			if (error)
340257438Sray				goto fail;
341257438Sray		}
342257438Sray	}
343257438Sray
344257438Sray	/* Dump memory chunks */
345257438Sray	/* XXX cluster it up and use blk_dump() */
346257438Sray	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
347257438Sray		bits = vm_page_dump[i];
348257438Sray		while (bits) {
349257438Sray			bit = bsfq(bits);
350257438Sray			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
351257438Sray			error = blk_write(di, 0, pa, PAGE_SIZE);
352257438Sray			if (error)
353257438Sray				goto fail;
354257438Sray			bits &= ~(1ul << bit);
355257438Sray		}
356257438Sray	}
357257438Sray
358257438Sray	error = blk_flush(di);
359257438Sray	if (error)
360257438Sray		goto fail;
361257438Sray
362257438Sray	/* Dump trailer */
363257438Sray	error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
364257438Sray	if (error)
365257438Sray		goto fail;
366257517Sray	dumplo += sizeof(kdh);
367279752Shselasky
368257438Sray	/* Signal completion, signoff and exit stage left. */
369257438Sray	dump_write(di, NULL, 0, 0, 0);
370	printf("\nDump complete\n");
371	return;
372
373 fail:
374	if (error < 0)
375		error = -error;
376
377	if (error == ECANCELED)
378		printf("\nDump aborted\n");
379	else if (error == ENOSPC)
380		printf("\nDump failed. Partition too small.\n");
381	else
382		printf("\n** DUMP FAILED (ERROR %d) **\n", error);
383}
384
385void
386dump_add_page(vm_paddr_t pa)
387{
388	int idx, bit;
389
390	pa >>= PAGE_SHIFT;
391	idx = pa >> 6;		/* 2^6 = 64 */
392	bit = pa & 63;
393	atomic_set_long(&vm_page_dump[idx], 1ul << bit);
394}
395
396void
397dump_drop_page(vm_paddr_t pa)
398{
399	int idx, bit;
400
401	pa >>= PAGE_SHIFT;
402	idx = pa >> 6;		/* 2^6 = 64 */
403	bit = pa & 63;
404	atomic_clear_long(&vm_page_dump[idx], 1ul << bit);
405}
406