minidump_machdep.c revision 181803
1/*-
2 * Copyright (c) 2006 Peter Wemm
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/amd64/amd64/minidump_machdep.c 181803 2008-08-17 23:27:27Z bz $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/conf.h>
33#include <sys/cons.h>
34#include <sys/kernel.h>
35#include <sys/kerneldump.h>
36#include <sys/msgbuf.h>
37#include <sys/vimage.h>
38#include <vm/vm.h>
39#include <vm/pmap.h>
40#include <machine/atomic.h>
41#include <machine/elf.h>
42#include <machine/md_var.h>
43#include <machine/vmparam.h>
44#include <machine/minidump.h>
45
46CTASSERT(sizeof(struct kerneldumpheader) == 512);
47
48/*
49 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
50 * is to protect us from metadata and to protect metadata from us.
51 */
52#define	SIZEOF_METADATA		(64*1024)
53
54#define	MD_ALIGN(x)	(((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
55#define	DEV_ALIGN(x)	(((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))
56
57extern uint64_t KPDPphys;
58
59uint64_t *vm_page_dump;
60int vm_page_dump_size;
61
62static struct kerneldumpheader kdh;
63static off_t dumplo;
64
65/* Handle chunked writes. */
66static size_t fragsz;
67static void *dump_va;
68static size_t counter, progress;
69
70CTASSERT(sizeof(*vm_page_dump) == 8);
71
72static int
73is_dumpable(vm_paddr_t pa)
74{
75	int i;
76
77	for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
78		if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
79			return (1);
80	}
81	return (0);
82}
83
84/* XXX should be MI */
85static void
86mkdumpheader(struct kerneldumpheader *kdh, uint32_t archver, uint64_t dumplen,
87    uint32_t blksz)
88{
89
90	bzero(kdh, sizeof(*kdh));
91	strncpy(kdh->magic, KERNELDUMPMAGIC, sizeof(kdh->magic));
92	strncpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture));
93	kdh->version = htod32(KERNELDUMPVERSION);
94	kdh->architectureversion = htod32(archver);
95	kdh->dumplength = htod64(dumplen);
96	kdh->dumptime = htod64(time_second);
97	kdh->blocksize = htod32(blksz);
98	strncpy(kdh->hostname, G_hostname, sizeof(kdh->hostname));
99	strncpy(kdh->versionstring, version, sizeof(kdh->versionstring));
100	if (panicstr != NULL)
101		strncpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring));
102	kdh->parity = kerneldump_parity(kdh);
103}
104
/*
 * Convert a page count to megabytes, rounding up.  256 (1 << 8) pages
 * make up one megabyte.
 */
#define	PG2MB(pgs)	(((pgs) + ((1 << 8) - 1)) >> 8)
106
107static int
108blk_flush(struct dumperinfo *di)
109{
110	int error;
111
112	if (fragsz == 0)
113		return (0);
114
115	error = dump_write(di, dump_va, 0, dumplo, fragsz);
116	dumplo += fragsz;
117	fragsz = 0;
118	return (error);
119}
120
121static int
122blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
123{
124	size_t len;
125	int error, i, c;
126	u_int maxdumpsz;
127
128	maxdumpsz = di->maxiosize;
129	if (maxdumpsz == 0)	/* seatbelt */
130		maxdumpsz = PAGE_SIZE;
131	error = 0;
132	if ((sz % PAGE_SIZE) != 0) {
133		printf("size not page aligned\n");
134		return (EINVAL);
135	}
136	if (ptr != NULL && pa != 0) {
137		printf("cant have both va and pa!\n");
138		return (EINVAL);
139	}
140	if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) {
141		printf("address not page aligned\n");
142		return (EINVAL);
143	}
144	if (ptr != NULL) {
145		/* If we're doing a virtual dump, flush any pre-existing pa pages */
146		error = blk_flush(di);
147		if (error)
148			return (error);
149	}
150	while (sz) {
151		len = maxdumpsz - fragsz;
152		if (len > sz)
153			len = sz;
154		counter += len;
155		progress -= len;
156		if (counter >> 24) {
157			printf(" %ld", PG2MB(progress >> PAGE_SHIFT));
158			counter &= (1<<24) - 1;
159		}
160		if (ptr) {
161			error = dump_write(di, ptr, 0, dumplo, len);
162			if (error)
163				return (error);
164			dumplo += len;
165			ptr += len;
166			sz -= len;
167		} else {
168			for (i = 0; i < len; i += PAGE_SIZE)
169				dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT);
170			fragsz += len;
171			pa += len;
172			sz -= len;
173			if (fragsz == maxdumpsz) {
174				error = blk_flush(di);
175				if (error)
176					return (error);
177			}
178		}
179
180		/* Check for user abort. */
181		c = cncheckc();
182		if (c == 0x03)
183			return (ECANCELED);
184		if (c != -1)
185			printf(" (CTRL-C to abort) ");
186	}
187
188	return (0);
189}
190
191/* A fake page table page, to avoid having to handle both 4K and 2M pages */
192static pt_entry_t fakept[NPTEPG];
193
194void
195minidumpsys(struct dumperinfo *di)
196{
197	uint64_t dumpsize;
198	uint32_t ptesize;
199	vm_offset_t va;
200	int error;
201	uint64_t bits;
202	uint64_t *pdp, *pd, *pt, pa;
203	int i, j, k, bit;
204	struct minidumphdr mdhdr;
205
206	counter = 0;
207	/* Walk page table pages, set bits in vm_page_dump */
208	ptesize = 0;
209	pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
210	for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NBPDR,
211	    kernel_vm_end); va += NBPDR) {
212		i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
213		/*
214		 * We always write a page, even if it is zero. Each
215		 * page written corresponds to 2MB of space
216		 */
217		ptesize += PAGE_SIZE;
218		if ((pdp[i] & PG_V) == 0)
219			continue;
220		pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
221		j = ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
222		if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V))  {
223			/* This is an entire 2M page. */
224			pa = pd[j] & PG_PS_FRAME;
225			for (k = 0; k < NPTEPG; k++) {
226				if (is_dumpable(pa))
227					dump_add_page(pa);
228				pa += PAGE_SIZE;
229			}
230			continue;
231		}
232		if ((pd[j] & PG_V) == PG_V) {
233			/* set bit for each valid page in this 2MB block */
234			pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
235			for (k = 0; k < NPTEPG; k++) {
236				if ((pt[k] & PG_V) == PG_V) {
237					pa = pt[k] & PG_FRAME;
238					if (is_dumpable(pa))
239						dump_add_page(pa);
240				}
241			}
242		} else {
243			/* nothing, we're going to dump a null page */
244		}
245	}
246
247	/* Calculate dump size. */
248	dumpsize = ptesize;
249	dumpsize += round_page(msgbufp->msg_size);
250	dumpsize += round_page(vm_page_dump_size);
251	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
252		bits = vm_page_dump[i];
253		while (bits) {
254			bit = bsfq(bits);
255			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
256			/* Clear out undumpable pages now if needed */
257			if (is_dumpable(pa)) {
258				dumpsize += PAGE_SIZE;
259			} else {
260				dump_drop_page(pa);
261			}
262			bits &= ~(1ul << bit);
263		}
264	}
265	dumpsize += PAGE_SIZE;
266
267	/* Determine dump offset on device. */
268	if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
269		error = ENOSPC;
270		goto fail;
271	}
272	dumplo = di->mediaoffset + di->mediasize - dumpsize;
273	dumplo -= sizeof(kdh) * 2;
274	progress = dumpsize;
275
276	/* Initialize mdhdr */
277	bzero(&mdhdr, sizeof(mdhdr));
278	strcpy(mdhdr.magic, MINIDUMP_MAGIC);
279	mdhdr.version = MINIDUMP_VERSION;
280	mdhdr.msgbufsize = msgbufp->msg_size;
281	mdhdr.bitmapsize = vm_page_dump_size;
282	mdhdr.ptesize = ptesize;
283	mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS;
284	mdhdr.dmapbase = DMAP_MIN_ADDRESS;
285	mdhdr.dmapend = DMAP_MAX_ADDRESS;
286
287	mkdumpheader(&kdh, KERNELDUMP_AMD64_VERSION, dumpsize, di->blocksize);
288
289	printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576);
290	printf("Dumping %llu MB:", (long long)dumpsize >> 20);
291
292	/* Dump leader */
293	error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
294	if (error)
295		goto fail;
296	dumplo += sizeof(kdh);
297
298	/* Dump my header */
299	bzero(&fakept, sizeof(fakept));
300	bcopy(&mdhdr, &fakept, sizeof(mdhdr));
301	error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
302	if (error)
303		goto fail;
304
305	/* Dump msgbuf up front */
306	error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
307	if (error)
308		goto fail;
309
310	/* Dump bitmap */
311	error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
312	if (error)
313		goto fail;
314
315	/* Dump kernel page table pages */
316	pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
317	for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NBPDR,
318	    kernel_vm_end); va += NBPDR) {
319		i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
320		/* We always write a page, even if it is zero */
321		if ((pdp[i] & PG_V) == 0) {
322			bzero(fakept, sizeof(fakept));
323			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
324			if (error)
325				goto fail;
326			/* flush, in case we reuse fakept in the same block */
327			error = blk_flush(di);
328			if (error)
329				goto fail;
330			continue;
331		}
332		pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
333		j = ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
334		if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V))  {
335			/* This is a single 2M block. Generate a fake PTP */
336			pa = pd[j] & PG_PS_FRAME;
337			for (k = 0; k < NPTEPG; k++) {
338				fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M;
339			}
340			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
341			if (error)
342				goto fail;
343			/* flush, in case we reuse fakept in the same block */
344			error = blk_flush(di);
345			if (error)
346				goto fail;
347			continue;
348		}
349		if ((pd[j] & PG_V) == PG_V) {
350			pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
351			error = blk_write(di, (char *)pt, 0, PAGE_SIZE);
352			if (error)
353				goto fail;
354		} else {
355			bzero(fakept, sizeof(fakept));
356			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
357			if (error)
358				goto fail;
359			/* flush, in case we reuse fakept in the same block */
360			error = blk_flush(di);
361			if (error)
362				goto fail;
363		}
364	}
365
366	/* Dump memory chunks */
367	/* XXX cluster it up and use blk_dump() */
368	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
369		bits = vm_page_dump[i];
370		while (bits) {
371			bit = bsfq(bits);
372			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
373			error = blk_write(di, 0, pa, PAGE_SIZE);
374			if (error)
375				goto fail;
376			bits &= ~(1ul << bit);
377		}
378	}
379
380	error = blk_flush(di);
381	if (error)
382		goto fail;
383
384	/* Dump trailer */
385	error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
386	if (error)
387		goto fail;
388	dumplo += sizeof(kdh);
389
390	/* Signal completion, signoff and exit stage left. */
391	dump_write(di, NULL, 0, 0, 0);
392	printf("\nDump complete\n");
393	return;
394
395 fail:
396	if (error < 0)
397		error = -error;
398
399	if (error == ECANCELED)
400		printf("\nDump aborted\n");
401	else if (error == ENOSPC)
402		printf("\nDump failed. Partition too small.\n");
403	else
404		printf("\n** DUMP FAILED (ERROR %d) **\n", error);
405}
406
407void
408dump_add_page(vm_paddr_t pa)
409{
410	int idx, bit;
411
412	pa >>= PAGE_SHIFT;
413	idx = pa >> 6;		/* 2^6 = 64 */
414	bit = pa & 63;
415	atomic_set_long(&vm_page_dump[idx], 1ul << bit);
416}
417
418void
419dump_drop_page(vm_paddr_t pa)
420{
421	int idx, bit;
422
423	pa >>= PAGE_SHIFT;
424	idx = pa >> 6;		/* 2^6 = 64 */
425	bit = pa & 63;
426	atomic_clear_long(&vm_page_dump[idx], 1ul << bit);
427}
428