1157908Speter/*-
2157908Speter * Copyright (c) 2006 Peter Wemm
3157908Speter * All rights reserved.
4157908Speter *
5157908Speter * Redistribution and use in source and binary forms, with or without
6157908Speter * modification, are permitted provided that the following conditions
7157908Speter * are met:
8157908Speter *
9157908Speter * 1. Redistributions of source code must retain the above copyright
10157908Speter *    notice, this list of conditions and the following disclaimer.
11157908Speter * 2. Redistributions in binary form must reproduce the above copyright
12157908Speter *    notice, this list of conditions and the following disclaimer in the
13157908Speter *    documentation and/or other materials provided with the distribution.
14157908Speter *
15157908Speter * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16157908Speter * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17157908Speter * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18157908Speter * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19157908Speter * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20157908Speter * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21157908Speter * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22157908Speter * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23157908Speter * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24157908Speter * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25157908Speter */
26157908Speter
27157908Speter#include <sys/cdefs.h>
28157908Speter__FBSDID("$FreeBSD$");
29157908Speter
30225194Sjhb#include "opt_pmap.h"
31221173Sattilio#include "opt_watchdog.h"
32221173Sattilio
33157908Speter#include <sys/param.h>
34157908Speter#include <sys/systm.h>
35157908Speter#include <sys/conf.h>
36157908Speter#include <sys/cons.h>
37157908Speter#include <sys/kernel.h>
38157908Speter#include <sys/kerneldump.h>
39157908Speter#include <sys/msgbuf.h>
40221173Sattilio#include <sys/watchdog.h>
41157908Speter#include <vm/vm.h>
42254065Skib#include <vm/vm_param.h>
43230623Skmacy#include <vm/vm_page.h>
44243132Skib#include <vm/vm_phys.h>
45157908Speter#include <vm/pmap.h>
46157908Speter#include <machine/atomic.h>
47157908Speter#include <machine/elf.h>
48157908Speter#include <machine/md_var.h>
49157908Speter#include <machine/vmparam.h>
50157908Speter#include <machine/minidump.h>
51157908Speter
52157908SpeterCTASSERT(sizeof(struct kerneldumpheader) == 512);
53157908Speter
54157908Speter/*
55157908Speter * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
56157908Speter * is to protect us from metadata and to protect metadata from us.
57157908Speter */
58157908Speter#define	SIZEOF_METADATA		(64*1024)
59157908Speter
60157908Speter#define	MD_ALIGN(x)	(((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
61157908Speter#define	DEV_ALIGN(x)	(((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))
62157908Speter
63157908Speteruint64_t *vm_page_dump;
64157908Speterint vm_page_dump_size;
65157908Speter
66157908Speterstatic struct kerneldumpheader kdh;
67157908Speterstatic off_t dumplo;
68157908Speter
69157908Speter/* Handle chunked writes. */
70157908Speterstatic size_t fragsz;
71157908Speterstatic void *dump_va;
72221069Ssobomaxstatic size_t counter, progress, dumpsize;
73157908Speter
74157908SpeterCTASSERT(sizeof(*vm_page_dump) == 8);
75157908Speter
76157908Speterstatic int
77157908Speteris_dumpable(vm_paddr_t pa)
78157908Speter{
79230623Skmacy	vm_page_t m;
80157908Speter	int i;
81157908Speter
82230623Skmacy	if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL)
83230623Skmacy		return ((m->flags & PG_NODUMP) == 0);
84157908Speter	for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
85157908Speter		if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
86157908Speter			return (1);
87157908Speter	}
88157908Speter	return (0);
89157908Speter}
90157908Speter
91157908Speter#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8)
92157908Speter
93157908Speterstatic int
94157908Speterblk_flush(struct dumperinfo *di)
95157908Speter{
96157908Speter	int error;
97157908Speter
98157908Speter	if (fragsz == 0)
99157908Speter		return (0);
100157908Speter
101175768Sru	error = dump_write(di, dump_va, 0, dumplo, fragsz);
102157908Speter	dumplo += fragsz;
103157908Speter	fragsz = 0;
104157908Speter	return (error);
105157908Speter}
106157908Speter
/*
 * Progress buckets, one per ten percent.  'visited' records that the
 * bucket has already been announced so each one is printed only once.
 * Adjacent buckets share their boundary value; the lower bucket wins
 * because the table is searched from the start.
 */
static struct {
	int min_per;
	int max_per;
	int visited;
} progress_track[10] = {
	{  0,  10, 0},
	{ 10,  20, 0},
	{ 20,  30, 0},
	{ 30,  40, 0},
	{ 40,  50, 0},
	{ 50,  60, 0},
	{ 60,  70, 0},
	{ 70,  80, 0},
	{ 80,  90, 0},
	{ 90, 100, 0}
};

/*
 * Print the completed percentage the first time the dump enters a new
 * ten-percent bucket.
 *
 * progress: bytes still left to write.
 * dumpsize: total number of bytes in the dump; must not be zero.
 */
static void
report_progress(size_t progress, size_t dumpsize)
{
	const int nbuckets =
	    (int)(sizeof(progress_track) / sizeof(progress_track[0]));
	int pct, slot;

	pct = 100 - ((progress * 100) / dumpsize);

	/* Locate the first bucket whose range contains pct. */
	for (slot = 0; slot < nbuckets; slot++) {
		if (pct >= progress_track[slot].min_per &&
		    pct <= progress_track[slot].max_per)
			break;
	}

	/* Out of range, or this bucket was already announced. */
	if (slot == nbuckets || progress_track[slot].visited)
		return;

	progress_track[slot].visited = 1;
	printf("..%d%%", pct);
}
141221069Ssobomax
142157908Speterstatic int
143157908Speterblk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
144157908Speter{
145157908Speter	size_t len;
146157908Speter	int error, i, c;
147176304Sscottl	u_int maxdumpsz;
148157908Speter
149184499Skib	maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE);
150176304Sscottl	if (maxdumpsz == 0)	/* seatbelt */
151176304Sscottl		maxdumpsz = PAGE_SIZE;
152157908Speter	error = 0;
153157908Speter	if ((sz % PAGE_SIZE) != 0) {
154157908Speter		printf("size not page aligned\n");
155157908Speter		return (EINVAL);
156157908Speter	}
157157908Speter	if (ptr != NULL && pa != 0) {
158157908Speter		printf("cant have both va and pa!\n");
159157908Speter		return (EINVAL);
160157908Speter	}
161257575Skib	if ((((uintptr_t)pa) % PAGE_SIZE) != 0) {
162257575Skib		printf("address not page aligned %p\n", ptr);
163157908Speter		return (EINVAL);
164157908Speter	}
165157908Speter	if (ptr != NULL) {
166157908Speter		/* If we're doing a virtual dump, flush any pre-existing pa pages */
167157908Speter		error = blk_flush(di);
168157908Speter		if (error)
169157908Speter			return (error);
170157908Speter	}
171157908Speter	while (sz) {
172176304Sscottl		len = maxdumpsz - fragsz;
173157908Speter		if (len > sz)
174157908Speter			len = sz;
175157908Speter		counter += len;
176157908Speter		progress -= len;
177157908Speter		if (counter >> 24) {
178221069Ssobomax			report_progress(progress, dumpsize);
179157908Speter			counter &= (1<<24) - 1;
180157908Speter		}
181236503Savg
182221173Sattilio		wdog_kern_pat(WD_LASTVAL);
183236503Savg
184157908Speter		if (ptr) {
185175768Sru			error = dump_write(di, ptr, 0, dumplo, len);
186157908Speter			if (error)
187157908Speter				return (error);
188157908Speter			dumplo += len;
189157908Speter			ptr += len;
190157908Speter			sz -= len;
191157908Speter		} else {
192157908Speter			for (i = 0; i < len; i += PAGE_SIZE)
193157908Speter				dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT);
194157908Speter			fragsz += len;
195157908Speter			pa += len;
196157908Speter			sz -= len;
197176304Sscottl			if (fragsz == maxdumpsz) {
198157908Speter				error = blk_flush(di);
199157908Speter				if (error)
200157908Speter					return (error);
201157908Speter			}
202157908Speter		}
203157908Speter
204157908Speter		/* Check for user abort. */
205157908Speter		c = cncheckc();
206157908Speter		if (c == 0x03)
207157908Speter			return (ECANCELED);
208157908Speter		if (c != -1)
209157908Speter			printf(" (CTRL-C to abort) ");
210157908Speter	}
211157908Speter
212157908Speter	return (0);
213157908Speter}
214157908Speter
215157908Speter/* A fake page table page, to avoid having to handle both 4K and 2M pages */
216215133Savgstatic pd_entry_t fakepd[NPDEPG];
217157908Speter
218157908Spetervoid
219157908Speterminidumpsys(struct dumperinfo *di)
220157908Speter{
221215133Savg	uint32_t pmapsize;
222157908Speter	vm_offset_t va;
223157908Speter	int error;
224157908Speter	uint64_t bits;
225254547Sneel	uint64_t *pml4, *pdp, *pd, *pt, pa;
226254547Sneel	int i, ii, j, k, n, bit;
227215133Savg	int retry_count;
228157908Speter	struct minidumphdr mdhdr;
229157908Speter
230215133Savg	retry_count = 0;
231215133Savg retry:
232215133Savg	retry_count++;
233157908Speter	counter = 0;
234257575Skib	for (i = 0; i < nitems(progress_track); i++)
235257575Skib		progress_track[i].visited = 0;
236157908Speter	/* Walk page table pages, set bits in vm_page_dump */
237215133Savg	pmapsize = 0;
238246384Sneel	for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NBPDR,
239215133Savg	    kernel_vm_end); ) {
240157908Speter		/*
241157908Speter		 * We always write a page, even if it is zero. Each
242215133Savg		 * page written corresponds to 1GB of space
243157908Speter		 */
244215133Savg		pmapsize += PAGE_SIZE;
245254547Sneel		ii = (va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1);
246254547Sneel		pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii;
247254547Sneel		pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
248215133Savg		i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
249215133Savg		if ((pdp[i] & PG_V) == 0) {
250215133Savg			va += NBPDP;
251157908Speter			continue;
252215133Savg		}
253215133Savg
254215133Savg		/*
255215133Savg		 * 1GB page is represented as 512 2MB pages in a dump.
256215133Savg		 */
257215133Savg		if ((pdp[i] & PG_PS) != 0) {
258215133Savg			va += NBPDP;
259215133Savg			pa = pdp[i] & PG_PS_FRAME;
260215133Savg			for (n = 0; n < NPDEPG * NPTEPG; n++) {
261157908Speter				if (is_dumpable(pa))
262157908Speter					dump_add_page(pa);
263157908Speter				pa += PAGE_SIZE;
264157908Speter			}
265157908Speter			continue;
266157908Speter		}
267215133Savg
268215133Savg		pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
269215133Savg		for (n = 0; n < NPDEPG; n++, va += NBPDR) {
270215133Savg			j = (va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1);
271215133Savg
272215133Savg			if ((pd[j] & PG_V) == 0)
273215133Savg				continue;
274215133Savg
275215133Savg			if ((pd[j] & PG_PS) != 0) {
276215133Savg				/* This is an entire 2M page. */
277215133Savg				pa = pd[j] & PG_PS_FRAME;
278215133Savg				for (k = 0; k < NPTEPG; k++) {
279157908Speter					if (is_dumpable(pa))
280157908Speter						dump_add_page(pa);
281215133Savg					pa += PAGE_SIZE;
282157908Speter				}
283215133Savg				continue;
284157908Speter			}
285215133Savg
286215133Savg			pa = pd[j] & PG_FRAME;
287215133Savg			/* set bit for this PTE page */
288215133Savg			if (is_dumpable(pa))
289215133Savg				dump_add_page(pa);
290215133Savg			/* and for each valid page in this 2MB block */
291215133Savg			pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
292215133Savg			for (k = 0; k < NPTEPG; k++) {
293215133Savg				if ((pt[k] & PG_V) == 0)
294215133Savg					continue;
295215133Savg				pa = pt[k] & PG_FRAME;
296215133Savg				if (is_dumpable(pa))
297215133Savg					dump_add_page(pa);
298215133Savg			}
299157908Speter		}
300157908Speter	}
301157908Speter
302157908Speter	/* Calculate dump size. */
303215133Savg	dumpsize = pmapsize;
304157908Speter	dumpsize += round_page(msgbufp->msg_size);
305157908Speter	dumpsize += round_page(vm_page_dump_size);
306157908Speter	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
307157908Speter		bits = vm_page_dump[i];
308157908Speter		while (bits) {
309157908Speter			bit = bsfq(bits);
310157908Speter			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
311157908Speter			/* Clear out undumpable pages now if needed */
312157908Speter			if (is_dumpable(pa)) {
313157908Speter				dumpsize += PAGE_SIZE;
314157908Speter			} else {
315157908Speter				dump_drop_page(pa);
316157908Speter			}
317157908Speter			bits &= ~(1ul << bit);
318157908Speter		}
319157908Speter	}
320157908Speter	dumpsize += PAGE_SIZE;
321157908Speter
322157908Speter	/* Determine dump offset on device. */
323157908Speter	if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
324215133Savg		error = E2BIG;
325157908Speter		goto fail;
326157908Speter	}
327157908Speter	dumplo = di->mediaoffset + di->mediasize - dumpsize;
328157908Speter	dumplo -= sizeof(kdh) * 2;
329157908Speter	progress = dumpsize;
330157908Speter
331157908Speter	/* Initialize mdhdr */
332157908Speter	bzero(&mdhdr, sizeof(mdhdr));
333157908Speter	strcpy(mdhdr.magic, MINIDUMP_MAGIC);
334157908Speter	mdhdr.version = MINIDUMP_VERSION;
335157908Speter	mdhdr.msgbufsize = msgbufp->msg_size;
336157908Speter	mdhdr.bitmapsize = vm_page_dump_size;
337215133Savg	mdhdr.pmapsize = pmapsize;
338179898Salc	mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS;
339157908Speter	mdhdr.dmapbase = DMAP_MIN_ADDRESS;
340157908Speter	mdhdr.dmapend = DMAP_MAX_ADDRESS;
341157908Speter
342183527Speter	mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, dumpsize, di->blocksize);
343157908Speter
344221069Ssobomax	printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20,
345221069Ssobomax	    ptoa((uintmax_t)physmem) / 1048576);
346157908Speter
347157908Speter	/* Dump leader */
348175768Sru	error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
349157908Speter	if (error)
350157908Speter		goto fail;
351157908Speter	dumplo += sizeof(kdh);
352157908Speter
353157908Speter	/* Dump my header */
354215133Savg	bzero(&fakepd, sizeof(fakepd));
355215133Savg	bcopy(&mdhdr, &fakepd, sizeof(mdhdr));
356215133Savg	error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
357157908Speter	if (error)
358157908Speter		goto fail;
359157908Speter
360157908Speter	/* Dump msgbuf up front */
361157908Speter	error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
362157908Speter	if (error)
363157908Speter		goto fail;
364157908Speter
365157908Speter	/* Dump bitmap */
366157908Speter	error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
367157908Speter	if (error)
368157908Speter		goto fail;
369157908Speter
370215133Savg	/* Dump kernel page directory pages */
371215133Savg	bzero(fakepd, sizeof(fakepd));
372246384Sneel	for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NBPDR,
373215133Savg	    kernel_vm_end); va += NBPDP) {
374254547Sneel		ii = (va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1);
375254547Sneel		pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii;
376254547Sneel		pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
377157908Speter		i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
378215133Savg
379157908Speter		/* We always write a page, even if it is zero */
380157908Speter		if ((pdp[i] & PG_V) == 0) {
381215133Savg			error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
382157908Speter			if (error)
383157908Speter				goto fail;
384215133Savg			/* flush, in case we reuse fakepd in the same block */
385157908Speter			error = blk_flush(di);
386157908Speter			if (error)
387157908Speter				goto fail;
388157908Speter			continue;
389157908Speter		}
390215133Savg
391215133Savg		/* 1GB page is represented as 512 2MB pages in a dump */
392215133Savg		if ((pdp[i] & PG_PS) != 0) {
393215133Savg			/* PDPE and PDP have identical layout in this case */
394215133Savg			fakepd[0] = pdp[i];
395215133Savg			for (j = 1; j < NPDEPG; j++)
396215133Savg				fakepd[j] = fakepd[j - 1] + NBPDR;
397215133Savg			error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
398157908Speter			if (error)
399157908Speter				goto fail;
400215133Savg			/* flush, in case we reuse fakepd in the same block */
401157908Speter			error = blk_flush(di);
402157908Speter			if (error)
403157908Speter				goto fail;
404215133Savg			bzero(fakepd, sizeof(fakepd));
405157908Speter			continue;
406157908Speter		}
407215133Savg
408215133Savg		pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
409215133Savg		error = blk_write(di, (char *)pd, 0, PAGE_SIZE);
410215133Savg		if (error)
411215133Savg			goto fail;
412215133Savg		error = blk_flush(di);
413215133Savg		if (error)
414215133Savg			goto fail;
415157908Speter	}
416157908Speter
417157908Speter	/* Dump memory chunks */
418157908Speter	/* XXX cluster it up and use blk_dump() */
419157908Speter	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
420157908Speter		bits = vm_page_dump[i];
421157908Speter		while (bits) {
422157908Speter			bit = bsfq(bits);
423157908Speter			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
424157908Speter			error = blk_write(di, 0, pa, PAGE_SIZE);
425157908Speter			if (error)
426157908Speter				goto fail;
427157908Speter			bits &= ~(1ul << bit);
428157908Speter		}
429157908Speter	}
430157908Speter
431157908Speter	error = blk_flush(di);
432157908Speter	if (error)
433157908Speter		goto fail;
434157908Speter
435157908Speter	/* Dump trailer */
436175768Sru	error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
437157908Speter	if (error)
438157908Speter		goto fail;
439157908Speter	dumplo += sizeof(kdh);
440157908Speter
441157908Speter	/* Signal completion, signoff and exit stage left. */
442175768Sru	dump_write(di, NULL, 0, 0, 0);
443157908Speter	printf("\nDump complete\n");
444157908Speter	return;
445157908Speter
446157908Speter fail:
447157908Speter	if (error < 0)
448157908Speter		error = -error;
449157908Speter
450215133Savg	printf("\n");
451215133Savg	if (error == ENOSPC) {
452215133Savg		printf("Dump map grown while dumping. ");
453215133Savg		if (retry_count < 5) {
454215133Savg			printf("Retrying...\n");
455215133Savg			goto retry;
456215133Savg		}
457215133Savg		printf("Dump failed.\n");
458215133Savg	}
459215133Savg	else if (error == ECANCELED)
460215133Savg		printf("Dump aborted\n");
461215133Savg	else if (error == E2BIG)
462215133Savg		printf("Dump failed. Partition too small.\n");
463157908Speter	else
464215133Savg		printf("** DUMP FAILED (ERROR %d) **\n", error);
465157908Speter}
466157908Speter
467157908Spetervoid
468157908Speterdump_add_page(vm_paddr_t pa)
469157908Speter{
470157908Speter	int idx, bit;
471157908Speter
472157908Speter	pa >>= PAGE_SHIFT;
473157908Speter	idx = pa >> 6;		/* 2^6 = 64 */
474157908Speter	bit = pa & 63;
475157908Speter	atomic_set_long(&vm_page_dump[idx], 1ul << bit);
476157908Speter}
477157908Speter
478157908Spetervoid
479157908Speterdump_drop_page(vm_paddr_t pa)
480157908Speter{
481157908Speter	int idx, bit;
482157908Speter
483157908Speter	pa >>= PAGE_SHIFT;
484157908Speter	idx = pa >> 6;		/* 2^6 = 64 */
485157908Speter	bit = pa & 63;
486157908Speter	atomic_clear_long(&vm_page_dump[idx], 1ul << bit);
487157908Speter}
488