1157909Speter/*-
2157909Speter * Copyright (c) 2006 Peter Wemm
3157909Speter * All rights reserved.
4157909Speter *
5157909Speter * Redistribution and use in source and binary forms, with or without
6157909Speter * modification, are permitted provided that the following conditions
7157909Speter * are met:
8157909Speter *
9157909Speter * 1. Redistributions of source code must retain the above copyright
10157909Speter *    notice, this list of conditions and the following disclaimer.
11157909Speter * 2. Redistributions in binary form must reproduce the above copyright
12157909Speter *    notice, this list of conditions and the following disclaimer in the
13157909Speter *    documentation and/or other materials provided with the distribution.
14157909Speter *
15157909Speter * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16157909Speter * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17157909Speter * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18157909Speter * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19157909Speter * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20157909Speter * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21157909Speter * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22157909Speter * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23157909Speter * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24157909Speter * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25157909Speter */
26157909Speter
27157909Speter#include <sys/cdefs.h>
28157909Speter__FBSDID("$FreeBSD$");
29157909Speter
30221173Sattilio#include "opt_watchdog.h"
31221173Sattilio
32157909Speter#include <sys/param.h>
33157909Speter#include <sys/systm.h>
34157909Speter#include <sys/conf.h>
35157909Speter#include <sys/cons.h>
36157909Speter#include <sys/kernel.h>
37157909Speter#include <sys/kerneldump.h>
38157909Speter#include <sys/msgbuf.h>
39221173Sattilio#include <sys/watchdog.h>
40157909Speter#include <vm/vm.h>
41157909Speter#include <vm/pmap.h>
42157909Speter#include <machine/atomic.h>
43157909Speter#include <machine/elf.h>
44157909Speter#include <machine/md_var.h>
45157909Speter#include <machine/vmparam.h>
46157909Speter#include <machine/minidump.h>
47157909Speter
48157909SpeterCTASSERT(sizeof(struct kerneldumpheader) == 512);
49157909Speter
50157909Speter/*
51157909Speter * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
52157909Speter * is to protect us from metadata and to protect metadata from us.
53157909Speter */
54157909Speter#define	SIZEOF_METADATA		(64*1024)
55157909Speter
56157909Speter#define	MD_ALIGN(x)	(((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
57157909Speter#define	DEV_ALIGN(x)	(((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))
58157909Speter
59157909Speteruint32_t *vm_page_dump;
60157909Speterint vm_page_dump_size;
61157909Speter
62157909Speterstatic struct kerneldumpheader kdh;
63157909Speterstatic off_t dumplo;
64157909Speter
65157909Speter/* Handle chunked writes. */
66157909Speterstatic size_t fragsz;
67157909Speterstatic void *dump_va;
68157909Speterstatic uint64_t counter, progress;
69157909Speter
70157909SpeterCTASSERT(sizeof(*vm_page_dump) == 4);
71200346Skmacy#ifndef XEN
72200346Skmacy#define xpmap_mtop(x) (x)
73200346Skmacy#define xpmap_ptom(x) (x)
74200346Skmacy#endif
75157909Speter
76200346Skmacy
77157909Speterstatic int
78157909Speteris_dumpable(vm_paddr_t pa)
79157909Speter{
80157909Speter	int i;
81157909Speter
82157909Speter	for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
83157909Speter		if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
84157909Speter			return (1);
85157909Speter	}
86157909Speter	return (0);
87157909Speter}
88157909Speter
89157909Speter#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8)
90157909Speter
91157909Speterstatic int
92157909Speterblk_flush(struct dumperinfo *di)
93157909Speter{
94157909Speter	int error;
95157909Speter
96157909Speter	if (fragsz == 0)
97157909Speter		return (0);
98157909Speter
99175768Sru	error = dump_write(di, dump_va, 0, dumplo, fragsz);
100157909Speter	dumplo += fragsz;
101157909Speter	fragsz = 0;
102157909Speter	return (error);
103157909Speter}
104157909Speter
105157909Speterstatic int
106157909Speterblk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
107157909Speter{
108157909Speter	size_t len;
109157909Speter	int error, i, c;
110176304Sscottl	u_int maxdumpsz;
111157909Speter
112184500Skib	maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE);
113176304Sscottl	if (maxdumpsz == 0)	/* seatbelt */
114176304Sscottl		maxdumpsz = PAGE_SIZE;
115157909Speter	error = 0;
116157909Speter	if ((sz % PAGE_SIZE) != 0) {
117157909Speter		printf("size not page aligned\n");
118157909Speter		return (EINVAL);
119157909Speter	}
120157909Speter	if (ptr != NULL && pa != 0) {
121157909Speter		printf("cant have both va and pa!\n");
122157909Speter		return (EINVAL);
123157909Speter	}
124157909Speter	if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) {
125157909Speter		printf("address not page aligned\n");
126157909Speter		return (EINVAL);
127157909Speter	}
128157909Speter	if (ptr != NULL) {
129157909Speter		/* If we're doing a virtual dump, flush any pre-existing pa pages */
130157909Speter		error = blk_flush(di);
131157909Speter		if (error)
132157909Speter			return (error);
133157909Speter	}
134157909Speter	while (sz) {
135176304Sscottl		len = maxdumpsz - fragsz;
136157909Speter		if (len > sz)
137157909Speter			len = sz;
138157909Speter		counter += len;
139157909Speter		progress -= len;
140157909Speter		if (counter >> 24) {
141157909Speter			printf(" %lld", PG2MB(progress >> PAGE_SHIFT));
142157909Speter			counter &= (1<<24) - 1;
143157909Speter		}
144236503Savg
145221173Sattilio		wdog_kern_pat(WD_LASTVAL);
146236503Savg
147157909Speter		if (ptr) {
148175768Sru			error = dump_write(di, ptr, 0, dumplo, len);
149157909Speter			if (error)
150157909Speter				return (error);
151157909Speter			dumplo += len;
152157909Speter			ptr += len;
153157909Speter			sz -= len;
154157909Speter		} else {
155157909Speter			for (i = 0; i < len; i += PAGE_SIZE)
156157909Speter				dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT);
157157909Speter			fragsz += len;
158157909Speter			pa += len;
159157909Speter			sz -= len;
160176304Sscottl			if (fragsz == maxdumpsz) {
161157909Speter				error = blk_flush(di);
162157909Speter				if (error)
163157909Speter					return (error);
164157909Speter			}
165157909Speter		}
166157909Speter
167157909Speter		/* Check for user abort. */
168157909Speter		c = cncheckc();
169157909Speter		if (c == 0x03)
170157909Speter			return (ECANCELED);
171157909Speter		if (c != -1)
172157909Speter			printf(" (CTRL-C to abort) ");
173157909Speter	}
174157909Speter
175157909Speter	return (0);
176157909Speter}
177157909Speter
178157909Speter/* A fake page table page, to avoid having to handle both 4K and 2M pages */
179157909Speterstatic pt_entry_t fakept[NPTEPG];
180157909Speter
181157909Spetervoid
182157909Speterminidumpsys(struct dumperinfo *di)
183157909Speter{
184157909Speter	uint64_t dumpsize;
185157909Speter	uint32_t ptesize;
186157909Speter	vm_offset_t va;
187157909Speter	int error;
188157909Speter	uint32_t bits;
189157909Speter	uint64_t pa;
190157909Speter	pd_entry_t *pd;
191157909Speter	pt_entry_t *pt;
192157909Speter	int i, j, k, bit;
193157909Speter	struct minidumphdr mdhdr;
194157909Speter
195157909Speter	counter = 0;
196157909Speter	/* Walk page table pages, set bits in vm_page_dump */
197157909Speter	ptesize = 0;
198157909Speter	for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
199157909Speter		/*
200157909Speter		 * We always write a page, even if it is zero. Each
201157909Speter		 * page written corresponds to 2MB of space
202157909Speter		 */
203157909Speter		ptesize += PAGE_SIZE;
204157909Speter		pd = (pd_entry_t *)((uintptr_t)IdlePTD + KERNBASE);	/* always mapped! */
205157909Speter		j = va >> PDRSHIFT;
206157909Speter		if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V))  {
207157909Speter			/* This is an entire 2M page. */
208200346Skmacy			pa = xpmap_mtop(pd[j] & PG_PS_FRAME);
209157909Speter			for (k = 0; k < NPTEPG; k++) {
210157909Speter				if (is_dumpable(pa))
211157909Speter					dump_add_page(pa);
212157909Speter				pa += PAGE_SIZE;
213157909Speter			}
214157909Speter			continue;
215157909Speter		}
216157909Speter		if ((pd[j] & PG_V) == PG_V) {
217157909Speter			/* set bit for each valid page in this 2MB block */
218200346Skmacy			pt = pmap_kenter_temporary(xpmap_mtop(pd[j] & PG_FRAME), 0);
219157909Speter			for (k = 0; k < NPTEPG; k++) {
220157909Speter				if ((pt[k] & PG_V) == PG_V) {
221200346Skmacy					pa = xpmap_mtop(pt[k] & PG_FRAME);
222157909Speter					if (is_dumpable(pa))
223157909Speter						dump_add_page(pa);
224157909Speter				}
225157909Speter			}
226157909Speter		} else {
227157909Speter			/* nothing, we're going to dump a null page */
228157909Speter		}
229157909Speter	}
230157909Speter
231157909Speter	/* Calculate dump size. */
232157909Speter	dumpsize = ptesize;
233157909Speter	dumpsize += round_page(msgbufp->msg_size);
234157909Speter	dumpsize += round_page(vm_page_dump_size);
235157909Speter	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
236157909Speter		bits = vm_page_dump[i];
237157909Speter		while (bits) {
238157909Speter			bit = bsfl(bits);
239157909Speter			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
240157909Speter			/* Clear out undumpable pages now if needed */
241157909Speter			if (is_dumpable(pa)) {
242157909Speter				dumpsize += PAGE_SIZE;
243157909Speter			} else {
244157909Speter				dump_drop_page(pa);
245157909Speter			}
246157909Speter			bits &= ~(1ul << bit);
247157909Speter		}
248157909Speter	}
249157909Speter	dumpsize += PAGE_SIZE;
250157909Speter
251157909Speter	/* Determine dump offset on device. */
252157909Speter	if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
253157909Speter		error = ENOSPC;
254157909Speter		goto fail;
255157909Speter	}
256157909Speter	dumplo = di->mediaoffset + di->mediasize - dumpsize;
257157909Speter	dumplo -= sizeof(kdh) * 2;
258157909Speter	progress = dumpsize;
259157909Speter
260157909Speter	/* Initialize mdhdr */
261157909Speter	bzero(&mdhdr, sizeof(mdhdr));
262157909Speter	strcpy(mdhdr.magic, MINIDUMP_MAGIC);
263157909Speter	mdhdr.version = MINIDUMP_VERSION;
264157909Speter	mdhdr.msgbufsize = msgbufp->msg_size;
265157909Speter	mdhdr.bitmapsize = vm_page_dump_size;
266157909Speter	mdhdr.ptesize = ptesize;
267157909Speter	mdhdr.kernbase = KERNBASE;
268282065Skib#if defined(PAE) || defined(PAE_TABLES)
269157909Speter	mdhdr.paemode = 1;
270157909Speter#endif
271157909Speter
272183527Speter	mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_I386_VERSION, dumpsize, di->blocksize);
273157909Speter
274157909Speter	printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576);
275157909Speter	printf("Dumping %llu MB:", (long long)dumpsize >> 20);
276157909Speter
277157909Speter	/* Dump leader */
278175768Sru	error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
279157909Speter	if (error)
280157909Speter		goto fail;
281157909Speter	dumplo += sizeof(kdh);
282157909Speter
283157909Speter	/* Dump my header */
284157909Speter	bzero(&fakept, sizeof(fakept));
285157909Speter	bcopy(&mdhdr, &fakept, sizeof(mdhdr));
286157909Speter	error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
287157909Speter	if (error)
288157909Speter		goto fail;
289157909Speter
290157909Speter	/* Dump msgbuf up front */
291157909Speter	error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
292157909Speter	if (error)
293157909Speter		goto fail;
294157909Speter
295157909Speter	/* Dump bitmap */
296157909Speter	error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
297157909Speter	if (error)
298157909Speter		goto fail;
299157909Speter
300157909Speter	/* Dump kernel page table pages */
301157909Speter	for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
302157909Speter		/* We always write a page, even if it is zero */
303157909Speter		pd = (pd_entry_t *)((uintptr_t)IdlePTD + KERNBASE);	/* always mapped! */
304157909Speter		j = va >> PDRSHIFT;
305157909Speter		if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V))  {
306157909Speter			/* This is a single 2M block. Generate a fake PTP */
307168439Sru			pa = pd[j] & PG_PS_FRAME;
308157909Speter			for (k = 0; k < NPTEPG; k++) {
309157909Speter				fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M;
310157909Speter			}
311157909Speter			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
312157909Speter			if (error)
313157909Speter				goto fail;
314157909Speter			/* flush, in case we reuse fakept in the same block */
315157909Speter			error = blk_flush(di);
316157909Speter			if (error)
317157909Speter				goto fail;
318157909Speter			continue;
319157909Speter		}
320157909Speter		if ((pd[j] & PG_V) == PG_V) {
321200346Skmacy			pa = xpmap_mtop(pd[j] & PG_FRAME);
322200352Skmacy#ifndef XEN
323157909Speter			error = blk_write(di, 0, pa, PAGE_SIZE);
324200352Skmacy#else
325200352Skmacy			pt = pmap_kenter_temporary(pa, 0);
326200352Skmacy			memcpy(fakept, pt, PAGE_SIZE);
327200352Skmacy			for (i = 0; i < NPTEPG; i++)
328200352Skmacy				fakept[i] = xpmap_mtop(fakept[i]);
329200352Skmacy			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
330157909Speter			if (error)
331157909Speter				goto fail;
332200352Skmacy			/* flush, in case we reuse fakept in the same block */
333200352Skmacy			error = blk_flush(di);
334200352Skmacy			if (error)
335200352Skmacy				goto fail;
336200352Skmacy			bzero(fakept, sizeof(fakept));
337200352Skmacy#endif
338200352Skmacy
339200352Skmacy			if (error)
340200352Skmacy				goto fail;
341157909Speter		} else {
342157909Speter			bzero(fakept, sizeof(fakept));
343157909Speter			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
344157909Speter			if (error)
345157909Speter				goto fail;
346157909Speter			/* flush, in case we reuse fakept in the same block */
347157909Speter			error = blk_flush(di);
348157909Speter			if (error)
349157909Speter				goto fail;
350157909Speter		}
351157909Speter	}
352157909Speter
353157909Speter	/* Dump memory chunks */
354157909Speter	/* XXX cluster it up and use blk_dump() */
355157909Speter	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
356157909Speter		bits = vm_page_dump[i];
357157909Speter		while (bits) {
358157909Speter			bit = bsfl(bits);
359157909Speter			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
360157909Speter			error = blk_write(di, 0, pa, PAGE_SIZE);
361157909Speter			if (error)
362157909Speter				goto fail;
363157909Speter			bits &= ~(1ul << bit);
364157909Speter		}
365157909Speter	}
366157909Speter
367157909Speter	error = blk_flush(di);
368157909Speter	if (error)
369157909Speter		goto fail;
370157909Speter
371157909Speter	/* Dump trailer */
372175768Sru	error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
373157909Speter	if (error)
374157909Speter		goto fail;
375157909Speter	dumplo += sizeof(kdh);
376157909Speter
377157909Speter	/* Signal completion, signoff and exit stage left. */
378175768Sru	dump_write(di, NULL, 0, 0, 0);
379157909Speter	printf("\nDump complete\n");
380157909Speter	return;
381157909Speter
382157909Speter fail:
383157909Speter	if (error < 0)
384157909Speter		error = -error;
385157909Speter
386157909Speter	if (error == ECANCELED)
387157909Speter		printf("\nDump aborted\n");
388157909Speter	else if (error == ENOSPC)
389157909Speter		printf("\nDump failed. Partition too small.\n");
390157909Speter	else
391157909Speter		printf("\n** DUMP FAILED (ERROR %d) **\n", error);
392157909Speter}
393157909Speter
394157909Spetervoid
395157909Speterdump_add_page(vm_paddr_t pa)
396157909Speter{
397157909Speter	int idx, bit;
398157909Speter
399157909Speter	pa >>= PAGE_SHIFT;
400157909Speter	idx = pa >> 5;		/* 2^5 = 32 */
401157909Speter	bit = pa & 31;
402157909Speter	atomic_set_int(&vm_page_dump[idx], 1ul << bit);
403157909Speter}
404157909Speter
405157909Spetervoid
406157909Speterdump_drop_page(vm_paddr_t pa)
407157909Speter{
408157909Speter	int idx, bit;
409157909Speter
410157909Speter	pa >>= PAGE_SHIFT;
411157909Speter	idx = pa >> 5;		/* 2^5 = 32 */
412157909Speter	bit = pa & 31;
413157909Speter	atomic_clear_int(&vm_page_dump[idx], 1ul << bit);
414157909Speter}
415157909Speter
416