1/*-
2 * Copyright (c) 2006 Peter Wemm
3 * Copyright (c) 2015 The FreeBSD Foundation
4 * All rights reserved.
5 * Copyright (c) 2019 Mitchell Horne
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30#include "opt_watchdog.h"
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/conf.h>
35#include <sys/cons.h>
36#include <sys/kernel.h>
37#include <sys/kerneldump.h>
38#include <sys/msgbuf.h>
39#include <sys/watchdog.h>
40#include <sys/vmmeter.h>
41
42#include <vm/vm.h>
43#include <vm/vm_param.h>
44#include <vm/vm_page.h>
45#include <vm/vm_phys.h>
46#include <vm/vm_dumpset.h>
47#include <vm/pmap.h>
48
49#include <machine/atomic.h>
50#include <machine/elf.h>
51#include <machine/md_var.h>
52#include <machine/minidump.h>
53
/* The kernel dump header is expected to occupy exactly 512 bytes. */
CTASSERT(sizeof(struct kerneldumpheader) == 512);

/*
 * Kernel dump header; filled in by dump_init_header() and handed to
 * dump_start()/dump_finish() in cpu_minidumpsys().
 */
static struct kerneldumpheader kdh;

/* Handle chunked writes. */
/* Number of bytes at dump_va not yet appended; reset by blk_flush(). */
static size_t fragsz;
/* Address of the pending data that blk_flush() will append. */
static void *dump_va;
/* Total dump size in bytes, computed by cpu_minidumpsys(). */
static size_t dumpsize;

/*
 * One page of scratch space, used for the minidump header, the copy of
 * dump_avail, and synthesized or zero-filled page table pages.
 */
static uint64_t tmpbuffer[PAGE_SIZE / sizeof(uint64_t)];
64
65static int
66blk_flush(struct dumperinfo *di)
67{
68	int error;
69
70	if (fragsz == 0)
71		return (0);
72
73	error = dump_append(di, dump_va, fragsz);
74	fragsz = 0;
75	return (error);
76}
77
78/*
79 * Write a block of data to the dump file.
80 *
81 * Caller can provide data through a pointer or by specifying its
82 * physical address.
83 *
84 * XXX writes using pa should be no larger than PAGE_SIZE.
85 */
86static int
87blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
88{
89	size_t len;
90	int error, c;
91	u_int maxdumpsz;
92
93	maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE);
94	if (maxdumpsz == 0)	/* seatbelt */
95		maxdumpsz = PAGE_SIZE;
96	error = 0;
97	if ((sz % PAGE_SIZE) != 0) {
98		printf("size not page aligned\n");
99		return (EINVAL);
100	}
101	if (ptr != NULL && pa != 0) {
102		printf("cant have both va and pa!\n");
103		return (EINVAL);
104	}
105	if ((((uintptr_t)pa) % PAGE_SIZE) != 0) {
106		printf("address not page aligned %#lx\n", (uintptr_t)pa);
107		return (EINVAL);
108	}
109	if (ptr != NULL) {
110		/*
111		 * If we're doing a virtual dump, flush any
112		 * pre-existing pa pages.
113		 */
114		error = blk_flush(di);
115		if (error != 0)
116			return (error);
117	}
118	while (sz) {
119		len = maxdumpsz - fragsz;
120		if (len > sz)
121			len = sz;
122
123		dumpsys_pb_progress(len);
124		wdog_kern_pat(WD_LASTVAL);
125
126		if (ptr) {
127			error = dump_append(di, ptr, len);
128			if (error != 0)
129				return (error);
130			ptr += len;
131			sz -= len;
132		} else {
133			dump_va = (void *)PHYS_TO_DMAP(pa);
134			fragsz += len;
135			pa += len;
136			sz -= len;
137			error = blk_flush(di);
138			if (error != 0)
139				return (error);
140		}
141
142		/* Check for user abort */
143		c = cncheckc();
144		if (c == 0x03)
145			return (ECANCELED);
146		if (c != -1)
147			printf(" (CTRL-C to abort) ");
148	}
149
150	return (0);
151}
152
153int
154cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state)
155{
156	pd_entry_t *l1, *l2, l2e;
157	pt_entry_t *l3, l3e;
158	struct minidumphdr mdhdr;
159	struct msgbuf *mbp;
160	uint32_t pmapsize;
161	vm_offset_t va, kva_max;
162	vm_paddr_t pa;
163	int error;
164	int i;
165	int retry_count;
166
167	retry_count = 0;
168retry:
169	retry_count++;
170	error = 0;
171	pmapsize = 0;
172
173	/* Snapshot the KVA upper bound in case it grows. */
174	kva_max = kernel_vm_end;
175
176	/*
177	 * Walk the kernel page table pages, setting the active entries in the
178	 * dump bitmap.
179	 *
180	 * NB: for a live dump, we may be racing with updates to the page
181	 * tables, so care must be taken to read each entry only once.
182	 */
183	for (va = VM_MIN_KERNEL_ADDRESS; va < kva_max; va += L2_SIZE) {
184		pmapsize += PAGE_SIZE;
185		if (!pmap_get_tables(pmap_kernel(), va, &l1, &l2, &l3))
186			continue;
187
188		/* We should always be using the l2 table for kvm */
189		if (l2 == NULL)
190			continue;
191
192		/* l2 may be a superpage */
193		l2e = atomic_load_64(l2);
194		if ((l2e & PTE_RWX) != 0) {
195			pa = (l2e >> PTE_PPN1_S) << L2_SHIFT;
196			for (i = 0; i < Ln_ENTRIES; i++, pa += PAGE_SIZE) {
197				if (vm_phys_is_dumpable(pa))
198					vm_page_dump_add(state->dump_bitset,
199					    pa);
200			}
201		} else {
202			for (i = 0; i < Ln_ENTRIES; i++) {
203				l3e = atomic_load_64(&l3[i]);
204				if ((l3e & PTE_V) == 0)
205					continue;
206				pa = (l3e >> PTE_PPN0_S) * PAGE_SIZE;
207				if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa))
208					vm_page_dump_add(state->dump_bitset,
209					    pa);
210			}
211		}
212	}
213
214	/* Calculate dump size */
215	mbp = state->msgbufp;
216	dumpsize = pmapsize;
217	dumpsize += round_page(mbp->msg_size);
218	dumpsize += round_page(sizeof(dump_avail));
219	dumpsize += round_page(BITSET_SIZE(vm_page_dump_pages));
220	VM_PAGE_DUMP_FOREACH(state->dump_bitset, pa) {
221		/* Clear out undumpable pages now if needed */
222		if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa))
223			dumpsize += PAGE_SIZE;
224		else
225			vm_page_dump_drop(state->dump_bitset, pa);
226	}
227	dumpsize += PAGE_SIZE;
228
229	dumpsys_pb_init(dumpsize);
230
231	/* Initialize mdhdr */
232	bzero(&mdhdr, sizeof(mdhdr));
233	strcpy(mdhdr.magic, MINIDUMP_MAGIC);
234	mdhdr.version = MINIDUMP_VERSION;
235	mdhdr.msgbufsize = mbp->msg_size;
236	mdhdr.bitmapsize = round_page(BITSET_SIZE(vm_page_dump_pages));
237	mdhdr.pmapsize = pmapsize;
238	mdhdr.kernbase = KERNBASE;
239	mdhdr.dmapphys = DMAP_MIN_PHYSADDR;
240	mdhdr.dmapbase = DMAP_MIN_ADDRESS;
241	mdhdr.dmapend = DMAP_MAX_ADDRESS;
242	mdhdr.dumpavailsize = round_page(sizeof(dump_avail));
243
244	dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_RISCV_VERSION,
245	    dumpsize);
246
247	error = dump_start(di, &kdh);
248	if (error != 0)
249		goto fail;
250
251	printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20,
252	    ptoa((uintmax_t)physmem) / 1048576);
253
254	/* Dump minidump header */
255	bzero(&tmpbuffer, sizeof(tmpbuffer));
256	bcopy(&mdhdr, &tmpbuffer, sizeof(mdhdr));
257	error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE);
258	if (error)
259		goto fail;
260
261	/* Dump msgbuf up front */
262	error = blk_write(di, mbp->msg_ptr, 0, round_page(mbp->msg_size));
263	if (error)
264		goto fail;
265
266	/* Dump dump_avail */
267	_Static_assert(sizeof(dump_avail) <= sizeof(tmpbuffer),
268	    "Large dump_avail not handled");
269	bzero(tmpbuffer, sizeof(tmpbuffer));
270	memcpy(tmpbuffer, dump_avail, sizeof(dump_avail));
271	error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE);
272	if (error)
273		goto fail;
274
275	/* Dump bitmap */
276	error = blk_write(di, (char *)vm_page_dump, 0,
277	    round_page(BITSET_SIZE(vm_page_dump_pages)));
278	if (error)
279		goto fail;
280
281	/* Dump kernel page directory pages */
282	bzero(&tmpbuffer, sizeof(tmpbuffer));
283	for (va = VM_MIN_KERNEL_ADDRESS; va < kva_max; va += L2_SIZE) {
284		if (!pmap_get_tables(pmap_kernel(), va, &l1, &l2, &l3)) {
285			/* We always write a page, even if it is zero */
286			error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE);
287			if (error)
288				goto fail;
289			/* Flush, in case we reuse tmpbuffer in the same block */
290			error = blk_flush(di);
291			if (error)
292				goto fail;
293			continue;
294		}
295
296		l2e = atomic_load_64(l2);
297		if ((l2e & PTE_RWX) != 0) {
298			/* Generate fake l3 entries based on the l2 superpage */
299			for (i = 0; i < Ln_ENTRIES; i++) {
300				tmpbuffer[i] = (l2e | (i << PTE_PPN0_S));
301			}
302			/* We always write a page, even if it is zero */
303			error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE);
304			if (error)
305				goto fail;
306			/* Flush, in case we reuse tmpbuffer in the same block */
307			error = blk_flush(di);
308			if (error)
309				goto fail;
310			bzero(&tmpbuffer, sizeof(tmpbuffer));
311		} else {
312			pa = (l2e >> PTE_PPN0_S) * PAGE_SIZE;
313
314			/*
315			 * We always write a page, even if it is zero. If pa
316			 * is malformed, write the zeroed tmpbuffer.
317			 */
318			if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa))
319				error = blk_write(di, NULL, pa, PAGE_SIZE);
320			else
321				error = blk_write(di, (char *)&tmpbuffer, 0,
322				    PAGE_SIZE);
323			if (error)
324				goto fail;
325		}
326	}
327
328	/* Dump memory chunks */
329	/* XXX cluster it up and use blk_dump() */
330	VM_PAGE_DUMP_FOREACH(state->dump_bitset, pa) {
331		error = blk_write(di, 0, pa, PAGE_SIZE);
332		if (error)
333			goto fail;
334	}
335
336	error = blk_flush(di);
337	if (error)
338		goto fail;
339
340	error = dump_finish(di, &kdh);
341	if (error != 0)
342		goto fail;
343
344	printf("\nDump complete\n");
345	return (0);
346
347fail:
348	if (error < 0)
349		error = -error;
350
351	printf("\n");
352	if (error == ENOSPC) {
353		printf("Dump map grown while dumping. ");
354		if (retry_count < 5) {
355			printf("Retrying...\n");
356			goto retry;
357		}
358		printf("Dump failed.\n");
359	}
360	else if (error == ECANCELED)
361		printf("Dump aborted\n");
362	else if (error == E2BIG) {
363		printf("Dump failed. Partition too small (about %lluMB were "
364		    "needed this time).\n", (long long)dumpsize >> 20);
365	} else
366		printf("** DUMP FAILED (ERROR %d) **\n", error);
367	return (error);
368}
369