/*-
 * Copyright (c) 2006 Peter Wemm
 * Copyright (c) 2015 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Andrew Turner under
 * sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_watchdog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/cons.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/msgbuf.h>
#include <sys/watchdog.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_dumpset.h>
#include <vm/pmap.h>

#include <machine/atomic.h>
#include <machine/md_var.h>
#include <machine/pte.h>
#include <machine/minidump.h>

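/* The kernel dump header must occupy exactly one 512-byte disk sector. */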
CTASSERT(sizeof(struct kerneldumpheader) == 512);

static struct kerneldumpheader kdh;

/* Handle chunked writes. */
static size_t fragsz;
static void *dump_va;
static size_t dumpsize;

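/*
 * One page of scratch space, used to stage the minidump header, the
 * dump_avail array, and the fake L3 page-table pages.
 */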
static uint64_t tmpbuffer[Ln_ENTRIES];

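/*
 * Flush any physical pages staged by blk_write() that have not yet been
 * appended to the dump.
 */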
static int
blk_flush(struct dumperinfo *di)
{
	int error;

	if (fragsz == 0)
		return (0);

	error = dump_append(di, dump_va, fragsz);
	fragsz = 0;
	return (error);
}

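/*
 * Write a run of pages to the dump device.  The source is either a kernel
 * virtual address ("ptr") or a physical address ("pa") read through the
 * direct map; callers supply at most one of the two.  Writes are chunked to
 * the device's maximum I/O size, the watchdog is patted between chunks, and
 * the console is polled so the operator can abort with CTRL-C.
 */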
static int
blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
{
	size_t len;
	int error, c;
	u_int maxdumpsz;

	maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE);
	if (maxdumpsz == 0)	/* seatbelt */
		maxdumpsz = PAGE_SIZE;
	error = 0;
	if ((sz % PAGE_SIZE) != 0) {
		printf("size not page aligned\n");
		return (EINVAL);
	}
	if (ptr != NULL && pa != 0) {
		printf("can't have both va and pa!\n");
		return (EINVAL);
	}
	if ((pa % PAGE_SIZE) != 0) {
		printf("address not page aligned %#jx\n", (uintmax_t)pa);
		return (EINVAL);
	}
	if (ptr != NULL) {
		/*
		 * If we're doing a virtual dump, flush any
		 * pre-existing pa pages.
		 */
		error = blk_flush(di);
		if (error)
			return (error);
	}
	while (sz) {
		len = maxdumpsz - fragsz;
		if (len > sz)
			len = sz;

		dumpsys_pb_progress(len);
		wdog_kern_pat(WD_LASTVAL);

		if (ptr) {
			error = dump_append(di, ptr, len);
			if (error)
				return (error);
			ptr += len;
			sz -= len;
		} else {
			dump_va = (void *)PHYS_TO_DMAP(pa);
			fragsz += len;
			pa += len;
			sz -= len;
			error = blk_flush(di);
			if (error)
				return (error);
		}

		/* Check for user abort. */
		c = cncheckc();
		if (c == 0x03)
			return (ECANCELED);
		if (c != -1)
			printf(" (CTRL-C to abort) ");
	}

	return (0);
}

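/*
 * Write a minidump of the kernel's memory to the dump device.  The dump
 * consists of the minidump header, the kernel message buffer, a copy of
 * dump_avail[], the page dump bitmap, one page of (possibly fake) L3
 * page-table entries for each L2 slot in the kernel map, and finally the
 * data pages themselves.
 */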
int
cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state)
{
	struct minidumphdr mdhdr;
	struct msgbuf *mbp;
	pd_entry_t *l0, *l1, l1e, *l2, l2e;
	pt_entry_t *l3, l3e;
	vm_offset_t va, kva_end;
	vm_paddr_t pa;
	uint32_t pmapsize;
	int error, i, j, retry_count;

	retry_count = 0;
	/* If the dump bitmap grows while dumping (ENOSPC), restart here. */
 retry:
	retry_count++;
	error = 0;
	pmapsize = 0;

	/* Snapshot the KVA upper bound in case it grows. */
	kva_end = kernel_vm_end;

	/*
	 * Walk the kernel page table pages, setting the active entries in the
	 * dump bitmap.
	 *
	 * NB: for a live dump, we may be racing with updates to the page
	 * tables, so care must be taken to read each entry only once.
	 */
	for (va = VM_MIN_KERNEL_ADDRESS; va < kva_end; va += L2_SIZE) {
		pmapsize += PAGE_SIZE;
		if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3))
			continue;

		l1e = atomic_load_64(l1);
		l2e = atomic_load_64(l2);
		if ((l1e & ATTR_DESCR_MASK) == L1_BLOCK) {
			pa = PTE_TO_PHYS(l1e);
			for (i = 0; i < Ln_ENTRIES * Ln_ENTRIES;
			    i++, pa += PAGE_SIZE)
				if (vm_phys_is_dumpable(pa))
					vm_page_dump_add(state->dump_bitset,
					    pa);
			pmapsize += (Ln_ENTRIES - 1) * PAGE_SIZE;
			va += L1_SIZE - L2_SIZE;
		} else if ((l2e & ATTR_DESCR_MASK) == L2_BLOCK) {
			pa = PTE_TO_PHYS(l2e);
			for (i = 0; i < Ln_ENTRIES; i++, pa += PAGE_SIZE) {
				if (vm_phys_is_dumpable(pa))
					vm_page_dump_add(state->dump_bitset,
					    pa);
			}
		} else if ((l2e & ATTR_DESCR_MASK) == L2_TABLE) {
			for (i = 0; i < Ln_ENTRIES; i++) {
				l3e = atomic_load_64(&l3[i]);
				if ((l3e & ATTR_DESCR_MASK) != L3_PAGE)
					continue;
				pa = PTE_TO_PHYS(l3e);
				if (PHYS_IN_DMAP_RANGE(pa) &&
				    vm_phys_is_dumpable(pa))
					vm_page_dump_add(state->dump_bitset,
					    pa);
			}
		}
	}

	/* Calculate dump size. */
	mbp = state->msgbufp;
	dumpsize = pmapsize;
	dumpsize += round_page(mbp->msg_size);
	dumpsize += round_page(sizeof(dump_avail));
	dumpsize += round_page(BITSET_SIZE(vm_page_dump_pages));
	VM_PAGE_DUMP_FOREACH(state->dump_bitset, pa) {
		if (PHYS_IN_DMAP_RANGE(pa) && vm_phys_is_dumpable(pa))
			dumpsize += PAGE_SIZE;
		else
			vm_page_dump_drop(state->dump_bitset, pa);
	}
	/* Account for the page occupied by the minidump header. */
	dumpsize += PAGE_SIZE;

	dumpsys_pb_init(dumpsize);

	/* Initialize mdhdr. */
	bzero(&mdhdr, sizeof(mdhdr));
	strcpy(mdhdr.magic, MINIDUMP_MAGIC);
	mdhdr.version = MINIDUMP_VERSION;
	mdhdr.msgbufsize = mbp->msg_size;
	mdhdr.bitmapsize = round_page(BITSET_SIZE(vm_page_dump_pages));
	mdhdr.pmapsize = pmapsize;
	mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS;
	mdhdr.dmapphys = DMAP_MIN_PHYSADDR;
	mdhdr.dmapbase = DMAP_MIN_ADDRESS;
	mdhdr.dmapend = DMAP_MAX_ADDRESS;
	mdhdr.dumpavailsize = round_page(sizeof(dump_avail));
#if PAGE_SIZE == PAGE_SIZE_4K
	mdhdr.flags = MINIDUMP_FLAG_PS_4K;
#elif PAGE_SIZE == PAGE_SIZE_16K
	mdhdr.flags = MINIDUMP_FLAG_PS_16K;
#else
#error Unsupported page size
#endif

	dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_AARCH64_VERSION,
	    dumpsize);

	error = dump_start(di, &kdh);
	if (error != 0)
		goto fail;

	printf("Dumping %llu out of %ju MB:",
	    (unsigned long long)dumpsize >> 20,
	    ptoa((uintmax_t)physmem) / 1048576);

	/* Dump the minidump header. */
	bzero(&tmpbuffer, sizeof(tmpbuffer));
	bcopy(&mdhdr, &tmpbuffer, sizeof(mdhdr));
	error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE);
	if (error)
		goto fail;

	/* Dump the message buffer up front. */
	error = blk_write(di, mbp->msg_ptr, 0, round_page(mbp->msg_size));
	if (error)
		goto fail;

	/* Dump dump_avail. */
	_Static_assert(sizeof(dump_avail) <= sizeof(tmpbuffer),
	    "Large dump_avail not handled");
	bzero(tmpbuffer, sizeof(tmpbuffer));
	memcpy(tmpbuffer, dump_avail, sizeof(dump_avail));
	error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE);
	if (error)
		goto fail;

	/* Dump the page dump bitmap. */
	error = blk_write(di, (char *)state->dump_bitset, 0,
	    round_page(BITSET_SIZE(vm_page_dump_pages)));
	if (error)
		goto fail;

	/* Dump kernel page directory pages. */
	bzero(&tmpbuffer, sizeof(tmpbuffer));
	for (va = VM_MIN_KERNEL_ADDRESS; va < kva_end; va += L2_SIZE) {
		if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3)) {
			/* We always write a page, even if it is zero. */
			error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE);
			if (error)
				goto fail;
			/* Flush; tmpbuffer may be reused in the same block. */
			error = blk_flush(di);
			if (error)
				goto fail;
			continue;
		}

		l1e = atomic_load_64(l1);
		l2e = atomic_load_64(l2);
		if ((l1e & ATTR_DESCR_MASK) == L1_BLOCK) {
			/*
			 * Handle an L1 block mapping: write out one page of
			 * fake l3 entries for each L2 slot the block spans.
			 */
			pa = PTE_TO_PHYS(l1e) | (va & L1_OFFSET);

			for (i = 0; i < Ln_ENTRIES; i++) {
				for (j = 0; j < Ln_ENTRIES; j++) {
					tmpbuffer[j] = (pa + i * L2_SIZE +
					    j * PAGE_SIZE) | ATTR_DEFAULT |
					    L3_PAGE;
				}
				error = blk_write(di, (char *)&tmpbuffer, 0,
				    PAGE_SIZE);
				if (error)
					goto fail;
			}
			/* Flush; tmpbuffer may be reused in the same block. */
			error = blk_flush(di);
			if (error)
				goto fail;
			bzero(&tmpbuffer, sizeof(tmpbuffer));
			va += L1_SIZE - L2_SIZE;
		} else if ((l2e & ATTR_DESCR_MASK) == L2_BLOCK) {
			pa = PTE_TO_PHYS(l2e) | (va & L2_OFFSET);

			/* Generate fake l3 entries based upon the l2 entry. */
			for (i = 0; i < Ln_ENTRIES; i++) {
				tmpbuffer[i] = (pa + i * PAGE_SIZE) |
				    ATTR_DEFAULT | L3_PAGE;
			}
			error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE);
			if (error)
				goto fail;
			/* Flush; tmpbuffer may be reused in the same block. */
			error = blk_flush(di);
			if (error)
				goto fail;
			bzero(&tmpbuffer, sizeof(tmpbuffer));
			continue;
		} else {
			pa = PTE_TO_PHYS(l2e);

			/*
			 * We always write a page, even if it is zero. If pa
			 * is malformed, write the zeroed tmpbuffer.
			 */
			if (PHYS_IN_DMAP_RANGE(pa) && vm_phys_is_dumpable(pa))
				error = blk_write(di, NULL, pa, PAGE_SIZE);
			else
				error = blk_write(di, (char *)&tmpbuffer, 0,
				    PAGE_SIZE);
			if (error)
				goto fail;
		}
	}

	/* Dump memory chunks. */
	VM_PAGE_DUMP_FOREACH(state->dump_bitset, pa) {
		error = blk_write(di, NULL, pa, PAGE_SIZE);
		if (error)
			goto fail;
	}

	error = blk_flush(di);
	if (error)
		goto fail;

	error = dump_finish(di, &kdh);
	if (error != 0)
		goto fail;

	printf("\nDump complete\n");
	return (0);

fail:
	if (error < 0)
		error = -error;

	printf("\n");
	if (error == ENOSPC) {
		printf("Dump map grown while dumping. ");
		if (retry_count < 5) {
			printf("Retrying...\n");
			goto retry;
		}
		printf("Dump failed.\n");
	} else if (error == ECANCELED) {
		printf("Dump aborted\n");
	} else if (error == E2BIG) {
		printf("Dump failed. Partition too small (about %lluMB were "
		    "needed this time).\n", (unsigned long long)dumpsize >> 20);
	} else {
		printf("** DUMP FAILED (ERROR %d) **\n", error);
	}
	return (error);
}