1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2006 Peter Wemm
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD$");
30
31/*
32 * AMD64 machine dependent routines for kvm and minidumps.
33 */
34
35#include <sys/param.h>
36#include <sys/endian.h>
37#include <stdint.h>
38#include <stdlib.h>
39#include <string.h>
40#include <unistd.h>
41#include <vm/vm.h>
42#include <kvm.h>
43
44#include "../../sys/amd64/include/minidump.h"
45
46#include <limits.h>
47
48#include "kvm_private.h"
49#include "kvm_amd64.h"
50
/* Round x up to a multiple of AMD64_PAGE_SIZE with roundup2(), as a kvaddr_t. */
#define	amd64_round_page(x)	roundup2((kvaddr_t)(x), AMD64_PAGE_SIZE)
/*
 * True when the vmstate pointed to by vm describes a version 1 minidump.
 * The argument is parenthesized so that expressions such as &state or
 * p + 1 expand correctly.
 */
#define	VM_IS_V1(vm)		((vm)->hdr.version == 1)
/*
 * Low-order bits of va: masked with AMD64_PAGE_SIZE - 1 for version 1
 * dumps and with AMD64_PAGE_MASK otherwise.
 */
#define	VA_OFF(vm, va)		\
	(VM_IS_V1(vm) ? ((va) & (AMD64_PAGE_SIZE - 1)) : ((va) & AMD64_PAGE_MASK))
55
/*
 * Translation state kept in kd->vmst for an amd64 minidump: a copy of the
 * dump header.  _amd64_minidump_initvtop() reads it from the dump file and
 * converts the size and address fields it uses to host byte order.
 */
struct vmstate {
	struct minidumphdr hdr;
};
59
60static vm_prot_t
61_amd64_entry_to_prot(uint64_t entry)
62{
63	vm_prot_t prot = VM_PROT_READ;
64
65	if ((entry & AMD64_PG_RW) != 0)
66		prot |= VM_PROT_WRITE;
67	if ((entry & AMD64_PG_NX) == 0)
68		prot |= VM_PROT_EXECUTE;
69	return prot;
70}
71
/*
 * Version 2 minidumps use page directory entries, while version 1 minidumps
 * use page table entries.
 */
76
77static amd64_pde_t
78_amd64_pde_get(kvm_t *kd, u_long pdeindex)
79{
80	amd64_pde_t *pde = _kvm_pmap_get(kd, pdeindex, sizeof(*pde));
81
82	return le64toh(*pde);
83}
84
85static amd64_pte_t
86_amd64_pte_get(kvm_t *kd, u_long pteindex)
87{
88	amd64_pte_t *pte = _kvm_pmap_get(kd, pteindex, sizeof(*pte));
89
90	return le64toh(*pte);
91}
92
93/* Get the first page table entry for a given page directory index. */
94static amd64_pte_t *
95_amd64_pde_first_pte(kvm_t *kd, u_long pdeindex)
96{
97	u_long *pa;
98
99	pa = _kvm_pmap_get(kd, pdeindex, sizeof(amd64_pde_t));
100	if (pa == NULL)
101		return NULL;
102	return _kvm_map_get(kd, *pa & AMD64_PG_FRAME, AMD64_PAGE_SIZE);
103}
104
105static int
106_amd64_minidump_probe(kvm_t *kd)
107{
108
109	return (_kvm_probe_elf_kernel(kd, ELFCLASS64, EM_X86_64) &&
110	    _kvm_is_minidump(kd));
111}
112
113static void
114_amd64_minidump_freevtop(kvm_t *kd)
115{
116	struct vmstate *vm = kd->vmst;
117
118	free(vm);
119	kd->vmst = NULL;
120}
121
122static int
123_amd64_minidump_initvtop(kvm_t *kd)
124{
125	struct vmstate *vmst;
126	off_t off, dump_avail_off, sparse_off;
127
128	vmst = _kvm_malloc(kd, sizeof(*vmst));
129	if (vmst == NULL) {
130		_kvm_err(kd, kd->program, "cannot allocate vm");
131		return (-1);
132	}
133	kd->vmst = vmst;
134	if (pread(kd->pmfd, &vmst->hdr, sizeof(vmst->hdr), 0) !=
135	    sizeof(vmst->hdr)) {
136		_kvm_err(kd, kd->program, "cannot read dump header");
137		return (-1);
138	}
139	if (strncmp(MINIDUMP_MAGIC, vmst->hdr.magic, sizeof(vmst->hdr.magic)) != 0) {
140		_kvm_err(kd, kd->program, "not a minidump for this platform");
141		return (-1);
142	}
143
144	/*
145	 * NB: amd64 minidump header is binary compatible between version 1
146	 * and version 2; version 3 adds the dumpavailsize field
147	 */
148	vmst->hdr.version = le32toh(vmst->hdr.version);
149	if (vmst->hdr.version > MINIDUMP_VERSION || vmst->hdr.version < 1) {
150		_kvm_err(kd, kd->program, "wrong minidump version. expected %d got %d",
151		    MINIDUMP_VERSION, vmst->hdr.version);
152		return (-1);
153	}
154	vmst->hdr.msgbufsize = le32toh(vmst->hdr.msgbufsize);
155	vmst->hdr.bitmapsize = le32toh(vmst->hdr.bitmapsize);
156	vmst->hdr.pmapsize = le32toh(vmst->hdr.pmapsize);
157	vmst->hdr.kernbase = le64toh(vmst->hdr.kernbase);
158	vmst->hdr.dmapbase = le64toh(vmst->hdr.dmapbase);
159	vmst->hdr.dmapend = le64toh(vmst->hdr.dmapend);
160	vmst->hdr.dumpavailsize = vmst->hdr.version == MINIDUMP_VERSION ?
161	    le32toh(vmst->hdr.dumpavailsize) : 0;
162
163	/* Skip header and msgbuf */
164	dump_avail_off = AMD64_PAGE_SIZE + amd64_round_page(vmst->hdr.msgbufsize);
165
166	/* Skip dump_avail */
167	off = dump_avail_off + amd64_round_page(vmst->hdr.dumpavailsize);
168
169	sparse_off = off + amd64_round_page(vmst->hdr.bitmapsize) +
170	    amd64_round_page(vmst->hdr.pmapsize);
171	if (_kvm_pt_init(kd, vmst->hdr.dumpavailsize, dump_avail_off,
172	    vmst->hdr.bitmapsize, off, sparse_off, AMD64_PAGE_SIZE) == -1) {
173		return (-1);
174	}
175	off += amd64_round_page(vmst->hdr.bitmapsize);
176
177	if (_kvm_pmap_init(kd, vmst->hdr.pmapsize, off) == -1) {
178		return (-1);
179	}
180	off += amd64_round_page(vmst->hdr.pmapsize);
181
182	return (0);
183}
184
185static int
186_amd64_minidump_vatop_v1(kvm_t *kd, kvaddr_t va, off_t *pa)
187{
188	struct vmstate *vm;
189	amd64_physaddr_t offset;
190	amd64_pte_t pte;
191	kvaddr_t pteindex;
192	amd64_physaddr_t a;
193	off_t ofs;
194
195	vm = kd->vmst;
196	offset = va & AMD64_PAGE_MASK;
197
198	if (va >= vm->hdr.kernbase) {
199		pteindex = (va - vm->hdr.kernbase) >> AMD64_PAGE_SHIFT;
200		if (pteindex >= vm->hdr.pmapsize / sizeof(pte))
201			goto invalid;
202		pte = _amd64_pte_get(kd, pteindex);
203		if ((pte & AMD64_PG_V) == 0) {
204			_kvm_err(kd, kd->program,
205			    "_amd64_minidump_vatop_v1: pte not valid");
206			goto invalid;
207		}
208		a = pte & AMD64_PG_FRAME;
209		ofs = _kvm_pt_find(kd, a, AMD64_PAGE_SIZE);
210		if (ofs == -1) {
211			_kvm_err(kd, kd->program,
212	    "_amd64_minidump_vatop_v1: physical address 0x%jx not in minidump",
213			    (uintmax_t)a);
214			goto invalid;
215		}
216		*pa = ofs + offset;
217		return (AMD64_PAGE_SIZE - offset);
218	} else if (va >= vm->hdr.dmapbase && va < vm->hdr.dmapend) {
219		a = (va - vm->hdr.dmapbase) & ~AMD64_PAGE_MASK;
220		ofs = _kvm_pt_find(kd, a, AMD64_PAGE_SIZE);
221		if (ofs == -1) {
222			_kvm_err(kd, kd->program,
223    "_amd64_minidump_vatop_v1: direct map address 0x%jx not in minidump",
224			    (uintmax_t)va);
225			goto invalid;
226		}
227		*pa = ofs + offset;
228		return (AMD64_PAGE_SIZE - offset);
229	} else {
230		_kvm_err(kd, kd->program,
231	    "_amd64_minidump_vatop_v1: virtual address 0x%jx not minidumped",
232		    (uintmax_t)va);
233		goto invalid;
234	}
235
236invalid:
237	_kvm_err(kd, 0, "invalid address (0x%jx)", (uintmax_t)va);
238	return (0);
239}
240
241static int
242_amd64_minidump_vatop(kvm_t *kd, kvaddr_t va, off_t *pa)
243{
244	amd64_pte_t pt[AMD64_NPTEPG];
245	struct vmstate *vm;
246	amd64_physaddr_t offset;
247	amd64_pde_t pde;
248	amd64_pte_t pte;
249	kvaddr_t pteindex;
250	kvaddr_t pdeindex;
251	amd64_physaddr_t a;
252	off_t ofs;
253
254	vm = kd->vmst;
255	offset = va & AMD64_PAGE_MASK;
256
257	if (va >= vm->hdr.kernbase) {
258		pdeindex = (va - vm->hdr.kernbase) >> AMD64_PDRSHIFT;
259		if (pdeindex >= vm->hdr.pmapsize / sizeof(pde))
260			goto invalid;
261		pde = _amd64_pde_get(kd, pdeindex);
262		if ((pde & AMD64_PG_V) == 0) {
263			_kvm_err(kd, kd->program,
264			    "_amd64_minidump_vatop: pde not valid");
265			goto invalid;
266		}
267		if ((pde & AMD64_PG_PS) == 0) {
268			a = pde & AMD64_PG_FRAME;
269			/* TODO: Just read the single PTE */
270			ofs = _kvm_pt_find(kd, a, AMD64_PAGE_SIZE);
271			if (ofs == -1) {
272				_kvm_err(kd, kd->program,
273				    "cannot find page table entry for %ju",
274				    (uintmax_t)a);
275				goto invalid;
276			}
277			if (pread(kd->pmfd, &pt, AMD64_PAGE_SIZE, ofs) !=
278			    AMD64_PAGE_SIZE) {
279				_kvm_err(kd, kd->program,
280				    "cannot read page table entry for %ju",
281				    (uintmax_t)a);
282				goto invalid;
283			}
284			pteindex = (va >> AMD64_PAGE_SHIFT) &
285			    (AMD64_NPTEPG - 1);
286			pte = le64toh(pt[pteindex]);
287			if ((pte & AMD64_PG_V) == 0) {
288				_kvm_err(kd, kd->program,
289				    "_amd64_minidump_vatop: pte not valid");
290				goto invalid;
291			}
292			a = pte & AMD64_PG_FRAME;
293		} else {
294			a = pde & AMD64_PG_PS_FRAME;
295			a += (va & AMD64_PDRMASK) ^ offset;
296		}
297		ofs = _kvm_pt_find(kd, a, AMD64_PAGE_SIZE);
298		if (ofs == -1) {
299			_kvm_err(kd, kd->program,
300	    "_amd64_minidump_vatop: physical address 0x%jx not in minidump",
301			    (uintmax_t)a);
302			goto invalid;
303		}
304		*pa = ofs + offset;
305		return (AMD64_PAGE_SIZE - offset);
306	} else if (va >= vm->hdr.dmapbase && va < vm->hdr.dmapend) {
307		a = (va - vm->hdr.dmapbase) & ~AMD64_PAGE_MASK;
308		ofs = _kvm_pt_find(kd, a, AMD64_PAGE_SIZE);
309		if (ofs == -1) {
310			_kvm_err(kd, kd->program,
311	    "_amd64_minidump_vatop: direct map address 0x%jx not in minidump",
312			    (uintmax_t)va);
313			goto invalid;
314		}
315		*pa = ofs + offset;
316		return (AMD64_PAGE_SIZE - offset);
317	} else {
318		_kvm_err(kd, kd->program,
319	    "_amd64_minidump_vatop: virtual address 0x%jx not minidumped",
320		    (uintmax_t)va);
321		goto invalid;
322	}
323
324invalid:
325	_kvm_err(kd, 0, "invalid address (0x%jx)", (uintmax_t)va);
326	return (0);
327}
328
329static int
330_amd64_minidump_kvatop(kvm_t *kd, kvaddr_t va, off_t *pa)
331{
332
333	if (ISALIVE(kd)) {
334		_kvm_err(kd, 0,
335		    "_amd64_minidump_kvatop called in live kernel!");
336		return (0);
337	}
338	if (((struct vmstate *)kd->vmst)->hdr.version == 1)
339		return (_amd64_minidump_vatop_v1(kd, va, pa));
340	else
341		return (_amd64_minidump_vatop(kd, va, pa));
342}
343
344static int
345_amd64_minidump_walk_pages(kvm_t *kd, kvm_walk_pages_cb_t *cb, void *arg)
346{
347	struct vmstate *vm = kd->vmst;
348	u_long npdes = vm->hdr.pmapsize / sizeof(amd64_pde_t);
349	u_long bmindex, dva, pa, pdeindex, va;
350	struct kvm_bitmap bm;
351	int ret = 0;
352	vm_prot_t prot;
353	unsigned int pgsz = AMD64_PAGE_SIZE;
354
355	if (vm->hdr.version < 2)
356		return (0);
357
358	if (!_kvm_bitmap_init(&bm, vm->hdr.bitmapsize, &bmindex))
359		return (0);
360
361	for (pdeindex = 0; pdeindex < npdes; pdeindex++) {
362		amd64_pde_t pde = _amd64_pde_get(kd, pdeindex);
363		amd64_pte_t *ptes;
364		u_long i;
365
366		va = vm->hdr.kernbase + (pdeindex << AMD64_PDRSHIFT);
367		if ((pde & AMD64_PG_V) == 0)
368			continue;
369
370		if ((pde & AMD64_PG_PS) != 0) {
371			/*
372			 * Large page.  Iterate on each 4K page section
373			 * within this page.  This differs from 4K pages in
374			 * that every page here uses the same PDE to
375			 * generate permissions.
376			 */
377			pa = (pde & AMD64_PG_PS_FRAME) +
378			    ((va & AMD64_PDRMASK) ^ VA_OFF(vm, va));
379			dva = vm->hdr.dmapbase + pa;
380			_kvm_bitmap_set(&bm, _kvm_pa_bit_id(kd, pa, AMD64_PAGE_SIZE));
381			if (!_kvm_visit_cb(kd, cb, arg, pa, va, dva,
382			    _amd64_entry_to_prot(pde), AMD64_NBPDR, pgsz)) {
383				goto out;
384			}
385			continue;
386		}
387
388		/* 4K pages: pde references another page of entries. */
389		ptes = _amd64_pde_first_pte(kd, pdeindex);
390		/* Ignore page directory pages that were not dumped. */
391		if (ptes == NULL)
392			continue;
393
394		for (i = 0; i < AMD64_NPTEPG; i++) {
395			amd64_pte_t pte = (u_long)ptes[i];
396
397			pa = pte & AMD64_PG_FRAME;
398			dva = vm->hdr.dmapbase + pa;
399			if ((pte & AMD64_PG_V) != 0) {
400				_kvm_bitmap_set(&bm,
401				    _kvm_pa_bit_id(kd, pa, AMD64_PAGE_SIZE));
402				if (!_kvm_visit_cb(kd, cb, arg, pa, va, dva,
403				    _amd64_entry_to_prot(pte), pgsz, 0)) {
404					goto out;
405				}
406			}
407			va += AMD64_PAGE_SIZE;
408		}
409	}
410
411	while (_kvm_bitmap_next(&bm, &bmindex)) {
412		pa = _kvm_bit_id_pa(kd, bmindex, AMD64_PAGE_SIZE);
413		if (pa == _KVM_PA_INVALID)
414			break;
415		dva = vm->hdr.dmapbase + pa;
416		if (vm->hdr.dmapend < (dva + pgsz))
417			break;
418		va = 0;
419		/* amd64/pmap.c: create_pagetables(): dmap always R|W. */
420		prot = VM_PROT_READ | VM_PROT_WRITE;
421		if (!_kvm_visit_cb(kd, cb, arg, pa, va, dva, prot, pgsz, 0)) {
422			goto out;
423		}
424	}
425
426	ret = 1;
427
428out:
429	_kvm_bitmap_deinit(&bm);
430	return (ret);
431}
432
433static struct kvm_arch kvm_amd64_minidump = {
434	.ka_probe = _amd64_minidump_probe,
435	.ka_initvtop = _amd64_minidump_initvtop,
436	.ka_freevtop = _amd64_minidump_freevtop,
437	.ka_kvatop = _amd64_minidump_kvatop,
438	.ka_native = _amd64_native,
439	.ka_walk_pages = _amd64_minidump_walk_pages,
440};
441
442KVM_ARCH(kvm_amd64_minidump);
443