1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1989, 1992, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software developed by the Computer Systems
8 * Engineering group at Lawrence Berkeley Laboratory under DARPA contract
9 * BG 91-66 and contributed to Berkeley.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD$");
38__SCCSID("@(#)kvm.c	8.2 (Berkeley) 2/13/94");
39
40#include <sys/param.h>
41#include <sys/fnv_hash.h>
42
43#define	_WANT_VNET
44
45#include <sys/user.h>
46#include <sys/linker.h>
47#include <sys/pcpu.h>
48#include <sys/stat.h>
49#include <sys/sysctl.h>
50#include <sys/mman.h>
51
52#include <stdbool.h>
53#include <net/vnet.h>
54
55#include <fcntl.h>
56#include <kvm.h>
57#include <limits.h>
58#include <paths.h>
59#include <stdint.h>
60#include <stdio.h>
61#include <stdlib.h>
62#include <string.h>
63#include <unistd.h>
64
65#include "kvm_private.h"
66
/*
 * Set of architecture back ends.  _kvm_open() iterates over it with
 * SET_FOREACH() and keeps the first entry whose ka_probe() accepts the
 * descriptor.
 */
SET_DECLARE(kvm_arch, struct kvm_arch);

/* Empty string handed back by kvm_geterr() when it is given a NULL handle. */
static char _kd_is_null[] = "";
70
71char *
72kvm_geterr(kvm_t *kd)
73{
74
75	if (kd == NULL)
76		return (_kd_is_null);
77	return (kd->errbuf);
78}
79
80static int
81_kvm_read_kernel_ehdr(kvm_t *kd)
82{
83	Elf *elf;
84
85	if (elf_version(EV_CURRENT) == EV_NONE) {
86		_kvm_err(kd, kd->program, "Unsupported libelf");
87		return (-1);
88	}
89	elf = elf_begin(kd->nlfd, ELF_C_READ, NULL);
90	if (elf == NULL) {
91		_kvm_err(kd, kd->program, "%s", elf_errmsg(0));
92		return (-1);
93	}
94	if (elf_kind(elf) != ELF_K_ELF) {
95		_kvm_err(kd, kd->program, "kernel is not an ELF file");
96		return (-1);
97	}
98	if (gelf_getehdr(elf, &kd->nlehdr) == NULL) {
99		_kvm_err(kd, kd->program, "%s", elf_errmsg(0));
100		elf_end(elf);
101		return (-1);
102	}
103	elf_end(elf);
104
105	switch (kd->nlehdr.e_ident[EI_DATA]) {
106	case ELFDATA2LSB:
107	case ELFDATA2MSB:
108		return (0);
109	default:
110		_kvm_err(kd, kd->program,
111		    "unsupported ELF data encoding for kernel");
112		return (-1);
113	}
114}
115
116static kvm_t *
117_kvm_open(kvm_t *kd, const char *uf, const char *mf, int flag, char *errout)
118{
119	struct kvm_arch **parch;
120	struct stat st;
121
122	kd->vmfd = -1;
123	kd->pmfd = -1;
124	kd->nlfd = -1;
125	kd->vmst = NULL;
126	kd->procbase = NULL;
127	kd->argspc = NULL;
128	kd->argv = NULL;
129
130	if (uf == NULL)
131		uf = getbootfile();
132	else if (strlen(uf) >= MAXPATHLEN) {
133		_kvm_err(kd, kd->program, "exec file name too long");
134		goto failed;
135	}
136	if (flag & ~O_RDWR) {
137		_kvm_err(kd, kd->program, "bad flags arg");
138		goto failed;
139	}
140	if (mf == NULL)
141		mf = _PATH_MEM;
142
143	if ((kd->pmfd = open(mf, flag | O_CLOEXEC, 0)) < 0) {
144		_kvm_syserr(kd, kd->program, "%s", mf);
145		goto failed;
146	}
147	if (fstat(kd->pmfd, &st) < 0) {
148		_kvm_syserr(kd, kd->program, "%s", mf);
149		goto failed;
150	}
151	if (S_ISREG(st.st_mode) && st.st_size <= 0) {
152		errno = EINVAL;
153		_kvm_syserr(kd, kd->program, "empty file");
154		goto failed;
155	}
156	if (S_ISCHR(st.st_mode)) {
157		/*
158		 * If this is a character special device, then check that
159		 * it's /dev/mem.  If so, open kmem too.  (Maybe we should
160		 * make it work for either /dev/mem or /dev/kmem -- in either
161		 * case you're working with a live kernel.)
162		 */
163		if (strcmp(mf, _PATH_DEVNULL) == 0) {
164			kd->vmfd = open(_PATH_DEVNULL, O_RDONLY | O_CLOEXEC);
165			return (kd);
166		} else if (strcmp(mf, _PATH_MEM) == 0) {
167			if ((kd->vmfd = open(_PATH_KMEM, flag | O_CLOEXEC)) <
168			    0) {
169				_kvm_syserr(kd, kd->program, "%s", _PATH_KMEM);
170				goto failed;
171			}
172			return (kd);
173		}
174	}
175
176	/*
177	 * This is either a crash dump or a remote live system with its physical
178	 * memory fully accessible via a special device.
179	 * Open the namelist fd and determine the architecture.
180	 */
181	if ((kd->nlfd = open(uf, O_RDONLY | O_CLOEXEC, 0)) < 0) {
182		_kvm_syserr(kd, kd->program, "%s", uf);
183		goto failed;
184	}
185	if (_kvm_read_kernel_ehdr(kd) < 0)
186		goto failed;
187	if (strncmp(mf, _PATH_FWMEM, strlen(_PATH_FWMEM)) == 0 ||
188	    strncmp(mf, _PATH_DEVVMM, strlen(_PATH_DEVVMM)) == 0) {
189		kd->rawdump = 1;
190		kd->writable = 1;
191	}
192	SET_FOREACH(parch, kvm_arch) {
193		if ((*parch)->ka_probe(kd)) {
194			kd->arch = *parch;
195			break;
196		}
197	}
198	if (kd->arch == NULL) {
199		_kvm_err(kd, kd->program, "unsupported architecture");
200		goto failed;
201	}
202
203	/*
204	 * Non-native kernels require a symbol resolver.
205	 */
206	if (!kd->arch->ka_native(kd) && kd->resolve_symbol == NULL) {
207		_kvm_err(kd, kd->program,
208		    "non-native kernel requires a symbol resolver");
209		goto failed;
210	}
211
212	/*
213	 * Initialize the virtual address translation machinery.
214	 */
215	if (kd->arch->ka_initvtop(kd) < 0)
216		goto failed;
217	return (kd);
218failed:
219	/*
220	 * Copy out the error if doing sane error semantics.
221	 */
222	if (errout != NULL)
223		strlcpy(errout, kd->errbuf, _POSIX2_LINE_MAX);
224	(void)kvm_close(kd);
225	return (NULL);
226}
227
228kvm_t *
229kvm_openfiles(const char *uf, const char *mf, const char *sf __unused, int flag,
230    char *errout)
231{
232	kvm_t *kd;
233
234	if ((kd = calloc(1, sizeof(*kd))) == NULL) {
235		if (errout != NULL)
236			(void)strlcpy(errout, strerror(errno),
237			    _POSIX2_LINE_MAX);
238		return (NULL);
239	}
240	return (_kvm_open(kd, uf, mf, flag, errout));
241}
242
243kvm_t *
244kvm_open(const char *uf, const char *mf, const char *sf __unused, int flag,
245    const char *errstr)
246{
247	kvm_t *kd;
248
249	if ((kd = calloc(1, sizeof(*kd))) == NULL) {
250		if (errstr != NULL)
251			(void)fprintf(stderr, "%s: %s\n",
252				      errstr, strerror(errno));
253		return (NULL);
254	}
255	kd->program = errstr;
256	return (_kvm_open(kd, uf, mf, flag, NULL));
257}
258
259kvm_t *
260kvm_open2(const char *uf, const char *mf, int flag, char *errout,
261    int (*resolver)(const char *, kvaddr_t *))
262{
263	kvm_t *kd;
264
265	if ((kd = calloc(1, sizeof(*kd))) == NULL) {
266		if (errout != NULL)
267			(void)strlcpy(errout, strerror(errno),
268			    _POSIX2_LINE_MAX);
269		return (NULL);
270	}
271	kd->resolve_symbol = resolver;
272	return (_kvm_open(kd, uf, mf, flag, errout));
273}
274
275int
276kvm_close(kvm_t *kd)
277{
278	int error = 0;
279
280	if (kd == NULL) {
281		errno = EINVAL;
282		return (-1);
283	}
284	if (kd->vmst != NULL)
285		kd->arch->ka_freevtop(kd);
286	if (kd->pmfd >= 0)
287		error |= close(kd->pmfd);
288	if (kd->vmfd >= 0)
289		error |= close(kd->vmfd);
290	if (kd->nlfd >= 0)
291		error |= close(kd->nlfd);
292	if (kd->procbase != 0)
293		free((void *)kd->procbase);
294	if (kd->argbuf != 0)
295		free((void *) kd->argbuf);
296	if (kd->argspc != 0)
297		free((void *) kd->argspc);
298	if (kd->argv != 0)
299		free((void *)kd->argv);
300	if (kd->dpcpu_initialized != 0)
301		free(kd->dpcpu_off);
302	if (kd->pt_map != NULL)
303		free(kd->pt_map);
304	if (kd->page_map != NULL)
305		free(kd->page_map);
306	if (kd->sparse_map != MAP_FAILED)
307		munmap(kd->sparse_map, kd->pt_sparse_size);
308	free((void *)kd);
309
310	return (error);
311}
312
313int
314kvm_nlist2(kvm_t *kd, struct kvm_nlist *nl)
315{
316
317	/*
318	 * If called via the public interface, permit initialization of
319	 * further virtualized modules on demand.
320	 */
321	return (_kvm_nlist(kd, nl, 1));
322}
323
324int
325kvm_nlist(kvm_t *kd, struct nlist *nl)
326{
327	struct kvm_nlist *kl;
328	int count, i, nfail;
329
330	/*
331	 * Avoid reporting truncated addresses by failing for non-native
332	 * cores.
333	 */
334	if (!kvm_native(kd)) {
335		_kvm_err(kd, kd->program, "kvm_nlist of non-native vmcore");
336		return (-1);
337	}
338
339	for (count = 0; nl[count].n_name != NULL && nl[count].n_name[0] != '\0';
340	     count++)
341		;
342	if (count == 0)
343		return (0);
344	kl = calloc(count + 1, sizeof(*kl));
345	if (kl == NULL) {
346		_kvm_err(kd, kd->program, "cannot allocate memory");
347		return (-1);
348	}
349	for (i = 0; i < count; i++)
350		kl[i].n_name = nl[i].n_name;
351	nfail = kvm_nlist2(kd, kl);
352	for (i = 0; i < count; i++) {
353		nl[i].n_type = kl[i].n_type;
354		nl[i].n_other = 0;
355		nl[i].n_desc = 0;
356		nl[i].n_value = kl[i].n_value;
357	}
358	free(kl);
359	return (nfail);
360}
361
362ssize_t
363kvm_read(kvm_t *kd, u_long kva, void *buf, size_t len)
364{
365
366	return (kvm_read2(kd, kva, buf, len));
367}
368
369ssize_t
370kvm_read2(kvm_t *kd, kvaddr_t kva, void *buf, size_t len)
371{
372	int cc;
373	ssize_t cr;
374	off_t pa;
375	char *cp;
376
377	if (ISALIVE(kd)) {
378		/*
379		 * We're using /dev/kmem.  Just read straight from the
380		 * device and let the active kernel do the address translation.
381		 */
382		errno = 0;
383		if (lseek(kd->vmfd, (off_t)kva, 0) == -1 && errno != 0) {
384			_kvm_err(kd, 0, "invalid address (0x%jx)",
385			    (uintmax_t)kva);
386			return (-1);
387		}
388		cr = read(kd->vmfd, buf, len);
389		if (cr < 0) {
390			_kvm_syserr(kd, 0, "kvm_read");
391			return (-1);
392		} else if (cr < (ssize_t)len)
393			_kvm_err(kd, kd->program, "short read");
394		return (cr);
395	}
396
397	cp = buf;
398	while (len > 0) {
399		cc = kd->arch->ka_kvatop(kd, kva, &pa);
400		if (cc == 0)
401			return (-1);
402		if (cc > (ssize_t)len)
403			cc = len;
404		errno = 0;
405		if (lseek(kd->pmfd, pa, 0) == -1 && errno != 0) {
406			_kvm_syserr(kd, 0, _PATH_MEM);
407			break;
408		}
409		cr = read(kd->pmfd, cp, cc);
410		if (cr < 0) {
411			_kvm_syserr(kd, kd->program, "kvm_read");
412			break;
413		}
414		/*
415		 * If ka_kvatop returns a bogus value or our core file is
416		 * truncated, we might wind up seeking beyond the end of the
417		 * core file in which case the read will return 0 (EOF).
418		 */
419		if (cr == 0)
420			break;
421		cp += cr;
422		kva += cr;
423		len -= cr;
424	}
425
426	return (cp - (char *)buf);
427}
428
429ssize_t
430kvm_write(kvm_t *kd, u_long kva, const void *buf, size_t len)
431{
432	int cc;
433	ssize_t cw;
434	off_t pa;
435	const char *cp;
436
437	if (!ISALIVE(kd) && !kd->writable) {
438		_kvm_err(kd, kd->program,
439		    "kvm_write not implemented for dead kernels");
440		return (-1);
441	}
442
443	if (ISALIVE(kd)) {
444		/*
445		 * Just like kvm_read, only we write.
446		 */
447		errno = 0;
448		if (lseek(kd->vmfd, (off_t)kva, 0) == -1 && errno != 0) {
449			_kvm_err(kd, 0, "invalid address (%lx)", kva);
450			return (-1);
451		}
452		cc = write(kd->vmfd, buf, len);
453		if (cc < 0) {
454			_kvm_syserr(kd, 0, "kvm_write");
455			return (-1);
456		} else if ((size_t)cc < len)
457			_kvm_err(kd, kd->program, "short write");
458		return (cc);
459	}
460
461	cp = buf;
462	while (len > 0) {
463		cc = kd->arch->ka_kvatop(kd, kva, &pa);
464		if (cc == 0)
465			return (-1);
466		if (cc > (ssize_t)len)
467			cc = len;
468		errno = 0;
469		if (lseek(kd->pmfd, pa, 0) == -1 && errno != 0) {
470			_kvm_syserr(kd, 0, _PATH_MEM);
471			break;
472		}
473		cw = write(kd->pmfd, cp, cc);
474		if (cw < 0) {
475			_kvm_syserr(kd, kd->program, "kvm_write");
476			break;
477		}
478		/*
479		 * If ka_kvatop returns a bogus value or our core file is
480		 * truncated, we might wind up seeking beyond the end of the
481		 * core file in which case the read will return 0 (EOF).
482		 */
483		if (cw == 0)
484			break;
485		cp += cw;
486		kva += cw;
487		len -= cw;
488	}
489
490	return (cp - (const char *)buf);
491}
492
493int
494kvm_native(kvm_t *kd)
495{
496
497	if (ISALIVE(kd))
498		return (1);
499	return (kd->arch->ka_native(kd));
500}
501
502int
503kvm_walk_pages(kvm_t *kd, kvm_walk_pages_cb_t *cb, void *closure)
504{
505
506	if (kd->arch->ka_walk_pages == NULL)
507		return (0);
508
509	return (kd->arch->ka_walk_pages(kd, cb, closure));
510}
511
512kssize_t
513kvm_kerndisp(kvm_t *kd)
514{
515	unsigned long kernbase, rel_kernbase;
516	size_t kernbase_len = sizeof(kernbase);
517	size_t rel_kernbase_len = sizeof(rel_kernbase);
518
519	if (ISALIVE(kd)) {
520		if (sysctlbyname("kern.base_address", &kernbase,
521		    &kernbase_len, NULL, 0) == -1) {
522			_kvm_syserr(kd, kd->program,
523				"failed to get kernel base address");
524			return (0);
525		}
526		if (sysctlbyname("kern.relbase_address", &rel_kernbase,
527		    &rel_kernbase_len, NULL, 0) == -1) {
528			_kvm_syserr(kd, kd->program,
529				"failed to get relocated kernel base address");
530			return (0);
531		}
532		return (rel_kernbase - kernbase);
533	}
534
535	if (kd->arch->ka_kerndisp == NULL)
536		return (0);
537
538	return (kd->arch->ka_kerndisp(kd));
539}
540