1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1989, 1992, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software developed by the Computer Systems
8 * Engineering group at Lawrence Berkeley Laboratory under DARPA contract
9 * BG 91-66 and contributed to Berkeley.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD$");
38__SCCSID("@(#)kvm.c	8.2 (Berkeley) 2/13/94");
39
40#include <sys/param.h>
41#include <sys/fnv_hash.h>
42
43#define	_WANT_VNET
44
45#include <sys/user.h>
46#include <sys/linker.h>
47#include <sys/pcpu.h>
48#include <sys/stat.h>
49#include <sys/mman.h>
50
51#include <net/vnet.h>
52
53#include <fcntl.h>
54#include <kvm.h>
55#include <limits.h>
56#include <paths.h>
57#include <stdint.h>
58#include <stdio.h>
59#include <stdlib.h>
60#include <string.h>
61#include <unistd.h>
62
63#include "kvm_private.h"
64
/*
 * Set of struct kvm_arch entries.  _kvm_open() walks it with SET_FOREACH
 * and selects the first entry whose ka_probe() accepts the descriptor.
 */
SET_DECLARE(kvm_arch, struct kvm_arch);

/* Empty string handed back by kvm_geterr() when its argument is NULL. */
static char _kd_is_null[] = "";
68
69char *
70kvm_geterr(kvm_t *kd)
71{
72
73	if (kd == NULL)
74		return (_kd_is_null);
75	return (kd->errbuf);
76}
77
78static int
79_kvm_read_kernel_ehdr(kvm_t *kd)
80{
81	Elf *elf;
82
83	if (elf_version(EV_CURRENT) == EV_NONE) {
84		_kvm_err(kd, kd->program, "Unsupported libelf");
85		return (-1);
86	}
87	elf = elf_begin(kd->nlfd, ELF_C_READ, NULL);
88	if (elf == NULL) {
89		_kvm_err(kd, kd->program, "%s", elf_errmsg(0));
90		return (-1);
91	}
92	if (elf_kind(elf) != ELF_K_ELF) {
93		_kvm_err(kd, kd->program, "kernel is not an ELF file");
94		return (-1);
95	}
96	if (gelf_getehdr(elf, &kd->nlehdr) == NULL) {
97		_kvm_err(kd, kd->program, "%s", elf_errmsg(0));
98		elf_end(elf);
99		return (-1);
100	}
101	elf_end(elf);
102
103	switch (kd->nlehdr.e_ident[EI_DATA]) {
104	case ELFDATA2LSB:
105	case ELFDATA2MSB:
106		return (0);
107	default:
108		_kvm_err(kd, kd->program,
109		    "unsupported ELF data encoding for kernel");
110		return (-1);
111	}
112}
113
114static kvm_t *
115_kvm_open(kvm_t *kd, const char *uf, const char *mf, int flag, char *errout)
116{
117	struct kvm_arch **parch;
118	struct stat st;
119
120	kd->vmfd = -1;
121	kd->pmfd = -1;
122	kd->nlfd = -1;
123	kd->vmst = NULL;
124	kd->procbase = NULL;
125	kd->argspc = NULL;
126	kd->argv = NULL;
127
128	if (uf == NULL)
129		uf = getbootfile();
130	else if (strlen(uf) >= MAXPATHLEN) {
131		_kvm_err(kd, kd->program, "exec file name too long");
132		goto failed;
133	}
134	if (flag & ~O_RDWR) {
135		_kvm_err(kd, kd->program, "bad flags arg");
136		goto failed;
137	}
138	if (mf == NULL)
139		mf = _PATH_MEM;
140
141	if ((kd->pmfd = open(mf, flag | O_CLOEXEC, 0)) < 0) {
142		_kvm_syserr(kd, kd->program, "%s", mf);
143		goto failed;
144	}
145	if (fstat(kd->pmfd, &st) < 0) {
146		_kvm_syserr(kd, kd->program, "%s", mf);
147		goto failed;
148	}
149	if (S_ISREG(st.st_mode) && st.st_size <= 0) {
150		errno = EINVAL;
151		_kvm_syserr(kd, kd->program, "empty file");
152		goto failed;
153	}
154	if (S_ISCHR(st.st_mode)) {
155		/*
156		 * If this is a character special device, then check that
157		 * it's /dev/mem.  If so, open kmem too.  (Maybe we should
158		 * make it work for either /dev/mem or /dev/kmem -- in either
159		 * case you're working with a live kernel.)
160		 */
161		if (strcmp(mf, _PATH_DEVNULL) == 0) {
162			kd->vmfd = open(_PATH_DEVNULL, O_RDONLY | O_CLOEXEC);
163			return (kd);
164		} else if (strcmp(mf, _PATH_MEM) == 0) {
165			if ((kd->vmfd = open(_PATH_KMEM, flag | O_CLOEXEC)) <
166			    0) {
167				_kvm_syserr(kd, kd->program, "%s", _PATH_KMEM);
168				goto failed;
169			}
170			return (kd);
171		}
172	}
173
174	/*
175	 * This is either a crash dump or a remote live system with its physical
176	 * memory fully accessible via a special device.
177	 * Open the namelist fd and determine the architecture.
178	 */
179	if ((kd->nlfd = open(uf, O_RDONLY | O_CLOEXEC, 0)) < 0) {
180		_kvm_syserr(kd, kd->program, "%s", uf);
181		goto failed;
182	}
183	if (_kvm_read_kernel_ehdr(kd) < 0)
184		goto failed;
185	if (strncmp(mf, _PATH_FWMEM, strlen(_PATH_FWMEM)) == 0 ||
186	    strncmp(mf, _PATH_DEVVMM, strlen(_PATH_DEVVMM)) == 0) {
187		kd->rawdump = 1;
188		kd->writable = 1;
189	}
190	SET_FOREACH(parch, kvm_arch) {
191		if ((*parch)->ka_probe(kd)) {
192			kd->arch = *parch;
193			break;
194		}
195	}
196	if (kd->arch == NULL) {
197		_kvm_err(kd, kd->program, "unsupported architecture");
198		goto failed;
199	}
200
201	/*
202	 * Non-native kernels require a symbol resolver.
203	 */
204	if (!kd->arch->ka_native(kd) && kd->resolve_symbol == NULL) {
205		_kvm_err(kd, kd->program,
206		    "non-native kernel requires a symbol resolver");
207		goto failed;
208	}
209
210	/*
211	 * Initialize the virtual address translation machinery.
212	 */
213	if (kd->arch->ka_initvtop(kd) < 0)
214		goto failed;
215	return (kd);
216failed:
217	/*
218	 * Copy out the error if doing sane error semantics.
219	 */
220	if (errout != NULL)
221		strlcpy(errout, kd->errbuf, _POSIX2_LINE_MAX);
222	(void)kvm_close(kd);
223	return (NULL);
224}
225
226kvm_t *
227kvm_openfiles(const char *uf, const char *mf, const char *sf __unused, int flag,
228    char *errout)
229{
230	kvm_t *kd;
231
232	if ((kd = calloc(1, sizeof(*kd))) == NULL) {
233		if (errout != NULL)
234			(void)strlcpy(errout, strerror(errno),
235			    _POSIX2_LINE_MAX);
236		return (NULL);
237	}
238	return (_kvm_open(kd, uf, mf, flag, errout));
239}
240
241kvm_t *
242kvm_open(const char *uf, const char *mf, const char *sf __unused, int flag,
243    const char *errstr)
244{
245	kvm_t *kd;
246
247	if ((kd = calloc(1, sizeof(*kd))) == NULL) {
248		if (errstr != NULL)
249			(void)fprintf(stderr, "%s: %s\n",
250				      errstr, strerror(errno));
251		return (NULL);
252	}
253	kd->program = errstr;
254	return (_kvm_open(kd, uf, mf, flag, NULL));
255}
256
257kvm_t *
258kvm_open2(const char *uf, const char *mf, int flag, char *errout,
259    int (*resolver)(const char *, kvaddr_t *))
260{
261	kvm_t *kd;
262
263	if ((kd = calloc(1, sizeof(*kd))) == NULL) {
264		if (errout != NULL)
265			(void)strlcpy(errout, strerror(errno),
266			    _POSIX2_LINE_MAX);
267		return (NULL);
268	}
269	kd->resolve_symbol = resolver;
270	return (_kvm_open(kd, uf, mf, flag, errout));
271}
272
273int
274kvm_close(kvm_t *kd)
275{
276	int error = 0;
277
278	if (kd == NULL) {
279		errno = EINVAL;
280		return (-1);
281	}
282	if (kd->vmst != NULL)
283		kd->arch->ka_freevtop(kd);
284	if (kd->pmfd >= 0)
285		error |= close(kd->pmfd);
286	if (kd->vmfd >= 0)
287		error |= close(kd->vmfd);
288	if (kd->nlfd >= 0)
289		error |= close(kd->nlfd);
290	if (kd->procbase != 0)
291		free((void *)kd->procbase);
292	if (kd->argbuf != 0)
293		free((void *) kd->argbuf);
294	if (kd->argspc != 0)
295		free((void *) kd->argspc);
296	if (kd->argv != 0)
297		free((void *)kd->argv);
298	if (kd->dpcpu_initialized != 0)
299		free(kd->dpcpu_off);
300	if (kd->pt_map != NULL)
301		free(kd->pt_map);
302	if (kd->page_map != NULL)
303		free(kd->page_map);
304	if (kd->sparse_map != MAP_FAILED)
305		munmap(kd->sparse_map, kd->pt_sparse_size);
306	free((void *)kd);
307
308	return (error);
309}
310
311int
312kvm_nlist2(kvm_t *kd, struct kvm_nlist *nl)
313{
314
315	/*
316	 * If called via the public interface, permit initialization of
317	 * further virtualized modules on demand.
318	 */
319	return (_kvm_nlist(kd, nl, 1));
320}
321
322int
323kvm_nlist(kvm_t *kd, struct nlist *nl)
324{
325	struct kvm_nlist *kl;
326	int count, i, nfail;
327
328	/*
329	 * Avoid reporting truncated addresses by failing for non-native
330	 * cores.
331	 */
332	if (!kvm_native(kd)) {
333		_kvm_err(kd, kd->program, "kvm_nlist of non-native vmcore");
334		return (-1);
335	}
336
337	for (count = 0; nl[count].n_name != NULL && nl[count].n_name[0] != '\0';
338	     count++)
339		;
340	if (count == 0)
341		return (0);
342	kl = calloc(count + 1, sizeof(*kl));
343	if (kl == NULL) {
344		_kvm_err(kd, kd->program, "cannot allocate memory");
345		return (-1);
346	}
347	for (i = 0; i < count; i++)
348		kl[i].n_name = nl[i].n_name;
349	nfail = kvm_nlist2(kd, kl);
350	for (i = 0; i < count; i++) {
351		nl[i].n_type = kl[i].n_type;
352		nl[i].n_other = 0;
353		nl[i].n_desc = 0;
354		nl[i].n_value = kl[i].n_value;
355	}
356	free(kl);
357	return (nfail);
358}
359
360ssize_t
361kvm_read(kvm_t *kd, u_long kva, void *buf, size_t len)
362{
363
364	return (kvm_read2(kd, kva, buf, len));
365}
366
367ssize_t
368kvm_read2(kvm_t *kd, kvaddr_t kva, void *buf, size_t len)
369{
370	int cc;
371	ssize_t cr;
372	off_t pa;
373	char *cp;
374
375	if (ISALIVE(kd)) {
376		/*
377		 * We're using /dev/kmem.  Just read straight from the
378		 * device and let the active kernel do the address translation.
379		 */
380		errno = 0;
381		if (lseek(kd->vmfd, (off_t)kva, 0) == -1 && errno != 0) {
382			_kvm_err(kd, 0, "invalid address (0x%jx)",
383			    (uintmax_t)kva);
384			return (-1);
385		}
386		cr = read(kd->vmfd, buf, len);
387		if (cr < 0) {
388			_kvm_syserr(kd, 0, "kvm_read");
389			return (-1);
390		} else if (cr < (ssize_t)len)
391			_kvm_err(kd, kd->program, "short read");
392		return (cr);
393	}
394
395	cp = buf;
396	while (len > 0) {
397		cc = kd->arch->ka_kvatop(kd, kva, &pa);
398		if (cc == 0)
399			return (-1);
400		if (cc > (ssize_t)len)
401			cc = len;
402		errno = 0;
403		if (lseek(kd->pmfd, pa, 0) == -1 && errno != 0) {
404			_kvm_syserr(kd, 0, _PATH_MEM);
405			break;
406		}
407		cr = read(kd->pmfd, cp, cc);
408		if (cr < 0) {
409			_kvm_syserr(kd, kd->program, "kvm_read");
410			break;
411		}
412		/*
413		 * If ka_kvatop returns a bogus value or our core file is
414		 * truncated, we might wind up seeking beyond the end of the
415		 * core file in which case the read will return 0 (EOF).
416		 */
417		if (cr == 0)
418			break;
419		cp += cr;
420		kva += cr;
421		len -= cr;
422	}
423
424	return (cp - (char *)buf);
425}
426
427ssize_t
428kvm_write(kvm_t *kd, u_long kva, const void *buf, size_t len)
429{
430	int cc;
431	ssize_t cw;
432	off_t pa;
433	const char *cp;
434
435	if (!ISALIVE(kd) && !kd->writable) {
436		_kvm_err(kd, kd->program,
437		    "kvm_write not implemented for dead kernels");
438		return (-1);
439	}
440
441	if (ISALIVE(kd)) {
442		/*
443		 * Just like kvm_read, only we write.
444		 */
445		errno = 0;
446		if (lseek(kd->vmfd, (off_t)kva, 0) == -1 && errno != 0) {
447			_kvm_err(kd, 0, "invalid address (%lx)", kva);
448			return (-1);
449		}
450		cc = write(kd->vmfd, buf, len);
451		if (cc < 0) {
452			_kvm_syserr(kd, 0, "kvm_write");
453			return (-1);
454		} else if ((size_t)cc < len)
455			_kvm_err(kd, kd->program, "short write");
456		return (cc);
457	}
458
459	cp = buf;
460	while (len > 0) {
461		cc = kd->arch->ka_kvatop(kd, kva, &pa);
462		if (cc == 0)
463			return (-1);
464		if (cc > (ssize_t)len)
465			cc = len;
466		errno = 0;
467		if (lseek(kd->pmfd, pa, 0) == -1 && errno != 0) {
468			_kvm_syserr(kd, 0, _PATH_MEM);
469			break;
470		}
471		cw = write(kd->pmfd, cp, cc);
472		if (cw < 0) {
473			_kvm_syserr(kd, kd->program, "kvm_write");
474			break;
475		}
476		/*
477		 * If ka_kvatop returns a bogus value or our core file is
478		 * truncated, we might wind up seeking beyond the end of the
479		 * core file in which case the read will return 0 (EOF).
480		 */
481		if (cw == 0)
482			break;
483		cp += cw;
484		kva += cw;
485		len -= cw;
486	}
487
488	return (cp - (const char *)buf);
489}
490
491int
492kvm_native(kvm_t *kd)
493{
494
495	if (ISALIVE(kd))
496		return (1);
497	return (kd->arch->ka_native(kd));
498}
499
500int
501kvm_walk_pages(kvm_t *kd, kvm_walk_pages_cb_t *cb, void *closure)
502{
503
504	if (kd->arch->ka_walk_pages == NULL)
505		return (0);
506
507	return (kd->arch->ka_walk_pages(kd, cb, closure));
508}
509