1/*-
2 * Copyright (c) 2008-2009, Stacey Son <sson@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/10/sys/dev/ksyms/ksyms.c 321742 2017-07-31 00:41:05Z markj $
27 */
28
29#include <sys/param.h>
30#include <sys/systm.h>
31#include <sys/kernel.h>
32
33#include <sys/conf.h>
34#include <sys/elf.h>
35#include <sys/ksyms.h>
36#include <sys/linker.h>
37#include <sys/malloc.h>
38#include <sys/mman.h>
39#include <sys/module.h>
40#include <sys/mutex.h>
41#include <sys/proc.h>
42#include <sys/queue.h>
43#include <sys/resourcevar.h>
44#include <sys/stat.h>
45#include <sys/uio.h>
46
47#include <machine/elf.h>
48
49#include <vm/pmap.h>
50#include <vm/vm.h>
51#include <vm/vm_extern.h>
52#include <vm/vm_map.h>
53
54#include "linker_if.h"
55
/*
 * Indices of the section headers in ksyms_hdr.kh_shdr[]; SHDR_NUM is the
 * number of section headers in the snapshot image.
 */
enum {
	SHDR_NULL	= 0,
	SHDR_SYMTAB	= 1,
	SHDR_STRTAB	= 2,
	SHDR_SHSTRTAB	= 3,

	SHDR_NUM	= 4
};

/*
 * Section names.  These stay macros because they are pasted together into
 * ksyms_shstrtab and measured with sizeof().
 */
#define STR_SYMTAB	".symtab"
#define STR_STRTAB	".strtab"
#define STR_SHSTRTAB	".shstrtab"

/* Name used for both the cdevsw and the device node. */
#define KSYMS_DNAME	"ksyms"
68
69static d_open_t ksyms_open;
70static d_read_t ksyms_read;
71static d_close_t ksyms_close;
72static d_ioctl_t ksyms_ioctl;
73static d_mmap_t ksyms_mmap;
74
75static struct cdevsw ksyms_cdevsw = {
76	.d_version =	D_VERSION,
77	.d_flags =	D_TRACKCLOSE,
78	.d_open =	ksyms_open,
79	.d_close =	ksyms_close,
80	.d_read =	ksyms_read,
81	.d_ioctl =	ksyms_ioctl,
82	.d_mmap =	ksyms_mmap,
83	.d_name =	KSYMS_DNAME
84};
85
86struct ksyms_softc {
87	LIST_ENTRY(ksyms_softc)	sc_list;
88	vm_offset_t		sc_uaddr;
89	size_t			sc_usize;
90	pmap_t			sc_pmap;
91	struct proc	       *sc_proc;
92};
93
94static struct mtx		 ksyms_mtx;
95static struct cdev		*ksyms_dev;
96static LIST_HEAD(, ksyms_softc)	 ksyms_list = LIST_HEAD_INITIALIZER(ksyms_list);
97
98static const char	ksyms_shstrtab[] =
99	"\0" STR_SYMTAB "\0" STR_STRTAB "\0" STR_SHSTRTAB "\0";
100
101struct ksyms_hdr {
102	Elf_Ehdr	kh_ehdr;
103	Elf_Phdr	kh_txtphdr;
104	Elf_Phdr	kh_datphdr;
105	Elf_Shdr	kh_shdr[SHDR_NUM];
106	char		kh_shstrtab[sizeof(ksyms_shstrtab)];
107};
108
/* Running totals gathered by ksyms_size_calc(), in bytes. */
struct tsizes {
	size_t	ts_symsz;	/* sum of all symbol table sizes */
	size_t	ts_strsz;	/* sum of all string table sizes */
};
113
114struct toffsets {
115	vm_offset_t	to_symoff;
116	vm_offset_t	to_stroff;
117	unsigned	to_stridx;
118	size_t		to_resid;
119};
120
/* malloc(9) type used for every allocation made in this file. */
static MALLOC_DEFINE(M_KSYMS, "KSYMS", "Kernel Symbol Table");
122
123/*
124 * Get the symbol and string table sizes for a kernel module. Add it to the
125 * running total.
126 */
127static int
128ksyms_size_permod(linker_file_t lf, void *arg)
129{
130	struct tsizes *ts;
131	const Elf_Sym *symtab;
132	caddr_t strtab;
133	long syms;
134
135	ts = arg;
136
137	syms = LINKER_SYMTAB_GET(lf, &symtab);
138	ts->ts_symsz += syms * sizeof(Elf_Sym);
139	ts->ts_strsz += LINKER_STRTAB_GET(lf, &strtab);
140
141	return (0);
142}
143
144/*
145 * For kernel module get the symbol and string table sizes, returning the
146 * totals in *ts.
147 */
148static void
149ksyms_size_calc(struct tsizes *ts)
150{
151
152	ts->ts_symsz = 0;
153	ts->ts_strsz = 0;
154
155	(void)linker_file_foreach(ksyms_size_permod, ts);
156}
157
/*
 * Copy sz bytes from kernel address src to the user address held in des,
 * then advance des by sz.  The result of copyout() is discarded.  des must
 * be a modifiable lvalue.
 */
#define KSYMS_EMIT(src, des, sz) do {			\
	(void)copyout((src), (void *)(des), (sz));	\
	(des) += (sz);					\
} while (0)

/* Size of the bounce buffer used by ksyms_add(): room for 256 symbols. */
#define SYMBLKSZ	(256 * sizeof(Elf_Sym))
164
165/*
166 * For a kernel module, add the symbol and string tables into the
167 * snapshot buffer.  Fix up the offsets in the tables.
168 */
169static int
170ksyms_add(linker_file_t lf, void *arg)
171{
172	char *buf;
173	struct toffsets *to;
174	const Elf_Sym *symtab;
175	Elf_Sym *symp;
176	caddr_t strtab;
177	long symsz;
178	size_t strsz, numsyms;
179	linker_symval_t symval;
180	int i, nsyms, len;
181
182	to = arg;
183
184	MOD_SLOCK;
185	numsyms =  LINKER_SYMTAB_GET(lf, &symtab);
186	strsz = LINKER_STRTAB_GET(lf, &strtab);
187	symsz = numsyms * sizeof(Elf_Sym);
188
189	buf = malloc(SYMBLKSZ, M_KSYMS, M_WAITOK);
190
191	while (symsz > 0) {
192		len = min(SYMBLKSZ, symsz);
193		bcopy(symtab, buf, len);
194
195		/*
196		 * Fix up symbol table for kernel modules:
197		 *   string offsets need adjusted
198		 *   symbol values made absolute
199		 */
200		symp = (Elf_Sym *) buf;
201		nsyms = len / sizeof(Elf_Sym);
202		for (i = 0; i < nsyms; i++) {
203			symp[i].st_name += to->to_stridx;
204			if (lf->id > 1 && LINKER_SYMBOL_VALUES(lf,
205			    (c_linker_sym_t)&symtab[i], &symval) == 0) {
206				symp[i].st_value = (uintptr_t)symval.value;
207			}
208		}
209
210		if (len > to->to_resid) {
211			MOD_SUNLOCK;
212			free(buf, M_KSYMS);
213			return (ENXIO);
214		}
215		to->to_resid -= len;
216		KSYMS_EMIT(buf, to->to_symoff, len);
217
218		symtab += nsyms;
219		symsz -= len;
220	}
221	free(buf, M_KSYMS);
222	MOD_SUNLOCK;
223
224	if (strsz > to->to_resid)
225		return (ENXIO);
226	to->to_resid -= strsz;
227	KSYMS_EMIT(strtab, to->to_stroff, strsz);
228	to->to_stridx += strsz;
229
230	return (0);
231}
232
233/*
234 * Create a single ELF symbol table for the kernel and kernel modules loaded
235 * at this time. Write this snapshot out in the process address space. Return
236 * 0 on success, otherwise error.
237 */
238static int
239ksyms_snapshot(struct tsizes *ts, vm_offset_t uaddr, size_t resid)
240{
241	struct ksyms_hdr *hdr;
242	struct toffsets	to;
243	int error = 0;
244
245	hdr = malloc(sizeof(*hdr), M_KSYMS, M_WAITOK | M_ZERO);
246
247	/*
248	 * Create the ELF header.
249	 */
250	hdr->kh_ehdr.e_ident[EI_PAD] = 0;
251	hdr->kh_ehdr.e_ident[EI_MAG0] = ELFMAG0;
252	hdr->kh_ehdr.e_ident[EI_MAG1] = ELFMAG1;
253	hdr->kh_ehdr.e_ident[EI_MAG2] = ELFMAG2;
254	hdr->kh_ehdr.e_ident[EI_MAG3] = ELFMAG3;
255	hdr->kh_ehdr.e_ident[EI_DATA] = ELF_DATA;
256	hdr->kh_ehdr.e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
257	hdr->kh_ehdr.e_ident[EI_CLASS] = ELF_CLASS;
258	hdr->kh_ehdr.e_ident[EI_VERSION] = EV_CURRENT;
259	hdr->kh_ehdr.e_ident[EI_ABIVERSION] = 0;
260	hdr->kh_ehdr.e_type = ET_EXEC;
261	hdr->kh_ehdr.e_machine = ELF_ARCH;
262	hdr->kh_ehdr.e_version = EV_CURRENT;
263	hdr->kh_ehdr.e_entry = 0;
264	hdr->kh_ehdr.e_phoff = offsetof(struct ksyms_hdr, kh_txtphdr);
265	hdr->kh_ehdr.e_shoff = offsetof(struct ksyms_hdr, kh_shdr);
266	hdr->kh_ehdr.e_flags = 0;
267	hdr->kh_ehdr.e_ehsize = sizeof(Elf_Ehdr);
268	hdr->kh_ehdr.e_phentsize = sizeof(Elf_Phdr);
269	hdr->kh_ehdr.e_phnum = 2;	/* Text and Data */
270	hdr->kh_ehdr.e_shentsize = sizeof(Elf_Shdr);
271	hdr->kh_ehdr.e_shnum = SHDR_NUM;
272	hdr->kh_ehdr.e_shstrndx = SHDR_SHSTRTAB;
273
274	/*
275	 * Add both the text and data program headers.
276	 */
277	hdr->kh_txtphdr.p_type = PT_LOAD;
278	/* XXX - is there a way to put the actual .text addr/size here? */
279	hdr->kh_txtphdr.p_vaddr = 0;
280	hdr->kh_txtphdr.p_memsz = 0;
281	hdr->kh_txtphdr.p_flags = PF_R | PF_X;
282
283	hdr->kh_datphdr.p_type = PT_LOAD;
284	/* XXX - is there a way to put the actual .data addr/size here? */
285	hdr->kh_datphdr.p_vaddr = 0;
286	hdr->kh_datphdr.p_memsz = 0;
287	hdr->kh_datphdr.p_flags = PF_R | PF_W | PF_X;
288
289	/*
290	 * Add the section headers: null, symtab, strtab, shstrtab.
291	 */
292
293	/* First section header - null */
294
295	/* Second section header - symtab */
296	hdr->kh_shdr[SHDR_SYMTAB].sh_name = 1; /* String offset (skip null) */
297	hdr->kh_shdr[SHDR_SYMTAB].sh_type = SHT_SYMTAB;
298	hdr->kh_shdr[SHDR_SYMTAB].sh_flags = 0;
299	hdr->kh_shdr[SHDR_SYMTAB].sh_addr = 0;
300	hdr->kh_shdr[SHDR_SYMTAB].sh_offset = sizeof(*hdr);
301	hdr->kh_shdr[SHDR_SYMTAB].sh_size = ts->ts_symsz;
302	hdr->kh_shdr[SHDR_SYMTAB].sh_link = SHDR_STRTAB;
303	hdr->kh_shdr[SHDR_SYMTAB].sh_info = ts->ts_symsz / sizeof(Elf_Sym);
304	hdr->kh_shdr[SHDR_SYMTAB].sh_addralign = sizeof(long);
305	hdr->kh_shdr[SHDR_SYMTAB].sh_entsize = sizeof(Elf_Sym);
306
307	/* Third section header - strtab */
308	hdr->kh_shdr[SHDR_STRTAB].sh_name = 1 + sizeof(STR_SYMTAB);
309	hdr->kh_shdr[SHDR_STRTAB].sh_type = SHT_STRTAB;
310	hdr->kh_shdr[SHDR_STRTAB].sh_flags = 0;
311	hdr->kh_shdr[SHDR_STRTAB].sh_addr = 0;
312	hdr->kh_shdr[SHDR_STRTAB].sh_offset =
313	    hdr->kh_shdr[SHDR_SYMTAB].sh_offset + ts->ts_symsz;
314	hdr->kh_shdr[SHDR_STRTAB].sh_size = ts->ts_strsz;
315	hdr->kh_shdr[SHDR_STRTAB].sh_link = 0;
316	hdr->kh_shdr[SHDR_STRTAB].sh_info = 0;
317	hdr->kh_shdr[SHDR_STRTAB].sh_addralign = sizeof(char);
318	hdr->kh_shdr[SHDR_STRTAB].sh_entsize = 0;
319
320	/* Fourth section - shstrtab */
321	hdr->kh_shdr[SHDR_SHSTRTAB].sh_name = 1 + sizeof(STR_SYMTAB) +
322	    sizeof(STR_STRTAB);
323	hdr->kh_shdr[SHDR_SHSTRTAB].sh_type = SHT_STRTAB;
324	hdr->kh_shdr[SHDR_SHSTRTAB].sh_flags = 0;
325	hdr->kh_shdr[SHDR_SHSTRTAB].sh_addr = 0;
326	hdr->kh_shdr[SHDR_SHSTRTAB].sh_offset =
327	    offsetof(struct ksyms_hdr, kh_shstrtab);
328	hdr->kh_shdr[SHDR_SHSTRTAB].sh_size = sizeof(ksyms_shstrtab);
329	hdr->kh_shdr[SHDR_SHSTRTAB].sh_link = 0;
330	hdr->kh_shdr[SHDR_SHSTRTAB].sh_info = 0;
331	hdr->kh_shdr[SHDR_SHSTRTAB].sh_addralign = 0 /* sizeof(char) */;
332	hdr->kh_shdr[SHDR_SHSTRTAB].sh_entsize = 0;
333
334	/* Copy shstrtab into the header. */
335	bcopy(ksyms_shstrtab, hdr->kh_shstrtab, sizeof(ksyms_shstrtab));
336
337	to.to_symoff = uaddr + hdr->kh_shdr[SHDR_SYMTAB].sh_offset;
338	to.to_stroff = uaddr + hdr->kh_shdr[SHDR_STRTAB].sh_offset;
339	to.to_stridx = 0;
340	if (sizeof(struct ksyms_hdr) > resid) {
341		free(hdr, M_KSYMS);
342		return (ENXIO);
343	}
344	to.to_resid = resid - sizeof(struct ksyms_hdr);
345
346	/* emit header */
347	copyout(hdr, (void *)uaddr, sizeof(struct ksyms_hdr));
348
349	free(hdr, M_KSYMS);
350
351	/* Add symbol and string tables for each kernel module. */
352	error = linker_file_foreach(ksyms_add, &to);
353
354	if (to.to_resid != 0)
355		return (ENXIO);
356
357	return (error);
358}
359
360static void
361ksyms_cdevpriv_dtr(void *data)
362{
363	struct ksyms_softc *sc;
364
365	sc = (struct ksyms_softc *)data;
366
367	mtx_lock(&ksyms_mtx);
368	LIST_REMOVE(sc, sc_list);
369	mtx_unlock(&ksyms_mtx);
370	free(sc, M_KSYMS);
371}
372
373static int
374ksyms_open(struct cdev *dev, int flags, int fmt __unused, struct thread *td)
375{
376	struct tsizes ts;
377	struct ksyms_softc *sc;
378	size_t total_elf_sz;
379	int error, try;
380
381	/*
382	 * Limit one open() per process. The process must close()
383	 * before open()'ing again.
384	 */
385	mtx_lock(&ksyms_mtx);
386	LIST_FOREACH(sc, &ksyms_list, sc_list) {
387		if (sc->sc_proc == td->td_proc) {
388			mtx_unlock(&ksyms_mtx);
389			return (EBUSY);
390		}
391	}
392
393	sc = malloc(sizeof(*sc), M_KSYMS, M_NOWAIT | M_ZERO);
394	if (sc == NULL) {
395		mtx_unlock(&ksyms_mtx);
396		return (ENOMEM);
397	}
398	sc->sc_proc = td->td_proc;
399	sc->sc_pmap = &td->td_proc->p_vmspace->vm_pmap;
400	LIST_INSERT_HEAD(&ksyms_list, sc, sc_list);
401	mtx_unlock(&ksyms_mtx);
402
403	error = devfs_set_cdevpriv(sc, ksyms_cdevpriv_dtr);
404	if (error != 0)
405		goto failed;
406
407	/*
408	 * MOD_SLOCK doesn't work here (because of a lock reversal with
409	 * KLD_SLOCK).  Therefore, simply try up to 3 times to get a "clean"
410	 * snapshot of the kernel symbol table.  This should work fine in the
411	 * rare case of a kernel module being loaded/unloaded at the same
412	 * time.
413	 */
414	for (try = 0; try < 3; try++) {
415		/*
416		 * Map a buffer in the calling process memory space and
417		 * create a snapshot of the kernel symbol table in it.
418		 */
419
420		/* Compute the size of buffer needed. */
421		ksyms_size_calc(&ts);
422		total_elf_sz = sizeof(struct ksyms_hdr) + ts.ts_symsz +
423		    ts.ts_strsz;
424
425		error = copyout_map(td, &sc->sc_uaddr, (vm_size_t)total_elf_sz);
426		if (error != 0)
427			break;
428		sc->sc_usize = total_elf_sz;
429
430		error = ksyms_snapshot(&ts, sc->sc_uaddr, total_elf_sz);
431		if (error == 0)
432			/* successful snapshot */
433			return (0);
434
435		/* Snapshot failed, unmap the memory and try again. */
436		(void)copyout_unmap(td, sc->sc_uaddr, sc->sc_usize);
437	}
438
439failed:
440	ksyms_cdevpriv_dtr(sc);
441	return (error);
442}
443
444static int
445ksyms_read(struct cdev *dev, struct uio *uio, int flags __unused)
446{
447	struct ksyms_softc *sc;
448	char *buf;
449	off_t off;
450	size_t len, sz;
451	vm_size_t ubase;
452	int error;
453
454	error = devfs_get_cdevpriv((void **)&sc);
455	if (error != 0)
456		return (error);
457
458	off = uio->uio_offset;
459	len = uio->uio_resid;
460
461	if (off < 0 || off > sc->sc_usize)
462		return (EFAULT);
463
464	if (len > sc->sc_usize - off)
465		len = sc->sc_usize - off;
466	if (len == 0)
467		return (0);
468
469	/*
470	 * Since the snapshot buffer is in the user space we have to copy it
471	 * in to the kernel and then back out.  The extra copy saves valuable
472	 * kernel memory.
473	 */
474	buf = malloc(PAGE_SIZE, M_KSYMS, M_WAITOK);
475	ubase = sc->sc_uaddr + off;
476
477	while (len) {
478		sz = min(PAGE_SIZE, len);
479		if (copyin((void *)ubase, buf, sz) != 0)
480			error = EFAULT;
481		else
482			error = uiomove(buf, sz, uio);
483		if (error != 0)
484			break;
485
486		len -= sz;
487		ubase += sz;
488	}
489	free(buf, M_KSYMS);
490
491	return (error);
492}
493
494static int
495ksyms_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int32_t flag __unused,
496    struct thread *td __unused)
497{
498	struct ksyms_softc *sc;
499	int error;
500
501	error = devfs_get_cdevpriv((void **)&sc);
502	if (error != 0)
503		return (error);
504
505	switch (cmd) {
506	case KIOCGSIZE:
507		/*
508		 * Return the size (in bytes) of the symbol table
509		 * snapshot.
510		 */
511		*(size_t *)data = sc->sc_usize;
512		break;
513	case KIOCGADDR:
514		/*
515		 * Return the address of the symbol table snapshot.
516		 * XXX - compat32 version of this?
517		 */
518		*(void **)data = (void *)sc->sc_uaddr;
519		break;
520	default:
521		error = ENOTTY;
522		break;
523	}
524
525	return (error);
526}
527
528static int
529ksyms_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
530    int prot __unused, vm_memattr_t *memattr __unused)
531{
532	struct ksyms_softc *sc;
533	int error;
534
535	error = devfs_get_cdevpriv((void **)&sc);
536	if (error != 0)
537		return (error);
538
539	/*
540	 * XXX mmap() will actually map the symbol table into the process
541	 * address space again.
542	 */
543	if (offset > round_page(sc->sc_usize) ||
544	    (*paddr = pmap_extract(sc->sc_pmap,
545	    (vm_offset_t)sc->sc_uaddr + offset)) == 0)
546		return (-1);
547
548	return (0);
549}
550
551static int
552ksyms_close(struct cdev *dev, int flags __unused, int fmt __unused,
553    struct thread *td)
554{
555	struct ksyms_softc *sc;
556	int error;
557
558	error = devfs_get_cdevpriv((void **)&sc);
559	if (error != 0)
560		return (error);
561
562	/* Unmap the buffer from the process address space. */
563	return (copyout_unmap(td, sc->sc_uaddr, sc->sc_usize));
564}
565
566static int
567ksyms_modevent(module_t mod __unused, int type, void *data __unused)
568{
569	int error;
570
571	error = 0;
572	switch (type) {
573	case MOD_LOAD:
574		mtx_init(&ksyms_mtx, "KSyms mtx", NULL, MTX_DEF);
575		ksyms_dev = make_dev(&ksyms_cdevsw, 0, UID_ROOT, GID_WHEEL,
576		    0400, KSYMS_DNAME);
577		break;
578	case MOD_UNLOAD:
579		if (!LIST_EMPTY(&ksyms_list))
580			return (EBUSY);
581		destroy_dev(ksyms_dev);
582		mtx_destroy(&ksyms_mtx);
583		break;
584	case MOD_SHUTDOWN:
585		break;
586	default:
587		error = EOPNOTSUPP;
588		break;
589	}
590	return (error);
591}
592
/*
 * Register the driver as module "ksyms" with ksyms_modevent() as its event
 * handler, and declare module version 1.
 */
DEV_MODULE(ksyms, ksyms_modevent, NULL);
MODULE_VERSION(ksyms, 1);
595