ksyms.c revision 321739
1/*-
2 * Copyright (c) 2008-2009, Stacey Son <sson@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/11/sys/dev/ksyms/ksyms.c 321739 2017-07-31 00:35:21Z markj $
27 */
28
29#include <sys/param.h>
30#include <sys/systm.h>
31#include <sys/kernel.h>
32
33#include <sys/conf.h>
34#include <sys/elf.h>
35#include <sys/ksyms.h>
36#include <sys/linker.h>
37#include <sys/malloc.h>
38#include <sys/mman.h>
39#include <sys/module.h>
40#include <sys/mutex.h>
41#include <sys/proc.h>
42#include <sys/queue.h>
43#include <sys/resourcevar.h>
44#include <sys/stat.h>
45#include <sys/uio.h>
46
47#include <machine/elf.h>
48
49#include <vm/pmap.h>
50#include <vm/vm.h>
51#include <vm/vm_extern.h>
52#include <vm/vm_map.h>
53
54#include "linker_if.h"
55
/* Indices of the section headers emitted in the snapshot's ELF image. */
#define	SHDR_NULL	0
#define	SHDR_SYMTAB	1
#define	SHDR_STRTAB	2
#define	SHDR_SHSTRTAB	3

/* Number of section headers in the image. */
#define	SHDR_NUM	4

/* Section names stored in the section-header string table. */
#define	STR_SYMTAB	".symtab"
#define	STR_STRTAB	".strtab"
#define	STR_SHSTRTAB	".shstrtab"

/* Name used for both the cdevsw and the device created by make_dev(). */
#define	KSYMS_DNAME	"ksyms"
68
69static	d_open_t 	ksyms_open;
70static	d_read_t	ksyms_read;
71static	d_close_t	ksyms_close;
72static	d_ioctl_t	ksyms_ioctl;
73static	d_mmap_t	ksyms_mmap;
74
75static struct cdevsw ksyms_cdevsw = {
76    .d_version	=	D_VERSION,
77    .d_flags	=	D_TRACKCLOSE,
78    .d_open	=	ksyms_open,
79    .d_close	=	ksyms_close,
80    .d_read	=	ksyms_read,
81    .d_ioctl	=	ksyms_ioctl,
82    .d_mmap	=	ksyms_mmap,
83    .d_name	=	KSYMS_DNAME
84};
85
86struct ksyms_softc {
87	LIST_ENTRY(ksyms_softc)	sc_list;
88	vm_offset_t 		sc_uaddr;
89	size_t 			sc_usize;
90	pmap_t			sc_pmap;
91	struct proc	       *sc_proc;
92};
93
94static struct mtx 		 ksyms_mtx;
95static struct cdev 		*ksyms_dev;
96static LIST_HEAD(, ksyms_softc)	 ksyms_list =
97	LIST_HEAD_INITIALIZER(ksyms_list);
98
99static const char 	ksyms_shstrtab[] =
100	"\0" STR_SYMTAB "\0" STR_STRTAB "\0" STR_SHSTRTAB "\0";
101
102struct ksyms_hdr {
103	Elf_Ehdr	kh_ehdr;
104	Elf_Phdr	kh_txtphdr;
105	Elf_Phdr	kh_datphdr;
106	Elf_Shdr	kh_shdr[SHDR_NUM];
107	char		kh_shstrtab[sizeof(ksyms_shstrtab)];
108};
109
/* Running totals accumulated by ksyms_size_permod(). */
struct tsizes {
	size_t	ts_symsz;	/* bytes of symbol table, all linker files */
	size_t	ts_strsz;	/* bytes of string table, all linker files */
};
114
115struct toffsets {
116	vm_offset_t	to_symoff;
117	vm_offset_t	to_stroff;
118	unsigned	to_stridx;
119	size_t		to_resid;
120};
121
122static MALLOC_DEFINE(M_KSYMS, "KSYMS", "Kernel Symbol Table");
123
124/*
125 * Get the symbol and string table sizes for a kernel module. Add it to the
126 * running total.
127 */
128static int
129ksyms_size_permod(linker_file_t lf, void *arg)
130{
131	struct tsizes *ts;
132	const Elf_Sym *symtab;
133	caddr_t strtab;
134	long syms;
135
136	ts = arg;
137
138	syms = LINKER_SYMTAB_GET(lf, &symtab);
139	ts->ts_symsz += syms * sizeof(Elf_Sym);
140	ts->ts_strsz += LINKER_STRTAB_GET(lf, &strtab);
141
142	return (0);
143}
144
145/*
146 * For kernel module get the symbol and string table sizes, returning the
147 * totals in *ts.
148 */
149static void
150ksyms_size_calc(struct tsizes *ts)
151{
152	ts->ts_symsz = 0;
153	ts->ts_strsz = 0;
154
155	(void) linker_file_foreach(ksyms_size_permod, ts);
156}
157
/*
 * Copy 'sz' bytes from kernel address 'src' to user address 'des', then
 * advance 'des' past them.  'des' must be a modifiable integer lvalue and
 * 'sz' is evaluated twice.  The copyout() status is deliberately discarded,
 * as it always has been for this macro; callers that need it must call
 * copyout() themselves.
 */
#define	KSYMS_EMIT(src, des, sz) do {				\
		(void)copyout((src), (void *)(des), (sz));	\
		(des) += (sz);					\
	} while (0)

/*
 * Size in bytes of the bounce buffer used to fix up symbols (256 entries).
 * Parenthesized so it can be used safely next to any operator.
 */
#define	SYMBLKSZ	(256 * sizeof(Elf_Sym))
164
165/*
166 * For a kernel module, add the symbol and string tables into the
167 * snapshot buffer.  Fix up the offsets in the tables.
168 */
169static int
170ksyms_add(linker_file_t lf, void *arg)
171{
172	struct toffsets *to;
173	const Elf_Sym *symtab;
174	Elf_Sym *symp;
175	caddr_t strtab;
176	long symsz;
177	size_t strsz, numsyms;
178	linker_symval_t symval;
179	char *buf;
180	int i, nsyms, len;
181
182	to = arg;
183
184	MOD_SLOCK;
185	numsyms =  LINKER_SYMTAB_GET(lf, &symtab);
186	strsz = LINKER_STRTAB_GET(lf, &strtab);
187	symsz = numsyms * sizeof(Elf_Sym);
188
189	buf = malloc(SYMBLKSZ, M_KSYMS, M_WAITOK);
190
191	while (symsz > 0) {
192		len = min(SYMBLKSZ, symsz);
193		bcopy(symtab, buf, len);
194
195		/*
196		 * Fix up symbol table for kernel modules:
197		 *   string offsets need adjusted
198		 *   symbol values made absolute
199		 */
200		symp = (Elf_Sym *) buf;
201		nsyms = len / sizeof (Elf_Sym);
202		for (i = 0; i < nsyms; i++) {
203			symp[i].st_name += to->to_stridx;
204			if (lf->id > 1 && LINKER_SYMBOL_VALUES(lf,
205				(c_linker_sym_t) &symtab[i], &symval) == 0) {
206				symp[i].st_value = (uintptr_t) symval.value;
207			}
208		}
209
210		if (len > to->to_resid) {
211			MOD_SUNLOCK;
212			free(buf, M_KSYMS);
213			return (ENXIO);
214		} else
215			to->to_resid -= len;
216		KSYMS_EMIT(buf, to->to_symoff, len);
217
218		symtab += nsyms;
219		symsz -= len;
220	}
221	free(buf, M_KSYMS);
222	MOD_SUNLOCK;
223
224	if (strsz > to->to_resid)
225		return (ENXIO);
226	else
227		to->to_resid -= strsz;
228	KSYMS_EMIT(strtab, to->to_stroff, strsz);
229	to->to_stridx += strsz;
230
231	return (0);
232}
233
234/*
235 * Create a single ELF symbol table for the kernel and kernel modules loaded
236 * at this time. Write this snapshot out in the process address space. Return
237 * 0 on success, otherwise error.
238 */
239static int
240ksyms_snapshot(struct tsizes *ts, vm_offset_t uaddr, size_t resid)
241{
242
243	struct ksyms_hdr *hdr;
244	struct toffsets	 to;
245	int error = 0;
246
247	/* Be kernel stack friendly */
248	hdr = malloc(sizeof (*hdr), M_KSYMS, M_WAITOK|M_ZERO);
249
250	/*
251	 * Create the ELF header.
252	 */
253	hdr->kh_ehdr.e_ident[EI_PAD] = 0;
254	hdr->kh_ehdr.e_ident[EI_MAG0] = ELFMAG0;
255	hdr->kh_ehdr.e_ident[EI_MAG1] = ELFMAG1;
256	hdr->kh_ehdr.e_ident[EI_MAG2] = ELFMAG2;
257	hdr->kh_ehdr.e_ident[EI_MAG3] = ELFMAG3;
258	hdr->kh_ehdr.e_ident[EI_DATA] = ELF_DATA;
259	hdr->kh_ehdr.e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
260	hdr->kh_ehdr.e_ident[EI_CLASS] = ELF_CLASS;
261	hdr->kh_ehdr.e_ident[EI_VERSION] = EV_CURRENT;
262	hdr->kh_ehdr.e_ident[EI_ABIVERSION] = 0;
263	hdr->kh_ehdr.e_type = ET_EXEC;
264	hdr->kh_ehdr.e_machine = ELF_ARCH;
265	hdr->kh_ehdr.e_version = EV_CURRENT;
266	hdr->kh_ehdr.e_entry = 0;
267	hdr->kh_ehdr.e_phoff = offsetof(struct ksyms_hdr, kh_txtphdr);
268	hdr->kh_ehdr.e_shoff = offsetof(struct ksyms_hdr, kh_shdr);
269	hdr->kh_ehdr.e_flags = 0;
270	hdr->kh_ehdr.e_ehsize = sizeof(Elf_Ehdr);
271	hdr->kh_ehdr.e_phentsize = sizeof(Elf_Phdr);
272	hdr->kh_ehdr.e_phnum = 2;	/* Text and Data */
273	hdr->kh_ehdr.e_shentsize = sizeof(Elf_Shdr);
274	hdr->kh_ehdr.e_shnum = SHDR_NUM;
275	hdr->kh_ehdr.e_shstrndx = SHDR_SHSTRTAB;
276
277	/*
278	 * Add both the text and data Program headers.
279	 */
280	hdr->kh_txtphdr.p_type = PT_LOAD;
281	/* XXX - is there a way to put the actual .text addr/size here? */
282	hdr->kh_txtphdr.p_vaddr = 0;
283	hdr->kh_txtphdr.p_memsz = 0;
284	hdr->kh_txtphdr.p_flags = PF_R | PF_X;
285
286	hdr->kh_datphdr.p_type = PT_LOAD;
287	/* XXX - is there a way to put the actual .data addr/size here? */
288	hdr->kh_datphdr.p_vaddr = 0;
289	hdr->kh_datphdr.p_memsz = 0;
290	hdr->kh_datphdr.p_flags = PF_R | PF_W | PF_X;
291
292	/*
293	 * Add the Section headers: null, symtab, strtab, shstrtab,
294	 */
295
296	/* First section header - null */
297
298	/* Second section header - symtab */
299	hdr->kh_shdr[SHDR_SYMTAB].sh_name = 1; /* String offset (skip null) */
300	hdr->kh_shdr[SHDR_SYMTAB].sh_type = SHT_SYMTAB;
301	hdr->kh_shdr[SHDR_SYMTAB].sh_flags = 0;
302	hdr->kh_shdr[SHDR_SYMTAB].sh_addr = 0;
303	hdr->kh_shdr[SHDR_SYMTAB].sh_offset = sizeof(*hdr);
304	hdr->kh_shdr[SHDR_SYMTAB].sh_size = ts->ts_symsz;
305	hdr->kh_shdr[SHDR_SYMTAB].sh_link = SHDR_STRTAB;
306	hdr->kh_shdr[SHDR_SYMTAB].sh_info = ts->ts_symsz / sizeof(Elf_Sym);
307	hdr->kh_shdr[SHDR_SYMTAB].sh_addralign = sizeof(long);
308	hdr->kh_shdr[SHDR_SYMTAB].sh_entsize = sizeof(Elf_Sym);
309
310	/* Third section header - strtab */
311	hdr->kh_shdr[SHDR_STRTAB].sh_name = 1 + sizeof(STR_SYMTAB);
312	hdr->kh_shdr[SHDR_STRTAB].sh_type = SHT_STRTAB;
313	hdr->kh_shdr[SHDR_STRTAB].sh_flags = 0;
314	hdr->kh_shdr[SHDR_STRTAB].sh_addr = 0;
315	hdr->kh_shdr[SHDR_STRTAB].sh_offset =
316	    hdr->kh_shdr[SHDR_SYMTAB].sh_offset + ts->ts_symsz;
317	hdr->kh_shdr[SHDR_STRTAB].sh_size = ts->ts_strsz;
318	hdr->kh_shdr[SHDR_STRTAB].sh_link = 0;
319	hdr->kh_shdr[SHDR_STRTAB].sh_info = 0;
320	hdr->kh_shdr[SHDR_STRTAB].sh_addralign = sizeof(char);
321	hdr->kh_shdr[SHDR_STRTAB].sh_entsize = 0;
322
323	/* Fourth section - shstrtab */
324	hdr->kh_shdr[SHDR_SHSTRTAB].sh_name = 1 + sizeof(STR_SYMTAB) +
325	    sizeof(STR_STRTAB);
326	hdr->kh_shdr[SHDR_SHSTRTAB].sh_type = SHT_STRTAB;
327	hdr->kh_shdr[SHDR_SHSTRTAB].sh_flags = 0;
328	hdr->kh_shdr[SHDR_SHSTRTAB].sh_addr = 0;
329	hdr->kh_shdr[SHDR_SHSTRTAB].sh_offset =
330	    offsetof(struct ksyms_hdr, kh_shstrtab);
331	hdr->kh_shdr[SHDR_SHSTRTAB].sh_size = sizeof(ksyms_shstrtab);
332	hdr->kh_shdr[SHDR_SHSTRTAB].sh_link = 0;
333	hdr->kh_shdr[SHDR_SHSTRTAB].sh_info = 0;
334	hdr->kh_shdr[SHDR_SHSTRTAB].sh_addralign = 0 /* sizeof(char) */;
335	hdr->kh_shdr[SHDR_SHSTRTAB].sh_entsize = 0;
336
337	/* Copy shstrtab into the header */
338	bcopy(ksyms_shstrtab, hdr->kh_shstrtab, sizeof(ksyms_shstrtab));
339
340	to.to_symoff = uaddr + hdr->kh_shdr[SHDR_SYMTAB].sh_offset;
341	to.to_stroff = uaddr + hdr->kh_shdr[SHDR_STRTAB].sh_offset;
342	to.to_stridx = 0;
343	if (sizeof(struct ksyms_hdr) > resid) {
344		free(hdr, M_KSYMS);
345		return (ENXIO);
346	}
347	to.to_resid = resid - sizeof(struct ksyms_hdr);
348
349	/* Emit Header */
350	copyout(hdr, (void *)uaddr, sizeof(struct ksyms_hdr));
351
352	free(hdr, M_KSYMS);
353
354	/* Add symbol and string tables for each kernelmodule */
355	error = linker_file_foreach(ksyms_add, &to);
356
357	if (to.to_resid != 0)
358		return (ENXIO);
359
360	return (error);
361}
362
363static void
364ksyms_cdevpriv_dtr(void *data)
365{
366	struct ksyms_softc *sc;
367
368	sc = (struct ksyms_softc *)data;
369
370	mtx_lock(&ksyms_mtx);
371	LIST_REMOVE(sc, sc_list);
372	mtx_unlock(&ksyms_mtx);
373	free(sc, M_KSYMS);
374}
375
376/* ARGSUSED */
377static int
378ksyms_open(struct cdev *dev, int flags, int fmt __unused, struct thread *td)
379{
380	struct tsizes ts;
381	size_t total_elf_sz;
382	int error, try;
383	struct ksyms_softc *sc;
384
385	/*
386	 *  Limit one open() per process. The process must close()
387	 *  before open()'ing again.
388	 */
389	mtx_lock(&ksyms_mtx);
390	LIST_FOREACH(sc, &ksyms_list, sc_list) {
391		if (sc->sc_proc == td->td_proc) {
392			mtx_unlock(&ksyms_mtx);
393			return (EBUSY);
394		}
395	}
396
397	sc = (struct ksyms_softc *) malloc(sizeof (*sc), M_KSYMS,
398	    M_NOWAIT|M_ZERO);
399
400	if (sc == NULL) {
401		mtx_unlock(&ksyms_mtx);
402		return (ENOMEM);
403	}
404	sc->sc_proc = td->td_proc;
405	sc->sc_pmap = &td->td_proc->p_vmspace->vm_pmap;
406	LIST_INSERT_HEAD(&ksyms_list, sc, sc_list);
407	mtx_unlock(&ksyms_mtx);
408
409	error = devfs_set_cdevpriv(sc, ksyms_cdevpriv_dtr);
410	if (error)
411		goto failed;
412
413	/*
414	 * MOD_SLOCK doesn't work here (because of a lock reversal with
415	 * KLD_SLOCK).  Therefore, simply try upto 3 times to get a "clean"
416	 * snapshot of the kernel symbol table.  This should work fine in the
417	 * rare case of a kernel module being loaded/unloaded at the same
418	 * time.
419	 */
420	for(try = 0; try < 3; try++) {
421		/*
422	 	* Map a buffer in the calling process memory space and
423	 	* create a snapshot of the kernel symbol table in it.
424	 	*/
425
426		/* Compute the size of buffer needed. */
427		ksyms_size_calc(&ts);
428		total_elf_sz = sizeof(struct ksyms_hdr) + ts.ts_symsz +
429			ts.ts_strsz;
430
431		error = copyout_map(td, &(sc->sc_uaddr),
432				(vm_size_t) total_elf_sz);
433		if (error)
434			break;
435		sc->sc_usize = total_elf_sz;
436
437		error = ksyms_snapshot(&ts, sc->sc_uaddr, total_elf_sz);
438		if (!error)  {
439			/* Successful Snapshot */
440			return (0);
441		}
442
443		/* Snapshot failed, unmap the memory and try again */
444		(void) copyout_unmap(td, sc->sc_uaddr, sc->sc_usize);
445	}
446
447failed:
448	ksyms_cdevpriv_dtr(sc);
449	return (error);
450}
451
452/* ARGSUSED */
453static int
454ksyms_read(struct cdev *dev, struct uio *uio, int flags __unused)
455{
456	int error;
457	size_t len, sz;
458	struct ksyms_softc *sc;
459	off_t off;
460	char *buf;
461	vm_size_t ubase;
462
463	error = devfs_get_cdevpriv((void **)&sc);
464	if (error)
465		return (error);
466
467	off = uio->uio_offset;
468	len = uio->uio_resid;
469
470	if (off < 0 || off > sc->sc_usize)
471		return (EFAULT);
472
473	if (len > (sc->sc_usize - off))
474		len = sc->sc_usize - off;
475
476	if (len == 0)
477		return (0);
478
479	/*
480	 * Since the snapshot buffer is in the user space we have to copy it
481	 * in to the kernel and then back out.  The extra copy saves valuable
482	 * kernel memory.
483	 */
484	buf = malloc(PAGE_SIZE, M_KSYMS, M_WAITOK);
485	ubase = sc->sc_uaddr + off;
486
487	while (len) {
488
489		sz = min(PAGE_SIZE, len);
490		if (copyin((void *)ubase, buf, sz))
491			error = EFAULT;
492		else
493			error = uiomove(buf, sz, uio);
494
495		if (error)
496			break;
497
498		len -= sz;
499		ubase += sz;
500	}
501	free(buf, M_KSYMS);
502
503	return (error);
504}
505
506/* ARGSUSED */
507static int
508ksyms_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int32_t flag __unused,
509    struct thread *td __unused)
510{
511	int error = 0;
512	struct ksyms_softc *sc;
513
514	error = devfs_get_cdevpriv((void **)&sc);
515	if (error)
516		return (error);
517
518	switch (cmd) {
519	case KIOCGSIZE:
520		/*
521		 * Return the size (in bytes) of the symbol table
522		 * snapshot.
523		 */
524		*(size_t *)data = sc->sc_usize;
525		break;
526
527	case KIOCGADDR:
528		/*
529		 * Return the address of the symbol table snapshot.
530		 * XXX - compat32 version of this?
531		 */
532		*(void **)data = (void *)sc->sc_uaddr;
533		break;
534
535	default:
536		error = ENOTTY;
537		break;
538	}
539
540	return (error);
541}
542
543/* ARGUSED */
544static int
545ksyms_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
546		int prot __unused, vm_memattr_t *memattr __unused)
547{
548    	struct ksyms_softc *sc;
549	int error;
550
551	error = devfs_get_cdevpriv((void **)&sc);
552	if (error)
553		return (error);
554
555	/*
556	 * XXX mmap() will actually map the symbol table into the process
557	 * address space again.
558	 */
559	if (offset > round_page(sc->sc_usize) ||
560	    (*paddr = pmap_extract(sc->sc_pmap,
561	    (vm_offset_t)sc->sc_uaddr + offset)) == 0)
562		return (-1);
563
564	return (0);
565}
566
567/* ARGUSED */
568static int
569ksyms_close(struct cdev *dev, int flags __unused, int fmt __unused,
570		struct thread *td)
571{
572	int error = 0;
573	struct ksyms_softc *sc;
574
575	error = devfs_get_cdevpriv((void **)&sc);
576	if (error)
577		return (error);
578
579	/* Unmap the buffer from the process address space. */
580	error = copyout_unmap(td, sc->sc_uaddr, sc->sc_usize);
581
582	return (error);
583}
584
585/* ARGSUSED */
586static int
587ksyms_modevent(module_t mod __unused, int type, void *data __unused)
588{
589	int error = 0;
590
591	switch (type) {
592	case MOD_LOAD:
593		mtx_init(&ksyms_mtx, "KSyms mtx", NULL, MTX_DEF);
594		ksyms_dev = make_dev(&ksyms_cdevsw, 0, UID_ROOT, GID_WHEEL,
595		    0400, KSYMS_DNAME);
596		break;
597
598	case MOD_UNLOAD:
599		if (!LIST_EMPTY(&ksyms_list))
600			return (EBUSY);
601		destroy_dev(ksyms_dev);
602		mtx_destroy(&ksyms_mtx);
603		break;
604
605	case MOD_SHUTDOWN:
606		break;
607
608	default:
609		error = EOPNOTSUPP;
610		break;
611	}
612	return (error);
613}
614
615DEV_MODULE(ksyms, ksyms_modevent, NULL);
616MODULE_VERSION(ksyms, 1);
617