1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2008-2009, Stacey Son <sson@freebsd.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 * $FreeBSD$
29 */
30
31#include <sys/param.h>
32#include <sys/systm.h>
33#include <sys/kernel.h>
34
35#include <sys/conf.h>
36#include <sys/elf.h>
37#include <sys/linker.h>
38#include <sys/malloc.h>
39#include <sys/mman.h>
40#include <sys/module.h>
41#include <sys/proc.h>
42#include <sys/queue.h>
43#include <sys/resourcevar.h>
44#include <sys/rwlock.h>
45#include <sys/stat.h>
46#include <sys/sx.h>
47#include <sys/uio.h>
48
49#include <machine/elf.h>
50
51#include <vm/pmap.h>
52#include <vm/vm.h>
53#include <vm/vm_extern.h>
54#include <vm/vm_object.h>
55#include <vm/vm_page.h>
56#include <vm/vm_pager.h>
57
58#include "linker_if.h"
59
/* Indices of the section headers within ksyms_hdr.kh_shdr[]. */
#define SHDR_NULL	0
#define SHDR_SYMTAB	1
#define SHDR_STRTAB	2
#define SHDR_SHSTRTAB	3

/* Number of section headers emitted (the size of kh_shdr[]). */
#define SHDR_NUM	4

/* Section names placed in the section header string table. */
#define STR_SYMTAB	".symtab"
#define STR_STRTAB	".strtab"
#define STR_SHSTRTAB	".shstrtab"

/* Name used both for the cdevsw entry and for the device node. */
#define KSYMS_DNAME	"ksyms"
72
/* Character device entry points implemented further down in this file. */
static d_open_t ksyms_open;
static d_read_t ksyms_read;
static d_mmap_single_t ksyms_mmap_single;

/*
 * Device switch for the ksyms device.  Only open, read and mmap_single
 * handlers are supplied; no other entry point is set here.
 */
static struct cdevsw ksyms_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	0,
	.d_open =	ksyms_open,
	.d_read =	ksyms_read,
	.d_mmap_single = ksyms_mmap_single,
	.d_name =	KSYMS_DNAME
};
85
/*
 * Per-open state.  One instance is allocated in ksyms_open(), attached to
 * the open file as cdevpriv data, and released by ksyms_cdevpriv_dtr().
 */
struct ksyms_softc {
	LIST_ENTRY(ksyms_softc)	sc_list;	/* linkage on ksyms_list */
	vm_offset_t		sc_uaddr;	/* not referenced in this file's visible code */
	size_t			sc_usize;	/* not referenced in this file's visible code */
	vm_object_t		sc_obj;		/* VM object holding the snapshot */
	vm_size_t		sc_objsz;	/* snapshot size in bytes (not page rounded) */
	struct proc	       *sc_proc;	/* opening process; used for the one-open-per-process check */
};
94
/* sx lock taken around every access to ksyms_list in open and the dtr. */
static struct sx		 ksyms_mtx;
/* Device node created on MOD_LOAD and destroyed on MOD_UNLOAD. */
static struct cdev		*ksyms_dev;
/* All softc instances that currently exist (one per open). */
static LIST_HEAD(, ksyms_softc)	 ksyms_list = LIST_HEAD_INITIALIZER(ksyms_list);

/*
 * Contents of the section header string table: a leading NUL followed by
 * the three NUL-terminated section names.  The sh_name offsets assigned in
 * ksyms_snapshot() index into this string.
 */
static const char	ksyms_shstrtab[] =
	"\0" STR_SYMTAB "\0" STR_STRTAB "\0" STR_SHSTRTAB "\0";
101
/*
 * Fixed-size leading part of the generated ELF image.  ksyms_snapshot()
 * fills it in and emits it at offset 0.  The program header, section header
 * and shstrtab file offsets are computed with offsetof() on this structure,
 * so the member order here is the layout of the start of the image.
 */
struct ksyms_hdr {
	Elf_Ehdr	kh_ehdr;		/* ELF file header */
	Elf_Phdr	kh_txtphdr;		/* "text" program header */
	Elf_Phdr	kh_datphdr;		/* "data" program header */
	Elf_Shdr	kh_shdr[SHDR_NUM];	/* section headers, indexed by SHDR_* */
	char		kh_shstrtab[sizeof(ksyms_shstrtab)]; /* copy of ksyms_shstrtab */
};
109
/* Running totals computed by ksyms_size_calc(), in bytes. */
struct tsizes {
	size_t		ts_symsz;	/* sum of all symbol table sizes */
	size_t		ts_strsz;	/* sum of all string table sizes */
};
114
/*
 * Cursor state shared between ksyms_snapshot() and the ksyms_add() callback
 * while the symbol and string data are written into the snapshot object.
 */
struct toffsets {
	struct ksyms_softc *to_sc;	/* softc whose object is being filled */
	vm_offset_t	to_symoff;	/* image offset of the next symbol block */
	vm_offset_t	to_stroff;	/* image offset of the next string table */
	unsigned	to_stridx;	/* added to each st_name; grows by each string table size */
	size_t		to_resid;	/* bytes of the image not yet written */
};
122
/* Allocation type for the softc, the ELF header and the symbol bounce buffer. */
static MALLOC_DEFINE(M_KSYMS, "KSYMS", "Kernel Symbol Table");
124
125/*
126 * Get the symbol and string table sizes for a kernel module. Add it to the
127 * running total.
128 */
129static int
130ksyms_size_permod(linker_file_t lf, void *arg)
131{
132	struct tsizes *ts;
133	const Elf_Sym *symtab;
134	caddr_t strtab;
135	long syms;
136
137	ts = arg;
138
139	syms = LINKER_SYMTAB_GET(lf, &symtab);
140	ts->ts_symsz += syms * sizeof(Elf_Sym);
141	ts->ts_strsz += LINKER_STRTAB_GET(lf, &strtab);
142
143	return (0);
144}
145
146/*
147 * For kernel module get the symbol and string table sizes, returning the
148 * totals in *ts.
149 */
150static void
151ksyms_size_calc(struct tsizes *ts)
152{
153
154	ts->ts_symsz = 0;
155	ts->ts_strsz = 0;
156
157	(void)linker_file_foreach(ksyms_size_permod, ts);
158}
159
160static int
161ksyms_emit(struct ksyms_softc *sc, void *buf, off_t off, size_t sz)
162{
163	struct iovec iov;
164	struct uio uio;
165
166	iov.iov_base = buf;
167	iov.iov_len = sz;
168	uio.uio_iov = &iov;
169	uio.uio_iovcnt = 1;
170	uio.uio_offset = off;
171	uio.uio_resid = (ssize_t)sz;
172	uio.uio_segflg = UIO_SYSSPACE;
173	uio.uio_rw = UIO_WRITE;
174	uio.uio_td = curthread;
175
176	return (uiomove_object(sc->sc_obj, sc->sc_objsz, &uio));
177}
178
179#define SYMBLKSZ	(256 * sizeof(Elf_Sym))
180
181/*
182 * For a kernel module, add the symbol and string tables into the
183 * snapshot buffer.  Fix up the offsets in the tables.
184 */
185static int
186ksyms_add(linker_file_t lf, void *arg)
187{
188	char *buf;
189	struct ksyms_softc *sc;
190	struct toffsets *to;
191	const Elf_Sym *symtab;
192	Elf_Sym *symp;
193	caddr_t strtab;
194	size_t len, numsyms, strsz, symsz;
195	linker_symval_t symval;
196	int error, i, nsyms;
197
198	buf = malloc(SYMBLKSZ, M_KSYMS, M_WAITOK);
199	to = arg;
200	sc = to->to_sc;
201
202	MOD_SLOCK;
203	numsyms =  LINKER_SYMTAB_GET(lf, &symtab);
204	strsz = LINKER_STRTAB_GET(lf, &strtab);
205	symsz = numsyms * sizeof(Elf_Sym);
206
207	while (symsz > 0) {
208		len = min(SYMBLKSZ, symsz);
209		bcopy(symtab, buf, len);
210
211		/*
212		 * Fix up symbol table for kernel modules:
213		 *   string offsets need adjusted
214		 *   symbol values made absolute
215		 */
216		symp = (Elf_Sym *) buf;
217		nsyms = len / sizeof(Elf_Sym);
218		for (i = 0; i < nsyms; i++) {
219			symp[i].st_name += to->to_stridx;
220			if (lf->id > 1 && LINKER_SYMBOL_VALUES(lf,
221			    (c_linker_sym_t)&symtab[i], &symval) == 0) {
222				symp[i].st_value = (uintptr_t)symval.value;
223			}
224		}
225
226		if (len > to->to_resid) {
227			MOD_SUNLOCK;
228			free(buf, M_KSYMS);
229			return (ENXIO);
230		}
231		to->to_resid -= len;
232		error = ksyms_emit(sc, buf, to->to_symoff, len);
233		to->to_symoff += len;
234		if (error != 0) {
235			MOD_SUNLOCK;
236			free(buf, M_KSYMS);
237			return (error);
238		}
239
240		symtab += nsyms;
241		symsz -= len;
242	}
243	free(buf, M_KSYMS);
244	MOD_SUNLOCK;
245
246	if (strsz > to->to_resid)
247		return (ENXIO);
248	to->to_resid -= strsz;
249	error = ksyms_emit(sc, strtab, to->to_stroff, strsz);
250	to->to_stroff += strsz;
251	to->to_stridx += strsz;
252
253	return (error);
254}
255
256/*
257 * Create a single ELF symbol table for the kernel and kernel modules loaded
258 * at this time. Write this snapshot out in the process address space. Return
259 * 0 on success, otherwise error.
260 */
261static int
262ksyms_snapshot(struct ksyms_softc *sc, struct tsizes *ts)
263{
264	struct toffsets	to;
265	struct ksyms_hdr *hdr;
266	int error;
267
268	hdr = malloc(sizeof(*hdr), M_KSYMS, M_WAITOK | M_ZERO);
269
270	/*
271	 * Create the ELF header.
272	 */
273	hdr->kh_ehdr.e_ident[EI_PAD] = 0;
274	hdr->kh_ehdr.e_ident[EI_MAG0] = ELFMAG0;
275	hdr->kh_ehdr.e_ident[EI_MAG1] = ELFMAG1;
276	hdr->kh_ehdr.e_ident[EI_MAG2] = ELFMAG2;
277	hdr->kh_ehdr.e_ident[EI_MAG3] = ELFMAG3;
278	hdr->kh_ehdr.e_ident[EI_DATA] = ELF_DATA;
279	hdr->kh_ehdr.e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
280	hdr->kh_ehdr.e_ident[EI_CLASS] = ELF_CLASS;
281	hdr->kh_ehdr.e_ident[EI_VERSION] = EV_CURRENT;
282	hdr->kh_ehdr.e_ident[EI_ABIVERSION] = 0;
283	hdr->kh_ehdr.e_type = ET_EXEC;
284	hdr->kh_ehdr.e_machine = ELF_ARCH;
285	hdr->kh_ehdr.e_version = EV_CURRENT;
286	hdr->kh_ehdr.e_entry = 0;
287	hdr->kh_ehdr.e_phoff = offsetof(struct ksyms_hdr, kh_txtphdr);
288	hdr->kh_ehdr.e_shoff = offsetof(struct ksyms_hdr, kh_shdr);
289	hdr->kh_ehdr.e_flags = 0;
290	hdr->kh_ehdr.e_ehsize = sizeof(Elf_Ehdr);
291	hdr->kh_ehdr.e_phentsize = sizeof(Elf_Phdr);
292	hdr->kh_ehdr.e_phnum = 2;	/* Text and Data */
293	hdr->kh_ehdr.e_shentsize = sizeof(Elf_Shdr);
294	hdr->kh_ehdr.e_shnum = SHDR_NUM;
295	hdr->kh_ehdr.e_shstrndx = SHDR_SHSTRTAB;
296
297	/*
298	 * Add both the text and data program headers.
299	 */
300	hdr->kh_txtphdr.p_type = PT_LOAD;
301	/* XXX - is there a way to put the actual .text addr/size here? */
302	hdr->kh_txtphdr.p_vaddr = 0;
303	hdr->kh_txtphdr.p_memsz = 0;
304	hdr->kh_txtphdr.p_flags = PF_R | PF_X;
305
306	hdr->kh_datphdr.p_type = PT_LOAD;
307	/* XXX - is there a way to put the actual .data addr/size here? */
308	hdr->kh_datphdr.p_vaddr = 0;
309	hdr->kh_datphdr.p_memsz = 0;
310	hdr->kh_datphdr.p_flags = PF_R | PF_W | PF_X;
311
312	/*
313	 * Add the section headers: null, symtab, strtab, shstrtab.
314	 */
315
316	/* First section header - null */
317
318	/* Second section header - symtab */
319	hdr->kh_shdr[SHDR_SYMTAB].sh_name = 1; /* String offset (skip null) */
320	hdr->kh_shdr[SHDR_SYMTAB].sh_type = SHT_SYMTAB;
321	hdr->kh_shdr[SHDR_SYMTAB].sh_flags = 0;
322	hdr->kh_shdr[SHDR_SYMTAB].sh_addr = 0;
323	hdr->kh_shdr[SHDR_SYMTAB].sh_offset = sizeof(*hdr);
324	hdr->kh_shdr[SHDR_SYMTAB].sh_size = ts->ts_symsz;
325	hdr->kh_shdr[SHDR_SYMTAB].sh_link = SHDR_STRTAB;
326	hdr->kh_shdr[SHDR_SYMTAB].sh_info = ts->ts_symsz / sizeof(Elf_Sym);
327	hdr->kh_shdr[SHDR_SYMTAB].sh_addralign = sizeof(long);
328	hdr->kh_shdr[SHDR_SYMTAB].sh_entsize = sizeof(Elf_Sym);
329
330	/* Third section header - strtab */
331	hdr->kh_shdr[SHDR_STRTAB].sh_name = 1 + sizeof(STR_SYMTAB);
332	hdr->kh_shdr[SHDR_STRTAB].sh_type = SHT_STRTAB;
333	hdr->kh_shdr[SHDR_STRTAB].sh_flags = 0;
334	hdr->kh_shdr[SHDR_STRTAB].sh_addr = 0;
335	hdr->kh_shdr[SHDR_STRTAB].sh_offset =
336	    hdr->kh_shdr[SHDR_SYMTAB].sh_offset + ts->ts_symsz;
337	hdr->kh_shdr[SHDR_STRTAB].sh_size = ts->ts_strsz;
338	hdr->kh_shdr[SHDR_STRTAB].sh_link = 0;
339	hdr->kh_shdr[SHDR_STRTAB].sh_info = 0;
340	hdr->kh_shdr[SHDR_STRTAB].sh_addralign = sizeof(char);
341	hdr->kh_shdr[SHDR_STRTAB].sh_entsize = 0;
342
343	/* Fourth section - shstrtab */
344	hdr->kh_shdr[SHDR_SHSTRTAB].sh_name = 1 + sizeof(STR_SYMTAB) +
345	    sizeof(STR_STRTAB);
346	hdr->kh_shdr[SHDR_SHSTRTAB].sh_type = SHT_STRTAB;
347	hdr->kh_shdr[SHDR_SHSTRTAB].sh_flags = 0;
348	hdr->kh_shdr[SHDR_SHSTRTAB].sh_addr = 0;
349	hdr->kh_shdr[SHDR_SHSTRTAB].sh_offset =
350	    offsetof(struct ksyms_hdr, kh_shstrtab);
351	hdr->kh_shdr[SHDR_SHSTRTAB].sh_size = sizeof(ksyms_shstrtab);
352	hdr->kh_shdr[SHDR_SHSTRTAB].sh_link = 0;
353	hdr->kh_shdr[SHDR_SHSTRTAB].sh_info = 0;
354	hdr->kh_shdr[SHDR_SHSTRTAB].sh_addralign = 0 /* sizeof(char) */;
355	hdr->kh_shdr[SHDR_SHSTRTAB].sh_entsize = 0;
356
357	/* Copy shstrtab into the header. */
358	bcopy(ksyms_shstrtab, hdr->kh_shstrtab, sizeof(ksyms_shstrtab));
359
360	to.to_sc = sc;
361	to.to_symoff = hdr->kh_shdr[SHDR_SYMTAB].sh_offset;
362	to.to_stroff = hdr->kh_shdr[SHDR_STRTAB].sh_offset;
363	to.to_stridx = 0;
364	to.to_resid = sc->sc_objsz - sizeof(struct ksyms_hdr);
365
366	/* emit header */
367	error = ksyms_emit(sc, hdr, 0, sizeof(*hdr));
368	free(hdr, M_KSYMS);
369	if (error != 0)
370		return (error);
371
372	/* Add symbol and string tables for each kernel module. */
373	error = linker_file_foreach(ksyms_add, &to);
374	if (error != 0)
375		return (error);
376	if (to.to_resid != 0)
377		return (ENXIO);
378	return (0);
379}
380
381static void
382ksyms_cdevpriv_dtr(void *data)
383{
384	struct ksyms_softc *sc;
385	vm_object_t obj;
386
387	sc = (struct ksyms_softc *)data;
388
389	sx_xlock(&ksyms_mtx);
390	LIST_REMOVE(sc, sc_list);
391	sx_xunlock(&ksyms_mtx);
392	obj = sc->sc_obj;
393	if (obj != NULL)
394		vm_object_deallocate(obj);
395	free(sc, M_KSYMS);
396}
397
398static int
399ksyms_open(struct cdev *dev, int flags, int fmt __unused, struct thread *td)
400{
401	struct tsizes ts;
402	struct ksyms_softc *sc;
403	vm_object_t object;
404	vm_size_t elfsz;
405	int error, try;
406
407	/*
408	 * Limit one open() per process. The process must close()
409	 * before open()'ing again.
410	 */
411	sx_xlock(&ksyms_mtx);
412	LIST_FOREACH(sc, &ksyms_list, sc_list) {
413		if (sc->sc_proc == td->td_proc) {
414			sx_xunlock(&ksyms_mtx);
415			return (EBUSY);
416		}
417	}
418
419	sc = malloc(sizeof(*sc), M_KSYMS, M_WAITOK | M_ZERO);
420	sc->sc_proc = td->td_proc;
421	LIST_INSERT_HEAD(&ksyms_list, sc, sc_list);
422	sx_xunlock(&ksyms_mtx);
423
424	error = devfs_set_cdevpriv(sc, ksyms_cdevpriv_dtr);
425	if (error != 0) {
426		ksyms_cdevpriv_dtr(sc);
427		return (error);
428	}
429
430	/*
431	 * MOD_SLOCK doesn't work here (because of a lock reversal with
432	 * KLD_SLOCK).  Therefore, simply try up to 3 times to get a "clean"
433	 * snapshot of the kernel symbol table.  This should work fine in the
434	 * rare case of a kernel module being loaded/unloaded at the same
435	 * time.
436	 */
437	for (try = 0; try < 3; try++) {
438		ksyms_size_calc(&ts);
439		elfsz = sizeof(struct ksyms_hdr) + ts.ts_symsz + ts.ts_strsz;
440
441		object = vm_pager_allocate(OBJT_PHYS, NULL, round_page(elfsz),
442		    VM_PROT_ALL, 0, td->td_ucred);
443		sc->sc_obj = object;
444		sc->sc_objsz = elfsz;
445
446		error = ksyms_snapshot(sc, &ts);
447		if (error == 0)
448			break;
449
450		vm_object_deallocate(sc->sc_obj);
451		sc->sc_obj = NULL;
452	}
453	return (error);
454}
455
456static int
457ksyms_read(struct cdev *dev, struct uio *uio, int flags __unused)
458{
459	struct ksyms_softc *sc;
460	int error;
461
462	error = devfs_get_cdevpriv((void **)&sc);
463	if (error != 0)
464		return (error);
465	return (uiomove_object(sc->sc_obj, sc->sc_objsz, uio));
466}
467
468static int
469ksyms_mmap_single(struct cdev *dev, vm_ooffset_t *offset, vm_size_t size,
470    vm_object_t *objp, int nprot)
471{
472	struct ksyms_softc *sc;
473	vm_object_t obj;
474	int error;
475
476	error = devfs_get_cdevpriv((void **)&sc);
477	if (error != 0)
478		return (error);
479
480	if (*offset < 0 || *offset >= round_page(sc->sc_objsz) ||
481	    size > round_page(sc->sc_objsz) - *offset ||
482	    (nprot & ~PROT_READ) != 0)
483		return (EINVAL);
484
485	obj = sc->sc_obj;
486	vm_object_reference(obj);
487	*objp = obj;
488	return (0);
489}
490
491static int
492ksyms_modevent(module_t mod __unused, int type, void *data __unused)
493{
494	int error;
495
496	error = 0;
497	switch (type) {
498	case MOD_LOAD:
499		sx_init(&ksyms_mtx, "KSyms mtx");
500		ksyms_dev = make_dev(&ksyms_cdevsw, 0, UID_ROOT, GID_WHEEL,
501		    0400, KSYMS_DNAME);
502		break;
503	case MOD_UNLOAD:
504		if (!LIST_EMPTY(&ksyms_list))
505			return (EBUSY);
506		destroy_dev(ksyms_dev);
507		sx_destroy(&ksyms_mtx);
508		break;
509	case MOD_SHUTDOWN:
510		break;
511	default:
512		error = EOPNOTSUPP;
513		break;
514	}
515	return (error);
516}
517
/* Register the "ksyms" module with ksyms_modevent() as its event handler. */
DEV_MODULE(ksyms, ksyms_modevent, NULL);
/* Declare version 1 of the "ksyms" module. */
MODULE_VERSION(ksyms, 1);
520