1/*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2008-2009, Stacey Son <sson@freebsd.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31#include <sys/param.h> 32#include <sys/systm.h> 33#include <sys/kernel.h> 34 35#include <sys/conf.h> 36#include <sys/elf.h> 37#include <sys/linker.h> 38#include <sys/malloc.h> 39#include <sys/mman.h> 40#include <sys/module.h> 41#include <sys/proc.h> 42#include <sys/queue.h> 43#include <sys/resourcevar.h> 44#include <sys/rwlock.h> 45#include <sys/stat.h> 46#include <sys/sx.h> 47#include <sys/uio.h> 48 49#include <machine/elf.h> 50 51#include <vm/pmap.h> 52#include <vm/vm.h> 53#include <vm/vm_extern.h> 54#include <vm/vm_object.h> 55#include <vm/vm_page.h> 56#include <vm/vm_pager.h> 57 58#include "linker_if.h" 59 60#define SHDR_NULL 0 61#define SHDR_SYMTAB 1 62#define SHDR_STRTAB 2 63#define SHDR_SHSTRTAB 3 64 65#define SHDR_NUM 4 66 67#define STR_SYMTAB ".symtab" 68#define STR_STRTAB ".strtab" 69#define STR_SHSTRTAB ".shstrtab" 70 71#define KSYMS_DNAME "ksyms" 72 73static d_open_t ksyms_open; 74static d_read_t ksyms_read; 75static d_mmap_single_t ksyms_mmap_single; 76 77static struct cdevsw ksyms_cdevsw = { 78 .d_version = D_VERSION, 79 .d_flags = 0, 80 .d_open = ksyms_open, 81 .d_read = ksyms_read, 82 .d_mmap_single = ksyms_mmap_single, 83 .d_name = KSYMS_DNAME 84}; 85 86struct ksyms_softc { 87 LIST_ENTRY(ksyms_softc) sc_list; 88 vm_offset_t sc_uaddr; 89 size_t sc_usize; 90 vm_object_t sc_obj; 91 vm_size_t sc_objsz; 92 struct proc *sc_proc; 93}; 94 95static struct sx ksyms_mtx; 96static struct cdev *ksyms_dev; 97static LIST_HEAD(, ksyms_softc) ksyms_list = LIST_HEAD_INITIALIZER(ksyms_list); 98 99static const char ksyms_shstrtab[] = 100 "\0" STR_SYMTAB "\0" STR_STRTAB "\0" STR_SHSTRTAB "\0"; 101 102struct ksyms_hdr { 103 Elf_Ehdr kh_ehdr; 104 Elf_Phdr kh_txtphdr; 105 Elf_Phdr kh_datphdr; 106 Elf_Shdr kh_shdr[SHDR_NUM]; 107 char kh_shstrtab[sizeof(ksyms_shstrtab)]; 108}; 109 110struct tsizes { 111 size_t ts_symsz; 112 size_t ts_strsz; 113}; 114 115struct toffsets { 116 struct ksyms_softc *to_sc; 117 vm_offset_t to_symoff; 118 vm_offset_t to_stroff; 119 unsigned to_stridx; 120 size_t to_resid; 121}; 122 123static MALLOC_DEFINE(M_KSYMS, "KSYMS", "Kernel Symbol Table"); 124 125/* 126 * Get the symbol and string table sizes for a kernel module. Add it to the 127 * running total. 128 */ 129static int 130ksyms_size_permod(linker_file_t lf, void *arg) 131{ 132 struct tsizes *ts; 133 const Elf_Sym *symtab; 134 caddr_t strtab; 135 long syms; 136 137 ts = arg; 138 139 syms = LINKER_SYMTAB_GET(lf, &symtab); 140 ts->ts_symsz += syms * sizeof(Elf_Sym); 141 ts->ts_strsz += LINKER_STRTAB_GET(lf, &strtab); 142 143 return (0); 144} 145 146/* 147 * For kernel module get the symbol and string table sizes, returning the 148 * totals in *ts. 149 */ 150static void 151ksyms_size_calc(struct tsizes *ts) 152{ 153 154 ts->ts_symsz = 0; 155 ts->ts_strsz = 0; 156 157 (void)linker_file_foreach(ksyms_size_permod, ts); 158} 159 160static int 161ksyms_emit(struct ksyms_softc *sc, void *buf, off_t off, size_t sz) 162{ 163 struct iovec iov; 164 struct uio uio; 165 166 iov.iov_base = buf; 167 iov.iov_len = sz; 168 uio.uio_iov = &iov; 169 uio.uio_iovcnt = 1; 170 uio.uio_offset = off; 171 uio.uio_resid = (ssize_t)sz; 172 uio.uio_segflg = UIO_SYSSPACE; 173 uio.uio_rw = UIO_WRITE; 174 uio.uio_td = curthread; 175 176 return (uiomove_object(sc->sc_obj, sc->sc_objsz, &uio)); 177} 178 179#define SYMBLKSZ (256 * sizeof(Elf_Sym)) 180 181/* 182 * For a kernel module, add the symbol and string tables into the 183 * snapshot buffer. Fix up the offsets in the tables. 184 */ 185static int 186ksyms_add(linker_file_t lf, void *arg) 187{ 188 char *buf; 189 struct ksyms_softc *sc; 190 struct toffsets *to; 191 const Elf_Sym *symtab; 192 Elf_Sym *symp; 193 caddr_t strtab; 194 size_t len, numsyms, strsz, symsz; 195 linker_symval_t symval; 196 int error, i, nsyms; 197 198 buf = malloc(SYMBLKSZ, M_KSYMS, M_WAITOK); 199 to = arg; 200 sc = to->to_sc; 201 202 MOD_SLOCK; 203 numsyms = LINKER_SYMTAB_GET(lf, &symtab); 204 strsz = LINKER_STRTAB_GET(lf, &strtab); 205 symsz = numsyms * sizeof(Elf_Sym); 206 207 while (symsz > 0) { 208 len = min(SYMBLKSZ, symsz); 209 bcopy(symtab, buf, len); 210 211 /* 212 * Fix up symbol table for kernel modules: 213 * string offsets need adjusted 214 * symbol values made absolute 215 */ 216 symp = (Elf_Sym *) buf; 217 nsyms = len / sizeof(Elf_Sym); 218 for (i = 0; i < nsyms; i++) { 219 symp[i].st_name += to->to_stridx; 220 if (lf->id > 1 && LINKER_SYMBOL_VALUES(lf, 221 (c_linker_sym_t)&symtab[i], &symval) == 0) { 222 symp[i].st_value = (uintptr_t)symval.value; 223 } 224 } 225 226 if (len > to->to_resid) { 227 MOD_SUNLOCK; 228 free(buf, M_KSYMS); 229 return (ENXIO); 230 } 231 to->to_resid -= len; 232 error = ksyms_emit(sc, buf, to->to_symoff, len); 233 to->to_symoff += len; 234 if (error != 0) { 235 MOD_SUNLOCK; 236 free(buf, M_KSYMS); 237 return (error); 238 } 239 240 symtab += nsyms; 241 symsz -= len; 242 } 243 free(buf, M_KSYMS); 244 MOD_SUNLOCK; 245 246 if (strsz > to->to_resid) 247 return (ENXIO); 248 to->to_resid -= strsz; 249 error = ksyms_emit(sc, strtab, to->to_stroff, strsz); 250 to->to_stroff += strsz; 251 to->to_stridx += strsz; 252 253 return (error); 254} 255 256/* 257 * Create a single ELF symbol table for the kernel and kernel modules loaded 258 * at this time. Write this snapshot out in the process address space. Return 259 * 0 on success, otherwise error. 260 */ 261static int 262ksyms_snapshot(struct ksyms_softc *sc, struct tsizes *ts) 263{ 264 struct toffsets to; 265 struct ksyms_hdr *hdr; 266 int error; 267 268 hdr = malloc(sizeof(*hdr), M_KSYMS, M_WAITOK | M_ZERO); 269 270 /* 271 * Create the ELF header. 272 */ 273 hdr->kh_ehdr.e_ident[EI_PAD] = 0; 274 hdr->kh_ehdr.e_ident[EI_MAG0] = ELFMAG0; 275 hdr->kh_ehdr.e_ident[EI_MAG1] = ELFMAG1; 276 hdr->kh_ehdr.e_ident[EI_MAG2] = ELFMAG2; 277 hdr->kh_ehdr.e_ident[EI_MAG3] = ELFMAG3; 278 hdr->kh_ehdr.e_ident[EI_DATA] = ELF_DATA; 279 hdr->kh_ehdr.e_ident[EI_OSABI] = ELFOSABI_FREEBSD; 280 hdr->kh_ehdr.e_ident[EI_CLASS] = ELF_CLASS; 281 hdr->kh_ehdr.e_ident[EI_VERSION] = EV_CURRENT; 282 hdr->kh_ehdr.e_ident[EI_ABIVERSION] = 0; 283 hdr->kh_ehdr.e_type = ET_EXEC; 284 hdr->kh_ehdr.e_machine = ELF_ARCH; 285 hdr->kh_ehdr.e_version = EV_CURRENT; 286 hdr->kh_ehdr.e_entry = 0; 287 hdr->kh_ehdr.e_phoff = offsetof(struct ksyms_hdr, kh_txtphdr); 288 hdr->kh_ehdr.e_shoff = offsetof(struct ksyms_hdr, kh_shdr); 289 hdr->kh_ehdr.e_flags = 0; 290 hdr->kh_ehdr.e_ehsize = sizeof(Elf_Ehdr); 291 hdr->kh_ehdr.e_phentsize = sizeof(Elf_Phdr); 292 hdr->kh_ehdr.e_phnum = 2; /* Text and Data */ 293 hdr->kh_ehdr.e_shentsize = sizeof(Elf_Shdr); 294 hdr->kh_ehdr.e_shnum = SHDR_NUM; 295 hdr->kh_ehdr.e_shstrndx = SHDR_SHSTRTAB; 296 297 /* 298 * Add both the text and data program headers. 299 */ 300 hdr->kh_txtphdr.p_type = PT_LOAD; 301 /* XXX - is there a way to put the actual .text addr/size here? */ 302 hdr->kh_txtphdr.p_vaddr = 0; 303 hdr->kh_txtphdr.p_memsz = 0; 304 hdr->kh_txtphdr.p_flags = PF_R | PF_X; 305 306 hdr->kh_datphdr.p_type = PT_LOAD; 307 /* XXX - is there a way to put the actual .data addr/size here? */ 308 hdr->kh_datphdr.p_vaddr = 0; 309 hdr->kh_datphdr.p_memsz = 0; 310 hdr->kh_datphdr.p_flags = PF_R | PF_W | PF_X; 311 312 /* 313 * Add the section headers: null, symtab, strtab, shstrtab. 314 */ 315 316 /* First section header - null */ 317 318 /* Second section header - symtab */ 319 hdr->kh_shdr[SHDR_SYMTAB].sh_name = 1; /* String offset (skip null) */ 320 hdr->kh_shdr[SHDR_SYMTAB].sh_type = SHT_SYMTAB; 321 hdr->kh_shdr[SHDR_SYMTAB].sh_flags = 0; 322 hdr->kh_shdr[SHDR_SYMTAB].sh_addr = 0; 323 hdr->kh_shdr[SHDR_SYMTAB].sh_offset = sizeof(*hdr); 324 hdr->kh_shdr[SHDR_SYMTAB].sh_size = ts->ts_symsz; 325 hdr->kh_shdr[SHDR_SYMTAB].sh_link = SHDR_STRTAB; 326 hdr->kh_shdr[SHDR_SYMTAB].sh_info = ts->ts_symsz / sizeof(Elf_Sym); 327 hdr->kh_shdr[SHDR_SYMTAB].sh_addralign = sizeof(long); 328 hdr->kh_shdr[SHDR_SYMTAB].sh_entsize = sizeof(Elf_Sym); 329 330 /* Third section header - strtab */ 331 hdr->kh_shdr[SHDR_STRTAB].sh_name = 1 + sizeof(STR_SYMTAB); 332 hdr->kh_shdr[SHDR_STRTAB].sh_type = SHT_STRTAB; 333 hdr->kh_shdr[SHDR_STRTAB].sh_flags = 0; 334 hdr->kh_shdr[SHDR_STRTAB].sh_addr = 0; 335 hdr->kh_shdr[SHDR_STRTAB].sh_offset = 336 hdr->kh_shdr[SHDR_SYMTAB].sh_offset + ts->ts_symsz; 337 hdr->kh_shdr[SHDR_STRTAB].sh_size = ts->ts_strsz; 338 hdr->kh_shdr[SHDR_STRTAB].sh_link = 0; 339 hdr->kh_shdr[SHDR_STRTAB].sh_info = 0; 340 hdr->kh_shdr[SHDR_STRTAB].sh_addralign = sizeof(char); 341 hdr->kh_shdr[SHDR_STRTAB].sh_entsize = 0; 342 343 /* Fourth section - shstrtab */ 344 hdr->kh_shdr[SHDR_SHSTRTAB].sh_name = 1 + sizeof(STR_SYMTAB) + 345 sizeof(STR_STRTAB); 346 hdr->kh_shdr[SHDR_SHSTRTAB].sh_type = SHT_STRTAB; 347 hdr->kh_shdr[SHDR_SHSTRTAB].sh_flags = 0; 348 hdr->kh_shdr[SHDR_SHSTRTAB].sh_addr = 0; 349 hdr->kh_shdr[SHDR_SHSTRTAB].sh_offset = 350 offsetof(struct ksyms_hdr, kh_shstrtab); 351 hdr->kh_shdr[SHDR_SHSTRTAB].sh_size = sizeof(ksyms_shstrtab); 352 hdr->kh_shdr[SHDR_SHSTRTAB].sh_link = 0; 353 hdr->kh_shdr[SHDR_SHSTRTAB].sh_info = 0; 354 hdr->kh_shdr[SHDR_SHSTRTAB].sh_addralign = 0 /* sizeof(char) */; 355 hdr->kh_shdr[SHDR_SHSTRTAB].sh_entsize = 0; 356 357 /* Copy shstrtab into the header. */ 358 bcopy(ksyms_shstrtab, hdr->kh_shstrtab, sizeof(ksyms_shstrtab)); 359 360 to.to_sc = sc; 361 to.to_symoff = hdr->kh_shdr[SHDR_SYMTAB].sh_offset; 362 to.to_stroff = hdr->kh_shdr[SHDR_STRTAB].sh_offset; 363 to.to_stridx = 0; 364 to.to_resid = sc->sc_objsz - sizeof(struct ksyms_hdr); 365 366 /* emit header */ 367 error = ksyms_emit(sc, hdr, 0, sizeof(*hdr)); 368 free(hdr, M_KSYMS); 369 if (error != 0) 370 return (error); 371 372 /* Add symbol and string tables for each kernel module. */ 373 error = linker_file_foreach(ksyms_add, &to); 374 if (error != 0) 375 return (error); 376 if (to.to_resid != 0) 377 return (ENXIO); 378 return (0); 379} 380 381static void 382ksyms_cdevpriv_dtr(void *data) 383{ 384 struct ksyms_softc *sc; 385 vm_object_t obj; 386 387 sc = (struct ksyms_softc *)data; 388 389 sx_xlock(&ksyms_mtx); 390 LIST_REMOVE(sc, sc_list); 391 sx_xunlock(&ksyms_mtx); 392 obj = sc->sc_obj; 393 if (obj != NULL) 394 vm_object_deallocate(obj); 395 free(sc, M_KSYMS); 396} 397 398static int 399ksyms_open(struct cdev *dev, int flags, int fmt __unused, struct thread *td) 400{ 401 struct tsizes ts; 402 struct ksyms_softc *sc; 403 vm_object_t object; 404 vm_size_t elfsz; 405 int error, try; 406 407 /* 408 * Limit one open() per process. The process must close() 409 * before open()'ing again. 410 */ 411 sx_xlock(&ksyms_mtx); 412 LIST_FOREACH(sc, &ksyms_list, sc_list) { 413 if (sc->sc_proc == td->td_proc) { 414 sx_xunlock(&ksyms_mtx); 415 return (EBUSY); 416 } 417 } 418 419 sc = malloc(sizeof(*sc), M_KSYMS, M_WAITOK | M_ZERO); 420 sc->sc_proc = td->td_proc; 421 LIST_INSERT_HEAD(&ksyms_list, sc, sc_list); 422 sx_xunlock(&ksyms_mtx); 423 424 error = devfs_set_cdevpriv(sc, ksyms_cdevpriv_dtr); 425 if (error != 0) { 426 ksyms_cdevpriv_dtr(sc); 427 return (error); 428 } 429 430 /* 431 * MOD_SLOCK doesn't work here (because of a lock reversal with 432 * KLD_SLOCK). Therefore, simply try up to 3 times to get a "clean" 433 * snapshot of the kernel symbol table. This should work fine in the 434 * rare case of a kernel module being loaded/unloaded at the same 435 * time. 436 */ 437 for (try = 0; try < 3; try++) { 438 ksyms_size_calc(&ts); 439 elfsz = sizeof(struct ksyms_hdr) + ts.ts_symsz + ts.ts_strsz; 440 441 object = vm_pager_allocate(OBJT_PHYS, NULL, round_page(elfsz), 442 VM_PROT_ALL, 0, td->td_ucred); 443 sc->sc_obj = object; 444 sc->sc_objsz = elfsz; 445 446 error = ksyms_snapshot(sc, &ts); 447 if (error == 0) 448 break; 449 450 vm_object_deallocate(sc->sc_obj); 451 sc->sc_obj = NULL; 452 } 453 return (error); 454} 455 456static int 457ksyms_read(struct cdev *dev, struct uio *uio, int flags __unused) 458{ 459 struct ksyms_softc *sc; 460 int error; 461 462 error = devfs_get_cdevpriv((void **)&sc); 463 if (error != 0) 464 return (error); 465 return (uiomove_object(sc->sc_obj, sc->sc_objsz, uio)); 466} 467 468static int 469ksyms_mmap_single(struct cdev *dev, vm_ooffset_t *offset, vm_size_t size, 470 vm_object_t *objp, int nprot) 471{ 472 struct ksyms_softc *sc; 473 vm_object_t obj; 474 int error; 475 476 error = devfs_get_cdevpriv((void **)&sc); 477 if (error != 0) 478 return (error); 479 480 if (*offset < 0 || *offset >= round_page(sc->sc_objsz) || 481 size > round_page(sc->sc_objsz) - *offset || 482 (nprot & ~PROT_READ) != 0) 483 return (EINVAL); 484 485 obj = sc->sc_obj; 486 vm_object_reference(obj); 487 *objp = obj; 488 return (0); 489} 490 491static int 492ksyms_modevent(module_t mod __unused, int type, void *data __unused) 493{ 494 int error; 495 496 error = 0; 497 switch (type) { 498 case MOD_LOAD: 499 sx_init(&ksyms_mtx, "KSyms mtx"); 500 ksyms_dev = make_dev(&ksyms_cdevsw, 0, UID_ROOT, GID_WHEEL, 501 0400, KSYMS_DNAME); 502 break; 503 case MOD_UNLOAD: 504 if (!LIST_EMPTY(&ksyms_list)) 505 return (EBUSY); 506 destroy_dev(ksyms_dev); 507 sx_destroy(&ksyms_mtx); 508 break; 509 case MOD_SHUTDOWN: 510 break; 511 default: 512 error = EOPNOTSUPP; 513 break; 514 } 515 return (error); 516} 517 518DEV_MODULE(ksyms, ksyms_modevent, NULL); 519MODULE_VERSION(ksyms, 1); 520