1/*	$NetBSD: kern_ksyms.c,v 1.108 2023/02/21 11:40:00 riastradh Exp $	*/
2
3/*-
4 * Copyright (c) 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software developed for The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32/*
33 * Copyright (c) 2001, 2003 Anders Magnusson (ragge@ludd.luth.se).
34 * All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 *    notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 *    notice, this list of conditions and the following disclaimer in the
43 *    documentation and/or other materials provided with the distribution.
44 * 3. The name of the author may not be used to endorse or promote products
45 *    derived from this software without specific prior written permission
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
48 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
49 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
50 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
51 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
52 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
53 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
54 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
55 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
56 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
57 */
58
59/*
60 * Code to deal with in-kernel symbol table management + /dev/ksyms.
61 *
62 * For each loaded module the symbol table info is kept track of by a
63 * struct, placed in a circular list. The first entry is the kernel
64 * symbol table.
65 */
66
67/*
68 * TODO:
69 *
70 *	Add support for mmap, poll.
71 *	Constify tables.
72 *	Constify db_symtab and move it to .rodata.
73 */
74
75#include <sys/cdefs.h>
76__KERNEL_RCSID(0, "$NetBSD: kern_ksyms.c,v 1.108 2023/02/21 11:40:00 riastradh Exp $");
77
78#if defined(_KERNEL) && defined(_KERNEL_OPT)
79#include "opt_copy_symtab.h"
80#include "opt_ddb.h"
81#include "opt_dtrace.h"
82#endif
83
84#define _KSYMS_PRIVATE
85
86#include <sys/param.h>
87#include <sys/queue.h>
88#include <sys/exec.h>
89#include <sys/file.h>
90#include <sys/filedesc.h>
91#include <sys/kauth.h>
92#include <sys/systm.h>
93#include <sys/conf.h>
94#include <sys/kmem.h>
95#include <sys/proc.h>
96#include <sys/atomic.h>
97#include <sys/ksyms.h>
98#include <sys/kernel.h>
99#include <sys/intr.h>
100#include <sys/pserialize.h>
101#include <sys/stat.h>
102
103#include <uvm/uvm_extern.h>
104
105#ifdef DDB
106#include <ddb/db_output.h>
107#endif
108
109#include "ksyms.h"
110#if NKSYMS > 0
111#include "ioconf.h"
112#endif
113
/*
 * A reference-counted snapshot object.  Instances are created by
 * ksyms_snapshot_alloc() and dropped with ksyms_snapshot_release().
 */
struct ksyms_snapshot {
	uint64_t		ks_refcnt;	/* starts at 1; freed when it hits 0 */
	uint64_t		ks_gen;		/* generation passed at allocation */
	struct uvm_object	*ks_uobj;	/* from uao_create(ks_size, 0) */
	size_t			ks_size;	/* size requested for ks_uobj */
	dev_t			ks_dev;		/* device passed at allocation */
	int			ks_maxlen;	/* maxlen passed at allocation */
};
122
/*
 * Capacity of the statically allocated symbol-id map used for the
 * kernel's own table; addsymtab() truncates the table if it is exceeded.
 */
#define KSYMS_MAX_ID	98304
#ifdef KDTRACE_HOOKS
static uint32_t ksyms_nmap[KSYMS_MAX_ID];	/* sorted symbol table map */
#else
static uint32_t *ksyms_nmap = NULL;
#endif

/* Longest symbol name seen by addsymtab(), including the NUL. */
static int ksyms_maxlen;
/* Set once ksyms_init() has created the lock, condvar and psz. */
static bool ksyms_initted;
/* Set by addsymtab() once at least one table has been published. */
static bool ksyms_loaded;
static kmutex_t ksyms_lock __cacheline_aligned;
/* Symbol table record used for the kernel itself ("netbsd"). */
static struct ksyms_symtab kernel_symtab;
/* ksyms_modunload() waits on this while ksyms_snapshotting is set. */
static kcondvar_t ksyms_cv;
/* LWP currently taking a snapshot, or NULL. */
static struct lwp *ksyms_snapshotting;
/* Current global snapshot; cleared on module load and unload. */
static struct ksyms_snapshot *ksyms_snapshot;
static uint64_t ksyms_snapshot_gen;
/* Created in ksyms_init(); ksyms_modunload() waits a grace period on it. */
static pserialize_t ksyms_psz __read_mostly;

static void ksyms_hdr_init(const void *);
static void ksyms_sizes_calc(void);
static struct ksyms_snapshot *ksyms_snapshot_alloc(int, size_t, dev_t,
    uint64_t);
static void ksyms_snapshot_release(struct ksyms_snapshot *);

#ifdef KSYMS_DEBUG
#define	FOLLOW_CALLS		1
#define	FOLLOW_MORE_CALLS	2
#define	FOLLOW_DEVKSYMS		4
static int ksyms_debug;
#endif

/*
 * ksyms_init() compares the start of db_symtab against this string and
 * only loads the table when they differ.
 */
#define		SYMTAB_FILLER	"|This is the symbol table!"

#ifdef makeoptions_COPY_SYMTAB
extern char db_symtab[];
extern int db_symtabsize;
#endif

/*
 * used by savecore(8) so non-static
 */
struct ksyms_hdr ksyms_hdr;
int ksyms_symsz;
int ksyms_strsz;
int ksyms_ctfsz;	/* this is not currently used by savecore(8) */
/*
 * The symtabs are kept on two lists: a TAILQ and a pslist.  addsymtab()
 * and ksyms_modunload() update both together at splhigh.
 */
TAILQ_HEAD(ksyms_symtab_queue, ksyms_symtab) ksyms_symtabs =
    TAILQ_HEAD_INITIALIZER(ksyms_symtabs);
static struct pslist_head ksyms_symtabs_psz = PSLIST_INITIALIZER;
171
/*
 * Check that both a symbol table and a string table were located.
 *
 * Returns 1 when both pointers are non-NULL and 0 otherwise.  On
 * DIAGNOSTIC or DEBUG kernels a message is printed for each missing
 * table, followed by a hint that the kernel may be stripped.
 */
static int
ksyms_verify(const void *symstart, const void *strstart)
{
	const int have_sym = (symstart != NULL);
	const int have_str = (strstart != NULL);

#if defined(DIAGNOSTIC) || defined(DEBUG)
	if (!have_sym)
		printf("ksyms: Symbol table not found\n");
	if (!have_str)
		printf("ksyms: String table not found\n");
	if (!have_sym || !have_str)
		printf("ksyms: Perhaps the kernel is stripped?\n");
#endif
	return have_sym && have_str;
}
187
188/*
189 * Finds a certain symbol name in a certain symbol table.
190 */
191static Elf_Sym *
192findsym(const char *name, struct ksyms_symtab *table, int type)
193{
194	Elf_Sym *sym, *maxsym;
195	int low, mid, high, nglob;
196	char *str, *cmp;
197
198	sym = table->sd_symstart;
199	str = table->sd_strstart - table->sd_usroffset;
200	nglob = table->sd_nglob;
201	low = 0;
202	high = nglob;
203
204	/*
205	 * Start with a binary search of all global symbols in this table.
206	 * Global symbols must have unique names.
207	 */
208	while (low < high) {
209		mid = (low + high) >> 1;
210		cmp = sym[mid].st_name + str;
211		if (cmp[0] < name[0] || strcmp(cmp, name) < 0) {
212			low = mid + 1;
213		} else {
214			high = mid;
215		}
216	}
217	KASSERT(low == high);
218	if (__predict_true(low < nglob &&
219	    strcmp(sym[low].st_name + str, name) == 0)) {
220		KASSERT(ELF_ST_BIND(sym[low].st_info) == STB_GLOBAL);
221		return &sym[low];
222	}
223
224	/*
225	 * Perform a linear search of local symbols (rare).  Many local
226	 * symbols with the same name can exist so are not included in
227	 * the binary search.
228	 */
229	if (type != KSYMS_EXTERN) {
230		maxsym = sym + table->sd_symsize / sizeof(Elf_Sym);
231		for (sym += nglob; sym < maxsym; sym++) {
232			if (strcmp(name, sym->st_name + str) == 0) {
233				return sym;
234			}
235		}
236	}
237	return NULL;
238}
239
240/*
241 * The "attach" is in reality done in ksyms_init().
242 */
243#if NKSYMS > 0
244/*
245 * ksyms can be loaded even if the kernel has a missing "pseudo-device ksyms"
246 * statement because ddb and modules require it. Fixing it properly requires
 * fixing config to warn about required, but missing pseudo-devices. For now,
248 * if we don't have the pseudo-device we don't need the attach function; this
249 * is fine, as it does nothing.
250 */
void
ksymsattach(int arg)
{
	/* Nothing to do here; arg is unused. */
}
255#endif
256
257void
258ksyms_init(void)
259{
260
261#ifdef makeoptions_COPY_SYMTAB
262	if (!ksyms_loaded &&
263	    strncmp(db_symtab, SYMTAB_FILLER, sizeof(SYMTAB_FILLER))) {
264		ksyms_addsyms_elf(db_symtabsize, db_symtab,
265		    db_symtab + db_symtabsize);
266	}
267#endif
268
269	if (!ksyms_initted) {
270		mutex_init(&ksyms_lock, MUTEX_DEFAULT, IPL_NONE);
271		cv_init(&ksyms_cv, "ksyms");
272		ksyms_psz = pserialize_create();
273		ksyms_initted = true;
274	}
275}
276
/*
 * Are any symbols available?
 *
 * Returns the ksyms_loaded flag, which addsymtab() sets after it has
 * published a symbol table.
 */
bool
ksyms_available(void)
{

	return ksyms_loaded;
}
286
/*
 * Add a symbol table.
 * This is intended for use when the symbol table and its corresponding
 * string table are easily available.  If they are embedded in an ELF
 * image, use ksyms_addsyms_elf() instead.
 *
 * name - Name of the symbol table.
 * symstart, symsize - Address and size of the symbol table.
 * strstart, strsize - Address and size of the string table.
 * tab - Symbol table to be updated with this information.
 * newstart - Address to which the symbol table has to be copied during
 *            shrinking.  If NULL, it is not moved.
 */
300static const char *addsymtab_strstart;
301
302static int
303addsymtab_compar(const void *a, const void *b)
304{
305	const Elf_Sym *sa, *sb;
306
307	sa = a;
308	sb = b;
309
310	/*
311	 * Split the symbol table into two, with globals at the start
312	 * and locals at the end.
313	 */
314	if (ELF_ST_BIND(sa->st_info) != ELF_ST_BIND(sb->st_info)) {
315		if (ELF_ST_BIND(sa->st_info) == STB_GLOBAL) {
316			return -1;
317		}
318		if (ELF_ST_BIND(sb->st_info) == STB_GLOBAL) {
319			return 1;
320		}
321	}
322
323	/* Within each band, sort by name. */
324	return strcmp(sa->st_name + addsymtab_strstart,
325	    sb->st_name + addsymtab_strstart);
326}
327
328static void
329addsymtab(const char *name, void *symstart, size_t symsize,
330	  void *strstart, size_t strsize, struct ksyms_symtab *tab,
331	  void *newstart, void *ctfstart, size_t ctfsize, uint32_t *nmap)
332{
333	Elf_Sym *sym, *nsym, ts;
334	int i, j, n, nglob;
335	char *str;
336	int nsyms = symsize / sizeof(Elf_Sym);
337	int s;
338
339	/* Sanity check for pre-allocated map table used during startup. */
340	if ((nmap == ksyms_nmap) && (nsyms >= KSYMS_MAX_ID)) {
341		printf("kern_ksyms: ERROR %d > %d, increase KSYMS_MAX_ID\n",
342		    nsyms, KSYMS_MAX_ID);
343
344		/* truncate for now */
345		nsyms = KSYMS_MAX_ID - 1;
346	}
347
348	tab->sd_symstart = symstart;
349	tab->sd_symsize = symsize;
350	tab->sd_strstart = strstart;
351	tab->sd_strsize = strsize;
352	tab->sd_name = name;
353	tab->sd_minsym = UINTPTR_MAX;
354	tab->sd_maxsym = 0;
355	tab->sd_usroffset = 0;
356	tab->sd_ctfstart = ctfstart;
357	tab->sd_ctfsize = ctfsize;
358	tab->sd_nmap = nmap;
359	tab->sd_nmapsize = nsyms;
360#ifdef KSYMS_DEBUG
361	printf("newstart %p sym %p ksyms_symsz %zu str %p strsz %zu send %p\n",
362	    newstart, symstart, symsize, strstart, strsize,
363	    tab->sd_strstart + tab->sd_strsize);
364#endif
365
366	if (nmap) {
367		memset(nmap, 0, nsyms * sizeof(uint32_t));
368	}
369
370	/* Pack symbol table by removing all file name references. */
371	sym = tab->sd_symstart;
372	nsym = (Elf_Sym *)newstart;
373	str = tab->sd_strstart;
374	nglob = 0;
375	for (i = n = 0; i < nsyms; i++) {
376
377		/*
378		 * This breaks CTF mapping, so don't do it when
379		 * DTrace is enabled.
380		 */
381#ifndef KDTRACE_HOOKS
382		/*
383		 * Remove useless symbols.
384		 * Should actually remove all typeless symbols.
385		 */
386		if (sym[i].st_name == 0)
387			continue; /* Skip nameless entries */
388		if (sym[i].st_shndx == SHN_UNDEF)
389			continue; /* Skip external references */
390		if (ELF_ST_TYPE(sym[i].st_info) == STT_FILE)
391			continue; /* Skip filenames */
392		if (ELF_ST_TYPE(sym[i].st_info) == STT_NOTYPE &&
393		    sym[i].st_value == 0 &&
394		    strcmp(str + sym[i].st_name, "*ABS*") == 0)
395			continue; /* XXX */
396		if (ELF_ST_TYPE(sym[i].st_info) == STT_NOTYPE &&
397		    strcmp(str + sym[i].st_name, "gcc2_compiled.") == 0)
398			continue; /* XXX */
399#endif
400
401		/* Save symbol. Set it as an absolute offset */
402		nsym[n] = sym[i];
403
404#ifdef KDTRACE_HOOKS
405		if (nmap != NULL) {
406			/*
407			 * Save the size, replace it with the symbol id so
408			 * the mapping can be done after the cleanup and sort.
409			 */
410			nmap[i] = nsym[n].st_size;
411			nsym[n].st_size = i + 1;	/* zero is reserved */
412		}
413#endif
414
415		if (sym[i].st_shndx != SHN_ABS) {
416			nsym[n].st_shndx = SHBSS;
417		} else {
418			/* SHN_ABS is a magic value, don't overwrite it */
419		}
420
421		j = strlen(nsym[n].st_name + str) + 1;
422		if (j > ksyms_maxlen)
423			ksyms_maxlen = j;
424		nglob += (ELF_ST_BIND(nsym[n].st_info) == STB_GLOBAL);
425
426		/* Compute min and max symbols. */
427		if (strcmp(str + sym[i].st_name, "*ABS*") != 0
428		    && ELF_ST_TYPE(nsym[n].st_info) != STT_NOTYPE) {
429			if (nsym[n].st_value < tab->sd_minsym) {
430				tab->sd_minsym = nsym[n].st_value;
431			}
432			if (nsym[n].st_value > tab->sd_maxsym) {
433				tab->sd_maxsym = nsym[n].st_value;
434			}
435		}
436		n++;
437	}
438
439	/* Fill the rest of the record, and sort the symbols. */
440	tab->sd_symstart = nsym;
441	tab->sd_symsize = n * sizeof(Elf_Sym);
442	tab->sd_nglob = nglob;
443
444	addsymtab_strstart = str;
445	if (kheapsort(nsym, n, sizeof(Elf_Sym), addsymtab_compar, &ts) != 0)
446		panic("addsymtab");
447
448#ifdef KDTRACE_HOOKS
449	/*
450	 * Build the mapping from original symbol id to new symbol table.
451	 * Deleted symbols will have a zero map, indices will be one based
452	 * instead of zero based.
453	 * Resulting map is sd_nmap[original_index] = new_index + 1
454	 */
455	if (nmap != NULL) {
456		int new;
457		for (new = 0; new < n; new++) {
458			uint32_t orig = nsym[new].st_size - 1;
459			uint32_t size = nmap[orig];
460
461			nmap[orig] = new + 1;
462
463			/* restore the size */
464			nsym[new].st_size = size;
465		}
466	}
467#endif
468
469	KASSERT(strcmp(name, "netbsd") == 0 || mutex_owned(&ksyms_lock));
470	KASSERT(cold || mutex_owned(&ksyms_lock));
471
472	/*
473	 * Publish the symtab.  Do this at splhigh to ensure ddb never
474	 * witnesses an inconsistent state of the queue, unless memory
475	 * is so corrupt that we crash in PSLIST_WRITER_INSERT_AFTER or
476	 * TAILQ_INSERT_TAIL.
477	 */
478	PSLIST_ENTRY_INIT(tab, sd_pslist);
479	s = splhigh();
480	if (TAILQ_EMPTY(&ksyms_symtabs)) {
481		PSLIST_WRITER_INSERT_HEAD(&ksyms_symtabs_psz, tab, sd_pslist);
482	} else {
483		struct ksyms_symtab *last;
484
485		last = TAILQ_LAST(&ksyms_symtabs, ksyms_symtab_queue);
486		PSLIST_WRITER_INSERT_AFTER(last, tab, sd_pslist);
487	}
488	TAILQ_INSERT_TAIL(&ksyms_symtabs, tab, sd_queue);
489	splx(s);
490
491	ksyms_sizes_calc();
492	ksyms_loaded = true;
493}
494
495/*
496 * Setup the kernel symbol table stuff.
497 */
498void
499ksyms_addsyms_elf(int symsize, void *start, void *end)
500{
501	int i, j;
502	Elf_Shdr *shdr;
503	char *symstart = NULL, *strstart = NULL;
504	size_t strsize = 0;
505	Elf_Ehdr *ehdr;
506	char *ctfstart = NULL;
507	size_t ctfsize = 0;
508
509	if (symsize <= 0) {
510		printf("[ Kernel symbol table missing! ]\n");
511		return;
512	}
513
514	/* Sanity check */
515	if (ALIGNED_POINTER(start, long) == 0) {
516		printf("[ Kernel symbol table has bad start address %p ]\n",
517		    start);
518		return;
519	}
520
521	ehdr = (Elf_Ehdr *)start;
522
523	/* check if this is a valid ELF header */
524	/* No reason to verify arch type, the kernel is actually running! */
525	if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) ||
526	    ehdr->e_ident[EI_CLASS] != ELFCLASS ||
527	    ehdr->e_version > 1) {
528		printf("[ Kernel symbol table invalid! ]\n");
529		return; /* nothing to do */
530	}
531
532	/* Loaded header will be scratched in addsymtab */
533	ksyms_hdr_init(start);
534
535	/* Find the symbol table and the corresponding string table. */
536	shdr = (Elf_Shdr *)((uint8_t *)start + ehdr->e_shoff);
537	for (i = 1; i < ehdr->e_shnum; i++) {
538		if (shdr[i].sh_type != SHT_SYMTAB)
539			continue;
540		if (shdr[i].sh_offset == 0)
541			continue;
542		symstart = (uint8_t *)start + shdr[i].sh_offset;
543		symsize = shdr[i].sh_size;
544		j = shdr[i].sh_link;
545		if (shdr[j].sh_offset == 0)
546			continue; /* Can this happen? */
547		strstart = (uint8_t *)start + shdr[j].sh_offset;
548		strsize = shdr[j].sh_size;
549		break;
550	}
551
552#ifdef KDTRACE_HOOKS
553	/* Find the CTF section */
554	shdr = (Elf_Shdr *)((uint8_t *)start + ehdr->e_shoff);
555	if (ehdr->e_shstrndx != 0) {
556		char *shstr = (uint8_t *)start +
557		    shdr[ehdr->e_shstrndx].sh_offset;
558		for (i = 1; i < ehdr->e_shnum; i++) {
559#ifdef KSYMS_DEBUG
560			printf("ksyms: checking %s\n", &shstr[shdr[i].sh_name]);
561#endif
562			if (shdr[i].sh_type != SHT_PROGBITS)
563				continue;
564			if (strncmp(".SUNW_ctf", &shstr[shdr[i].sh_name], 10)
565			    != 0)
566				continue;
567			ctfstart = (uint8_t *)start + shdr[i].sh_offset;
568			ctfsize = shdr[i].sh_size;
569			ksyms_ctfsz = ctfsize;
570#ifdef DEBUG
571			aprint_normal("Found CTF at %p, size 0x%zx\n",
572			    ctfstart, ctfsize);
573#endif
574			break;
575		}
576#ifdef DEBUG
577	} else {
578		printf("ksyms: e_shstrndx == 0\n");
579#endif
580	}
581#endif
582
583	if (!ksyms_verify(symstart, strstart))
584		return;
585
586	addsymtab("netbsd", symstart, symsize, strstart, strsize,
587	    &kernel_symtab, symstart, ctfstart, ctfsize, ksyms_nmap);
588
589#ifdef DEBUG
590	aprint_normal("Loaded initial symtab at %p, strtab at %p, # entries %ld\n",
591	    kernel_symtab.sd_symstart, kernel_symtab.sd_strstart,
592	    (long)kernel_symtab.sd_symsize/sizeof(Elf_Sym));
593#endif
594
595	/* Should be no snapshot to invalidate yet.  */
596	KASSERT(ksyms_snapshot == NULL);
597}
598
599/*
600 * Setup the kernel symbol table stuff.
601 * Use this when the address of the symbol and string tables are known;
602 * otherwise use ksyms_init with an ELF image.
603 * We need to pass a minimal ELF header which will later be completed by
604 * ksyms_hdr_init and handed off to userland through /dev/ksyms.  We use
605 * a void *rather than a pointer to avoid exposing the Elf_Ehdr type.
606 */
607void
608ksyms_addsyms_explicit(void *ehdr, void *symstart, size_t symsize,
609    void *strstart, size_t strsize)
610{
611	if (!ksyms_verify(symstart, strstart))
612		return;
613
614	ksyms_hdr_init(ehdr);
615	addsymtab("netbsd", symstart, symsize, strstart, strsize,
616	    &kernel_symtab, symstart, NULL, 0, ksyms_nmap);
617
618	/* Should be no snapshot to invalidate yet.  */
619	KASSERT(ksyms_snapshot == NULL);
620}
621
622/*
623 * Get the value associated with a symbol.
624 * "mod" is the module name, or null if any module.
625 * "sym" is the symbol name.
626 * "val" is a pointer to the corresponding value, if call succeeded.
627 * Returns 0 if success or ENOENT if no such entry.
628 *
629 * If symp is nonnull, caller must hold ksyms_lock or module_lock, have
630 * ksyms_opencnt nonzero, be in a pserialize read section, be in ddb
631 * with all other CPUs quiescent.
632 */
633int
634ksyms_getval_unlocked(const char *mod, const char *sym, Elf_Sym **symp,
635    unsigned long *val, int type)
636{
637	struct ksyms_symtab *st;
638	Elf_Sym *es;
639	int s, error = ENOENT;
640
641#ifdef KSYMS_DEBUG
642	if (ksyms_debug & FOLLOW_CALLS)
643		printf("%s: mod %s sym %s valp %p\n", __func__, mod, sym, val);
644#endif
645
646	s = pserialize_read_enter();
647	PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz, struct ksyms_symtab,
648	    sd_pslist) {
649		if (mod != NULL && strcmp(st->sd_name, mod))
650			continue;
651		if ((es = findsym(sym, st, type)) != NULL) {
652			*val = es->st_value;
653			if (symp)
654				*symp = es;
655			error = 0;
656			break;
657		}
658	}
659	pserialize_read_exit(s);
660	return error;
661}
662
663int
664ksyms_getval(const char *mod, const char *sym, unsigned long *val, int type)
665{
666
667	if (!ksyms_loaded)
668		return ENOENT;
669
670	/* No locking needed -- we read the table pserialized.  */
671	return ksyms_getval_unlocked(mod, sym, NULL, val, type);
672}
673
674/*
675 * ksyms_get_mod(mod)
676 *
677 * Return the symtab for the given module name.  Caller must ensure
678 * that the module cannot be unloaded until after this returns.
679 */
680struct ksyms_symtab *
681ksyms_get_mod(const char *mod)
682{
683	struct ksyms_symtab *st;
684	int s;
685
686	s = pserialize_read_enter();
687	PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz, struct ksyms_symtab,
688	    sd_pslist) {
689		if (mod != NULL && strcmp(st->sd_name, mod))
690			continue;
691		break;
692	}
693	pserialize_read_exit(s);
694
695	return st;
696}
697
698
699/*
700 * ksyms_mod_foreach()
701 *
702 * Iterate over the symbol table of the specified module, calling the callback
703 * handler for each symbol. Stop iterating if the handler return is non-zero.
704 *
705 */
706
707int
708ksyms_mod_foreach(const char *mod, ksyms_callback_t callback, void *opaque)
709{
710	struct ksyms_symtab *st;
711	Elf_Sym *sym, *maxsym;
712	char *str;
713	int symindx;
714
715	if (!ksyms_loaded)
716		return ENOENT;
717
718	mutex_enter(&ksyms_lock);
719
720	/* find the module */
721	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
722		if (mod != NULL && strcmp(st->sd_name, mod))
723			continue;
724
725		sym = st->sd_symstart;
726		str = st->sd_strstart - st->sd_usroffset;
727
728		/* now iterate through the symbols */
729		maxsym = sym + st->sd_symsize / sizeof(Elf_Sym);
730		for (symindx = 0; sym < maxsym; sym++, symindx++) {
731			if (callback(str + sym->st_name, symindx,
732			    (void *)sym->st_value,
733			    sym->st_size,
734			    sym->st_info,
735			    opaque) != 0) {
736				break;
737			}
738		}
739	}
740	mutex_exit(&ksyms_lock);
741
742	return 0;
743}
744
745/*
746 * Get "mod" and "symbol" associated with an address.
747 * Returns 0 if success or ENOENT if no such entry.
748 *
749 * Caller must hold ksyms_lock or module_lock, have ksyms_opencnt
750 * nonzero, be in a pserialize read section, or be in ddb with all
751 * other CPUs quiescent.
752 */
753int
754ksyms_getname(const char **mod, const char **sym, vaddr_t v, int f)
755{
756	struct ksyms_symtab *st;
757	Elf_Sym *les, *es = NULL;
758	vaddr_t laddr = 0;
759	const char *lmod = NULL;
760	char *stable = NULL;
761	int type, i, sz;
762
763	if (!ksyms_loaded)
764		return ENOENT;
765
766	PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz, struct ksyms_symtab,
767	    sd_pslist) {
768		if (v < st->sd_minsym || v > st->sd_maxsym)
769			continue;
770		sz = st->sd_symsize/sizeof(Elf_Sym);
771		for (i = 0; i < sz; i++) {
772			les = st->sd_symstart + i;
773			type = ELF_ST_TYPE(les->st_info);
774
775			if ((f & KSYMS_PROC) && (type != STT_FUNC))
776				continue;
777
778			if (type == STT_NOTYPE)
779				continue;
780
781			if (((f & KSYMS_ANY) == 0) &&
782			    (type != STT_FUNC) && (type != STT_OBJECT))
783				continue;
784
785			if ((les->st_value <= v) && (les->st_value > laddr)) {
786				laddr = les->st_value;
787				es = les;
788				lmod = st->sd_name;
789				stable = st->sd_strstart - st->sd_usroffset;
790			}
791		}
792	}
793	if (es == NULL)
794		return ENOENT;
795	if ((f & KSYMS_EXACT) && (v != es->st_value))
796		return ENOENT;
797	if (mod)
798		*mod = lmod;
799	if (sym)
800		*sym = stable + es->st_name;
801	return 0;
802}
803
804/*
805 * Add a symbol table from a loadable module.
806 */
807void
808ksyms_modload(const char *name, void *symstart, vsize_t symsize,
809    char *strstart, vsize_t strsize)
810{
811	struct ksyms_symtab *st;
812	struct ksyms_snapshot *ks;
813	void *nmap;
814
815	st = kmem_zalloc(sizeof(*st), KM_SLEEP);
816	nmap = kmem_zalloc(symsize / sizeof(Elf_Sym) * sizeof (uint32_t),
817			   KM_SLEEP);
818	mutex_enter(&ksyms_lock);
819	addsymtab(name, symstart, symsize, strstart, strsize, st, symstart,
820	    NULL, 0, nmap);
821	ks = ksyms_snapshot;
822	ksyms_snapshot = NULL;
823	mutex_exit(&ksyms_lock);
824
825	if (ks)
826		ksyms_snapshot_release(ks);
827}
828
829/*
830 * Remove a symbol table from a loadable module.
831 */
832void
833ksyms_modunload(const char *name)
834{
835	struct ksyms_symtab *st;
836	struct ksyms_snapshot *ks;
837	int s;
838
839	mutex_enter(&ksyms_lock);
840	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
841		if (strcmp(name, st->sd_name) != 0)
842			continue;
843		break;
844	}
845	KASSERT(st != NULL);
846
847	/* Wait for any snapshot in progress to complete.  */
848	while (ksyms_snapshotting)
849		cv_wait(&ksyms_cv, &ksyms_lock);
850
851	/*
852	 * Remove the symtab.  Do this at splhigh to ensure ddb never
853	 * witnesses an inconsistent state of the queue, unless memory
854	 * is so corrupt that we crash in TAILQ_REMOVE or
855	 * PSLIST_WRITER_REMOVE.
856	 */
857	s = splhigh();
858	TAILQ_REMOVE(&ksyms_symtabs, st, sd_queue);
859	PSLIST_WRITER_REMOVE(st, sd_pslist);
860	splx(s);
861
862	/*
863	 * And wait a grace period, in case there are any pserialized
864	 * readers in flight.
865	 */
866	pserialize_perform(ksyms_psz);
867	PSLIST_ENTRY_DESTROY(st, sd_pslist);
868
869	/* Recompute the ksyms sizes now that we've removed st.  */
870	ksyms_sizes_calc();
871
872	/* Invalidate the global ksyms snapshot.  */
873	ks = ksyms_snapshot;
874	ksyms_snapshot = NULL;
875	mutex_exit(&ksyms_lock);
876
877	/*
878	 * No more references are possible.  Free the name map and the
879	 * symtab itself, which we had allocated in ksyms_modload.
880	 */
881	kmem_free(st->sd_nmap, st->sd_nmapsize * sizeof(uint32_t));
882	kmem_free(st, sizeof(*st));
883
884	/* Release the formerly global ksyms snapshot, if any.  */
885	if (ks)
886		ksyms_snapshot_release(ks);
887}
888
889#ifdef DDB
890/*
891 * Keep sifting stuff here, to avoid export of ksyms internals.
892 *
893 * Systems is expected to be quiescent, so no locking done.
894 */
895int
896ksyms_sift(char *mod, char *sym, int mode)
897{
898	struct ksyms_symtab *st;
899	char *sb;
900	int i, sz;
901
902	if (!ksyms_loaded)
903		return ENOENT;
904
905	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
906		if (mod && strcmp(mod, st->sd_name))
907			continue;
908		sb = st->sd_strstart - st->sd_usroffset;
909
910		sz = st->sd_symsize/sizeof(Elf_Sym);
911		for (i = 0; i < sz; i++) {
912			Elf_Sym *les = st->sd_symstart + i;
913			char c;
914
915			if (strstr(sb + les->st_name, sym) == NULL)
916				continue;
917
918			if (mode == 'F') {
919				switch (ELF_ST_TYPE(les->st_info)) {
920				case STT_OBJECT:
921					c = '+';
922					break;
923				case STT_FUNC:
924					c = '*';
925					break;
926				case STT_SECTION:
927					c = '&';
928					break;
929				case STT_FILE:
930					c = '/';
931					break;
932				default:
933					c = ' ';
934					break;
935				}
936				db_printf("%s%c ", sb + les->st_name, c);
937			} else
938				db_printf("%s ", sb + les->st_name);
939		}
940	}
941	return ENOENT;
942}
943#endif /* DDB */
944
945/*
946 * In case we exposing the symbol table to the userland using the pseudo-
947 * device /dev/ksyms, it is easier to provide all the tables as one.
948 * However, it means we have to change all the st_name fields for the
949 * symbols so they match the ELF image that the userland will read
950 * through the device.
951 *
952 * The actual (correct) value of st_name is preserved through a global
953 * offset stored in the symbol table structure.
954 *
955 * Call with ksyms_lock held.
956 */
957static void
958ksyms_sizes_calc(void)
959{
960	struct ksyms_symtab *st;
961	int i, delta;
962
963	KASSERT(cold || mutex_owned(&ksyms_lock));
964
965	ksyms_symsz = ksyms_strsz = 0;
966	TAILQ_FOREACH(st, &ksyms_symtabs, sd_queue) {
967		delta = ksyms_strsz - st->sd_usroffset;
968		if (delta != 0) {
969			for (i = 0; i < st->sd_symsize/sizeof(Elf_Sym); i++)
970				st->sd_symstart[i].st_name += delta;
971			st->sd_usroffset = ksyms_strsz;
972		}
973		ksyms_symsz += st->sd_symsize;
974		ksyms_strsz += st->sd_strsize;
975	}
976}
977
978static void
979ksyms_fill_note(void)
980{
981	int32_t *note = ksyms_hdr.kh_note;
982	note[0] = ELF_NOTE_NETBSD_NAMESZ;
983	note[1] = ELF_NOTE_NETBSD_DESCSZ;
984	note[2] = ELF_NOTE_TYPE_NETBSD_TAG;
985	memcpy(&note[3],  "NetBSD\0", 8);
986	note[5] = __NetBSD_Version__;
987}
988
989static void
990ksyms_hdr_init(const void *hdraddr)
991{
992	/* Copy the loaded elf exec header */
993	memcpy(&ksyms_hdr.kh_ehdr, hdraddr, sizeof(Elf_Ehdr));
994
995	/* Set correct program/section header sizes, offsets and numbers */
996	ksyms_hdr.kh_ehdr.e_phoff = offsetof(struct ksyms_hdr, kh_phdr[0]);
997	ksyms_hdr.kh_ehdr.e_phentsize = sizeof(Elf_Phdr);
998	ksyms_hdr.kh_ehdr.e_phnum = NPRGHDR;
999	ksyms_hdr.kh_ehdr.e_shoff = offsetof(struct ksyms_hdr, kh_shdr[0]);
1000	ksyms_hdr.kh_ehdr.e_shentsize = sizeof(Elf_Shdr);
1001	ksyms_hdr.kh_ehdr.e_shnum = NSECHDR;
1002	ksyms_hdr.kh_ehdr.e_shstrndx = SHSTRTAB;
1003
1004	/* Text/data - fake */
1005	ksyms_hdr.kh_phdr[0].p_type = PT_LOAD;
1006	ksyms_hdr.kh_phdr[0].p_memsz = (unsigned long)-1L;
1007	ksyms_hdr.kh_phdr[0].p_flags = PF_R | PF_X | PF_W;
1008
1009#define SHTCOPY(name)  strlcpy(&ksyms_hdr.kh_strtab[offs], (name), \
1010    sizeof(ksyms_hdr.kh_strtab) - offs), offs += sizeof(name)
1011
1012	uint32_t offs = 1;
1013	/* First section header ".note.netbsd.ident" */
1014	ksyms_hdr.kh_shdr[SHNOTE].sh_name = offs;
1015	ksyms_hdr.kh_shdr[SHNOTE].sh_type = SHT_NOTE;
1016	ksyms_hdr.kh_shdr[SHNOTE].sh_offset =
1017	    offsetof(struct ksyms_hdr, kh_note[0]);
1018	ksyms_hdr.kh_shdr[SHNOTE].sh_size = sizeof(ksyms_hdr.kh_note);
1019	ksyms_hdr.kh_shdr[SHNOTE].sh_addralign = sizeof(int);
1020	SHTCOPY(".note.netbsd.ident");
1021	ksyms_fill_note();
1022
1023	/* Second section header; ".symtab" */
1024	ksyms_hdr.kh_shdr[SYMTAB].sh_name = offs;
1025	ksyms_hdr.kh_shdr[SYMTAB].sh_type = SHT_SYMTAB;
1026	ksyms_hdr.kh_shdr[SYMTAB].sh_offset = sizeof(struct ksyms_hdr);
1027/*	ksyms_hdr.kh_shdr[SYMTAB].sh_size = filled in at open */
1028	ksyms_hdr.kh_shdr[SYMTAB].sh_link = STRTAB; /* Corresponding strtab */
1029	ksyms_hdr.kh_shdr[SYMTAB].sh_addralign = sizeof(long);
1030	ksyms_hdr.kh_shdr[SYMTAB].sh_entsize = sizeof(Elf_Sym);
1031	SHTCOPY(".symtab");
1032
1033	/* Third section header; ".strtab" */
1034	ksyms_hdr.kh_shdr[STRTAB].sh_name = offs;
1035	ksyms_hdr.kh_shdr[STRTAB].sh_type = SHT_STRTAB;
1036/*	ksyms_hdr.kh_shdr[STRTAB].sh_offset = filled in at open */
1037/*	ksyms_hdr.kh_shdr[STRTAB].sh_size = filled in at open */
1038	ksyms_hdr.kh_shdr[STRTAB].sh_addralign = sizeof(char);
1039	SHTCOPY(".strtab");
1040
1041	/* Fourth section, ".shstrtab" */
1042	ksyms_hdr.kh_shdr[SHSTRTAB].sh_name = offs;
1043	ksyms_hdr.kh_shdr[SHSTRTAB].sh_type = SHT_STRTAB;
1044	ksyms_hdr.kh_shdr[SHSTRTAB].sh_offset =
1045	    offsetof(struct ksyms_hdr, kh_strtab);
1046	ksyms_hdr.kh_shdr[SHSTRTAB].sh_size = SHSTRSIZ;
1047	ksyms_hdr.kh_shdr[SHSTRTAB].sh_addralign = sizeof(char);
1048	SHTCOPY(".shstrtab");
1049
1050	/* Fifth section, ".bss". All symbols reside here. */
1051	ksyms_hdr.kh_shdr[SHBSS].sh_name = offs;
1052	ksyms_hdr.kh_shdr[SHBSS].sh_type = SHT_NOBITS;
1053	ksyms_hdr.kh_shdr[SHBSS].sh_offset = 0;
1054	ksyms_hdr.kh_shdr[SHBSS].sh_size = (unsigned long)-1L;
1055	ksyms_hdr.kh_shdr[SHBSS].sh_addralign = PAGE_SIZE;
1056	ksyms_hdr.kh_shdr[SHBSS].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1057	SHTCOPY(".bss");
1058
1059	/* Sixth section header; ".SUNW_ctf" */
1060	ksyms_hdr.kh_shdr[SHCTF].sh_name = offs;
1061	ksyms_hdr.kh_shdr[SHCTF].sh_type = SHT_PROGBITS;
1062/*	ksyms_hdr.kh_shdr[SHCTF].sh_offset = filled in at open */
1063/*	ksyms_hdr.kh_shdr[SHCTF].sh_size = filled in at open */
1064	ksyms_hdr.kh_shdr[SHCTF].sh_link = SYMTAB; /* Corresponding symtab */
1065	ksyms_hdr.kh_shdr[SHCTF].sh_addralign = sizeof(char);
1066	SHTCOPY(".SUNW_ctf");
1067}
1068
1069static struct ksyms_snapshot *
1070ksyms_snapshot_alloc(int maxlen, size_t size, dev_t dev, uint64_t gen)
1071{
1072	struct ksyms_snapshot *ks;
1073
1074	ks = kmem_zalloc(sizeof(*ks), KM_SLEEP);
1075	ks->ks_refcnt = 1;
1076	ks->ks_gen = gen;
1077	ks->ks_uobj = uao_create(size, 0);
1078	ks->ks_size = size;
1079	ks->ks_dev = dev;
1080	ks->ks_maxlen = maxlen;
1081
1082	return ks;
1083}
1084
1085static void
1086ksyms_snapshot_release(struct ksyms_snapshot *ks)
1087{
1088	uint64_t refcnt;
1089
1090	mutex_enter(&ksyms_lock);
1091	refcnt = --ks->ks_refcnt;
1092	mutex_exit(&ksyms_lock);
1093
1094	if (refcnt)
1095		return;
1096
1097	uao_detach(ks->ks_uobj);
1098	kmem_free(ks, sizeof(*ks));
1099}
1100
1101static int
1102ubc_copyfrombuf(struct uvm_object *uobj, struct uio *uio, const void *buf,
1103    size_t n)
1104{
1105	struct iovec iov = { .iov_base = __UNCONST(buf), .iov_len = n };
1106
1107	uio->uio_iov = &iov;
1108	uio->uio_iovcnt = 1;
1109	uio->uio_resid = n;
1110
1111	return ubc_uiomove(uobj, uio, n, UVM_ADV_SEQUENTIAL, UBC_WRITE);
1112}
1113
1114static int
1115ksyms_take_snapshot(struct ksyms_snapshot *ks, struct ksyms_symtab *last)
1116{
1117	struct uvm_object *uobj = ks->ks_uobj;
1118	struct uio uio;
1119	struct ksyms_symtab *st;
1120	int error;
1121
1122	/* Caller must have initiated snapshotting.  */
1123	KASSERT(ksyms_snapshotting == curlwp);
1124
1125	/* Start a uio transfer to reuse incrementally.  */
1126	uio.uio_offset = 0;
1127	uio.uio_rw = UIO_WRITE; /* write from buffer to uobj */
1128	UIO_SETUP_SYSSPACE(&uio);
1129
1130	/*
1131	 * First: Copy out the ELF header.
1132	 */
1133	error = ubc_copyfrombuf(uobj, &uio, &ksyms_hdr, sizeof(ksyms_hdr));
1134	if (error)
1135		return error;
1136
1137	/*
1138	 * Copy out the symbol table.  The list of symtabs is
1139	 * guaranteed to be nonempty because we always have an entry
1140	 * for the main kernel.  We stop at last, not at the end of the
1141	 * tailq or NULL, because entries beyond last are not included
1142	 * in this snapshot (and may not be fully initialized memory as
1143	 * we witness it).
1144	 */
1145	KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr));
1146	for (st = TAILQ_FIRST(&ksyms_symtabs);
1147	     ;
1148	     st = TAILQ_NEXT(st, sd_queue)) {
1149		error = ubc_copyfrombuf(uobj, &uio, st->sd_symstart,
1150		    st->sd_symsize);
1151		if (error)
1152			return error;
1153		if (st == last)
1154			break;
1155	}
1156
1157	/*
1158	 * Copy out the string table
1159	 */
1160	KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr) +
1161	    ksyms_hdr.kh_shdr[SYMTAB].sh_size);
1162	for (st = TAILQ_FIRST(&ksyms_symtabs);
1163	     ;
1164	     st = TAILQ_NEXT(st, sd_queue)) {
1165		error = ubc_copyfrombuf(uobj, &uio, st->sd_strstart,
1166		    st->sd_strsize);
1167		if (error)
1168			return error;
1169		if (st == last)
1170			break;
1171	}
1172
1173	/*
1174	 * Copy out the CTF table.
1175	 */
1176	KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr) +
1177	    ksyms_hdr.kh_shdr[SYMTAB].sh_size +
1178	    ksyms_hdr.kh_shdr[STRTAB].sh_size);
1179	st = TAILQ_FIRST(&ksyms_symtabs);
1180	if (st->sd_ctfstart != NULL) {
1181		error = ubc_copyfrombuf(uobj, &uio, st->sd_ctfstart,
1182		    st->sd_ctfsize);
1183		if (error)
1184			return error;
1185	}
1186
1187	KASSERT(uio.uio_offset == sizeof(struct ksyms_hdr) +
1188	    ksyms_hdr.kh_shdr[SYMTAB].sh_size +
1189	    ksyms_hdr.kh_shdr[STRTAB].sh_size +
1190	    ksyms_hdr.kh_shdr[SHCTF].sh_size);
1191	KASSERT(uio.uio_offset == ks->ks_size);
1192
1193	return 0;
1194}
1195
1196static const struct fileops ksyms_fileops;
1197
1198static int
1199ksymsopen(dev_t dev, int flags, int devtype, struct lwp *l)
1200{
1201	struct file *fp = NULL;
1202	int fd = -1;
1203	struct ksyms_snapshot *ks = NULL;
1204	size_t size;
1205	struct ksyms_symtab *last;
1206	int maxlen;
1207	uint64_t gen;
1208	int error;
1209
1210	if (minor(dev) != 0 || !ksyms_loaded)
1211		return ENXIO;
1212
1213	/* Allocate a private file.  */
1214	error = fd_allocfile(&fp, &fd);
1215	if (error)
1216		return error;
1217
1218	mutex_enter(&ksyms_lock);
1219
1220	/*
1221	 * Wait until we have a snapshot, or until there is no snapshot
1222	 * being taken right now so we can take one.
1223	 */
1224	while ((ks = ksyms_snapshot) == NULL && ksyms_snapshotting) {
1225		error = cv_wait_sig(&ksyms_cv, &ksyms_lock);
1226		if (error)
1227			goto out;
1228	}
1229
1230	/*
1231	 * If there's a usable snapshot, increment its reference count
1232	 * (can't overflow, 64-bit) and just reuse it.
1233	 */
1234	if (ks) {
1235		ks->ks_refcnt++;
1236		goto out;
1237	}
1238
1239	/* Find the current length of the symtab object. */
1240	size = sizeof(struct ksyms_hdr);
1241	size += ksyms_strsz;
1242	size += ksyms_symsz;
1243	size += ksyms_ctfsz;
1244
1245	/* Start a new snapshot.  */
1246	ksyms_hdr.kh_shdr[SYMTAB].sh_size = ksyms_symsz;
1247	ksyms_hdr.kh_shdr[SYMTAB].sh_info = ksyms_symsz / sizeof(Elf_Sym);
1248	ksyms_hdr.kh_shdr[STRTAB].sh_offset = ksyms_symsz +
1249	    ksyms_hdr.kh_shdr[SYMTAB].sh_offset;
1250	ksyms_hdr.kh_shdr[STRTAB].sh_size = ksyms_strsz;
1251	ksyms_hdr.kh_shdr[SHCTF].sh_offset = ksyms_strsz +
1252	    ksyms_hdr.kh_shdr[STRTAB].sh_offset;
1253	ksyms_hdr.kh_shdr[SHCTF].sh_size = ksyms_ctfsz;
1254	last = TAILQ_LAST(&ksyms_symtabs, ksyms_symtab_queue);
1255	maxlen = ksyms_maxlen;
1256	gen = ksyms_snapshot_gen++;
1257
1258	/*
1259	 * Prevent ksyms entries from being removed while we take the
1260	 * snapshot.
1261	 */
1262	KASSERT(ksyms_snapshotting == NULL);
1263	ksyms_snapshotting = curlwp;
1264	mutex_exit(&ksyms_lock);
1265
1266	/* Create a snapshot and write the symtab to it.  */
1267	ks = ksyms_snapshot_alloc(maxlen, size, dev, gen);
1268	error = ksyms_take_snapshot(ks, last);
1269
1270	/*
1271	 * Snapshot creation is done.  Wake up anyone waiting to remove
1272	 * entries (module unload).
1273	 */
1274	mutex_enter(&ksyms_lock);
1275	KASSERTMSG(ksyms_snapshotting == curlwp, "lwp %p stole snapshot",
1276	    ksyms_snapshotting);
1277	ksyms_snapshotting = NULL;
1278	cv_broadcast(&ksyms_cv);
1279
1280	/* If we failed, give up.  */
1281	if (error)
1282		goto out;
1283
1284	/* Cache the snapshot for the next reader.  */
1285	KASSERT(ksyms_snapshot == NULL);
1286	ksyms_snapshot = ks;
1287	ks->ks_refcnt++;
1288	KASSERT(ks->ks_refcnt == 2);
1289
1290out:	mutex_exit(&ksyms_lock);
1291	if (error) {
1292		if (fp)
1293			fd_abort(curproc, fp, fd);
1294		if (ks)
1295			ksyms_snapshot_release(ks);
1296	} else {
1297		KASSERT(fp);
1298		KASSERT(ks);
1299		error = fd_clone(fp, fd, flags, &ksyms_fileops, ks);
1300		KASSERTMSG(error == EMOVEFD, "error=%d", error);
1301	}
1302	return error;
1303}
1304
1305static int
1306ksymsclose(struct file *fp)
1307{
1308	struct ksyms_snapshot *ks = fp->f_data;
1309
1310	ksyms_snapshot_release(ks);
1311
1312	return 0;
1313}
1314
1315static int
1316ksymsread(struct file *fp, off_t *offp, struct uio *uio, kauth_cred_t cred,
1317    int flags)
1318{
1319	const struct ksyms_snapshot *ks = fp->f_data;
1320	size_t count;
1321	int error;
1322
1323	/*
1324	 * Since we don't have a per-object lock, we might as well use
1325	 * the struct file lock to serialize access to fp->f_offset --
1326	 * but if the caller isn't relying on or updating fp->f_offset,
1327	 * there's no need to do even that.  We could use ksyms_lock,
1328	 * but why bother with a global lock if not needed?  Either
1329	 * way, the lock we use here must agree with what ksymsseek
1330	 * takes (nothing else in ksyms uses fp->f_offset).
1331	 */
1332	if (offp == &fp->f_offset)
1333		mutex_enter(&fp->f_lock);
1334
1335	/* Refuse negative offsets.  */
1336	if (*offp < 0) {
1337		error = EINVAL;
1338		goto out;
1339	}
1340
1341	/* Return nothing at or past end of file.  */
1342	if (*offp >= ks->ks_size) {
1343		error = 0;
1344		goto out;
1345	}
1346
1347	/*
1348	 * 1. Set up the uio to transfer from offset *offp.
1349	 * 2. Transfer as many bytes as we can (at most uio->uio_resid
1350	 *    or what's left in the ksyms).
1351	 * 3. If requested, update *offp to reflect the number of bytes
1352	 *    transferred.
1353	 */
1354	uio->uio_offset = *offp;
1355	count = uio->uio_resid;
1356	error = ubc_uiomove(ks->ks_uobj, uio, MIN(count, ks->ks_size - *offp),
1357	    UVM_ADV_SEQUENTIAL, UBC_READ|UBC_PARTIALOK);
1358	if (flags & FOF_UPDATE_OFFSET)
1359		*offp += count - uio->uio_resid;
1360
1361out:	if (offp == &fp->f_offset)
1362		mutex_exit(&fp->f_lock);
1363	return error;
1364}
1365
1366static int
1367ksymsstat(struct file *fp, struct stat *st)
1368{
1369	const struct ksyms_snapshot *ks = fp->f_data;
1370
1371	memset(st, 0, sizeof(*st));
1372
1373	st->st_dev = NODEV;
1374	st->st_ino = 0;
1375	st->st_mode = S_IFCHR;
1376	st->st_nlink = 1;
1377	st->st_uid = kauth_cred_geteuid(fp->f_cred);
1378	st->st_gid = kauth_cred_getegid(fp->f_cred);
1379	st->st_rdev = ks->ks_dev;
1380	st->st_size = ks->ks_size;
1381	/* zero time */
1382	st->st_blksize = MAXPHYS; /* XXX arbitrary */
1383	st->st_blocks = 0;
1384	st->st_gen = ks->ks_gen;
1385
1386	return 0;
1387}
1388
1389static int
1390ksymsmmap(struct file *fp, off_t *offp, size_t nbytes, int prot, int *flagsp,
1391    int *advicep, struct uvm_object **uobjp, int *maxprotp)
1392{
1393	const struct ksyms_snapshot *ks = fp->f_data;
1394
1395	/* uvm_mmap guarantees page-aligned offset and size.  */
1396	KASSERT(*offp == round_page(*offp));
1397	KASSERT(nbytes == round_page(nbytes));
1398	KASSERT(nbytes > 0);
1399
1400	/* Refuse negative offsets.  */
1401	if (*offp < 0)
1402		return EINVAL;
1403
1404	/* Refuse mappings that pass the end of file.  */
1405	if (nbytes > round_page(ks->ks_size) ||
1406	    *offp > round_page(ks->ks_size) - nbytes)
1407		return EINVAL;	/* XXX ??? */
1408
1409	/* Success!  */
1410	uao_reference(ks->ks_uobj);
1411	*advicep = UVM_ADV_SEQUENTIAL;
1412	*uobjp = ks->ks_uobj;
1413	*maxprotp = prot & VM_PROT_READ;
1414	return 0;
1415}
1416
1417static int
1418ksymsseek(struct file *fp, off_t delta, int whence, off_t *newoffp, int flags)
1419{
1420	const off_t OFF_MAX = __type_max(off_t);
1421	struct ksyms_snapshot *ks = fp->f_data;
1422	off_t base, newoff;
1423	int error;
1424
1425	mutex_enter(&fp->f_lock);
1426
1427	switch (whence) {
1428	case SEEK_CUR:
1429		base = fp->f_offset;
1430		break;
1431	case SEEK_END:
1432		base = ks->ks_size;
1433		break;
1434	case SEEK_SET:
1435		base = 0;
1436		break;
1437	default:
1438		error = EINVAL;
1439		goto out;
1440	}
1441
1442	/* Check for arithmetic overflow and reject negative offsets.  */
1443	if (base < 0 || delta > OFF_MAX - base || base + delta < 0) {
1444		error = EINVAL;
1445		goto out;
1446	}
1447
1448	/* Compute the new offset.  */
1449	newoff = base + delta;
1450
1451	/* Success!  */
1452	if (newoffp)
1453		*newoffp = newoff;
1454	if (flags & FOF_UPDATE_OFFSET)
1455		fp->f_offset = newoff;
1456	error = 0;
1457
1458out:	mutex_exit(&fp->f_lock);
1459	return error;
1460}
1461
/*
 * ksymsioctl copies the symbol name in through kg->kg_name for all of
 * OKIOCGVALUE, OKIOCGSYMBOL, KIOCGVALUE and KIOCGSYMBOL, whichever of
 * the three argument structures the command really uses.  Check at
 * compile time that the name pointer sits at the same offset in each.
 */
__CTASSERT(offsetof(struct ksyms_ogsymbol, kg_name) == offsetof(struct ksyms_gsymbol, kg_name));
__CTASSERT(offsetof(struct ksyms_gvalue, kv_name) == offsetof(struct ksyms_gsymbol, kg_name));
1464
1465static int
1466ksymsioctl(struct file *fp, u_long cmd, void *data)
1467{
1468	struct ksyms_snapshot *ks = fp->f_data;
1469	struct ksyms_ogsymbol *okg = (struct ksyms_ogsymbol *)data;
1470	struct ksyms_gsymbol *kg = (struct ksyms_gsymbol *)data;
1471	struct ksyms_gvalue *kv = (struct ksyms_gvalue *)data;
1472	struct ksyms_symtab *st;
1473	Elf_Sym *sym = NULL, copy;
1474	unsigned long val;
1475	int error = 0;
1476	char *str = NULL;
1477	int len, s;
1478
1479	/* Read cached ksyms_maxlen.  */
1480	len = ks->ks_maxlen;
1481
1482	if (cmd == OKIOCGVALUE || cmd == OKIOCGSYMBOL ||
1483	    cmd == KIOCGVALUE || cmd == KIOCGSYMBOL) {
1484		str = kmem_alloc(len, KM_SLEEP);
1485		if ((error = copyinstr(kg->kg_name, str, len, NULL)) != 0) {
1486			kmem_free(str, len);
1487			return error;
1488		}
1489	}
1490
1491	switch (cmd) {
1492	case OKIOCGVALUE:
1493		/*
1494		 * Use the in-kernel symbol lookup code for fast
1495		 * retreival of a value.
1496		 */
1497		error = ksyms_getval(NULL, str, &val, KSYMS_EXTERN);
1498		if (error == 0)
1499			error = copyout(&val, okg->kg_value, sizeof(long));
1500		kmem_free(str, len);
1501		break;
1502
1503	case OKIOCGSYMBOL:
1504		/*
1505		 * Use the in-kernel symbol lookup code for fast
1506		 * retreival of a symbol.
1507		 */
1508		s = pserialize_read_enter();
1509		PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz,
1510		    struct ksyms_symtab, sd_pslist) {
1511			if ((sym = findsym(str, st, KSYMS_ANY)) == NULL)
1512				continue;
1513#ifdef notdef
1514			/* Skip if bad binding */
1515			if (ELF_ST_BIND(sym->st_info) != STB_GLOBAL) {
1516				sym = NULL;
1517				continue;
1518			}
1519#endif
1520			break;
1521		}
1522		if (sym != NULL) {
1523			memcpy(&copy, sym, sizeof(copy));
1524			pserialize_read_exit(s);
1525			error = copyout(&copy, okg->kg_sym, sizeof(Elf_Sym));
1526		} else {
1527			pserialize_read_exit(s);
1528			error = ENOENT;
1529		}
1530		kmem_free(str, len);
1531		break;
1532
1533	case KIOCGVALUE:
1534		/*
1535		 * Use the in-kernel symbol lookup code for fast
1536		 * retreival of a value.
1537		 */
1538		error = ksyms_getval(NULL, str, &val, KSYMS_EXTERN);
1539		if (error == 0)
1540			kv->kv_value = val;
1541		kmem_free(str, len);
1542		break;
1543
1544	case KIOCGSYMBOL:
1545		/*
1546		 * Use the in-kernel symbol lookup code for fast
1547		 * retreival of a symbol.
1548		 */
1549		s = pserialize_read_enter();
1550		PSLIST_READER_FOREACH(st, &ksyms_symtabs_psz,
1551		    struct ksyms_symtab, sd_pslist) {
1552			if ((sym = findsym(str, st, KSYMS_ANY)) == NULL)
1553				continue;
1554#ifdef notdef
1555			/* Skip if bad binding */
1556			if (ELF_ST_BIND(sym->st_info) != STB_GLOBAL) {
1557				sym = NULL;
1558				continue;
1559			}
1560#endif
1561			break;
1562		}
1563		if (sym != NULL) {
1564			kg->kg_sym = *sym;
1565		} else {
1566			error = ENOENT;
1567		}
1568		pserialize_read_exit(s);
1569		kmem_free(str, len);
1570		break;
1571
1572	case KIOCGSIZE:
1573		/*
1574		 * Get total size of symbol table.
1575		 */
1576		*(int *)data = ks->ks_size;
1577		break;
1578
1579	default:
1580		error = ENOTTY;
1581		break;
1582	}
1583
1584	return error;
1585}
1586
1587const struct cdevsw ksyms_cdevsw = {
1588	.d_open = ksymsopen,
1589	.d_close = noclose,
1590	.d_read = noread,
1591	.d_write = nowrite,
1592	.d_ioctl = noioctl,
1593	.d_stop = nostop,
1594	.d_tty = notty,
1595	.d_poll = nopoll,
1596	.d_mmap = nommap,
1597	.d_kqfilter = nokqfilter,
1598	.d_discard = nodiscard,
1599	.d_flag = D_OTHER | D_MPSAFE
1600};
1601
1602static const struct fileops ksyms_fileops = {
1603	.fo_name = "ksyms",
1604	.fo_read = ksymsread,
1605	.fo_write = fbadop_write,
1606	.fo_ioctl = ksymsioctl,
1607	.fo_fcntl = fnullop_fcntl,
1608	.fo_poll = fnullop_poll,
1609	.fo_stat = ksymsstat,
1610	.fo_close = ksymsclose,
1611	.fo_kqfilter = fnullop_kqfilter,
1612	.fo_restart = fnullop_restart,
1613	.fo_mmap = ksymsmmap,
1614	.fo_seek = ksymsseek,
1615};
1616