1/*	$NetBSD: symbol.c,v 1.76 2023/07/30 09:20:14 riastradh Exp $	 */
2
3/*
4 * Copyright 1996 John D. Polstra.
5 * Copyright 1996 Matt Thomas <matt@3am-software.com>
6 * Copyright 2002 Charles M. Hannum <root@ihack.net>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *      This product includes software developed by John Polstra.
20 * 4. The name of the author may not be used to endorse or promote products
21 *    derived from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
34
35/*
36 * Dynamic linker for ELF.
37 *
38 * John Polstra <jdp@polstra.com>.
39 */
40
41#include <sys/cdefs.h>
42#ifndef lint
43__RCSID("$NetBSD: symbol.c,v 1.76 2023/07/30 09:20:14 riastradh Exp $");
44#endif /* not lint */
45
46#include <err.h>
47#include <errno.h>
48#include <fcntl.h>
49#include <stdarg.h>
50#include <stdio.h>
51#include <stdlib.h>
52#include <string.h>
53#include <unistd.h>
54#include <sys/types.h>
55#include <sys/mman.h>
56#include <sys/bitops.h>
57#include <dirent.h>
58
59#include "debug.h"
60#include "hash.h"
61#include "rtld.h"
62
63/*
64 * If the given object is already in the donelist, return true.  Otherwise
65 * add the object to the list and return false.
66 */
67static bool
68_rtld_donelist_check(DoneList *dlp, const Obj_Entry *obj)
69{
70	unsigned int i;
71
72	for (i = 0;  i < dlp->num_used;  i++)
73		if (dlp->objs[i] == obj)
74			return true;
75	/*
76	 * Our donelist allocation may not always be sufficient as we're not
77	 * thread safe. We'll handle it properly anyway.
78	 */
79	if (dlp->num_used < dlp->num_alloc)
80		dlp->objs[dlp->num_used++] = obj;
81	return false;
82}
83
84const Elf_Sym *
85_rtld_symlook_list(const char *name, Elf_Hash *hash, const Objlist *objlist,
86    const Obj_Entry **defobj_out, u_int flags, const Ver_Entry *ventry,
87    DoneList *dlp)
88{
89	const Elf_Sym *symp;
90	const Elf_Sym *def;
91	const Obj_Entry *defobj;
92	const Objlist_Entry *elm;
93
94	def = NULL;
95	defobj = NULL;
96	SIMPLEQ_FOREACH(elm, objlist, link) {
97		if (_rtld_donelist_check(dlp, elm->obj))
98			continue;
99		rdbg(("search object %p (%s) for %s", elm->obj, elm->obj->path,
100		    name));
101		symp = _rtld_symlook_obj(name, hash, elm->obj, flags, ventry);
102		if (symp != NULL) {
103			if ((def == NULL) ||
104			    (ELF_ST_BIND(symp->st_info) != STB_WEAK)) {
105				def = symp;
106				defobj = elm->obj;
107				if (ELF_ST_BIND(def->st_info) != STB_WEAK)
108					break;
109			}
110		}
111	}
112	if (def != NULL)
113		*defobj_out = defobj;
114	return def;
115}
116
117/*
118 * Search the symbol table of a shared object and all objects needed by it for
119 * a symbol of the given name. Search order is breadth-first. Returns a pointer
120 * to the symbol, or NULL if no definition was found.
121 */
122const Elf_Sym *
123_rtld_symlook_needed(const char *name, Elf_Hash *hash,
124    const Needed_Entry *needed, const Obj_Entry **defobj_out, u_int flags,
125    const Ver_Entry *ventry, DoneList *breadth, DoneList *depth)
126{
127	const Elf_Sym *def, *def_w;
128	const Needed_Entry *n;
129	const Obj_Entry *obj, *defobj, *defobj1;
130
131	def = def_w = NULL;
132	defobj = NULL;
133	for (n = needed; n != NULL; n = n->next) {
134		if ((obj = n->obj) == NULL)
135			continue;
136		if (_rtld_donelist_check(breadth, obj))
137			continue;
138		def = _rtld_symlook_obj(name, hash, obj, flags, ventry);
139		if (def == NULL)
140			continue;
141		defobj = obj;
142		if (ELF_ST_BIND(def->st_info) != STB_WEAK) {
143			*defobj_out = defobj;
144
145			return (def);
146		}
147	}
148	/*
149	 * Either the symbol definition has not been found in directly needed
150	 * objects, or the found symbol is weak.
151	 */
152	for (n = needed; n != NULL; n = n->next) {
153		if ((obj = n->obj) == NULL)
154			continue;
155		if (_rtld_donelist_check(depth, obj))
156			continue;
157		def_w = _rtld_symlook_needed(name, hash, obj->needed, &defobj1,
158		    flags, ventry, breadth, depth);
159		if (def_w == NULL)
160			continue;
161		if (def == NULL || ELF_ST_BIND(def_w->st_info) != STB_WEAK) {
162			def = def_w;
163			defobj = defobj1;
164			if (ELF_ST_BIND(def_w->st_info) != STB_WEAK)
165				break;
166		}
167	}
168	if (def != NULL)
169		*defobj_out = defobj;
170
171	return def;
172}
173
174static bool
175_rtld_symlook_obj_matched_symbol(const char *name,
176    const Obj_Entry *obj, u_int flags, const Ver_Entry *ventry,
177    unsigned long symnum, const Elf_Sym **vsymp, int *vcount)
178{
179	const Elf_Sym  *symp;
180	const char     *strp;
181	Elf_Half verndx;
182
183	symp = obj->symtab + symnum;
184	strp = obj->strtab + symp->st_name;
185	rdbg(("check \"%s\" vs \"%s\" in %s", name, strp, obj->path));
186	if (name[1] != strp[1] || strcmp(name, strp))
187		return false;
188#if defined(__mips__) || defined(__vax__)
189	if (symp->st_shndx == SHN_UNDEF)
190		return false;
191#else
192	/*
193	 * XXX DANGER WILL ROBINSON!
194	 * If we have a function pointer in the executable's
195	 * data section, it points to the executable's PLT
196	 * slot, and there is NO relocation emitted.  To make
197	 * the function pointer comparable to function pointers
198	 * in shared libraries, we must resolve data references
199	 * in the libraries to point to PLT slots in the
200	 * executable, if they exist.
201	 */
202	if (symp->st_shndx == SHN_UNDEF &&
203	    ((flags & SYMLOOK_IN_PLT) ||
204	    symp->st_value == 0 ||
205	    ELF_ST_TYPE(symp->st_info) != STT_FUNC))
206		return false;
207#endif
208
209	if (ventry == NULL) {
210		if (obj->versyms != NULL) {
211			verndx = VER_NDX(obj->versyms[symnum].vs_vers);
212			if (verndx > obj->vertabnum) {
213				_rtld_error("%s: symbol %s references "
214				    "wrong version %d", obj->path,
215				    &obj->strtab[symnum], verndx);
216				return false;
217			}
218
219			/*
220			 * If we are not called from dlsym (i.e. this
221			 * is a normal relocation from unversioned
222			 * binary), accept the symbol immediately
223			 * if it happens to have first version after
224			 * this shared object became versioned.
225			 * Otherwise, if symbol is versioned and not
226			 * hidden, remember it. If it is the only
227			 * symbol with this name exported by the shared
228			 * object, it will be returned as a match at the
229			 * end of the function. If symbol is global
230			 * (verndx < 2) accept it unconditionally.
231			 */
232			if (!(flags & SYMLOOK_DLSYM) &&
233			    verndx == VER_NDX_GIVEN) {
234				*vsymp = symp;
235				return true;
236			} else if (verndx >= VER_NDX_GIVEN) {
237				if (!(obj->versyms[symnum].vs_vers & VER_NDX_HIDDEN)) {
238					if (*vsymp == NULL)
239						*vsymp = symp;
240					(*vcount)++;
241				}
242				return false;
243			}
244		}
245		*vsymp = symp;
246		return true;
247	} else {
248		if (obj->versyms == NULL) {
249			if (_rtld_object_match_name(obj, ventry->name)){
250				_rtld_error("%s: object %s should "
251				    "provide version %s for symbol %s",
252				    _rtld_objself.path, obj->path,
253				    ventry->name, &obj->strtab[symnum]);
254				return false;
255			}
256		} else {
257			verndx = VER_NDX(obj->versyms[symnum].vs_vers);
258			if (verndx > obj->vertabnum) {
259				_rtld_error("%s: symbol %s references "
260				    "wrong version %d", obj->path,
261				    &obj->strtab[symnum], verndx);
262				return false;
263			}
264			if (obj->vertab[verndx].hash != ventry->hash ||
265			    strcmp(obj->vertab[verndx].name, ventry->name)) {
266				/*
267				* Version does not match. Look if this
268				* is a global symbol and if it is not
269				* hidden. If global symbol (verndx < 2)
270				* is available, use it. Do not return
271				* symbol if we are called by dlvsym,
272				* because dlvsym looks for a specific
273				* version and default one is not what
274				* dlvsym wants.
275				*/
276				if ((flags & SYMLOOK_DLSYM) ||
277				    (obj->versyms[symnum].vs_vers & VER_NDX_HIDDEN) ||
278				    (verndx >= VER_NDX_GIVEN))
279					return false;
280			}
281		}
282		*vsymp = symp;
283		return true;
284	}
285}
286
287/*
288 * Search the symbol table of a single shared object for a symbol of
289 * the given name.  Returns a pointer to the symbol, or NULL if no
290 * definition was found.
291 *
292 * SysV Hash version.
293 */
294static const Elf_Sym *
295_rtld_symlook_obj_sysv(const char *name, unsigned long hash,
296    const Obj_Entry *obj, u_int flags, const Ver_Entry *ventry)
297{
298	unsigned long symnum;
299	const Elf_Sym *vsymp = NULL;
300	int vcount = 0;
301
302	for (symnum = obj->buckets[fast_remainder32(hash, obj->nbuckets,
303	     obj->nbuckets_m, obj->nbuckets_s1, obj->nbuckets_s2)];
304	     symnum != ELF_SYM_UNDEFINED;
305	     symnum = obj->chains[symnum]) {
306		assert(symnum < obj->nchains);
307
308		if (_rtld_symlook_obj_matched_symbol(name, obj, flags,
309		    ventry, symnum, &vsymp, &vcount)) {
310			return vsymp;
311		}
312	}
313	if (vcount == 1)
314		return vsymp;
315	return NULL;
316}
317
318/*
319 * Search the symbol table of a single shared object for a symbol of
320 * the given name.  Returns a pointer to the symbol, or NULL if no
321 * definition was found.
322 *
323 * GNU Hash version.
324 */
325static const Elf_Sym *
326_rtld_symlook_obj_gnu(const char *name, unsigned long hash,
327    const Obj_Entry *obj, u_int flags, const Ver_Entry *ventry)
328{
329	unsigned long symnum;
330	const Elf_Sym *vsymp = NULL;
331	const Elf32_Word *hashval;
332	Elf_Addr bloom_word;
333	Elf32_Word bucket;
334	int vcount = 0;
335	unsigned int h1, h2;
336
337	/* Pick right bitmask word from Bloom filter array */
338	bloom_word = obj->bloom_gnu[(hash / ELFSIZE) & obj->mask_bm_gnu];
339
340	/* Calculate modulus word size of gnu hash and its derivative */
341	h1 = hash & (ELFSIZE - 1);
342	h2 = ((hash >> obj->shift2_gnu) & (ELFSIZE - 1));
343
344	/* Filter out the "definitely not in set" queries */
345	if (((bloom_word >> h1) & (bloom_word >> h2) & 1) == 0)
346		return NULL;
347
348	/* Locate hash chain and corresponding value element*/
349	bucket = obj->buckets_gnu[fast_remainder32(hash, obj->nbuckets_gnu,
350	     obj->nbuckets_m_gnu, obj->nbuckets_s1_gnu, obj->nbuckets_s2_gnu)];
351	if (bucket == 0)
352		return NULL;
353
354	hashval = &obj->chains_gnu[bucket];
355	do {
356		if (((*hashval ^ hash) >> 1) == 0) {
357			symnum = hashval - obj->chains_gnu;
358
359			if (_rtld_symlook_obj_matched_symbol(name, obj, flags,
360			    ventry, symnum, &vsymp, &vcount)) {
361				return vsymp;
362			}
363		}
364	} while ((*hashval++ & 1) == 0);
365	if (vcount == 1)
366		return vsymp;
367	return NULL;
368}
369
370/*
371 * Search the symbol table of a single shared object for a symbol of
372 * the given name.  Returns a pointer to the symbol, or NULL if no
373 * definition was found.
374 *
375 * The symbol's hash value is passed in for efficiency reasons; that
376 * eliminates many recomputations of the hash value.
377 *
378 * Redirect to either GNU Hash (whenever available) or ELF Hash.
379 */
380const Elf_Sym *
381_rtld_symlook_obj(const char *name, Elf_Hash *hash,
382    const Obj_Entry *obj, u_int flags, const Ver_Entry *ventry)
383{
384
385	assert(obj->sysv_hash || obj->gnu_hash);
386
387	/* Always prefer the GNU Hash as it is faster. */
388	if (obj->gnu_hash)
389		return _rtld_symlook_obj_gnu(name, hash->gnu, obj, flags, ventry);
390	else
391		return _rtld_symlook_obj_sysv(name, hash->sysv, obj, flags, ventry);
392}
393
394/*
395 * Given a symbol number in a referencing object, find the corresponding
396 * definition of the symbol.  Returns a pointer to the symbol, or NULL if
397 * no definition was found.  Returns a pointer to the Obj_Entry of the
398 * defining object via the reference parameter DEFOBJ_OUT.
399 */
400const Elf_Sym *
401_rtld_find_symdef(unsigned long symnum, const Obj_Entry *refobj,
402    const Obj_Entry **defobj_out, u_int flags)
403{
404	const Elf_Sym  *ref;
405	const Elf_Sym  *def;
406	const Obj_Entry *defobj;
407	const char     *name;
408	Elf_Hash        hash;
409
410	ref = refobj->symtab + symnum;
411	name = refobj->strtab + ref->st_name;
412
413	/*
414	 * We don't have to do a full scale lookup if the symbol is local.
415	 * We know it will bind to the instance in this load module; to
416	 * which we already have a pointer (ie ref).
417	 */
418	if (ELF_ST_BIND(ref->st_info) != STB_LOCAL) {
419		if (ELF_ST_TYPE(ref->st_info) == STT_SECTION) {
420			_rtld_error("%s: Bogus symbol table entry %lu",
421			    refobj->path, symnum);
422        	}
423
424		hash.sysv = _rtld_sysv_hash(name);
425		hash.gnu = _rtld_gnu_hash(name);
426		defobj = NULL;
427		def = _rtld_symlook_default(name, &hash, refobj, &defobj, flags,
428		    _rtld_fetch_ventry(refobj, symnum));
429	} else {
430		rdbg(("STB_LOCAL symbol %s in %s", name, refobj->path));
431		def = ref;
432		defobj = refobj;
433	}
434
435	/*
436	 * If we found no definition and the reference is weak, treat the
437	 * symbol as having the value zero.
438	 */
439	if (def == NULL && ELF_ST_BIND(ref->st_info) == STB_WEAK) {
440		rdbg(("  returning _rtld_sym_zero@_rtld_objself"));
441		def = &_rtld_sym_zero;
442		defobj = &_rtld_objself;
443	}
444
445	if (def != NULL) {
446		*defobj_out = defobj;
447	} else {
448		rdbg(("lookup failed"));
449		_rtld_error("%s: Undefined %ssymbol \"%s\" (symnum = %ld)",
450		    refobj->path, (flags & SYMLOOK_IN_PLT) ? "PLT " : "",
451		    name, symnum);
452	}
453	return def;
454}
455
456const Elf_Sym *
457_rtld_find_plt_symdef(unsigned long symnum, const Obj_Entry *obj,
458    const Obj_Entry **defobj, bool imm)
459{
460 	const Elf_Sym  *def = _rtld_find_symdef(symnum, obj, defobj,
461	    SYMLOOK_IN_PLT);
462	if (__predict_false(def == NULL))
463 		return NULL;
464
465	if (__predict_false(def == &_rtld_sym_zero)) {
466		/* tp is set during lazy binding. */
467		if (imm) {
468			const Elf_Sym	*ref = obj->symtab + symnum;
469			const char	*name = obj->strtab + ref->st_name;
470
471			_rtld_error(
472			    "%s: Trying to call undefined weak symbol `%s'",
473			    obj->path, name);
474			return NULL;
475		}
476	}
477	return def;
478}
479
480/*
481 * Given a symbol name in a referencing object, find the corresponding
482 * definition of the symbol.  Returns a pointer to the symbol, or NULL if
483 * no definition was found.  Returns a pointer to the Obj_Entry of the
484 * defining object via the reference parameter DEFOBJ_OUT.
485 */
486const Elf_Sym *
487_rtld_symlook_default(const char *name, Elf_Hash *hash,
488    const Obj_Entry *refobj, const Obj_Entry **defobj_out, u_int flags,
489    const Ver_Entry *ventry)
490{
491	const Elf_Sym *def;
492	const Elf_Sym *symp;
493	const Obj_Entry *obj;
494	const Obj_Entry *defobj;
495	const Objlist_Entry *elm;
496	def = NULL;
497	defobj = NULL;
498	DoneList donelist;
499
500	_rtld_donelist_init(&donelist);
501
502	/* Look first in the referencing object if linked symbolically. */
503	if (refobj->symbolic && !_rtld_donelist_check(&donelist, refobj)) {
504		rdbg(("search referencing object for %s", name));
505		symp = _rtld_symlook_obj(name, hash, refobj, flags, ventry);
506		if (symp != NULL) {
507			def = symp;
508			defobj = refobj;
509		}
510	}
511
512	/* Search all objects loaded at program start up. */
513	if (def == NULL || ELF_ST_BIND(def->st_info) == STB_WEAK) {
514		rdbg(("search _rtld_list_main for %s", name));
515		symp = _rtld_symlook_list(name, hash, &_rtld_list_main, &obj,
516		    flags, ventry, &donelist);
517		if (symp != NULL &&
518		    (def == NULL || ELF_ST_BIND(symp->st_info) != STB_WEAK)) {
519			def = symp;
520			defobj = obj;
521		}
522	}
523
524	/* Search all RTLD_GLOBAL objects. */
525	if (def == NULL || ELF_ST_BIND(def->st_info) == STB_WEAK) {
526		rdbg(("search _rtld_list_global for %s", name));
527		symp = _rtld_symlook_list(name, hash, &_rtld_list_global,
528		    &obj, flags, ventry, &donelist);
529		if (symp != NULL &&
530		    (def == NULL || ELF_ST_BIND(symp->st_info) != STB_WEAK)) {
531			def = symp;
532			defobj = obj;
533		}
534	}
535
536	/* Search all dlopened DAGs containing the referencing object. */
537	SIMPLEQ_FOREACH(elm, &refobj->dldags, link) {
538		if (def != NULL && ELF_ST_BIND(def->st_info) != STB_WEAK)
539			break;
540		rdbg(("search DAG with root %p (%s) for %s", elm->obj,
541		    elm->obj->path, name));
542		symp = _rtld_symlook_list(name, hash, &elm->obj->dagmembers,
543		    &obj, flags, ventry, &donelist);
544		if (symp != NULL &&
545		    (def == NULL || ELF_ST_BIND(symp->st_info) != STB_WEAK)) {
546			def = symp;
547			defobj = obj;
548		}
549	}
550
551	/*
552	 * Finally, look in the referencing object if not linked symbolically.
553	 * This is necessary for DF_1_NODELETE objects where the containing DAG
554	 * has been unlinked, so local references are resolved properly.
555	 */
556	if ((def == NULL || ELF_ST_BIND(def->st_info) == STB_WEAK) &&
557	    !refobj->symbolic && !_rtld_donelist_check(&donelist, refobj)) {
558		rdbg(("search referencing object for %s", name));
559		symp = _rtld_symlook_obj(name, hash, refobj, flags, ventry);
560		if (symp != NULL) {
561			def = symp;
562			defobj = refobj;
563		}
564	}
565
566	/*
567	 * Search the dynamic linker itself, and possibly resolve the
568	 * symbol from there.  This is how the application links to
569	 * dynamic linker services such as dlopen.
570	 */
571	if (def == NULL || ELF_ST_BIND(def->st_info) == STB_WEAK) {
572		rdbg(("Search the dynamic linker itself."));
573		symp = _rtld_symlook_obj(name, hash, &_rtld_objself, flags,
574		    ventry);
575		if (symp != NULL) {
576			def = symp;
577			defobj = &_rtld_objself;
578		}
579	}
580
581	if (def != NULL)
582		*defobj_out = defobj;
583	return def;
584}
585