1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE
5#endif
6#include <libelf.h>
7#include <gelf.h>
8#include <fcntl.h>
9#include <linux/kernel.h>
10
11#include "libbpf_internal.h"
12#include "str_error.h"
13
/* A SHT_GNU_versym section holds 16-bit words, one per dynamic symbol.
 * This bit is set if the symbol is hidden and can only be seen when
 * referenced using an explicit version number. This is a GNU extension.
 */
#define VERSYM_HIDDEN	0x8000

/* This is the mask for the rest of the data in a word read from a
 * SHT_GNU_versym section, i.e. everything except the VERSYM_HIDDEN bit.
 * The masked value is the symbol's version index.
 */
#define VERSYM_VERSION	0x7fff
24
25int elf_open(const char *binary_path, struct elf_fd *elf_fd)
26{
27	char errmsg[STRERR_BUFSIZE];
28	int fd, ret;
29	Elf *elf;
30
31	if (elf_version(EV_CURRENT) == EV_NONE) {
32		pr_warn("elf: failed to init libelf for %s\n", binary_path);
33		return -LIBBPF_ERRNO__LIBELF;
34	}
35	fd = open(binary_path, O_RDONLY | O_CLOEXEC);
36	if (fd < 0) {
37		ret = -errno;
38		pr_warn("elf: failed to open %s: %s\n", binary_path,
39			libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
40		return ret;
41	}
42	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
43	if (!elf) {
44		pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
45		close(fd);
46		return -LIBBPF_ERRNO__FORMAT;
47	}
48	elf_fd->fd = fd;
49	elf_fd->elf = elf;
50	return 0;
51}
52
53void elf_close(struct elf_fd *elf_fd)
54{
55	if (!elf_fd)
56		return;
57	elf_end(elf_fd->elf);
58	close(elf_fd->fd);
59}
60
61/* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */
62static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
63{
64	while ((scn = elf_nextscn(elf, scn)) != NULL) {
65		GElf_Shdr sh;
66
67		if (!gelf_getshdr(scn, &sh))
68			continue;
69		if (sh.sh_type == sh_type)
70			return scn;
71	}
72	return NULL;
73}
74
/* One symbol as produced by the symbol iterator. */
struct elf_sym {
	/* symbol name, looked up in the symbol table's string table */
	const char *name;
	/* the symbol table entry itself */
	GElf_Sym sym;
	/* header of the section the symbol refers to via st_shndx */
	GElf_Shdr sh;
	/* version index (versym word masked with VERSYM_VERSION); 0 if no versym data */
	int ver;
	/* true if the versym word has VERSYM_HIDDEN set */
	bool hidden;
};
82
/* State of an iteration over one symbol table section of an ELF object. */
struct elf_sym_iter {
	/* ELF handle being iterated */
	Elf *elf;
	/* data of the symbol table section */
	Elf_Data *syms;
	/* data of the SHT_GNU_versym section; only looked up for SHT_DYNSYM, may be NULL */
	Elf_Data *versyms;
	/* data of the SHT_GNU_verdef section; only looked up for SHT_DYNSYM, may be NULL */
	Elf_Data *verdefs;
	/* number of entries in the symbol table (data size / entry size) */
	size_t nr_syms;
	/* sh_link of the symbol table section: string table used for symbol names */
	size_t strtabidx;
	/* sh_link of the verdef section: string table used for version names */
	size_t verdef_strtabidx;
	/* index at which the next elf_sym_iter_next() call resumes scanning */
	size_t next_sym_idx;
	/* storage for the symbol handed out by elf_sym_iter_next() */
	struct elf_sym sym;
	/* only symbols whose GELF_ST_TYPE() equals this value are returned */
	int st_type;
};
95
96static int elf_sym_iter_new(struct elf_sym_iter *iter,
97			    Elf *elf, const char *binary_path,
98			    int sh_type, int st_type)
99{
100	Elf_Scn *scn = NULL;
101	GElf_Ehdr ehdr;
102	GElf_Shdr sh;
103
104	memset(iter, 0, sizeof(*iter));
105
106	if (!gelf_getehdr(elf, &ehdr)) {
107		pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
108		return -EINVAL;
109	}
110
111	scn = elf_find_next_scn_by_type(elf, sh_type, NULL);
112	if (!scn) {
113		pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
114			 binary_path);
115		return -ENOENT;
116	}
117
118	if (!gelf_getshdr(scn, &sh))
119		return -EINVAL;
120
121	iter->strtabidx = sh.sh_link;
122	iter->syms = elf_getdata(scn, 0);
123	if (!iter->syms) {
124		pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
125			binary_path, elf_errmsg(-1));
126		return -EINVAL;
127	}
128	iter->nr_syms = iter->syms->d_size / sh.sh_entsize;
129	iter->elf = elf;
130	iter->st_type = st_type;
131
132	/* Version symbol table is meaningful to dynsym only */
133	if (sh_type != SHT_DYNSYM)
134		return 0;
135
136	scn = elf_find_next_scn_by_type(elf, SHT_GNU_versym, NULL);
137	if (!scn)
138		return 0;
139	iter->versyms = elf_getdata(scn, 0);
140
141	scn = elf_find_next_scn_by_type(elf, SHT_GNU_verdef, NULL);
142	if (!scn)
143		return 0;
144
145	iter->verdefs = elf_getdata(scn, 0);
146	if (!iter->verdefs || !gelf_getshdr(scn, &sh)) {
147		pr_warn("elf: failed to get verdef ELF section in '%s'\n", binary_path);
148		return -EINVAL;
149	}
150	iter->verdef_strtabidx = sh.sh_link;
151
152	return 0;
153}
154
155static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter)
156{
157	struct elf_sym *ret = &iter->sym;
158	GElf_Sym *sym = &ret->sym;
159	const char *name = NULL;
160	GElf_Versym versym;
161	Elf_Scn *sym_scn;
162	size_t idx;
163
164	for (idx = iter->next_sym_idx; idx < iter->nr_syms; idx++) {
165		if (!gelf_getsym(iter->syms, idx, sym))
166			continue;
167		if (GELF_ST_TYPE(sym->st_info) != iter->st_type)
168			continue;
169		name = elf_strptr(iter->elf, iter->strtabidx, sym->st_name);
170		if (!name)
171			continue;
172		sym_scn = elf_getscn(iter->elf, sym->st_shndx);
173		if (!sym_scn)
174			continue;
175		if (!gelf_getshdr(sym_scn, &ret->sh))
176			continue;
177
178		iter->next_sym_idx = idx + 1;
179		ret->name = name;
180		ret->ver = 0;
181		ret->hidden = false;
182
183		if (iter->versyms) {
184			if (!gelf_getversym(iter->versyms, idx, &versym))
185				continue;
186			ret->ver = versym & VERSYM_VERSION;
187			ret->hidden = versym & VERSYM_HIDDEN;
188		}
189		return ret;
190	}
191
192	return NULL;
193}
194
195static const char *elf_get_vername(struct elf_sym_iter *iter, int ver)
196{
197	GElf_Verdaux verdaux;
198	GElf_Verdef verdef;
199	int offset;
200
201	if (!iter->verdefs)
202		return NULL;
203
204	offset = 0;
205	while (gelf_getverdef(iter->verdefs, offset, &verdef)) {
206		if (verdef.vd_ndx != ver) {
207			if (!verdef.vd_next)
208				break;
209
210			offset += verdef.vd_next;
211			continue;
212		}
213
214		if (!gelf_getverdaux(iter->verdefs, offset + verdef.vd_aux, &verdaux))
215			break;
216
217		return elf_strptr(iter->elf, iter->verdef_strtabidx, verdaux.vda_name);
218
219	}
220	return NULL;
221}
222
223static bool symbol_match(struct elf_sym_iter *iter, int sh_type, struct elf_sym *sym,
224			 const char *name, size_t name_len, const char *lib_ver)
225{
226	const char *ver_name;
227
228	/* Symbols are in forms of func, func@LIB_VER or func@@LIB_VER
229	 * make sure the func part matches the user specified name
230	 */
231	if (strncmp(sym->name, name, name_len) != 0)
232		return false;
233
234	/* ...but we don't want a search for "foo" to match 'foo2" also, so any
235	 * additional characters in sname should be of the form "@@LIB".
236	 */
237	if (sym->name[name_len] != '\0' && sym->name[name_len] != '@')
238		return false;
239
240	/* If user does not specify symbol version, then we got a match */
241	if (!lib_ver)
242		return true;
243
244	/* If user specifies symbol version, for dynamic symbols,
245	 * get version name from ELF verdef section for comparison.
246	 */
247	if (sh_type == SHT_DYNSYM) {
248		ver_name = elf_get_vername(iter, sym->ver);
249		if (!ver_name)
250			return false;
251		return strcmp(ver_name, lib_ver) == 0;
252	}
253
254	/* For normal symbols, it is already in form of func@LIB_VER */
255	return strcmp(sym->name, name) == 0;
256}
257
258/* Transform symbol's virtual address (absolute for binaries and relative
259 * for shared libs) into file offset, which is what kernel is expecting
260 * for uprobe/uretprobe attachment.
261 * See Documentation/trace/uprobetracer.rst for more details. This is done
262 * by looking up symbol's containing section's header and using iter's virtual
263 * address (sh_addr) and corresponding file offset (sh_offset) to transform
264 * sym.st_value (virtual address) into desired final file offset.
265 */
266static unsigned long elf_sym_offset(struct elf_sym *sym)
267{
268	return sym->sym.st_value - sym->sh.sh_addr + sym->sh.sh_offset;
269}
270
271/* Find offset of function name in the provided ELF object. "binary_path" is
272 * the path to the ELF binary represented by "elf", and only used for error
273 * reporting matters. "name" matches symbol name or name@@LIB for library
274 * functions.
275 */
276long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
277{
278	int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
279	const char *at_symbol, *lib_ver;
280	bool is_shared_lib;
281	long ret = -ENOENT;
282	size_t name_len;
283	GElf_Ehdr ehdr;
284
285	if (!gelf_getehdr(elf, &ehdr)) {
286		pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
287		ret = -LIBBPF_ERRNO__FORMAT;
288		goto out;
289	}
290	/* for shared lib case, we do not need to calculate relative offset */
291	is_shared_lib = ehdr.e_type == ET_DYN;
292
293	/* Does name specify "@@LIB_VER" or "@LIB_VER" ? */
294	at_symbol = strchr(name, '@');
295	if (at_symbol) {
296		name_len = at_symbol - name;
297		/* skip second @ if it's @@LIB_VER case */
298		if (at_symbol[1] == '@')
299			at_symbol++;
300		lib_ver = at_symbol + 1;
301	} else {
302		name_len = strlen(name);
303		lib_ver = NULL;
304	}
305
306	/* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
307	 * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
308	 * linked binary may not have SHT_DYMSYM, so absence of a section should not be
309	 * reported as a warning/error.
310	 */
311	for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
312		struct elf_sym_iter iter;
313		struct elf_sym *sym;
314		int last_bind = -1;
315		int cur_bind;
316
317		ret = elf_sym_iter_new(&iter, elf, binary_path, sh_types[i], STT_FUNC);
318		if (ret == -ENOENT)
319			continue;
320		if (ret)
321			goto out;
322
323		while ((sym = elf_sym_iter_next(&iter))) {
324			if (!symbol_match(&iter, sh_types[i], sym, name, name_len, lib_ver))
325				continue;
326
327			cur_bind = GELF_ST_BIND(sym->sym.st_info);
328
329			if (ret > 0) {
330				/* handle multiple matches */
331				if (elf_sym_offset(sym) == ret) {
332					/* same offset, no problem */
333					continue;
334				} else if (last_bind != STB_WEAK && cur_bind != STB_WEAK) {
335					/* Only accept one non-weak bind. */
336					pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n",
337						sym->name, name, binary_path);
338					ret = -LIBBPF_ERRNO__FORMAT;
339					goto out;
340				} else if (cur_bind == STB_WEAK) {
341					/* already have a non-weak bind, and
342					 * this is a weak bind, so ignore.
343					 */
344					continue;
345				}
346			}
347
348			ret = elf_sym_offset(sym);
349			last_bind = cur_bind;
350		}
351		if (ret > 0)
352			break;
353	}
354
355	if (ret > 0) {
356		pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path,
357			 ret);
358	} else {
359		if (ret == 0) {
360			pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path,
361				is_shared_lib ? "should not be 0 in a shared library" :
362						"try using shared library path instead");
363			ret = -ENOENT;
364		} else {
365			pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path);
366		}
367	}
368out:
369	return ret;
370}
371
372/* Find offset of function name in ELF object specified by path. "name" matches
373 * symbol name or name@@LIB for library functions.
374 */
375long elf_find_func_offset_from_file(const char *binary_path, const char *name)
376{
377	struct elf_fd elf_fd;
378	long ret = -ENOENT;
379
380	ret = elf_open(binary_path, &elf_fd);
381	if (ret)
382		return ret;
383	ret = elf_find_func_offset(elf_fd.elf, binary_path, name);
384	elf_close(&elf_fd);
385	return ret;
386}
387
/* A requested symbol name together with bookkeeping for its resolution. */
struct symbol {
	/* requested symbol name; the sort/search key */
	const char *name;
	/* binding of the ELF symbol that was last accepted for this name */
	int bind;
	/* position of this name in the caller's original array */
	int idx;
};

/* qsort()/bsearch() comparator: order struct symbol entries by name. */
static int symbol_cmp(const void *a, const void *b)
{
	const char *lhs = ((const struct symbol *)a)->name;
	const char *rhs = ((const struct symbol *)b)->name;

	return strcmp(lhs, rhs);
}
401
402/*
403 * Return offsets in @poffsets for symbols specified in @syms array argument.
404 * On success returns 0 and offsets are returned in allocated array with @cnt
405 * size, that needs to be released by the caller.
406 */
407int elf_resolve_syms_offsets(const char *binary_path, int cnt,
408			     const char **syms, unsigned long **poffsets,
409			     int st_type)
410{
411	int sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
412	int err = 0, i, cnt_done = 0;
413	unsigned long *offsets;
414	struct symbol *symbols;
415	struct elf_fd elf_fd;
416
417	err = elf_open(binary_path, &elf_fd);
418	if (err)
419		return err;
420
421	offsets = calloc(cnt, sizeof(*offsets));
422	symbols = calloc(cnt, sizeof(*symbols));
423
424	if (!offsets || !symbols) {
425		err = -ENOMEM;
426		goto out;
427	}
428
429	for (i = 0; i < cnt; i++) {
430		symbols[i].name = syms[i];
431		symbols[i].idx = i;
432	}
433
434	qsort(symbols, cnt, sizeof(*symbols), symbol_cmp);
435
436	for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
437		struct elf_sym_iter iter;
438		struct elf_sym *sym;
439
440		err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], st_type);
441		if (err == -ENOENT)
442			continue;
443		if (err)
444			goto out;
445
446		while ((sym = elf_sym_iter_next(&iter))) {
447			unsigned long sym_offset = elf_sym_offset(sym);
448			int bind = GELF_ST_BIND(sym->sym.st_info);
449			struct symbol *found, tmp = {
450				.name = sym->name,
451			};
452			unsigned long *offset;
453
454			found = bsearch(&tmp, symbols, cnt, sizeof(*symbols), symbol_cmp);
455			if (!found)
456				continue;
457
458			offset = &offsets[found->idx];
459			if (*offset > 0) {
460				/* same offset, no problem */
461				if (*offset == sym_offset)
462					continue;
463				/* handle multiple matches */
464				if (found->bind != STB_WEAK && bind != STB_WEAK) {
465					/* Only accept one non-weak bind. */
466					pr_warn("elf: ambiguous match found '%s@%lu' in '%s' previous offset %lu\n",
467						sym->name, sym_offset, binary_path, *offset);
468					err = -ESRCH;
469					goto out;
470				} else if (bind == STB_WEAK) {
471					/* already have a non-weak bind, and
472					 * this is a weak bind, so ignore.
473					 */
474					continue;
475				}
476			} else {
477				cnt_done++;
478			}
479			*offset = sym_offset;
480			found->bind = bind;
481		}
482	}
483
484	if (cnt != cnt_done) {
485		err = -ENOENT;
486		goto out;
487	}
488
489	*poffsets = offsets;
490
491out:
492	free(symbols);
493	if (err)
494		free(offsets);
495	elf_close(&elf_fd);
496	return err;
497}
498
499/*
500 * Return offsets in @poffsets for symbols specified by @pattern argument.
501 * On success returns 0 and offsets are returned in allocated @poffsets
502 * array with the @pctn size, that needs to be released by the caller.
503 */
504int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern,
505				unsigned long **poffsets, size_t *pcnt)
506{
507	int sh_types[2] = { SHT_SYMTAB, SHT_DYNSYM };
508	unsigned long *offsets = NULL;
509	size_t cap = 0, cnt = 0;
510	struct elf_fd elf_fd;
511	int err = 0, i;
512
513	err = elf_open(binary_path, &elf_fd);
514	if (err)
515		return err;
516
517	for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
518		struct elf_sym_iter iter;
519		struct elf_sym *sym;
520
521		err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], STT_FUNC);
522		if (err == -ENOENT)
523			continue;
524		if (err)
525			goto out;
526
527		while ((sym = elf_sym_iter_next(&iter))) {
528			if (!glob_match(sym->name, pattern))
529				continue;
530
531			err = libbpf_ensure_mem((void **) &offsets, &cap, sizeof(*offsets),
532						cnt + 1);
533			if (err)
534				goto out;
535
536			offsets[cnt++] = elf_sym_offset(sym);
537		}
538
539		/* If we found anything in the first symbol section,
540		 * do not search others to avoid duplicates.
541		 */
542		if (cnt)
543			break;
544	}
545
546	if (cnt) {
547		*poffsets = offsets;
548		*pcnt = cnt;
549	} else {
550		err = -ENOENT;
551	}
552
553out:
554	if (err)
555		free(offsets);
556	elf_close(&elf_fd);
557	return err;
558}
559