1/*
2 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include <unistd.h>
26#include <search.h>
27#include <stdlib.h>
28#include <string.h>
29#include <db.h>
30#include <fcntl.h>
31
32#include "libproc_impl.h"
33#include "symtab.h"
34#ifndef __APPLE__
35#include "salibelf.h"
36#endif // __APPLE__
37
38
39// ----------------------------------------------------
40// functions for symbol lookups
41// ----------------------------------------------------
42
// One symbol of a symtab.
typedef struct symtab_symbol {
  // Symbol name, e.g. __ZThread_...; build_symtab points this into the
  // owning symtab's strs buffer.
  char      *name;
  // Offset of the symbol relative to the loaded address.
  uintptr_t  offset;
  // Size value recorded for the symbol by build_symtab.
  uintptr_t  size;
} symtab_symbol;
48
49typedef struct symtab {
50  char *strs;                // all symbols "__symbol1__'\0'__symbol2__...."
51  size_t num_symbols;
52  DB* hash_table;
53  symtab_symbol* symbols;
54} symtab_t;
55
56#ifdef __APPLE__
57
58void build_search_table(symtab_t *symtab) {
59  int i;
60  for (i = 0; i < symtab->num_symbols; i++) {
61    DBT key, value;
62    key.data = symtab->symbols[i].name;
63    key.size = strlen(key.data) + 1;
64    value.data = &(symtab->symbols[i]);
65    value.size = sizeof(symtab_symbol);
66    (*symtab->hash_table->put)(symtab->hash_table, &key, &value, 0);
67
68    // check result
69    if (is_debug()) {
70      DBT rkey, rvalue;
71      char* tmp = (char *)malloc(strlen(symtab->symbols[i].name) + 1);
72      strcpy(tmp, symtab->symbols[i].name);
73      rkey.data = tmp;
74      rkey.size = strlen(tmp) + 1;
75      (*symtab->hash_table->get)(symtab->hash_table, &rkey, &rvalue, 0);
76      // we may get a copy back so compare contents
77      symtab_symbol *res = (symtab_symbol *)rvalue.data;
78      if (strcmp(res->name, symtab->symbols[i].name)  ||
79          res->offset != symtab->symbols[i].offset    ||
80          res->size != symtab->symbols[i].size) {
81        print_debug("error to get hash_table value!\n");
82      }
83      free(tmp);
84    }
85  }
86}
87
88// read symbol table from given fd.
89struct symtab* build_symtab(int fd) {
90  symtab_t* symtab = NULL;
91  int i;
92  mach_header_64 header;
93  off_t image_start;
94
95  if (!get_arch_off(fd, CPU_TYPE_X86_64, &image_start)) {
96    print_debug("failed in get fat header\n");
97    return NULL;
98  }
99  lseek(fd, image_start, SEEK_SET);
100  if (read(fd, (void *)&header, sizeof(mach_header_64)) != sizeof(mach_header_64)) {
101    print_debug("reading header failed!\n");
102    return NULL;
103  }
104  // header
105  if (header.magic != MH_MAGIC_64) {
106    print_debug("not a valid .dylib file\n");
107    return NULL;
108  }
109
110  load_command lcmd;
111  symtab_command symtabcmd;
112  nlist_64 lentry;
113
114  bool lcsymtab_exist = false;
115
116  long filepos = ltell(fd);
117  for (i = 0; i < header.ncmds; i++) {
118    lseek(fd, filepos, SEEK_SET);
119    if (read(fd, (void *)&lcmd, sizeof(load_command)) != sizeof(load_command)) {
120      print_debug("read load_command failed for file\n");
121      return NULL;
122    }
123    filepos += lcmd.cmdsize;  // next command position
124    if (lcmd.cmd == LC_SYMTAB) {
125      lseek(fd, -sizeof(load_command), SEEK_CUR);
126      lcsymtab_exist = true;
127      break;
128    }
129  }
130  if (!lcsymtab_exist) {
131    print_debug("No symtab command found!\n");
132    return NULL;
133  }
134  if (read(fd, (void *)&symtabcmd, sizeof(symtab_command)) != sizeof(symtab_command)) {
135    print_debug("read symtab_command failed for file");
136    return NULL;
137  }
138  symtab = (symtab_t *)malloc(sizeof(symtab_t));
139  if (symtab == NULL) {
140    print_debug("out of memory: allocating symtab\n");
141    return NULL;
142  }
143
144  // create hash table, we use berkeley db to
145  // manipulate the hash table.
146  symtab->hash_table = dbopen(NULL, O_CREAT | O_RDWR, 0600, DB_HASH, NULL);
147  if (symtab->hash_table == NULL)
148    goto quit;
149
150  symtab->num_symbols = symtabcmd.nsyms;
151  symtab->symbols = (symtab_symbol *)malloc(sizeof(symtab_symbol) * symtab->num_symbols);
152  symtab->strs    = (char *)malloc(sizeof(char) * symtabcmd.strsize);
153  if (symtab->symbols == NULL || symtab->strs == NULL) {
154     print_debug("out of memory: allocating symtab.symbol or symtab.strs\n");
155     goto quit;
156  }
157  lseek(fd, image_start + symtabcmd.symoff, SEEK_SET);
158  for (i = 0; i < symtab->num_symbols; i++) {
159    if (read(fd, (void *)&lentry, sizeof(nlist_64)) != sizeof(nlist_64)) {
160      print_debug("read nlist_64 failed at %i\n", i);
161      goto quit;
162    }
163    symtab->symbols[i].offset = lentry.n_value;
164    symtab->symbols[i].size  = lentry.n_un.n_strx;        // index
165  }
166
167  // string table
168  lseek(fd, image_start + symtabcmd.stroff, SEEK_SET);
169  int size = read(fd, (void *)(symtab->strs), symtabcmd.strsize * sizeof(char));
170  if (size != symtabcmd.strsize * sizeof(char)) {
171     print_debug("reading string table failed\n");
172     goto quit;
173  }
174
175  for (i = 0; i < symtab->num_symbols; i++) {
176    symtab->symbols[i].name = symtab->strs + symtab->symbols[i].size;
177    if (i > 0) {
178      // fix size
179      symtab->symbols[i - 1].size = symtab->symbols[i].size - symtab->symbols[i - 1].size;
180      print_debug("%s size = %d\n", symtab->symbols[i - 1].name, symtab->symbols[i - 1].size);
181
182    }
183
184    if (i == symtab->num_symbols - 1) {
185      // last index
186      symtab->symbols[i].size =
187            symtabcmd.strsize - symtab->symbols[i].size;
188      print_debug("%s size = %d\n", symtab->symbols[i].name, symtab->symbols[i].size);
189    }
190  }
191
192  // build a hashtable for fast query
193  build_search_table(symtab);
194  return symtab;
195quit:
196  if (symtab) destroy_symtab(symtab);
197  return NULL;
198}
199
200#else // __APPLE__
201
202struct elf_section {
203  ELF_SHDR   *c_shdr;
204  void       *c_data;
205};
206
207// read symbol table from given fd.
208struct symtab* build_symtab(int fd) {
209  ELF_EHDR ehdr;
210  struct symtab* symtab = NULL;
211
212  // Reading of elf header
213  struct elf_section *scn_cache = NULL;
214  int cnt = 0;
215  ELF_SHDR* shbuf = NULL;
216  ELF_SHDR* cursct = NULL;
217  ELF_PHDR* phbuf = NULL;
218  int symtab_found = 0;
219  int dynsym_found = 0;
220  uint32_t symsection = SHT_SYMTAB;
221
222  uintptr_t baseaddr = (uintptr_t)-1;
223
224  lseek(fd, (off_t)0L, SEEK_SET);
225  if (! read_elf_header(fd, &ehdr)) {
226    // not an elf
227    return NULL;
228  }
229
230  // read ELF header
231  if ((shbuf = read_section_header_table(fd, &ehdr)) == NULL) {
232    goto quit;
233  }
234
235  baseaddr = find_base_address(fd, &ehdr);
236
237  scn_cache = calloc(ehdr.e_shnum, sizeof(*scn_cache));
238  if (scn_cache == NULL) {
239    goto quit;
240  }
241
242  for (cursct = shbuf, cnt = 0; cnt < ehdr.e_shnum; cnt++) {
243    scn_cache[cnt].c_shdr = cursct;
244    if (cursct->sh_type == SHT_SYMTAB ||
245        cursct->sh_type == SHT_STRTAB ||
246        cursct->sh_type == SHT_DYNSYM) {
247      if ( (scn_cache[cnt].c_data = read_section_data(fd, &ehdr, cursct)) == NULL) {
248         goto quit;
249      }
250    }
251
252    if (cursct->sh_type == SHT_SYMTAB)
253       symtab_found++;
254
255    if (cursct->sh_type == SHT_DYNSYM)
256       dynsym_found++;
257
258    cursct++;
259  }
260
261  if (!symtab_found && dynsym_found)
262     symsection = SHT_DYNSYM;
263
264  for (cnt = 1; cnt < ehdr.e_shnum; cnt++) {
265    ELF_SHDR *shdr = scn_cache[cnt].c_shdr;
266
267    if (shdr->sh_type == symsection) {
268      ELF_SYM  *syms;
269      int j, n;
270      size_t size;
271
272      // FIXME: there could be multiple data buffers associated with the
273      // same ELF section. Here we can handle only one buffer. See man page
274      // for elf_getdata on Solaris.
275
276      // guarantee(symtab == NULL, "multiple symtab");
277      symtab = calloc(1, sizeof(*symtab));
278      if (symtab == NULL) {
279         goto quit;
280      }
281      // the symbol table
282      syms = (ELF_SYM *)scn_cache[cnt].c_data;
283
284      // number of symbols
285      n = shdr->sh_size / shdr->sh_entsize;
286
287      // create hash table, we use berkeley db to
288      // manipulate the hash table.
289      symtab->hash_table = dbopen(NULL, O_CREAT | O_RDWR, 0600, DB_HASH, NULL);
290      // guarantee(symtab->hash_table, "unexpected failure: dbopen");
291      if (symtab->hash_table == NULL)
292        goto bad;
293
294      // shdr->sh_link points to the section that contains the actual strings
295      // for symbol names. the st_name field in ELF_SYM is just the
296      // string table index. we make a copy of the string table so the
297      // strings will not be destroyed by elf_end.
298      size = scn_cache[shdr->sh_link].c_shdr->sh_size;
299      symtab->strs = malloc(size);
300      if (symtab->strs == NULL)
301        goto bad;
302      memcpy(symtab->strs, scn_cache[shdr->sh_link].c_data, size);
303
304      // allocate memory for storing symbol offset and size;
305      symtab->num_symbols = n;
306      symtab->symbols = calloc(n , sizeof(*symtab->symbols));
307      if (symtab->symbols == NULL)
308        goto bad;
309
310      // copy symbols info our symtab and enter them info the hash table
311      for (j = 0; j < n; j++, syms++) {
312        DBT key, value;
313        char *sym_name = symtab->strs + syms->st_name;
314
315        // skip non-object and non-function symbols
316        int st_type = ELF_ST_TYPE(syms->st_info);
317        if ( st_type != STT_FUNC && st_type != STT_OBJECT)
318           continue;
319        // skip empty strings and undefined symbols
320        if (*sym_name == '\0' || syms->st_shndx == SHN_UNDEF) continue;
321
322        symtab->symbols[j].name   = sym_name;
323        symtab->symbols[j].offset = syms->st_value - baseaddr;
324        symtab->symbols[j].size   = syms->st_size;
325
326        key.data = sym_name;
327        key.size = strlen(sym_name) + 1;
328        value.data = &(symtab->symbols[j]);
329        value.size = sizeof(symtab_symbol);
330        (*symtab->hash_table->put)(symtab->hash_table, &key, &value, 0);
331      }
332    }
333  }
334  goto quit;
335
336bad:
337  destroy_symtab(symtab);
338  symtab = NULL;
339
340quit:
341  if (shbuf) free(shbuf);
342  if (phbuf) free(phbuf);
343  if (scn_cache) {
344    for (cnt = 0; cnt < ehdr.e_shnum; cnt++) {
345      if (scn_cache[cnt].c_data != NULL) {
346        free(scn_cache[cnt].c_data);
347      }
348    }
349    free(scn_cache);
350  }
351  return symtab;
352}
353
354#endif // __APPLE__
355
356void destroy_symtab(symtab_t* symtab) {
357  if (!symtab) return;
358  free(symtab->strs);
359  free(symtab->symbols);
360  free(symtab);
361}
362
363uintptr_t search_symbol(struct symtab* symtab, uintptr_t base, const char *sym_name, int *sym_size) {
364  DBT key, value;
365  int ret;
366
367  // library does not have symbol table
368  if (!symtab || !symtab->hash_table) {
369     return 0;
370  }
371
372  key.data = (char*)(uintptr_t)sym_name;
373  key.size = strlen(sym_name) + 1;
374  ret = (*symtab->hash_table->get)(symtab->hash_table, &key, &value, 0);
375  if (ret == 0) {
376    symtab_symbol *sym = value.data;
377    uintptr_t rslt = (uintptr_t) ((char*)base + sym->offset);
378    if (sym_size) *sym_size = sym->size;
379    return rslt;
380  }
381
382  return 0;
383}
384
385const char* nearest_symbol(struct symtab* symtab, uintptr_t offset,
386                           uintptr_t* poffset) {
387  int n = 0;
388  if (!symtab) return NULL;
389  for (; n < symtab->num_symbols; n++) {
390    symtab_symbol* sym = &(symtab->symbols[n]);
391    if (sym->name != NULL &&
392      offset >= sym->offset && offset < sym->offset + sym->size) {
393      if (poffset) *poffset = (offset - sym->offset);
394      return sym->name;
395    }
396  }
397  return NULL;
398}
399