1/* Symbol, variable and name lookup.
2   Copyright (C) 2019-2020 Free Software Foundation, Inc.
3
4   This file is part of libctf.
5
6   libctf is free software; you can redistribute it and/or modify it under
7   the terms of the GNU General Public License as published by the Free
8   Software Foundation; either version 3, or (at your option) any later
9   version.
10
11   This program is distributed in the hope that it will be useful, but
12   WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14   See the GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; see the file COPYING.  If not see
18   <http://www.gnu.org/licenses/>.  */
19
20#include <ctf-impl.h>
21#include <elf.h>
22#include <string.h>
23
24/* Compare the given input string and length against a table of known C storage
25   qualifier keywords.  We just ignore these in ctf_lookup_by_name, below.  To
26   do this quickly, we use a pre-computed Perfect Hash Function similar to the
27   technique originally described in the classic paper:
28
29   R.J. Cichelli, "Minimal Perfect Hash Functions Made Simple",
30   Communications of the ACM, Volume 23, Issue 1, January 1980, pp. 17-19.
31
32   For an input string S of length N, we use hash H = S[N - 1] + N - 105, which
33   for the current set of qualifiers yields a unique H in the range [0 .. 20].
34   The hash can be modified when the keyword set changes as necessary.  We also
35   store the length of each keyword and check it prior to the final strcmp().
36
37   TODO: just use gperf.  */
38
39static int
40isqualifier (const char *s, size_t len)
41{
42  static const struct qual
43  {
44    const char *q_name;
45    size_t q_len;
46  } qhash[] = {
47    {"static", 6}, {"", 0}, {"", 0}, {"", 0},
48    {"volatile", 8}, {"", 0}, {"", 0}, {"", 0}, {"", 0},
49    {"", 0}, {"auto", 4}, {"extern", 6}, {"", 0}, {"", 0},
50    {"", 0}, {"", 0}, {"const", 5}, {"register", 8},
51    {"", 0}, {"restrict", 8}, {"_Restrict", 9}
52  };
53
54  int h = s[len - 1] + (int) len - 105;
55  const struct qual *qp = &qhash[h];
56
57  return (h >= 0 && (size_t) h < sizeof (qhash) / sizeof (qhash[0])
58	  && (size_t) len == qp->q_len &&
59	  strncmp (qp->q_name, s, qp->q_len) == 0);
60}
61
62/* Attempt to convert the given C type name into the corresponding CTF type ID.
63   It is not possible to do complete and proper conversion of type names
64   without implementing a more full-fledged parser, which is necessary to
65   handle things like types that are function pointers to functions that
66   have arguments that are function pointers, and fun stuff like that.
67   Instead, this function implements a very simple conversion algorithm that
68   finds the things that we actually care about: structs, unions, enums,
69   integers, floats, typedefs, and pointers to any of these named types.  */
70
71ctf_id_t
72ctf_lookup_by_name (ctf_file_t *fp, const char *name)
73{
74  static const char delimiters[] = " \t\n\r\v\f*";
75
76  const ctf_lookup_t *lp;
77  const char *p, *q, *end;
78  ctf_id_t type = 0;
79  ctf_id_t ntype, ptype;
80
81  if (name == NULL)
82    return (ctf_set_errno (fp, EINVAL));
83
84  for (p = name, end = name + strlen (name); *p != '\0'; p = q)
85    {
86      while (isspace ((int) *p))
87	p++;			/* Skip leading whitespace.  */
88
89      if (p == end)
90	break;
91
92      if ((q = strpbrk (p + 1, delimiters)) == NULL)
93	q = end;		/* Compare until end.  */
94
95      if (*p == '*')
96	{
97	  /* Find a pointer to type by looking in fp->ctf_ptrtab.
98	     If we can't find a pointer to the given type, see if
99	     we can compute a pointer to the type resulting from
100	     resolving the type down to its base type and use
101	     that instead.  This helps with cases where the CTF
102	     data includes "struct foo *" but not "foo_t *" and
103	     the user tries to access "foo_t *" in the debugger.
104
105	     TODO need to handle parent containers too.  */
106
107	  ntype = fp->ctf_ptrtab[LCTF_TYPE_TO_INDEX (fp, type)];
108	  if (ntype == 0)
109	    {
110	      ntype = ctf_type_resolve_unsliced (fp, type);
111	      if (ntype == CTF_ERR
112		  || (ntype =
113		      fp->ctf_ptrtab[LCTF_TYPE_TO_INDEX (fp, ntype)]) == 0)
114		{
115		  (void) ctf_set_errno (fp, ECTF_NOTYPE);
116		  goto err;
117		}
118	    }
119
120	  type = LCTF_INDEX_TO_TYPE (fp, ntype, (fp->ctf_flags & LCTF_CHILD));
121
122	  q = p + 1;
123	  continue;
124	}
125
126      if (isqualifier (p, (size_t) (q - p)))
127	continue;		/* Skip qualifier keyword.  */
128
129      for (lp = fp->ctf_lookups; lp->ctl_prefix != NULL; lp++)
130	{
131	  /* TODO: This is not MT-safe.  */
132	  if ((lp->ctl_prefix[0] == '\0' ||
133	       strncmp (p, lp->ctl_prefix, (size_t) (q - p)) == 0) &&
134	      (size_t) (q - p) >= lp->ctl_len)
135	    {
136	      for (p += lp->ctl_len; isspace ((int) *p); p++)
137		continue;	/* Skip prefix and next whitespace.  */
138
139	      if ((q = strchr (p, '*')) == NULL)
140		q = end;	/* Compare until end.  */
141
142	      while (isspace ((int) q[-1]))
143		q--;		/* Exclude trailing whitespace.  */
144
145	      /* Expand and/or allocate storage for a slice of the name, then
146		 copy it in.  */
147
148	      if (fp->ctf_tmp_typeslicelen >= (size_t) (q - p) + 1)
149		{
150		  memcpy (fp->ctf_tmp_typeslice, p, (size_t) (q - p));
151		  fp->ctf_tmp_typeslice[(size_t) (q - p)] = '\0';
152		}
153	      else
154		{
155		  free (fp->ctf_tmp_typeslice);
156		  fp->ctf_tmp_typeslice = xstrndup (p, (size_t) (q - p));
157		  if (fp->ctf_tmp_typeslice == NULL)
158		    {
159		      (void) ctf_set_errno (fp, ENOMEM);
160		      return CTF_ERR;
161		    }
162		}
163
164	      if ((type = ctf_lookup_by_rawhash (fp, lp->ctl_hash,
165						 fp->ctf_tmp_typeslice)) == 0)
166		{
167		  (void) ctf_set_errno (fp, ECTF_NOTYPE);
168		  goto err;
169		}
170
171	      break;
172	    }
173	}
174
175      if (lp->ctl_prefix == NULL)
176	{
177	  (void) ctf_set_errno (fp, ECTF_NOTYPE);
178	  goto err;
179	}
180    }
181
182  if (*p != '\0' || type == 0)
183    return (ctf_set_errno (fp, ECTF_SYNTAX));
184
185  return type;
186
187err:
188  if (fp->ctf_parent != NULL
189      && (ptype = ctf_lookup_by_name (fp->ctf_parent, name)) != CTF_ERR)
190    return ptype;
191
192  return CTF_ERR;
193}
194
195typedef struct ctf_lookup_var_key
196{
197  ctf_file_t *clvk_fp;
198  const char *clvk_name;
199} ctf_lookup_var_key_t;
200
201/* A bsearch function for variable names.  */
202
203static int
204ctf_lookup_var (const void *key_, const void *memb_)
205{
206  const ctf_lookup_var_key_t *key = key_;
207  const ctf_varent_t *memb = memb_;
208
209  return (strcmp (key->clvk_name, ctf_strptr (key->clvk_fp, memb->ctv_name)));
210}
211
212/* Given a variable name, return the type of the variable with that name.  */
213
214ctf_id_t
215ctf_lookup_variable (ctf_file_t *fp, const char *name)
216{
217  ctf_varent_t *ent;
218  ctf_lookup_var_key_t key = { fp, name };
219
220  /* This array is sorted, so we can bsearch for it.  */
221
222  ent = bsearch (&key, fp->ctf_vars, fp->ctf_nvars, sizeof (ctf_varent_t),
223		 ctf_lookup_var);
224
225  if (ent == NULL)
226    {
227      if (fp->ctf_parent != NULL)
228	return ctf_lookup_variable (fp->ctf_parent, name);
229
230      return (ctf_set_errno (fp, ECTF_NOTYPEDAT));
231    }
232
233  return ent->ctv_type;
234}
235
236/* Given a symbol table index, return the name of that symbol from the secondary
237   string table, or the null string (never NULL).  */
238const char *
239ctf_lookup_symbol_name (ctf_file_t *fp, unsigned long symidx)
240{
241  const ctf_sect_t *sp = &fp->ctf_symtab;
242  Elf64_Sym sym, *gsp;
243
244  if (sp->cts_data == NULL)
245    {
246      ctf_set_errno (fp, ECTF_NOSYMTAB);
247      return _CTF_NULLSTR;
248    }
249
250  if (symidx >= fp->ctf_nsyms)
251    {
252      ctf_set_errno (fp, EINVAL);
253      return _CTF_NULLSTR;
254    }
255
256  if (sp->cts_entsize == sizeof (Elf32_Sym))
257    {
258      const Elf32_Sym *symp = (Elf32_Sym *) sp->cts_data + symidx;
259      gsp = ctf_sym_to_elf64 (symp, &sym);
260    }
261  else
262      gsp = (Elf64_Sym *) sp->cts_data + symidx;
263
264  if (gsp->st_name < fp->ctf_str[CTF_STRTAB_1].cts_len)
265    return (const char *) fp->ctf_str[CTF_STRTAB_1].cts_strs + gsp->st_name;
266
267  return _CTF_NULLSTR;
268}
269
270/* Given a symbol table index, return the type of the data object described
271   by the corresponding entry in the symbol table.  */
272
273ctf_id_t
274ctf_lookup_by_symbol (ctf_file_t *fp, unsigned long symidx)
275{
276  const ctf_sect_t *sp = &fp->ctf_symtab;
277  ctf_id_t type;
278
279  if (sp->cts_data == NULL)
280    return (ctf_set_errno (fp, ECTF_NOSYMTAB));
281
282  if (symidx >= fp->ctf_nsyms)
283    return (ctf_set_errno (fp, EINVAL));
284
285  if (sp->cts_entsize == sizeof (Elf32_Sym))
286    {
287      const Elf32_Sym *symp = (Elf32_Sym *) sp->cts_data + symidx;
288      if (ELF32_ST_TYPE (symp->st_info) != STT_OBJECT)
289	return (ctf_set_errno (fp, ECTF_NOTDATA));
290    }
291  else
292    {
293      const Elf64_Sym *symp = (Elf64_Sym *) sp->cts_data + symidx;
294      if (ELF64_ST_TYPE (symp->st_info) != STT_OBJECT)
295	return (ctf_set_errno (fp, ECTF_NOTDATA));
296    }
297
298  if (fp->ctf_sxlate[symidx] == -1u)
299    return (ctf_set_errno (fp, ECTF_NOTYPEDAT));
300
301  type = *(uint32_t *) ((uintptr_t) fp->ctf_buf + fp->ctf_sxlate[symidx]);
302  if (type == 0)
303    return (ctf_set_errno (fp, ECTF_NOTYPEDAT));
304
305  return type;
306}
307
308/* Return the native dict of a given type: if called on a child and the
309   type is in the parent, return the parent.  Needed if you plan to access
310   the type directly, without using the API.  */
311ctf_file_t *
312ctf_get_dict (ctf_file_t *fp, ctf_id_t type)
313{
314    if ((fp->ctf_flags & LCTF_CHILD) && LCTF_TYPE_ISPARENT (fp, type))
315      return fp->ctf_parent;
316
317    return fp;
318}
319
320/* Return the pointer to the internal CTF type data corresponding to the
321   given type ID.  If the ID is invalid, the function returns NULL.
322   This function is not exported outside of the library.  */
323
324const ctf_type_t *
325ctf_lookup_by_id (ctf_file_t **fpp, ctf_id_t type)
326{
327  ctf_file_t *fp = *fpp;	/* Caller passes in starting CTF dict.  */
328  ctf_id_t idx;
329
330  if ((fp = ctf_get_dict (fp, type)) == NULL)
331    {
332      (void) ctf_set_errno (*fpp, ECTF_NOPARENT);
333      return NULL;
334    }
335
336  /* If this dict is writable, check for a dynamic type.  */
337
338  if (fp->ctf_flags & LCTF_RDWR)
339    {
340      ctf_dtdef_t *dtd;
341
342      if ((dtd = ctf_dynamic_type (fp, type)) != NULL)
343	{
344	  *fpp = fp;
345	  return &dtd->dtd_data;
346	}
347      (void) ctf_set_errno (*fpp, ECTF_BADID);
348      return NULL;
349    }
350
351  /* Check for a type in the static portion.  */
352
353  idx = LCTF_TYPE_TO_INDEX (fp, type);
354  if (idx > 0 && (unsigned long) idx <= fp->ctf_typemax)
355    {
356      *fpp = fp;		/* Function returns ending CTF container.  */
357      return (LCTF_INDEX_TO_TYPEPTR (fp, idx));
358    }
359
360  (void) ctf_set_errno (*fpp, ECTF_BADID);
361  return NULL;
362}
363
364/* Given a symbol table index, return the info for the function described
365   by the corresponding entry in the symbol table.  */
366
367int
368ctf_func_info (ctf_file_t *fp, unsigned long symidx, ctf_funcinfo_t *fip)
369{
370  const ctf_sect_t *sp = &fp->ctf_symtab;
371  const uint32_t *dp;
372  uint32_t info, kind, n;
373
374  if (sp->cts_data == NULL)
375    return (ctf_set_errno (fp, ECTF_NOSYMTAB));
376
377  if (symidx >= fp->ctf_nsyms)
378    return (ctf_set_errno (fp, EINVAL));
379
380  if (sp->cts_entsize == sizeof (Elf32_Sym))
381    {
382      const Elf32_Sym *symp = (Elf32_Sym *) sp->cts_data + symidx;
383      if (ELF32_ST_TYPE (symp->st_info) != STT_FUNC)
384	return (ctf_set_errno (fp, ECTF_NOTFUNC));
385    }
386  else
387    {
388      const Elf64_Sym *symp = (Elf64_Sym *) sp->cts_data + symidx;
389      if (ELF64_ST_TYPE (symp->st_info) != STT_FUNC)
390	return (ctf_set_errno (fp, ECTF_NOTFUNC));
391    }
392
393  if (fp->ctf_sxlate[symidx] == -1u)
394    return (ctf_set_errno (fp, ECTF_NOFUNCDAT));
395
396  dp = (uint32_t *) ((uintptr_t) fp->ctf_buf + fp->ctf_sxlate[symidx]);
397
398  info = *dp++;
399  kind = LCTF_INFO_KIND (fp, info);
400  n = LCTF_INFO_VLEN (fp, info);
401
402  if (kind == CTF_K_UNKNOWN && n == 0)
403    return (ctf_set_errno (fp, ECTF_NOFUNCDAT));
404
405  if (kind != CTF_K_FUNCTION)
406    return (ctf_set_errno (fp, ECTF_CORRUPT));
407
408  fip->ctc_return = *dp++;
409  fip->ctc_argc = n;
410  fip->ctc_flags = 0;
411
412  if (n != 0 && dp[n - 1] == 0)
413    {
414      fip->ctc_flags |= CTF_FUNC_VARARG;
415      fip->ctc_argc--;
416    }
417
418  return 0;
419}
420
421/* Given a symbol table index, return the arguments for the function described
422   by the corresponding entry in the symbol table.  */
423
424int
425ctf_func_args (ctf_file_t * fp, unsigned long symidx, uint32_t argc,
426	       ctf_id_t * argv)
427{
428  const uint32_t *dp;
429  ctf_funcinfo_t f;
430
431  if (ctf_func_info (fp, symidx, &f) < 0)
432    return -1;			/* errno is set for us.  */
433
434  /* The argument data is two uint32_t's past the translation table
435     offset: one for the function info, and one for the return type. */
436
437  dp = (uint32_t *) ((uintptr_t) fp->ctf_buf + fp->ctf_sxlate[symidx]) + 2;
438
439  for (argc = MIN (argc, f.ctc_argc); argc != 0; argc--)
440    *argv++ = *dp++;
441
442  return 0;
443}
444