1/* CTF string table management.
2   Copyright (C) 2019-2022 Free Software Foundation, Inc.
3
4   This file is part of libctf.
5
6   libctf is free software; you can redistribute it and/or modify it under
7   the terms of the GNU General Public License as published by the Free
8   Software Foundation; either version 3, or (at your option) any later
9   version.
10
11   This program is distributed in the hope that it will be useful, but
12   WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14   See the GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; see the file COPYING.  If not see
18   <http://www.gnu.org/licenses/>.  */
19
20#include <ctf-impl.h>
21#include <string.h>
22#include <assert.h>
23
24/* Convert an encoded CTF string name into a pointer to a C string, using an
25  explicit internal strtab rather than the fp-based one.  */
26const char *
27ctf_strraw_explicit (ctf_dict_t *fp, uint32_t name, ctf_strs_t *strtab)
28{
29  ctf_strs_t *ctsp = &fp->ctf_str[CTF_NAME_STID (name)];
30
31  if ((CTF_NAME_STID (name) == CTF_STRTAB_0) && (strtab != NULL))
32    ctsp = strtab;
33
34  /* If this name is in the external strtab, and there is a synthetic strtab,
35     use it in preference.  */
36
37  if (CTF_NAME_STID (name) == CTF_STRTAB_1
38      && fp->ctf_syn_ext_strtab != NULL)
39    return ctf_dynhash_lookup (fp->ctf_syn_ext_strtab,
40			       (void *) (uintptr_t) name);
41
42  /* If the name is in the internal strtab, and the offset is beyond the end of
43     the ctsp->cts_len but below the ctf_str_prov_offset, this is a provisional
44     string added by ctf_str_add*() but not yet built into a real strtab: get
45     the value out of the ctf_prov_strtab.  */
46
47  if (CTF_NAME_STID (name) == CTF_STRTAB_0
48      && name >= ctsp->cts_len && name < fp->ctf_str_prov_offset)
49      return ctf_dynhash_lookup (fp->ctf_prov_strtab,
50				 (void *) (uintptr_t) name);
51
52  if (ctsp->cts_strs != NULL && CTF_NAME_OFFSET (name) < ctsp->cts_len)
53    return (ctsp->cts_strs + CTF_NAME_OFFSET (name));
54
55  /* String table not loaded or corrupt offset.  */
56  return NULL;
57}
58
59/* Convert an encoded CTF string name into a pointer to a C string by looking
60  up the appropriate string table buffer and then adding the offset.  */
61const char *
62ctf_strraw (ctf_dict_t *fp, uint32_t name)
63{
64  return ctf_strraw_explicit (fp, name, NULL);
65}
66
67/* Return a guaranteed-non-NULL pointer to the string with the given CTF
68   name.  */
69const char *
70ctf_strptr (ctf_dict_t *fp, uint32_t name)
71{
72  const char *s = ctf_strraw (fp, name);
73  return (s != NULL ? s : "(?)");
74}
75
76/* Remove all refs to a given atom.  */
77static void
78ctf_str_purge_atom_refs (ctf_str_atom_t *atom)
79{
80  ctf_str_atom_ref_t *ref, *next;
81
82  for (ref = ctf_list_next (&atom->csa_refs); ref != NULL; ref = next)
83    {
84      next = ctf_list_next (ref);
85      ctf_list_delete (&atom->csa_refs, ref);
86      free (ref);
87    }
88}
89
90/* Free an atom (only called on ctf_close().)  */
91static void
92ctf_str_free_atom (void *a)
93{
94  ctf_str_atom_t *atom = a;
95
96  ctf_str_purge_atom_refs (atom);
97  free (atom);
98}
99
100/* Create the atoms table.  There is always at least one atom in it, the null
101   string.  */
102int
103ctf_str_create_atoms (ctf_dict_t *fp)
104{
105  fp->ctf_str_atoms = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string,
106					  free, ctf_str_free_atom);
107  if (!fp->ctf_str_atoms)
108    return -ENOMEM;
109
110  if (!fp->ctf_prov_strtab)
111    fp->ctf_prov_strtab = ctf_dynhash_create (ctf_hash_integer,
112					      ctf_hash_eq_integer,
113					      NULL, NULL);
114  if (!fp->ctf_prov_strtab)
115    goto oom_prov_strtab;
116
117  if (!fp->ctf_str_pending_ref)
118    fp->ctf_str_pending_ref = ctf_dynset_create (htab_hash_pointer,
119						 htab_eq_pointer,
120						 NULL);
121  if (!fp->ctf_str_pending_ref)
122    goto oom_str_pending_ref;
123
124  errno = 0;
125  ctf_str_add (fp, "");
126  if (errno == ENOMEM)
127    goto oom_str_add;
128
129  return 0;
130
131 oom_str_add:
132  ctf_dynhash_destroy (fp->ctf_prov_strtab);
133  fp->ctf_prov_strtab = NULL;
134 oom_str_pending_ref:
135  ctf_dynset_destroy (fp->ctf_str_pending_ref);
136  fp->ctf_str_pending_ref = NULL;
137 oom_prov_strtab:
138  ctf_dynhash_destroy (fp->ctf_str_atoms);
139  fp->ctf_str_atoms = NULL;
140  return -ENOMEM;
141}
142
143/* Destroy the atoms table.  */
144void
145ctf_str_free_atoms (ctf_dict_t *fp)
146{
147  ctf_dynhash_destroy (fp->ctf_prov_strtab);
148  ctf_dynhash_destroy (fp->ctf_str_atoms);
149  ctf_dynset_destroy (fp->ctf_str_pending_ref);
150}
151
152#define CTF_STR_ADD_REF 0x1
153#define CTF_STR_MAKE_PROVISIONAL 0x2
154#define CTF_STR_PENDING_REF 0x4
155
156/* Add a string to the atoms table, copying the passed-in string.  Return the
157   atom added. Return NULL only when out of memory (and do not touch the
158   passed-in string in that case).  Possibly augment the ref list with the
159   passed-in ref.  Possibly add a provisional entry for this string to the
160   provisional strtab.   */
161static ctf_str_atom_t *
162ctf_str_add_ref_internal (ctf_dict_t *fp, const char *str,
163			  int flags, uint32_t *ref)
164{
165  char *newstr = NULL;
166  ctf_str_atom_t *atom = NULL;
167  ctf_str_atom_ref_t *aref = NULL;
168
169  atom = ctf_dynhash_lookup (fp->ctf_str_atoms, str);
170
171  if (flags & CTF_STR_ADD_REF)
172    {
173      if ((aref = malloc (sizeof (struct ctf_str_atom_ref))) == NULL)
174	return NULL;
175      aref->caf_ref = ref;
176    }
177
178  if (atom)
179    {
180      if (flags & CTF_STR_ADD_REF)
181	{
182	  ctf_dynset_remove (fp->ctf_str_pending_ref, (void *) ref);
183	  ctf_list_append (&atom->csa_refs, aref);
184	  fp->ctf_str_num_refs++;
185	}
186      return atom;
187    }
188
189  if ((atom = malloc (sizeof (struct ctf_str_atom))) == NULL)
190    goto oom;
191  memset (atom, 0, sizeof (struct ctf_str_atom));
192
193  if ((newstr = strdup (str)) == NULL)
194    goto oom;
195
196  if (ctf_dynhash_insert (fp->ctf_str_atoms, newstr, atom) < 0)
197    goto oom;
198
199  atom->csa_str = newstr;
200  atom->csa_snapshot_id = fp->ctf_snapshots;
201
202  if (flags & CTF_STR_MAKE_PROVISIONAL)
203    {
204      atom->csa_offset = fp->ctf_str_prov_offset;
205
206      if (ctf_dynhash_insert (fp->ctf_prov_strtab, (void *) (uintptr_t)
207			      atom->csa_offset, (void *) atom->csa_str) < 0)
208	goto oom;
209
210      fp->ctf_str_prov_offset += strlen (atom->csa_str) + 1;
211    }
212
213  if (flags & CTF_STR_PENDING_REF)
214    {
215      if (ctf_dynset_insert (fp->ctf_str_pending_ref, (void *) ref) < 0)
216	goto oom;
217    }
218  else if (flags & CTF_STR_ADD_REF)
219    {
220      ctf_dynset_remove (fp->ctf_str_pending_ref, (void *) ref);
221      ctf_list_append (&atom->csa_refs, aref);
222      fp->ctf_str_num_refs++;
223    }
224  return atom;
225
226 oom:
227  if (newstr)
228    ctf_dynhash_remove (fp->ctf_str_atoms, newstr);
229  free (atom);
230  free (aref);
231  free (newstr);
232  ctf_set_errno (fp, ENOMEM);
233  return NULL;
234}
235
236/* Add a string to the atoms table, without augmenting the ref list for this
237   string: return a 'provisional offset' which can be used to return this string
238   until ctf_str_write_strtab is called, or 0 on failure.  (Everywhere the
239   provisional offset is assigned to should be added as a ref using
240   ctf_str_add_ref() as well.) */
241uint32_t
242ctf_str_add (ctf_dict_t *fp, const char *str)
243{
244  ctf_str_atom_t *atom;
245
246  if (!str)
247    str = "";
248
249  atom = ctf_str_add_ref_internal (fp, str, CTF_STR_MAKE_PROVISIONAL, 0);
250  if (!atom)
251    return 0;
252
253  return atom->csa_offset;
254}
255
256/* Like ctf_str_add(), but additionally augment the atom's refs list with the
257   passed-in ref, whether or not the string is already present.  There is no
258   attempt to deduplicate the refs list (but duplicates are harmless).  */
259uint32_t
260ctf_str_add_ref (ctf_dict_t *fp, const char *str, uint32_t *ref)
261{
262  ctf_str_atom_t *atom;
263
264  if (!str)
265    str = "";
266
267  atom = ctf_str_add_ref_internal (fp, str, CTF_STR_ADD_REF
268				   | CTF_STR_MAKE_PROVISIONAL, ref);
269  if (!atom)
270    return 0;
271
272  return atom->csa_offset;
273}
274
275/* Like ctf_str_add_ref(), but notes that this memory location must be added as
276   a ref by a later serialization phase, rather than adding it itself.  */
277uint32_t
278ctf_str_add_pending (ctf_dict_t *fp, const char *str, uint32_t *ref)
279{
280  ctf_str_atom_t *atom;
281
282  if (!str)
283    str = "";
284
285  atom = ctf_str_add_ref_internal (fp, str, CTF_STR_PENDING_REF
286				   | CTF_STR_MAKE_PROVISIONAL, ref);
287  if (!atom)
288    return 0;
289
290  return atom->csa_offset;
291}
292
293/* Note that a pending ref now located at NEW_REF has moved by BYTES bytes.  */
294int
295ctf_str_move_pending (ctf_dict_t *fp, uint32_t *new_ref, ptrdiff_t bytes)
296{
297  if (bytes == 0)
298    return 0;
299
300  if (ctf_dynset_insert (fp->ctf_str_pending_ref, (void *) new_ref) < 0)
301    return (ctf_set_errno (fp, ENOMEM));
302
303  ctf_dynset_remove (fp->ctf_str_pending_ref,
304		     (void *) ((signed char *) new_ref - bytes));
305  return 0;
306}
307
308/* Add an external strtab reference at OFFSET.  Returns zero if the addition
309   failed, nonzero otherwise.  */
310int
311ctf_str_add_external (ctf_dict_t *fp, const char *str, uint32_t offset)
312{
313  ctf_str_atom_t *atom;
314
315  if (!str)
316    str = "";
317
318  atom = ctf_str_add_ref_internal (fp, str, 0, 0);
319  if (!atom)
320    return 0;
321
322  atom->csa_external_offset = CTF_SET_STID (offset, CTF_STRTAB_1);
323
324  if (!fp->ctf_syn_ext_strtab)
325    fp->ctf_syn_ext_strtab = ctf_dynhash_create (ctf_hash_integer,
326						 ctf_hash_eq_integer,
327						 NULL, NULL);
328  if (!fp->ctf_syn_ext_strtab)
329    {
330      ctf_set_errno (fp, ENOMEM);
331      return 0;
332    }
333
334  if (ctf_dynhash_insert (fp->ctf_syn_ext_strtab,
335			  (void *) (uintptr_t)
336			  atom->csa_external_offset,
337			  (void *) atom->csa_str) < 0)
338    {
339      /* No need to bother freeing the syn_ext_strtab: it will get freed at
340	 ctf_str_write_strtab time if unreferenced.  */
341      ctf_set_errno (fp, ENOMEM);
342      return 0;
343    }
344
345  return 1;
346}
347
348/* Remove a single ref.  */
349void
350ctf_str_remove_ref (ctf_dict_t *fp, const char *str, uint32_t *ref)
351{
352  ctf_str_atom_ref_t *aref, *anext;
353  ctf_str_atom_t *atom = NULL;
354
355  atom = ctf_dynhash_lookup (fp->ctf_str_atoms, str);
356  if (!atom)
357    return;
358
359  for (aref = ctf_list_next (&atom->csa_refs); aref != NULL; aref = anext)
360    {
361      anext = ctf_list_next (aref);
362      if (aref->caf_ref == ref)
363	{
364	  ctf_list_delete (&atom->csa_refs, aref);
365	  free (aref);
366	}
367    }
368
369  ctf_dynset_remove (fp->ctf_str_pending_ref, (void *) ref);
370}
371
372/* A ctf_dynhash_iter_remove() callback that removes atoms later than a given
373   snapshot ID.  External atoms are never removed, because they came from the
374   linker string table and are still present even if you roll back type
375   additions.  */
376static int
377ctf_str_rollback_atom (void *key _libctf_unused_, void *value, void *arg)
378{
379  ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
380  ctf_snapshot_id_t *id = (ctf_snapshot_id_t *) arg;
381
382  return (atom->csa_snapshot_id > id->snapshot_id)
383    && (atom->csa_external_offset == 0);
384}
385
386/* Roll back, deleting all (internal) atoms created after a particular ID.  */
387void
388ctf_str_rollback (ctf_dict_t *fp, ctf_snapshot_id_t id)
389{
390  ctf_dynhash_iter_remove (fp->ctf_str_atoms, ctf_str_rollback_atom, &id);
391}
392
393/* An adaptor around ctf_purge_atom_refs.  */
394static void
395ctf_str_purge_one_atom_refs (void *key _libctf_unused_, void *value,
396			     void *arg _libctf_unused_)
397{
398  ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
399  ctf_str_purge_atom_refs (atom);
400}
401
402/* Remove all the recorded refs from the atoms table.  */
403void
404ctf_str_purge_refs (ctf_dict_t *fp)
405{
406  if (fp->ctf_str_num_refs > 0)
407    ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_purge_one_atom_refs, NULL);
408  fp->ctf_str_num_refs = 0;
409}
410
411/* Update a list of refs to the specified value. */
412static void
413ctf_str_update_refs (ctf_str_atom_t *refs, uint32_t value)
414{
415  ctf_str_atom_ref_t *ref;
416
417  for (ref = ctf_list_next (&refs->csa_refs); ref != NULL;
418       ref = ctf_list_next (ref))
419      *(ref->caf_ref) = value;
420}
421
422/* State shared across the strtab write process.  */
423typedef struct ctf_strtab_write_state
424{
425  /* Strtab we are writing, and the number of strings in it.  */
426  ctf_strs_writable_t *strtab;
427  size_t strtab_count;
428
429  /* Pointers to (existing) atoms in the atoms table, for qsorting.  */
430  ctf_str_atom_t **sorttab;
431
432  /* Loop counter for sorttab population.  */
433  size_t i;
434
435  /* The null-string atom (skipped during population).  */
436  ctf_str_atom_t *nullstr;
437} ctf_strtab_write_state_t;
438
439/* Count the number of entries in the strtab, and its length.  */
440static void
441ctf_str_count_strtab (void *key _libctf_unused_, void *value,
442	      void *arg)
443{
444  ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
445  ctf_strtab_write_state_t *s = (ctf_strtab_write_state_t *) arg;
446
447  /* We only factor in the length of items that have no offset and have refs:
448     other items are in the external strtab, or will simply not be written out
449     at all.  They still contribute to the total count, though, because we still
450     have to sort them.  We add in the null string's length explicitly, outside
451     this function, since it is explicitly written out even if it has no refs at
452     all.  */
453
454  if (s->nullstr == atom)
455    {
456      s->strtab_count++;
457      return;
458    }
459
460  if (!ctf_list_empty_p (&atom->csa_refs))
461    {
462      if (!atom->csa_external_offset)
463	s->strtab->cts_len += strlen (atom->csa_str) + 1;
464      s->strtab_count++;
465    }
466}
467
468/* Populate the sorttab with pointers to the strtab atoms.  */
469static void
470ctf_str_populate_sorttab (void *key _libctf_unused_, void *value,
471		  void *arg)
472{
473  ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
474  ctf_strtab_write_state_t *s = (ctf_strtab_write_state_t *) arg;
475
476  /* Skip the null string.  */
477  if (s->nullstr == atom)
478    return;
479
480  /* Skip atoms with no refs.  */
481  if (!ctf_list_empty_p (&atom->csa_refs))
482    s->sorttab[s->i++] = atom;
483}
484
485/* Sort the strtab.  */
486static int
487ctf_str_sort_strtab (const void *a, const void *b)
488{
489  ctf_str_atom_t **one = (ctf_str_atom_t **) a;
490  ctf_str_atom_t **two = (ctf_str_atom_t **) b;
491
492  return (strcmp ((*one)->csa_str, (*two)->csa_str));
493}
494
495/* Write out and return a strtab containing all strings with recorded refs,
496   adjusting the refs to refer to the corresponding string.  The returned strtab
497   may be NULL on error.  Also populate the synthetic strtab with mappings from
498   external strtab offsets to names, so we can look them up with ctf_strptr().
499   Only external strtab offsets with references are added.  */
500ctf_strs_writable_t
501ctf_str_write_strtab (ctf_dict_t *fp)
502{
503  ctf_strs_writable_t strtab;
504  ctf_str_atom_t *nullstr;
505  uint32_t cur_stroff = 0;
506  ctf_strtab_write_state_t s;
507  ctf_str_atom_t **sorttab;
508  size_t i;
509  int any_external = 0;
510
511  memset (&strtab, 0, sizeof (struct ctf_strs_writable));
512  memset (&s, 0, sizeof (struct ctf_strtab_write_state));
513  s.strtab = &strtab;
514
515  nullstr = ctf_dynhash_lookup (fp->ctf_str_atoms, "");
516  if (!nullstr)
517    {
518      ctf_err_warn (fp, 0, ECTF_INTERNAL, _("null string not found in strtab"));
519      strtab.cts_strs = NULL;
520      return strtab;
521    }
522
523  s.nullstr = nullstr;
524  ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_count_strtab, &s);
525  strtab.cts_len++;				/* For the null string.  */
526
527  ctf_dprintf ("%lu bytes of strings in strtab.\n",
528	       (unsigned long) strtab.cts_len);
529
530  /* Sort the strtab.  Force the null string to be first.  */
531  sorttab = calloc (s.strtab_count, sizeof (ctf_str_atom_t *));
532  if (!sorttab)
533    goto oom;
534
535  sorttab[0] = nullstr;
536  s.i = 1;
537  s.sorttab = sorttab;
538  ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_populate_sorttab, &s);
539
540  qsort (&sorttab[1], s.strtab_count - 1, sizeof (ctf_str_atom_t *),
541	 ctf_str_sort_strtab);
542
543  if ((strtab.cts_strs = malloc (strtab.cts_len)) == NULL)
544    goto oom_sorttab;
545
546  /* Update all refs: also update the strtab appropriately.  */
547  for (i = 0; i < s.strtab_count; i++)
548    {
549      if (sorttab[i]->csa_external_offset)
550	{
551	  /* External strtab entry.  */
552
553	  any_external = 1;
554	  ctf_str_update_refs (sorttab[i], sorttab[i]->csa_external_offset);
555	  sorttab[i]->csa_offset = sorttab[i]->csa_external_offset;
556	}
557      else
558	{
559	  /* Internal strtab entry with refs: actually add to the string
560	     table.  */
561
562	  ctf_str_update_refs (sorttab[i], cur_stroff);
563	  sorttab[i]->csa_offset = cur_stroff;
564	  strcpy (&strtab.cts_strs[cur_stroff], sorttab[i]->csa_str);
565	  cur_stroff += strlen (sorttab[i]->csa_str) + 1;
566	}
567    }
568  free (sorttab);
569
570  if (!any_external)
571    {
572      ctf_dynhash_destroy (fp->ctf_syn_ext_strtab);
573      fp->ctf_syn_ext_strtab = NULL;
574    }
575
576  /* All the provisional strtab entries are now real strtab entries, and
577     ctf_strptr() will find them there.  The provisional offset now starts right
578     beyond the new end of the strtab.  */
579
580  ctf_dynhash_empty (fp->ctf_prov_strtab);
581  fp->ctf_str_prov_offset = strtab.cts_len + 1;
582  return strtab;
583
584 oom_sorttab:
585  free (sorttab);
586 oom:
587  return strtab;
588}
589