1/* CTF string table management.
2   Copyright (C) 2019-2020 Free Software Foundation, Inc.
3
4   This file is part of libctf.
5
6   libctf is free software; you can redistribute it and/or modify it under
7   the terms of the GNU General Public License as published by the Free
8   Software Foundation; either version 3, or (at your option) any later
9   version.
10
11   This program is distributed in the hope that it will be useful, but
12   WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14   See the GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; see the file COPYING.  If not see
18   <http://www.gnu.org/licenses/>.  */
19
20#include <ctf-impl.h>
21#include <string.h>
22
23/* Convert an encoded CTF string name into a pointer to a C string, using an
24  explicit internal strtab rather than the fp-based one.  */
25const char *
26ctf_strraw_explicit (ctf_file_t *fp, uint32_t name, ctf_strs_t *strtab)
27{
28  ctf_strs_t *ctsp = &fp->ctf_str[CTF_NAME_STID (name)];
29
30  if ((CTF_NAME_STID (name) == CTF_STRTAB_0) && (strtab != NULL))
31    ctsp = strtab;
32
33  /* If this name is in the external strtab, and there is a synthetic strtab,
34     use it in preference.  */
35
36  if (CTF_NAME_STID (name) == CTF_STRTAB_1
37      && fp->ctf_syn_ext_strtab != NULL)
38    return ctf_dynhash_lookup (fp->ctf_syn_ext_strtab,
39			       (void *) (uintptr_t) name);
40
41  /* If the name is in the internal strtab, and the offset is beyond the end of
42     the ctsp->cts_len but below the ctf_str_prov_offset, this is a provisional
43     string added by ctf_str_add*() but not yet built into a real strtab: get
44     the value out of the ctf_prov_strtab.  */
45
46  if (CTF_NAME_STID (name) == CTF_STRTAB_0
47      && name >= ctsp->cts_len && name < fp->ctf_str_prov_offset)
48      return ctf_dynhash_lookup (fp->ctf_prov_strtab,
49				 (void *) (uintptr_t) name);
50
51  if (ctsp->cts_strs != NULL && CTF_NAME_OFFSET (name) < ctsp->cts_len)
52    return (ctsp->cts_strs + CTF_NAME_OFFSET (name));
53
54  /* String table not loaded or corrupt offset.  */
55  return NULL;
56}
57
58/* Convert an encoded CTF string name into a pointer to a C string by looking
59  up the appropriate string table buffer and then adding the offset.  */
60const char *
61ctf_strraw (ctf_file_t *fp, uint32_t name)
62{
63  return ctf_strraw_explicit (fp, name, NULL);
64}
65
66/* Return a guaranteed-non-NULL pointer to the string with the given CTF
67   name.  */
68const char *
69ctf_strptr (ctf_file_t *fp, uint32_t name)
70{
71  const char *s = ctf_strraw (fp, name);
72  return (s != NULL ? s : "(?)");
73}
74
75/* Remove all refs to a given atom.  */
76static void
77ctf_str_purge_atom_refs (ctf_str_atom_t *atom)
78{
79  ctf_str_atom_ref_t *ref, *next;
80
81  for (ref = ctf_list_next (&atom->csa_refs); ref != NULL; ref = next)
82    {
83      next = ctf_list_next (ref);
84      ctf_list_delete (&atom->csa_refs, ref);
85      free (ref);
86    }
87}
88
89/* Free an atom (only called on ctf_close().)  */
90static void
91ctf_str_free_atom (void *a)
92{
93  ctf_str_atom_t *atom = a;
94
95  ctf_str_purge_atom_refs (atom);
96  free (atom);
97}
98
99/* Create the atoms table.  There is always at least one atom in it, the null
100   string.  */
101int
102ctf_str_create_atoms (ctf_file_t *fp)
103{
104  fp->ctf_str_atoms = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string,
105					  free, ctf_str_free_atom);
106  if (fp->ctf_str_atoms == NULL)
107    return -ENOMEM;
108
109  if (!fp->ctf_prov_strtab)
110    fp->ctf_prov_strtab = ctf_dynhash_create (ctf_hash_integer,
111					      ctf_hash_eq_integer,
112					      NULL, NULL);
113  if (!fp->ctf_prov_strtab)
114    goto oom_prov_strtab;
115
116  errno = 0;
117  ctf_str_add (fp, "");
118  if (errno == ENOMEM)
119    goto oom_str_add;
120
121  return 0;
122
123 oom_str_add:
124  ctf_dynhash_destroy (fp->ctf_prov_strtab);
125  fp->ctf_prov_strtab = NULL;
126 oom_prov_strtab:
127  ctf_dynhash_destroy (fp->ctf_str_atoms);
128  fp->ctf_str_atoms = NULL;
129  return -ENOMEM;
130}
131
132/* Destroy the atoms table.  */
133void
134ctf_str_free_atoms (ctf_file_t *fp)
135{
136  ctf_dynhash_destroy (fp->ctf_prov_strtab);
137  ctf_dynhash_destroy (fp->ctf_str_atoms);
138}
139
140/* Add a string to the atoms table, copying the passed-in string.  Return the
141   atom added. Return NULL only when out of memory (and do not touch the
142   passed-in string in that case).  Possibly augment the ref list with the
143   passed-in ref.  Possibly add a provisional entry for this string to the
144   provisional strtab.   */
145static ctf_str_atom_t *
146ctf_str_add_ref_internal (ctf_file_t *fp, const char *str,
147			  int add_ref, int make_provisional, uint32_t *ref)
148{
149  char *newstr = NULL;
150  ctf_str_atom_t *atom = NULL;
151  ctf_str_atom_ref_t *aref = NULL;
152
153  atom = ctf_dynhash_lookup (fp->ctf_str_atoms, str);
154
155  if (add_ref)
156    {
157      if ((aref = malloc (sizeof (struct ctf_str_atom_ref))) == NULL)
158	return NULL;
159      aref->caf_ref = ref;
160    }
161
162  if (atom)
163    {
164      if (add_ref)
165	{
166	  ctf_list_append (&atom->csa_refs, aref);
167	  fp->ctf_str_num_refs++;
168	}
169      return atom;
170    }
171
172  if ((atom = malloc (sizeof (struct ctf_str_atom))) == NULL)
173    goto oom;
174  memset (atom, 0, sizeof (struct ctf_str_atom));
175
176  if ((newstr = strdup (str)) == NULL)
177    goto oom;
178
179  if (ctf_dynhash_insert (fp->ctf_str_atoms, newstr, atom) < 0)
180    goto oom;
181
182  atom->csa_str = newstr;
183  atom->csa_snapshot_id = fp->ctf_snapshots;
184
185  if (make_provisional)
186    {
187      atom->csa_offset = fp->ctf_str_prov_offset;
188
189      if (ctf_dynhash_insert (fp->ctf_prov_strtab, (void *) (uintptr_t)
190			      atom->csa_offset, (void *) atom->csa_str) < 0)
191	goto oom;
192
193      fp->ctf_str_prov_offset += strlen (atom->csa_str) + 1;
194    }
195
196  if (add_ref)
197    {
198      ctf_list_append (&atom->csa_refs, aref);
199      fp->ctf_str_num_refs++;
200    }
201  return atom;
202
203 oom:
204  if (newstr)
205    ctf_dynhash_remove (fp->ctf_str_atoms, newstr);
206  free (atom);
207  free (aref);
208  free (newstr);
209  return NULL;
210}
211
212/* Add a string to the atoms table, without augmenting the ref list for this
213   string: return a 'provisional offset' which can be used to return this string
214   until ctf_str_write_strtab is called, or 0 on failure.  (Everywhere the
215   provisional offset is assigned to should be added as a ref using
216   ctf_str_add_ref() as well.) */
217uint32_t
218ctf_str_add (ctf_file_t *fp, const char *str)
219{
220  ctf_str_atom_t *atom;
221  if (!str)
222    return 0;
223
224  atom = ctf_str_add_ref_internal (fp, str, FALSE, TRUE, 0);
225  if (!atom)
226    return 0;
227
228  return atom->csa_offset;
229}
230
231/* Like ctf_str_add(), but additionally augment the atom's refs list with the
232   passed-in ref, whether or not the string is already present.  There is no
233   attempt to deduplicate the refs list (but duplicates are harmless).  */
234uint32_t
235ctf_str_add_ref (ctf_file_t *fp, const char *str, uint32_t *ref)
236{
237  ctf_str_atom_t *atom;
238  if (!str)
239    return 0;
240
241  atom = ctf_str_add_ref_internal (fp, str, TRUE, TRUE, ref);
242  if (!atom)
243    return 0;
244
245  return atom->csa_offset;
246}
247
248/* Add an external strtab reference at OFFSET.  Returns zero if the addition
249   failed, nonzero otherwise.  */
250int
251ctf_str_add_external (ctf_file_t *fp, const char *str, uint32_t offset)
252{
253  ctf_str_atom_t *atom;
254  if (!str)
255    return 0;
256
257  atom = ctf_str_add_ref_internal (fp, str, FALSE, FALSE, 0);
258  if (!atom)
259    return 0;
260
261  atom->csa_external_offset = CTF_SET_STID (offset, CTF_STRTAB_1);
262  return 1;
263}
264
265/* Remove a single ref.  */
266void
267ctf_str_remove_ref (ctf_file_t *fp, const char *str, uint32_t *ref)
268{
269  ctf_str_atom_ref_t *aref, *anext;
270  ctf_str_atom_t *atom = NULL;
271
272  atom = ctf_dynhash_lookup (fp->ctf_str_atoms, str);
273  if (!atom)
274    return;
275
276  for (aref = ctf_list_next (&atom->csa_refs); aref != NULL; aref = anext)
277    {
278      anext = ctf_list_next (aref);
279      if (aref->caf_ref == ref)
280	{
281	  ctf_list_delete (&atom->csa_refs, aref);
282	  free (aref);
283	}
284    }
285}
286
287/* A ctf_dynhash_iter_remove() callback that removes atoms later than a given
288   snapshot ID.  */
289static int
290ctf_str_rollback_atom (void *key _libctf_unused_, void *value, void *arg)
291{
292  ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
293  ctf_snapshot_id_t *id = (ctf_snapshot_id_t *) arg;
294
295  return (atom->csa_snapshot_id > id->snapshot_id);
296}
297
298/* Roll back, deleting all atoms created after a particular ID.  */
299void
300ctf_str_rollback (ctf_file_t *fp, ctf_snapshot_id_t id)
301{
302  ctf_dynhash_iter_remove (fp->ctf_str_atoms, ctf_str_rollback_atom, &id);
303}
304
305/* An adaptor around ctf_purge_atom_refs.  */
306static void
307ctf_str_purge_one_atom_refs (void *key _libctf_unused_, void *value,
308			     void *arg _libctf_unused_)
309{
310  ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
311  ctf_str_purge_atom_refs (atom);
312}
313
314/* Remove all the recorded refs from the atoms table.  */
315void
316ctf_str_purge_refs (ctf_file_t *fp)
317{
318  if (fp->ctf_str_num_refs > 0)
319    ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_purge_one_atom_refs, NULL);
320  fp->ctf_str_num_refs = 0;
321}
322
323/* Update a list of refs to the specified value. */
324static void
325ctf_str_update_refs (ctf_str_atom_t *refs, uint32_t value)
326{
327  ctf_str_atom_ref_t *ref;
328
329  for (ref = ctf_list_next (&refs->csa_refs); ref != NULL;
330       ref = ctf_list_next (ref))
331      *(ref->caf_ref) = value;
332}
333
334/* State shared across the strtab write process.  */
335typedef struct ctf_strtab_write_state
336{
337  /* Strtab we are writing, and the number of strings in it.  */
338  ctf_strs_writable_t *strtab;
339  size_t strtab_count;
340
341  /* Pointers to (existing) atoms in the atoms table, for qsorting.  */
342  ctf_str_atom_t **sorttab;
343
344  /* Loop counter for sorttab population.  */
345  size_t i;
346
347  /* The null-string atom (skipped during population).  */
348  ctf_str_atom_t *nullstr;
349} ctf_strtab_write_state_t;
350
351/* Count the number of entries in the strtab, and its length.  */
352static void
353ctf_str_count_strtab (void *key _libctf_unused_, void *value,
354	      void *arg)
355{
356  ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
357  ctf_strtab_write_state_t *s = (ctf_strtab_write_state_t *) arg;
358
359  /* We only factor in the length of items that have no offset and have refs:
360     other items are in the external strtab, or will simply not be written out
361     at all.  They still contribute to the total count, though, because we still
362     have to sort them.  We add in the null string's length explicitly, outside
363     this function, since it is explicitly written out even if it has no refs at
364     all.  */
365
366  if (s->nullstr == atom)
367    {
368      s->strtab_count++;
369      return;
370    }
371
372  if (!ctf_list_empty_p (&atom->csa_refs))
373    {
374      if (!atom->csa_external_offset)
375	s->strtab->cts_len += strlen (atom->csa_str) + 1;
376      s->strtab_count++;
377    }
378}
379
380/* Populate the sorttab with pointers to the strtab atoms.  */
381static void
382ctf_str_populate_sorttab (void *key _libctf_unused_, void *value,
383		  void *arg)
384{
385  ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
386  ctf_strtab_write_state_t *s = (ctf_strtab_write_state_t *) arg;
387
388  /* Skip the null string.  */
389  if (s->nullstr == atom)
390    return;
391
392  /* Skip atoms with no refs.  */
393  if (!ctf_list_empty_p (&atom->csa_refs))
394    s->sorttab[s->i++] = atom;
395}
396
397/* Sort the strtab.  */
398static int
399ctf_str_sort_strtab (const void *a, const void *b)
400{
401  ctf_str_atom_t **one = (ctf_str_atom_t **) a;
402  ctf_str_atom_t **two = (ctf_str_atom_t **) b;
403
404  return (strcmp ((*one)->csa_str, (*two)->csa_str));
405}
406
407/* Write out and return a strtab containing all strings with recorded refs,
408   adjusting the refs to refer to the corresponding string.  The returned strtab
409   may be NULL on error.  Also populate the synthetic strtab with mappings from
410   external strtab offsets to names, so we can look them up with ctf_strptr().
411   Only external strtab offsets with references are added.  */
412ctf_strs_writable_t
413ctf_str_write_strtab (ctf_file_t *fp)
414{
415  ctf_strs_writable_t strtab;
416  ctf_str_atom_t *nullstr;
417  uint32_t cur_stroff = 0;
418  ctf_strtab_write_state_t s;
419  ctf_str_atom_t **sorttab;
420  size_t i;
421  int any_external = 0;
422
423  memset (&strtab, 0, sizeof (struct ctf_strs_writable));
424  memset (&s, 0, sizeof (struct ctf_strtab_write_state));
425  s.strtab = &strtab;
426
427  nullstr = ctf_dynhash_lookup (fp->ctf_str_atoms, "");
428  if (!nullstr)
429    {
430      ctf_err_warn (fp, 0, ECTF_INTERNAL, _("null string not found in strtab"));
431      strtab.cts_strs = NULL;
432      return strtab;
433    }
434
435  s.nullstr = nullstr;
436  ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_count_strtab, &s);
437  strtab.cts_len++;				/* For the null string.  */
438
439  ctf_dprintf ("%lu bytes of strings in strtab.\n",
440	       (unsigned long) strtab.cts_len);
441
442  /* Sort the strtab.  Force the null string to be first.  */
443  sorttab = calloc (s.strtab_count, sizeof (ctf_str_atom_t *));
444  if (!sorttab)
445    goto oom;
446
447  sorttab[0] = nullstr;
448  s.i = 1;
449  s.sorttab = sorttab;
450  ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_populate_sorttab, &s);
451
452  qsort (&sorttab[1], s.strtab_count - 1, sizeof (ctf_str_atom_t *),
453	 ctf_str_sort_strtab);
454
455  if ((strtab.cts_strs = malloc (strtab.cts_len)) == NULL)
456    goto oom_sorttab;
457
458  if (!fp->ctf_syn_ext_strtab)
459    fp->ctf_syn_ext_strtab = ctf_dynhash_create (ctf_hash_integer,
460						 ctf_hash_eq_integer,
461						 NULL, NULL);
462  if (!fp->ctf_syn_ext_strtab)
463    goto oom_strtab;
464
465  /* Update all refs: also update the strtab appropriately.  */
466  for (i = 0; i < s.strtab_count; i++)
467    {
468      if (sorttab[i]->csa_external_offset)
469	{
470	  /* External strtab entry: populate the synthetic external strtab.
471
472	     This is safe because you cannot ctf_rollback to before the point
473	     when a ctf_update is done, and the strtab is written at ctf_update
474	     time.  So any atoms we reference here are sure to stick around
475	     until ctf_file_close.  */
476
477	  any_external = 1;
478	  ctf_str_update_refs (sorttab[i], sorttab[i]->csa_external_offset);
479	  if (ctf_dynhash_insert (fp->ctf_syn_ext_strtab,
480				  (void *) (uintptr_t)
481				  sorttab[i]->csa_external_offset,
482				  (void *) sorttab[i]->csa_str) < 0)
483	    goto oom_strtab;
484	  sorttab[i]->csa_offset = sorttab[i]->csa_external_offset;
485	}
486      else
487	{
488	  /* Internal strtab entry with refs: actually add to the string
489	     table.  */
490
491	  ctf_str_update_refs (sorttab[i], cur_stroff);
492	  sorttab[i]->csa_offset = cur_stroff;
493	  strcpy (&strtab.cts_strs[cur_stroff], sorttab[i]->csa_str);
494	  cur_stroff += strlen (sorttab[i]->csa_str) + 1;
495	}
496    }
497  free (sorttab);
498
499  if (!any_external)
500    {
501      ctf_dynhash_destroy (fp->ctf_syn_ext_strtab);
502      fp->ctf_syn_ext_strtab = NULL;
503    }
504
505  /* All the provisional strtab entries are now real strtab entries, and
506     ctf_strptr() will find them there.  The provisional offset now starts right
507     beyond the new end of the strtab.  */
508
509  ctf_dynhash_empty (fp->ctf_prov_strtab);
510  fp->ctf_str_prov_offset = strtab.cts_len + 1;
511  return strtab;
512
513 oom_strtab:
514  free (strtab.cts_strs);
515  strtab.cts_strs = NULL;
516 oom_sorttab:
517  free (sorttab);
518 oom:
519  return strtab;
520}
521