1/* CTF string table management.
2   Copyright (C) 2019-2024 Free Software Foundation, Inc.
3
4   This file is part of libctf.
5
6   libctf is free software; you can redistribute it and/or modify it under
7   the terms of the GNU General Public License as published by the Free
8   Software Foundation; either version 3, or (at your option) any later
9   version.
10
11   This program is distributed in the hope that it will be useful, but
12   WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14   See the GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; see the file COPYING.  If not see
18   <http://www.gnu.org/licenses/>.  */
19
20#include <ctf-impl.h>
21#include <string.h>
22#include <assert.h>
23
24/* Convert an encoded CTF string name into a pointer to a C string, using an
25  explicit internal strtab rather than the fp-based one.  */
26const char *
27ctf_strraw_explicit (ctf_dict_t *fp, uint32_t name, ctf_strs_t *strtab)
28{
29  ctf_strs_t *ctsp = &fp->ctf_str[CTF_NAME_STID (name)];
30
31  if ((CTF_NAME_STID (name) == CTF_STRTAB_0) && (strtab != NULL))
32    ctsp = strtab;
33
34  /* If this name is in the external strtab, and there is a synthetic strtab,
35     use it in preference.  */
36
37  if (CTF_NAME_STID (name) == CTF_STRTAB_1
38      && fp->ctf_syn_ext_strtab != NULL)
39    return ctf_dynhash_lookup (fp->ctf_syn_ext_strtab,
40			       (void *) (uintptr_t) name);
41
42  /* If the name is in the internal strtab, and the offset is beyond the end of
43     the ctsp->cts_len but below the ctf_str_prov_offset, this is a provisional
44     string added by ctf_str_add*() but not yet built into a real strtab: get
45     the value out of the ctf_prov_strtab.  */
46
47  if (CTF_NAME_STID (name) == CTF_STRTAB_0
48      && name >= ctsp->cts_len && name < fp->ctf_str_prov_offset)
49      return ctf_dynhash_lookup (fp->ctf_prov_strtab,
50				 (void *) (uintptr_t) name);
51
52  if (ctsp->cts_strs != NULL && CTF_NAME_OFFSET (name) < ctsp->cts_len)
53    return (ctsp->cts_strs + CTF_NAME_OFFSET (name));
54
55  /* String table not loaded or corrupt offset.  */
56  return NULL;
57}
58
59/* Convert an encoded CTF string name into a pointer to a C string by looking
60  up the appropriate string table buffer and then adding the offset.  */
61const char *
62ctf_strraw (ctf_dict_t *fp, uint32_t name)
63{
64  return ctf_strraw_explicit (fp, name, NULL);
65}
66
67/* Return a guaranteed-non-NULL pointer to the string with the given CTF
68   name.  */
69const char *
70ctf_strptr (ctf_dict_t *fp, uint32_t name)
71{
72  const char *s = ctf_strraw (fp, name);
73  return (s != NULL ? s : "(?)");
74}
75
76/* Remove all refs to a given atom.  */
77static void
78ctf_str_purge_atom_refs (ctf_str_atom_t *atom)
79{
80  ctf_str_atom_ref_t *ref, *next;
81
82  for (ref = ctf_list_next (&atom->csa_refs); ref != NULL; ref = next)
83    {
84      next = ctf_list_next (ref);
85      ctf_list_delete (&atom->csa_refs, ref);
86      free (ref);
87    }
88}
89
90/* Free an atom (only called on ctf_close().)  */
91static void
92ctf_str_free_atom (void *a)
93{
94  ctf_str_atom_t *atom = a;
95
96  ctf_str_purge_atom_refs (atom);
97  free (atom);
98}
99
100/* Create the atoms table.  There is always at least one atom in it, the null
101   string.  */
102int
103ctf_str_create_atoms (ctf_dict_t *fp)
104{
105  fp->ctf_str_atoms = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string,
106					  free, ctf_str_free_atom);
107  if (!fp->ctf_str_atoms)
108    return -ENOMEM;
109
110  if (!fp->ctf_prov_strtab)
111    fp->ctf_prov_strtab = ctf_dynhash_create (ctf_hash_integer,
112					      ctf_hash_eq_integer,
113					      NULL, NULL);
114  if (!fp->ctf_prov_strtab)
115    goto oom_prov_strtab;
116
117  if (!fp->ctf_str_pending_ref)
118    fp->ctf_str_pending_ref = ctf_dynset_create (htab_hash_pointer,
119						 htab_eq_pointer,
120						 NULL);
121  if (!fp->ctf_str_pending_ref)
122    goto oom_str_pending_ref;
123
124  errno = 0;
125  ctf_str_add (fp, "");
126  if (errno == ENOMEM)
127    goto oom_str_add;
128
129  return 0;
130
131 oom_str_add:
132  ctf_dynhash_destroy (fp->ctf_prov_strtab);
133  fp->ctf_prov_strtab = NULL;
134 oom_str_pending_ref:
135  ctf_dynset_destroy (fp->ctf_str_pending_ref);
136  fp->ctf_str_pending_ref = NULL;
137 oom_prov_strtab:
138  ctf_dynhash_destroy (fp->ctf_str_atoms);
139  fp->ctf_str_atoms = NULL;
140  return -ENOMEM;
141}
142
143/* Destroy the atoms table.  */
144void
145ctf_str_free_atoms (ctf_dict_t *fp)
146{
147  ctf_dynhash_destroy (fp->ctf_prov_strtab);
148  ctf_dynhash_destroy (fp->ctf_str_atoms);
149  ctf_dynset_destroy (fp->ctf_str_pending_ref);
150}
151
152#define CTF_STR_ADD_REF 0x1
153#define CTF_STR_MAKE_PROVISIONAL 0x2
154#define CTF_STR_PENDING_REF 0x4
155
156/* Add a string to the atoms table, copying the passed-in string.  Return the
157   atom added. Return NULL only when out of memory (and do not touch the
158   passed-in string in that case).  Possibly augment the ref list with the
159   passed-in ref.  Possibly add a provisional entry for this string to the
160   provisional strtab.   */
161static ctf_str_atom_t *
162ctf_str_add_ref_internal (ctf_dict_t *fp, const char *str,
163			  int flags, uint32_t *ref)
164{
165  char *newstr = NULL;
166  ctf_str_atom_t *atom = NULL;
167  ctf_str_atom_ref_t *aref = NULL;
168
169  atom = ctf_dynhash_lookup (fp->ctf_str_atoms, str);
170
171  if (flags & CTF_STR_ADD_REF)
172    {
173      if ((aref = malloc (sizeof (struct ctf_str_atom_ref))) == NULL) {
174	ctf_set_errno (fp, ENOMEM);
175	return NULL;
176      }
177      aref->caf_ref = ref;
178    }
179
180  if (atom)
181    {
182      if (flags & CTF_STR_ADD_REF)
183	{
184	  ctf_dynset_remove (fp->ctf_str_pending_ref, (void *) ref);
185	  ctf_list_append (&atom->csa_refs, aref);
186	  fp->ctf_str_num_refs++;
187	}
188      return atom;
189    }
190
191  if ((atom = malloc (sizeof (struct ctf_str_atom))) == NULL)
192    goto oom;
193  memset (atom, 0, sizeof (struct ctf_str_atom));
194
195  if ((newstr = strdup (str)) == NULL)
196    goto oom;
197
198  if (ctf_dynhash_insert (fp->ctf_str_atoms, newstr, atom) < 0)
199    goto oom;
200
201  atom->csa_str = newstr;
202  atom->csa_snapshot_id = fp->ctf_snapshots;
203
204  if (flags & CTF_STR_MAKE_PROVISIONAL)
205    {
206      atom->csa_offset = fp->ctf_str_prov_offset;
207
208      if (ctf_dynhash_insert (fp->ctf_prov_strtab, (void *) (uintptr_t)
209			      atom->csa_offset, (void *) atom->csa_str) < 0)
210	goto oom;
211
212      fp->ctf_str_prov_offset += strlen (atom->csa_str) + 1;
213    }
214
215  if (flags & CTF_STR_PENDING_REF)
216    {
217      if (ctf_dynset_insert (fp->ctf_str_pending_ref, (void *) ref) < 0)
218	goto oom;
219    }
220  else if (flags & CTF_STR_ADD_REF)
221    {
222      ctf_dynset_remove (fp->ctf_str_pending_ref, (void *) ref);
223      ctf_list_append (&atom->csa_refs, aref);
224      fp->ctf_str_num_refs++;
225    }
226  return atom;
227
228 oom:
229  if (newstr)
230    ctf_dynhash_remove (fp->ctf_str_atoms, newstr);
231  free (atom);
232  free (aref);
233  free (newstr);
234  ctf_set_errno (fp, ENOMEM);
235  return NULL;
236}
237
238/* Add a string to the atoms table, without augmenting the ref list for this
239   string: return a 'provisional offset' which can be used to return this string
240   until ctf_str_write_strtab is called, or 0 on failure.  (Everywhere the
241   provisional offset is assigned to should be added as a ref using
242   ctf_str_add_ref() as well.) */
243uint32_t
244ctf_str_add (ctf_dict_t *fp, const char *str)
245{
246  ctf_str_atom_t *atom;
247
248  if (!str)
249    str = "";
250
251  atom = ctf_str_add_ref_internal (fp, str, CTF_STR_MAKE_PROVISIONAL, 0);
252  if (!atom)
253    return 0;
254
255  return atom->csa_offset;
256}
257
258/* Like ctf_str_add(), but additionally augment the atom's refs list with the
259   passed-in ref, whether or not the string is already present.  There is no
260   attempt to deduplicate the refs list (but duplicates are harmless).  */
261uint32_t
262ctf_str_add_ref (ctf_dict_t *fp, const char *str, uint32_t *ref)
263{
264  ctf_str_atom_t *atom;
265
266  if (!str)
267    str = "";
268
269  atom = ctf_str_add_ref_internal (fp, str, CTF_STR_ADD_REF
270				   | CTF_STR_MAKE_PROVISIONAL, ref);
271  if (!atom)
272    return 0;
273
274  return atom->csa_offset;
275}
276
277/* Like ctf_str_add_ref(), but notes that this memory location must be added as
278   a ref by a later serialization phase, rather than adding it itself.  */
279uint32_t
280ctf_str_add_pending (ctf_dict_t *fp, const char *str, uint32_t *ref)
281{
282  ctf_str_atom_t *atom;
283
284  if (!str)
285    str = "";
286
287  atom = ctf_str_add_ref_internal (fp, str, CTF_STR_PENDING_REF
288				   | CTF_STR_MAKE_PROVISIONAL, ref);
289  if (!atom)
290    return 0;
291
292  return atom->csa_offset;
293}
294
295/* Note that a pending ref now located at NEW_REF has moved by BYTES bytes.  */
296int
297ctf_str_move_pending (ctf_dict_t *fp, uint32_t *new_ref, ptrdiff_t bytes)
298{
299  if (bytes == 0)
300    return 0;
301
302  if (ctf_dynset_insert (fp->ctf_str_pending_ref, (void *) new_ref) < 0)
303    return (ctf_set_errno (fp, ENOMEM));
304
305  ctf_dynset_remove (fp->ctf_str_pending_ref,
306		     (void *) ((signed char *) new_ref - bytes));
307  return 0;
308}
309
310/* Add an external strtab reference at OFFSET.  Returns zero if the addition
311   failed, nonzero otherwise.  */
312int
313ctf_str_add_external (ctf_dict_t *fp, const char *str, uint32_t offset)
314{
315  ctf_str_atom_t *atom;
316
317  if (!str)
318    str = "";
319
320  atom = ctf_str_add_ref_internal (fp, str, 0, 0);
321  if (!atom)
322    return 0;
323
324  atom->csa_external_offset = CTF_SET_STID (offset, CTF_STRTAB_1);
325
326  if (!fp->ctf_syn_ext_strtab)
327    fp->ctf_syn_ext_strtab = ctf_dynhash_create (ctf_hash_integer,
328						 ctf_hash_eq_integer,
329						 NULL, NULL);
330  if (!fp->ctf_syn_ext_strtab)
331    {
332      ctf_set_errno (fp, ENOMEM);
333      return 0;
334    }
335
336  if (ctf_dynhash_insert (fp->ctf_syn_ext_strtab,
337			  (void *) (uintptr_t)
338			  atom->csa_external_offset,
339			  (void *) atom->csa_str) < 0)
340    {
341      /* No need to bother freeing the syn_ext_strtab: it will get freed at
342	 ctf_str_write_strtab time if unreferenced.  */
343      ctf_set_errno (fp, ENOMEM);
344      return 0;
345    }
346
347  return 1;
348}
349
350/* Remove a single ref.  */
351void
352ctf_str_remove_ref (ctf_dict_t *fp, const char *str, uint32_t *ref)
353{
354  ctf_str_atom_ref_t *aref, *anext;
355  ctf_str_atom_t *atom = NULL;
356
357  atom = ctf_dynhash_lookup (fp->ctf_str_atoms, str);
358  if (!atom)
359    return;
360
361  for (aref = ctf_list_next (&atom->csa_refs); aref != NULL; aref = anext)
362    {
363      anext = ctf_list_next (aref);
364      if (aref->caf_ref == ref)
365	{
366	  ctf_list_delete (&atom->csa_refs, aref);
367	  free (aref);
368	}
369    }
370
371  ctf_dynset_remove (fp->ctf_str_pending_ref, (void *) ref);
372}
373
374/* A ctf_dynhash_iter_remove() callback that removes atoms later than a given
375   snapshot ID.  External atoms are never removed, because they came from the
376   linker string table and are still present even if you roll back type
377   additions.  */
378static int
379ctf_str_rollback_atom (void *key _libctf_unused_, void *value, void *arg)
380{
381  ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
382  ctf_snapshot_id_t *id = (ctf_snapshot_id_t *) arg;
383
384  return (atom->csa_snapshot_id > id->snapshot_id)
385    && (atom->csa_external_offset == 0);
386}
387
388/* Roll back, deleting all (internal) atoms created after a particular ID.  */
389void
390ctf_str_rollback (ctf_dict_t *fp, ctf_snapshot_id_t id)
391{
392  ctf_dynhash_iter_remove (fp->ctf_str_atoms, ctf_str_rollback_atom, &id);
393}
394
395/* An adaptor around ctf_purge_atom_refs.  */
396static void
397ctf_str_purge_one_atom_refs (void *key _libctf_unused_, void *value,
398			     void *arg _libctf_unused_)
399{
400  ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
401  ctf_str_purge_atom_refs (atom);
402}
403
404/* Remove all the recorded refs from the atoms table.  */
405void
406ctf_str_purge_refs (ctf_dict_t *fp)
407{
408  if (fp->ctf_str_num_refs > 0)
409    ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_purge_one_atom_refs, NULL);
410  fp->ctf_str_num_refs = 0;
411}
412
413/* Update a list of refs to the specified value. */
414static void
415ctf_str_update_refs (ctf_str_atom_t *refs, uint32_t value)
416{
417  ctf_str_atom_ref_t *ref;
418
419  for (ref = ctf_list_next (&refs->csa_refs); ref != NULL;
420       ref = ctf_list_next (ref))
421      *(ref->caf_ref) = value;
422}
423
424/* State shared across the strtab write process.  */
425typedef struct ctf_strtab_write_state
426{
427  /* Strtab we are writing, and the number of strings in it.  */
428  ctf_strs_writable_t *strtab;
429  size_t strtab_count;
430
431  /* Pointers to (existing) atoms in the atoms table, for qsorting.  */
432  ctf_str_atom_t **sorttab;
433
434  /* Loop counter for sorttab population.  */
435  size_t i;
436
437  /* The null-string atom (skipped during population).  */
438  ctf_str_atom_t *nullstr;
439} ctf_strtab_write_state_t;
440
441/* Count the number of entries in the strtab, and its length.  */
442static void
443ctf_str_count_strtab (void *key _libctf_unused_, void *value,
444	      void *arg)
445{
446  ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
447  ctf_strtab_write_state_t *s = (ctf_strtab_write_state_t *) arg;
448
449  /* We only factor in the length of items that have no offset and have refs:
450     other items are in the external strtab, or will simply not be written out
451     at all.  They still contribute to the total count, though, because we still
452     have to sort them.  We add in the null string's length explicitly, outside
453     this function, since it is explicitly written out even if it has no refs at
454     all.  */
455
456  if (s->nullstr == atom)
457    {
458      s->strtab_count++;
459      return;
460    }
461
462  if (!ctf_list_empty_p (&atom->csa_refs))
463    {
464      if (!atom->csa_external_offset)
465	s->strtab->cts_len += strlen (atom->csa_str) + 1;
466      s->strtab_count++;
467    }
468}
469
470/* Populate the sorttab with pointers to the strtab atoms.  */
471static void
472ctf_str_populate_sorttab (void *key _libctf_unused_, void *value,
473		  void *arg)
474{
475  ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
476  ctf_strtab_write_state_t *s = (ctf_strtab_write_state_t *) arg;
477
478  /* Skip the null string.  */
479  if (s->nullstr == atom)
480    return;
481
482  /* Skip atoms with no refs.  */
483  if (!ctf_list_empty_p (&atom->csa_refs))
484    s->sorttab[s->i++] = atom;
485}
486
487/* Sort the strtab.  */
488static int
489ctf_str_sort_strtab (const void *a, const void *b)
490{
491  ctf_str_atom_t **one = (ctf_str_atom_t **) a;
492  ctf_str_atom_t **two = (ctf_str_atom_t **) b;
493
494  return (strcmp ((*one)->csa_str, (*two)->csa_str));
495}
496
497/* Write out and return a strtab containing all strings with recorded refs,
498   adjusting the refs to refer to the corresponding string.  The returned strtab
499   may be NULL on error.  Also populate the synthetic strtab with mappings from
500   external strtab offsets to names, so we can look them up with ctf_strptr().
501   Only external strtab offsets with references are added.  */
502ctf_strs_writable_t
503ctf_str_write_strtab (ctf_dict_t *fp)
504{
505  ctf_strs_writable_t strtab;
506  ctf_str_atom_t *nullstr;
507  uint32_t cur_stroff = 0;
508  ctf_strtab_write_state_t s;
509  ctf_str_atom_t **sorttab;
510  size_t i;
511  int any_external = 0;
512
513  memset (&strtab, 0, sizeof (struct ctf_strs_writable));
514  memset (&s, 0, sizeof (struct ctf_strtab_write_state));
515  s.strtab = &strtab;
516
517  nullstr = ctf_dynhash_lookup (fp->ctf_str_atoms, "");
518  if (!nullstr)
519    {
520      ctf_err_warn (fp, 0, ECTF_INTERNAL, _("null string not found in strtab"));
521      strtab.cts_strs = NULL;
522      return strtab;
523    }
524
525  s.nullstr = nullstr;
526  ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_count_strtab, &s);
527  strtab.cts_len++;				/* For the null string.  */
528
529  ctf_dprintf ("%lu bytes of strings in strtab.\n",
530	       (unsigned long) strtab.cts_len);
531
532  /* Sort the strtab.  Force the null string to be first.  */
533  sorttab = calloc (s.strtab_count, sizeof (ctf_str_atom_t *));
534  if (!sorttab)
535    goto oom;
536
537  sorttab[0] = nullstr;
538  s.i = 1;
539  s.sorttab = sorttab;
540  ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_populate_sorttab, &s);
541
542  qsort (&sorttab[1], s.strtab_count - 1, sizeof (ctf_str_atom_t *),
543	 ctf_str_sort_strtab);
544
545  if ((strtab.cts_strs = malloc (strtab.cts_len)) == NULL)
546    goto oom_sorttab;
547
548  /* Update all refs: also update the strtab appropriately.  */
549  for (i = 0; i < s.strtab_count; i++)
550    {
551      if (sorttab[i]->csa_external_offset)
552	{
553	  /* External strtab entry.  */
554
555	  any_external = 1;
556	  ctf_str_update_refs (sorttab[i], sorttab[i]->csa_external_offset);
557	  sorttab[i]->csa_offset = sorttab[i]->csa_external_offset;
558	}
559      else
560	{
561	  /* Internal strtab entry with refs: actually add to the string
562	     table.  */
563
564	  ctf_str_update_refs (sorttab[i], cur_stroff);
565	  sorttab[i]->csa_offset = cur_stroff;
566	  strcpy (&strtab.cts_strs[cur_stroff], sorttab[i]->csa_str);
567	  cur_stroff += strlen (sorttab[i]->csa_str) + 1;
568	}
569    }
570  free (sorttab);
571
572  if (!any_external)
573    {
574      ctf_dynhash_destroy (fp->ctf_syn_ext_strtab);
575      fp->ctf_syn_ext_strtab = NULL;
576    }
577
578  /* All the provisional strtab entries are now real strtab entries, and
579     ctf_strptr() will find them there.  The provisional offset now starts right
580     beyond the new end of the strtab.  */
581
582  ctf_dynhash_empty (fp->ctf_prov_strtab);
583  fp->ctf_str_prov_offset = strtab.cts_len + 1;
584  return strtab;
585
586 oom_sorttab:
587  free (sorttab);
588 oom:
589  return strtab;
590}
591