1/* CTF type deduplication.
2   Copyright (C) 2019-2022 Free Software Foundation, Inc.
3
4   This file is part of libctf.
5
6   libctf is free software; you can redistribute it and/or modify it under
7   the terms of the GNU General Public License as published by the Free
8   Software Foundation; either version 3, or (at your option) any later
9   version.
10
11   This program is distributed in the hope that it will be useful, but
12   WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14   See the GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; see the file COPYING.  If not see
18   <http://www.gnu.org/licenses/>.  */
19
20#include <ctf-impl.h>
21#include <string.h>
22#include <errno.h>
23#include <assert.h>
24#include "hashtab.h"
25
26/* (In the below, relevant functions are named in square brackets.)  */
27
28/* Type deduplication is a three-phase process:
29
30    [ctf_dedup, ctf_dedup_hash_type, ctf_dedup_rhash_type]
31    1) come up with unambiguous hash values for all types: no two types may have
32       the same hash value, and any given type should have only one hash value
33       (for optimal deduplication).
34
35    [ctf_dedup, ctf_dedup_detect_name_ambiguity,
36     ctf_dedup_conflictify_unshared, ctf_dedup_mark_conflicting_hash]
37    2) mark those distinct types with names that collide (and thus cannot be
38       declared simultaneously in the same translation unit) as conflicting, and
39       recursively mark all types that cite one of those types as conflicting as
40       well.  Possibly mark all types cited in only one TU as conflicting, if
41       the CTF_LINK_SHARE_DUPLICATED link mode is active.
42
43    [ctf_dedup_emit, ctf_dedup_emit_struct_members, ctf_dedup_id_to_target]
44    3) emit all the types, one hash value at a time.  Types not marked
45       conflicting are emitted once, into the shared dictionary: types marked
46       conflicting are emitted once per TU into a dictionary corresponding to
47       each TU in which they appear.  Structs marked conflicting get at the very
48       least a forward emitted into the shared dict so that other dicts can cite
49       it if needed.
50
51   [id_to_packed_id]
52   This all works over an array of inputs (usually in the same order as the
53   inputs on the link line).  We don't use the ctf_link_inputs hash directly
54   because it is convenient to be able to address specific input types as a
55   *global type ID* or 'GID', a pair of an array offset and a ctf_id_t.  Since
56   both are already 32 bits or less or can easily be constrained to that range,
57   we can pack them both into a single 64-bit hash word for easy lookups, which
58   would be much more annoying to do with a ctf_dict_t * and a ctf_id_t.  (On
59   32-bit platforms, we must do that anyway, since pointers, and thus hash keys
60   and values, are only 32 bits wide).  We track which inputs are parents of
61   which other inputs so that we can correctly recognize that types we have
62   traversed in children may cite types in parents, and so that we can process
63   the parents first.)
64
65   Note that thanks to ld -r, the deduplicator can be fed its own output, so the
66   inputs may themselves have child dicts.  Since we need to support this usage
67   anyway, we can use it in one other place.  If the caller finds translation
68   units to be too small a unit ambiguous types, links can be 'cu-mapped', where
69   the caller provides a mapping of input TU names to output child dict names.
70   This mapping can fuse many child TUs into one potential child dict, so that
71   ambiguous types in any of those input TUs go into the same child dict.
72   When a many:1 cu-mapping is detected, the ctf_dedup machinery is called
73   repeatedly, once for every output name that has more than one input, to fuse
74   all the input TUs associated with a given output dict into one, and once again
75   as normal to deduplicate all those intermediate outputs (and any 1:1 inputs)
76   together.  This has much higher memory usage than otherwise, because in the
77   intermediate state, all the output TUs are in memory at once and cannot be
78   lazily opened.  It also has implications for the emission code: if types
79   appear ambiguously in multiple input TUs that are all mapped to the same
80   child dict, we cannot put them in children in the cu-mapping link phase
81   because this output is meant to *become* a child in the next link stage and
82   parent/child relationships are only one level deep: so instead, we just hide
83   all but one of the ambiguous types.
84
85   There are a few other subtleties here that make this more complex than it
86   seems.  Let's go over the steps above in more detail.
87
88   1) HASHING.
89
90   [ctf_dedup_hash_type, ctf_dedup_rhash_type]
91   Hashing proceeds recursively, mixing in the properties of each input type
92   (including its name, if any), and then adding the hash values of every type
93   cited by that type.  The result is stashed in the cd_type_hashes so other
94   phases can find the hash values of input types given their IDs, and so that
95   if we encounter this type again while hashing we can just return its hash
96   value: it is also stashed in the *output mapping*, a mapping from hash value
97   to the set of GIDs corresponding to that type in all inputs.  We also keep
98   track of the GID of the first appearance of the type in any input (in
99   cd_output_first_gid), and the GID of structs, unions, and forwards that only
100   appear in one TU (in cd_struct_origin).  See below for where these things are
101   used.
102
103   Everything in this phase is time-critical, because it is operating over
104   non-deduplicated types and so may have hundreds or thousands of times the
105   data volume to deal with than later phases.  Trace output is hidden behind
106   ENABLE_LIBCTF_HASH_DEBUGGING to prevent the sheer number of calls to
107   ctf_dprintf from slowing things down (tenfold slowdowns are observed purely
108   from the calls to ctf_dprintf(), even with debugging switched off), and keep
109   down the volume of output (hundreds of gigabytes of debug output are not
110   uncommon on larger links).
111
112   We have to do *something* about potential cycles in the type graph.  We'd
113   like to avoid emitting forwards in the final output if possible, because
114   forwards aren't much use: they have no members.  We are mostly saved from
115   needing to worry about this at emission time by ctf_add_struct*()
116   automatically replacing newly-created forwards when the real struct/union
117   comes along.  So we only have to avoid getting stuck in cycles during the
118   hashing phase, while also not confusing types that cite members that are
119   structs with each other.  It is easiest to solve this problem by noting two
120   things:
121
122    - all cycles in C depend on the presence of tagged structs/unions
123    - all tagged structs/unions have a unique name they can be disambiguated by
124
125   [ctf_dedup_is_stub]
126   This means that we can break all cycles by ceasing to hash in cited types at
127   every tagged struct/union and instead hashing in a stub consisting of the
128   struct/union's *decorated name*, which is the name preceded by "s " or "u "
129   depending on the namespace (cached in cd_decorated_names).  Forwards are
130   decorated identically (so a forward to "struct foo" would be represented as
131   "s foo"): this means that a citation of a forward to a type and a citation of
132   a concrete definition of a type with the same name ends up getting the same
133   hash value.
134
135   Of course, it is quite possible to have two TUs with structs with the same
136   name and different definitions, but that's OK because when we scan for types
137   with ambiguous names we will identify these and mark them conflicting.
138
139   We populate one thing to help conflictedness marking.  No unconflicted type
140   may cite a conflicted one, but this means that conflictedness marking must
141   walk from types to the types that cite them, which is the opposite of the
142   usual order.  We can make this easier to do by constructing a *citers* graph
143   in cd_citers, which points from types to the types that cite them: because we
144   emit forwards corresponding to every conflicted struct/union, we don't need
145   to do this for citations of structs/unions by other types.  This is very
146   convenient for us, because that's the only type we don't traverse
147   recursively: so we can construct the citers graph at the same time as we
148   hash, rather than needing to add an extra pass.  (This graph is a dynhash of
149   *type hash values*, so it's small: in effect it is automatically
150   deduplicated.)
151
152   2) COLLISIONAL MARKING.
153
154   [ctf_dedup_detect_name_ambiguity, ctf_dedup_mark_conflicting_hash]
155   We identify types whose names collide during the hashing process, and count
156   the rough number of uses of each name (caching may throw it off a bit: this
157   doesn't need to be accurate).  We then mark the less-frequently-cited types
158   with each names conflicting: the most-frequently-cited one goes into the
159   shared type dictionary, while all others are duplicated into per-TU
160   dictionaries, named after the input TU, that have the shared dictionary as a
161   parent.  For structures and unions this is not quite good enough: we'd like
162   to have citations of forwards to ambiguously named structures and unions
163   *stay* as citations of forwards, so that the user can tell that the caller
164   didn't actually know which structure definition was meant: but if we put one
165   of those structures into the shared dictionary, it would supplant and replace
166   the forward, leaving no sign.  So structures and unions do not take part in
167   this popularity contest: if their names are ambiguous, they are just
168   duplicated, and only a forward appears in the shared dict.
169
170   [ctf_dedup_propagate_conflictedness]
171   The process of marking types conflicted is itself recursive: we recursively
172   traverse the cd_citers graph populated in the hashing pass above and mark
173   everything that we encounter conflicted (without wasting time re-marking
174   anything that is already marked).  This naturally terminates just where we
175   want it to (at types that are cited by no other types, and at structures and
176   unions) and suffices to ensure that types that cite conflicted types are
177   always marked conflicted.
178
179   [ctf_dedup_conflictify_unshared, ctf_dedup_multiple_input_dicts]
180   When linking in CTF_LINK_SHARE_DUPLICATED mode, we would like all types that
181   are used in only one TU to end up in a per-CU dict. The easiest way to do
182   that is to mark them conflicted.  ctf_dedup_conflictify_unshared does this,
183   traversing the output mapping and using ctf_dedup_multiple_input_dicts to
184   check the number of input dicts each distinct type hash value came from:
185   types that only came from one get marked conflicted.  One caveat here is that
186   we need to consider both structs and forwards to them: a struct that appears
187   in one TU and has a dozen citations to an opaque forward in other TUs should
188   *not* be considered to be used in only one TU, because users would find it
189   useful to be able to traverse into opaque structures of that sort: so we use
190   cd_struct_origin to check both structs/unions and the forwards corresponding
191   to them.
192
193   3) EMISSION.
194
195   [ctf_dedup_walk_output_mapping, ctf_dedup_rwalk_output_mapping,
196    ctf_dedup_rwalk_one_output_mapping]
197   Emission involves another walk of the entire output mapping, this time
198   traversing everything other than struct members, recursively.  Types are
199   emitted from leaves to trunk, emitting all types a type cites before emitting
200   the type itself.  We sort the output mapping before traversing it, for
201   reproducibility and also correctness: the input dicts may have parent/child
202   relationships, so we simply sort all types that first appear in parents
203   before all children, then sort types that first appear in dicts appearing
204   earlier on the linker command line before those that appear later, then sort
205   by input ctf_id_t.  (This is where we use cd_output_first_gid, collected
206   above.)
207
208   The walking is done using a recursive traverser which arranges to not revisit
209   any type already visited and to call its callback once per input GID for
210   input GIDs corresponding to conflicted output types.  The traverser only
211   finds input types and calls a callback for them as many times as the output
212   needs to appear: it doesn't try to figure out anything about where the output
213   might go.  That's done by the callback based on whether the type is
214   marked conflicted or not.
215
216   [ctf_dedup_emit_type, ctf_dedup_id_to_target, ctf_dedup_synthesize_forward]
217   ctf_dedup_emit_type is the (sole) callback for ctf_dedup_walk_output_mapping.
218   Conflicted types have all necessary dictionaries created, and then we emit
219   the type into each dictionary in turn, working over each input CTF type
220   corresponding to each hash value and using ctf_dedup_id_to_target to map each
221   input ctf_id_t into the corresponding type in the output (dealing with input
222   ctf_id_t's with parents in the process by simply chasing to the parent dict
223   if the type we're looking up is in there).  Emitting structures involves
224   simply noting that the members of this structure need emission later on:
225   because you cannot cite a single structure member from another type, we avoid
226   emitting the members at this stage to keep recursion depths down a bit.
227
228   At this point, if we have by some mischance decided that two different types
229   with child types that hash to different values have in fact got the same hash
230   value themselves and *not* marked it conflicting, the type walk will walk
231   only *one* of them and in all likelihood we'll find that we are trying to
232   emit a type into some child dictionary that references a type that was never
233   emitted into that dictionary and assertion-fail.  This always indicates a bug
234   in the conflictedness marking machinery or the hashing code, or both.
235
236   ctf_dedup_id_to_target calls ctf_dedup_synthesize_forward to do one extra
237   thing, alluded to above: if this is a conflicted tagged structure or union,
238   and the target is the shared dict (i.e., the type we're being asked to emit
239   is not itself conflicted so can't just point straight at the conflicted
240   type), we instead synthesise a forward with the same name, emit it into the
241   shared dict, record it in cd_output_emission_conflicted_forwards so that we
242   don't re-emit it, and return it.  This means that cycles that contain
243   conflicts do not cause the entire cycle to be replicated in every child: only
244   that piece of the cycle which takes you back as far as the closest tagged
245   struct/union needs to be replicated.  This trick means that no part of the
246   deduplicator needs a cycle detector: every recursive walk can stop at tagged
247   structures.
248
249   [ctf_dedup_emit_struct_members]
250   The final stage of emission is to walk over all structures with members
251   that need emission and emit all of them. Every type has been emitted at
252   this stage, so emission cannot fail.
253
254   [ctf_dedup_populate_type_mappings, ctf_dedup_populate_type_mapping]
255   Finally, we update the input -> output type ID mappings used by the ctf-link
256   machinery to update all the other sections.  This is surprisingly expensive
257   and may be replaced with a scheme which lets the ctf-link machinery extract
258   the needed info directly from the deduplicator.  */
259
260/* Possible future optimizations are flagged with 'optimization opportunity'
261   below.  */
262
263/* Global optimization opportunity: a GC pass, eliminating types with no direct
264   or indirect citations from the other sections in the dictionary.  */
265
266/* Internal flag values for ctf_dedup_hash_type.  */
267
268/* Child call: consider forwardable types equivalent to forwards or stubs below
269   this point.  */
270#define CTF_DEDUP_HASH_INTERNAL_CHILD         0x01
271
272/* Transform references to single ctf_id_ts in passed-in inputs into a number
273   that will fit in a uint64_t.  Needs rethinking if CTF_MAX_TYPE is boosted.
274
275   On 32-bit platforms, we pack things together differently: see the note
276   above.  */
277
278#if UINTPTR_MAX < UINT64_MAX
279# define IDS_NEED_ALLOCATION 1
280# define CTF_DEDUP_GID(fp, input, type) id_to_packed_id (fp, input, type)
281# define CTF_DEDUP_GID_TO_INPUT(id) packed_id_to_input (id)
282# define CTF_DEDUP_GID_TO_TYPE(id) packed_id_to_type (id)
283#else
284# define CTF_DEDUP_GID(fp, input, type)	\
285  (void *) (((uint64_t) input) << 32 | (type))
286# define CTF_DEDUP_GID_TO_INPUT(id) ((int) (((uint64_t) id) >> 32))
287# define CTF_DEDUP_GID_TO_TYPE(id) (ctf_id_t) (((uint64_t) id) & ~(0xffffffff00000000ULL))
288#endif
289
290#ifdef IDS_NEED_ALLOCATION
291
292 /* This is the 32-bit path, which stores GIDs in a pool and returns a pointer
293    into the pool.  It is notably less efficient than the 64-bit direct storage
294    approach, but with a smaller key, this is all we can do.  */
295
296static void *
297id_to_packed_id (ctf_dict_t *fp, int input_num, ctf_id_t type)
298{
299  const void *lookup;
300  ctf_type_id_key_t *dynkey = NULL;
301  ctf_type_id_key_t key = { input_num, type };
302
303  if (!ctf_dynhash_lookup_kv (fp->ctf_dedup.cd_id_to_dict_t,
304			      &key, &lookup, NULL))
305    {
306      if ((dynkey = malloc (sizeof (ctf_type_id_key_t))) == NULL)
307	goto oom;
308      memcpy (dynkey, &key, sizeof (ctf_type_id_key_t));
309
310      if (ctf_dynhash_insert (fp->ctf_dedup.cd_id_to_dict_t, dynkey, NULL) < 0)
311	goto oom;
312
313      ctf_dynhash_lookup_kv (fp->ctf_dedup.cd_id_to_dict_t,
314			     dynkey, &lookup, NULL);
315    }
316  /* We use a raw assert() here because there isn't really a way to get any sort
317     of error back from this routine without vastly complicating things for the
318     much more common case of !IDS_NEED_ALLOCATION.  */
319  assert (lookup);
320  return (void *) lookup;
321
322 oom:
323  free (dynkey);
324  ctf_set_errno (fp, ENOMEM);
325  return NULL;
326}
327
328static int
329packed_id_to_input (const void *id)
330{
331  const ctf_type_id_key_t *key = (ctf_type_id_key_t *) id;
332
333  return key->ctii_input_num;
334}
335
336static ctf_id_t
337packed_id_to_type (const void *id)
338{
339  const ctf_type_id_key_t *key = (ctf_type_id_key_t *) id;
340
341  return key->ctii_type;
342}
343#endif
344
345/* Make an element in a dynhash-of-dynsets, or return it if already present.  */
346
347static ctf_dynset_t *
348make_set_element (ctf_dynhash_t *set, const void *key)
349{
350  ctf_dynset_t *element;
351
352  if ((element = ctf_dynhash_lookup (set, key)) == NULL)
353    {
354      if ((element = ctf_dynset_create (htab_hash_string,
355					htab_eq_string,
356					NULL)) == NULL)
357	return NULL;
358
359      if (ctf_dynhash_insert (set, (void *) key, element) < 0)
360	{
361	  ctf_dynset_destroy (element);
362	  return NULL;
363	}
364    }
365
366  return element;
367}
368
369/* Initialize the dedup atoms table.  */
370int
371ctf_dedup_atoms_init (ctf_dict_t *fp)
372{
373  if (fp->ctf_dedup_atoms)
374    return 0;
375
376  if (!fp->ctf_dedup_atoms_alloc)
377    {
378      if ((fp->ctf_dedup_atoms_alloc
379	   = ctf_dynset_create (htab_hash_string, htab_eq_string,
380				free)) == NULL)
381	return ctf_set_errno (fp, ENOMEM);
382    }
383  fp->ctf_dedup_atoms = fp->ctf_dedup_atoms_alloc;
384  return 0;
385}
386
387/* Intern things in the dedup atoms table.  */
388
389static const char *
390intern (ctf_dict_t *fp, char *atom)
391{
392  const void *foo;
393
394  if (atom == NULL)
395    return NULL;
396
397  if (!ctf_dynset_exists (fp->ctf_dedup_atoms, atom, &foo))
398    {
399      if (ctf_dynset_insert (fp->ctf_dedup_atoms, atom) < 0)
400	{
401	  ctf_set_errno (fp, ENOMEM);
402	  return NULL;
403	}
404      foo = atom;
405    }
406  else
407    free (atom);
408
409  return (const char *) foo;
410}
411
412/* Add an indication of the namespace to a type name in a way that is not valid
413   for C identifiers.  Used to maintain hashes of type names to other things
414   while allowing for the four C namespaces (normal, struct, union, enum).
415   Return a new dynamically-allocated string.  */
416static const char *
417ctf_decorate_type_name (ctf_dict_t *fp, const char *name, int kind)
418{
419  ctf_dedup_t *d = &fp->ctf_dedup;
420  const char *ret;
421  const char *k;
422  char *p;
423  size_t i;
424
425  switch (kind)
426    {
427    case CTF_K_STRUCT:
428      k = "s ";
429      i = 0;
430      break;
431    case CTF_K_UNION:
432      k = "u ";
433      i = 1;
434      break;
435    case CTF_K_ENUM:
436      k = "e ";
437      i = 2;
438      break;
439    default:
440      k = "";
441      i = 3;
442    }
443
444  if ((ret = ctf_dynhash_lookup (d->cd_decorated_names[i], name)) == NULL)
445    {
446      char *str;
447
448      if ((str = malloc (strlen (name) + strlen (k) + 1)) == NULL)
449	goto oom;
450
451      p = stpcpy (str, k);
452      strcpy (p, name);
453      ret = intern (fp, str);
454      if (!ret)
455	goto oom;
456
457      if (ctf_dynhash_cinsert (d->cd_decorated_names[i], name, ret) < 0)
458	goto oom;
459    }
460
461  return ret;
462
463 oom:
464  ctf_set_errno (fp, ENOMEM);
465  return NULL;
466}
467
468/* Hash a type, possibly debugging-dumping something about it as well.  */
469static inline void
470ctf_dedup_sha1_add (ctf_sha1_t *sha1, const void *buf, size_t len,
471		    const char *description _libctf_unused_,
472		    unsigned long depth _libctf_unused_)
473{
474  ctf_sha1_add (sha1, buf, len);
475
476#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
477  ctf_sha1_t tmp;
478  char tmp_hval[CTF_SHA1_SIZE];
479  tmp = *sha1;
480  ctf_sha1_fini (&tmp, tmp_hval);
481  ctf_dprintf ("%lu: after hash addition of %s: %s\n", depth, description,
482	       tmp_hval);
483#endif
484}
485
486static const char *
487ctf_dedup_hash_type (ctf_dict_t *fp, ctf_dict_t *input,
488		     ctf_dict_t **inputs, uint32_t *parents,
489		     int input_num, ctf_id_t type, int flags,
490		     unsigned long depth,
491		     int (*populate_fun) (ctf_dict_t *fp,
492					  ctf_dict_t *input,
493					  ctf_dict_t **inputs,
494					  int input_num,
495					  ctf_id_t type,
496					  void *id,
497					  const char *decorated_name,
498					  const char *hash));
499
500/* Determine whether this type is being hashed as a stub (in which case it is
501   unsafe to cache it).  */
502static int
503ctf_dedup_is_stub (const char *name, int kind, int fwdkind, int flags)
504{
505  /* We can cache all types unless we are recursing to children and are hashing
506     in a tagged struct, union or forward, all of which are replaced with their
507     decorated name as a stub and will have different hash values when hashed at
508     the top level.  */
509
510  return ((flags & CTF_DEDUP_HASH_INTERNAL_CHILD) && name
511	  && (kind == CTF_K_STRUCT || kind == CTF_K_UNION
512	      || (kind == CTF_K_FORWARD && (fwdkind == CTF_K_STRUCT
513					    || fwdkind == CTF_K_UNION))));
514}
515
516/* Populate struct_origin if need be (not already populated, or populated with
517   a different origin), in which case it must go to -1, "shared".)
518
519   Only called for forwards or forwardable types with names, when the link mode
520   is CTF_LINK_SHARE_DUPLICATED.  */
521static int
522ctf_dedup_record_origin (ctf_dict_t *fp, int input_num, const char *decorated,
523			 void *id)
524{
525  ctf_dedup_t *d = &fp->ctf_dedup;
526  void *origin;
527  int populate_origin = 0;
528
529  if (ctf_dynhash_lookup_kv (d->cd_struct_origin, decorated, NULL, &origin))
530    {
531      if (CTF_DEDUP_GID_TO_INPUT (origin) != input_num
532	  && CTF_DEDUP_GID_TO_INPUT (origin) != -1)
533	{
534	  populate_origin = 1;
535	  origin = CTF_DEDUP_GID (fp, -1, -1);
536	}
537    }
538  else
539    {
540      populate_origin = 1;
541      origin = id;
542    }
543
544  if (populate_origin)
545    if (ctf_dynhash_cinsert (d->cd_struct_origin, decorated, origin) < 0)
546      return ctf_set_errno (fp, errno);
547  return 0;
548}
549
550/* Do the underlying hashing and recursion for ctf_dedup_hash_type (which it
551   calls, recursively).  */
552
553static const char *
554ctf_dedup_rhash_type (ctf_dict_t *fp, ctf_dict_t *input, ctf_dict_t **inputs,
555		      uint32_t *parents, int input_num, ctf_id_t type,
556		      void *type_id, const ctf_type_t *tp, const char *name,
557		      const char *decorated, int kind, int flags,
558		      unsigned long depth,
559		      int (*populate_fun) (ctf_dict_t *fp,
560					   ctf_dict_t *input,
561					   ctf_dict_t **inputs,
562					   int input_num,
563					   ctf_id_t type,
564					   void *id,
565					   const char *decorated_name,
566					   const char *hash))
567{
568  ctf_dedup_t *d = &fp->ctf_dedup;
569  ctf_next_t *i = NULL;
570  ctf_sha1_t hash;
571  ctf_id_t child_type;
572  char hashbuf[CTF_SHA1_SIZE];
573  const char *hval = NULL;
574  const char *whaterr;
575  int err = 0;
576
577  const char *citer = NULL;
578  ctf_dynset_t *citers = NULL;
579
580  /* Add a citer to the citers set.  */
581#define ADD_CITER(citers, hval)						\
582  do									\
583    {									\
584      whaterr = N_("error updating citers");				\
585      if (!citers)							\
586	if ((citers = ctf_dynset_create (htab_hash_string,		\
587					 htab_eq_string,		\
588					 NULL)) == NULL)		\
589	  goto oom;							\
590      if (ctf_dynset_cinsert (citers, hval) < 0)			\
591	goto oom;							\
592    }									\
593  while (0)
594
595  /* If this is a named struct or union or a forward to one, and this is a child
596     traversal, treat this type as if it were a forward -- do not recurse to
597     children, ignore all content not already hashed in, and hash in the
598     decorated name of the type instead.  */
599
600  if (ctf_dedup_is_stub (name, kind, tp->ctt_type, flags))
601    {
602#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
603      ctf_dprintf ("Struct/union/forward citation: substituting forwarding "
604		   "stub with decorated name %s\n", decorated);
605
606#endif
607      ctf_sha1_init (&hash);
608      ctf_dedup_sha1_add (&hash, decorated, strlen (decorated) + 1,
609			  "decorated struct/union/forward name", depth);
610      ctf_sha1_fini (&hash, hashbuf);
611
612      if ((hval = intern (fp, strdup (hashbuf))) == NULL)
613	{
614	  ctf_err_warn (fp, 0, 0, _("%s (%i): out of memory during forwarding-"
615				    "stub hashing for type with GID %p"),
616			ctf_link_input_name (input), input_num, type_id);
617	  return NULL;				/* errno is set for us.  */
618	}
619
620      /* In share-duplicated link mode, make sure the origin of this type is
621	 recorded, even if this is a type in a parent dict which will not be
622	 directly traversed.  */
623      if (d->cd_link_flags & CTF_LINK_SHARE_DUPLICATED
624	  && ctf_dedup_record_origin (fp, input_num, decorated, type_id) < 0)
625	return NULL;				/* errno is set for us.  */
626
627      return hval;
628    }
629
630  /* Now ensure that subsequent recursive calls (but *not* the top-level call)
631     get this treatment.  */
632  flags |= CTF_DEDUP_HASH_INTERNAL_CHILD;
633
634  /* If this is a struct, union, or forward with a name, record the unique
635     originating input TU, if there is one.  */
636
637  if (decorated && (ctf_forwardable_kind (kind) || kind != CTF_K_FORWARD))
638    if (d->cd_link_flags & CTF_LINK_SHARE_DUPLICATED
639	&& ctf_dedup_record_origin (fp, input_num, decorated, type_id) < 0)
640      return NULL;				/* errno is set for us.  */
641
642#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
643  ctf_dprintf ("%lu: hashing thing with ID %i/%lx (kind %i): %s.\n",
644	       depth, input_num, type, kind, name ? name : "");
645#endif
646
647  /* Some type kinds don't have names: the API provides no way to set the name,
648     so the type the deduplicator outputs will be nameless even if the input
649     somehow has a name, and the name should not be mixed into the hash.  */
650
651  switch (kind)
652    {
653    case CTF_K_POINTER:
654    case CTF_K_ARRAY:
655    case CTF_K_FUNCTION:
656    case CTF_K_VOLATILE:
657    case CTF_K_CONST:
658    case CTF_K_RESTRICT:
659    case CTF_K_SLICE:
660      name = NULL;
661    }
662
663  /* Mix in invariant stuff, transforming the type kind if needed.  Note that
664     the vlen is *not* hashed in: the actual variable-length info is hashed in
665     instead, piecewise.  The vlen is not part of the type, only the
666     variable-length data is: identical types with distinct vlens are quite
667     possible.  Equally, we do not want to hash in the isroot flag: both the
668     compiler and the deduplicator set the nonroot flag to indicate clashes with
669     *other types in the same TU* with the same name: so two types can easily
670     have distinct nonroot flags, yet be exactly the same type.*/
671
672  ctf_sha1_init (&hash);
673  if (name)
674    ctf_dedup_sha1_add (&hash, name, strlen (name) + 1, "name", depth);
675  ctf_dedup_sha1_add (&hash, &kind, sizeof (uint32_t), "kind", depth);
676
677  /* Hash content of this type.  */
678  switch (kind)
679    {
680    case CTF_K_UNKNOWN:
681      /* No extra state.  */
682      break;
683    case CTF_K_FORWARD:
684
685      /* Add the forwarded kind, stored in the ctt_type.  */
686      ctf_dedup_sha1_add (&hash, &tp->ctt_type, sizeof (tp->ctt_type),
687			  "forwarded kind", depth);
688      break;
689    case CTF_K_INTEGER:
690    case CTF_K_FLOAT:
691      {
692	ctf_encoding_t ep;
693	memset (&ep, 0, sizeof (ctf_encoding_t));
694
695	ctf_dedup_sha1_add (&hash, &tp->ctt_size, sizeof (uint32_t), "size",
696			    depth);
697	if (ctf_type_encoding (input, type, &ep) < 0)
698	  {
699	    whaterr = N_("error getting encoding");
700	    goto input_err;
701	  }
702	ctf_dedup_sha1_add (&hash, &ep, sizeof (ctf_encoding_t), "encoding",
703			    depth);
704	break;
705      }
706      /* Types that reference other types.  */
707    case CTF_K_TYPEDEF:
708    case CTF_K_VOLATILE:
709    case CTF_K_CONST:
710    case CTF_K_RESTRICT:
711    case CTF_K_POINTER:
712      /* Hash the referenced type, if not already hashed, and mix it in.  */
713      child_type = ctf_type_reference (input, type);
714      if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, input_num,
715				       child_type, flags, depth,
716				       populate_fun)) == NULL)
717	{
718	  whaterr = N_("error doing referenced type hashing");
719	  goto err;
720	}
721      ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "referenced type",
722			  depth);
723      citer = hval;
724
725      break;
726
727      /* The slices of two types hash identically only if the type they overlay
728	 also has the same encoding.  This is not ideal, but in practice will work
729	 well enough.  We work directly rather than using the CTF API because
730	 we do not want the slice's normal automatically-shine-through
731	 semantics to kick in here.  */
732    case CTF_K_SLICE:
733      {
734	const ctf_slice_t *slice;
735	const ctf_dtdef_t *dtd;
736	ssize_t size;
737	ssize_t increment;
738
739	child_type = ctf_type_reference (input, type);
740	ctf_get_ctt_size (input, tp, &size, &increment);
741	ctf_dedup_sha1_add (&hash, &size, sizeof (ssize_t), "size", depth);
742
743	if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, input_num,
744					 child_type, flags, depth,
745					 populate_fun)) == NULL)
746	  {
747	    whaterr = N_("error doing slice-referenced type hashing");
748	    goto err;
749	  }
750	ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "sliced type",
751			    depth);
752	citer = hval;
753
754	if ((dtd = ctf_dynamic_type (input, type)) != NULL)
755	  slice = (ctf_slice_t *) dtd->dtd_vlen;
756	else
757	  slice = (ctf_slice_t *) ((uintptr_t) tp + increment);
758
759	ctf_dedup_sha1_add (&hash, &slice->cts_offset,
760			    sizeof (slice->cts_offset), "slice offset", depth);
761	ctf_dedup_sha1_add (&hash, &slice->cts_bits,
762			    sizeof (slice->cts_bits), "slice bits", depth);
763	break;
764      }
765
766    case CTF_K_ARRAY:
767      {
768	ctf_arinfo_t ar;
769
770	if (ctf_array_info (input, type, &ar) < 0)
771	  {
772	    whaterr = N_("error getting array info");
773	    goto input_err;
774	  }
775
776	if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, input_num,
777					 ar.ctr_contents, flags, depth,
778					 populate_fun)) == NULL)
779	  {
780	    whaterr = N_("error doing array contents type hashing");
781	    goto err;
782	  }
783	ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "array contents",
784			    depth);
785	ADD_CITER (citers, hval);
786
787	if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, input_num,
788					 ar.ctr_index, flags, depth,
789					 populate_fun)) == NULL)
790	  {
791	    whaterr = N_("error doing array index type hashing");
792	    goto err;
793	  }
794	ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "array index",
795			    depth);
796	ctf_dedup_sha1_add (&hash, &ar.ctr_nelems, sizeof (ar.ctr_nelems),
797			    "element count", depth);
798	ADD_CITER (citers, hval);
799
800	break;
801      }
802    case CTF_K_FUNCTION:
803      {
804	ctf_funcinfo_t fi;
805	ctf_id_t *args;
806	uint32_t j;
807
808	if (ctf_func_type_info (input, type, &fi) < 0)
809	  {
810	    whaterr = N_("error getting func type info");
811	    goto input_err;
812	  }
813
814	if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, input_num,
815					 fi.ctc_return, flags, depth,
816					 populate_fun)) == NULL)
817	  {
818	    whaterr = N_("error getting func return type");
819	    goto err;
820	  }
821	ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "func return",
822			    depth);
823	ctf_dedup_sha1_add (&hash, &fi.ctc_argc, sizeof (fi.ctc_argc),
824			    "func argc", depth);
825	ctf_dedup_sha1_add (&hash, &fi.ctc_flags, sizeof (fi.ctc_flags),
826			    "func flags", depth);
827	ADD_CITER (citers, hval);
828
829	if ((args = calloc (fi.ctc_argc, sizeof (ctf_id_t))) == NULL)
830	  {
831	    err = ENOMEM;
832	    whaterr = N_("error doing memory allocation");
833	    goto err;
834	  }
835
836	if (ctf_func_type_args (input, type, fi.ctc_argc, args) < 0)
837	  {
838	    free (args);
839	    whaterr = N_("error getting func arg type");
840	    goto input_err;
841	  }
842	for (j = 0; j < fi.ctc_argc; j++)
843	  {
844	    if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents,
845					     input_num, args[j], flags, depth,
846					     populate_fun)) == NULL)
847	      {
848		free (args);
849		whaterr = N_("error doing func arg type hashing");
850		goto err;
851	      }
852	    ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "func arg type",
853				depth);
854	    ADD_CITER (citers, hval);
855	  }
856	free (args);
857	break;
858      }
859    case CTF_K_ENUM:
860      {
861	int val;
862	const char *ename;
863
864	ctf_dedup_sha1_add (&hash, &tp->ctt_size, sizeof (uint32_t),
865			    "enum size", depth);
866	while ((ename = ctf_enum_next (input, type, &i, &val)) != NULL)
867	  {
868	    ctf_dedup_sha1_add (&hash, ename, strlen (ename) + 1, "enumerator",
869				depth);
870	    ctf_dedup_sha1_add (&hash, &val, sizeof (val), "enumerand", depth);
871	  }
872	if (ctf_errno (input) != ECTF_NEXT_END)
873	  {
874	    whaterr = N_("error doing enum member iteration");
875	    goto input_err;
876	  }
877	break;
878      }
879    /* Top-level only.  */
880    case CTF_K_STRUCT:
881    case CTF_K_UNION:
882      {
883	ssize_t offset;
884	const char *mname;
885	ctf_id_t membtype;
886	ssize_t size;
887
888	ctf_get_ctt_size (input, tp, &size, NULL);
889	ctf_dedup_sha1_add (&hash, &size, sizeof (ssize_t), "struct size",
890			    depth);
891
892	while ((offset = ctf_member_next (input, type, &i, &mname, &membtype,
893					  0)) >= 0)
894	  {
895	    if (mname == NULL)
896	      mname = "";
897	    ctf_dedup_sha1_add (&hash, mname, strlen (mname) + 1,
898				"member name", depth);
899
900#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
901	    ctf_dprintf ("%lu: Traversing to member %s\n", depth, mname);
902#endif
903	    if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents,
904					     input_num, membtype, flags, depth,
905					     populate_fun)) == NULL)
906	      {
907		whaterr = N_("error doing struct/union member type hashing");
908		goto iterr;
909	      }
910
911	    ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "member hash",
912				depth);
913	    ctf_dedup_sha1_add (&hash, &offset, sizeof (offset), "member offset",
914				depth);
915	    ADD_CITER (citers, hval);
916	  }
917	if (ctf_errno (input) != ECTF_NEXT_END)
918	  {
919	    whaterr = N_("error doing struct/union member iteration");
920	    goto input_err;
921	  }
922	break;
923      }
924    default:
925      whaterr = N_("error: unknown type kind");
926      goto err;
927    }
928  ctf_sha1_fini (&hash, hashbuf);
929
930  if ((hval = intern (fp, strdup (hashbuf))) == NULL)
931    {
932      whaterr = N_("cannot intern hash");
933      goto oom;
934    }
935
936  /* Populate the citers for this type's subtypes, now the hash for the type
937     itself is known.  */
938  whaterr = N_("error tracking citers");
939
940  if (citer)
941    {
942      ctf_dynset_t *citer_hashes;
943
944      if ((citer_hashes = make_set_element (d->cd_citers, citer)) == NULL)
945	goto oom;
946      if (ctf_dynset_cinsert (citer_hashes, hval) < 0)
947	goto oom;
948    }
949  else if (citers)
950    {
951      const void *k;
952
953      while ((err = ctf_dynset_cnext (citers, &i, &k)) == 0)
954	{
955	  ctf_dynset_t *citer_hashes;
956	  citer = (const char *) k;
957
958	  if ((citer_hashes = make_set_element (d->cd_citers, citer)) == NULL)
959	    goto oom;
960
961	  if (ctf_dynset_exists (citer_hashes, hval, NULL))
962	    continue;
963	  if (ctf_dynset_cinsert (citer_hashes, hval) < 0)
964	    goto oom;
965	}
966      if (err != ECTF_NEXT_END)
967	goto err;
968      ctf_dynset_destroy (citers);
969    }
970
971  return hval;
972
973 iterr:
974  ctf_next_destroy (i);
975 input_err:
976  err = ctf_errno (input);
977 err:
978  ctf_sha1_fini (&hash, NULL);
979  ctf_err_warn (fp, 0, err, _("%s (%i): %s: during type hashing for type %lx, "
980			      "kind %i"), ctf_link_input_name (input),
981		input_num, gettext (whaterr), type, kind);
982  return NULL;
983 oom:
984  ctf_set_errno (fp, errno);
985  ctf_err_warn (fp, 0, 0, _("%s (%i): %s: during type hashing for type %lx, "
986			    "kind %i"), ctf_link_input_name (input),
987		input_num, gettext (whaterr), type, kind);
988  return NULL;
989}
990
991/* Hash a TYPE in the INPUT: FP is the eventual output, where the ctf_dedup
992   state is stored.  INPUT_NUM is the number of this input in the set of inputs.
993   Record its hash in FP's cd_type_hashes once it is known.  PARENTS is
994   described in the comment above ctf_dedup.
995
996   (The flags argument currently accepts only the flag
997   CTF_DEDUP_HASH_INTERNAL_CHILD, an implementation detail used to prevent
998   struct/union hashing in recursive traversals below the TYPE.)
999
1000   We use the CTF API rather than direct access wherever possible, because types
1001   that appear identical through the API should be considered identical, with
1002   one exception: slices should only be considered identical to other slices,
1003   not to the corresponding unsliced type.
1004
1005   The POPULATE_FUN is a mandatory hook that populates other mappings with each
1006   type we see (excepting types that are recursively hashed as stubs).  The
1007   caller should not rely on the order of calls to this hook, though it will be
1008   called at least once for every non-stub reference to every type.
1009
1010   Returns a hash value (an atom), or NULL on error.  */
1011
1012static const char *
1013ctf_dedup_hash_type (ctf_dict_t *fp, ctf_dict_t *input,
1014		     ctf_dict_t **inputs, uint32_t *parents,
1015		     int input_num, ctf_id_t type, int flags,
1016		     unsigned long depth,
1017		     int (*populate_fun) (ctf_dict_t *fp,
1018					  ctf_dict_t *input,
1019					  ctf_dict_t **inputs,
1020					  int input_num,
1021					  ctf_id_t type,
1022					  void *id,
1023					  const char *decorated_name,
1024					  const char *hash))
1025{
1026  ctf_dedup_t *d = &fp->ctf_dedup;
1027  const ctf_type_t *tp;
1028  void *type_id;
1029  const char *hval = NULL;
1030  const char *name;
1031  const char *whaterr;
1032  const char *decorated = NULL;
1033  uint32_t kind, fwdkind;
1034
1035  depth++;
1036
1037#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
1038  ctf_dprintf ("%lu: ctf_dedup_hash_type (%i, %lx, flags %x)\n", depth, input_num, type, flags);
1039#endif
1040
1041  /* The unimplemented type doesn't really exist, but must be noted in parent
1042     hashes: so it gets a fixed, arbitrary hash.  */
1043  if (type == 0)
1044    return "00000000000000000000";
1045
1046  /* Possible optimization: if the input type is in the parent type space, just
1047     copy recursively-cited hashes from the parent's types into the output
1048     mapping rather than rehashing them.  */
1049
1050  type_id = CTF_DEDUP_GID (fp, input_num, type);
1051
1052  if ((tp = ctf_lookup_by_id (&input, type)) == NULL)
1053    {
1054      ctf_set_errno (fp, ctf_errno (input));
1055      ctf_err_warn (fp, 0, 0, _("%s (%i): lookup failure for type %lx: "
1056				"flags %x"), ctf_link_input_name (input),
1057		    input_num, type, flags);
1058      return NULL;		/* errno is set for us.  */
1059    }
1060
1061  kind = LCTF_INFO_KIND (input, tp->ctt_info);
1062  name = ctf_strraw (input, tp->ctt_name);
1063
1064  if (tp->ctt_name == 0 || !name || name[0] == '\0')
1065    name = NULL;
1066
1067  /* Decorate the name appropriately for the namespace it appears in: forwards
1068     appear in the namespace of their referent.  */
1069
1070  fwdkind = kind;
1071  if (name)
1072    {
1073      if (kind == CTF_K_FORWARD)
1074	fwdkind = tp->ctt_type;
1075
1076      if ((decorated = ctf_decorate_type_name (fp, name, fwdkind)) == NULL)
1077	return NULL;				/* errno is set for us.  */
1078    }
1079
1080  /* If not hashing a stub, we can rely on various sorts of caches.
1081
1082     Optimization opportunity: we may be able to avoid calling the populate_fun
1083     sometimes here.  */
1084
1085  if (!ctf_dedup_is_stub (name, kind, fwdkind, flags))
1086    {
1087      if ((hval = ctf_dynhash_lookup (d->cd_type_hashes, type_id)) != NULL)
1088	{
1089#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
1090	  ctf_dprintf ("%lu: Known hash for ID %i/%lx: %s\n", depth, input_num,
1091		       type,  hval);
1092#endif
1093	  populate_fun (fp, input, inputs, input_num, type, type_id,
1094			decorated, hval);
1095
1096	  return hval;
1097	}
1098    }
1099
1100  /* We have never seen this type before, and must figure out its hash and the
1101     hashes of the types it cites.
1102
1103     Hash this type, and call ourselves recursively.  (The hashing part is
1104     optional, and is disabled if overidden_hval is set.)  */
1105
1106  if ((hval = ctf_dedup_rhash_type (fp, input, inputs, parents, input_num,
1107				    type, type_id, tp, name, decorated,
1108				    kind, flags, depth, populate_fun)) == NULL)
1109    return NULL;				/* errno is set for us.  */
1110
1111  /* The hash of this type is now known: record it unless caching is unsafe
1112     because the hash value will change later.  This will be the final storage
1113     of this type's hash, so we call the population function on it.  */
1114
1115  if (!ctf_dedup_is_stub (name, kind, fwdkind, flags))
1116    {
1117#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
1118      ctf_dprintf ("Caching %lx, ID %p (%s), %s in final location\n", type,
1119		   type_id, name ? name : "", hval);
1120#endif
1121
1122      if (ctf_dynhash_cinsert (d->cd_type_hashes, type_id, hval) < 0)
1123	{
1124	  whaterr = N_("error hash caching");
1125	  goto oom;
1126	}
1127
1128      if (populate_fun (fp, input, inputs, input_num, type, type_id,
1129			decorated, hval) < 0)
1130	{
1131	  whaterr = N_("error calling population function");
1132	  goto err;				/* errno is set for us. */
1133	}
1134    }
1135
1136#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
1137  ctf_dprintf ("%lu: Returning final hash for ID %i/%lx: %s\n", depth,
1138	       input_num, type, hval);
1139#endif
1140  return hval;
1141
1142 oom:
1143  ctf_set_errno (fp, errno);
1144 err:
1145  ctf_err_warn (fp, 0, 0, _("%s (%i): %s: during type hashing, "
1146			    "type %lx, kind %i"),
1147		ctf_link_input_name (input), input_num,
1148		gettext (whaterr), type, kind);
1149  return NULL;
1150}
1151
1152/* Populate a number of useful mappings not directly used by the hashing
1153   machinery: the output mapping, the cd_name_counts mapping from name -> hash
1154   -> count of hashval deduplication state for a given hashed type, and the
1155   cd_output_first_tu mapping.  */
1156
1157static int
1158ctf_dedup_populate_mappings (ctf_dict_t *fp, ctf_dict_t *input _libctf_unused_,
1159			     ctf_dict_t **inputs _libctf_unused_,
1160			     int input_num _libctf_unused_,
1161			     ctf_id_t type _libctf_unused_, void *id,
1162			     const char *decorated_name,
1163			     const char *hval)
1164{
1165  ctf_dedup_t *d = &fp->ctf_dedup;
1166  ctf_dynset_t *type_ids;
1167  ctf_dynhash_t *name_counts;
1168  long int count;
1169
1170#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
1171  ctf_dprintf ("Hash %s, %s, into output mapping for %i/%lx @ %s\n",
1172	       hval, decorated_name ? decorated_name : "(unnamed)",
1173	       input_num, type, ctf_link_input_name (input));
1174
1175  const char *orig_hval;
1176
1177  /* Make sure we never map a single GID to multiple hash values.  */
1178
1179  if ((orig_hval = ctf_dynhash_lookup (d->cd_output_mapping_guard, id)) != NULL)
1180    {
1181      /* We can rely on pointer identity here, since all hashes are
1182	 interned.  */
1183      if (!ctf_assert (fp, orig_hval == hval))
1184	return -1;
1185    }
1186  else
1187    if (ctf_dynhash_cinsert (d->cd_output_mapping_guard, id, hval) < 0)
1188      return ctf_set_errno (fp, errno);
1189#endif
1190
1191  /* Record the type in the output mapping: if this is the first time this type
1192     has been seen, also record it in the cd_output_first_gid.  Because we
1193     traverse types in TU order and we do not merge types after the hashing
1194     phase, this will be the lowest TU this type ever appears in.  */
1195
1196  if ((type_ids = ctf_dynhash_lookup (d->cd_output_mapping,
1197				      hval)) == NULL)
1198    {
1199      if (ctf_dynhash_cinsert (d->cd_output_first_gid, hval, id) < 0)
1200	return ctf_set_errno (fp, errno);
1201
1202      if ((type_ids = ctf_dynset_create (htab_hash_pointer,
1203					 htab_eq_pointer,
1204					 NULL)) == NULL)
1205	return ctf_set_errno (fp, errno);
1206      if (ctf_dynhash_insert (d->cd_output_mapping, (void *) hval,
1207			      type_ids) < 0)
1208	{
1209	  ctf_dynset_destroy (type_ids);
1210	  return ctf_set_errno (fp, errno);
1211	}
1212    }
1213#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
1214    {
1215      /* Verify that all types with this hash are of the same kind, and that the
1216	 first TU a type was seen in never falls.  */
1217
1218      int err;
1219      const void *one_id;
1220      ctf_next_t *i = NULL;
1221      int orig_kind = ctf_type_kind_unsliced (input, type);
1222      int orig_first_tu;
1223
1224      orig_first_tu = CTF_DEDUP_GID_TO_INPUT
1225	(ctf_dynhash_lookup (d->cd_output_first_gid, hval));
1226      if (!ctf_assert (fp, orig_first_tu <= CTF_DEDUP_GID_TO_INPUT (id)))
1227	return -1;
1228
1229      while ((err = ctf_dynset_cnext (type_ids, &i, &one_id)) == 0)
1230	{
1231	  ctf_dict_t *foo = inputs[CTF_DEDUP_GID_TO_INPUT (one_id)];
1232	  ctf_id_t bar = CTF_DEDUP_GID_TO_TYPE (one_id);
1233	  if (ctf_type_kind_unsliced (foo, bar) != orig_kind)
1234	    {
1235	      ctf_err_warn (fp, 1, 0, "added wrong kind to output mapping "
1236			    "for hash %s named %s: %p/%lx from %s is "
1237			    "kind %i, but newly-added %p/%lx from %s is "
1238			    "kind %i", hval,
1239			    decorated_name ? decorated_name : "(unnamed)",
1240			    (void *) foo, bar,
1241			    ctf_link_input_name (foo),
1242			    ctf_type_kind_unsliced (foo, bar),
1243			    (void *) input, type,
1244			    ctf_link_input_name (input), orig_kind);
1245	      if (!ctf_assert (fp, ctf_type_kind_unsliced (foo, bar)
1246			       == orig_kind))
1247		return -1;
1248	    }
1249	}
1250      if (err != ECTF_NEXT_END)
1251	return ctf_set_errno (fp, err);
1252    }
1253#endif
1254
1255  /* This function will be repeatedly called for the same types many times:
1256     don't waste time reinserting the same keys in that case.  */
1257  if (!ctf_dynset_exists (type_ids, id, NULL)
1258      && ctf_dynset_insert (type_ids, id) < 0)
1259    return ctf_set_errno (fp, errno);
1260
1261  /* The rest only needs to happen for types with names.  */
1262  if (!decorated_name)
1263    return 0;
1264
1265  /* Count the number of occurrences of the hash value for this GID.  */
1266
1267  hval = ctf_dynhash_lookup (d->cd_type_hashes, id);
1268
1269  /* Mapping from name -> hash(hashval, count) not already present?  */
1270  if ((name_counts = ctf_dynhash_lookup (d->cd_name_counts,
1271					 decorated_name)) == NULL)
1272    {
1273      if ((name_counts = ctf_dynhash_create (ctf_hash_string,
1274					     ctf_hash_eq_string,
1275					     NULL, NULL)) == NULL)
1276	  return ctf_set_errno (fp, errno);
1277      if (ctf_dynhash_cinsert (d->cd_name_counts, decorated_name,
1278			       name_counts) < 0)
1279	{
1280	  ctf_dynhash_destroy (name_counts);
1281	  return ctf_set_errno (fp, errno);
1282	}
1283    }
1284
1285  /* This will, conveniently, return NULL (i.e. 0) for a new entry.  */
1286  count = (long int) (uintptr_t) ctf_dynhash_lookup (name_counts, hval);
1287
1288  if (ctf_dynhash_cinsert (name_counts, hval,
1289			   (const void *) (uintptr_t) (count + 1)) < 0)
1290    return ctf_set_errno (fp, errno);
1291
1292  return 0;
1293}
1294
1295/* Mark a single hash as corresponding to a conflicting type.  Mark all types
1296   that cite it as conflicting as well, terminating the recursive walk only when
1297   types that are already conflicted or types do not cite other types are seen.
1298   (Tagged structures and unions do not appear in the cd_citers graph, so the
1299   walk also terminates there, since any reference to a conflicting structure is
1300   just going to reference an unconflicting forward instead: see
1301   ctf_dedup_maybe_synthesize_forward.)  */
1302
1303static int
1304ctf_dedup_mark_conflicting_hash (ctf_dict_t *fp, const char *hval)
1305{
1306  ctf_dedup_t *d = &fp->ctf_dedup;
1307  ctf_next_t *i = NULL;
1308  int err;
1309  const void *k;
1310  ctf_dynset_t *citers;
1311
1312  /* Mark conflicted if not already so marked.  */
1313  if (ctf_dynset_exists (d->cd_conflicting_types, hval, NULL))
1314    return 0;
1315
1316  ctf_dprintf ("Marking %s as conflicted\n", hval);
1317
1318  if (ctf_dynset_cinsert (d->cd_conflicting_types, hval) < 0)
1319    {
1320      ctf_dprintf ("Out of memory marking %s as conflicted\n", hval);
1321      ctf_set_errno (fp, errno);
1322      return -1;
1323    }
1324
1325  /* If any types cite this type, mark them conflicted too.  */
1326  if ((citers = ctf_dynhash_lookup (d->cd_citers, hval)) == NULL)
1327    return 0;
1328
1329  while ((err = ctf_dynset_cnext (citers, &i, &k)) == 0)
1330    {
1331      const char *hv = (const char *) k;
1332
1333      if (ctf_dynset_exists (d->cd_conflicting_types, hv, NULL))
1334	continue;
1335
1336      if (ctf_dedup_mark_conflicting_hash (fp, hv) < 0)
1337	{
1338	  ctf_next_destroy (i);
1339	  return -1;				/* errno is set for us.  */
1340	}
1341    }
1342  if (err != ECTF_NEXT_END)
1343    return ctf_set_errno (fp, err);
1344
1345  return 0;
1346}
1347
1348/* Look up a type kind from the output mapping, given a type hash value.  */
1349static int
1350ctf_dedup_hash_kind (ctf_dict_t *fp, ctf_dict_t **inputs, const char *hash)
1351{
1352  ctf_dedup_t *d = &fp->ctf_dedup;
1353  void *id;
1354  ctf_dynset_t *type_ids;
1355
1356  /* Precondition: the output mapping is populated.  */
1357  if (!ctf_assert (fp, ctf_dynhash_elements (d->cd_output_mapping) > 0))
1358    return -1;
1359
1360  /* Look up some GID from the output hash for this type.  (They are all
1361     identical, so we can pick any).  Don't assert if someone calls this
1362     function wrongly, but do assert if the output mapping knows about the hash,
1363     but has nothing associated with it.  */
1364
1365  type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hash);
1366  if (!type_ids)
1367    {
1368      ctf_dprintf ("Looked up type kind by nonexistent hash %s.\n", hash);
1369      return ctf_set_errno (fp, ECTF_INTERNAL);
1370    }
1371  id = ctf_dynset_lookup_any (type_ids);
1372  if (!ctf_assert (fp, id))
1373    return -1;
1374
1375  return ctf_type_kind_unsliced (inputs[CTF_DEDUP_GID_TO_INPUT (id)],
1376				 CTF_DEDUP_GID_TO_TYPE (id));
1377}
1378
1379/* Used to keep a count of types: i.e. distinct type hash values.  */
1380typedef struct ctf_dedup_type_counter
1381{
1382  ctf_dict_t *fp;
1383  ctf_dict_t **inputs;
1384  int num_non_forwards;
1385} ctf_dedup_type_counter_t;
1386
1387/* Add to the type counter for one name entry from the cd_name_counts.  */
1388static int
1389ctf_dedup_count_types (void *key_, void *value _libctf_unused_, void *arg_)
1390{
1391  const char *hval = (const char *) key_;
1392  int kind;
1393  ctf_dedup_type_counter_t *arg = (ctf_dedup_type_counter_t *) arg_;
1394
1395  kind = ctf_dedup_hash_kind (arg->fp, arg->inputs, hval);
1396
1397  /* We rely on ctf_dedup_hash_kind setting the fp to -ECTF_INTERNAL on error to
1398     smuggle errors out of here.  */
1399
1400  if (kind != CTF_K_FORWARD)
1401    {
1402      arg->num_non_forwards++;
1403      ctf_dprintf ("Counting hash %s: kind %i: num_non_forwards is %i\n",
1404		   hval, kind, arg->num_non_forwards);
1405    }
1406
1407  /* We only need to know if there is more than one non-forward (an ambiguous
1408     type): don't waste time iterating any more than needed to figure that
1409     out.  */
1410
1411  if (arg->num_non_forwards > 1)
1412    return 1;
1413
1414  return 0;
1415}
1416
1417/* Detect name ambiguity and mark ambiguous names as conflicting, other than the
1418   most common.  */
1419static int
1420ctf_dedup_detect_name_ambiguity (ctf_dict_t *fp, ctf_dict_t **inputs)
1421{
1422  ctf_dedup_t *d = &fp->ctf_dedup;
1423  ctf_next_t *i = NULL;
1424  void *k;
1425  void *v;
1426  int err;
1427  const char *whaterr;
1428
1429  /* Go through cd_name_counts for all CTF namespaces in turn.  */
1430
1431  while ((err = ctf_dynhash_next (d->cd_name_counts, &i, &k, &v)) == 0)
1432    {
1433      const char *decorated = (const char *) k;
1434      ctf_dynhash_t *name_counts = (ctf_dynhash_t *) v;
1435      ctf_next_t *j = NULL;
1436
1437      /* If this is a forwardable kind or a forward (which we can tell without
1438	 consulting the type because its decorated name has a space as its
1439	 second character: see ctf_decorate_type_name), we are only interested
1440	 in whether this name has many hashes associated with it: any such name
1441	 is necessarily ambiguous, and types with that name are conflicting.
1442	 Once we know whether this is true, we can skip to the next name: so use
1443	 ctf_dynhash_iter_find for efficiency.  */
1444
1445      if (decorated[0] != '\0' && decorated[1] == ' ')
1446	{
1447	  ctf_dedup_type_counter_t counters = { fp, inputs, 0 };
1448	  ctf_dynhash_t *counts = (ctf_dynhash_t *) v;
1449
1450	  ctf_dynhash_iter_find (counts, ctf_dedup_count_types, &counters);
1451
1452	  /* Check for assertion failure and pass it up.  */
1453	  if (ctf_errno (fp) == ECTF_INTERNAL)
1454	    goto assert_err;
1455
1456	  if (counters.num_non_forwards > 1)
1457	    {
1458	      const void *hval_;
1459
1460	      while ((err = ctf_dynhash_cnext (counts, &j, &hval_, NULL)) == 0)
1461		{
1462		  const char *hval = (const char *) hval_;
1463		  ctf_dynset_t *type_ids;
1464		  void *id;
1465		  int kind;
1466
1467		  /* Dig through the types in this hash to find the non-forwards
1468		     and mark them ambiguous.  */
1469
1470		  type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hval);
1471
1472		  /* Nonexistent? Must be a forward with no referent.  */
1473		  if (!type_ids)
1474		    continue;
1475
1476		  id = ctf_dynset_lookup_any (type_ids);
1477
1478		  kind = ctf_type_kind (inputs[CTF_DEDUP_GID_TO_INPUT (id)],
1479					CTF_DEDUP_GID_TO_TYPE (id));
1480
1481		  if (kind != CTF_K_FORWARD)
1482		    {
1483		      ctf_dprintf ("Marking %p, with hash %s, conflicting: one "
1484				   "of many non-forward GIDs for %s\n", id,
1485				   hval, (char *) k);
1486		      ctf_dedup_mark_conflicting_hash (fp, hval);
1487		    }
1488		}
1489	      if (err != ECTF_NEXT_END)
1490		{
1491		  whaterr = N_("error marking conflicting structs/unions");
1492		  goto iterr;
1493		}
1494	    }
1495	}
1496      else
1497	{
1498	  /* This is an ordinary type.  Find the most common type with this
1499	     name, and mark it unconflicting: all others are conflicting.  (We
1500	     cannot do this sort of popularity contest with forwardable types
1501	     because any forwards to that type would be immediately unified with
1502	     the most-popular type on insertion, and we want conflicting structs
1503	     et al to have all forwards left intact, so the user is notified
1504	     that this type is conflicting.  TODO: improve this in future by
1505	     setting such forwards non-root-visible.)
1506
1507	     If multiple distinct types are "most common", pick the one that
1508	     appears first on the link line, and within that, the one with the
1509	     lowest type ID.  (See sort_output_mapping.)  */
1510
1511	  const void *key;
1512	  const void *count;
1513	  const char *hval;
1514	  long max_hcount = -1;
1515	  void *max_gid = NULL;
1516	  const char *max_hval = NULL;
1517
1518	  if (ctf_dynhash_elements (name_counts) <= 1)
1519	    continue;
1520
1521	  /* First find the most common.  */
1522	  while ((err = ctf_dynhash_cnext (name_counts, &j, &key, &count)) == 0)
1523	    {
1524	      hval = (const char *) key;
1525
1526	      if ((long int) (uintptr_t) count > max_hcount)
1527		{
1528		  max_hcount = (long int) (uintptr_t) count;
1529		  max_hval = hval;
1530		  max_gid = ctf_dynhash_lookup (d->cd_output_first_gid, hval);
1531		}
1532	      else if ((long int) (uintptr_t) count == max_hcount)
1533		{
1534		  void *gid = ctf_dynhash_lookup (d->cd_output_first_gid, hval);
1535
1536		  if (CTF_DEDUP_GID_TO_INPUT(gid) < CTF_DEDUP_GID_TO_INPUT(max_gid)
1537		      || (CTF_DEDUP_GID_TO_INPUT(gid) == CTF_DEDUP_GID_TO_INPUT(max_gid)
1538			  && CTF_DEDUP_GID_TO_TYPE(gid) < CTF_DEDUP_GID_TO_TYPE(max_gid)))
1539		    {
1540		      max_hval = hval;
1541		      max_gid = ctf_dynhash_lookup (d->cd_output_first_gid, hval);
1542		    }
1543		}
1544	    }
1545	  if (err != ECTF_NEXT_END)
1546	    {
1547	      whaterr = N_("error finding commonest conflicting type");
1548	      goto iterr;
1549	    }
1550
1551	  /* Mark all the others as conflicting.   */
1552	  while ((err = ctf_dynhash_cnext (name_counts, &j, &key, NULL)) == 0)
1553	    {
1554	      hval = (const char *) key;
1555	      if (strcmp (max_hval, hval) == 0)
1556		continue;
1557
1558	      ctf_dprintf ("Marking %s, an uncommon hash for %s, conflicting\n",
1559			   hval, (const char *) k);
1560	      if (ctf_dedup_mark_conflicting_hash (fp, hval) < 0)
1561		{
1562		  whaterr = N_("error marking hashes as conflicting");
1563		  goto err;
1564		}
1565	    }
1566	  if (err != ECTF_NEXT_END)
1567	    {
1568	      whaterr = N_("marking uncommon conflicting types");
1569	      goto iterr;
1570	    }
1571	}
1572    }
1573  if (err != ECTF_NEXT_END)
1574    {
1575      whaterr = N_("scanning for ambiguous names");
1576      goto iterr;
1577    }
1578
1579  return 0;
1580
1581 err:
1582  ctf_next_destroy (i);
1583  ctf_err_warn (fp, 0, 0, "%s", gettext (whaterr));
1584  return -1;					/* errno is set for us.  */
1585
1586 iterr:
1587  ctf_err_warn (fp, 0, err, _("iteration failed: %s"), gettext (whaterr));
1588  return ctf_set_errno (fp, err);
1589
1590 assert_err:
1591  ctf_next_destroy (i);
1592  return -1; 					/* errno is set for us.  */
1593}
1594
1595/* Initialize the deduplication machinery.  */
1596
1597static int
1598ctf_dedup_init (ctf_dict_t *fp)
1599{
1600  ctf_dedup_t *d = &fp->ctf_dedup;
1601  size_t i;
1602
1603  if (ctf_dedup_atoms_init (fp) < 0)
1604      goto oom;
1605
1606#if IDS_NEED_ALLOCATION
1607  if ((d->cd_id_to_dict_t = ctf_dynhash_create (ctf_hash_type_id_key,
1608						ctf_hash_eq_type_id_key,
1609						free, NULL)) == NULL)
1610    goto oom;
1611#endif
1612
1613  for (i = 0; i < 4; i++)
1614    {
1615      if ((d->cd_decorated_names[i] = ctf_dynhash_create (ctf_hash_string,
1616							  ctf_hash_eq_string,
1617							  NULL, NULL)) == NULL)
1618	goto oom;
1619    }
1620
1621  if ((d->cd_name_counts
1622       = ctf_dynhash_create (ctf_hash_string,
1623			     ctf_hash_eq_string, NULL,
1624			     (ctf_hash_free_fun) ctf_dynhash_destroy)) == NULL)
1625    goto oom;
1626
1627  if ((d->cd_type_hashes
1628       = ctf_dynhash_create (ctf_hash_integer,
1629			     ctf_hash_eq_integer,
1630			     NULL, NULL)) == NULL)
1631    goto oom;
1632
1633  if ((d->cd_struct_origin
1634       = ctf_dynhash_create (ctf_hash_string,
1635			     ctf_hash_eq_string,
1636			     NULL, NULL)) == NULL)
1637    goto oom;
1638
1639  if ((d->cd_citers
1640       = ctf_dynhash_create (ctf_hash_string,
1641			     ctf_hash_eq_string, NULL,
1642			     (ctf_hash_free_fun) ctf_dynset_destroy)) == NULL)
1643    goto oom;
1644
1645  if ((d->cd_output_mapping
1646       = ctf_dynhash_create (ctf_hash_string,
1647			     ctf_hash_eq_string, NULL,
1648			     (ctf_hash_free_fun) ctf_dynset_destroy)) == NULL)
1649    goto oom;
1650
1651  if ((d->cd_output_first_gid
1652       = ctf_dynhash_create (ctf_hash_string,
1653			     ctf_hash_eq_string,
1654			     NULL, NULL)) == NULL)
1655    goto oom;
1656
1657#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
1658  if ((d->cd_output_mapping_guard
1659       = ctf_dynhash_create (ctf_hash_integer,
1660			     ctf_hash_eq_integer, NULL, NULL)) == NULL)
1661    goto oom;
1662#endif
1663
1664  if ((d->cd_input_nums
1665       = ctf_dynhash_create (ctf_hash_integer,
1666			     ctf_hash_eq_integer,
1667			     NULL, NULL)) == NULL)
1668    goto oom;
1669
1670  if ((d->cd_emission_struct_members
1671       = ctf_dynhash_create (ctf_hash_integer,
1672			     ctf_hash_eq_integer,
1673			     NULL, NULL)) == NULL)
1674    goto oom;
1675
1676  if ((d->cd_conflicting_types
1677       = ctf_dynset_create (htab_hash_string,
1678			    htab_eq_string, NULL)) == NULL)
1679    goto oom;
1680
1681  return 0;
1682
1683 oom:
1684  ctf_err_warn (fp, 0, ENOMEM, _("ctf_dedup_init: cannot initialize: "
1685				 "out of memory"));
1686  return ctf_set_errno (fp, ENOMEM);
1687}
1688
1689/* No ctf_dedup calls are allowed after this call other than starting a new
1690   deduplication via ctf_dedup (not even ctf_dedup_type_mapping lookups).  */
1691void
1692ctf_dedup_fini (ctf_dict_t *fp, ctf_dict_t **outputs, uint32_t noutputs)
1693{
1694  ctf_dedup_t *d = &fp->ctf_dedup;
1695  size_t i;
1696
1697  /* ctf_dedup_atoms is kept across links.  */
1698#if IDS_NEED_ALLOCATION
1699  ctf_dynhash_destroy (d->cd_id_to_dict_t);
1700#endif
1701  for (i = 0; i < 4; i++)
1702    ctf_dynhash_destroy (d->cd_decorated_names[i]);
1703  ctf_dynhash_destroy (d->cd_name_counts);
1704  ctf_dynhash_destroy (d->cd_type_hashes);
1705  ctf_dynhash_destroy (d->cd_struct_origin);
1706  ctf_dynhash_destroy (d->cd_citers);
1707  ctf_dynhash_destroy (d->cd_output_mapping);
1708  ctf_dynhash_destroy (d->cd_output_first_gid);
1709#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
1710  ctf_dynhash_destroy (d->cd_output_mapping_guard);
1711#endif
1712  ctf_dynhash_destroy (d->cd_input_nums);
1713  ctf_dynhash_destroy (d->cd_emission_struct_members);
1714  ctf_dynset_destroy (d->cd_conflicting_types);
1715
1716  /* Free the per-output state.  */
1717  if (outputs)
1718    {
1719      for (i = 0; i < noutputs; i++)
1720	{
1721	  ctf_dedup_t *od = &outputs[i]->ctf_dedup;
1722	  ctf_dynhash_destroy (od->cd_output_emission_hashes);
1723	  ctf_dynhash_destroy (od->cd_output_emission_conflicted_forwards);
1724	  ctf_dict_close (od->cd_output);
1725	}
1726    }
1727  memset (d, 0, sizeof (ctf_dedup_t));
1728}
1729
1730/* Return 1 if this type is cited by multiple input dictionaries.  */
1731
1732static int
1733ctf_dedup_multiple_input_dicts (ctf_dict_t *output, ctf_dict_t **inputs,
1734				const char *hval)
1735{
1736  ctf_dedup_t *d = &output->ctf_dedup;
1737  ctf_dynset_t *type_ids;
1738  ctf_next_t *i = NULL;
1739  void *id;
1740  ctf_dict_t *found = NULL, *relative_found = NULL;
1741  const char *type_id;
1742  ctf_dict_t *input_fp;
1743  ctf_id_t input_id;
1744  const char *name;
1745  const char *decorated;
1746  int fwdkind;
1747  int multiple = 0;
1748  int err;
1749
1750  type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hval);
1751  if (!ctf_assert (output, type_ids))
1752    return -1;
1753
1754  /* Scan across the IDs until we find proof that two disjoint dictionaries
1755     are referenced.  Exit as soon as possible.  Optimization opportunity, but
1756     possibly not worth it, given that this is only executed in
1757     CTF_LINK_SHARE_DUPLICATED mode.  */
1758
1759  while ((err = ctf_dynset_next (type_ids, &i, &id)) == 0)
1760    {
1761      ctf_dict_t *fp = inputs[CTF_DEDUP_GID_TO_INPUT (id)];
1762
1763      if (fp == found || fp == relative_found)
1764	continue;
1765
1766      if (!found)
1767	{
1768	  found = fp;
1769	  continue;
1770	}
1771
1772      if (!relative_found
1773	  && (fp->ctf_parent == found || found->ctf_parent == fp))
1774	{
1775	  relative_found = fp;
1776	  continue;
1777	}
1778
1779      multiple = 1;
1780      ctf_next_destroy (i);
1781      break;
1782    }
1783  if ((err != ECTF_NEXT_END) && (err != 0))
1784    {
1785      ctf_err_warn (output, 0, err, _("iteration error "
1786				      "propagating conflictedness"));
1787      return ctf_set_errno (output, err);
1788    }
1789
1790  if (multiple)
1791    return multiple;
1792
1793  /* This type itself does not appear in multiple input dicts: how about another
1794     related type with the same name (e.g. a forward if this is a struct,
1795     etc).  */
1796
1797  type_id = ctf_dynset_lookup_any (type_ids);
1798  if (!ctf_assert (output, type_id))
1799    return -1;
1800
1801  input_fp = inputs[CTF_DEDUP_GID_TO_INPUT (type_id)];
1802  input_id = CTF_DEDUP_GID_TO_TYPE (type_id);
1803  fwdkind = ctf_type_kind_forwarded (input_fp, input_id);
1804  name = ctf_type_name_raw (input_fp, input_id);
1805
1806  if ((fwdkind == CTF_K_STRUCT || fwdkind == CTF_K_UNION)
1807      && name[0] != '\0')
1808    {
1809      const void *origin;
1810
1811      if ((decorated = ctf_decorate_type_name (output, name,
1812					       fwdkind)) == NULL)
1813	return -1;				/* errno is set for us.  */
1814
1815      origin = ctf_dynhash_lookup (d->cd_struct_origin, decorated);
1816      if ((origin != NULL) && (CTF_DEDUP_GID_TO_INPUT (origin) < 0))
1817	multiple = 1;
1818    }
1819
1820  return multiple;
1821}
1822
1823/* Demote unconflicting types which reference only one input, or which reference
1824   two inputs where one input is the parent of the other, into conflicting
1825   types.  Only used if the link mode is CTF_LINK_SHARE_DUPLICATED.  */
1826
1827static int
1828ctf_dedup_conflictify_unshared (ctf_dict_t *output, ctf_dict_t **inputs)
1829{
1830  ctf_dedup_t *d = &output->ctf_dedup;
1831  ctf_next_t *i = NULL;
1832  int err;
1833  const void *k;
1834  ctf_dynset_t *to_mark = NULL;
1835
1836  if ((to_mark = ctf_dynset_create (htab_hash_string, htab_eq_string,
1837				    NULL)) == NULL)
1838    goto err_no;
1839
1840  while ((err = ctf_dynhash_cnext (d->cd_output_mapping, &i, &k, NULL)) == 0)
1841    {
1842      const char *hval = (const char *) k;
1843      int conflicting;
1844
1845      /* Types referenced by only one dict, with no type appearing under that
1846	 name elsewhere, are marked conflicting.  */
1847
1848      conflicting = !ctf_dedup_multiple_input_dicts (output, inputs, hval);
1849
1850      if (conflicting < 0)
1851	goto err;				/* errno is set for us.  */
1852
1853      if (conflicting)
1854	if (ctf_dynset_cinsert (to_mark, hval) < 0)
1855	  goto err;
1856    }
1857  if (err != ECTF_NEXT_END)
1858    goto iterr;
1859
1860  while ((err = ctf_dynset_cnext (to_mark, &i, &k)) == 0)
1861    {
1862      const char *hval = (const char *) k;
1863
1864      if (ctf_dedup_mark_conflicting_hash (output, hval) < 0)
1865	goto err;
1866    }
1867  if (err != ECTF_NEXT_END)
1868    goto iterr;
1869
1870  ctf_dynset_destroy (to_mark);
1871
1872  return 0;
1873
1874 err_no:
1875  ctf_set_errno (output, errno);
1876 err:
1877  err = ctf_errno (output);
1878  ctf_next_destroy (i);
1879 iterr:
1880  ctf_dynset_destroy (to_mark);
1881  ctf_err_warn (output, 0, err, _("conflictifying unshared types"));
1882  return ctf_set_errno (output, err);
1883}
1884
1885/* The core deduplicator.  Populate cd_output_mapping in the output ctf_dedup
1886   with a mapping of all types that belong in this dictionary and where they
1887   come from, and cd_conflicting_types with an indication of whether each type
1888   is conflicted or not.  OUTPUT is the top-level output: INPUTS is the array of
1889   input dicts; NINPUTS is the size of that array; PARENTS is an NINPUTS-element
1890   array with each element corresponding to a input which is a child dict set to
1891   the number in the INPUTS array of that input's parent.
1892
1893   If CU_MAPPED is set, this is a first pass for a link with a non-empty CU
1894   mapping: only one output will result.
1895
1896   Only deduplicates: does not emit the types into the output.  Call
1897   ctf_dedup_emit afterwards to do that.  */
1898
1899int
1900ctf_dedup (ctf_dict_t *output, ctf_dict_t **inputs, uint32_t ninputs,
1901	   uint32_t *parents, int cu_mapped)
1902{
1903  ctf_dedup_t *d = &output->ctf_dedup;
1904  size_t i;
1905  ctf_next_t *it = NULL;
1906
1907  if (ctf_dedup_init (output) < 0)
1908    return -1; 					/* errno is set for us.  */
1909
1910  for (i = 0; i < ninputs; i++)
1911    {
1912      ctf_dprintf ("Input %i: %s\n", (int) i, ctf_link_input_name (inputs[i]));
1913      if (ctf_dynhash_insert (d->cd_input_nums, inputs[i],
1914			      (void *) (uintptr_t) i) < 0)
1915	{
1916	  ctf_set_errno (output, errno);
1917	  ctf_err_warn (output, 0, errno, _("ctf_dedup: cannot initialize: %s\n"),
1918			ctf_errmsg (errno));
1919	  goto err;
1920	}
1921    }
1922
1923  /* Some flags do not apply when CU-mapping: this is not a duplicated link,
1924     because there is only one output and we really don't want to end up marking
1925     all nonconflicting but appears-only-once types as conflicting (which in the
1926     CU-mapped link means we'd mark them all as non-root-visible!).  */
1927  d->cd_link_flags = output->ctf_link_flags;
1928  if (cu_mapped)
1929    d->cd_link_flags &= ~(CTF_LINK_SHARE_DUPLICATED);
1930
1931  /* Compute hash values for all types, recursively, treating child structures
1932     and unions equivalent to forwards, and hashing in the name of the referent
1933     of each such type into structures, unions, and non-opaque forwards.
1934     Populate a mapping from decorated name (including an indication of
1935     struct/union/enum namespace) to count of type hash values in
1936     cd_name_counts, a mapping from and a mapping from hash values to input type
1937     IDs in cd_output_mapping.  */
1938
1939  ctf_dprintf ("Computing type hashes\n");
1940  for (i = 0; i < ninputs; i++)
1941    {
1942      ctf_id_t id;
1943
1944      while ((id = ctf_type_next (inputs[i], &it, NULL, 1)) != CTF_ERR)
1945	{
1946	  if (ctf_dedup_hash_type (output, inputs[i], inputs,
1947				   parents, i, id, 0, 0,
1948				   ctf_dedup_populate_mappings) == NULL)
1949	    goto err;				/* errno is set for us.  */
1950	}
1951      if (ctf_errno (inputs[i]) != ECTF_NEXT_END)
1952	{
1953	  ctf_set_errno (output, ctf_errno (inputs[i]));
1954	  ctf_err_warn (output, 0, 0, _("iteration failure "
1955					"computing type hashes"));
1956	  goto err;
1957	}
1958    }
1959
1960  /* Go through the cd_name_counts name->hash->count mapping for all CTF
1961     namespaces: any name with many hashes associated with it at this stage is
1962     necessarily ambiguous.  Mark all the hashes except the most common as
1963     conflicting in the output.  */
1964
1965  ctf_dprintf ("Detecting type name ambiguity\n");
1966  if (ctf_dedup_detect_name_ambiguity (output, inputs) < 0)
1967      goto err;					/* errno is set for us.  */
1968
1969  /* If the link mode is CTF_LINK_SHARE_DUPLICATED, we change any unconflicting
1970     types whose output mapping references only one input dict into a
1971     conflicting type, so that they end up in the per-CU dictionaries.  */
1972
1973  if (d->cd_link_flags & CTF_LINK_SHARE_DUPLICATED)
1974    {
1975      ctf_dprintf ("Conflictifying unshared types\n");
1976      if (ctf_dedup_conflictify_unshared (output, inputs) < 0)
1977	goto err;				/* errno is set for us.  */
1978    }
1979  return 0;
1980
1981 err:
1982  ctf_dedup_fini (output, NULL, 0);
1983  return -1;
1984}
1985
1986static int
1987ctf_dedup_rwalk_output_mapping (ctf_dict_t *output, ctf_dict_t **inputs,
1988				uint32_t ninputs, uint32_t *parents,
1989				ctf_dynset_t *already_visited,
1990				const char *hval,
1991				int (*visit_fun) (const char *hval,
1992						  ctf_dict_t *output,
1993						  ctf_dict_t **inputs,
1994						  uint32_t ninputs,
1995						  uint32_t *parents,
1996						  int already_visited,
1997						  ctf_dict_t *input,
1998						  ctf_id_t type,
1999						  void *id,
2000						  int depth,
2001						  void *arg),
2002				void *arg, unsigned long depth);
2003
2004/* Like ctf_dedup_rwalk_output_mapping (which see), only takes a single target
2005   type and visits it.  */
2006static int
2007ctf_dedup_rwalk_one_output_mapping (ctf_dict_t *output,
2008				    ctf_dict_t **inputs, uint32_t ninputs,
2009				    uint32_t *parents,
2010				    ctf_dynset_t *already_visited,
2011				    int visited, void *type_id,
2012				    const char *hval,
2013				    int (*visit_fun) (const char *hval,
2014						      ctf_dict_t *output,
2015						      ctf_dict_t **inputs,
2016						      uint32_t ninputs,
2017						      uint32_t *parents,
2018						      int already_visited,
2019						      ctf_dict_t *input,
2020						      ctf_id_t type,
2021						      void *id,
2022						      int depth,
2023						      void *arg),
2024				    void *arg, unsigned long depth)
2025{
2026  ctf_dedup_t *d = &output->ctf_dedup;
2027  ctf_dict_t *fp;
2028  int input_num;
2029  ctf_id_t type;
2030  int ret;
2031  const char *whaterr;
2032
2033  input_num = CTF_DEDUP_GID_TO_INPUT (type_id);
2034  fp = inputs[input_num];
2035  type = CTF_DEDUP_GID_TO_TYPE (type_id);
2036
2037  ctf_dprintf ("%lu: Starting walk over type %s, %i/%lx (%p), from %s, "
2038	       "kind %i\n", depth, hval, input_num, type, (void *) fp,
2039	       ctf_link_input_name (fp), ctf_type_kind_unsliced (fp, type));
2040
2041  /* Get the single call we do if this type has already been visited out of the
2042     way.  */
2043  if (visited)
2044    return visit_fun (hval, output, inputs, ninputs, parents, visited, fp,
2045		      type, type_id, depth, arg);
2046
2047  /* This macro is really ugly, but the alternative is repeating this code many
2048     times, which is worse.  */
2049
2050#define CTF_TYPE_WALK(type, errlabel, errmsg)				\
2051  do									\
2052    {									\
2053      void *type_id;							\
2054      const char *hashval;						\
2055      int cited_type_input_num = input_num;				\
2056									\
2057      if ((fp->ctf_flags & LCTF_CHILD) && (LCTF_TYPE_ISPARENT (fp, type))) \
2058	cited_type_input_num = parents[input_num];			\
2059									\
2060      type_id = CTF_DEDUP_GID (output, cited_type_input_num, type);	\
2061									\
2062      if (type == 0)							\
2063	{								\
2064	  ctf_dprintf ("Walking: unimplemented type\n");		\
2065	  break;							\
2066	}								\
2067									\
2068      ctf_dprintf ("Looking up ID %i/%lx in type hashes\n",		\
2069		   cited_type_input_num, type);				\
2070      hashval = ctf_dynhash_lookup (d->cd_type_hashes, type_id);	\
2071      if (!ctf_assert (output, hashval))				\
2072	{								\
2073	  whaterr = N_("error looking up ID in type hashes");		\
2074	  goto errlabel;						\
2075	}								\
2076      ctf_dprintf ("ID %i/%lx has hash %s\n", cited_type_input_num, type, \
2077		   hashval);						\
2078									\
2079      ret = ctf_dedup_rwalk_output_mapping (output, inputs, ninputs, parents, \
2080					    already_visited, hashval,	\
2081					    visit_fun, arg, depth);	\
2082      if (ret < 0)							\
2083	{								\
2084	  whaterr = errmsg;						\
2085	  goto errlabel;						\
2086	}								\
2087    }									\
2088  while (0)
2089
2090  switch (ctf_type_kind_unsliced (fp, type))
2091    {
2092    case CTF_K_UNKNOWN:
2093    case CTF_K_FORWARD:
2094    case CTF_K_INTEGER:
2095    case CTF_K_FLOAT:
2096    case CTF_K_ENUM:
2097      /* No types referenced.  */
2098      break;
2099
2100    case CTF_K_TYPEDEF:
2101    case CTF_K_VOLATILE:
2102    case CTF_K_CONST:
2103    case CTF_K_RESTRICT:
2104    case CTF_K_POINTER:
2105    case CTF_K_SLICE:
2106      CTF_TYPE_WALK (ctf_type_reference (fp, type), err,
2107		     N_("error during referenced type walk"));
2108      break;
2109
2110    case CTF_K_ARRAY:
2111      {
2112	ctf_arinfo_t ar;
2113
2114	if (ctf_array_info (fp, type, &ar) < 0)
2115	  {
2116	    whaterr = N_("error during array info lookup");
2117	    goto err_msg;
2118	  }
2119
2120	CTF_TYPE_WALK (ar.ctr_contents, err,
2121		       N_("error during array contents type walk"));
2122	CTF_TYPE_WALK (ar.ctr_index, err,
2123		       N_("error during array index type walk"));
2124	break;
2125      }
2126
2127    case CTF_K_FUNCTION:
2128      {
2129	ctf_funcinfo_t fi;
2130	ctf_id_t *args;
2131	uint32_t j;
2132
2133	if (ctf_func_type_info (fp, type, &fi) < 0)
2134	  {
2135	    whaterr = N_("error during func type info lookup");
2136	    goto err_msg;
2137	  }
2138
2139	CTF_TYPE_WALK (fi.ctc_return, err,
2140		       N_("error during func return type walk"));
2141
2142	if ((args = calloc (fi.ctc_argc, sizeof (ctf_id_t))) == NULL)
2143	  {
2144	    whaterr = N_("error doing memory allocation");
2145	    goto err_msg;
2146	  }
2147
2148	if (ctf_func_type_args (fp, type, fi.ctc_argc, args) < 0)
2149	  {
2150	    whaterr = N_("error doing func arg type lookup");
2151	    free (args);
2152	    goto err_msg;
2153	  }
2154
2155	for (j = 0; j < fi.ctc_argc; j++)
2156	  CTF_TYPE_WALK (args[j], err_free_args,
2157			 N_("error during Func arg type walk"));
2158	free (args);
2159	break;
2160
2161      err_free_args:
2162	free (args);
2163	goto err;
2164      }
2165    case CTF_K_STRUCT:
2166    case CTF_K_UNION:
2167      /* We do not recursively traverse the members of structures: they are
2168	 emitted later, in a separate pass.  */
2169	break;
2170    default:
2171      whaterr = N_("CTF dict corruption: unknown type kind");
2172      goto err_msg;
2173    }
2174
2175  return visit_fun (hval, output, inputs, ninputs, parents, visited, fp, type,
2176		    type_id, depth, arg);
2177
2178 err_msg:
2179  ctf_set_errno (output, ctf_errno (fp));
2180  ctf_err_warn (output, 0, 0, _("%s in input file %s at type ID %lx"),
2181		gettext (whaterr), ctf_link_input_name (fp), type);
2182 err:
2183  return -1;
2184}
2185/* Recursively traverse the output mapping, and do something with each type
2186   visited, from leaves to root.  VISIT_FUN, called as recursion unwinds,
2187   returns a negative error code or zero.  Type hashes may be visited more than
2188   once, but are not recursed through repeatedly: ALREADY_VISITED tracks whether
2189   types have already been visited.  */
2190static int
2191ctf_dedup_rwalk_output_mapping (ctf_dict_t *output, ctf_dict_t **inputs,
2192				uint32_t ninputs, uint32_t *parents,
2193				ctf_dynset_t *already_visited,
2194				const char *hval,
2195				int (*visit_fun) (const char *hval,
2196						  ctf_dict_t *output,
2197						  ctf_dict_t **inputs,
2198						  uint32_t ninputs,
2199						  uint32_t *parents,
2200						  int already_visited,
2201						  ctf_dict_t *input,
2202						  ctf_id_t type,
2203						  void *id,
2204						  int depth,
2205						  void *arg),
2206				void *arg, unsigned long depth)
2207{
2208  ctf_dedup_t *d = &output->ctf_dedup;
2209  ctf_next_t *i = NULL;
2210  int err;
2211  int visited = 1;
2212  ctf_dynset_t *type_ids;
2213  void *id;
2214
2215  depth++;
2216
2217  type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hval);
2218  if (!type_ids)
2219    {
2220      ctf_err_warn (output, 0, ECTF_INTERNAL,
2221		    _("looked up type kind by nonexistent hash %s"), hval);
2222      return ctf_set_errno (output, ECTF_INTERNAL);
2223    }
2224
2225  /* Have we seen this type before?  */
2226
2227  if (!ctf_dynset_exists (already_visited, hval, NULL))
2228    {
2229      /* Mark as already-visited immediately, to eliminate the possibility of
2230	 cycles: but remember we have not actually visited it yet for the
2231	 upcoming call to the visit_fun.  (All our callers handle cycles
2232	 properly themselves, so we can just abort them aggressively as soon as
2233	 we find ourselves in one.)  */
2234
2235      visited = 0;
2236      if (ctf_dynset_cinsert (already_visited, hval) < 0)
2237	{
2238	  ctf_err_warn (output, 0, ENOMEM,
2239			_("out of memory tracking already-visited types"));
2240	  return ctf_set_errno (output, ENOMEM);
2241	}
2242    }
2243
2244  /* If this type is marked conflicted, traverse members and call
2245     ctf_dedup_rwalk_output_mapping_once on all the unique ones: otherwise, just
2246     pick a random one and use it.  */
2247
2248  if (!ctf_dynset_exists (d->cd_conflicting_types, hval, NULL))
2249    {
2250      id = ctf_dynset_lookup_any (type_ids);
2251      if (!ctf_assert (output, id))
2252	return -1;
2253
2254      return ctf_dedup_rwalk_one_output_mapping (output, inputs, ninputs,
2255						 parents, already_visited,
2256						 visited, id, hval, visit_fun,
2257						 arg, depth);
2258    }
2259
2260  while ((err = ctf_dynset_next (type_ids, &i, &id)) == 0)
2261    {
2262      int ret;
2263
2264      ret = ctf_dedup_rwalk_one_output_mapping (output, inputs, ninputs,
2265						parents, already_visited,
2266						visited, id, hval,
2267						visit_fun, arg, depth);
2268      if (ret < 0)
2269	{
2270	  ctf_next_destroy (i);
2271	  return ret;				/* errno is set for us.  */
2272	}
2273    }
2274  if (err != ECTF_NEXT_END)
2275    {
2276      ctf_err_warn (output, 0, err, _("cannot walk conflicted type"));
2277      return ctf_set_errno (output, err);
2278    }
2279
2280  return 0;
2281}
2282
2283typedef struct ctf_sort_om_cb_arg
2284{
2285  ctf_dict_t **inputs;
2286  uint32_t ninputs;
2287  ctf_dedup_t *d;
2288} ctf_sort_om_cb_arg_t;
2289
2290/* Sort the output mapping into order: types first appearing in earlier inputs
2291   first, parents preceding children: if types first appear in the same input,
2292   sort those with earlier ctf_id_t's first.  */
2293static int
2294sort_output_mapping (const ctf_next_hkv_t *one, const ctf_next_hkv_t *two,
2295		     void *arg_)
2296{
2297  ctf_sort_om_cb_arg_t *arg = (ctf_sort_om_cb_arg_t *) arg_;
2298  ctf_dedup_t *d = arg->d;
2299  const char *one_hval = (const char *) one->hkv_key;
2300  const char *two_hval = (const char *) two->hkv_key;
2301  void *one_gid, *two_gid;
2302  uint32_t one_ninput;
2303  uint32_t two_ninput;
2304  ctf_dict_t *one_fp;
2305  ctf_dict_t *two_fp;
2306  ctf_id_t one_type;
2307  ctf_id_t two_type;
2308
2309  one_gid = ctf_dynhash_lookup (d->cd_output_first_gid, one_hval);
2310  two_gid = ctf_dynhash_lookup (d->cd_output_first_gid, two_hval);
2311
2312  one_ninput = CTF_DEDUP_GID_TO_INPUT (one_gid);
2313  two_ninput = CTF_DEDUP_GID_TO_INPUT (two_gid);
2314
2315  one_type = CTF_DEDUP_GID_TO_TYPE (one_gid);
2316  two_type = CTF_DEDUP_GID_TO_TYPE (two_gid);
2317
2318  /* It's kind of hard to smuggle an assertion failure out of here.  */
2319  assert (one_ninput < arg->ninputs && two_ninput < arg->ninputs);
2320
2321  one_fp = arg->inputs[one_ninput];
2322  two_fp = arg->inputs[two_ninput];
2323
2324  /* Parents before children.  */
2325
2326  if (!(one_fp->ctf_flags & LCTF_CHILD)
2327      && (two_fp->ctf_flags & LCTF_CHILD))
2328    return -1;
2329  else if ((one_fp->ctf_flags & LCTF_CHILD)
2330      && !(two_fp->ctf_flags & LCTF_CHILD))
2331    return 1;
2332
2333  /* ninput order, types appearing in earlier TUs first.  */
2334
2335  if (one_ninput < two_ninput)
2336    return -1;
2337  else if (two_ninput < one_ninput)
2338    return 1;
2339
2340  /* Same TU.  Earliest ctf_id_t first.  They cannot be the same.  */
2341
2342  assert (one_type != two_type);
2343  if (one_type < two_type)
2344    return -1;
2345  else
2346    return 1;
2347}
2348
2349/* The public entry point to ctf_dedup_rwalk_output_mapping, above.  */
2350static int
2351ctf_dedup_walk_output_mapping (ctf_dict_t *output, ctf_dict_t **inputs,
2352			       uint32_t ninputs, uint32_t *parents,
2353			       int (*visit_fun) (const char *hval,
2354						 ctf_dict_t *output,
2355						 ctf_dict_t **inputs,
2356						 uint32_t ninputs,
2357						 uint32_t *parents,
2358						 int already_visited,
2359						 ctf_dict_t *input,
2360						 ctf_id_t type,
2361						 void *id,
2362						 int depth,
2363						 void *arg),
2364			       void *arg)
2365{
2366  ctf_dynset_t *already_visited;
2367  ctf_next_t *i = NULL;
2368  ctf_sort_om_cb_arg_t sort_arg;
2369  int err;
2370  void *k;
2371
2372  if ((already_visited = ctf_dynset_create (htab_hash_string,
2373					    htab_eq_string,
2374					    NULL)) == NULL)
2375    return ctf_set_errno (output, ENOMEM);
2376
2377  sort_arg.inputs = inputs;
2378  sort_arg.ninputs = ninputs;
2379  sort_arg.d = &output->ctf_dedup;
2380
2381  while ((err = ctf_dynhash_next_sorted (output->ctf_dedup.cd_output_mapping,
2382					 &i, &k, NULL, sort_output_mapping,
2383					 &sort_arg)) == 0)
2384    {
2385      const char *hval = (const char *) k;
2386
2387      err = ctf_dedup_rwalk_output_mapping (output, inputs, ninputs, parents,
2388					    already_visited, hval, visit_fun,
2389					    arg, 0);
2390      if (err < 0)
2391	{
2392	  ctf_next_destroy (i);
2393	  goto err;				/* errno is set for us.  */
2394	}
2395    }
2396  if (err != ECTF_NEXT_END)
2397    {
2398      ctf_err_warn (output, 0, err, _("cannot recurse over output mapping"));
2399      ctf_set_errno (output, err);
2400      goto err;
2401    }
2402  ctf_dynset_destroy (already_visited);
2403
2404  return 0;
2405 err:
2406  ctf_dynset_destroy (already_visited);
2407  return -1;
2408}
2409
2410/* Possibly synthesise a synthetic forward in TARGET to subsitute for a
2411   conflicted per-TU type ID in INPUT with hash HVAL.  Return its CTF ID, or 0
2412   if none was needed.  */
2413static ctf_id_t
2414ctf_dedup_maybe_synthesize_forward (ctf_dict_t *output, ctf_dict_t *target,
2415				    ctf_dict_t *input, ctf_id_t id,
2416				    const char *hval)
2417{
2418  ctf_dedup_t *od = &output->ctf_dedup;
2419  ctf_dedup_t *td = &target->ctf_dedup;
2420  int kind;
2421  int fwdkind;
2422  const char *name = ctf_type_name_raw (input, id);
2423  const char *decorated;
2424  void *v;
2425  ctf_id_t emitted_forward;
2426
2427  if (!ctf_dynset_exists (od->cd_conflicting_types, hval, NULL)
2428      || target->ctf_flags & LCTF_CHILD
2429      || name[0] == '\0'
2430      || (((kind = ctf_type_kind_unsliced (input, id)) != CTF_K_STRUCT
2431	   && kind != CTF_K_UNION && kind != CTF_K_FORWARD)))
2432    return 0;
2433
2434  fwdkind = ctf_type_kind_forwarded (input, id);
2435
2436  ctf_dprintf ("Using synthetic forward for conflicted struct/union with "
2437	       "hval %s\n", hval);
2438
2439  if (!ctf_assert (output, name))
2440    return CTF_ERR;
2441
2442  if ((decorated = ctf_decorate_type_name (output, name, fwdkind)) == NULL)
2443    return CTF_ERR;
2444
2445  if (!ctf_dynhash_lookup_kv (td->cd_output_emission_conflicted_forwards,
2446			      decorated, NULL, &v))
2447    {
2448      if ((emitted_forward = ctf_add_forward (target, CTF_ADD_ROOT, name,
2449					      fwdkind)) == CTF_ERR)
2450	{
2451	  ctf_set_errno (output, ctf_errno (target));
2452	  return CTF_ERR;
2453	}
2454
2455      if (ctf_dynhash_cinsert (td->cd_output_emission_conflicted_forwards,
2456			       decorated, (void *) (uintptr_t)
2457			       emitted_forward) < 0)
2458	{
2459	  ctf_set_errno (output, ENOMEM);
2460	  return CTF_ERR;
2461	}
2462    }
2463  else
2464    emitted_forward = (ctf_id_t) (uintptr_t) v;
2465
2466  ctf_dprintf ("Cross-TU conflicted struct: passing back forward, %lx\n",
2467	       emitted_forward);
2468
2469  return emitted_forward;
2470}
2471
2472/* Map a GID in some INPUT dict, in the form of an input number and a ctf_id_t,
2473   into a GID in a target output dict.  If it returns 0, this is the
2474   unimplemented type, and the input type must have been 0.  The OUTPUT dict is
2475   assumed to be the parent of the TARGET, if it is not the TARGET itself.
2476
2477   Returns CTF_ERR on failure.  Responds to an incoming CTF_ERR as an 'id' by
2478   returning CTF_ERR, to simplify callers.  Errors are always propagated to the
2479   input, even if they relate to the target, for the same reason.  (Target
2480   errors are expected to be very rare.)
2481
2482   If the type in question is a citation of a conflicted type in a different TU,
2483   emit a forward of the right type in its place (if not already emitted), and
2484   record that forward in cd_output_emission_conflicted_forwards.  This avoids
2485   the need to replicate the entire type graph below this point in the current
2486   TU (an appalling waste of space).
2487
2488   TODO: maybe replace forwards in the same TU with their referents?  Might
2489   make usability a bit better.  */
2490
2491static ctf_id_t
2492ctf_dedup_id_to_target (ctf_dict_t *output, ctf_dict_t *target,
2493			ctf_dict_t **inputs, uint32_t ninputs,
2494			uint32_t *parents, ctf_dict_t *input, int input_num,
2495			ctf_id_t id)
2496{
2497  ctf_dedup_t *od = &output->ctf_dedup;
2498  ctf_dedup_t *td = &target->ctf_dedup;
2499  ctf_dict_t *err_fp = input;
2500  const char *hval;
2501  void *target_id;
2502  ctf_id_t emitted_forward;
2503
2504  /* The target type of an error is an error.  */
2505  if (id == CTF_ERR)
2506    return CTF_ERR;
2507
2508  /* The unimplemented type's ID never changes.  */
2509  if (!id)
2510    {
2511      ctf_dprintf ("%i/%lx: unimplemented type\n", input_num, id);
2512      return 0;
2513    }
2514
2515  ctf_dprintf ("Mapping %i/%lx to target %p (%s)\n", input_num,
2516	       id, (void *) target, ctf_link_input_name (target));
2517
2518  /* If the input type is in the parent type space, and this is a child, reset
2519     the input to the parent (which must already have been emitted, since
2520     emission of parent dicts happens before children).  */
2521  if ((input->ctf_flags & LCTF_CHILD) && (LCTF_TYPE_ISPARENT (input, id)))
2522    {
2523      if (!ctf_assert (output, parents[input_num] <= ninputs))
2524	return -1;
2525      input = inputs[parents[input_num]];
2526      input_num = parents[input_num];
2527    }
2528
2529  hval = ctf_dynhash_lookup (od->cd_type_hashes,
2530			     CTF_DEDUP_GID (output, input_num, id));
2531
2532  if (!ctf_assert (output, hval && td->cd_output_emission_hashes))
2533    return -1;
2534
2535  /* If this type is a conflicted tagged structure, union, or forward,
2536     substitute a synthetic forward instead, emitting it if need be.  Only do
2537     this if the target is in the parent dict: if it's in the child dict, we can
2538     just point straight at the thing itself.  Of course, we might be looking in
2539     the child dict right now and not find it and have to look in the parent, so
2540     we have to do this check twice.  */
2541
2542  emitted_forward = ctf_dedup_maybe_synthesize_forward (output, target,
2543							input, id, hval);
2544  switch (emitted_forward)
2545    {
2546    case 0: /* No forward needed.  */
2547      break;
2548    case -1:
2549      ctf_set_errno (err_fp, ctf_errno (output));
2550      ctf_err_warn (err_fp, 0, 0, _("cannot add synthetic forward for type "
2551				    "%i/%lx"), input_num, id);
2552      return -1;
2553    default:
2554      return emitted_forward;
2555    }
2556
2557  ctf_dprintf ("Looking up %i/%lx, hash %s, in target\n", input_num, id, hval);
2558
2559  target_id = ctf_dynhash_lookup (td->cd_output_emission_hashes, hval);
2560  if (!target_id)
2561    {
2562      /* Must be in the parent, so this must be a child, and they must not be
2563	 the same dict.  */
2564      ctf_dprintf ("Checking shared parent for target\n");
2565      if (!ctf_assert (output, (target != output)
2566		       && (target->ctf_flags & LCTF_CHILD)))
2567	return -1;
2568
2569      target_id = ctf_dynhash_lookup (od->cd_output_emission_hashes, hval);
2570
2571      emitted_forward = ctf_dedup_maybe_synthesize_forward (output, output,
2572							    input, id, hval);
2573      switch (emitted_forward)
2574	{
2575	case 0: /* No forward needed.  */
2576	  break;
2577	case -1:
2578	  ctf_err_warn (err_fp, 0, ctf_errno (output),
2579			_("cannot add synthetic forward for type %i/%lx"),
2580			input_num, id);
2581	  return ctf_set_errno (err_fp, ctf_errno (output));
2582	default:
2583	  return emitted_forward;
2584	}
2585    }
2586  if (!ctf_assert (output, target_id))
2587    return -1;
2588  return (ctf_id_t) (uintptr_t) target_id;
2589}
2590
2591/* Emit a single deduplicated TYPE with the given HVAL, located in a given
2592   INPUT, with the given (G)ID, into the shared OUTPUT or a
2593   possibly-newly-created per-CU dict.  All the types this type depends upon
2594   have already been emitted.  (This type itself may also have been emitted.)
2595
2596   If the ARG is 1, this is a CU-mapped deduplication round mapping many
2597   ctf_dict_t's into precisely one: conflicting types should be marked
2598   non-root-visible.  If the ARG is 0, conflicting types go into per-CU
2599   dictionaries stored in the input's ctf_dedup.cd_output: otherwise, everything
2600   is emitted directly into the output.  No struct/union members are emitted.
2601
2602   Optimization opportunity: trace the ancestry of non-root-visible types and
2603   elide all that neither have a root-visible type somewhere towards their root,
2604   nor have the type visible via any other route (the function info section,
2605   data object section, backtrace section etc).  */
2606
2607static int
2608ctf_dedup_emit_type (const char *hval, ctf_dict_t *output, ctf_dict_t **inputs,
2609		     uint32_t ninputs, uint32_t *parents, int already_visited,
2610		     ctf_dict_t *input, ctf_id_t type, void *id, int depth,
2611		     void *arg)
2612{
2613  ctf_dedup_t *d = &output->ctf_dedup;
2614  int kind = ctf_type_kind_unsliced (input, type);
2615  const char *name;
2616  ctf_dict_t *target = output;
2617  ctf_dict_t *real_input;
2618  const ctf_type_t *tp;
2619  int input_num = CTF_DEDUP_GID_TO_INPUT (id);
2620  int output_num = (uint32_t) -1;		/* 'shared' */
2621  int cu_mapped = *(int *)arg;
2622  int isroot = 1;
2623  int is_conflicting;
2624
2625  ctf_next_t *i = NULL;
2626  ctf_id_t new_type;
2627  ctf_id_t ref;
2628  ctf_id_t maybe_dup = 0;
2629  ctf_encoding_t ep;
2630  const char *errtype;
2631  int emission_hashed = 0;
2632
2633  /* We don't want to re-emit something we've already emitted.  */
2634
2635  if (already_visited)
2636    return 0;
2637
2638  ctf_dprintf ("%i: Emitting type with hash %s from %s: determining target\n",
2639	       depth, hval, ctf_link_input_name (input));
2640
2641  /* Conflicting types go into a per-CU output dictionary, unless this is a
2642     CU-mapped run.  The import is not refcounted, since it goes into the
2643     ctf_link_outputs dict of the output that is its parent.  */
2644  is_conflicting = ctf_dynset_exists (d->cd_conflicting_types, hval, NULL);
2645
2646  if (is_conflicting && !cu_mapped)
2647    {
2648      ctf_dprintf ("%i: Type %s in %i/%lx is conflicted: "
2649		   "inserting into per-CU target.\n",
2650		   depth, hval, input_num, type);
2651
2652      if (input->ctf_dedup.cd_output)
2653	target = input->ctf_dedup.cd_output;
2654      else
2655	{
2656	  int err;
2657
2658	  if ((target = ctf_create (&err)) == NULL)
2659	    {
2660	      ctf_err_warn (output, 0, err,
2661			    _("cannot create per-CU CTF archive for CU %s"),
2662			    ctf_link_input_name (input));
2663	      return ctf_set_errno (output, err);
2664	    }
2665
2666	  ctf_import_unref (target, output);
2667	  if (ctf_cuname (input) != NULL)
2668	    ctf_cuname_set (target, ctf_cuname (input));
2669	  else
2670	    ctf_cuname_set (target, "unnamed-CU");
2671	  ctf_parent_name_set (target, _CTF_SECTION);
2672
2673	  input->ctf_dedup.cd_output = target;
2674	  input->ctf_link_in_out = target;
2675	  target->ctf_link_in_out = input;
2676	}
2677      output_num = input_num;
2678    }
2679
2680  real_input = input;
2681  if ((tp = ctf_lookup_by_id (&real_input, type)) == NULL)
2682    {
2683      ctf_err_warn (output, 0, ctf_errno (input),
2684		    _("%s: lookup failure for type %lx"),
2685		    ctf_link_input_name (real_input), type);
2686      return ctf_set_errno (output, ctf_errno (input));
2687    }
2688
2689  name = ctf_strraw (real_input, tp->ctt_name);
2690
2691  /* Hide conflicting types, if we were asked to: also hide if a type with this
2692     name already exists and is not a forward.  */
2693  if (cu_mapped && is_conflicting)
2694    isroot = 0;
2695  else if (name
2696	   && (maybe_dup = ctf_lookup_by_rawname (target, kind, name)) != 0)
2697    {
2698      if (ctf_type_kind (target, maybe_dup) != CTF_K_FORWARD)
2699	isroot = 0;
2700    }
2701
2702  ctf_dprintf ("%i: Emitting type with hash %s (%s), into target %i/%p\n",
2703	       depth, hval, name ? name : "", input_num, (void *) target);
2704
2705  if (!target->ctf_dedup.cd_output_emission_hashes)
2706    if ((target->ctf_dedup.cd_output_emission_hashes
2707	 = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string,
2708			      NULL, NULL)) == NULL)
2709      goto oom_hash;
2710
2711  if (!target->ctf_dedup.cd_output_emission_conflicted_forwards)
2712    if ((target->ctf_dedup.cd_output_emission_conflicted_forwards
2713	 = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string,
2714			      NULL, NULL)) == NULL)
2715      goto oom_hash;
2716
2717  switch (kind)
2718    {
2719    case CTF_K_UNKNOWN:
2720      /* These are types that CTF cannot encode, marked as such by the
2721	 compiler.  */
2722      errtype = _("unknown type");
2723      if ((new_type = ctf_add_unknown (target, isroot, name)) == CTF_ERR)
2724	goto err_target;
2725      break;
2726    case CTF_K_FORWARD:
2727      /* This will do nothing if the type to which this forwards already exists,
2728	 and will be replaced with such a type if it appears later.  */
2729
2730      errtype = _("forward");
2731      if ((new_type = ctf_add_forward (target, isroot, name,
2732				       ctf_type_kind_forwarded (input, type)))
2733	  == CTF_ERR)
2734	goto err_target;
2735      break;
2736
2737    case CTF_K_FLOAT:
2738    case CTF_K_INTEGER:
2739      errtype = _("float/int");
2740      if (ctf_type_encoding (input, type, &ep) < 0)
2741	goto err_input;				/* errno is set for us.  */
2742      if ((new_type = ctf_add_encoded (target, isroot, name, &ep, kind))
2743	  == CTF_ERR)
2744	goto err_target;
2745      break;
2746
2747    case CTF_K_ENUM:
2748      {
2749	int val;
2750	errtype = _("enum");
2751	if ((new_type = ctf_add_enum (target, isroot, name)) == CTF_ERR)
2752	  goto err_input;				/* errno is set for us.  */
2753
2754	while ((name = ctf_enum_next (input, type, &i, &val)) != NULL)
2755	  {
2756	    if (ctf_add_enumerator (target, new_type, name, val) < 0)
2757	      {
2758		ctf_err_warn (target, 0, ctf_errno (target),
2759			      _("%s (%i): cannot add enumeration value %s "
2760				"from input type %lx"),
2761			      ctf_link_input_name (input), input_num, name,
2762			      type);
2763		ctf_next_destroy (i);
2764		return ctf_set_errno (output, ctf_errno (target));
2765	      }
2766	  }
2767	if (ctf_errno (input) != ECTF_NEXT_END)
2768	  goto err_input;
2769	break;
2770      }
2771
2772    case CTF_K_TYPEDEF:
2773      errtype = _("typedef");
2774
2775      ref = ctf_type_reference (input, type);
2776      if ((ref = ctf_dedup_id_to_target (output, target, inputs, ninputs,
2777					 parents, input, input_num,
2778					 ref)) == CTF_ERR)
2779	goto err_input;				/* errno is set for us.  */
2780
2781      if ((new_type = ctf_add_typedef (target, isroot, name, ref)) == CTF_ERR)
2782	goto err_target;			/* errno is set for us.  */
2783      break;
2784
2785    case CTF_K_VOLATILE:
2786    case CTF_K_CONST:
2787    case CTF_K_RESTRICT:
2788    case CTF_K_POINTER:
2789      errtype = _("pointer or cvr-qual");
2790
2791      ref = ctf_type_reference (input, type);
2792      if ((ref = ctf_dedup_id_to_target (output, target, inputs, ninputs,
2793					 parents, input, input_num,
2794					 ref)) == CTF_ERR)
2795	goto err_input;				/* errno is set for us.  */
2796
2797      if ((new_type = ctf_add_reftype (target, isroot, ref, kind)) == CTF_ERR)
2798	goto err_target;			/* errno is set for us.  */
2799      break;
2800
2801    case CTF_K_SLICE:
2802      errtype = _("slice");
2803
2804      if (ctf_type_encoding (input, type, &ep) < 0)
2805	goto err_input;				/* errno is set for us.  */
2806
2807      ref = ctf_type_reference (input, type);
2808      if ((ref = ctf_dedup_id_to_target (output, target, inputs, ninputs,
2809					 parents, input, input_num,
2810					 ref)) == CTF_ERR)
2811	goto err_input;
2812
2813      if ((new_type = ctf_add_slice (target, isroot, ref, &ep)) == CTF_ERR)
2814	goto err_target;
2815      break;
2816
2817    case CTF_K_ARRAY:
2818      {
2819	ctf_arinfo_t ar;
2820
2821	errtype = _("array info");
2822	if (ctf_array_info (input, type, &ar) < 0)
2823	  goto err_input;
2824
2825	ar.ctr_contents = ctf_dedup_id_to_target (output, target, inputs,
2826						  ninputs, parents, input,
2827						  input_num, ar.ctr_contents);
2828	ar.ctr_index = ctf_dedup_id_to_target (output, target, inputs, ninputs,
2829					       parents, input, input_num,
2830					       ar.ctr_index);
2831
2832	if (ar.ctr_contents == CTF_ERR || ar.ctr_index == CTF_ERR)
2833	  goto err_input;
2834
2835	if ((new_type = ctf_add_array (target, isroot, &ar)) == CTF_ERR)
2836	  goto err_target;
2837
2838	break;
2839      }
2840
2841    case CTF_K_FUNCTION:
2842      {
2843	ctf_funcinfo_t fi;
2844	ctf_id_t *args;
2845	uint32_t j;
2846
2847	errtype = _("function");
2848	if (ctf_func_type_info (input, type, &fi) < 0)
2849	  goto err_input;
2850
2851	fi.ctc_return = ctf_dedup_id_to_target (output, target, inputs, ninputs,
2852						parents, input, input_num,
2853						fi.ctc_return);
2854	if (fi.ctc_return == CTF_ERR)
2855	  goto err_input;
2856
2857	if ((args = calloc (fi.ctc_argc, sizeof (ctf_id_t))) == NULL)
2858	  {
2859	    ctf_set_errno (input, ENOMEM);
2860	    goto err_input;
2861	  }
2862
2863	errtype = _("function args");
2864	if (ctf_func_type_args (input, type, fi.ctc_argc, args) < 0)
2865	  {
2866	    free (args);
2867	    goto err_input;
2868	  }
2869
2870	for (j = 0; j < fi.ctc_argc; j++)
2871	  {
2872	    args[j] = ctf_dedup_id_to_target (output, target, inputs, ninputs,
2873					      parents, input, input_num,
2874					      args[j]);
2875	    if (args[j] == CTF_ERR)
2876	      goto err_input;
2877	  }
2878
2879	if ((new_type = ctf_add_function (target, isroot,
2880					  &fi, args)) == CTF_ERR)
2881	  {
2882	    free (args);
2883	    goto err_target;
2884	  }
2885	free (args);
2886	break;
2887      }
2888
2889    case CTF_K_STRUCT:
2890    case CTF_K_UNION:
2891      {
2892	size_t size = ctf_type_size (input, type);
2893	void *out_id;
2894	/* Insert the structure itself, so other types can refer to it.  */
2895
2896	errtype = _("structure/union");
2897	if (kind == CTF_K_STRUCT)
2898	  new_type = ctf_add_struct_sized (target, isroot, name, size);
2899	else
2900	  new_type = ctf_add_union_sized (target, isroot, name, size);
2901
2902	if (new_type == CTF_ERR)
2903	  goto err_target;
2904
2905	out_id = CTF_DEDUP_GID (output, output_num, new_type);
2906	ctf_dprintf ("%i: Noting need to emit members of %p -> %p\n", depth,
2907		     id, out_id);
2908	/* Record the need to emit the members of this structure later.  */
2909	if (ctf_dynhash_insert (d->cd_emission_struct_members, id, out_id) < 0)
2910	  {
2911	    ctf_set_errno (target, errno);
2912	    goto err_target;
2913	  }
2914	break;
2915      }
2916    default:
2917      ctf_err_warn (output, 0, ECTF_CORRUPT, _("%s: unknown type kind for "
2918					       "input type %lx"),
2919		    ctf_link_input_name (input), type);
2920      return ctf_set_errno (output, ECTF_CORRUPT);
2921    }
2922
2923  if (!emission_hashed
2924      && new_type != 0
2925      && ctf_dynhash_cinsert (target->ctf_dedup.cd_output_emission_hashes,
2926			      hval, (void *) (uintptr_t) new_type) < 0)
2927    {
2928      ctf_err_warn (output, 0, ENOMEM, _("out of memory tracking deduplicated "
2929					 "global type IDs"));
2930	return ctf_set_errno (output, ENOMEM);
2931    }
2932
2933  if (!emission_hashed && new_type != 0)
2934    ctf_dprintf ("%i: Inserted %s, %i/%lx -> %lx into emission hash for "
2935		 "target %p (%s)\n", depth, hval, input_num, type, new_type,
2936		 (void *) target, ctf_link_input_name (target));
2937
2938  return 0;
2939
2940 oom_hash:
2941  ctf_err_warn (output, 0, ENOMEM, _("out of memory creating emission-tracking "
2942				     "hashes"));
2943  return ctf_set_errno (output, ENOMEM);
2944
2945 err_input:
2946  ctf_err_warn (output, 0, ctf_errno (input),
2947		_("%s (%i): while emitting deduplicated %s, error getting "
2948		  "input type %lx"), ctf_link_input_name (input),
2949		input_num, errtype, type);
2950  return ctf_set_errno (output, ctf_errno (input));
2951 err_target:
2952  ctf_err_warn (output, 0, ctf_errno (target),
2953		_("%s (%i): while emitting deduplicated %s, error emitting "
2954		  "target type from input type %lx"),
2955		ctf_link_input_name (input), input_num,
2956		errtype, type);
2957  return ctf_set_errno (output, ctf_errno (target));
2958}
2959
2960/* Traverse the cd_emission_struct_members and emit the members of all
2961   structures and unions.  All other types are emitted and complete by this
2962   point.  */
2963
2964static int
2965ctf_dedup_emit_struct_members (ctf_dict_t *output, ctf_dict_t **inputs,
2966			       uint32_t ninputs, uint32_t *parents)
2967{
2968  ctf_dedup_t *d = &output->ctf_dedup;
2969  ctf_next_t *i = NULL;
2970  void *input_id, *target_id;
2971  int err;
2972  ctf_dict_t *err_fp, *input_fp;
2973  int input_num;
2974  ctf_id_t err_type;
2975
2976  while ((err = ctf_dynhash_next (d->cd_emission_struct_members, &i,
2977				  &input_id, &target_id)) == 0)
2978    {
2979      ctf_next_t *j = NULL;
2980      ctf_dict_t *target;
2981      uint32_t target_num;
2982      ctf_id_t input_type, target_type;
2983      ssize_t offset;
2984      ctf_id_t membtype;
2985      const char *name;
2986
2987      input_num = CTF_DEDUP_GID_TO_INPUT (input_id);
2988      input_fp = inputs[input_num];
2989      input_type = CTF_DEDUP_GID_TO_TYPE (input_id);
2990
2991      /* The output is either -1 (for the shared, parent output dict) or the
2992	 number of the corresponding input.  */
2993      target_num = CTF_DEDUP_GID_TO_INPUT (target_id);
2994      if (target_num == (uint32_t) -1)
2995	target = output;
2996      else
2997	{
2998	  target = inputs[target_num]->ctf_dedup.cd_output;
2999	  if (!ctf_assert (output, target))
3000	    {
3001	      err_fp = output;
3002	      err_type = input_type;
3003	      goto err_target;
3004	    }
3005	}
3006      target_type = CTF_DEDUP_GID_TO_TYPE (target_id);
3007
3008      while ((offset = ctf_member_next (input_fp, input_type, &j, &name,
3009					&membtype, 0)) >= 0)
3010	{
3011	  err_fp = target;
3012	  err_type = target_type;
3013	  if ((membtype = ctf_dedup_id_to_target (output, target, inputs,
3014						  ninputs, parents, input_fp,
3015						  input_num,
3016						  membtype)) == CTF_ERR)
3017	    {
3018	      ctf_next_destroy (j);
3019	      goto err_target;
3020	    }
3021
3022	  if (name == NULL)
3023	    name = "";
3024#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
3025	  ctf_dprintf ("Emitting %s, offset %zi\n", name, offset);
3026#endif
3027	  if (ctf_add_member_offset (target, target_type, name,
3028				     membtype, offset) < 0)
3029	    {
3030	      ctf_next_destroy (j);
3031	      goto err_target;
3032	    }
3033	}
3034      if (ctf_errno (input_fp) != ECTF_NEXT_END)
3035	{
3036	  err = ctf_errno (input_fp);
3037	  ctf_next_destroy (i);
3038	  goto iterr;
3039	}
3040    }
3041  if (err != ECTF_NEXT_END)
3042    goto iterr;
3043
3044  return 0;
3045 err_target:
3046  ctf_next_destroy (i);
3047  ctf_err_warn (output, 0, ctf_errno (err_fp),
3048		_("%s (%i): error emitting members for structure type %lx"),
3049		ctf_link_input_name (input_fp), input_num, err_type);
3050  return ctf_set_errno (output, ctf_errno (err_fp));
3051 iterr:
3052  ctf_err_warn (output, 0, err, _("iteration failure emitting "
3053				  "structure members"));
3054  return ctf_set_errno (output, err);
3055}
3056
3057/* Emit deduplicated types into the outputs.  The shared type repository is
3058   OUTPUT, on which the ctf_dedup function must have already been called.  The
3059   PARENTS array contains the INPUTS index of the parent dict for every child
3060   dict at the corresponding index in the INPUTS (for non-child dicts, the value
3061   is undefined).
3062
3063   Return an array of fps with content emitted into them (starting with OUTPUT,
3064   which is the parent of all others, then all the newly-generated outputs).
3065
3066   If CU_MAPPED is set, this is a first pass for a link with a non-empty CU
3067   mapping: only one output will result.  */
3068
3069ctf_dict_t **
3070ctf_dedup_emit (ctf_dict_t *output, ctf_dict_t **inputs, uint32_t ninputs,
3071		uint32_t *parents, uint32_t *noutputs, int cu_mapped)
3072{
3073  size_t num_outputs = 1;		/* Always at least one output: us.  */
3074  ctf_dict_t **outputs;
3075  ctf_dict_t **walk;
3076  size_t i;
3077
3078  ctf_dprintf ("Triggering emission.\n");
3079  if (ctf_dedup_walk_output_mapping (output, inputs, ninputs, parents,
3080				     ctf_dedup_emit_type, &cu_mapped) < 0)
3081    return NULL;				/* errno is set for us.  */
3082
3083  ctf_dprintf ("Populating struct members.\n");
3084  if (ctf_dedup_emit_struct_members (output, inputs, ninputs, parents) < 0)
3085    return NULL;				/* errno is set for us.  */
3086
3087  for (i = 0; i < ninputs; i++)
3088    {
3089      if (inputs[i]->ctf_dedup.cd_output)
3090	num_outputs++;
3091    }
3092
3093  if (!ctf_assert (output, !cu_mapped || (cu_mapped && num_outputs == 1)))
3094    return NULL;
3095
3096  if ((outputs = calloc (num_outputs, sizeof (ctf_dict_t *))) == NULL)
3097    {
3098      ctf_err_warn (output, 0, ENOMEM,
3099		    _("out of memory allocating link outputs array"));
3100      ctf_set_errno (output, ENOMEM);
3101      return NULL;
3102    }
3103  *noutputs = num_outputs;
3104
3105  walk = outputs;
3106  *walk = output;
3107  output->ctf_refcnt++;
3108  walk++;
3109
3110  for (i = 0; i < ninputs; i++)
3111    {
3112      if (inputs[i]->ctf_dedup.cd_output)
3113	{
3114	  *walk = inputs[i]->ctf_dedup.cd_output;
3115	  inputs[i]->ctf_dedup.cd_output = NULL;
3116	  walk++;
3117	}
3118    }
3119
3120  return outputs;
3121}
3122
3123/* Determine what type SRC_FP / SRC_TYPE was emitted as in the FP, which
3124   must be the shared dict or have it as a parent: return 0 if none.  The SRC_FP
3125   must be a past input to ctf_dedup.  */
3126
3127ctf_id_t
3128ctf_dedup_type_mapping (ctf_dict_t *fp, ctf_dict_t *src_fp, ctf_id_t src_type)
3129{
3130  ctf_dict_t *output = NULL;
3131  ctf_dedup_t *d;
3132  int input_num;
3133  void *num_ptr;
3134  void *type_ptr;
3135  int found;
3136  const char *hval;
3137
3138  /* It is an error (an internal error in the caller, in ctf-link.c) to call
3139     this with an FP that is not a per-CU output or shared output dict, or with
3140     a SRC_FP that was not passed to ctf_dedup as an input; it is an internal
3141     error in ctf-dedup for the type passed not to have been hashed, though if
3142     the src_fp is a child dict and the type is not a child type, it will have
3143     been hashed under the GID corresponding to the parent.  */
3144
3145  if (fp->ctf_dedup.cd_type_hashes != NULL)
3146    output = fp;
3147  else if (fp->ctf_parent && fp->ctf_parent->ctf_dedup.cd_type_hashes != NULL)
3148    output = fp->ctf_parent;
3149  else
3150    {
3151      ctf_set_errno (fp, ECTF_INTERNAL);
3152      ctf_err_warn (fp, 0, ECTF_INTERNAL,
3153		    _("dict %p passed to ctf_dedup_type_mapping is not a "
3154		      "deduplicated output"), (void *) fp);
3155      return CTF_ERR;
3156    }
3157
3158  if (src_fp->ctf_parent && ctf_type_isparent (src_fp, src_type))
3159    src_fp = src_fp->ctf_parent;
3160
3161  d = &output->ctf_dedup;
3162
3163  found = ctf_dynhash_lookup_kv (d->cd_input_nums, src_fp, NULL, &num_ptr);
3164  if (!ctf_assert (output, found != 0))
3165    return CTF_ERR;				/* errno is set for us.  */
3166  input_num = (uintptr_t) num_ptr;
3167
3168  hval = ctf_dynhash_lookup (d->cd_type_hashes,
3169			     CTF_DEDUP_GID (output, input_num, src_type));
3170
3171  if (!ctf_assert (output, hval != NULL))
3172    return CTF_ERR;				/* errno is set for us.  */
3173
3174  /* The emission hashes may be unset if this dict was created after
3175     deduplication to house variables or other things that would conflict if
3176     stored in the shared dict.  */
3177  if (fp->ctf_dedup.cd_output_emission_hashes)
3178    if (ctf_dynhash_lookup_kv (fp->ctf_dedup.cd_output_emission_hashes, hval,
3179			       NULL, &type_ptr))
3180      return (ctf_id_t) (uintptr_t) type_ptr;
3181
3182  if (fp->ctf_parent)
3183    {
3184      ctf_dict_t *pfp = fp->ctf_parent;
3185      if (pfp->ctf_dedup.cd_output_emission_hashes)
3186	if (ctf_dynhash_lookup_kv (pfp->ctf_dedup.cd_output_emission_hashes,
3187				   hval, NULL, &type_ptr))
3188	  return (ctf_id_t) (uintptr_t) type_ptr;
3189    }
3190
3191  return 0;
3192}
3193