1178481Sjb/*
2178481Sjb * CDDL HEADER START
3178481Sjb *
4178481Sjb * The contents of this file are subject to the terms of the
5178481Sjb * Common Development and Distribution License (the "License").
6178481Sjb * You may not use this file except in compliance with the License.
7178481Sjb *
8178481Sjb * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9178481Sjb * or http://www.opensolaris.org/os/licensing.
10178481Sjb * See the License for the specific language governing permissions
11178481Sjb * and limitations under the License.
12178481Sjb *
13178481Sjb * When distributing Covered Code, include this CDDL HEADER in each
14178481Sjb * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15178481Sjb * If applicable, add the following below this CDDL HEADER, with the
16178481Sjb * fields enclosed by brackets "[]" replaced with your own identifying
17178481Sjb * information: Portions Copyright [yyyy] [name of copyright owner]
18178481Sjb *
19178481Sjb * CDDL HEADER END
20178481Sjb */
21178481Sjb/*
22210767Srpaulo * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23178481Sjb * Use is subject to license terms.
24178481Sjb */
25178481Sjb
26178481Sjb#pragma ident	"%Z%%M%	%I%	%E% SMI"
27178481Sjb
28178481Sjb/*
29178481Sjb * Given several files containing CTF data, merge and uniquify that data into
30178481Sjb * a single CTF section in an output file.
31178481Sjb *
32178481Sjb * Merges can proceed independently.  As such, we perform the merges in parallel
33178481Sjb * using a worker thread model.  A given glob of CTF data (either all of the CTF
34178481Sjb * data from a single input file, or the result of one or more merges) can only
35178481Sjb * be involved in a single merge at any given time, so the process decreases in
36178481Sjb * parallelism, especially towards the end, as more and more files are
37178481Sjb * consolidated, finally resulting in a single merge of two large CTF graphs.
38178481Sjb * Unfortunately, the last merge is also the slowest, as the two graphs being
39178481Sjb * merged are each the product of merges of half of the input files.
40178481Sjb *
41178481Sjb * The algorithm consists of two phases, described in detail below.  The first
42178481Sjb * phase entails the merging of CTF data in groups of eight.  The second phase
43178481Sjb * takes the results of Phase I, and merges them two at a time.  This disparity
44178481Sjb * is due to an observation that the merge time increases at least quadratically
45178481Sjb * with the size of the CTF data being merged.  As such, merges of CTF graphs
46178481Sjb * newly read from input files are much faster than merges of CTF graphs that
47178481Sjb * are themselves the results of prior merges.
48178481Sjb *
49178481Sjb * A further complication is the need to ensure the repeatability of CTF merges.
50178481Sjb * That is, a merge should produce the same output every time, given the same
51178481Sjb * input.  In both phases, this consistency requirement is met by imposing an
52178481Sjb * ordering on the merge process, thus ensuring that a given set of input files
53178481Sjb * are merged in the same order every time.
54178481Sjb *
55178481Sjb *   Phase I
56178481Sjb *
57178481Sjb *   The main thread reads the input files one by one, transforming the CTF
58178481Sjb *   data they contain into tdata structures.  When a given file has been read
59178481Sjb *   and parsed, it is placed on the work queue for retrieval by worker threads.
60178481Sjb *
 *   Central to Phase I is the Work In Progress (wip) array, which is used to
 *   merge batches of files in a predictable order.  Files are read by the main
 *   thread, and are merged into wip array elements in round-robin order.  When
 *   the number of files merged into a given array slot equals the batch size,
 *   the merged CTF graph in that slot is added to the done queue
 *   (wq_donequeue), in batch ID order.
67178481Sjb *
68178481Sjb *   For example, consider a case where we have five input files, a batch size
69178481Sjb *   of two, a wip array size of two, and two worker threads (T1 and T2).
70178481Sjb *
71178481Sjb *    1. The wip array elements are assigned initial batch numbers 0 and 1.
72178481Sjb *    2. T1 reads an input file from the input queue (wq_queue).  This is the
73178481Sjb *       first input file, so it is placed into wip[0].  The second file is
74178481Sjb *       similarly read and placed into wip[1].  The wip array slots now contain
75178481Sjb *       one file each (wip_nmerged == 1).
76178481Sjb *    3. T1 reads the third input file, which it merges into wip[0].  The
77178481Sjb *       number of files in wip[0] is equal to the batch size.
78178481Sjb *    4. T2 reads the fourth input file, which it merges into wip[1].  wip[1]
79178481Sjb *       is now full too.
80178481Sjb *    5. T2 attempts to place the contents of wip[1] on the done queue
81178481Sjb *       (wq_done_queue), but it can't, since the batch ID for wip[1] is 1.
82178481Sjb *       Batch 0 needs to be on the done queue before batch 1 can be added, so
83178481Sjb *       T2 blocks on wip[1]'s cv.
84178481Sjb *    6. T1 attempts to place the contents of wip[0] on the done queue, and
85178481Sjb *       succeeds, updating wq_lastdonebatch to 0.  It clears wip[0], and sets
86178481Sjb *       its batch ID to 2.  T1 then signals wip[1]'s cv to awaken T2.
87178481Sjb *    7. T2 wakes up, notices that wq_lastdonebatch is 0, which means that
88178481Sjb *       batch 1 can now be added.  It adds wip[1] to the done queue, clears
89178481Sjb *       wip[1], and sets its batch ID to 3.  It signals wip[0]'s cv, and
90178481Sjb *       restarts.
91178481Sjb *
 *   The above process continues until all input files have been consumed.  At
 *   this point, a pair of barriers are used to allow a single thread to move
 *   any partial batches from the wip array to the done queue in batch ID order.
 *   When this is complete, the done queue (wq_donequeue) becomes wq_queue, and
 *   Phase II begins.
97178481Sjb *
98178481Sjb *	Locking Semantics (Phase I)
99178481Sjb *
 *	The input queue (wq_queue) and the done queue (wq_donequeue) are
 *	protected by separate mutexes - wq_queue_lock and wq_donequeue_lock.
 *	wip array slots are protected by their own mutexes, which must be
 *	grabbed before releasing the input queue lock.  The wip array lock is
 *	dropped when the thread restarts the loop.  If the array slot was full,
 *	the array lock will be held while the slot contents are added to the
 *	done queue.  The done queue lock is used to protect the wip slot cv's.
107178481Sjb *
108178481Sjb *	The pow number is protected by the queue lock.  The master batch ID
109178481Sjb *	and last completed batch (wq_lastdonebatch) counters are protected *in
110178481Sjb *	Phase I* by the done queue lock.
111178481Sjb *
112178481Sjb *   Phase II
113178481Sjb *
114178481Sjb *   When Phase II begins, the queue consists of the merged batches from the
115178481Sjb *   first phase.  Assume we have five batches:
116178481Sjb *
117178481Sjb *	Q:	a b c d e
118178481Sjb *
119178481Sjb *   Using the same batch ID mechanism we used in Phase I, but without the wip
120178481Sjb *   array, worker threads remove two entries at a time from the beginning of
121178481Sjb *   the queue.  These two entries are merged, and are added back to the tail
122178481Sjb *   of the queue, as follows:
123178481Sjb *
124178481Sjb *	Q:	a b c d e	# start
125178481Sjb *	Q:	c d e ab	# a, b removed, merged, added to end
126178481Sjb *	Q:	e ab cd		# c, d removed, merged, added to end
127178481Sjb *	Q:	cd eab		# e, ab removed, merged, added to end
128178481Sjb *	Q:	cdeab		# cd, eab removed, merged, added to end
129178481Sjb *
 *   When one entry remains on the queue, with no merges outstanding, Phase II
 *   finishes.  We pre-determine the stopping point by pre-calculating the
 *   number of nodes that will appear on the list.  In the example above, the
 *   number (wq_ninqueue) is 9.  When ninqueue is 1, the workers meet at a
 *   barrier and one of them concludes Phase II by setting wq_alldone and
 *   signaling the main thread via wq_alldone_cv.  (wq_done_cv is only used to
 *   order the re-queueing of merged results within Phase II.)
135178481Sjb *
136178481Sjb *	Locking Semantics (Phase II)
137178481Sjb *
138178481Sjb *	The queue (wq_queue), ninqueue, and the master batch ID and last
139178481Sjb *	completed batch counters are protected by wq_queue_lock.  The done
140178481Sjb *	queue and corresponding lock are unused in Phase II as is the wip array.
141178481Sjb *
142178481Sjb *   Uniquification
143178481Sjb *
144178481Sjb *   We want the CTF data that goes into a given module to be as small as
145178481Sjb *   possible.  For example, we don't want it to contain any type data that may
146178481Sjb *   be present in another common module.  As such, after creating the master
147178481Sjb *   tdata_t for a given module, we can, if requested by the user, uniquify it
148178481Sjb *   against the tdata_t from another module (genunix in the case of the SunOS
149178481Sjb *   kernel).  We perform a merge between the tdata_t for this module and the
150178481Sjb *   tdata_t from genunix.  Nodes found in this module that are not present in
151178481Sjb *   genunix are added to a third tdata_t - the uniquified tdata_t.
152178481Sjb *
153178481Sjb *   Additive Merges
154178481Sjb *
155178481Sjb *   In some cases, for example if we are issuing a new version of a common
156178481Sjb *   module in a patch, we need to make sure that the CTF data already present
157178481Sjb *   in that module does not change.  Changes to this data would void the CTF
158178481Sjb *   data in any module that uniquified against the common module.  To preserve
159178481Sjb *   the existing data, we can perform what is known as an additive merge.  In
160178481Sjb *   this case, a final uniquification is performed against the CTF data in the
161178481Sjb *   previous version of the module.  The result will be the placement of new
162178481Sjb *   and changed data after the existing data, thus preserving the existing type
163178481Sjb *   ID space.
164178481Sjb *
165178481Sjb *   Saving the result
166178481Sjb *
167178481Sjb *   When the merges are complete, the resulting tdata_t is placed into the
168178481Sjb *   output file, replacing the .SUNW_ctf section (if any) already in that file.
169178481Sjb *
170178481Sjb * The person who changes the merging thread code in this file without updating
171178481Sjb * this comment will not live to see the stock hit five.
172178481Sjb */
173178481Sjb
174178481Sjb#include <stdio.h>
175178481Sjb#include <stdlib.h>
176178481Sjb#include <unistd.h>
177178481Sjb#include <pthread.h>
178178481Sjb#include <assert.h>
179297077Smav#ifdef illumos
180178481Sjb#include <synch.h>
181178546Sjb#endif
182178481Sjb#include <signal.h>
183178481Sjb#include <libgen.h>
184178481Sjb#include <string.h>
185178481Sjb#include <errno.h>
186297077Smav#ifdef illumos
187178481Sjb#include <alloca.h>
188178546Sjb#endif
189178481Sjb#include <sys/param.h>
190178481Sjb#include <sys/types.h>
191178481Sjb#include <sys/mman.h>
192297077Smav#ifdef illumos
193178481Sjb#include <sys/sysconf.h>
194178546Sjb#endif
195178481Sjb
196178481Sjb#include "ctf_headers.h"
197178481Sjb#include "ctftools.h"
198178481Sjb#include "ctfmerge.h"
199178481Sjb#include "traverse.h"
200178481Sjb#include "memory.h"
201178481Sjb#include "fifo.h"
202178481Sjb#include "barrier.h"
203178481Sjb
/*
 * Sun-style initializer pragma naming bigheap(), which is only compiled
 * under "#ifdef illumos" below.
 * NOTE(review): presumably this makes bigheap() run before main() on illumos
 * and is ignored by other compilers -- confirm.
 */
#pragma init(bigheap)

/*
 * Default Phase I tunables.  wq_init() uses these when the corresponding
 * CTFMERGE_PHASE1_BATCH_SIZE, CTFMERGE_MAX_SLOTS and CTFMERGE_INPUT_THROTTLE
 * environment variables are not set.  The throttle value is multiplied by
 * the number of worker threads to give the input queue length limit.
 */
#define	MERGE_PHASE1_BATCH_SIZE		8
#define	MERGE_PHASE1_MAX_SLOTS		5
#define	MERGE_INPUT_THROTTLE_LEN	10

/* Basename of argv[0]; set at the top of main() and used by usage(). */
const char *progname;
/* File named by -o; terminate_cleanup() removes it (except on FreeBSD). */
static char *outfile = NULL;
/* Temporary output file name; terminate_cleanup() unlinks it if set. */
static char *tmpname = NULL;
/* Not referenced in the part of the file reviewed here. */
static int dynsym;
/* Debug verbosity; main() overrides it from CTFMERGE_DEBUG_LEVEL. */
int debug_level = DEBUG_LEVEL;
/* Largest page size (4MB) that bigheap() will select for the heap. */
static size_t maxpgsize = 0x400000;
217178481Sjb
218178481Sjbvoid
219178481Sjbusage(void)
220178481Sjb{
221178481Sjb	(void) fprintf(stderr,
222178481Sjb	    "Usage: %s [-fgstv] -l label | -L labelenv -o outfile file ...\n"
223178481Sjb	    "       %s [-fgstv] -l label | -L labelenv -o outfile -d uniqfile\n"
224178481Sjb	    "       %*s [-g] [-D uniqlabel] file ...\n"
225178481Sjb	    "       %s [-fgstv] -l label | -L labelenv -o outfile -w withfile "
226178481Sjb	    "file ...\n"
227178481Sjb	    "       %s [-g] -c srcfile destfile\n"
228178481Sjb	    "\n"
229178481Sjb	    "  Note: if -L labelenv is specified and labelenv is not set in\n"
230178481Sjb	    "  the environment, a default value is used.\n",
231228597Sdim	    progname, progname, (int)strlen(progname), " ",
232178481Sjb	    progname, progname);
233178481Sjb}
234178481Sjb
#ifdef illumos
/*
 * Ask the system to back the heap with the largest supported page size that
 * does not exceed maxpgsize.  Every failure is silent: this is purely an
 * optimization, so on any error we simply return and keep the default page
 * size.
 */
static void
bigheap(void)
{
	size_t big, *size;
	int sizes;
	struct memcntl_mha mha;

	/*
	 * First, get the available pagesizes.
	 */
	if ((sizes = getpagesizes(NULL, 0)) == -1)
		return;

	/* With a single page size there is nothing to choose between. */
	if (sizes == 1 || (size = alloca(sizeof (size_t) * sizes)) == NULL)
		return;

	if (getpagesizes(size, sizes) == -1)
		return;

	/*
	 * Drop trailing entries larger than the cap.  The loop is bounded by
	 * sizes > 0 so that, should every reported size exceed maxpgsize, we
	 * stop instead of reading size[-1].
	 * NOTE(review): this assumes getpagesizes() fills the array in
	 * ascending order -- confirm against the man page.
	 */
	while (sizes > 0 && size[sizes - 1] > maxpgsize)
		sizes--;

	if (sizes == 0)
		return;

	/* set big to the largest allowed page size */
	big = size[sizes - 1];
	if (big & (big - 1)) {
		/*
		 * The largest page size is not a power of two for some
		 * inexplicable reason; return.
		 */
		return;
	}

	/*
	 * Now, align our break to the largest page size.
	 */
	if (brk((void *)((((uintptr_t)sbrk(0) - 1) & ~(big - 1)) + big)) != 0)
		return;

	/*
	 * set the preferred page size for the heap
	 */
	mha.mha_cmd = MHA_MAPSIZE_BSSBRK;
	mha.mha_flags = 0;
	mha.mha_pagesize = big;

	(void) memcntl(NULL, 0, MC_HAT_ADVISE, (caddr_t)&mha, 0, 0);
}
#endif	/* illumos */
284178481Sjb
285178481Sjbstatic void
286178481Sjbfinalize_phase_one(workqueue_t *wq)
287178481Sjb{
288178481Sjb	int startslot, i;
289178481Sjb
290178481Sjb	/*
291178481Sjb	 * wip slots are cleared out only when maxbatchsz td's have been merged
292178481Sjb	 * into them.  We're not guaranteed that the number of files we're
293178481Sjb	 * merging is a multiple of maxbatchsz, so there will be some partial
294178481Sjb	 * groups in the wip array.  Move them to the done queue in batch ID
295178481Sjb	 * order, starting with the slot containing the next batch that would
296178481Sjb	 * have been placed on the done queue, followed by the others.
297178481Sjb	 * One thread will be doing this while the others wait at the barrier
298178481Sjb	 * back in worker_thread(), so we don't need to worry about pesky things
299178481Sjb	 * like locks.
300178481Sjb	 */
301178481Sjb
302178481Sjb	for (startslot = -1, i = 0; i < wq->wq_nwipslots; i++) {
303178481Sjb		if (wq->wq_wip[i].wip_batchid == wq->wq_lastdonebatch + 1) {
304178481Sjb			startslot = i;
305178481Sjb			break;
306178481Sjb		}
307178481Sjb	}
308178481Sjb
309178481Sjb	assert(startslot != -1);
310178481Sjb
311178481Sjb	for (i = startslot; i < startslot + wq->wq_nwipslots; i++) {
312178481Sjb		int slotnum = i % wq->wq_nwipslots;
313178481Sjb		wip_t *wipslot = &wq->wq_wip[slotnum];
314178481Sjb
315178481Sjb		if (wipslot->wip_td != NULL) {
316178481Sjb			debug(2, "clearing slot %d (%d) (saving %d)\n",
317178481Sjb			    slotnum, i, wipslot->wip_nmerged);
318178481Sjb		} else
319178481Sjb			debug(2, "clearing slot %d (%d)\n", slotnum, i);
320178481Sjb
321178481Sjb		if (wipslot->wip_td != NULL) {
322178481Sjb			fifo_add(wq->wq_donequeue, wipslot->wip_td);
323178481Sjb			wq->wq_wip[slotnum].wip_td = NULL;
324178481Sjb		}
325178481Sjb	}
326178481Sjb
327178481Sjb	wq->wq_lastdonebatch = wq->wq_next_batchid++;
328178481Sjb
329178481Sjb	debug(2, "phase one done: donequeue has %d items\n",
330178481Sjb	    fifo_len(wq->wq_donequeue));
331178481Sjb}
332178481Sjb
333178481Sjbstatic void
334178481Sjbinit_phase_two(workqueue_t *wq)
335178481Sjb{
336178481Sjb	int num;
337178481Sjb
338178481Sjb	/*
339178481Sjb	 * We're going to continually merge the first two entries on the queue,
340178481Sjb	 * placing the result on the end, until there's nothing left to merge.
341178481Sjb	 * At that point, everything will have been merged into one.  The
342178481Sjb	 * initial value of ninqueue needs to be equal to the total number of
343178481Sjb	 * entries that will show up on the queue, both at the start of the
344178481Sjb	 * phase and as generated by merges during the phase.
345178481Sjb	 */
346178481Sjb	wq->wq_ninqueue = num = fifo_len(wq->wq_donequeue);
347178481Sjb	while (num != 1) {
348178481Sjb		wq->wq_ninqueue += num / 2;
349178481Sjb		num = num / 2 + num % 2;
350178481Sjb	}
351178481Sjb
352178481Sjb	/*
353178481Sjb	 * Move the done queue to the work queue.  We won't be using the done
354178481Sjb	 * queue in phase 2.
355178481Sjb	 */
356178481Sjb	assert(fifo_len(wq->wq_queue) == 0);
357178481Sjb	fifo_free(wq->wq_queue, NULL);
358178481Sjb	wq->wq_queue = wq->wq_donequeue;
359178481Sjb}
360178481Sjb
/*
 * Move a full wip slot's merged tdata onto the done queue, in batch ID order.
 *
 *   wq      - the shared work queue
 *   slot    - the full wip slot; worker_runphase1() calls this with the
 *             slot's wip_lock held
 *   slotnum - index of `slot' in wq->wq_wip, used to find the next slot
 *
 * Blocks on the slot's condition variable until every earlier batch has been
 * placed on the done queue, then appends this one, wakes a waiter on the
 * following slot, and recycles the slot under a fresh batch ID.
 */
static void
wip_save_work(workqueue_t *wq, wip_t *slot, int slotnum)
{
	pthread_mutex_lock(&wq->wq_donequeue_lock);

	/* Wait until our batch is the next one due on the done queue. */
	while (wq->wq_lastdonebatch + 1 < slot->wip_batchid)
		pthread_cond_wait(&slot->wip_cv, &wq->wq_donequeue_lock);
	assert(wq->wq_lastdonebatch + 1 == slot->wip_batchid);

	fifo_add(wq->wq_donequeue, slot->wip_td);
	wq->wq_lastdonebatch++;
	/* The next slot (round-robin) may be blocked waiting for this batch. */
	pthread_cond_signal(&wq->wq_wip[(slotnum + 1) %
	    wq->wq_nwipslots].wip_cv);

	/*
	 * reset the slot for next use; wip_nmerged is not cleared here because
	 * wip_add_work() sets it back to 1 when it finds wip_td == NULL.
	 */
	slot->wip_td = NULL;
	slot->wip_batchid = wq->wq_next_batchid++;

	pthread_mutex_unlock(&wq->wq_donequeue_lock);
}
381178481Sjb
382178481Sjbstatic void
383178481Sjbwip_add_work(wip_t *slot, tdata_t *pow)
384178481Sjb{
385178481Sjb	if (slot->wip_td == NULL) {
386178481Sjb		slot->wip_td = pow;
387178481Sjb		slot->wip_nmerged = 1;
388178481Sjb	} else {
389178481Sjb		debug(2, "%d: merging %p into %p\n", pthread_self(),
390178481Sjb		    (void *)pow, (void *)slot->wip_td);
391178481Sjb
392178481Sjb		merge_into_master(pow, slot->wip_td, NULL, 0);
393178481Sjb		tdata_free(pow);
394178481Sjb
395178481Sjb		slot->wip_nmerged++;
396178481Sjb	}
397178481Sjb}
398178481Sjb
/*
 * Phase I worker loop: take parsed input files off the input queue and merge
 * each into its wip slot, chosen round-robin by arrival number.  Returns when
 * the queue is empty and wq_nomorefiles has been set.
 */
static void
worker_runphase1(workqueue_t *wq)
{
	wip_t *wipslot;
	tdata_t *pow;
	int wipslotnum, pownum;

	for (;;) {
		pthread_mutex_lock(&wq->wq_queue_lock);

		while (fifo_empty(wq->wq_queue)) {
			if (wq->wq_nomorefiles == 1) {
				/*
				 * Wake any other workers still waiting for
				 * input so that they see the flag and leave
				 * Phase I as well.
				 */
				pthread_cond_broadcast(&wq->wq_work_avail);
				pthread_mutex_unlock(&wq->wq_queue_lock);

				/* on to phase 2 ... */
				return;
			}

			pthread_cond_wait(&wq->wq_work_avail,
			    &wq->wq_queue_lock);
		}

		/* there's work to be done! */
		pow = fifo_remove(wq->wq_queue);
		pownum = wq->wq_nextpownum++;
		/* Let a throttled merge_ctf_cb() know there is room again. */
		pthread_cond_broadcast(&wq->wq_work_removed);

		assert(pow != NULL);

		/* merge it into the right slot */
		wipslotnum = pownum % wq->wq_nwipslots;
		wipslot = &wq->wq_wip[wipslotnum];

		/*
		 * Take the slot lock before dropping the queue lock, so that
		 * work destined for the same slot is merged in the order in
		 * which it was removed from the queue.
		 */
		pthread_mutex_lock(&wipslot->wip_lock);

		pthread_mutex_unlock(&wq->wq_queue_lock);

		wip_add_work(wipslot, pow);

		/* A full batch goes to the done queue; the slot is recycled. */
		if (wipslot->wip_nmerged == wq->wq_maxbatchsz)
			wip_save_work(wq, wipslot, wipslotnum);

		pthread_mutex_unlock(&wipslot->wip_lock);
	}
}
445178481Sjb
446178481Sjbstatic void
447178481Sjbworker_runphase2(workqueue_t *wq)
448178481Sjb{
449178481Sjb	tdata_t *pow1, *pow2;
450178481Sjb	int batchid;
451178481Sjb
452178481Sjb	for (;;) {
453178481Sjb		pthread_mutex_lock(&wq->wq_queue_lock);
454178481Sjb
455178481Sjb		if (wq->wq_ninqueue == 1) {
456178481Sjb			pthread_cond_broadcast(&wq->wq_work_avail);
457178481Sjb			pthread_mutex_unlock(&wq->wq_queue_lock);
458178481Sjb
459178481Sjb			debug(2, "%d: entering p2 completion barrier\n",
460178481Sjb			    pthread_self());
461178481Sjb			if (barrier_wait(&wq->wq_bar1)) {
462178481Sjb				pthread_mutex_lock(&wq->wq_queue_lock);
463178481Sjb				wq->wq_alldone = 1;
464178481Sjb				pthread_cond_signal(&wq->wq_alldone_cv);
465178481Sjb				pthread_mutex_unlock(&wq->wq_queue_lock);
466178481Sjb			}
467178481Sjb
468178481Sjb			return;
469178481Sjb		}
470178481Sjb
471178481Sjb		if (fifo_len(wq->wq_queue) < 2) {
472178481Sjb			pthread_cond_wait(&wq->wq_work_avail,
473178481Sjb			    &wq->wq_queue_lock);
474178481Sjb			pthread_mutex_unlock(&wq->wq_queue_lock);
475178481Sjb			continue;
476178481Sjb		}
477178481Sjb
478178481Sjb		/* there's work to be done! */
479178481Sjb		pow1 = fifo_remove(wq->wq_queue);
480178481Sjb		pow2 = fifo_remove(wq->wq_queue);
481178481Sjb		wq->wq_ninqueue -= 2;
482178481Sjb
483178481Sjb		batchid = wq->wq_next_batchid++;
484178481Sjb
485178481Sjb		pthread_mutex_unlock(&wq->wq_queue_lock);
486178481Sjb
487178481Sjb		debug(2, "%d: merging %p into %p\n", pthread_self(),
488178481Sjb		    (void *)pow1, (void *)pow2);
489178481Sjb		merge_into_master(pow1, pow2, NULL, 0);
490178481Sjb		tdata_free(pow1);
491178481Sjb
492178481Sjb		/*
493178481Sjb		 * merging is complete.  place at the tail of the queue in
494178481Sjb		 * proper order.
495178481Sjb		 */
496178481Sjb		pthread_mutex_lock(&wq->wq_queue_lock);
497178481Sjb		while (wq->wq_lastdonebatch + 1 != batchid) {
498178481Sjb			pthread_cond_wait(&wq->wq_done_cv,
499178481Sjb			    &wq->wq_queue_lock);
500178481Sjb		}
501178481Sjb
502178481Sjb		wq->wq_lastdonebatch = batchid;
503178481Sjb
504178481Sjb		fifo_add(wq->wq_queue, pow2);
505178481Sjb		debug(2, "%d: added %p to queue, len now %d, ninqueue %d\n",
506178481Sjb		    pthread_self(), (void *)pow2, fifo_len(wq->wq_queue),
507178481Sjb		    wq->wq_ninqueue);
508178481Sjb		pthread_cond_broadcast(&wq->wq_done_cv);
509178481Sjb		pthread_cond_signal(&wq->wq_work_avail);
510178481Sjb		pthread_mutex_unlock(&wq->wq_queue_lock);
511178481Sjb	}
512178481Sjb}
513178481Sjb
514178481Sjb/*
515178481Sjb * Main loop for worker threads.
516178481Sjb */
517178481Sjbstatic void
518178481Sjbworker_thread(workqueue_t *wq)
519178481Sjb{
520178481Sjb	worker_runphase1(wq);
521178481Sjb
522178481Sjb	debug(2, "%d: entering first barrier\n", pthread_self());
523178481Sjb
524178481Sjb	if (barrier_wait(&wq->wq_bar1)) {
525178481Sjb
526178481Sjb		debug(2, "%d: doing work in first barrier\n", pthread_self());
527178481Sjb
528178481Sjb		finalize_phase_one(wq);
529178481Sjb
530178481Sjb		init_phase_two(wq);
531178481Sjb
532178481Sjb		debug(2, "%d: ninqueue is %d, %d on queue\n", pthread_self(),
533178481Sjb		    wq->wq_ninqueue, fifo_len(wq->wq_queue));
534178481Sjb	}
535178481Sjb
536178481Sjb	debug(2, "%d: entering second barrier\n", pthread_self());
537178481Sjb
538178481Sjb	(void) barrier_wait(&wq->wq_bar2);
539178481Sjb
540178481Sjb	debug(2, "%d: phase 1 complete\n", pthread_self());
541178481Sjb
542178481Sjb	worker_runphase2(wq);
543178481Sjb}
544178481Sjb
545178481Sjb/*
546178481Sjb * Pass a tdata_t tree, built from an input file, off to the work queue for
547178481Sjb * consumption by worker threads.
548178481Sjb */
549178481Sjbstatic int
550178481Sjbmerge_ctf_cb(tdata_t *td, char *name, void *arg)
551178481Sjb{
552178481Sjb	workqueue_t *wq = arg;
553178481Sjb
554178481Sjb	debug(3, "Adding tdata %p for processing\n", (void *)td);
555178481Sjb
556178481Sjb	pthread_mutex_lock(&wq->wq_queue_lock);
557178481Sjb	while (fifo_len(wq->wq_queue) > wq->wq_ithrottle) {
558178481Sjb		debug(2, "Throttling input (len = %d, throttle = %d)\n",
559178481Sjb		    fifo_len(wq->wq_queue), wq->wq_ithrottle);
560178481Sjb		pthread_cond_wait(&wq->wq_work_removed, &wq->wq_queue_lock);
561178481Sjb	}
562178481Sjb
563178481Sjb	fifo_add(wq->wq_queue, td);
564178481Sjb	debug(1, "Thread %d announcing %s\n", pthread_self(), name);
565178481Sjb	pthread_cond_broadcast(&wq->wq_work_avail);
566178481Sjb	pthread_mutex_unlock(&wq->wq_queue_lock);
567178481Sjb
568178481Sjb	return (1);
569178481Sjb}
570178481Sjb
571178481Sjb/*
572178481Sjb * This program is intended to be invoked from a Makefile, as part of the build.
573178481Sjb * As such, in the event of a failure or user-initiated interrupt (^C), we need
574178481Sjb * to ensure that a subsequent re-make will cause ctfmerge to be executed again.
575178481Sjb * Unfortunately, ctfmerge will usually be invoked directly after (and as part
576178481Sjb * of the same Makefile rule as) a link, and will operate on the linked file
577178481Sjb * in place.  If we merely exit upon receipt of a SIGINT, a subsequent make
578178481Sjb * will notice that the *linked* file is newer than the object files, and thus
579178481Sjb * will not reinvoke ctfmerge.  The only way to ensure that a subsequent make
580178481Sjb * reinvokes ctfmerge, is to remove the file to which we are adding CTF
581178481Sjb * data (confusingly named the output file).  This means that the link will need
582178481Sjb * to happen again, but links are generally fast, and we can't allow the merge
583178481Sjb * to be skipped.
584178481Sjb *
585178481Sjb * Another possibility would be to block SIGINT entirely - to always run to
586178481Sjb * completion.  The run time of ctfmerge can, however, be measured in minutes
587178481Sjb * in some cases, so this is not a valid option.
588178481Sjb */
/*
 * Signal handler installed by start_threads() for SIGINT, SIGQUIT and
 * SIGTERM: reports the signal number through terminate().
 *
 * NOTE(review): terminate() is defined elsewhere.  If it uses stdio or other
 * functions that are not async-signal-safe, calling it from a handler is
 * formally unsafe -- confirm whether that matters here.
 */
static void
handle_sig(int sig)
{
	terminate("Caught signal %d - exiting\n", sig);
}
594178481Sjb
595178481Sjbstatic void
596178481Sjbterminate_cleanup(void)
597178481Sjb{
598178481Sjb	int dounlink = getenv("CTFMERGE_TERMINATE_NO_UNLINK") ? 0 : 1;
599178481Sjb
600178481Sjb	if (tmpname != NULL && dounlink)
601178481Sjb		unlink(tmpname);
602178481Sjb
603178481Sjb	if (outfile == NULL)
604178481Sjb		return;
605178481Sjb
606178546Sjb#if !defined(__FreeBSD__)
607178481Sjb	if (dounlink) {
608178481Sjb		fprintf(stderr, "Removing %s\n", outfile);
609178481Sjb		unlink(outfile);
610178481Sjb	}
611178546Sjb#endif
612178481Sjb}
613178481Sjb
614178481Sjbstatic void
615178481Sjbcopy_ctf_data(char *srcfile, char *destfile, int keep_stabs)
616178481Sjb{
617178481Sjb	tdata_t *srctd;
618178481Sjb
619178481Sjb	if (read_ctf(&srcfile, 1, NULL, read_ctf_save_cb, &srctd, 1) == 0)
620178481Sjb		terminate("No CTF data found in source file %s\n", srcfile);
621178481Sjb
622178481Sjb	tmpname = mktmpname(destfile, ".ctf");
623233407Sgonzo	write_ctf(srctd, destfile, tmpname, CTF_COMPRESS | CTF_SWAP_BYTES | keep_stabs);
624178481Sjb	if (rename(tmpname, destfile) != 0) {
625178481Sjb		terminate("Couldn't rename temp file %s to %s", tmpname,
626178481Sjb		    destfile);
627178481Sjb	}
628178481Sjb	free(tmpname);
629178481Sjb	tdata_free(srctd);
630178481Sjb}
631178481Sjb
632178481Sjbstatic void
633178481Sjbwq_init(workqueue_t *wq, int nfiles)
634178481Sjb{
635178481Sjb	int throttle, nslots, i;
636178481Sjb
637178481Sjb	if (getenv("CTFMERGE_MAX_SLOTS"))
638178481Sjb		nslots = atoi(getenv("CTFMERGE_MAX_SLOTS"));
639178481Sjb	else
640178481Sjb		nslots = MERGE_PHASE1_MAX_SLOTS;
641178481Sjb
642178481Sjb	if (getenv("CTFMERGE_PHASE1_BATCH_SIZE"))
643178481Sjb		wq->wq_maxbatchsz = atoi(getenv("CTFMERGE_PHASE1_BATCH_SIZE"));
644178481Sjb	else
645178481Sjb		wq->wq_maxbatchsz = MERGE_PHASE1_BATCH_SIZE;
646178481Sjb
647178481Sjb	nslots = MIN(nslots, (nfiles + wq->wq_maxbatchsz - 1) /
648178481Sjb	    wq->wq_maxbatchsz);
649178481Sjb
650178481Sjb	wq->wq_wip = xcalloc(sizeof (wip_t) * nslots);
651178481Sjb	wq->wq_nwipslots = nslots;
652178481Sjb	wq->wq_nthreads = MIN(sysconf(_SC_NPROCESSORS_ONLN) * 3 / 2, nslots);
653210767Srpaulo	wq->wq_thread = xmalloc(sizeof (pthread_t) * wq->wq_nthreads);
654178481Sjb
655178481Sjb	if (getenv("CTFMERGE_INPUT_THROTTLE"))
656178481Sjb		throttle = atoi(getenv("CTFMERGE_INPUT_THROTTLE"));
657178481Sjb	else
658178481Sjb		throttle = MERGE_INPUT_THROTTLE_LEN;
659178481Sjb	wq->wq_ithrottle = throttle * wq->wq_nthreads;
660178481Sjb
661178481Sjb	debug(1, "Using %d slots, %d threads\n", wq->wq_nwipslots,
662178481Sjb	    wq->wq_nthreads);
663178481Sjb
664178481Sjb	wq->wq_next_batchid = 0;
665178481Sjb
666178481Sjb	for (i = 0; i < nslots; i++) {
667178481Sjb		pthread_mutex_init(&wq->wq_wip[i].wip_lock, NULL);
668178481Sjb		wq->wq_wip[i].wip_batchid = wq->wq_next_batchid++;
669178481Sjb	}
670178481Sjb
671178481Sjb	pthread_mutex_init(&wq->wq_queue_lock, NULL);
672178481Sjb	wq->wq_queue = fifo_new();
673178481Sjb	pthread_cond_init(&wq->wq_work_avail, NULL);
674178481Sjb	pthread_cond_init(&wq->wq_work_removed, NULL);
675178481Sjb	wq->wq_ninqueue = nfiles;
676178481Sjb	wq->wq_nextpownum = 0;
677178481Sjb
678178481Sjb	pthread_mutex_init(&wq->wq_donequeue_lock, NULL);
679178481Sjb	wq->wq_donequeue = fifo_new();
680178481Sjb	wq->wq_lastdonebatch = -1;
681178481Sjb
682178481Sjb	pthread_cond_init(&wq->wq_done_cv, NULL);
683178481Sjb
684178481Sjb	pthread_cond_init(&wq->wq_alldone_cv, NULL);
685178481Sjb	wq->wq_alldone = 0;
686178481Sjb
687178481Sjb	barrier_init(&wq->wq_bar1, wq->wq_nthreads);
688178481Sjb	barrier_init(&wq->wq_bar2, wq->wq_nthreads);
689178481Sjb
690178481Sjb	wq->wq_nomorefiles = 0;
691178481Sjb}
692178481Sjb
693178481Sjbstatic void
694178481Sjbstart_threads(workqueue_t *wq)
695178481Sjb{
696178481Sjb	sigset_t sets;
697178481Sjb	int i;
698178481Sjb
699178481Sjb	sigemptyset(&sets);
700178481Sjb	sigaddset(&sets, SIGINT);
701178481Sjb	sigaddset(&sets, SIGQUIT);
702178481Sjb	sigaddset(&sets, SIGTERM);
703178481Sjb	pthread_sigmask(SIG_BLOCK, &sets, NULL);
704178481Sjb
705178481Sjb	for (i = 0; i < wq->wq_nthreads; i++) {
706210767Srpaulo		pthread_create(&wq->wq_thread[i], NULL,
707210767Srpaulo		    (void *(*)(void *))worker_thread, wq);
708178481Sjb	}
709178481Sjb
710297077Smav#ifdef illumos
711178481Sjb	sigset(SIGINT, handle_sig);
712178481Sjb	sigset(SIGQUIT, handle_sig);
713178481Sjb	sigset(SIGTERM, handle_sig);
714178546Sjb#else
715178546Sjb	signal(SIGINT, handle_sig);
716178546Sjb	signal(SIGQUIT, handle_sig);
717178546Sjb	signal(SIGTERM, handle_sig);
718178546Sjb#endif
719178481Sjb	pthread_sigmask(SIG_UNBLOCK, &sets, NULL);
720178481Sjb}
721178481Sjb
722210767Srpaulostatic void
723210767Srpaulojoin_threads(workqueue_t *wq)
724210767Srpaulo{
725210767Srpaulo	int i;
726210767Srpaulo
727210767Srpaulo	for (i = 0; i < wq->wq_nthreads; i++) {
728210767Srpaulo		pthread_join(wq->wq_thread[i], NULL);
729210767Srpaulo	}
730210767Srpaulo}
731210767Srpaulo
/*
 * Comparison callback for arrays of C strings (e.g. for qsort()): p1 and p2
 * each point at a `char *' element, and the strings they refer to are
 * ordered with strcmp().
 */
static int
strcompare(const void *p1, const void *p2)
{
	const char *const *lhs = p1;
	const char *const *rhs = p2;

	return (strcmp(*lhs, *rhs));
}
740178481Sjb
/*
 * Core work queue structure; passed to worker threads on thread creation
 * as the main point of coordination.  Allocate as a static structure; we
 * could have put this into a local variable in main, but passing a pointer
 * into your stack to another thread is fragile at best and leads to some
 * hard-to-debug failure modes.
 *
 * Note that the helper functions in this file take a `workqueue_t *wq'
 * parameter of the same name, which shadows this object inside them.
 */
static workqueue_t wq;
749210767Srpaulo
750178481Sjbint
751178481Sjbmain(int argc, char **argv)
752178481Sjb{
753178481Sjb	tdata_t *mstrtd, *savetd;
754178481Sjb	char *uniqfile = NULL, *uniqlabel = NULL;
755178481Sjb	char *withfile = NULL;
756178481Sjb	char *label = NULL;
757178481Sjb	char **ifiles, **tifiles;
758178481Sjb	int verbose = 0, docopy = 0;
759178481Sjb	int write_fuzzy_match = 0;
760178481Sjb	int keep_stabs = 0;
761178481Sjb	int require_ctf = 0;
762178481Sjb	int nifiles, nielems;
763178481Sjb	int c, i, idx, tidx, err;
764178481Sjb
765178481Sjb	progname = basename(argv[0]);
766178481Sjb
767178481Sjb	if (getenv("CTFMERGE_DEBUG_LEVEL"))
768178481Sjb		debug_level = atoi(getenv("CTFMERGE_DEBUG_LEVEL"));
769178481Sjb
770178481Sjb	err = 0;
771178481Sjb	while ((c = getopt(argc, argv, ":cd:D:fgl:L:o:tvw:s")) != EOF) {
772178481Sjb		switch (c) {
773178481Sjb		case 'c':
774178481Sjb			docopy = 1;
775178481Sjb			break;
776178481Sjb		case 'd':
777178481Sjb			/* Uniquify against `uniqfile' */
778178481Sjb			uniqfile = optarg;
779178481Sjb			break;
780178481Sjb		case 'D':
781178481Sjb			/* Uniquify against label `uniqlabel' in `uniqfile' */
782178481Sjb			uniqlabel = optarg;
783178481Sjb			break;
784178481Sjb		case 'f':
785178481Sjb			write_fuzzy_match = CTF_FUZZY_MATCH;
786178481Sjb			break;
787178481Sjb		case 'g':
788178481Sjb			keep_stabs = CTF_KEEP_STABS;
789178481Sjb			break;
790178481Sjb		case 'l':
791178481Sjb			/* Label merged types with `label' */
792178481Sjb			label = optarg;
793178481Sjb			break;
794178481Sjb		case 'L':
795178481Sjb			/* Label merged types with getenv(`label`) */
796178481Sjb			if ((label = getenv(optarg)) == NULL)
797178481Sjb				label = CTF_DEFAULT_LABEL;
798178481Sjb			break;
799178481Sjb		case 'o':
800178481Sjb			/* Place merged types in CTF section in `outfile' */
801178481Sjb			outfile = optarg;
802178481Sjb			break;
803178481Sjb		case 't':
804178481Sjb			/* Insist *all* object files built from C have CTF */
805178481Sjb			require_ctf = 1;
806178481Sjb			break;
807178481Sjb		case 'v':
808178481Sjb			/* More debugging information */
809178481Sjb			verbose = 1;
810178481Sjb			break;
811178481Sjb		case 'w':
812178481Sjb			/* Additive merge with data from `withfile' */
813178481Sjb			withfile = optarg;
814178481Sjb			break;
815178481Sjb		case 's':
816178481Sjb			/* use the dynsym rather than the symtab */
817178481Sjb			dynsym = CTF_USE_DYNSYM;
818178481Sjb			break;
819178481Sjb		default:
820178481Sjb			usage();
821178481Sjb			exit(2);
822178481Sjb		}
823178481Sjb	}
824178481Sjb
825178481Sjb	/* Validate arguments */
826178481Sjb	if (docopy) {
827178481Sjb		if (uniqfile != NULL || uniqlabel != NULL || label != NULL ||
828178481Sjb		    outfile != NULL || withfile != NULL || dynsym != 0)
829178481Sjb			err++;
830178481Sjb
831178481Sjb		if (argc - optind != 2)
832178481Sjb			err++;
833178481Sjb	} else {
834178481Sjb		if (uniqfile != NULL && withfile != NULL)
835178481Sjb			err++;
836178481Sjb
837178481Sjb		if (uniqlabel != NULL && uniqfile == NULL)
838178481Sjb			err++;
839178481Sjb
840178481Sjb		if (outfile == NULL || label == NULL)
841178481Sjb			err++;
842178481Sjb
843178481Sjb		if (argc - optind == 0)
844178481Sjb			err++;
845178481Sjb	}
846178481Sjb
847178481Sjb	if (err) {
848178481Sjb		usage();
849178481Sjb		exit(2);
850178481Sjb	}
851178481Sjb
852178481Sjb	if (getenv("STRIPSTABS_KEEP_STABS") != NULL)
853178481Sjb		keep_stabs = CTF_KEEP_STABS;
854178481Sjb
855178481Sjb	if (uniqfile && access(uniqfile, R_OK) != 0) {
856178481Sjb		warning("Uniquification file %s couldn't be opened and "
857178481Sjb		    "will be ignored.\n", uniqfile);
858178481Sjb		uniqfile = NULL;
859178481Sjb	}
860178481Sjb	if (withfile && access(withfile, R_OK) != 0) {
861178481Sjb		warning("With file %s couldn't be opened and will be "
862178481Sjb		    "ignored.\n", withfile);
863178481Sjb		withfile = NULL;
864178481Sjb	}
865178481Sjb	if (outfile && access(outfile, R_OK|W_OK) != 0)
866178481Sjb		terminate("Cannot open output file %s for r/w", outfile);
867178481Sjb
868178481Sjb	/*
869178481Sjb	 * This is ugly, but we don't want to have to have a separate tool
870178481Sjb	 * (yet) just for copying an ELF section with our specific requirements,
871178481Sjb	 * so we shoe-horn a copier into ctfmerge.
872178481Sjb	 */
873178481Sjb	if (docopy) {
874178481Sjb		copy_ctf_data(argv[optind], argv[optind + 1], keep_stabs);
875178481Sjb
876178481Sjb		exit(0);
877178481Sjb	}
878178481Sjb
879178481Sjb	set_terminate_cleanup(terminate_cleanup);
880178481Sjb
881178481Sjb	/* Sort the input files and strip out duplicates */
882178481Sjb	nifiles = argc - optind;
883178481Sjb	ifiles = xmalloc(sizeof (char *) * nifiles);
884178481Sjb	tifiles = xmalloc(sizeof (char *) * nifiles);
885178481Sjb
886178481Sjb	for (i = 0; i < nifiles; i++)
887178481Sjb		tifiles[i] = argv[optind + i];
888178481Sjb	qsort(tifiles, nifiles, sizeof (char *), (int (*)())strcompare);
889178481Sjb
890178481Sjb	ifiles[0] = tifiles[0];
891178481Sjb	for (idx = 0, tidx = 1; tidx < nifiles; tidx++) {
892178481Sjb		if (strcmp(ifiles[idx], tifiles[tidx]) != 0)
893178481Sjb			ifiles[++idx] = tifiles[tidx];
894178481Sjb	}
895178481Sjb	nifiles = idx + 1;
896178481Sjb
897178481Sjb	/* Make sure they all exist */
898178481Sjb	if ((nielems = count_files(ifiles, nifiles)) < 0)
899178481Sjb		terminate("Some input files were inaccessible\n");
900178481Sjb
901178481Sjb	/* Prepare for the merge */
902178481Sjb	wq_init(&wq, nielems);
903178481Sjb
904178481Sjb	start_threads(&wq);
905178481Sjb
906178481Sjb	/*
907178481Sjb	 * Start the merge
908178481Sjb	 *
909178481Sjb	 * We're reading everything from each of the object files, so we
910178481Sjb	 * don't need to specify labels.
911178481Sjb	 */
912178481Sjb	if (read_ctf(ifiles, nifiles, NULL, merge_ctf_cb,
913178481Sjb	    &wq, require_ctf) == 0) {
914178481Sjb		/*
915178481Sjb		 * If we're verifying that C files have CTF, it's safe to
916178481Sjb		 * assume that in this case, we're building only from assembly
917178481Sjb		 * inputs.
918178481Sjb		 */
919178481Sjb		if (require_ctf)
920178481Sjb			exit(0);
921178481Sjb		terminate("No ctf sections found to merge\n");
922178481Sjb	}
923178481Sjb
924178481Sjb	pthread_mutex_lock(&wq.wq_queue_lock);
925178481Sjb	wq.wq_nomorefiles = 1;
926178481Sjb	pthread_cond_broadcast(&wq.wq_work_avail);
927178481Sjb	pthread_mutex_unlock(&wq.wq_queue_lock);
928178481Sjb
929178481Sjb	pthread_mutex_lock(&wq.wq_queue_lock);
930178481Sjb	while (wq.wq_alldone == 0)
931178481Sjb		pthread_cond_wait(&wq.wq_alldone_cv, &wq.wq_queue_lock);
932178481Sjb	pthread_mutex_unlock(&wq.wq_queue_lock);
933178481Sjb
934210767Srpaulo	join_threads(&wq);
935210767Srpaulo
936178481Sjb	/*
937178481Sjb	 * All requested files have been merged, with the resulting tree in
938178481Sjb	 * mstrtd.  savetd is the tree that will be placed into the output file.
939178481Sjb	 *
940178481Sjb	 * Regardless of whether we're doing a normal uniquification or an
941178481Sjb	 * additive merge, we need a type tree that has been uniquified
942178481Sjb	 * against uniqfile or withfile, as appropriate.
943178481Sjb	 *
944178481Sjb	 * If we're doing a uniquification, we stuff the resulting tree into
945178481Sjb	 * outfile.  Otherwise, we add the tree to the tree already in withfile.
946178481Sjb	 */
947178481Sjb	assert(fifo_len(wq.wq_queue) == 1);
948178481Sjb	mstrtd = fifo_remove(wq.wq_queue);
949178481Sjb
950178481Sjb	if (verbose || debug_level) {
951178481Sjb		debug(2, "Statistics for td %p\n", (void *)mstrtd);
952178481Sjb
953178481Sjb		iidesc_stats(mstrtd->td_iihash);
954178481Sjb	}
955178481Sjb
956178481Sjb	if (uniqfile != NULL || withfile != NULL) {
957178481Sjb		char *reffile, *reflabel = NULL;
958178481Sjb		tdata_t *reftd;
959178481Sjb
960178481Sjb		if (uniqfile != NULL) {
961178481Sjb			reffile = uniqfile;
962178481Sjb			reflabel = uniqlabel;
963178481Sjb		} else
964178481Sjb			reffile = withfile;
965178481Sjb
966178481Sjb		if (read_ctf(&reffile, 1, reflabel, read_ctf_save_cb,
967178481Sjb		    &reftd, require_ctf) == 0) {
968178481Sjb			terminate("No CTF data found in reference file %s\n",
969178481Sjb			    reffile);
970178481Sjb		}
971178481Sjb
972178481Sjb		savetd = tdata_new();
973178481Sjb
974178481Sjb		if (CTF_TYPE_ISCHILD(reftd->td_nextid))
975178481Sjb			terminate("No room for additional types in master\n");
976178481Sjb
977178481Sjb		savetd->td_nextid = withfile ? reftd->td_nextid :
978178481Sjb		    CTF_INDEX_TO_TYPE(1, TRUE);
979178481Sjb		merge_into_master(mstrtd, reftd, savetd, 0);
980178481Sjb
981178481Sjb		tdata_label_add(savetd, label, CTF_LABEL_LASTIDX);
982178481Sjb
983178481Sjb		if (withfile) {
984178481Sjb			/*
985178481Sjb			 * savetd holds the new data to be added to the withfile
986178481Sjb			 */
987178481Sjb			tdata_t *withtd = reftd;
988178481Sjb
989178481Sjb			tdata_merge(withtd, savetd);
990178481Sjb
991178481Sjb			savetd = withtd;
992178481Sjb		} else {
993178481Sjb			char uniqname[MAXPATHLEN];
994178481Sjb			labelent_t *parle;
995178481Sjb
996178481Sjb			parle = tdata_label_top(reftd);
997178481Sjb
998178481Sjb			savetd->td_parlabel = xstrdup(parle->le_name);
999178481Sjb
1000178481Sjb			strncpy(uniqname, reffile, sizeof (uniqname));
1001178481Sjb			uniqname[MAXPATHLEN - 1] = '\0';
1002178481Sjb			savetd->td_parname = xstrdup(basename(uniqname));
1003178481Sjb		}
1004178481Sjb
1005178481Sjb	} else {
1006178481Sjb		/*
1007178481Sjb		 * No post processing.  Write the merged tree as-is into the
1008178481Sjb		 * output file.
1009178481Sjb		 */
1010178481Sjb		tdata_label_free(mstrtd);
1011178481Sjb		tdata_label_add(mstrtd, label, CTF_LABEL_LASTIDX);
1012178481Sjb
1013178481Sjb		savetd = mstrtd;
1014178481Sjb	}
1015178481Sjb
1016178481Sjb	tmpname = mktmpname(outfile, ".ctf");
1017178481Sjb	write_ctf(savetd, outfile, tmpname,
1018233407Sgonzo	    CTF_COMPRESS | CTF_SWAP_BYTES | write_fuzzy_match | dynsym | keep_stabs);
1019178481Sjb	if (rename(tmpname, outfile) != 0)
1020178481Sjb		terminate("Couldn't rename output temp file %s", tmpname);
1021178481Sjb	free(tmpname);
1022178481Sjb
1023178481Sjb	return (0);
1024178481Sjb}
1025