1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"@(#)output.c	1.16	06/08/22 SMI"
27
28/*
29 * Routines for preparing tdata trees for conversion into CTF data, and
30 * for placing the resulting data into an output file.
31 */
32
33#include <stdio.h>
34#include <stdlib.h>
35#include <strings.h>
36#include <sys/types.h>
37#include <sys/stat.h>
38#include <fcntl.h>
39#include <libelf.h>
40#include <gelf.h>
41#include <unistd.h>
42
43#include "ctftools.h"
44#include "list.h"
45#include "memory.h"
46#include "traverse.h"
47#include "symbol.h"
48
49#if defined(__APPLE__)
50#include <mach-o/loader.h>
51#include <mach-o/nlist.h>
52#include <sys/mman.h>
53
54static GElf_Sym *
55gelf_getsym_macho(Elf_Data * data, int ndx, int nent, GElf_Sym * dst, const char *base)
56{
57	const struct nlist *nsym = ((const struct nlist *)(data->d_buf)) + ndx;
58	const char *name = base + nsym->n_un.n_strx;
59	char *tmp;
60
61	if (0 == nsym->n_un.n_strx) // iff a null, "", name.
62		name = "null name"; // return NULL;
63
64	if ('_' == name[0])
65		name++; // Lop off omnipresent underscore to match DWARF convention
66
67	dst->st_name = (GElf_Sxword)(name - base);
68	dst->st_value = nsym->n_value;
69	dst->st_size = 0;
70	dst->st_info = GELF_ST_INFO((STB_GLOBAL), (STT_NOTYPE));
71	dst->st_other = 0;
72	dst->st_shndx = SHN_MACHO; /* Mark underlying file as Mach-o */
73
74	if (nsym->n_type & N_STAB) {
75
76		switch(nsym->n_type) {
77		case N_FUN:
78			dst->st_info = GELF_ST_INFO((STB_GLOBAL), (STT_FUNC));
79			break;
80		case N_GSYM:
81			dst->st_info = GELF_ST_INFO((STB_GLOBAL), (STT_OBJECT));
82			break;
83		default:
84			break;
85		}
86
87	} else if ((N_ABS | N_EXT) == (nsym->n_type & (N_TYPE | N_EXT)) ||
88		(N_SECT | N_EXT) == (nsym->n_type & (N_TYPE | N_EXT))) {
89
90		dst->st_info = GELF_ST_INFO((STB_GLOBAL), (nsym->n_desc));
91	} else if ((N_UNDF | N_EXT) == (nsym->n_type & (N_TYPE | N_EXT)) &&
92				nsym->n_sect == NO_SECT) {
93		dst->st_info = GELF_ST_INFO((STB_GLOBAL), (STT_OBJECT)); /* Common */
94	}
95
96	return dst;
97}
98
99static GElf_Sym *
100gelf_getsym_macho_64(Elf_Data * data, int ndx, int nent, GElf_Sym * dst, const char *base)
101{
102	const struct nlist_64 *nsym = ((const struct nlist_64 *)(data->d_buf)) + ndx;
103	const char *name = base + nsym->n_un.n_strx;
104	char *tmp;
105
106	if (0 == nsym->n_un.n_strx) // iff a null, "", name.
107		name = "null name"; // return NULL;
108
109	if ('_' == name[0])
110		name++; // Lop off omnipresent underscore to match DWARF convention
111
112	dst->st_name = (GElf_Sxword)(name - base);
113	dst->st_value = nsym->n_value;
114	dst->st_size = 0;
115	dst->st_info = GELF_ST_INFO((STB_GLOBAL), (STT_NOTYPE));
116	dst->st_other = 0;
117	dst->st_shndx = SHN_MACHO_64; /* Mark underlying file as Mach-o 64 */
118
119	if (nsym->n_type & N_STAB) {
120
121		switch(nsym->n_type) {
122		case N_FUN:
123			dst->st_info = GELF_ST_INFO((STB_GLOBAL), (STT_FUNC));
124			break;
125		case N_GSYM:
126			dst->st_info = GELF_ST_INFO((STB_GLOBAL), (STT_OBJECT));
127			break;
128		default:
129			break;
130		}
131
132	} else if ((N_ABS | N_EXT) == (nsym->n_type & (N_TYPE | N_EXT)) ||
133		(N_SECT | N_EXT) == (nsym->n_type & (N_TYPE | N_EXT))) {
134
135		dst->st_info = GELF_ST_INFO((STB_GLOBAL), (nsym->n_desc));
136	} else if ((N_UNDF | N_EXT) == (nsym->n_type & (N_TYPE | N_EXT)) &&
137				nsym->n_sect == NO_SECT) {
138		dst->st_info = GELF_ST_INFO((STB_GLOBAL), (STT_OBJECT)); /* Common */
139	}
140
141	return dst;
142}
143#endif /* __APPLE__ */
144
/*
 * Search key and result slot used when looking up the iidesc that
 * corresponds to a symbol table entry (see matching_iidesc() and
 * find_iidesc()).
 */
typedef struct iidesc_match {
	int iim_fuzzy;		/* nonzero: accept non-global fuzzy matches */
	iidesc_t *iim_ret;	/* result; NULL until a match is recorded */
	char *iim_name;		/* name of the symbol being looked up */
	char *iim_file;		/* name from the latest STT_FILE symbol, or NULL */
	uchar_t iim_bind;	/* symbol binding, from GELF_ST_BIND() */
} iidesc_match_t;
152
153static int
154burst_iitypes(void *data, void *arg)
155{
156	iidesc_t *ii = data;
157	iiburst_t *iiburst = arg;
158
159	switch (ii->ii_type) {
160	case II_GFUN:
161	case II_SFUN:
162	case II_GVAR:
163	case II_SVAR:
164		if (!(ii->ii_flags & IIDESC_F_USED))
165			return (0);
166		break;
167	default:
168		break;
169	}
170
171	ii->ii_dtype->t_flags |= TDESC_F_ISROOT;
172	(void) iitraverse_td(ii, iiburst->iib_tdtd);
173	return (1);
174}
175
176/*ARGSUSED1*/
177static int
178save_type_by_id(tdesc_t *tdp, tdesc_t **tdpp, void *private)
179{
180	iiburst_t *iiburst = private;
181
182	/*
183	 * Doing this on every node is horribly inefficient, but given that
184	 * we may be suppressing some types, we can't trust nextid in the
185	 * tdata_t.
186	 */
187	if (tdp->t_id > iiburst->iib_maxtypeid)
188		iiburst->iib_maxtypeid = tdp->t_id;
189
190	slist_add(&iiburst->iib_types, tdp, tdesc_idcmp);
191
192	return (1);
193}
194
/*
 * Callback table handed to tdtrav_init() by iiburst_types().  Every kind
 * of type is recorded through save_type_by_id(), except unresolved
 * typedefs, which are routed to tdtrav_assert.
 * NOTE(review): slots are presumably indexed by the tdesc type code, with
 * slot 0 unused -- confirm against traverse.h.
 */
static tdtrav_cb_f burst_types_cbs[] = {
	NULL,
	save_type_by_id,	/* intrinsic */
	save_type_by_id,	/* pointer */
	save_type_by_id,	/* array */
	save_type_by_id,	/* function */
	save_type_by_id,	/* struct */
	save_type_by_id,	/* union */
	save_type_by_id,	/* enum */
	save_type_by_id,	/* forward */
	save_type_by_id,	/* typedef */
	tdtrav_assert,		/* typedef_unres */
	save_type_by_id,	/* volatile */
	save_type_by_id,	/* const */
	save_type_by_id		/* restrict */
};
211
212
213static iiburst_t *
214iiburst_new(tdata_t *td, int max)
215{
216	iiburst_t *iiburst = xcalloc(sizeof (iiburst_t));
217	iiburst->iib_td = td;
218	iiburst->iib_funcs = xcalloc(sizeof (iidesc_t *) * max);
219	iiburst->iib_nfuncs = 0;
220	iiburst->iib_objts = xcalloc(sizeof (iidesc_t *) * max);
221	iiburst->iib_nobjts = 0;
222	return (iiburst);
223}
224
225static void
226iiburst_types(iiburst_t *iiburst)
227{
228	tdtrav_data_t tdtd;
229
230	tdtrav_init(&tdtd, &iiburst->iib_td->td_curvgen, NULL, burst_types_cbs,
231	    NULL, (void *)iiburst);
232
233	iiburst->iib_tdtd = &tdtd;
234
235	(void) hash_iter(iiburst->iib_td->td_iihash, burst_iitypes, iiburst);
236}
237
238static void
239iiburst_free(iiburst_t *iiburst)
240{
241	free(iiburst->iib_funcs);
242	free(iiburst->iib_objts);
243	list_free(iiburst->iib_types, NULL, NULL);
244	free(iiburst);
245}
246
247/*
248 * See if this iidesc matches the ELF symbol data we pass in.
249 *
250 * A fuzzy match is where we have a local symbol matching the name of a
251 * global type description. This is common when a mapfile is used for a
252 * DSO, but we don't accept it by default.
253 *
254 * A weak fuzzy match is when a weak symbol was resolved and matched to
255 * a global type description.
256 */
257static int
258matching_iidesc(iidesc_t *iidesc, iidesc_match_t *match)
259{
260	if (streq(iidesc->ii_name, match->iim_name) == 0)
261		return (0);
262
263	switch (iidesc->ii_type) {
264	case II_GFUN:
265	case II_GVAR:
266		if (match->iim_bind == STB_GLOBAL) {
267			match->iim_ret = iidesc;
268			return (-1);
269		} else if (match->iim_fuzzy && match->iim_ret == NULL) {
270			match->iim_ret = iidesc;
271			/* continue to look for strong match */
272			return (0);
273		}
274		break;
275	case II_SFUN:
276	case II_SVAR:
277		if (match->iim_bind == STB_LOCAL &&
278		    match->iim_file != NULL &&
279		    streq(iidesc->ii_owner, match->iim_file)) {
280			match->iim_ret = iidesc;
281			return (-1);
282		}
283		break;
284	}
285	return (0);
286}
287
288static iidesc_t *
289find_iidesc(tdata_t *td, iidesc_match_t *match)
290{
291	match->iim_ret = NULL;
292	iter_iidescs_by_name(td, match->iim_name,
293	    (int (*)())matching_iidesc, match);
294	return (match->iim_ret);
295}
296
297/*
298 * If we have a weak symbol, attempt to find the strong symbol it will
299 * resolve to.  Note: the code where this actually happens is in
300 * sym_process() in cmd/sgs/libld/common/syms.c
301 *
302 * Finding the matching symbol is unfortunately not trivial.  For a
303 * symbol to be a candidate, it must:
304 *
305 * - have the same type (function, object)
306 * - have the same value (address)
307 * - have the same size
308 * - not be another weak symbol
309 * - belong to the same section (checked via section index)
310 *
311 * If such a candidate is global, then we assume we've found it.  The
312 * linker generates the symbol table such that the curfile might be
313 * incorrect; this is OK for global symbols, since find_iidesc() doesn't
314 * need to check for the source file for the symbol.
315 *
316 * We might have found a strong local symbol, where the curfile is
317 * accurate and matches that of the weak symbol.  We assume this is a
318 * reasonable match.
319 *
320 * If we've got a local symbol with a non-matching curfile, there are
321 * two possibilities.  Either this is a completely different symbol, or
322 * it's a once-global symbol that was scoped to local via a mapfile.  In
323 * the latter case, curfile is likely inaccurate since the linker does
324 * not preserve the needed curfile in the order of the symbol table (see
325 * the comments about locally scoped symbols in libld's update_osym()).
326 * As we can't tell this case from the former one, we use this symbol
327 * iff no other matching symbol is found.
328 *
329 * What we really need here is a SUNW section containing weak<->strong
330 * mappings that we can consume.
331 */
332static int
333check_for_weak(GElf_Sym *weak, char const *weakfile,
334    Elf_Data *data, int nent, Elf_Data *strdata,
335    GElf_Sym *retsym, char **curfilep)
336{
337	char *curfile = NULL;
338	char *tmpfile;
339	GElf_Sym tmpsym;
340	int candidate = 0;
341	int i;
342
343	if (GELF_ST_BIND(weak->st_info) != STB_WEAK)
344		return (0);
345
346	for (i = 0; i < nent; i++) {
347		GElf_Sym sym;
348		uchar_t type;
349
350		if (gelf_getsym(data, i, &sym) == NULL)
351			continue;
352
353		type = GELF_ST_TYPE(sym.st_info);
354
355		if (type == STT_FILE)
356			curfile = (char *)strdata->d_buf + sym.st_name;
357
358		if (GELF_ST_TYPE(weak->st_info) != type ||
359		    weak->st_value != sym.st_value)
360			continue;
361
362		if (weak->st_size != sym.st_size)
363			continue;
364
365		if (GELF_ST_BIND(sym.st_info) == STB_WEAK)
366			continue;
367
368		if (sym.st_shndx != weak->st_shndx)
369			continue;
370
371		if (GELF_ST_BIND(sym.st_info) == STB_LOCAL &&
372		    (curfile == NULL || weakfile == NULL ||
373		    strcmp(curfile, weakfile) != 0)) {
374			candidate = 1;
375			tmpfile = curfile;
376			tmpsym = sym;
377			continue;
378		}
379
380		*curfilep = curfile;
381		*retsym = sym;
382		return (1);
383	}
384
385	if (candidate) {
386		*curfilep = tmpfile;
387		*retsym = tmpsym;
388		return (1);
389	}
390
391	return (0);
392}
393
394/*
395 * When we've found the underlying symbol's type description
396 * for a weak symbol, we need to copy it and rename it to match
397 * the weak symbol. We also need to add it to the td so it's
398 * handled along with the others later.
399 */
400static iidesc_t *
401copy_from_strong(tdata_t *td, GElf_Sym *sym, iidesc_t *strongdesc,
402    const char *weakname, const char *weakfile)
403{
404	iidesc_t *new = iidesc_dup_rename(strongdesc, weakname, weakfile);
405	uchar_t type = GELF_ST_TYPE(sym->st_info);
406
407	switch (type) {
408	case STT_OBJECT:
409		new->ii_type = II_GVAR;
410		break;
411	case STT_FUNC:
412		new->ii_type = II_GFUN;
413		break;
414	}
415
416	hash_add(td->td_iihash, new);
417
418	return (new);
419}
420
421/*
422 * Process the symbol table of the output file, associating each symbol
423 * with a type description if possible, and sorting them into functions
424 * and data, maintaining symbol table order.
425 */
426static iiburst_t *
427sort_iidescs(Elf *elf, const char *file, tdata_t *td, int fuzzymatch,
428    int dynsym)
429{
430	iiburst_t *iiburst;
431	Elf_Scn *scn;
432	GElf_Shdr shdr;
433	Elf_Data *data, *strdata;
434	int i, stidx;
435	int nent;
436	iidesc_match_t match;
437
438	match.iim_fuzzy = fuzzymatch;
439	match.iim_file = NULL;
440
441	if ((stidx = findelfsecidx(elf, file,
442	    dynsym ? ".dynsym" : ".symtab")) < 0)
443#if !defined(__APPLE__)
444		terminate("%s: Can't open symbol table\n", file);
445#else
446        terminate(""); /* missing symbol table is most likely an empty binary,
447                        * produce no output, but also don't warn the user. */
448#endif
449	scn = elf_getscn(elf, stidx);
450	data = elf_getdata(scn, NULL);
451	gelf_getshdr(scn, &shdr);
452	nent = shdr.sh_size / shdr.sh_entsize;
453
454#if !defined(__APPLE__)
455	scn = elf_getscn(elf, shdr.sh_link);
456	strdata = elf_getdata(scn, NULL);
457#else
458	if (SHN_MACHO !=  shdr.sh_link && SHN_MACHO_64 !=  shdr.sh_link) {
459		scn = elf_getscn(elf, shdr.sh_link);
460		strdata = elf_getdata(scn, NULL);
461	} else {
462		/* Underlying file is Mach-o */
463		int dir_idx;
464
465		if ((dir_idx = findelfsecidx(elf, file, ".dir_str_table")) < 0 ||
466		    (scn = elf_getscn(elf, dir_idx)) == NULL ||
467		    (strdata = elf_getdata(scn, NULL)) == NULL)
468			terminate("%s: Can't open direct string table\n", file);
469	}
470#endif /* __APPLE__ */
471
472	iiburst = iiburst_new(td, nent);
473
474#if !defined(__APPLE__)
475	for (i = 0; i < nent; i++) {
476		GElf_Sym sym;
477		iidesc_t **tolist;
478		GElf_Sym ssym;
479		iidesc_match_t smatch;
480		int *curr;
481		iidesc_t *iidesc;
482
483		if (gelf_getsym(data, i, &sym) == NULL)
484			elfterminate(file, "Couldn't read symbol %d", i);
485
486		match.iim_name = (char *)strdata->d_buf + sym.st_name;
487		match.iim_bind = GELF_ST_BIND(sym.st_info);
488
489		switch (GELF_ST_TYPE(sym.st_info)) {
490		case STT_FILE:
491			match.iim_file = match.iim_name;
492			continue;
493		case STT_OBJECT:
494			tolist = iiburst->iib_objts;
495			curr = &iiburst->iib_nobjts;
496			break;
497		case STT_FUNC:
498			tolist = iiburst->iib_funcs;
499			curr = &iiburst->iib_nfuncs;
500			break;
501		default:
502			continue;
503		}
504
505		if (ignore_symbol(&sym, match.iim_name))
506			continue;
507
508		iidesc = find_iidesc(td, &match);
509
510		if (iidesc != NULL) {
511			tolist[*curr] = iidesc;
512			iidesc->ii_flags |= IIDESC_F_USED;
513			(*curr)++;
514			continue;
515		}
516
517		if (!check_for_weak(&sym, match.iim_file, data, nent, strdata,
518		    &ssym, &smatch.iim_file)) {
519			(*curr)++;
520			continue;
521		}
522
523		smatch.iim_fuzzy = fuzzymatch;
524		smatch.iim_name = (char *)strdata->d_buf + ssym.st_name;
525		smatch.iim_bind = GELF_ST_BIND(ssym.st_info);
526
527		debug(3, "Weak symbol %s resolved to %s\n", match.iim_name,
528		    smatch.iim_name);
529
530		iidesc = find_iidesc(td, &smatch);
531
532		if (iidesc != NULL) {
533			tolist[*curr] = copy_from_strong(td, &sym,
534			    iidesc, match.iim_name, match.iim_file);
535			tolist[*curr]->ii_flags |= IIDESC_F_USED;
536		}
537
538		(*curr)++;
539	}
540#else
541	for (i = 0; i < nent; i++) {
542		GElf_Sym sym;
543		iidesc_t **tolist;
544		int *curr;
545		iidesc_t *iidesc;
546
547		if (SHN_MACHO == shdr.sh_link) {
548			if (gelf_getsym_macho(data, i, nent, &sym, (const char *)strdata->d_buf) == NULL)
549				elfterminate(file, "Couldn't read symbol %d", i);
550		} else if (SHN_MACHO_64 == shdr.sh_link) {
551			if (gelf_getsym_macho_64(data, i, nent, &sym, (const char *)strdata->d_buf) == NULL)
552				elfterminate(file, "Couldn't read symbol %d", i);
553		}
554
555		match.iim_name = (char *)strdata->d_buf + sym.st_name;
556		match.iim_bind = GELF_ST_BIND(sym.st_info);
557
558		switch (GELF_ST_TYPE(sym.st_info)) {
559		case STT_FILE:
560			match.iim_file = match.iim_name;
561			continue;
562		case STT_OBJECT:
563			tolist = iiburst->iib_objts;
564			curr = &iiburst->iib_nobjts;
565			break;
566		case STT_FUNC:
567			tolist = iiburst->iib_funcs;
568			curr = &iiburst->iib_nfuncs;
569			break;
570		default:
571			continue;
572		}
573
574		if (ignore_symbol(&sym, match.iim_name))
575			continue;
576
577		iidesc = find_iidesc(td, &match);
578
579		if (iidesc != NULL) {
580			tolist[*curr] = iidesc;
581			iidesc->ii_flags |= IIDESC_F_USED;
582			(*curr)++;
583			continue;
584		}
585
586		if (ignore_symbol(&sym, match.iim_name))
587			continue;
588
589#warning FIXME: deal with weak bindings.
590
591		(*curr)++;
592	}
593#endif /* __APPLE__ */
594
595	/*
596	 * Stabs are generated for every function declared in a given C source
597	 * file.  When converting an object file, we may encounter a stab that
598	 * has no symbol table entry because the optimizer has decided to omit
599	 * that item (for example, an unreferenced static function).  We may
600	 * see iidescs that do not have an associated symtab entry, and so
601	 * we do not write records for those functions into the CTF data.
602	 * All others get marked as a root by this function.
603	 */
604	iiburst_types(iiburst);
605
606	/*
607	 * By not adding some of the functions and/or objects, we may have
608	 * caused some types that were referenced solely by those
609	 * functions/objects to be suppressed.  This could cause a label,
610	 * generated prior to the evisceration, to be incorrect.  Find the
611	 * highest type index, and change the label indicies to be no higher
612	 * than this value.
613	 */
614	tdata_label_newmax(td, iiburst->iib_maxtypeid);
615
616	return (iiburst);
617}
618
619#if !defined(__APPLE__)
620static void
621write_file(Elf *src, const char *srcname, Elf *dst, const char *dstname,
622    caddr_t ctfdata, size_t ctfsize, int flags)
623{
624	GElf_Ehdr sehdr, dehdr;
625	Elf_Scn *sscn, *dscn;
626	Elf_Data *sdata, *ddata;
627	GElf_Shdr shdr;
628	GElf_Word symtab_type;
629	int symtab_idx = -1;
630	off_t new_offset = 0;
631	off_t ctfnameoff = 0;
632	int dynsym = (flags & CTF_USE_DYNSYM);
633	int keep_stabs = (flags & CTF_KEEP_STABS);
634	int *secxlate;
635	int srcidx, dstidx;
636	int curnmoff = 0;
637	int changing = 0;
638	int pad;
639	int i;
640
641	if (gelf_newehdr(dst, gelf_getclass(src)) == NULL)
642		elfterminate(dstname, "Cannot copy ehdr to temp file");
643	gelf_getehdr(src, &sehdr);
644	memcpy(&dehdr, &sehdr, sizeof (GElf_Ehdr));
645	gelf_update_ehdr(dst, &dehdr);
646
647	symtab_type = dynsym ? SHT_DYNSYM : SHT_SYMTAB;
648
649	/*
650	 * Neither the existing stab sections nor the SUNW_ctf sections (new or
651	 * existing) are SHF_ALLOC'd, so they won't be in areas referenced by
652	 * program headers.  As such, we can just blindly copy the program
653	 * headers from the existing file to the new file.
654	 */
655	if (sehdr.e_phnum != 0) {
656		(void) elf_flagelf(dst, ELF_C_SET, ELF_F_LAYOUT);
657		if (gelf_newphdr(dst, sehdr.e_phnum) == NULL)
658			elfterminate(dstname, "Cannot make phdrs in temp file");
659
660		for (i = 0; i < sehdr.e_phnum; i++) {
661			GElf_Phdr phdr;
662
663			gelf_getphdr(src, i, &phdr);
664			gelf_update_phdr(dst, i, &phdr);
665		}
666	}
667
668	secxlate = xmalloc(sizeof (int) * sehdr.e_shnum);
669	for (srcidx = dstidx = 0; srcidx < sehdr.e_shnum; srcidx++) {
670		Elf_Scn *scn = elf_getscn(src, srcidx);
671		GElf_Shdr shdr;
672		char *sname;
673
674		gelf_getshdr(scn, &shdr);
675		sname = elf_strptr(src, sehdr.e_shstrndx, shdr.sh_name);
676		if (sname == NULL) {
677			elfterminate(srcname, "Can't find string at %u",
678			    shdr.sh_name);
679		}
680
681		if (strcmp(sname, CTF_ELF_SCN_NAME) == 0) {
682			secxlate[srcidx] = -1;
683		} else if (!keep_stabs &&
684		    (strncmp(sname, ".stab", 5) == 0 ||
685		    strncmp(sname, ".debug", 6) == 0 ||
686		    strncmp(sname, ".rel.debug", 10) == 0 ||
687		    strncmp(sname, ".rela.debug", 11) == 0)) {
688			secxlate[srcidx] = -1;
689		} else if (dynsym && shdr.sh_type == SHT_SYMTAB) {
690			/*
691			 * If we're building CTF against the dynsym,
692			 * we'll rip out the symtab so debuggers aren't
693			 * confused.
694			 */
695			secxlate[srcidx] = -1;
696		} else {
697			secxlate[srcidx] = dstidx++;
698			curnmoff += strlen(sname) + 1;
699		}
700
701		new_offset = (off_t)dehdr.e_phoff;
702	}
703
704	for (srcidx = 1; srcidx < sehdr.e_shnum; srcidx++) {
705		char *sname;
706
707		sscn = elf_getscn(src, srcidx);
708		gelf_getshdr(sscn, &shdr);
709
710		if (secxlate[srcidx] == -1) {
711			changing = 1;
712			continue;
713		}
714
715		dscn = elf_newscn(dst);
716
717		/*
718		 * If this file has program headers, we need to explicitly lay
719		 * out sections.  If none of the sections prior to this one have
720		 * been removed, then we can just use the existing location.  If
721		 * one or more sections have been changed, then we need to
722		 * adjust this one to avoid holes.
723		 */
724		if (changing && sehdr.e_phnum != 0) {
725			pad = new_offset % shdr.sh_addralign;
726
727			if (pad)
728				new_offset += shdr.sh_addralign - pad;
729			shdr.sh_offset = new_offset;
730		}
731
732		shdr.sh_link = secxlate[shdr.sh_link];
733
734		if (shdr.sh_type == SHT_REL || shdr.sh_type == SHT_RELA)
735			shdr.sh_info = secxlate[shdr.sh_info];
736
737		sname = elf_strptr(src, sehdr.e_shstrndx, shdr.sh_name);
738		if (sname == NULL) {
739			elfterminate(srcname, "Can't find string at %u",
740			    shdr.sh_name);
741		}
742		if ((sdata = elf_getdata(sscn, NULL)) == NULL)
743			elfterminate(srcname, "Cannot get sect %s data", sname);
744		if ((ddata = elf_newdata(dscn)) == NULL)
745			elfterminate(dstname, "Can't make sect %s data", sname);
746		bcopy(sdata, ddata, sizeof (Elf_Data));
747
748		if (srcidx == sehdr.e_shstrndx) {
749			char seclen = strlen(CTF_ELF_SCN_NAME);
750
751			ddata->d_buf = xmalloc(ddata->d_size + shdr.sh_size +
752			    seclen + 1);
753			bcopy(sdata->d_buf, ddata->d_buf, shdr.sh_size);
754			strcpy((caddr_t)ddata->d_buf + shdr.sh_size,
755			    CTF_ELF_SCN_NAME);
756			ctfnameoff = (off_t)shdr.sh_size;
757			shdr.sh_size += seclen + 1;
758			ddata->d_size += seclen + 1;
759
760			if (sehdr.e_phnum != 0)
761				changing = 1;
762		}
763
764		if (shdr.sh_type == symtab_type && shdr.sh_entsize != 0) {
765			int nsym = shdr.sh_size / shdr.sh_entsize;
766
767			symtab_idx = secxlate[srcidx];
768
769			ddata->d_buf = xmalloc(shdr.sh_size);
770			bcopy(sdata->d_buf, ddata->d_buf, shdr.sh_size);
771
772			for (i = 0; i < nsym; i++) {
773				GElf_Sym sym;
774				short newscn;
775
776				(void) gelf_getsym(ddata, i, &sym);
777
778				if (sym.st_shndx >= SHN_LORESERVE)
779					continue;
780
781				if ((newscn = secxlate[sym.st_shndx]) !=
782				    sym.st_shndx) {
783					sym.st_shndx =
784					    (newscn == -1 ? 1 : newscn);
785
786					gelf_update_sym(ddata, i, &sym);
787				}
788			}
789		}
790
791		if (gelf_update_shdr(dscn, &shdr) == NULL)
792			elfterminate(dstname, "Cannot update sect %s", sname);
793
794		new_offset = (off_t)shdr.sh_offset;
795		if (shdr.sh_type != SHT_NOBITS)
796			new_offset += shdr.sh_size;
797	}
798
799	if (symtab_idx == -1) {
800		terminate("%s: Cannot find %s section\n", srcname,
801		    dynsym ? "SHT_DYNSYM" : "SHT_SYMTAB");
802	}
803
804	/* Add the ctf section */
805	dscn = elf_newscn(dst);
806	gelf_getshdr(dscn, &shdr);
807	shdr.sh_name = ctfnameoff;
808	shdr.sh_type = SHT_PROGBITS;
809	shdr.sh_size = ctfsize;
810	shdr.sh_link = symtab_idx;
811	shdr.sh_addralign = 4;
812	if (changing && sehdr.e_phnum != 0) {
813		pad = new_offset % shdr.sh_addralign;
814
815		if (pad)
816			new_offset += shdr.sh_addralign - pad;
817
818		shdr.sh_offset = new_offset;
819		new_offset += shdr.sh_size;
820	}
821
822	ddata = elf_newdata(dscn);
823	ddata->d_buf = ctfdata;
824	ddata->d_size = ctfsize;
825	ddata->d_align = shdr.sh_addralign;
826
827	gelf_update_shdr(dscn, &shdr);
828
829	/* update the section header location */
830	if (sehdr.e_phnum != 0) {
831		size_t align = gelf_fsize(dst, ELF_T_ADDR, 1, EV_CURRENT);
832		size_t r = new_offset % align;
833
834		if (r)
835			new_offset += align - r;
836
837		dehdr.e_shoff = new_offset;
838	}
839
840	/* commit to disk */
841	dehdr.e_shstrndx = secxlate[sehdr.e_shstrndx];
842	gelf_update_ehdr(dst, &dehdr);
843	if (elf_update(dst, ELF_C_WRITE) < 0)
844		elfterminate(dstname, "Cannot finalize temp file");
845
846	free(secxlate);
847}
848#else
849#include "decl.h"
/*
 * Forward declaration: write_file() hands ELFCLASS64 inputs off to
 * write_file_64(), which is defined further down in this file.
 */
static void
write_file_64(Elf *src, const char *srcname, Elf *dst, const char *dstname,
    caddr_t ctfdata, size_t ctfsize, int flags);
853
854static void
855fill_ctf_segments(struct segment_command *seg, struct section *sect, uint32_t vmaddr, size_t size, uint32_t offset, int swap)
856{
857	struct segment_command tmpseg = {
858		LC_SEGMENT,
859		sizeof(struct segment_command) + sizeof(struct section),
860		SEG_CTF,
861		vmaddr,
862		0, /* Do not map. Do not reserve virtual address range. */
863		offset,
864		size,
865		VM_PROT_READ,
866		VM_PROT_READ,
867		1,
868		0
869	};
870
871	struct section tmpsect = {
872		SECT_CTF,
873		SEG_CTF,
874		vmaddr,
875		size,
876		offset,
877		0, /* byte aligned */
878		0,
879		0,
880		0,
881		0,
882		0
883	};
884
885	if (swap) {
886		__swap_segment_command(&tmpseg);
887		__swap_section(&tmpsect);
888	}
889
890	*seg = tmpseg;
891	*sect = tmpsect;
892}
893
894void
895write_file(Elf *src, const char *srcname, Elf *dst, const char *dstname,
896    caddr_t ctfdata, size_t ctfsize, int flags)
897{
898	struct mach_header hdr, *mh = (struct mach_header *)src->ed_image;
899	struct segment_command ctfseg_command;
900	struct section ctf_sect;
901	struct segment_command *curcmd, *ctfcmd;
902	int fd, cmdsleft, swap = (MH_CIGAM == mh->magic);
903	size_t sz;
904	char *p;
905	uint32_t ctf_vmaddr = 0, t;
906
907	if (ELFCLASS64 == src->ed_class) {
908		write_file_64(src, srcname, dst, dstname, ctfdata, ctfsize, flags);
909		return;
910	}
911
912	/* Swap mach header to host order so we can do arithmetic */
913	if (swap) {
914		hdr = *mh;
915		mh = &hdr;
916		__swap_mach_header(mh);
917	}
918
919	/* Get a pristine instance of the source mach-o */
920	if ((fd = open(srcname, O_RDONLY)) < 0)
921		terminate("%s: Cannot open for re-reading", srcname);
922
923	sz = (size_t)lseek(fd, (off_t)0, SEEK_END);
924
925	p = mmap((char *)0, sz, PROT_READ, MAP_PRIVATE, fd, (off_t)0);
926	if ((char *)-1 == p)
927		terminate("%s: Cannot mmap for re-reading", srcname);
928
929	if ((MH_MAGIC != ((struct mach_header *)p)->magic) &&
930			(MH_CIGAM != ((struct mach_header *)p)->magic))
931		terminate("%s: is not a thin (single architecture) mach-o binary.\n", srcname);
932
933	/* Iterate through load commands looking for CTF data */
934	ctfcmd = NULL;
935	cmdsleft = mh->ncmds;
936	curcmd = (struct segment_command *) (p + sizeof(struct mach_header));
937
938	if (cmdsleft < 1)
939		terminate("%s: Has no load commands.\n", srcname);
940
941	while (cmdsleft-- > 0) {
942		int size = curcmd->cmdsize;
943		uint32_t thecmd = curcmd->cmd;
944
945		if (swap) {
946			SWAP32(size);
947			SWAP32(thecmd);
948		}
949
950		if (LC_SEGMENT == thecmd) {
951			uint32_t vmaddr = curcmd->vmaddr;
952			uint32_t vmsize = curcmd->vmsize;
953			if (swap) {
954				SWAP32(vmaddr);
955				SWAP32(vmsize);
956			}
957			t = vmaddr + vmsize;
958			if (t > ctf_vmaddr)
959				ctf_vmaddr = t;
960		}
961
962		if ((LC_SEGMENT == thecmd) && (!strcmp(curcmd->segname, SEG_CTF))) {
963			ctfcmd = curcmd;
964		}
965
966		curcmd = (struct segment_command *) (((char *)curcmd) + size);
967	}
968
969	ctf_vmaddr = (ctf_vmaddr + getpagesize() - 1) & (~(getpagesize() - 1)); // page aligned
970
971	if (ctfcmd) {
972		/* CTF segment command exists: overwrite it */
973		fill_ctf_segments(&ctfseg_command, &ctf_sect,
974			((struct segment_command *)ctfcmd)->vmaddr, ctfsize, sz /* file offset */, swap);
975
976		write(dst->ed_fd, p, sz); // byte-for-byte copy of input mach-o file
977		write(dst->ed_fd, ctfdata, ctfsize); // append CTF
978
979		lseek(dst->ed_fd, (off_t)((char *)ctfcmd - p), SEEK_SET);
980		write(dst->ed_fd, &ctfseg_command, sizeof(ctfseg_command)); // lay down CTF_SEG
981		write(dst->ed_fd, &ctf_sect, sizeof(ctf_sect)); // lay down CTF_SECT
982	} else {
983		int cmdlength, dataoffset, datalength;
984		int ctfhdrsz = (sizeof(ctfseg_command) + sizeof(ctf_sect));
985
986		cmdlength = mh->sizeofcmds; // where to write CTF seg/sect
987		dataoffset = sizeof(*mh) + mh->sizeofcmds; // where all real data starts
988		datalength = src->ed_imagesz - dataoffset;
989
990		/* Add one segment command to header */
991		mh->sizeofcmds += ctfhdrsz;
992		mh->ncmds++;
993		/*
994		 * Chop the first ctfhdrsz bytes out of the generic data so
995		 * that all the internal offsets stay the same
996		 * (required for ELF parsing)
997		 * FIXME: This isn't pretty.
998		 */
999		dataoffset += ctfhdrsz;
1000		datalength -= ctfhdrsz;
1001
1002		fill_ctf_segments(&ctfseg_command, &ctf_sect, ctf_vmaddr, ctfsize,
1003			(sizeof(*mh) + cmdlength + ctfhdrsz + datalength) /* file offset */,
1004			swap);
1005
1006		if (swap) {
1007			__swap_mach_header(mh);
1008		}
1009
1010		write(dst->ed_fd, mh, sizeof(*mh));
1011		write(dst->ed_fd, p + sizeof(*mh), cmdlength);
1012		write(dst->ed_fd, &ctfseg_command, sizeof(ctfseg_command));
1013		write(dst->ed_fd, &ctf_sect, sizeof(ctf_sect));
1014		write(dst->ed_fd, p + dataoffset, datalength);
1015		write(dst->ed_fd, ctfdata, ctfsize);
1016	}
1017
1018	(void)munmap(p, sz);
1019	(void)close(fd);
1020
1021	return;
1022}
1023
1024static void
1025fill_ctf_segments_64(struct segment_command_64 *seg, struct section_64 *sect, uint64_t vmaddr, size_t size, uint32_t offset, int swap)
1026{
1027	struct segment_command_64 tmpseg = {
1028		LC_SEGMENT_64,
1029		sizeof(struct segment_command_64) + sizeof(struct section_64),
1030		SEG_CTF,
1031		vmaddr,
1032		0, /* Do not map. Do not reserve virtual address range. */
1033		offset,
1034		size,
1035		VM_PROT_READ,
1036		VM_PROT_READ,
1037		1,
1038		0
1039	};
1040
1041	struct section_64 tmpsect = {
1042		SECT_CTF,
1043		SEG_CTF,
1044		vmaddr,
1045		size,
1046		offset,
1047		0, /* byte aligned */
1048		0,
1049		0,
1050		0,
1051		0,
1052		0
1053	};
1054
1055	if (swap) {
1056		__swap_segment_command_64(&tmpseg);
1057		__swap_section_64(&tmpsect);
1058	}
1059
1060	*seg = tmpseg;
1061	*sect = tmpsect;
1062}
1063
1064static void
1065write_file_64(Elf *src, const char *srcname, Elf *dst, const char *dstname,
1066    caddr_t ctfdata, size_t ctfsize, int flags)
1067{
1068	struct mach_header_64 hdr, *mh = (struct mach_header_64 *)src->ed_image;
1069	struct segment_command_64 ctfseg_command;
1070	struct section_64 ctf_sect;
1071	struct segment_command_64 *curcmd, *ctfcmd;
1072	int fd, cmdsleft, swap = (MH_CIGAM_64 == mh->magic);
1073	size_t sz;
1074	char *p;
1075	uint64_t ctf_vmaddr = 0, t;
1076
1077	/* Swap mach header to host order so we can do arithmetic */
1078	if (swap) {
1079		hdr = *mh;
1080		mh = &hdr;
1081		__swap_mach_header_64(mh);
1082	}
1083
1084	/* Get a pristine instance of the source mach-o */
1085	if ((fd = open(srcname, O_RDONLY)) < 0)
1086		terminate("%s: Cannot open for re-reading", srcname);
1087
1088	sz = (size_t)lseek(fd, (off_t)0, SEEK_END);
1089
1090	p = mmap((char *)0, sz, PROT_READ, MAP_PRIVATE, fd, (off_t)0);
1091	if ((char *)-1 == p)
1092		terminate("%s: Cannot mmap for re-reading", srcname);
1093
1094	if ((MH_MAGIC_64 != ((struct mach_header *)p)->magic) &&
1095			(MH_CIGAM_64 != ((struct mach_header *)p)->magic))
1096		terminate("%s: is not a thin (single architecture) mach-o binary.\n", srcname);
1097
1098	/* Iterate through load commands looking for CTF data */
1099	ctfcmd = NULL;
1100	cmdsleft = mh->ncmds;
1101	curcmd = (struct segment_command_64 *) (p + sizeof(struct mach_header_64));
1102
1103	if (cmdsleft < 1)
1104		terminate("%s: Has no load commands.\n", srcname);
1105
1106	while (cmdsleft-- > 0) {
1107		int size = curcmd->cmdsize;
1108		uint32_t thecmd = curcmd->cmd;
1109
1110		if (swap) {
1111			SWAP32(size);
1112			SWAP32(thecmd);
1113		}
1114
1115		if (LC_SEGMENT_64 == thecmd) {
1116			uint64_t vmaddr = curcmd->vmaddr;
1117			uint64_t vmsize = curcmd->vmsize;
1118			if (swap) {
1119				SWAP64(vmaddr);
1120				SWAP64(vmsize);
1121			}
1122			t = vmaddr + vmsize;
1123			if (t > ctf_vmaddr)
1124				ctf_vmaddr = t;
1125		}
1126
1127		if ((LC_SEGMENT_64 == thecmd) && (!strcmp(curcmd->segname, SEG_CTF))) {
1128			ctfcmd = curcmd;
1129		}
1130
1131		curcmd = (struct segment_command_64 *) (((char *)curcmd) + size);
1132	}
1133
1134	ctf_vmaddr = (ctf_vmaddr + getpagesize() - 1) & (~(getpagesize() - 1)); // page aligned
1135
1136	if (ctfcmd) {
1137		/* CTF segment command exists: overwrite it */
1138		fill_ctf_segments_64(&ctfseg_command, &ctf_sect,
1139			((struct segment_command_64 *)curcmd)->vmaddr, ctfsize, sz /* file offset */, swap);
1140
1141		write(dst->ed_fd, p, sz); // byte-for-byte copy of input mach-o file
1142		write(dst->ed_fd, ctfdata, ctfsize); // append CTF
1143
1144		lseek(dst->ed_fd, (off_t)((char *)ctfcmd - p), SEEK_SET);
1145		write(dst->ed_fd, &ctfseg_command, sizeof(ctfseg_command)); // lay down CTF_SEG
1146		write(dst->ed_fd, &ctf_sect, sizeof(ctf_sect)); // lay down CTF_SECT
1147	} else {
1148		int cmdlength, dataoffset, datalength;
1149		int ctfhdrsz = (sizeof(ctfseg_command) + sizeof(ctf_sect));
1150
1151		cmdlength = mh->sizeofcmds; // where to write CTF seg/sect
1152		dataoffset = sizeof(*mh) + mh->sizeofcmds; // where all real data starts
1153		datalength = src->ed_imagesz - dataoffset;
1154
1155		/* Add one segment command to header */
1156		mh->sizeofcmds += ctfhdrsz;
1157		mh->ncmds++;
1158		/*
1159		 * Chop the first ctfhdrsz bytes out of the generic data so
1160		 * that all the internal offsets stay the same
1161		 * (required for ELF parsing)
1162		 * FIXME: This isn't pretty.
1163		 */
1164		dataoffset += ctfhdrsz;
1165		datalength -= ctfhdrsz;
1166
1167		fill_ctf_segments_64(&ctfseg_command, &ctf_sect, ctf_vmaddr, ctfsize,
1168			(sizeof(*mh) + cmdlength + ctfhdrsz + datalength) /* file offset */,
1169			swap);
1170
1171		if (swap) {
1172			__swap_mach_header_64(mh);
1173		}
1174
1175		write(dst->ed_fd, mh, sizeof(*mh));
1176		write(dst->ed_fd, p + sizeof(*mh), cmdlength);
1177		write(dst->ed_fd, &ctfseg_command, sizeof(ctfseg_command));
1178		write(dst->ed_fd, &ctf_sect, sizeof(ctf_sect));
1179		write(dst->ed_fd, p + dataoffset, datalength);
1180		write(dst->ed_fd, ctfdata, ctfsize);
1181	}
1182
1183	(void)munmap(p, sz);
1184	(void)close(fd);
1185
1186	return;
1187}
1188
1189#endif /* __APPLE__ */
1190
1191static caddr_t
1192make_ctf_data(tdata_t *td, Elf *elf, const char *file, size_t *lenp, int flags)
1193{
1194	iiburst_t *iiburst;
1195	caddr_t data;
1196
1197	iiburst = sort_iidescs(elf, file, td, flags & CTF_FUZZY_MATCH,
1198	    flags & CTF_USE_DYNSYM);
1199#if !defined(__APPLE__)
1200	data = ctf_gen(iiburst, lenp, flags & CTF_COMPRESS);
1201#else
1202	data = ctf_gen(iiburst, lenp, flags & (CTF_COMPRESS | CTF_BYTESWAP));
1203#endif /* __APPLE__ */
1204
1205	iiburst_free(iiburst);
1206
1207	return (data);
1208}
1209
1210void
1211write_ctf(tdata_t *td, const char *curname, const char *newname, int flags)
1212{
1213	struct stat st;
1214	Elf *elf = NULL;
1215	Elf *telf = NULL;
1216	caddr_t data;
1217	size_t len;
1218	int fd = -1;
1219	int tfd = -1;
1220
1221	(void) elf_version(EV_CURRENT);
1222	if ((fd = open(curname, O_RDONLY)) < 0 || fstat(fd, &st) < 0)
1223		terminate("%s: Cannot open for re-reading", curname);
1224	if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
1225		elfterminate(curname, "Cannot re-read");
1226
1227	if ((tfd = open(newname, O_RDWR | O_CREAT | O_TRUNC, st.st_mode)) < 0)
1228		terminate("Cannot open temp file %s for writing", newname);
1229	if ((telf = elf_begin(tfd, ELF_C_WRITE, NULL)) == NULL)
1230		elfterminate(curname, "Cannot write");
1231
1232#if defined(__APPLE__)
1233	/*
1234	 * If the caller has advised CTF_BYTESWAP but the target is the same
1235	 * byte order as this processor then clear CTF_BYTESWAP. Otherwise CTF_BYTESWAP
1236	 * stays lit and the output (typically from ctfmerge) is swapped to final form.
1237	 */
1238	if (ELFCLASS32 == elf->ed_class) {
1239		struct mach_header *mh = (struct mach_header *)elf->ed_image;
1240		if ((flags & CTF_BYTESWAP) && (MH_CIGAM != mh->magic))
1241			flags &= ~CTF_BYTESWAP;
1242
1243		data = make_ctf_data(td, elf, curname, &len, flags);
1244		if (flags & CTF_RAW_OUTPUT) {
1245			if (write(tfd, data, len) != len) {
1246				perror("Attempt to write raw CTF data failed");
1247				terminate("Attempt to write raw CTF data failed");
1248			}
1249		} else {
1250			write_file(elf, curname, telf, newname, data, len, flags);
1251		}
1252	} else if (ELFCLASS64 == elf->ed_class) {
1253		struct mach_header_64 *mh_64 = (struct mach_header_64 *)elf->ed_image;
1254		if ((flags & CTF_BYTESWAP) && (MH_CIGAM_64 != mh_64->magic))
1255			flags &= ~CTF_BYTESWAP;
1256
1257		data = make_ctf_data(td, elf, curname, &len, flags);
1258		if (flags & CTF_RAW_OUTPUT) {
1259			if (write(tfd, data, len) != len) {
1260				perror("Attempt to write raw CTF data failed");
1261				terminate("Attempt to write raw CTF data failed");
1262			}
1263		} else {
1264			write_file_64(elf, curname, telf, newname, data, len, flags);
1265		}
1266	} else
1267		terminate("%s: Unknown ed_class", curname);
1268#else
1269	data = make_ctf_data(td, elf, curname, &len, flags);
1270	write_file(elf, curname, telf, newname, data, len, flags);
1271#endif /* __APPLE__ */
1272	free(data);
1273
1274	elf_end(telf);
1275	elf_end(elf);
1276	(void) close(fd);
1277	(void) close(tfd);
1278}
1279