1/*
2 * Copyright (C) 1996-1997 John D. Polstra.  All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY JOHN D. POLSTRA AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL JOHN D. POLSTRA OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26#include <sys/cdefs.h>
27__FBSDID("$FreeBSD$");
28
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>

#include <machine/elf.h>

#include <arpa/inet.h>

#include <a.out.h>
#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <fcntl.h>
#include <sys/link_aout.h>
#include <stab.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "extern.h"
50
/*
 * Page size used below to tell executables from shared libraries and to
 * compensate for the page assumed to sit below an executable's image.
 */
#define PAGE_SIZE	4096	/* i386 specific */

/*
 * Fallback values for the "set element" symbol types.  Each one is defined
 * here only when the system headers included above have not already
 * provided it.
 */
#ifndef N_SETA
#define	N_SETA	0x14		/* Absolute set element symbol */
#endif				/* This is input to LD, in a .o file.  */

#ifndef N_SETT
#define	N_SETT	0x16		/* Text set element symbol */
#endif				/* This is input to LD, in a .o file.  */

#ifndef N_SETD
#define	N_SETD	0x18		/* Data set element symbol */
#endif				/* This is input to LD, in a .o file. */

#ifndef N_SETB
#define	N_SETB	0x1A		/* Bss set element symbol */
#endif				/* This is input to LD, in a .o file. */

#ifndef N_SETV
#define N_SETV	0x1C		/* Pointer to set vector in data area. */
#endif				/* This is output from LD. */
72
/*
 * Entry point that dumps one a.out file.  It is file-local only in the
 * STANDALONE build; the conditional `static' below must be followed by
 * this prototype, otherwise it would attach to the next declaration.
 */
#ifdef STANDALONE
static
#endif
void dump_file(const char *);

/* Helpers that print the individual parts of the mapped file. */
static void dump_rels(const char *, const struct relocation_info *,
    unsigned long, const char *(*)(unsigned long), unsigned char *);
static void dump_segs(void);
static void dump_sods(void);
static void dump_sym(const struct nlist *);
static void dump_syms(void);

static void dump_rtsyms(void);

/* Symbol index -> name lookups for the run-time and regular symbol tables. */
static const char *rtsym_name(unsigned long);
static const char *sym_name(unsigned long);

/* Incremented by dump_file() each time a file cannot be dumped. */
#ifdef STANDALONE
static
#endif
int error_count;
93
/*
 * Variables ending in _base are pointers to things in our address space,
 * i.e., in the file itself.
 *
 * Variables ending in _addr are adjusted according to where things would
 * actually appear in memory if the file were loaded.
 *
 * All of these are set by dump_file() and read by the dump_* helpers.
 */
static const char *file_base;	/* Start of the mapped file */
static const char *text_base;	/* file_base + N_TXTOFF */
static const char *data_base;	/* file_base + N_DATOFF */
static const struct relocation_info *rel_base;	/* file_base + N_RELOFF */
static const struct nlist *sym_base;	/* file_base + N_SYMOFF */
static const char *str_base;	/* file_base + N_STROFF */

/* Run-time tables, located through the section dispatch table. */
static const struct relocation_info *rtrel_base;
static const struct nzlist *rtsym_base;
static const char *rtstr_base;

static const struct exec *ex;	/* a.out header at the start of the file */
static const struct _dynamic *dyn;	/* Found at data_base if EX_DYNAMIC */
static const struct section_dispatch_table *sdt;	/* Via dyn->d_un.d_sdt */

static const char *text_addr;
static const char *data_addr;

/* Number of entries in rel_base[] and sym_base[]. */
static unsigned long rel_count;
static unsigned long sym_count;

/* Number of entries in rtrel_base[] and rtsym_base[]. */
static unsigned long rtrel_count;
static unsigned long rtsym_count;

/* Dynamically allocated flags, 1 byte per symbol, to record whether each
   symbol was referenced by a relocation entry. */
static unsigned char *sym_used;
static unsigned char *rtsym_used;

static unsigned long origin;	/* What values are relocated relative to */
131
#ifdef STANDALONE
/*
 * Stand-alone driver: dump every file named on the command line, in order.
 * The exit status is EXIT_FAILURE if any of them was counted in error_count.
 */
int
main(int argc, char *argv[])
{
    int arg = 1;

    while (arg < argc) {
	dump_file(argv[arg]);
	++arg;
    }

    if (error_count != 0)
	return EXIT_FAILURE;
    return EXIT_SUCCESS;
}
#endif
144
/*
 * Assert that `expr' lies on a 4-byte boundary and return it unchanged.
 * Used to wrap pointers that are about to be cast to a structure type.
 * The address is inspected as a uintptr_t so it is not truncated on
 * platforms where pointers are wider than int.
 */
static inline const void *align_struct(const void *expr)
{
  assert(((uintptr_t)expr & 3) == 0);
  return expr;
}
150
/*
 * Assert that `expr' lies on a 4-byte boundary and return it unchanged.
 * Used to wrap pointers that are about to be read as a long-sized value.
 * The address is inspected as a uintptr_t so it is not truncated on
 * platforms where pointers are wider than int.
 */
static inline const void *align_long(const void *expr)
{
  assert(((uintptr_t)expr & 3) == 0);
  return expr;
}
156
/*
 * Assert that `expr' lies on a 2-byte boundary and return it unchanged.
 * Used to wrap pointers that are about to be read as a short-sized value.
 * The address is inspected as a uintptr_t so it is not truncated on
 * platforms where pointers are wider than int.
 */
static inline const void *align_short(const void *expr)
{
  assert(((uintptr_t)expr & 1) == 0);
  return expr;
}
162
163#ifdef STANDALONE
164static
165#endif
166void
167dump_file(const char *fname)
168{
169    int fd;
170    struct stat sb;
171    caddr_t objbase;
172
173    if (stat(fname, &sb) == -1) {
174	warnx("cannot stat \"%s\"", fname);
175	++error_count;
176	return;
177    }
178
179    if ((sb.st_mode & S_IFMT) != S_IFREG) {
180	warnx("\"%s\" is not a regular file", fname);
181	++error_count;
182	return;
183    }
184
185    if ((fd = open(fname, O_RDONLY, 0)) == -1) {
186	warnx("cannot open \"%s\"", fname);
187	++error_count;
188	return;
189    }
190
191    objbase = mmap(0, sb.st_size, PROT_READ, MAP_SHARED, fd, 0);
192    if (objbase == (caddr_t) -1) {
193	warnx("cannot mmap \"%s\"", fname);
194	++error_count;
195	close(fd);
196	return;
197    }
198
199    close(fd);
200
201    file_base = (const char *) objbase;	/* Makes address arithmetic easier */
202
203    if (IS_ELF(*(const Elf32_Ehdr*) align_struct(file_base))) {
204	warnx("%s: this is an ELF program; use objdump to examine", fname);
205	++error_count;
206	munmap(objbase, sb.st_size);
207	close(fd);
208	return;
209    }
210
211    ex = (const struct exec *) align_struct(file_base);
212
213    printf("%s: a_midmag = 0x%lx\n", fname, (long)ex->a_midmag);
214    printf("  magic = 0x%lx = 0%lo, netmagic = 0x%lx = 0%lo\n",
215	(long)N_GETMAGIC(*ex), (long)N_GETMAGIC(*ex),
216	(long)N_GETMAGIC_NET(*ex), (long)N_GETMAGIC_NET(*ex));
217
218    if (N_BADMAG(*ex)) {
219	warnx("%s: bad magic number", fname);
220	++error_count;
221	munmap(objbase, sb.st_size);
222	return;
223    }
224
225    printf("  a_text   = 0x%lx\n", (long)ex->a_text);
226    printf("  a_data   = 0x%lx\n", (long)ex->a_data);
227    printf("  a_bss    = 0x%lx\n", (long)ex->a_bss);
228    printf("  a_syms   = 0x%lx\n", (long)ex->a_syms);
229    printf("  a_entry  = 0x%lx\n", (long)ex->a_entry);
230    printf("  a_trsize = 0x%lx\n", (long)ex->a_trsize);
231    printf("  a_drsize = 0x%lx\n", (long)ex->a_drsize);
232
233    text_base = file_base + N_TXTOFF(*ex);
234    data_base = file_base + N_DATOFF(*ex);
235    rel_base = (const struct relocation_info *)
236	align_struct(file_base + N_RELOFF(*ex));
237    sym_base = (const struct nlist *) align_struct(file_base + N_SYMOFF(*ex));
238    str_base = file_base + N_STROFF(*ex);
239
240    rel_count = (ex->a_trsize + ex->a_drsize) / sizeof rel_base[0];
241    assert(rel_count * sizeof rel_base[0] == ex->a_trsize + ex->a_drsize);
242    sym_count = ex->a_syms / sizeof sym_base[0];
243    assert(sym_count * sizeof sym_base[0] == ex->a_syms);
244
245    if (sym_count != 0) {
246	sym_used = (unsigned char *) calloc(sym_count, sizeof(unsigned char));
247	assert(sym_used != NULL);
248    }
249
250    printf("  Entry = 0x%lx\n", (long)ex->a_entry);
251    printf("  Text offset = %x, address = %lx\n", N_TXTOFF(*ex),
252	(long)N_TXTADDR(*ex));
253    printf("  Data offset = %lx, address = %lx\n", (long)N_DATOFF(*ex),
254	(long)N_DATADDR(*ex));
255
256    /*
257     * In an executable program file, everything is relocated relative to
258     * the assumed run-time load address, i.e., N_TXTADDR(*ex), i.e., 0x1000.
259     *
260     * In a shared library file, everything is relocated relative to the
261     * start of the file, i.e., N_TXTOFF(*ex), i.e., 0.
262     *
263     * The way to tell the difference is by looking at ex->a_entry.   If it
264     * is >= 0x1000, then we have an executable program.  Otherwise, we
265     * have a shared library.
266     *
267     * When a program is executed, the entire file is mapped into memory,
268     * including the a.out header and so forth.  But it is not mapped at
269     * address 0; rather it is mapped at address 0x1000.  The first page
270     * of the user's address space is left unmapped in order to catch null
271     * pointer dereferences.
272     *
273     * In this program, when we map in an executable program, we have to
274     * simulate the empty page by decrementing our assumed base address by
275     * a pagesize.
276     */
277
278    text_addr = text_base;
279    data_addr = data_base;
280    origin = 0;
281
282    if (ex->a_entry >= PAGE_SIZE) {	/* Executable, not a shared library */
283	/*
284	 * The fields in the object have already been relocated on the
285	 * assumption that the object will be loaded at N_TXTADDR(*ex).
286	 * We have to compensate for that.
287	 */
288	text_addr -= PAGE_SIZE;
289	data_addr -= PAGE_SIZE;
290	origin = PAGE_SIZE;
291	printf("  Program, origin = %lx\n", origin);
292    } else if (N_GETFLAG(*ex) & EX_DYNAMIC)
293	printf("  Shared library, origin = %lx\n", origin);
294    else
295	printf("  Object file, origin = %lx\n", origin);
296
297    if (N_GETFLAG(*ex) & EX_DYNAMIC) {
298	dyn = (const struct _dynamic *) align_struct(data_base);
299	printf("  Dynamic version = %d\n", dyn->d_version);
300
301	sdt = (const struct section_dispatch_table *)
302	    align_struct(text_addr + (unsigned long) dyn->d_un.d_sdt);
303
304	rtrel_base = (const struct relocation_info *)
305	    align_struct(text_addr + sdt->sdt_rel);
306	rtrel_count = (sdt->sdt_hash - sdt->sdt_rel) / sizeof rtrel_base[0];
307	assert(rtrel_count * sizeof rtrel_base[0] ==
308	    (size_t)(sdt->sdt_hash - sdt->sdt_rel));
309
310	rtsym_base = (const struct nzlist *)
311	    align_struct(text_addr + sdt->sdt_nzlist);
312	rtsym_count = (sdt->sdt_strings - sdt->sdt_nzlist) /
313	    sizeof rtsym_base[0];
314	assert(rtsym_count * sizeof rtsym_base[0] ==
315	    (size_t)(sdt->sdt_strings - sdt->sdt_nzlist));
316
317	if (rtsym_count != 0) {
318	    rtsym_used = (unsigned char *) calloc(rtsym_count,
319		sizeof(unsigned char));
320	    assert(rtsym_used != NULL);
321	}
322
323	rtstr_base = text_addr + sdt->sdt_strings;
324    }
325
326    dump_segs();
327    dump_sods();
328    dump_rels("Relocations", rel_base, rel_count, sym_name, sym_used);
329    dump_syms();
330
331    dump_rels("Run-time relocations", rtrel_base, rtrel_count, rtsym_name,
332	rtsym_used);
333    dump_rtsyms();
334
335    if (rtsym_used != NULL) {
336	free(rtsym_used);
337	rtsym_used = NULL;
338    }
339    if (sym_used != NULL) {
340	free(sym_used);
341	sym_used = NULL;
342    }
343    munmap(objbase, sb.st_size);
344}
345
346static void
347dump_rels(const char *label, const struct relocation_info *base,
348    unsigned long count, const char *(*name)(unsigned long),
349    unsigned char *sym_used_flags)
350{
351    unsigned long i;
352
353    printf("  %s:\n", label);
354    for (i = 0;  i < count;  ++i) {
355	const struct relocation_info *r = &base[i];
356	unsigned int size;
357	char contents[16];
358
359	size = 1u << r->r_length;
360
361	if (origin <= (unsigned long)r->r_address
362	  && (unsigned long)r->r_address < origin + ex->a_text + ex->a_data
363	  && 1 <= size && size <= 4) {
364	    /*
365	     * XXX - This can cause unaligned accesses.  OK for the
366	     * i386, not so for other architectures.
367	     */
368	    switch (size) {
369	    case 1:
370		snprintf(contents, sizeof contents, "      [%02x]",
371		  *(unsigned const char *)(text_addr + r->r_address));
372		break;
373	    case 2:
374		snprintf(contents, sizeof contents, "    [%04x]",
375			 *(unsigned const short *)
376			 align_short(text_addr + r->r_address));
377		break;
378	    case 4:
379		snprintf(contents, sizeof contents, "[%08lx]",
380			 *(unsigned const long *)
381			 align_long(text_addr + r->r_address));
382		break;
383	    }
384	} else
385	    snprintf(contents, sizeof contents, "          ");
386
387	printf("    %6lu %8x/%u %s %c%c%c%c%c%c", i,
388	    r->r_address, size,
389	    contents,
390	    r->r_extern   ? 'e' : '-',
391	    r->r_jmptable ? 'j' : '-',
392	    r->r_relative ? 'r' : '-',
393	    r->r_baserel  ? 'b' : '-',
394	    r->r_pcrel    ? 'p' : '-',
395	    r->r_copy     ? 'c' : '-');
396
397	if (r->r_extern || r->r_baserel || r->r_jmptable || r->r_copy) {
398	    printf(" %4u %s", r->r_symbolnum, name(r->r_symbolnum));
399	    sym_used_flags[r->r_symbolnum] = 1;
400	}
401
402	printf("\n");
403    }
404}
405
406static void
407dump_rtsyms(void)
408{
409    unsigned long i;
410
411    printf("  Run-time symbols:\n");
412    for (i = 0;  i < rtsym_count;  ++i) {
413	printf("    %6lu%c ", i, rtsym_used[i] ? '*' : ' ');
414	dump_sym(&rtsym_base[i].nlist);
415	printf("/%-5ld %s\n", rtsym_base[i].nz_size, rtsym_name(i));
416    }
417}
418
419static void
420dump_segs(void)
421{
422    printf("  Text segment starts at address %lx\n", origin + N_TXTOFF(*ex));
423    if (N_GETFLAG(*ex) & EX_DYNAMIC) {
424	printf("    rel starts at %lx\n", sdt->sdt_rel);
425	printf("    hash starts at %lx\n", sdt->sdt_hash);
426	printf("    nzlist starts at %lx\n", sdt->sdt_nzlist);
427	printf("    strings starts at %lx\n", sdt->sdt_strings);
428    }
429
430    printf("  Data segment starts at address %lx\n", origin + N_DATOFF(*ex));
431    if (N_GETFLAG(*ex) & EX_DYNAMIC) {
432	printf("    _dynamic starts at %lx\n", origin + N_DATOFF(*ex));
433	printf("    so_debug starts at %lx\n", (unsigned long) dyn->d_debug);
434	printf("    sdt starts at %lx\n", (unsigned long) dyn->d_un.d_sdt);
435	printf("    got starts at %lx\n", sdt->sdt_got);
436	printf("    plt starts at %lx\n", sdt->sdt_plt);
437	printf("    rest of stuff starts at %lx\n",
438	    sdt->sdt_plt + sdt->sdt_plt_sz);
439    }
440}
441
442static void
443dump_sods(void)
444{
445    long sod_offset;
446    long paths_offset;
447
448    if (dyn == NULL)		/* Not a shared object */
449	return;
450
451    sod_offset = sdt->sdt_sods;
452    printf("  Shared object dependencies:\n");
453    while (sod_offset != 0) {
454      const struct sod *sodp = (const struct sod *) align_struct((text_addr + sod_offset));
455	const char *name = (const char *) (text_addr + sodp->sod_name);
456
457	if (sodp->sod_library)
458	    printf("    -l%-16s version %d.%d\n", name, sodp->sod_major,
459		sodp->sod_minor);
460	else
461	    printf("    %s\n", name);
462	sod_offset = sodp->sod_next;
463    }
464    paths_offset = sdt->sdt_paths;
465    printf("  Shared object additional paths:\n");
466    if (paths_offset != 0) {
467	printf("    %s\n", (const char *)(text_addr + paths_offset));
468    } else {
469	printf("    (none)\n");
470    }
471}
472
473static void
474dump_sym(const struct nlist *np)
475{
476    char type[8];
477    char aux[8];
478    char weak;
479    char *p;
480
481    switch (np->n_type & ~N_EXT) {
482    case N_UNDF:	strcpy(type, "undf");  break;
483    case N_ABS:		strcpy(type, "abs");  break;
484    case N_TEXT:	strcpy(type, "text");  break;
485    case N_DATA:	strcpy(type, "data");  break;
486    case N_BSS:		strcpy(type, "bss");  break;
487    case N_INDR:	strcpy(type, "indr");  break;
488    case N_SIZE:	strcpy(type, "size");  break;
489    case N_COMM:	strcpy(type, "comm");  break;
490    case N_SETA:	strcpy(type, "seta");  break;
491    case N_SETT:	strcpy(type, "sett");  break;
492    case N_SETD:	strcpy(type, "setd");  break;
493    case N_SETB:	strcpy(type, "setb");  break;
494    case N_SETV:	strcpy(type, "setv");  break;
495    case N_FN:		strcpy(type, np->n_type&N_EXT ? "fn" : "warn");  break;
496    case N_GSYM:	strcpy(type, "gsym");  break;
497    case N_FNAME:	strcpy(type, "fname");  break;
498    case N_FUN:		strcpy(type, "fun");  break;
499    case N_STSYM:	strcpy(type, "stsym");  break;
500    case N_LCSYM:	strcpy(type, "lcsym");  break;
501    case N_MAIN:	strcpy(type, "main");  break;
502    case N_PC:		strcpy(type, "pc");  break;
503    case N_RSYM:	strcpy(type, "rsym");  break;
504    case N_SLINE:	strcpy(type, "sline");  break;
505    case N_DSLINE:	strcpy(type, "dsline");  break;
506    case N_BSLINE:	strcpy(type, "bsline");  break;
507    case N_SSYM:	strcpy(type, "ssym");  break;
508    case N_SO:		strcpy(type, "so");  break;
509    case N_LSYM:	strcpy(type, "lsym");  break;
510    case N_BINCL:	strcpy(type, "bincl");  break;
511    case N_SOL:		strcpy(type, "sol");  break;
512    case N_PSYM:	strcpy(type, "psym");  break;
513    case N_EINCL:	strcpy(type, "eincl");  break;
514    case N_ENTRY:	strcpy(type, "entry");  break;
515    case N_LBRAC:	strcpy(type, "lbrac");  break;
516    case N_EXCL:	strcpy(type, "excl");  break;
517    case N_RBRAC:	strcpy(type, "rbrac");  break;
518    case N_BCOMM:	strcpy(type, "bcomm");  break;
519    case N_ECOMM:	strcpy(type, "ecomm");  break;
520    case N_ECOML:	strcpy(type, "ecoml");  break;
521    case N_LENG:	strcpy(type, "leng");  break;
522    default:
523	snprintf(type, sizeof type, "%#02x", np->n_type);
524	break;
525    }
526
527    if (np->n_type & N_EXT && type[0] != '0')
528	for (p = type;  *p != '\0';  ++p)
529	    *p = toupper(*p);
530
531    switch (N_AUX(np)) {
532    case 0:		strcpy(aux, "");  break;
533    case AUX_OBJECT:	strcpy(aux, "objt");  break;
534    case AUX_FUNC:	strcpy(aux, "func");  break;
535    default:		snprintf(aux, sizeof aux, "%#01x", N_AUX(np));  break;
536    }
537
538    weak = N_BIND(np) == BIND_WEAK ? 'w' : ' ';
539
540    printf("%c%-6s %-4s %8lx", weak, type, aux, np->n_value);
541}
542
543static void
544dump_syms(void)
545{
546    unsigned long i;
547
548    printf("  Symbols:\n");
549    for (i = 0;  i < sym_count;  ++i) {
550	printf("    %6lu%c ", i, sym_used[i] ? '*' : ' ');
551	dump_sym(&sym_base[i]);
552	printf(" %s\n", sym_name(i));
553    }
554}
555
556static const char *
557rtsym_name(unsigned long n)
558{
559    assert(n < rtsym_count);
560    if (rtsym_base[n].nz_strx == 0)
561	return "";
562    return rtstr_base + rtsym_base[n].nz_strx;
563}
564
565static const char *
566sym_name(unsigned long n)
567{
568    assert(n < sym_count);
569    if (sym_base[n].n_un.n_strx == 0)
570	return "";
571    return str_base + sym_base[n].n_un.n_strx;
572}
573