crunchide.c revision 29453
1/*
2 * Copyright (c) 1994 University of Maryland
3 * All Rights Reserved.
4 *
5 * Permission to use, copy, modify, distribute, and sell this software and its
6 * documentation for any purpose is hereby granted without fee, provided that
7 * the above copyright notice appear in all copies and that both that
8 * copyright notice and this permission notice appear in supporting
9 * documentation, and that the name of U.M. not be used in advertising or
10 * publicity pertaining to distribution of the software without specific,
11 * written prior permission.  U.M. makes no representations about the
12 * suitability of this software for any purpose.  It is provided "as is"
13 * without express or implied warranty.
14 *
15 * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
17 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
19 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
20 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 *
22 * Author: James da Silva, Systems Design and Analysis Group
23 *			   Computer Science Department
24 *			   University of Maryland at College Park
25 */
26/*
27 * crunchide.c - tiptoes through an a.out symbol table, hiding all defined
28 *	global symbols.  Allows the user to supply a "keep list" of symbols
29 *	that are not to be hidden.  This program relies on the use of the
30 * 	linker's -dc flag to actually put global bss data into the file's
31 * 	bss segment (rather than leaving it as undefined "common" data).
32 *
33 * 	The point of all this is to allow multiple programs to be linked
34 *	together without getting multiple-defined errors.
35 *
36 *	For example, consider a program "foo.c".  It can be linked with a
37 *	small stub routine, called "foostub.c", eg:
38 *	    int foo_main(int argc, char **argv){ return main(argc, argv); }
39 *      like so:
40 *	    cc -c foo.c foostub.c
41 *	    ld -dc -r foo.o foostub.o -o foo.combined.o
42 *	    crunchide -k _foo_main foo.combined.o
43 *	at this point, foo.combined.o can be linked with another program
44 * 	and invoked with "foo_main(argc, argv)".  foo's main() and any
45 * 	other globals are hidden and will not conflict with other symbols.
46 *
47 * TODO:
48 *	- resolve the theoretical hanging reloc problem (see check_reloc()
49 *	  below). I have yet to see this problem actually occur in any real
50 *	  program. In what cases will gcc/gas generate code that needs a
51 *	  relative reloc from a global symbol, other than PIC?  The
52 *	  solution is to not hide the symbol from the linker in this case,
53 *	  but to generate some random name for it so that it doesn't link
54 *	  with anything but holds the place for the reloc.
55 *      - arrange that all the BSS segments start at the same address, so
56 *	  that the final crunched binary BSS size is the max of all the
57 *	  component programs' BSS sizes, rather than their sum.
58 */
59#include <a.out.h>
60#include <err.h>
61#include <fcntl.h>
62#include <stdio.h>
63#include <stdlib.h>
64#include <string.h>
65#include <unistd.h>
66#include <sys/types.h>
67#include <sys/stat.h>
68#include <sys/errno.h>
69
70void usage(void);
71
72void add_to_keep_list(char *symbol);
73void add_file_to_keep_list(char *filename);
74
75void hide_syms(char *filename);
76
77
78int main(argc, argv)
79int argc;
80char **argv;
81{
82    int ch;
83
84    while ((ch = getopt(argc, argv, "k:f:")) != -1)
85	switch(ch) {
86	case 'k':
87	    add_to_keep_list(optarg);
88	    break;
89	case 'f':
90	    add_file_to_keep_list(optarg);
91	    break;
92	default:
93	    usage();
94	}
95
96    argc -= optind;
97    argv += optind;
98
99    if(argc == 0) usage();
100
101    while(argc) {
102	hide_syms(*argv);
103	argc--, argv++;
104    }
105
106    return 0;
107}
108
/*
 * usage - write the command synopsis to stderr and terminate the program
 * with exit status 1.
 */
void usage(void)
{
    static const char synopsis[] =
        "usage: crunchide [-k <symbol-name>] [-f <keep-list-file>] <files> ...\n";

    fputs(synopsis, stderr);
    exit(1);
}
115
116/* ---------------------------- */
117
/* One entry of the keep list: a singly linked list of symbol names. */
struct keep {
    struct keep *next;		/* following entry, or NULL at the tail */
    char *sym;			/* symbol name */
};

/* Head of the keep list; NULL while the list is empty. */
struct keep *keep_list = NULL;
122
123void add_to_keep_list(char *symbol)
124{
125    struct keep *newp, *prevp, *curp;
126    int cmp;
127
128    for(curp = keep_list, prevp = NULL; curp; prevp = curp, curp = curp->next)
129	if((cmp = strcmp(symbol, curp->sym)) <= 0) break;
130
131    if(curp && cmp == 0)
132	return;	/* already in table */
133
134    newp = (struct keep *) malloc(sizeof(struct keep));
135    if(newp) newp->sym = strdup(symbol);
136    if(newp == NULL || newp->sym == NULL) {
137	errx(1, "out of memory for keep list");
138    }
139
140    newp->next = curp;
141    if(prevp) prevp->next = newp;
142    else keep_list = newp;
143}
144
145int in_keep_list(char *symbol)
146{
147    struct keep *curp;
148    int cmp;
149
150    for(curp = keep_list; curp; curp = curp->next)
151	if((cmp = strcmp(symbol, curp->sym)) <= 0) break;
152
153    return curp && cmp == 0;
154}
155
/*
 * add_file_to_keep_list - read symbol names from filename, one per line,
 * and add each of them to the keep list.
 *
 * A trailing newline is removed from each line before it is added.  If the
 * file cannot be opened, a warning is printed and usage() is called.
 */
void add_file_to_keep_list(char *filename)
{
    char symbol[1024];
    FILE *keepf = fopen(filename, "r");

    if(keepf == NULL) {
        warn("%s", filename);
        usage();
    }

    while(fgets(symbol, sizeof symbol, keepf) != NULL) {
        /* fgets stops at the newline, so it can only be the last character */
        symbol[strcspn(symbol, "\n")] = '\0';
        add_to_keep_list(symbol);
    }

    fclose(keepf);
}
176
177/* ---------------------- */
178
/*
 * State for the object file currently being processed.  hide_syms() fills
 * these in; check_reloc() and the SYMSTR() macro read them.
 */
char *aoutdata;				/* the whole file, read into memory */
struct exec *hdrp;			/* a.out header at the start of aoutdata */
struct relocation_info *textrel;	/* text relocation entries */
struct relocation_info *datarel;	/* data relocation entries */
struct nlist *symbase;			/* symbol table */
char *strbase;				/* string table */
int ntextrel;				/* number of entries at textrel */
int ndatarel;				/* number of entries at datarel */
int nsyms;				/* number of entries at symbase */
184
185
/*
 * address of the symbol's name inside the string table; the expansion is
 * fully parenthesized so it can be used safely inside larger expressions
 */
#define SYMSTR(sp)	(&strbase[(sp)->n_un.n_strx])

/* is the symbol a global symbol defined in the current file? */
#define IS_GLOBAL_DEFINED(sp) \
                  (((sp)->n_type & N_EXT) && ((sp)->n_type & N_TYPE) != N_UNDF)

/* is the relocation entry dependent on a symbol? */
#define IS_SYMBOL_RELOC(rp)   \
                  ((rp)->r_extern||(rp)->r_baserel||(rp)->r_jmptable)
195
196void check_reloc(char *filename, struct relocation_info *relp);
197
198void hide_syms(char *filename)
199{
200    int inf, rc;
201    struct stat infstat;
202    struct relocation_info *relp;
203    struct nlist *symp;
204
205    /*
206     * Open the file and do some error checking.
207     */
208
209    if((inf = open(filename, O_RDWR)) == -1) {
210	warn("%s", filename);
211	return;
212    }
213
214    if(fstat(inf, &infstat) == -1) {
215	warn("%s", filename);
216	close(inf);
217	return;
218    }
219
220    if(infstat.st_size < sizeof(struct exec)) {
221	warnx("%s: short file", filename);
222	close(inf);
223	return;
224    }
225
226    /*
227     * Read the entire file into memory.  XXX - Really, we only need to
228     * read the header and from TRELOFF to the end of the file.
229     */
230
231    if((aoutdata = (char *) malloc(infstat.st_size)) == NULL) {
232	warnx("%s: too big to read into memory", filename);
233	close(inf);
234	return;
235    }
236
237    if((rc = read(inf, aoutdata, infstat.st_size)) < infstat.st_size) {
238	warnx("%s: read error: %s", filename,
239		rc == -1? strerror(errno) : "short read");
240	close(inf);
241	return;
242    }
243
244    /*
245     * Check the header and calculate offsets and sizes from it.
246     */
247
248    hdrp = (struct exec *) aoutdata;
249
250    if(N_BADMAG(*hdrp)) {
251	warnx("%s: bad magic: not an a.out file", filename);
252	close(inf);
253	return;
254    }
255
256#ifdef __FreeBSD__
257    textrel = (struct relocation_info *) (aoutdata + N_RELOFF(*hdrp));
258    datarel = (struct relocation_info *) (aoutdata + N_RELOFF(*hdrp) +
259					  hdrp->a_trsize);
260#else
261    textrel = (struct relocation_info *) (aoutdata + N_TRELOFF(*hdrp));
262    datarel = (struct relocation_info *) (aoutdata + N_DRELOFF(*hdrp));
263#endif
264    symbase = (struct nlist *)		 (aoutdata + N_SYMOFF(*hdrp));
265    strbase = (char *) 			 (aoutdata + N_STROFF(*hdrp));
266
267    ntextrel = hdrp->a_trsize / sizeof(struct relocation_info);
268    ndatarel = hdrp->a_drsize / sizeof(struct relocation_info);
269    nsyms    = hdrp->a_syms   / sizeof(struct nlist);
270
271    /*
272     * Zap the type field of all globally-defined symbols.  The linker will
273     * subsequently ignore these entries.  Don't zap any symbols in the
274     * keep list.
275     */
276
277    for(symp = symbase; symp < symbase + nsyms; symp++)
278	if(IS_GLOBAL_DEFINED(symp) && !in_keep_list(SYMSTR(symp)))
279	    symp->n_type = 0;
280
281    /*
282     * Check whether the relocation entries reference any symbols that we
283     * just zapped.  I don't know whether ld can handle this case, but I
284     * haven't encountered it yet.  These checks are here so that the program
285     * doesn't fail silently should such symbols be encountered.
286     */
287
288    for(relp = textrel; relp < textrel + ntextrel; relp++)
289	check_reloc(filename, relp);
290    for(relp = datarel; relp < datarel + ndatarel; relp++)
291	check_reloc(filename, relp);
292
293    /*
294     * Write the .o file back out to disk.  XXX - Really, we only need to
295     * write the symbol table entries back out.
296     */
297    lseek(inf, 0, SEEK_SET);
298    if((rc = write(inf, aoutdata, infstat.st_size)) < infstat.st_size) {
299	warnx("%s: write error: %s", filename,
300		rc == -1? strerror(errno) : "short write");
301    }
302
303    close(inf);
304}
305
306
307void check_reloc(char *filename, struct relocation_info *relp)
308{
309    /* bail out if we zapped a symbol that is needed */
310    if(IS_SYMBOL_RELOC(relp) && symbase[relp->r_symbolnum].n_type == 0) {
311	errx(1, "%s: oops, have hanging relocation for %s: bailing out!",
312		filename, SYMSTR(&symbase[relp->r_symbolnum]));
313    }
314}
315