crunchide.c revision 29453
1/* 2 * Copyright (c) 1994 University of Maryland 3 * All Rights Reserved. 4 * 5 * Permission to use, copy, modify, distribute, and sell this software and its 6 * documentation for any purpose is hereby granted without fee, provided that 7 * the above copyright notice appear in all copies and that both that 8 * copyright notice and this permission notice appear in supporting 9 * documentation, and that the name of U.M. not be used in advertising or 10 * publicity pertaining to distribution of the software without specific, 11 * written prior permission. U.M. makes no representations about the 12 * suitability of this software for any purpose. It is provided "as is" 13 * without express or implied warranty. 14 * 15 * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M. 17 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 18 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION 19 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 20 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 21 * 22 * Author: James da Silva, Systems Design and Analysis Group 23 * Computer Science Department 24 * University of Maryland at College Park 25 */ 26/* 27 * crunchide.c - tiptoes through an a.out symbol table, hiding all defined 28 * global symbols. Allows the user to supply a "keep list" of symbols 29 * that are not to be hidden. This program relies on the use of the 30 * linker's -dc flag to actually put global bss data into the file's 31 * bss segment (rather than leaving it as undefined "common" data). 32 * 33 * The point of all this is to allow multiple programs to be linked 34 * together without getting multiple-defined errors. 35 * 36 * For example, consider a program "foo.c". It can be linked with a 37 * small stub routine, called "foostub.c", eg: 38 * int foo_main(int argc, char **argv){ return main(argc, argv); } 39 * like so: 40 * cc -c foo.c foostub.c 41 * ld -dc -r foo.o foostub.o -o foo.combined.o 42 * crunchide -k _foo_main foo.combined.o 43 * at this point, foo.combined.o can be linked with another program 44 * and invoked with "foo_main(argc, argv)". foo's main() and any 45 * other globals are hidden and will not conflict with other symbols. 46 * 47 * TODO: 48 * - resolve the theoretical hanging reloc problem (see check_reloc() 49 * below). I have yet to see this problem actually occur in any real 50 * program. In what cases will gcc/gas generate code that needs a 51 * relative reloc from a global symbol, other than PIC? The 52 * solution is to not hide the symbol from the linker in this case, 53 * but to generate some random name for it so that it doesn't link 54 * with anything but holds the place for the reloc. 55 * - arrange that all the BSS segments start at the same address, so 56 * that the final crunched binary BSS size is the max of all the 57 * component programs' BSS sizes, rather than their sum. 58 */ 59#include <a.out.h> 60#include <err.h> 61#include <fcntl.h> 62#include <stdio.h> 63#include <stdlib.h> 64#include <string.h> 65#include <unistd.h> 66#include <sys/types.h> 67#include <sys/stat.h> 68#include <sys/errno.h> 69 70void usage(void); 71 72void add_to_keep_list(char *symbol); 73void add_file_to_keep_list(char *filename); 74 75void hide_syms(char *filename); 76 77 78int main(argc, argv) 79int argc; 80char **argv; 81{ 82 int ch; 83 84 while ((ch = getopt(argc, argv, "k:f:")) != -1) 85 switch(ch) { 86 case 'k': 87 add_to_keep_list(optarg); 88 break; 89 case 'f': 90 add_file_to_keep_list(optarg); 91 break; 92 default: 93 usage(); 94 } 95 96 argc -= optind; 97 argv += optind; 98 99 if(argc == 0) usage(); 100 101 while(argc) { 102 hide_syms(*argv); 103 argc--, argv++; 104 } 105 106 return 0; 107} 108 109void usage(void) 110{ 111 fprintf(stderr, 112 "usage: crunchide [-k <symbol-name>] [-f <keep-list-file>] <files> ...\n"); 113 exit(1); 114} 115 116/* ---------------------------- */ 117 118struct keep { 119 struct keep *next; 120 char *sym; 121} *keep_list; 122 123void add_to_keep_list(char *symbol) 124{ 125 struct keep *newp, *prevp, *curp; 126 int cmp; 127 128 for(curp = keep_list, prevp = NULL; curp; prevp = curp, curp = curp->next) 129 if((cmp = strcmp(symbol, curp->sym)) <= 0) break; 130 131 if(curp && cmp == 0) 132 return; /* already in table */ 133 134 newp = (struct keep *) malloc(sizeof(struct keep)); 135 if(newp) newp->sym = strdup(symbol); 136 if(newp == NULL || newp->sym == NULL) { 137 errx(1, "out of memory for keep list"); 138 } 139 140 newp->next = curp; 141 if(prevp) prevp->next = newp; 142 else keep_list = newp; 143} 144 145int in_keep_list(char *symbol) 146{ 147 struct keep *curp; 148 int cmp; 149 150 for(curp = keep_list; curp; curp = curp->next) 151 if((cmp = strcmp(symbol, curp->sym)) <= 0) break; 152 153 return curp && cmp == 0; 154} 155 156void add_file_to_keep_list(char *filename) 157{ 158 FILE *keepf; 159 char symbol[1024]; 160 int len; 161 162 if((keepf = fopen(filename, "r")) == NULL) { 163 warn("%s", filename); 164 usage(); 165 } 166 167 while(fgets(symbol, 1024, keepf)) { 168 len = strlen(symbol); 169 if(len && symbol[len-1] == '\n') 170 symbol[len-1] = '\0'; 171 172 add_to_keep_list(symbol); 173 } 174 fclose(keepf); 175} 176 177/* ---------------------- */ 178 179int nsyms, ntextrel, ndatarel; 180struct exec *hdrp; 181char *aoutdata, *strbase; 182struct relocation_info *textrel, *datarel; 183struct nlist *symbase; 184 185 186#define SYMSTR(sp) &strbase[(sp)->n_un.n_strx] 187 188/* is the symbol a global symbol defined in the current file? */ 189#define IS_GLOBAL_DEFINED(sp) \ 190 (((sp)->n_type & N_EXT) && ((sp)->n_type & N_TYPE) != N_UNDF) 191 192/* is the relocation entry dependent on a symbol? */ 193#define IS_SYMBOL_RELOC(rp) \ 194 ((rp)->r_extern||(rp)->r_baserel||(rp)->r_jmptable) 195 196void check_reloc(char *filename, struct relocation_info *relp); 197 198void hide_syms(char *filename) 199{ 200 int inf, rc; 201 struct stat infstat; 202 struct relocation_info *relp; 203 struct nlist *symp; 204 205 /* 206 * Open the file and do some error checking. 207 */ 208 209 if((inf = open(filename, O_RDWR)) == -1) { 210 warn("%s", filename); 211 return; 212 } 213 214 if(fstat(inf, &infstat) == -1) { 215 warn("%s", filename); 216 close(inf); 217 return; 218 } 219 220 if(infstat.st_size < sizeof(struct exec)) { 221 warnx("%s: short file", filename); 222 close(inf); 223 return; 224 } 225 226 /* 227 * Read the entire file into memory. XXX - Really, we only need to 228 * read the header and from TRELOFF to the end of the file. 229 */ 230 231 if((aoutdata = (char *) malloc(infstat.st_size)) == NULL) { 232 warnx("%s: too big to read into memory", filename); 233 close(inf); 234 return; 235 } 236 237 if((rc = read(inf, aoutdata, infstat.st_size)) < infstat.st_size) { 238 warnx("%s: read error: %s", filename, 239 rc == -1? strerror(errno) : "short read"); 240 close(inf); 241 return; 242 } 243 244 /* 245 * Check the header and calculate offsets and sizes from it. 246 */ 247 248 hdrp = (struct exec *) aoutdata; 249 250 if(N_BADMAG(*hdrp)) { 251 warnx("%s: bad magic: not an a.out file", filename); 252 close(inf); 253 return; 254 } 255 256#ifdef __FreeBSD__ 257 textrel = (struct relocation_info *) (aoutdata + N_RELOFF(*hdrp)); 258 datarel = (struct relocation_info *) (aoutdata + N_RELOFF(*hdrp) + 259 hdrp->a_trsize); 260#else 261 textrel = (struct relocation_info *) (aoutdata + N_TRELOFF(*hdrp)); 262 datarel = (struct relocation_info *) (aoutdata + N_DRELOFF(*hdrp)); 263#endif 264 symbase = (struct nlist *) (aoutdata + N_SYMOFF(*hdrp)); 265 strbase = (char *) (aoutdata + N_STROFF(*hdrp)); 266 267 ntextrel = hdrp->a_trsize / sizeof(struct relocation_info); 268 ndatarel = hdrp->a_drsize / sizeof(struct relocation_info); 269 nsyms = hdrp->a_syms / sizeof(struct nlist); 270 271 /* 272 * Zap the type field of all globally-defined symbols. The linker will 273 * subsequently ignore these entries. Don't zap any symbols in the 274 * keep list. 275 */ 276 277 for(symp = symbase; symp < symbase + nsyms; symp++) 278 if(IS_GLOBAL_DEFINED(symp) && !in_keep_list(SYMSTR(symp))) 279 symp->n_type = 0; 280 281 /* 282 * Check whether the relocation entries reference any symbols that we 283 * just zapped. I don't know whether ld can handle this case, but I 284 * haven't encountered it yet. These checks are here so that the program 285 * doesn't fail silently should such symbols be encountered. 286 */ 287 288 for(relp = textrel; relp < textrel + ntextrel; relp++) 289 check_reloc(filename, relp); 290 for(relp = datarel; relp < datarel + ndatarel; relp++) 291 check_reloc(filename, relp); 292 293 /* 294 * Write the .o file back out to disk. XXX - Really, we only need to 295 * write the symbol table entries back out. 296 */ 297 lseek(inf, 0, SEEK_SET); 298 if((rc = write(inf, aoutdata, infstat.st_size)) < infstat.st_size) { 299 warnx("%s: write error: %s", filename, 300 rc == -1? strerror(errno) : "short write"); 301 } 302 303 close(inf); 304} 305 306 307void check_reloc(char *filename, struct relocation_info *relp) 308{ 309 /* bail out if we zapped a symbol that is needed */ 310 if(IS_SYMBOL_RELOC(relp) && symbase[relp->r_symbolnum].n_type == 0) { 311 errx(1, "%s: oops, have hanging relocation for %s: bailing out!", 312 filename, SYMSTR(&symbase[relp->r_symbolnum])); 313 } 314} 315