/* libelf_ar.c revision 276371 */
1260684Skaiw/*- 2260684Skaiw * Copyright (c) 2006,2008,2010 Joseph Koshy 3260684Skaiw * All rights reserved. 4260684Skaiw * 5260684Skaiw * Redistribution and use in source and binary forms, with or without 6260684Skaiw * modification, are permitted provided that the following conditions 7260684Skaiw * are met: 8260684Skaiw * 1. Redistributions of source code must retain the above copyright 9260684Skaiw * notice, this list of conditions and the following disclaimer. 10260684Skaiw * 2. Redistributions in binary form must reproduce the above copyright 11260684Skaiw * notice, this list of conditions and the following disclaimer in the 12260684Skaiw * documentation and/or other materials provided with the distribution. 13260684Skaiw * 14260684Skaiw * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS `AS IS' AND 15260684Skaiw * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16260684Skaiw * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17260684Skaiw * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18260684Skaiw * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19260684Skaiw * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20260684Skaiw * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21260684Skaiw * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22260684Skaiw * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23260684Skaiw * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24260684Skaiw * SUCH DAMAGE. 
25260684Skaiw */ 26260684Skaiw 27260684Skaiw#include <sys/cdefs.h> 28260684Skaiw 29260684Skaiw#include <assert.h> 30260684Skaiw#include <ctype.h> 31260684Skaiw#include <libelf.h> 32260684Skaiw#include <stdlib.h> 33260684Skaiw#include <string.h> 34260684Skaiw 35260684Skaiw#include "_libelf.h" 36260684Skaiw#include "_libelf_ar.h" 37260684Skaiw 38276371SemasteELFTC_VCSID("$Id: libelf_ar.c 3013 2014-03-23 06:16:59Z jkoshy $"); 39260684Skaiw 40260684Skaiw#define LIBELF_NALLOC_SIZE 16 41260684Skaiw 42260684Skaiw/* 43260684Skaiw * `ar' archive handling. 44260684Skaiw * 45260684Skaiw * `ar' archives start with signature `ARMAG'. Each archive member is 46260684Skaiw * preceded by a header containing meta-data for the member. This 47260684Skaiw * header is described in <ar.h> (struct ar_hdr). The header always 48260684Skaiw * starts on an even address. File data is padded with "\n" 49260684Skaiw * characters to keep this invariant. 50260684Skaiw * 51260684Skaiw * Special considerations for `ar' archives: 52260684Skaiw * 53260684Skaiw * There are two variants of the `ar' archive format: traditional BSD 54260684Skaiw * and SVR4. These differ in the way long file names are treated, and 55260684Skaiw * in the layout of the archive symbol table. 56260684Skaiw * 57260684Skaiw * The `ar' header only has space for a 16 character file name. 58260684Skaiw * 59260684Skaiw * In the SVR4 format, file names are terminated with a '/', so this 60260684Skaiw * effectively leaves 15 characters for the actual file name. Longer 61260684Skaiw * file names stored in a separate 'string table' and referenced 62260684Skaiw * indirectly from the name field. The string table itself appears as 63260684Skaiw * an archive member with name "// ". An `indirect' file name in an 64260684Skaiw * `ar' header matches the pattern "/[0-9]*". The digits form a 65260684Skaiw * decimal number that corresponds to a byte offset into the string 66260684Skaiw * table where the actual file name of the object starts. 
Strings in 67260684Skaiw * the string table are padded to start on even addresses. 68260684Skaiw * 69260684Skaiw * In the BSD format, file names can be upto 16 characters. File 70260684Skaiw * names shorter than 16 characters are padded to 16 characters using 71260684Skaiw * (ASCII) space characters. File names with embedded spaces and file 72260684Skaiw * names longer than 16 characters are stored immediately after the 73260684Skaiw * archive header and the name field set to a special indirect name 74260684Skaiw * matching the pattern "#1/[0-9]+". The digits form a decimal number 75260684Skaiw * that corresponds to the actual length of the file name following 76260684Skaiw * the archive header. The content of the archive member immediately 77260684Skaiw * follows the file name, and the size field of the archive member 78260684Skaiw * holds the sum of the sizes of the member and of the appended file 79260684Skaiw * name. 80260684Skaiw * 81260684Skaiw * Archives may also have a symbol table (see ranlib(1)), mapping 82260684Skaiw * program symbols to object files inside the archive. 83260684Skaiw * 84260684Skaiw * In the SVR4 format, a symbol table uses a file name of "/ " in its 85260684Skaiw * archive header. The symbol table is structured as: 86260684Skaiw * - a 4-byte count of entries stored as a binary value, MSB first 87260684Skaiw * - 'n' 4-byte offsets, stored as binary values, MSB first 88260684Skaiw * - 'n' NUL-terminated strings, for ELF symbol names, stored unpadded. 89260684Skaiw * 90260684Skaiw * In the BSD format, the symbol table uses a file name of "__.SYMDEF". 91260684Skaiw * It is structured as two parts: 92260684Skaiw * - The first part is an array of "ranlib" structures preceded by 93260684Skaiw * the size of the array in bytes. Each "ranlib" structure 94260684Skaiw * describes one symbol. 
Each structure contains an offset into 95260684Skaiw * the string table for the symbol name, and a file offset into the 96260684Skaiw * archive for the member defining the symbol. 97260684Skaiw * - The second part is a string table containing NUL-terminated 98260684Skaiw * strings, preceded by the size of the string table in bytes. 99260684Skaiw * 100260684Skaiw * If the symbol table and string table are is present in an archive 101260684Skaiw * they must be the very first objects and in that order. 102260684Skaiw */ 103260684Skaiw 104260684Skaiw 105260684Skaiw/* 106260684Skaiw * Retrieve an archive header descriptor. 107260684Skaiw */ 108260684Skaiw 109260684SkaiwElf_Arhdr * 110260684Skaiw_libelf_ar_gethdr(Elf *e) 111260684Skaiw{ 112260684Skaiw Elf *parent; 113276371Semaste Elf_Arhdr *eh; 114260684Skaiw char *namelen; 115260684Skaiw size_t n, nlen; 116260684Skaiw struct ar_hdr *arh; 117260684Skaiw 118260684Skaiw if ((parent = e->e_parent) == NULL) { 119260684Skaiw LIBELF_SET_ERROR(ARGUMENT, 0); 120260684Skaiw return (NULL); 121260684Skaiw } 122260684Skaiw 123260684Skaiw assert((e->e_flags & LIBELF_F_AR_HEADER) == 0); 124260684Skaiw 125260684Skaiw arh = (struct ar_hdr *) (uintptr_t) e->e_hdr.e_rawhdr; 126260684Skaiw 127260684Skaiw assert((uintptr_t) arh >= (uintptr_t) parent->e_rawfile + SARMAG); 128260684Skaiw assert((uintptr_t) arh <= (uintptr_t) parent->e_rawfile + 129260684Skaiw parent->e_rawsize - sizeof(struct ar_hdr)); 130260684Skaiw 131260684Skaiw if ((eh = malloc(sizeof(Elf_Arhdr))) == NULL) { 132260684Skaiw LIBELF_SET_ERROR(RESOURCE, 0); 133260684Skaiw return (NULL); 134260684Skaiw } 135260684Skaiw 136260684Skaiw e->e_hdr.e_arhdr = eh; 137260684Skaiw e->e_flags |= LIBELF_F_AR_HEADER; 138260684Skaiw 139260684Skaiw eh->ar_name = eh->ar_rawname = NULL; 140260684Skaiw 141260684Skaiw if ((eh->ar_name = _libelf_ar_get_translated_name(arh, parent)) == 142260684Skaiw NULL) 143260684Skaiw goto error; 144260684Skaiw 145260684Skaiw if 
(_libelf_ar_get_number(arh->ar_uid, sizeof(arh->ar_uid), 10, 146260684Skaiw &n) == 0) 147260684Skaiw goto error; 148260684Skaiw eh->ar_uid = (uid_t) n; 149260684Skaiw 150260684Skaiw if (_libelf_ar_get_number(arh->ar_gid, sizeof(arh->ar_gid), 10, 151260684Skaiw &n) == 0) 152260684Skaiw goto error; 153260684Skaiw eh->ar_gid = (gid_t) n; 154260684Skaiw 155260684Skaiw if (_libelf_ar_get_number(arh->ar_mode, sizeof(arh->ar_mode), 8, 156260684Skaiw &n) == 0) 157260684Skaiw goto error; 158260684Skaiw eh->ar_mode = (mode_t) n; 159260684Skaiw 160260684Skaiw if (_libelf_ar_get_number(arh->ar_size, sizeof(arh->ar_size), 10, 161260684Skaiw &n) == 0) 162260684Skaiw goto error; 163260684Skaiw 164260684Skaiw /* 165260684Skaiw * Get the true size of the member if extended naming is being used. 166260684Skaiw */ 167260684Skaiw if (IS_EXTENDED_BSD_NAME(arh->ar_name)) { 168260684Skaiw namelen = arh->ar_name + 169260684Skaiw LIBELF_AR_BSD_EXTENDED_NAME_PREFIX_SIZE; 170260684Skaiw if (_libelf_ar_get_number(namelen, sizeof(arh->ar_name) - 171260684Skaiw LIBELF_AR_BSD_EXTENDED_NAME_PREFIX_SIZE, 10, &nlen) == 0) 172260684Skaiw goto error; 173260684Skaiw n -= nlen; 174260684Skaiw } 175260684Skaiw 176260684Skaiw eh->ar_size = n; 177260684Skaiw 178260684Skaiw if ((eh->ar_rawname = _libelf_ar_get_raw_name(arh)) == NULL) 179260684Skaiw goto error; 180260684Skaiw 181260684Skaiw eh->ar_flags = 0; 182260684Skaiw 183260684Skaiw return (eh); 184260684Skaiw 185260684Skaiw error: 186260684Skaiw if (eh) { 187260684Skaiw if (eh->ar_name) 188260684Skaiw free(eh->ar_name); 189260684Skaiw if (eh->ar_rawname) 190260684Skaiw free(eh->ar_rawname); 191260684Skaiw free(eh); 192260684Skaiw } 193260684Skaiw 194260684Skaiw e->e_flags &= ~LIBELF_F_AR_HEADER; 195276371Semaste e->e_hdr.e_rawhdr = (unsigned char *) arh; 196260684Skaiw 197260684Skaiw return (NULL); 198260684Skaiw} 199260684Skaiw 200260684SkaiwElf * 201260684Skaiw_libelf_ar_open_member(int fd, Elf_Cmd c, Elf *elf) 202260684Skaiw{ 203260684Skaiw Elf *e; 
204276371Semaste off_t next; 205260684Skaiw size_t nsz, sz; 206260684Skaiw struct ar_hdr *arh; 207276371Semaste char *member, *namelen; 208260684Skaiw 209260684Skaiw assert(elf->e_kind == ELF_K_AR); 210260684Skaiw 211260684Skaiw next = elf->e_u.e_ar.e_next; 212260684Skaiw 213260684Skaiw /* 214260684Skaiw * `next' is only set to zero by elf_next() when the last 215260684Skaiw * member of an archive is processed. 216260684Skaiw */ 217260684Skaiw if (next == (off_t) 0) 218260684Skaiw return (NULL); 219260684Skaiw 220260684Skaiw assert((next & 1) == 0); 221260684Skaiw 222260684Skaiw arh = (struct ar_hdr *) (elf->e_rawfile + next); 223260684Skaiw 224260684Skaiw /* 225260684Skaiw * Retrieve the size of the member. 226260684Skaiw */ 227260684Skaiw if (_libelf_ar_get_number(arh->ar_size, sizeof(arh->ar_size), 10, 228260684Skaiw &sz) == 0) { 229260684Skaiw LIBELF_SET_ERROR(ARCHIVE, 0); 230260684Skaiw return (NULL); 231260684Skaiw } 232260684Skaiw 233260684Skaiw /* 234260684Skaiw * Adjust the size field for members in BSD archives using 235260684Skaiw * extended naming. 
236260684Skaiw */ 237260684Skaiw if (IS_EXTENDED_BSD_NAME(arh->ar_name)) { 238260684Skaiw namelen = arh->ar_name + 239260684Skaiw LIBELF_AR_BSD_EXTENDED_NAME_PREFIX_SIZE; 240260684Skaiw if (_libelf_ar_get_number(namelen, sizeof(arh->ar_name) - 241260684Skaiw LIBELF_AR_BSD_EXTENDED_NAME_PREFIX_SIZE, 10, &nsz) == 0) { 242260684Skaiw LIBELF_SET_ERROR(ARCHIVE, 0); 243260684Skaiw return (NULL); 244260684Skaiw } 245260684Skaiw 246260684Skaiw member = (char *) (arh + 1) + nsz; 247260684Skaiw sz -= nsz; 248260684Skaiw } else 249260684Skaiw member = (char *) (arh + 1); 250260684Skaiw 251260684Skaiw 252276371Semaste if ((e = elf_memory(member, sz)) == NULL) 253260684Skaiw return (NULL); 254260684Skaiw 255260684Skaiw e->e_fd = fd; 256260684Skaiw e->e_cmd = c; 257276371Semaste e->e_hdr.e_rawhdr = (unsigned char *) arh; 258260684Skaiw 259260684Skaiw elf->e_u.e_ar.e_nchildren++; 260260684Skaiw e->e_parent = elf; 261260684Skaiw 262260684Skaiw return (e); 263260684Skaiw} 264260684Skaiw 265260684Skaiw/* 266260684Skaiw * A BSD-style ar(1) symbol table has the following layout: 267260684Skaiw * 268260684Skaiw * - A count of bytes used by the following array of 'ranlib' 269260684Skaiw * structures, stored as a 'long'. 270260684Skaiw * - An array of 'ranlib' structures. Each array element is 271260684Skaiw * two 'long's in size. 272260684Skaiw * - A count of bytes used for the following symbol table. 273260684Skaiw * - The symbol table itself. 274260684Skaiw */ 275260684Skaiw 276260684Skaiw/* 277276371Semaste * A helper macro to read in a 'long' value from the archive. 278276371Semaste * 279276371Semaste * We use memcpy() since the source pointer may be misaligned with 280276371Semaste * respect to the natural alignment for a C 'long'. 
281260684Skaiw */ 282260684Skaiw#define GET_LONG(P, V)do { \ 283260684Skaiw memcpy(&(V), (P), sizeof(long)); \ 284260684Skaiw (P) += sizeof(long); \ 285260684Skaiw } while (0) 286260684Skaiw 287260684SkaiwElf_Arsym * 288260684Skaiw_libelf_ar_process_bsd_symtab(Elf *e, size_t *count) 289260684Skaiw{ 290260684Skaiw Elf_Arsym *symtab, *sym; 291276371Semaste unsigned int n, nentries; 292260684Skaiw unsigned char *end, *p, *p0, *s, *s0; 293276371Semaste const size_t entrysize = 2 * sizeof(long); 294276371Semaste long arraysize, fileoffset, stroffset, strtabsize; 295260684Skaiw 296260684Skaiw assert(e != NULL); 297260684Skaiw assert(count != NULL); 298260684Skaiw assert(e->e_u.e_ar.e_symtab == NULL); 299260684Skaiw 300260684Skaiw symtab = NULL; 301260684Skaiw 302260684Skaiw /* 303260684Skaiw * The BSD symbol table always contains the count fields even 304260684Skaiw * if there are no entries in it. 305260684Skaiw */ 306260684Skaiw if (e->e_u.e_ar.e_rawsymtabsz < 2 * sizeof(long)) 307260684Skaiw goto symtaberror; 308260684Skaiw 309260684Skaiw p = p0 = (unsigned char *) e->e_u.e_ar.e_rawsymtab; 310260684Skaiw end = p0 + e->e_u.e_ar.e_rawsymtabsz; 311260684Skaiw 312260684Skaiw /* 313260684Skaiw * Retrieve the size of the array of ranlib descriptors and 314260684Skaiw * check it for validity. 315260684Skaiw */ 316260684Skaiw GET_LONG(p, arraysize); 317260684Skaiw 318276371Semaste if (arraysize < 0 || p0 + arraysize >= end || 319276371Semaste ((size_t) arraysize % entrysize != 0)) 320260684Skaiw goto symtaberror; 321260684Skaiw 322260684Skaiw /* 323260684Skaiw * Check the value of the string table size. 324260684Skaiw */ 325260684Skaiw s = p + arraysize; 326260684Skaiw GET_LONG(s, strtabsize); 327260684Skaiw 328260684Skaiw s0 = s; /* Start of string table. 
*/ 329276371Semaste if (strtabsize < 0 || s0 + strtabsize > end) 330260684Skaiw goto symtaberror; 331260684Skaiw 332276371Semaste nentries = (size_t) arraysize / entrysize; 333260684Skaiw 334260684Skaiw /* 335260684Skaiw * Allocate space for the returned Elf_Arsym array. 336260684Skaiw */ 337260684Skaiw if ((symtab = malloc(sizeof(Elf_Arsym) * (nentries + 1))) == NULL) { 338260684Skaiw LIBELF_SET_ERROR(RESOURCE, 0); 339260684Skaiw return (NULL); 340260684Skaiw } 341260684Skaiw 342260684Skaiw /* Read in symbol table entries. */ 343260684Skaiw for (n = 0, sym = symtab; n < nentries; n++, sym++) { 344260684Skaiw GET_LONG(p, stroffset); 345260684Skaiw GET_LONG(p, fileoffset); 346260684Skaiw 347276371Semaste if (stroffset < 0 || fileoffset < 0 || 348276371Semaste (size_t) fileoffset >= e->e_rawsize) 349276371Semaste goto symtaberror; 350276371Semaste 351260684Skaiw s = s0 + stroffset; 352260684Skaiw 353260684Skaiw if (s >= end) 354260684Skaiw goto symtaberror; 355260684Skaiw 356276371Semaste sym->as_off = (off_t) fileoffset; 357260684Skaiw sym->as_hash = elf_hash((char *) s); 358260684Skaiw sym->as_name = (char *) s; 359260684Skaiw } 360260684Skaiw 361260684Skaiw /* Fill up the sentinel entry. */ 362260684Skaiw sym->as_name = NULL; 363260684Skaiw sym->as_hash = ~0UL; 364260684Skaiw sym->as_off = (off_t) 0; 365260684Skaiw 366260684Skaiw /* Remember the processed symbol table. 
*/ 367260684Skaiw e->e_u.e_ar.e_symtab = symtab; 368260684Skaiw 369260684Skaiw *count = e->e_u.e_ar.e_symtabsz = nentries + 1; 370260684Skaiw 371260684Skaiw return (symtab); 372260684Skaiw 373260684Skaiwsymtaberror: 374260684Skaiw if (symtab) 375260684Skaiw free(symtab); 376260684Skaiw LIBELF_SET_ERROR(ARCHIVE, 0); 377260684Skaiw return (NULL); 378260684Skaiw} 379260684Skaiw 380260684Skaiw/* 381260684Skaiw * An SVR4-style ar(1) symbol table has the following layout: 382260684Skaiw * 383260684Skaiw * - The first 4 bytes are a binary count of the number of entries in the 384260684Skaiw * symbol table, stored MSB-first. 385260684Skaiw * - Then there are 'n' 4-byte binary offsets, also stored MSB first. 386260684Skaiw * - Following this, there are 'n' null-terminated strings. 387260684Skaiw */ 388260684Skaiw 389260684Skaiw#define GET_WORD(P, V) do { \ 390260684Skaiw (V) = 0; \ 391260684Skaiw (V) = (P)[0]; (V) <<= 8; \ 392260684Skaiw (V) += (P)[1]; (V) <<= 8; \ 393260684Skaiw (V) += (P)[2]; (V) <<= 8; \ 394260684Skaiw (V) += (P)[3]; \ 395260684Skaiw } while (0) 396260684Skaiw 397260684Skaiw#define INTSZ 4 398260684Skaiw 399260684Skaiw 400260684SkaiwElf_Arsym * 401260684Skaiw_libelf_ar_process_svr4_symtab(Elf *e, size_t *count) 402260684Skaiw{ 403276371Semaste uint32_t off; 404276371Semaste size_t n, nentries; 405260684Skaiw Elf_Arsym *symtab, *sym; 406260684Skaiw unsigned char *p, *s, *end; 407260684Skaiw 408260684Skaiw assert(e != NULL); 409260684Skaiw assert(count != NULL); 410260684Skaiw assert(e->e_u.e_ar.e_symtab == NULL); 411260684Skaiw 412260684Skaiw symtab = NULL; 413260684Skaiw 414260684Skaiw if (e->e_u.e_ar.e_rawsymtabsz < INTSZ) 415260684Skaiw goto symtaberror; 416260684Skaiw 417260684Skaiw p = (unsigned char *) e->e_u.e_ar.e_rawsymtab; 418260684Skaiw end = p + e->e_u.e_ar.e_rawsymtabsz; 419260684Skaiw 420260684Skaiw GET_WORD(p, nentries); 421260684Skaiw p += INTSZ; 422260684Skaiw 423260684Skaiw if (nentries == 0 || p + nentries * INTSZ >= end) 424260684Skaiw 
goto symtaberror; 425260684Skaiw 426260684Skaiw /* Allocate space for a nentries + a sentinel. */ 427260684Skaiw if ((symtab = malloc(sizeof(Elf_Arsym) * (nentries+1))) == NULL) { 428260684Skaiw LIBELF_SET_ERROR(RESOURCE, 0); 429260684Skaiw return (NULL); 430260684Skaiw } 431260684Skaiw 432260684Skaiw s = p + (nentries * INTSZ); /* start of the string table. */ 433260684Skaiw 434260684Skaiw for (n = nentries, sym = symtab; n > 0; n--) { 435260684Skaiw if (s >= end) 436260684Skaiw goto symtaberror; 437260684Skaiw 438260684Skaiw GET_WORD(p, off); 439276371Semaste if (off >= e->e_rawsize) 440276371Semaste goto symtaberror; 441260684Skaiw 442276371Semaste sym->as_off = (off_t) off; 443260684Skaiw sym->as_hash = elf_hash((char *) s); 444260684Skaiw sym->as_name = (char *) s; 445260684Skaiw 446260684Skaiw p += INTSZ; 447260684Skaiw sym++; 448260684Skaiw 449260684Skaiw for (; s < end && *s++ != '\0';) /* skip to next string */ 450260684Skaiw ; 451260684Skaiw } 452260684Skaiw 453260684Skaiw /* Fill up the sentinel entry. */ 454260684Skaiw sym->as_name = NULL; 455260684Skaiw sym->as_hash = ~0UL; 456260684Skaiw sym->as_off = (off_t) 0; 457260684Skaiw 458260684Skaiw *count = e->e_u.e_ar.e_symtabsz = nentries + 1; 459260684Skaiw e->e_u.e_ar.e_symtab = symtab; 460260684Skaiw 461260684Skaiw return (symtab); 462260684Skaiw 463260684Skaiwsymtaberror: 464260684Skaiw if (symtab) 465260684Skaiw free(symtab); 466260684Skaiw LIBELF_SET_ERROR(ARCHIVE, 0); 467260684Skaiw return (NULL); 468260684Skaiw} 469