libelf_ar.c revision 276371
1260684Skaiw/*-
2260684Skaiw * Copyright (c) 2006,2008,2010 Joseph Koshy
3260684Skaiw * All rights reserved.
4260684Skaiw *
5260684Skaiw * Redistribution and use in source and binary forms, with or without
6260684Skaiw * modification, are permitted provided that the following conditions
7260684Skaiw * are met:
8260684Skaiw * 1. Redistributions of source code must retain the above copyright
9260684Skaiw *    notice, this list of conditions and the following disclaimer.
10260684Skaiw * 2. Redistributions in binary form must reproduce the above copyright
11260684Skaiw *    notice, this list of conditions and the following disclaimer in the
12260684Skaiw *    documentation and/or other materials provided with the distribution.
13260684Skaiw *
14260684Skaiw * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS `AS IS' AND
15260684Skaiw * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16260684Skaiw * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17260684Skaiw * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18260684Skaiw * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19260684Skaiw * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20260684Skaiw * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21260684Skaiw * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22260684Skaiw * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23260684Skaiw * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24260684Skaiw * SUCH DAMAGE.
25260684Skaiw */
26260684Skaiw
27260684Skaiw#include <sys/cdefs.h>
28260684Skaiw
29260684Skaiw#include <assert.h>
30260684Skaiw#include <ctype.h>
31260684Skaiw#include <libelf.h>
32260684Skaiw#include <stdlib.h>
33260684Skaiw#include <string.h>
34260684Skaiw
35260684Skaiw#include "_libelf.h"
36260684Skaiw#include "_libelf_ar.h"
37260684Skaiw
38276371SemasteELFTC_VCSID("$Id: libelf_ar.c 3013 2014-03-23 06:16:59Z jkoshy $");
39260684Skaiw
40260684Skaiw#define	LIBELF_NALLOC_SIZE	16
41260684Skaiw
42260684Skaiw/*
43260684Skaiw * `ar' archive handling.
44260684Skaiw *
45260684Skaiw * `ar' archives start with signature `ARMAG'.  Each archive member is
46260684Skaiw * preceded by a header containing meta-data for the member.  This
47260684Skaiw * header is described in <ar.h> (struct ar_hdr).  The header always
48260684Skaiw * starts on an even address.  File data is padded with "\n"
49260684Skaiw * characters to keep this invariant.
50260684Skaiw *
51260684Skaiw * Special considerations for `ar' archives:
52260684Skaiw *
53260684Skaiw * There are two variants of the `ar' archive format: traditional BSD
54260684Skaiw * and SVR4.  These differ in the way long file names are treated, and
55260684Skaiw * in the layout of the archive symbol table.
56260684Skaiw *
57260684Skaiw * The `ar' header only has space for a 16 character file name.
58260684Skaiw *
 * In the SVR4 format, file names are terminated with a '/', so this
 * effectively leaves 15 characters for the actual file name.  Longer
 * file names are stored in a separate 'string table' and referenced
 * indirectly from the name field.  The string table itself appears as
 * an archive member with name "// ".  An `indirect' file name in an
 * `ar' header matches the pattern "/[0-9]*". The digits form a
 * decimal number that corresponds to a byte offset into the string
 * table where the actual file name of the object starts.  Strings in
 * the string table are padded to start on even addresses.
68260684Skaiw *
 * In the BSD format, file names can be up to 16 characters.  File
 * names shorter than 16 characters are padded to 16 characters using
 * (ASCII) space characters.  File names with embedded spaces and file
 * names longer than 16 characters are stored immediately after the
 * archive header and the name field set to a special indirect name
 * matching the pattern "#1/[0-9]+".  The digits form a decimal number
 * that corresponds to the actual length of the file name following
 * the archive header.  The content of the archive member immediately
 * follows the file name, and the size field of the archive member
 * holds the sum of the sizes of the member and of the appended file
 * name.
80260684Skaiw *
81260684Skaiw * Archives may also have a symbol table (see ranlib(1)), mapping
82260684Skaiw * program symbols to object files inside the archive.
83260684Skaiw *
84260684Skaiw * In the SVR4 format, a symbol table uses a file name of "/ " in its
85260684Skaiw * archive header.  The symbol table is structured as:
86260684Skaiw *  - a 4-byte count of entries stored as a binary value, MSB first
87260684Skaiw *  - 'n' 4-byte offsets, stored as binary values, MSB first
88260684Skaiw *  - 'n' NUL-terminated strings, for ELF symbol names, stored unpadded.
89260684Skaiw *
90260684Skaiw * In the BSD format, the symbol table uses a file name of "__.SYMDEF".
91260684Skaiw * It is structured as two parts:
92260684Skaiw *  - The first part is an array of "ranlib" structures preceded by
93260684Skaiw *    the size of the array in bytes.  Each "ranlib" structure
94260684Skaiw *    describes one symbol.  Each structure contains an offset into
95260684Skaiw *    the string table for the symbol name, and a file offset into the
96260684Skaiw *    archive for the member defining the symbol.
97260684Skaiw *  - The second part is a string table containing NUL-terminated
98260684Skaiw *    strings, preceded by the size of the string table in bytes.
99260684Skaiw *
 * If the symbol table and string table are present in an archive,
 * they must be the very first objects and in that order.
102260684Skaiw */
103260684Skaiw
104260684Skaiw
105260684Skaiw/*
106260684Skaiw * Retrieve an archive header descriptor.
107260684Skaiw */
108260684Skaiw
109260684SkaiwElf_Arhdr *
110260684Skaiw_libelf_ar_gethdr(Elf *e)
111260684Skaiw{
112260684Skaiw	Elf *parent;
113276371Semaste	Elf_Arhdr *eh;
114260684Skaiw	char *namelen;
115260684Skaiw	size_t n, nlen;
116260684Skaiw	struct ar_hdr *arh;
117260684Skaiw
118260684Skaiw	if ((parent = e->e_parent) == NULL) {
119260684Skaiw		LIBELF_SET_ERROR(ARGUMENT, 0);
120260684Skaiw		return (NULL);
121260684Skaiw	}
122260684Skaiw
123260684Skaiw	assert((e->e_flags & LIBELF_F_AR_HEADER) == 0);
124260684Skaiw
125260684Skaiw	arh = (struct ar_hdr *) (uintptr_t) e->e_hdr.e_rawhdr;
126260684Skaiw
127260684Skaiw	assert((uintptr_t) arh >= (uintptr_t) parent->e_rawfile + SARMAG);
128260684Skaiw	assert((uintptr_t) arh <= (uintptr_t) parent->e_rawfile +
129260684Skaiw	    parent->e_rawsize - sizeof(struct ar_hdr));
130260684Skaiw
131260684Skaiw	if ((eh = malloc(sizeof(Elf_Arhdr))) == NULL) {
132260684Skaiw		LIBELF_SET_ERROR(RESOURCE, 0);
133260684Skaiw		return (NULL);
134260684Skaiw	}
135260684Skaiw
136260684Skaiw	e->e_hdr.e_arhdr = eh;
137260684Skaiw	e->e_flags |= LIBELF_F_AR_HEADER;
138260684Skaiw
139260684Skaiw	eh->ar_name = eh->ar_rawname = NULL;
140260684Skaiw
141260684Skaiw	if ((eh->ar_name = _libelf_ar_get_translated_name(arh, parent)) ==
142260684Skaiw	    NULL)
143260684Skaiw		goto error;
144260684Skaiw
145260684Skaiw	if (_libelf_ar_get_number(arh->ar_uid, sizeof(arh->ar_uid), 10,
146260684Skaiw	    &n) == 0)
147260684Skaiw		goto error;
148260684Skaiw	eh->ar_uid = (uid_t) n;
149260684Skaiw
150260684Skaiw	if (_libelf_ar_get_number(arh->ar_gid, sizeof(arh->ar_gid), 10,
151260684Skaiw	    &n) == 0)
152260684Skaiw		goto error;
153260684Skaiw	eh->ar_gid = (gid_t) n;
154260684Skaiw
155260684Skaiw	if (_libelf_ar_get_number(arh->ar_mode, sizeof(arh->ar_mode), 8,
156260684Skaiw	    &n) == 0)
157260684Skaiw		goto error;
158260684Skaiw	eh->ar_mode = (mode_t) n;
159260684Skaiw
160260684Skaiw	if (_libelf_ar_get_number(arh->ar_size, sizeof(arh->ar_size), 10,
161260684Skaiw	    &n) == 0)
162260684Skaiw		goto error;
163260684Skaiw
164260684Skaiw	/*
165260684Skaiw	 * Get the true size of the member if extended naming is being used.
166260684Skaiw	 */
167260684Skaiw	if (IS_EXTENDED_BSD_NAME(arh->ar_name)) {
168260684Skaiw		namelen = arh->ar_name +
169260684Skaiw		    LIBELF_AR_BSD_EXTENDED_NAME_PREFIX_SIZE;
170260684Skaiw		if (_libelf_ar_get_number(namelen, sizeof(arh->ar_name) -
171260684Skaiw		    LIBELF_AR_BSD_EXTENDED_NAME_PREFIX_SIZE, 10, &nlen) == 0)
172260684Skaiw			goto error;
173260684Skaiw		n -= nlen;
174260684Skaiw	}
175260684Skaiw
176260684Skaiw	eh->ar_size = n;
177260684Skaiw
178260684Skaiw	if ((eh->ar_rawname = _libelf_ar_get_raw_name(arh)) == NULL)
179260684Skaiw		goto error;
180260684Skaiw
181260684Skaiw	eh->ar_flags = 0;
182260684Skaiw
183260684Skaiw	return (eh);
184260684Skaiw
185260684Skaiw error:
186260684Skaiw	if (eh) {
187260684Skaiw		if (eh->ar_name)
188260684Skaiw			free(eh->ar_name);
189260684Skaiw		if (eh->ar_rawname)
190260684Skaiw			free(eh->ar_rawname);
191260684Skaiw		free(eh);
192260684Skaiw	}
193260684Skaiw
194260684Skaiw	e->e_flags &= ~LIBELF_F_AR_HEADER;
195276371Semaste	e->e_hdr.e_rawhdr = (unsigned char *) arh;
196260684Skaiw
197260684Skaiw	return (NULL);
198260684Skaiw}
199260684Skaiw
200260684SkaiwElf *
201260684Skaiw_libelf_ar_open_member(int fd, Elf_Cmd c, Elf *elf)
202260684Skaiw{
203260684Skaiw	Elf *e;
204276371Semaste	off_t next;
205260684Skaiw	size_t nsz, sz;
206260684Skaiw	struct ar_hdr *arh;
207276371Semaste	char *member, *namelen;
208260684Skaiw
209260684Skaiw	assert(elf->e_kind == ELF_K_AR);
210260684Skaiw
211260684Skaiw	next = elf->e_u.e_ar.e_next;
212260684Skaiw
213260684Skaiw	/*
214260684Skaiw	 * `next' is only set to zero by elf_next() when the last
215260684Skaiw	 * member of an archive is processed.
216260684Skaiw	 */
217260684Skaiw	if (next == (off_t) 0)
218260684Skaiw		return (NULL);
219260684Skaiw
220260684Skaiw	assert((next & 1) == 0);
221260684Skaiw
222260684Skaiw	arh = (struct ar_hdr *) (elf->e_rawfile + next);
223260684Skaiw
224260684Skaiw	/*
225260684Skaiw	 * Retrieve the size of the member.
226260684Skaiw	 */
227260684Skaiw	if (_libelf_ar_get_number(arh->ar_size, sizeof(arh->ar_size), 10,
228260684Skaiw	    &sz) == 0) {
229260684Skaiw		LIBELF_SET_ERROR(ARCHIVE, 0);
230260684Skaiw		return (NULL);
231260684Skaiw	}
232260684Skaiw
233260684Skaiw	/*
234260684Skaiw	 * Adjust the size field for members in BSD archives using
235260684Skaiw	 * extended naming.
236260684Skaiw	 */
237260684Skaiw	if (IS_EXTENDED_BSD_NAME(arh->ar_name)) {
238260684Skaiw		namelen = arh->ar_name +
239260684Skaiw		    LIBELF_AR_BSD_EXTENDED_NAME_PREFIX_SIZE;
240260684Skaiw		if (_libelf_ar_get_number(namelen, sizeof(arh->ar_name) -
241260684Skaiw		    LIBELF_AR_BSD_EXTENDED_NAME_PREFIX_SIZE, 10, &nsz) == 0) {
242260684Skaiw			LIBELF_SET_ERROR(ARCHIVE, 0);
243260684Skaiw			return (NULL);
244260684Skaiw		}
245260684Skaiw
246260684Skaiw		member = (char *) (arh + 1) + nsz;
247260684Skaiw		sz -= nsz;
248260684Skaiw	} else
249260684Skaiw		member = (char *) (arh + 1);
250260684Skaiw
251260684Skaiw
252276371Semaste	if ((e = elf_memory(member, sz)) == NULL)
253260684Skaiw		return (NULL);
254260684Skaiw
255260684Skaiw	e->e_fd = fd;
256260684Skaiw	e->e_cmd = c;
257276371Semaste	e->e_hdr.e_rawhdr = (unsigned char *) arh;
258260684Skaiw
259260684Skaiw	elf->e_u.e_ar.e_nchildren++;
260260684Skaiw	e->e_parent = elf;
261260684Skaiw
262260684Skaiw	return (e);
263260684Skaiw}
264260684Skaiw
265260684Skaiw/*
266260684Skaiw * A BSD-style ar(1) symbol table has the following layout:
267260684Skaiw *
268260684Skaiw * - A count of bytes used by the following array of 'ranlib'
269260684Skaiw *   structures, stored as a 'long'.
270260684Skaiw * - An array of 'ranlib' structures.  Each array element is
271260684Skaiw *   two 'long's in size.
 * - A count of bytes used by the following string table, stored as a
 *   'long'.
 * - The string table itself.
274260684Skaiw */
275260684Skaiw
/*
 * A helper macro to read in a 'long' value from the archive.
 *
 * Copies sizeof(long) bytes from the byte pointer `P' into `V' without
 * any byte-order conversion, then advances `P' past the bytes read.
 * `V' must be an lvalue of type 'long', since exactly sizeof(long) bytes
 * are written to it.  Both arguments are evaluated more than once.
 *
 * We use memcpy() since the source pointer may be misaligned with
 * respect to the natural alignment for a C 'long'.
 */
#define	GET_LONG(P, V) do {				\
		memcpy(&(V), (P), sizeof(long));	\
		(P) += sizeof(long);			\
	} while (0)
286260684Skaiw
287260684SkaiwElf_Arsym *
288260684Skaiw_libelf_ar_process_bsd_symtab(Elf *e, size_t *count)
289260684Skaiw{
290260684Skaiw	Elf_Arsym *symtab, *sym;
291276371Semaste	unsigned int n, nentries;
292260684Skaiw	unsigned char *end, *p, *p0, *s, *s0;
293276371Semaste	const size_t entrysize = 2 * sizeof(long);
294276371Semaste	long arraysize, fileoffset, stroffset, strtabsize;
295260684Skaiw
296260684Skaiw	assert(e != NULL);
297260684Skaiw	assert(count != NULL);
298260684Skaiw	assert(e->e_u.e_ar.e_symtab == NULL);
299260684Skaiw
300260684Skaiw	symtab = NULL;
301260684Skaiw
302260684Skaiw	/*
303260684Skaiw	 * The BSD symbol table always contains the count fields even
304260684Skaiw	 * if there are no entries in it.
305260684Skaiw	 */
306260684Skaiw	if (e->e_u.e_ar.e_rawsymtabsz < 2 * sizeof(long))
307260684Skaiw		goto symtaberror;
308260684Skaiw
309260684Skaiw	p = p0 = (unsigned char *) e->e_u.e_ar.e_rawsymtab;
310260684Skaiw	end = p0 + e->e_u.e_ar.e_rawsymtabsz;
311260684Skaiw
312260684Skaiw	/*
313260684Skaiw	 * Retrieve the size of the array of ranlib descriptors and
314260684Skaiw	 * check it for validity.
315260684Skaiw	 */
316260684Skaiw	GET_LONG(p, arraysize);
317260684Skaiw
318276371Semaste	if (arraysize < 0 || p0 + arraysize >= end ||
319276371Semaste	    ((size_t) arraysize % entrysize != 0))
320260684Skaiw		goto symtaberror;
321260684Skaiw
322260684Skaiw	/*
323260684Skaiw	 * Check the value of the string table size.
324260684Skaiw	 */
325260684Skaiw	s = p + arraysize;
326260684Skaiw	GET_LONG(s, strtabsize);
327260684Skaiw
328260684Skaiw	s0 = s;			/* Start of string table. */
329276371Semaste	if (strtabsize < 0 || s0 + strtabsize > end)
330260684Skaiw		goto symtaberror;
331260684Skaiw
332276371Semaste	nentries = (size_t) arraysize / entrysize;
333260684Skaiw
334260684Skaiw	/*
335260684Skaiw	 * Allocate space for the returned Elf_Arsym array.
336260684Skaiw	 */
337260684Skaiw	if ((symtab = malloc(sizeof(Elf_Arsym) * (nentries + 1))) == NULL) {
338260684Skaiw		LIBELF_SET_ERROR(RESOURCE, 0);
339260684Skaiw		return (NULL);
340260684Skaiw	}
341260684Skaiw
342260684Skaiw	/* Read in symbol table entries. */
343260684Skaiw	for (n = 0, sym = symtab; n < nentries; n++, sym++) {
344260684Skaiw		GET_LONG(p, stroffset);
345260684Skaiw		GET_LONG(p, fileoffset);
346260684Skaiw
347276371Semaste		if (stroffset < 0 || fileoffset <  0 ||
348276371Semaste		    (size_t) fileoffset >= e->e_rawsize)
349276371Semaste			goto symtaberror;
350276371Semaste
351260684Skaiw		s = s0 + stroffset;
352260684Skaiw
353260684Skaiw		if (s >= end)
354260684Skaiw			goto symtaberror;
355260684Skaiw
356276371Semaste		sym->as_off = (off_t) fileoffset;
357260684Skaiw		sym->as_hash = elf_hash((char *) s);
358260684Skaiw		sym->as_name = (char *) s;
359260684Skaiw	}
360260684Skaiw
361260684Skaiw	/* Fill up the sentinel entry. */
362260684Skaiw	sym->as_name = NULL;
363260684Skaiw	sym->as_hash = ~0UL;
364260684Skaiw	sym->as_off = (off_t) 0;
365260684Skaiw
366260684Skaiw	/* Remember the processed symbol table. */
367260684Skaiw	e->e_u.e_ar.e_symtab = symtab;
368260684Skaiw
369260684Skaiw	*count = e->e_u.e_ar.e_symtabsz = nentries + 1;
370260684Skaiw
371260684Skaiw	return (symtab);
372260684Skaiw
373260684Skaiwsymtaberror:
374260684Skaiw	if (symtab)
375260684Skaiw		free(symtab);
376260684Skaiw	LIBELF_SET_ERROR(ARCHIVE, 0);
377260684Skaiw	return (NULL);
378260684Skaiw}
379260684Skaiw
380260684Skaiw/*
381260684Skaiw * An SVR4-style ar(1) symbol table has the following layout:
382260684Skaiw *
383260684Skaiw * - The first 4 bytes are a binary count of the number of entries in the
384260684Skaiw *   symbol table, stored MSB-first.
385260684Skaiw * - Then there are 'n' 4-byte binary offsets, also stored MSB first.
386260684Skaiw * - Following this, there are 'n' null-terminated strings.
387260684Skaiw */
388260684Skaiw
/*
 * Assemble the four bytes at `P' into `V', most significant byte first.
 *
 * `P' is not advanced; callers step over the word themselves using
 * INTSZ.  `V' should be an unsigned lvalue at least 32 bits wide, since
 * the shifts are carried out in V's own type.  Both arguments are
 * evaluated more than once.
 */
#define	GET_WORD(P, V) do {				\
		(V) = (P)[0];				\
		(V) <<= 8; (V) += (P)[1];		\
		(V) <<= 8; (V) += (P)[2];		\
		(V) <<= 8; (V) += (P)[3];		\
	} while (0)

/* Size in bytes of one word as read by GET_WORD(). */
#define	INTSZ	4
398260684Skaiw
399260684Skaiw
400260684SkaiwElf_Arsym *
401260684Skaiw_libelf_ar_process_svr4_symtab(Elf *e, size_t *count)
402260684Skaiw{
403276371Semaste	uint32_t off;
404276371Semaste	size_t n, nentries;
405260684Skaiw	Elf_Arsym *symtab, *sym;
406260684Skaiw	unsigned char *p, *s, *end;
407260684Skaiw
408260684Skaiw	assert(e != NULL);
409260684Skaiw	assert(count != NULL);
410260684Skaiw	assert(e->e_u.e_ar.e_symtab == NULL);
411260684Skaiw
412260684Skaiw	symtab = NULL;
413260684Skaiw
414260684Skaiw	if (e->e_u.e_ar.e_rawsymtabsz < INTSZ)
415260684Skaiw		goto symtaberror;
416260684Skaiw
417260684Skaiw	p = (unsigned char *) e->e_u.e_ar.e_rawsymtab;
418260684Skaiw	end = p + e->e_u.e_ar.e_rawsymtabsz;
419260684Skaiw
420260684Skaiw	GET_WORD(p, nentries);
421260684Skaiw	p += INTSZ;
422260684Skaiw
423260684Skaiw	if (nentries == 0 || p + nentries * INTSZ >= end)
424260684Skaiw		goto symtaberror;
425260684Skaiw
426260684Skaiw	/* Allocate space for a nentries + a sentinel. */
427260684Skaiw	if ((symtab = malloc(sizeof(Elf_Arsym) * (nentries+1))) == NULL) {
428260684Skaiw		LIBELF_SET_ERROR(RESOURCE, 0);
429260684Skaiw		return (NULL);
430260684Skaiw	}
431260684Skaiw
432260684Skaiw	s = p + (nentries * INTSZ); /* start of the string table. */
433260684Skaiw
434260684Skaiw	for (n = nentries, sym = symtab; n > 0; n--) {
435260684Skaiw		if (s >= end)
436260684Skaiw			goto symtaberror;
437260684Skaiw
438260684Skaiw		GET_WORD(p, off);
439276371Semaste		if (off >= e->e_rawsize)
440276371Semaste			goto symtaberror;
441260684Skaiw
442276371Semaste		sym->as_off = (off_t) off;
443260684Skaiw		sym->as_hash = elf_hash((char *) s);
444260684Skaiw		sym->as_name = (char *) s;
445260684Skaiw
446260684Skaiw		p += INTSZ;
447260684Skaiw		sym++;
448260684Skaiw
449260684Skaiw		for (; s < end && *s++ != '\0';) /* skip to next string */
450260684Skaiw			;
451260684Skaiw	}
452260684Skaiw
453260684Skaiw	/* Fill up the sentinel entry. */
454260684Skaiw	sym->as_name = NULL;
455260684Skaiw	sym->as_hash = ~0UL;
456260684Skaiw	sym->as_off = (off_t) 0;
457260684Skaiw
458260684Skaiw	*count = e->e_u.e_ar.e_symtabsz = nentries + 1;
459260684Skaiw	e->e_u.e_ar.e_symtab = symtab;
460260684Skaiw
461260684Skaiw	return (symtab);
462260684Skaiw
463260684Skaiwsymtaberror:
464260684Skaiw	if (symtab)
465260684Skaiw		free(symtab);
466260684Skaiw	LIBELF_SET_ERROR(ARCHIVE, 0);
467260684Skaiw	return (NULL);
468260684Skaiw}
469