1260684Skaiw/*-
2260684Skaiw * Copyright (c) 2013, Joseph Koshy
3260684Skaiw * All rights reserved.
4260684Skaiw *
5260684Skaiw * Redistribution and use in source and binary forms, with or without
6260684Skaiw * modification, are permitted provided that the following conditions
7260684Skaiw * are met:
8260684Skaiw * 1. Redistributions of source code must retain the above copyright
9260684Skaiw *    notice, this list of conditions and the following disclaimer
10260684Skaiw *    in this position and unchanged.
11260684Skaiw * 2. Redistributions in binary form must reproduce the above copyright
12260684Skaiw *    notice, this list of conditions and the following disclaimer in the
13260684Skaiw *    documentation and/or other materials provided with the distribution.
14260684Skaiw *
15260684Skaiw * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16260684Skaiw * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17260684Skaiw * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18260684Skaiw * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19260684Skaiw * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20260684Skaiw * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21260684Skaiw * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22260684Skaiw * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23260684Skaiw * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24260684Skaiw * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25260684Skaiw */
26260684Skaiw
27260684Skaiw#include <sys/param.h>
28260684Skaiw#include <sys/queue.h>
29260684Skaiw
30260684Skaiw#include <assert.h>
31260684Skaiw#include <errno.h>
32260684Skaiw#include <gelf.h>
33260684Skaiw#include <stdlib.h>
34260684Skaiw#include <string.h>
35260684Skaiw
36260684Skaiw#include "libelftc.h"
37260684Skaiw#include "_libelftc.h"
38260684Skaiw
39367466SdimELFTC_VCSID("$Id: elftc_string_table.c 3750 2019-06-28 01:12:10Z emaste $");
40260684Skaiw
41260684Skaiw#define	ELFTC_STRING_TABLE_DEFAULT_SIZE			(4*1024)
42260684Skaiw#define ELFTC_STRING_TABLE_EXPECTED_STRING_SIZE		16
43260684Skaiw#define ELFTC_STRING_TABLE_EXPECTED_CHAIN_LENGTH	8
44260684Skaiw#define	ELFTC_STRING_TABLE_POOL_SIZE_INCREMENT		(4*1024)
45260684Skaiw
46260684Skaiwstruct _Elftc_String_Table_Entry {
47367466Sdim	ssize_t		ste_idx;
48260684Skaiw	SLIST_ENTRY(_Elftc_String_Table_Entry) ste_next;
49260684Skaiw};
50260684Skaiw
51260684Skaiw#define	ELFTC_STRING_TABLE_COMPACTION_FLAG	0x1
52260684Skaiw#define	ELFTC_STRING_TABLE_LENGTH(st)		((st)->st_len >> 1)
53260684Skaiw#define	ELFTC_STRING_TABLE_CLEAR_COMPACTION_FLAG(st) do {		\
54260684Skaiw		(st)->st_len &= ~ELFTC_STRING_TABLE_COMPACTION_FLAG;	\
55260684Skaiw	} while (0)
56260684Skaiw#define	ELFTC_STRING_TABLE_SET_COMPACTION_FLAG(st) do {			\
57260684Skaiw		(st)->st_len |= ELFTC_STRING_TABLE_COMPACTION_FLAG;	\
58260684Skaiw	} while (0)
59260684Skaiw#define	ELFTC_STRING_TABLE_UPDATE_LENGTH(st, len) do {		\
60260684Skaiw		(st)->st_len =					\
61260684Skaiw		    ((st)->st_len &				\
62260684Skaiw			ELFTC_STRING_TABLE_COMPACTION_FLAG) |	\
63260684Skaiw		    ((len) << 1);				\
64260684Skaiw	} while (0)
65260684Skaiw
66260684Skaiwstruct _Elftc_String_Table {
67367466Sdim	size_t		st_len; /* length and flags */
68260684Skaiw	int		st_nbuckets;
69367466Sdim	size_t		st_string_pool_size;
70260684Skaiw	char		*st_string_pool;
71260684Skaiw	SLIST_HEAD(_Elftc_String_Table_Bucket,
72260684Skaiw	    _Elftc_String_Table_Entry) st_buckets[];
73260684Skaiw};
74260684Skaiw
75260684Skaiwstatic struct _Elftc_String_Table_Entry *
76260684Skaiwelftc_string_table_find_hash_entry(Elftc_String_Table *st, const char *string,
77260684Skaiw    int *rhashindex)
78260684Skaiw{
79260684Skaiw	struct _Elftc_String_Table_Entry *ste;
80260684Skaiw	int hashindex;
81260684Skaiw	char *s;
82260684Skaiw
83260684Skaiw	hashindex = libelftc_hash_string(string) % st->st_nbuckets;
84260684Skaiw
85260684Skaiw	if (rhashindex)
86260684Skaiw		*rhashindex = hashindex;
87260684Skaiw
88260684Skaiw	SLIST_FOREACH(ste, &st->st_buckets[hashindex], ste_next) {
89367466Sdim		s = st->st_string_pool + labs(ste->ste_idx);
90260684Skaiw
91260684Skaiw		assert(s > st->st_string_pool &&
92260684Skaiw		    s < st->st_string_pool + st->st_string_pool_size);
93260684Skaiw
94260684Skaiw		if (strcmp(s, string) == 0)
95260684Skaiw			return (ste);
96260684Skaiw	}
97260684Skaiw
98260684Skaiw	return (NULL);
99260684Skaiw}
100260684Skaiw
101260684Skaiwstatic int
102260684Skaiwelftc_string_table_add_to_pool(Elftc_String_Table *st, const char *string)
103260684Skaiw{
104260684Skaiw	char *newpool;
105367466Sdim	size_t len, newsize, stlen;
106260684Skaiw
107260684Skaiw	len = strlen(string) + 1; /* length, including the trailing NUL */
108260684Skaiw	stlen = ELFTC_STRING_TABLE_LENGTH(st);
109260684Skaiw
110260684Skaiw	/* Resize the pool, if needed. */
111260684Skaiw	if (stlen + len >= st->st_string_pool_size) {
112260684Skaiw		newsize = roundup(st->st_string_pool_size +
113260684Skaiw		    ELFTC_STRING_TABLE_POOL_SIZE_INCREMENT,
114260684Skaiw		    ELFTC_STRING_TABLE_POOL_SIZE_INCREMENT);
115260684Skaiw		if ((newpool = realloc(st->st_string_pool, newsize)) ==
116260684Skaiw		    NULL)
117260684Skaiw			return (0);
118260684Skaiw		st->st_string_pool = newpool;
119260684Skaiw		st->st_string_pool_size = newsize;
120260684Skaiw	}
121260684Skaiw
122367466Sdim	memcpy(st->st_string_pool + stlen, string, len);
123260684Skaiw	ELFTC_STRING_TABLE_UPDATE_LENGTH(st, stlen + len);
124260684Skaiw
125260684Skaiw	return (stlen);
126260684Skaiw}
127260684Skaiw
128260684SkaiwElftc_String_Table *
129367466Sdimelftc_string_table_create(size_t sizehint)
130260684Skaiw{
131367466Sdim	struct _Elftc_String_Table *st;
132260684Skaiw	int n, nbuckets, tablesize;
133260684Skaiw
134260684Skaiw	if (sizehint < ELFTC_STRING_TABLE_DEFAULT_SIZE)
135260684Skaiw		sizehint = ELFTC_STRING_TABLE_DEFAULT_SIZE;
136260684Skaiw
137260684Skaiw	nbuckets = sizehint / (ELFTC_STRING_TABLE_EXPECTED_CHAIN_LENGTH *
138260684Skaiw	    ELFTC_STRING_TABLE_EXPECTED_STRING_SIZE);
139260684Skaiw
140260684Skaiw	tablesize = sizeof(struct _Elftc_String_Table) +
141260684Skaiw	    nbuckets * sizeof(struct _Elftc_String_Table_Bucket);
142260684Skaiw
143260684Skaiw	if ((st = malloc(tablesize)) == NULL)
144260684Skaiw		return (NULL);
145260684Skaiw	if ((st->st_string_pool = malloc(sizehint)) == NULL) {
146260684Skaiw		free(st);
147260684Skaiw		return (NULL);
148260684Skaiw	}
149260684Skaiw
150260684Skaiw	for (n = 0; n < nbuckets; n++)
151260684Skaiw		SLIST_INIT(&st->st_buckets[n]);
152260684Skaiw
153260684Skaiw	st->st_len = 0;
154260684Skaiw	st->st_nbuckets = nbuckets;
155260684Skaiw	st->st_string_pool_size = sizehint;
156260684Skaiw	*st->st_string_pool = '\0';
157260684Skaiw	ELFTC_STRING_TABLE_UPDATE_LENGTH(st, 1);
158260684Skaiw
159260684Skaiw	return (st);
160260684Skaiw}
161260684Skaiw
162260684Skaiwvoid
163260684Skaiwelftc_string_table_destroy(Elftc_String_Table *st)
164260684Skaiw{
165260684Skaiw	int n;
166260684Skaiw	struct _Elftc_String_Table_Entry *s, *t;
167260684Skaiw
168260684Skaiw	for (n = 0; n < st->st_nbuckets; n++)
169260684Skaiw		SLIST_FOREACH_SAFE(s, &st->st_buckets[n], ste_next, t)
170367466Sdim			free(s);
171260684Skaiw	free(st->st_string_pool);
172260684Skaiw	free(st);
173260684Skaiw}
174260684Skaiw
175260684SkaiwElftc_String_Table *
176367466Sdimelftc_string_table_from_section(Elf_Scn *scn, size_t sizehint)
177260684Skaiw{
178260684Skaiw	Elf_Data *d;
179260684Skaiw	GElf_Shdr sh;
180260684Skaiw	const char *s, *end;
181260684Skaiw	Elftc_String_Table *st;
182367466Sdim	size_t len;
183260684Skaiw
184260684Skaiw	/* Verify the type of the section passed in. */
185260684Skaiw	if (gelf_getshdr(scn, &sh) == NULL ||
186260684Skaiw	    sh.sh_type != SHT_STRTAB) {
187260684Skaiw		errno = EINVAL;
188260684Skaiw		return (NULL);
189260684Skaiw	}
190260684Skaiw
191260684Skaiw	if ((d = elf_getdata(scn, NULL)) == NULL ||
192260684Skaiw	    d->d_size == 0) {
193260684Skaiw		errno = EINVAL;
194260684Skaiw		return (NULL);
195260684Skaiw	}
196260684Skaiw
197260684Skaiw	if ((st = elftc_string_table_create(sizehint)) == NULL)
198260684Skaiw		return (NULL);
199260684Skaiw
200260684Skaiw	s = d->d_buf;
201260684Skaiw
202260684Skaiw	/*
203260684Skaiw	 * Verify that the first byte of the data buffer is '\0'.
204260684Skaiw	 */
205260684Skaiw	if (*s != '\0') {
206260684Skaiw		errno = EINVAL;
207260684Skaiw		goto fail;
208260684Skaiw	}
209260684Skaiw
210260684Skaiw	end = s + d->d_size;
211260684Skaiw
212260684Skaiw	/*
213260684Skaiw	 * Skip the first '\0' and insert the strings in the buffer,
214260684Skaiw	 * in order.
215260684Skaiw	 */
216260684Skaiw	for (s += 1; s < end; s += len) {
217260684Skaiw		if (elftc_string_table_insert(st, s) == 0)
218260684Skaiw			goto fail;
219260684Skaiw
220260684Skaiw		len = strlen(s) + 1; /* Include space for the trailing NUL. */
221260684Skaiw	}
222260684Skaiw
223260684Skaiw	return (st);
224260684Skaiw
225260684Skaiwfail:
226260684Skaiw	if (st)
227260684Skaiw		(void) elftc_string_table_destroy(st);
228260684Skaiw
229260684Skaiw	return (NULL);
230260684Skaiw}
231260684Skaiw
232260684Skaiwconst char *
233260684Skaiwelftc_string_table_image(Elftc_String_Table *st, size_t *size)
234260684Skaiw{
235260684Skaiw	char *r, *s, *end;
236260684Skaiw	struct _Elftc_String_Table_Entry *ste;
237260684Skaiw	struct _Elftc_String_Table_Bucket *head;
238367466Sdim	size_t copied, offset, length, newsize;
239367466Sdim	int hashindex;
240260684Skaiw
241260684Skaiw	/*
242260684Skaiw	 * For the common case of a string table has not seen
243260684Skaiw	 * a string deletion, we can just export the current
244260684Skaiw	 * pool.
245260684Skaiw	 */
246260684Skaiw	if ((st->st_len & ELFTC_STRING_TABLE_COMPACTION_FLAG) == 0) {
247260684Skaiw		if (size)
248260684Skaiw			*size = ELFTC_STRING_TABLE_LENGTH(st);
249260684Skaiw		return (st->st_string_pool);
250260684Skaiw	}
251260684Skaiw
252260684Skaiw	/*
253260684Skaiw	 * Otherwise, compact the string table in-place.
254260684Skaiw	 */
255260684Skaiw	assert(*st->st_string_pool == '\0');
256260684Skaiw
257260684Skaiw	newsize = 1;
258260684Skaiw	end = st->st_string_pool + ELFTC_STRING_TABLE_LENGTH(st);
259260684Skaiw
260260684Skaiw	for (r = s = st->st_string_pool + 1;
261260684Skaiw	     s < end;
262260684Skaiw	     s += length, r += copied) {
263260684Skaiw
264260684Skaiw		copied = 0;
265260684Skaiw		length = strlen(s) + 1;
266260684Skaiw
267260684Skaiw		ste = elftc_string_table_find_hash_entry(st, s,
268260684Skaiw		    &hashindex);
269260684Skaiw		head = &st->st_buckets[hashindex];
270260684Skaiw
271260684Skaiw		assert(ste != NULL);
272260684Skaiw
273260684Skaiw		/* Ignore deleted strings. */
274260684Skaiw		if (ste->ste_idx < 0) {
275260684Skaiw			SLIST_REMOVE(head, ste, _Elftc_String_Table_Entry,
276260684Skaiw			    ste_next);
277260684Skaiw			free(ste);
278260684Skaiw			continue;
279260684Skaiw		}
280260684Skaiw
281260684Skaiw		/* Move 'live' strings up. */
282260684Skaiw		offset = newsize;
283260684Skaiw		newsize += length;
284260684Skaiw		copied = length;
285260684Skaiw
286260684Skaiw		if (r == s)	/* Nothing removed yet. */
287260684Skaiw			continue;
288260684Skaiw
289260684Skaiw		memmove(r, s, copied);
290260684Skaiw
291260684Skaiw		/* Update the index for this entry. */
292260684Skaiw		ste->ste_idx = offset;
293260684Skaiw	}
294260684Skaiw
295260684Skaiw	ELFTC_STRING_TABLE_CLEAR_COMPACTION_FLAG(st);
296260684Skaiw	ELFTC_STRING_TABLE_UPDATE_LENGTH(st, newsize);
297260684Skaiw
298260684Skaiw	if (size)
299260684Skaiw		*size = newsize;
300260684Skaiw
301260684Skaiw	return (st->st_string_pool);
302260684Skaiw}
303260684Skaiw
304260684Skaiwsize_t
305260684Skaiwelftc_string_table_insert(Elftc_String_Table *st, const char *string)
306260684Skaiw{
307260684Skaiw	struct _Elftc_String_Table_Entry *ste;
308367466Sdim	ssize_t idx;
309367466Sdim	int hashindex;
310260684Skaiw
311260684Skaiw	hashindex = 0;
312260684Skaiw
313260684Skaiw	ste = elftc_string_table_find_hash_entry(st, string, &hashindex);
314260684Skaiw
315260684Skaiw	assert(hashindex >= 0 && hashindex < st->st_nbuckets);
316260684Skaiw
317260684Skaiw	if (ste == NULL) {
318260684Skaiw		if ((ste = malloc(sizeof(*ste))) == NULL)
319260684Skaiw			return (0);
320260684Skaiw		if ((ste->ste_idx = elftc_string_table_add_to_pool(st,
321367466Sdim		    string)) == 0) {
322260684Skaiw			free(ste);
323260684Skaiw			return (0);
324260684Skaiw		}
325260684Skaiw
326260684Skaiw		SLIST_INSERT_HEAD(&st->st_buckets[hashindex], ste, ste_next);
327260684Skaiw	}
328260684Skaiw
329260684Skaiw	idx = ste->ste_idx;
330260684Skaiw	if (idx < 0) 		/* Undelete. */
331367466Sdim		ste->ste_idx = idx = -idx;
332260684Skaiw
333260684Skaiw	return (idx);
334260684Skaiw}
335260684Skaiw
336260684Skaiwsize_t
337260684Skaiwelftc_string_table_lookup(Elftc_String_Table *st, const char *string)
338260684Skaiw{
339260684Skaiw	struct _Elftc_String_Table_Entry *ste;
340367466Sdim	ssize_t idx;
341367466Sdim	int hashindex;
342260684Skaiw
343260684Skaiw	ste = elftc_string_table_find_hash_entry(st, string, &hashindex);
344260684Skaiw
345260684Skaiw	assert(hashindex >= 0 && hashindex < st->st_nbuckets);
346260684Skaiw
347260684Skaiw	if (ste == NULL || (idx = ste->ste_idx) < 0)
348260684Skaiw		return (0);
349260684Skaiw
350260684Skaiw	return (idx);
351260684Skaiw}
352260684Skaiw
353260684Skaiwint
354260684Skaiwelftc_string_table_remove(Elftc_String_Table *st, const char *string)
355260684Skaiw{
356260684Skaiw	struct _Elftc_String_Table_Entry *ste;
357367466Sdim	ssize_t idx;
358260684Skaiw
359260684Skaiw	ste = elftc_string_table_find_hash_entry(st, string, NULL);
360260684Skaiw
361260684Skaiw	if (ste == NULL || (idx = ste->ste_idx) < 0)
362260684Skaiw		return (ELFTC_FAILURE);
363260684Skaiw
364367466Sdim	assert(idx > 0 && (size_t)idx < ELFTC_STRING_TABLE_LENGTH(st));
365260684Skaiw
366367466Sdim	ste->ste_idx = -idx;
367260684Skaiw
368260684Skaiw	ELFTC_STRING_TABLE_SET_COMPACTION_FLAG(st);
369260684Skaiw
370260684Skaiw	return (ELFTC_SUCCESS);
371260684Skaiw}
372260684Skaiw
373260684Skaiwconst char *
374260684Skaiwelftc_string_table_to_string(Elftc_String_Table *st, size_t offset)
375260684Skaiw{
376260684Skaiw	const char *s;
377260684Skaiw
378260684Skaiw	s = st->st_string_pool + offset;
379260684Skaiw
380260684Skaiw	/*
381260684Skaiw	 * Check for:
382260684Skaiw	 * - An offset value within pool bounds.
383260684Skaiw	 * - A non-NUL byte at the specified offset.
384260684Skaiw	 * - The end of the prior string at offset - 1.
385260684Skaiw	 */
386260684Skaiw	if (offset == 0 || offset >= ELFTC_STRING_TABLE_LENGTH(st) ||
387260684Skaiw	    *s == '\0' || *(s - 1) != '\0') {
388260684Skaiw		errno = EINVAL;
389260684Skaiw		return (NULL);
390260684Skaiw	}
391260684Skaiw
392260684Skaiw	return (s);
393260684Skaiw}
394