archive_entry_link_resolver.c revision 267654
1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
27__FBSDID("$FreeBSD: releng/9.3/contrib/libarchive/libarchive/archive_entry_link_resolver.c 229592 2012-01-05 12:06:54Z mm $");
28
29#ifdef HAVE_SYS_STAT_H
30#include <sys/stat.h>
31#endif
32#ifdef HAVE_ERRNO_H
33#include <errno.h>
34#endif
35#include <stdio.h>
36#ifdef HAVE_STDLIB_H
37#include <stdlib.h>
38#endif
39#ifdef HAVE_STRING_H
40#include <string.h>
41#endif
42
43#include "archive.h"
44#include "archive_entry.h"
45
/*
 * This is mostly a pretty straightforward hash table implementation.
 * The only interesting bit is the different strategies used to
 * match up links.  These strategies match those used by various
 * archiving formats:
 *   tar - content stored with first link, remainder refer back to it.
 *       This requires us to match each subsequent link up with the
 *       first appearance.
 *   mtree - matched up like tar: each subsequent link refers back to
 *       the first appearance, but its size is left untouched.
 *   cpio - Old cpio just stored body with each link, match-ups were
 *       implicit.  This is trivial.
 *   new cpio - New cpio only stores body with last link, match-ups
 *       are implicit.  This is actually quite tricky; see the notes
 *       below.
 */
60
61/* Users pass us a format code, we translate that into a strategy here. */
62#define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR	0
63#define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1
64#define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2
65#define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3
66
67/* Initial size of link cache. */
68#define	links_cache_initial_size 1024
69
70struct links_entry {
71	struct links_entry	*next;
72	struct links_entry	*previous;
73	int			 links; /* # links not yet seen */
74	int			 hash;
75	struct archive_entry	*canonical;
76	struct archive_entry	*entry;
77};
78
79struct archive_entry_linkresolver {
80	struct links_entry	**buckets;
81	struct links_entry	 *spare;
82	unsigned long		  number_entries;
83	size_t			  number_buckets;
84	int			  strategy;
85};
86
87static struct links_entry *find_entry(struct archive_entry_linkresolver *,
88		    struct archive_entry *);
89static void grow_hash(struct archive_entry_linkresolver *);
90static struct links_entry *insert_entry(struct archive_entry_linkresolver *,
91		    struct archive_entry *);
92static struct links_entry *next_entry(struct archive_entry_linkresolver *);
93
94struct archive_entry_linkresolver *
95archive_entry_linkresolver_new(void)
96{
97	struct archive_entry_linkresolver *res;
98	size_t i;
99
100	res = malloc(sizeof(struct archive_entry_linkresolver));
101	if (res == NULL)
102		return (NULL);
103	memset(res, 0, sizeof(struct archive_entry_linkresolver));
104	res->number_buckets = links_cache_initial_size;
105	res->buckets = malloc(res->number_buckets *
106	    sizeof(res->buckets[0]));
107	if (res->buckets == NULL) {
108		free(res);
109		return (NULL);
110	}
111	for (i = 0; i < res->number_buckets; i++)
112		res->buckets[i] = NULL;
113	return (res);
114}
115
116void
117archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
118    int fmt)
119{
120	int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;
121
122	switch (fmtbase) {
123	case ARCHIVE_FORMAT_CPIO:
124		switch (fmt) {
125		case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
126		case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
127			res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;
128			break;
129		default:
130			res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
131			break;
132		}
133		break;
134	case ARCHIVE_FORMAT_MTREE:
135		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE;
136		break;
137	case ARCHIVE_FORMAT_TAR:
138		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
139		break;
140	default:
141		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
142		break;
143	}
144}
145
146void
147archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
148{
149	struct links_entry *le;
150
151	if (res == NULL)
152		return;
153
154	if (res->buckets != NULL) {
155		while ((le = next_entry(res)) != NULL)
156			archive_entry_free(le->entry);
157		free(res->buckets);
158		res->buckets = NULL;
159	}
160	free(res);
161}
162
163void
164archive_entry_linkify(struct archive_entry_linkresolver *res,
165    struct archive_entry **e, struct archive_entry **f)
166{
167	struct links_entry *le;
168	struct archive_entry *t;
169
170	*f = NULL; /* Default: Don't return a second entry. */
171
172	if (*e == NULL) {
173		le = next_entry(res);
174		if (le != NULL) {
175			*e = le->entry;
176			le->entry = NULL;
177		}
178		return;
179	}
180
181	/* If it has only one link, then we're done. */
182	if (archive_entry_nlink(*e) == 1)
183		return;
184	/* Directories, devices never have hardlinks. */
185	if (archive_entry_filetype(*e) == AE_IFDIR
186	    || archive_entry_filetype(*e) == AE_IFBLK
187	    || archive_entry_filetype(*e) == AE_IFCHR)
188		return;
189
190	switch (res->strategy) {
191	case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
192		le = find_entry(res, *e);
193		if (le != NULL) {
194			archive_entry_unset_size(*e);
195			archive_entry_copy_hardlink(*e,
196			    archive_entry_pathname(le->canonical));
197		} else
198			insert_entry(res, *e);
199		return;
200	case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE:
201		le = find_entry(res, *e);
202		if (le != NULL) {
203			archive_entry_copy_hardlink(*e,
204			    archive_entry_pathname(le->canonical));
205		} else
206			insert_entry(res, *e);
207		return;
208	case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
209		/* This one is trivial. */
210		return;
211	case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
212		le = find_entry(res, *e);
213		if (le != NULL) {
214			/*
215			 * Put the new entry in le, return the
216			 * old entry from le.
217			 */
218			t = *e;
219			*e = le->entry;
220			le->entry = t;
221			/* Make the old entry into a hardlink. */
222			archive_entry_unset_size(*e);
223			archive_entry_copy_hardlink(*e,
224			    archive_entry_pathname(le->canonical));
225			/* If we ran out of links, return the
226			 * final entry as well. */
227			if (le->links == 0) {
228				*f = le->entry;
229				le->entry = NULL;
230			}
231		} else {
232			/*
233			 * If we haven't seen it, tuck it away
234			 * for future use.
235			 */
236			le = insert_entry(res, *e);
237			le->entry = *e;
238			*e = NULL;
239		}
240		return;
241	default:
242		break;
243	}
244	return;
245}
246
247static struct links_entry *
248find_entry(struct archive_entry_linkresolver *res,
249    struct archive_entry *entry)
250{
251	struct links_entry	*le;
252	int			 hash, bucket;
253	dev_t			 dev;
254	int64_t			 ino;
255
256	/* Free a held entry. */
257	if (res->spare != NULL) {
258		archive_entry_free(res->spare->canonical);
259		archive_entry_free(res->spare->entry);
260		free(res->spare);
261		res->spare = NULL;
262	}
263
264	/* If the links cache overflowed and got flushed, don't bother. */
265	if (res->buckets == NULL)
266		return (NULL);
267
268	dev = archive_entry_dev(entry);
269	ino = archive_entry_ino64(entry);
270	hash = (int)(dev ^ ino);
271
272	/* Try to locate this entry in the links cache. */
273	bucket = hash % res->number_buckets;
274	for (le = res->buckets[bucket]; le != NULL; le = le->next) {
275		if (le->hash == hash
276		    && dev == archive_entry_dev(le->canonical)
277		    && ino == archive_entry_ino64(le->canonical)) {
278			/*
279			 * Decrement link count each time and release
280			 * the entry if it hits zero.  This saves
281			 * memory and is necessary for detecting
282			 * missed links.
283			 */
284			--le->links;
285			if (le->links > 0)
286				return (le);
287			/* Remove it from this hash bucket. */
288			if (le->previous != NULL)
289				le->previous->next = le->next;
290			if (le->next != NULL)
291				le->next->previous = le->previous;
292			if (res->buckets[bucket] == le)
293				res->buckets[bucket] = le->next;
294			res->number_entries--;
295			/* Defer freeing this entry. */
296			res->spare = le;
297			return (le);
298		}
299	}
300	return (NULL);
301}
302
303static struct links_entry *
304next_entry(struct archive_entry_linkresolver *res)
305{
306	struct links_entry	*le;
307	size_t			 bucket;
308
309	/* Free a held entry. */
310	if (res->spare != NULL) {
311		archive_entry_free(res->spare->canonical);
312		free(res->spare);
313		res->spare = NULL;
314	}
315
316	/* If the links cache overflowed and got flushed, don't bother. */
317	if (res->buckets == NULL)
318		return (NULL);
319
320	/* Look for next non-empty bucket in the links cache. */
321	for (bucket = 0; bucket < res->number_buckets; bucket++) {
322		le = res->buckets[bucket];
323		if (le != NULL) {
324			/* Remove it from this hash bucket. */
325			if (le->next != NULL)
326				le->next->previous = le->previous;
327			res->buckets[bucket] = le->next;
328			res->number_entries--;
329			/* Defer freeing this entry. */
330			res->spare = le;
331			return (le);
332		}
333	}
334	return (NULL);
335}
336
337static struct links_entry *
338insert_entry(struct archive_entry_linkresolver *res,
339    struct archive_entry *entry)
340{
341	struct links_entry *le;
342	int			 hash, bucket;
343
344	/* Add this entry to the links cache. */
345	le = malloc(sizeof(struct links_entry));
346	if (le == NULL)
347		return (NULL);
348	memset(le, 0, sizeof(*le));
349	le->canonical = archive_entry_clone(entry);
350
351	/* If the links cache is getting too full, enlarge the hash table. */
352	if (res->number_entries > res->number_buckets * 2)
353		grow_hash(res);
354
355	hash = archive_entry_dev(entry) ^ archive_entry_ino64(entry);
356	bucket = hash % res->number_buckets;
357
358	/* If we could allocate the entry, record it. */
359	if (res->buckets[bucket] != NULL)
360		res->buckets[bucket]->previous = le;
361	res->number_entries++;
362	le->next = res->buckets[bucket];
363	le->previous = NULL;
364	res->buckets[bucket] = le;
365	le->hash = hash;
366	le->links = archive_entry_nlink(entry) - 1;
367	return (le);
368}
369
370static void
371grow_hash(struct archive_entry_linkresolver *res)
372{
373	struct links_entry *le, **new_buckets;
374	size_t new_size;
375	size_t i, bucket;
376
377	/* Try to enlarge the bucket list. */
378	new_size = res->number_buckets * 2;
379	new_buckets = malloc(new_size * sizeof(struct links_entry *));
380
381	if (new_buckets != NULL) {
382		memset(new_buckets, 0,
383		    new_size * sizeof(struct links_entry *));
384		for (i = 0; i < res->number_buckets; i++) {
385			while (res->buckets[i] != NULL) {
386				/* Remove entry from old bucket. */
387				le = res->buckets[i];
388				res->buckets[i] = le->next;
389
390				/* Add entry to new bucket. */
391				bucket = le->hash % new_size;
392
393				if (new_buckets[bucket] != NULL)
394					new_buckets[bucket]->previous =
395					    le;
396				le->next = new_buckets[bucket];
397				le->previous = NULL;
398				new_buckets[bucket] = le;
399			}
400		}
401		free(res->buckets);
402		res->buckets = new_buckets;
403		res->number_buckets = new_size;
404	}
405}
406