1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
27__FBSDID("$FreeBSD$");
28
29#ifdef HAVE_SYS_STAT_H
30#include <sys/stat.h>
31#endif
32#ifdef HAVE_ERRNO_H
33#include <errno.h>
34#endif
35#include <stdio.h>
36#ifdef HAVE_STDLIB_H
37#include <stdlib.h>
38#endif
39#ifdef HAVE_STRING_H
40#include <string.h>
41#endif
42
43#include "archive.h"
44#include "archive_entry.h"
45
46/*
47 * This is mostly a pretty straightforward hash table implementation.
48 * The only interesting bit is the different strategies used to
49 * match up links.  These strategies match those used by various
50 * archiving formats:
51 *   tar - content stored with first link, remainder refer back to it.
52 *       This requires us to match each subsequent link up with the
53 *       first appearance.
54 *   cpio - Old cpio just stored body with each link, match-ups were
55 *       implicit.  This is trivial.
56 *   new cpio - New cpio only stores body with last link, match-ups
57 *       are implicit.  This is actually quite tricky; see the notes
58 *       below.
59 */
60
61/* Users pass us a format code, we translate that into a strategy here. */
62#define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR	0
63#define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1
64#define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2
65#define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3
66
67/* Initial size of link cache. */
68#define	links_cache_initial_size 1024
69
70struct links_entry {
71	struct links_entry	*next;
72	struct links_entry	*previous;
73	struct archive_entry	*canonical;
74	struct archive_entry	*entry;
75	size_t			 hash;
76	unsigned int		 links; /* # links not yet seen */
77};
78
79struct archive_entry_linkresolver {
80	struct links_entry	**buckets;
81	struct links_entry	 *spare;
82	unsigned long		  number_entries;
83	size_t			  number_buckets;
84	int			  strategy;
85};
86
87#define	NEXT_ENTRY_DEFERRED	1
88#define	NEXT_ENTRY_PARTIAL	2
89#define	NEXT_ENTRY_ALL		(NEXT_ENTRY_DEFERRED | NEXT_ENTRY_PARTIAL)
90
91static struct links_entry *find_entry(struct archive_entry_linkresolver *,
92		    struct archive_entry *);
93static void grow_hash(struct archive_entry_linkresolver *);
94static struct links_entry *insert_entry(struct archive_entry_linkresolver *,
95		    struct archive_entry *);
96static struct links_entry *next_entry(struct archive_entry_linkresolver *,
97    int);
98
99struct archive_entry_linkresolver *
100archive_entry_linkresolver_new(void)
101{
102	struct archive_entry_linkresolver *res;
103
104	/* Check for positive power-of-two */
105	if (links_cache_initial_size == 0 ||
106	    (links_cache_initial_size & (links_cache_initial_size - 1)) != 0)
107		return (NULL);
108
109	res = calloc(1, sizeof(struct archive_entry_linkresolver));
110	if (res == NULL)
111		return (NULL);
112	res->number_buckets = links_cache_initial_size;
113	res->buckets = calloc(res->number_buckets, sizeof(res->buckets[0]));
114	if (res->buckets == NULL) {
115		free(res);
116		return (NULL);
117	}
118	return (res);
119}
120
121void
122archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
123    int fmt)
124{
125	int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;
126
127	switch (fmtbase) {
128	case ARCHIVE_FORMAT_7ZIP:
129	case ARCHIVE_FORMAT_AR:
130	case ARCHIVE_FORMAT_ZIP:
131		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
132		break;
133	case ARCHIVE_FORMAT_CPIO:
134		switch (fmt) {
135		case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
136		case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
137			res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;
138			break;
139		default:
140			res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
141			break;
142		}
143		break;
144	case ARCHIVE_FORMAT_MTREE:
145		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE;
146		break;
147	case ARCHIVE_FORMAT_ISO9660:
148	case ARCHIVE_FORMAT_SHAR:
149	case ARCHIVE_FORMAT_TAR:
150	case ARCHIVE_FORMAT_XAR:
151		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
152		break;
153	default:
154		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
155		break;
156	}
157}
158
159void
160archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
161{
162	struct links_entry *le;
163
164	if (res == NULL)
165		return;
166
167	while ((le = next_entry(res, NEXT_ENTRY_ALL)) != NULL)
168		archive_entry_free(le->entry);
169	free(res->buckets);
170	free(res);
171}
172
173void
174archive_entry_linkify(struct archive_entry_linkresolver *res,
175    struct archive_entry **e, struct archive_entry **f)
176{
177	struct links_entry *le;
178	struct archive_entry *t;
179
180	*f = NULL; /* Default: Don't return a second entry. */
181
182	if (*e == NULL) {
183		le = next_entry(res, NEXT_ENTRY_DEFERRED);
184		if (le != NULL) {
185			*e = le->entry;
186			le->entry = NULL;
187		}
188		return;
189	}
190
191	/* If it has only one link, then we're done. */
192	if (archive_entry_nlink(*e) == 1)
193		return;
194	/* Directories, devices never have hardlinks. */
195	if (archive_entry_filetype(*e) == AE_IFDIR
196	    || archive_entry_filetype(*e) == AE_IFBLK
197	    || archive_entry_filetype(*e) == AE_IFCHR)
198		return;
199
200	switch (res->strategy) {
201	case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
202		le = find_entry(res, *e);
203		if (le != NULL) {
204			archive_entry_unset_size(*e);
205			archive_entry_copy_hardlink(*e,
206			    archive_entry_pathname(le->canonical));
207		} else
208			insert_entry(res, *e);
209		return;
210	case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE:
211		le = find_entry(res, *e);
212		if (le != NULL) {
213			archive_entry_copy_hardlink(*e,
214			    archive_entry_pathname(le->canonical));
215		} else
216			insert_entry(res, *e);
217		return;
218	case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
219		/* This one is trivial. */
220		return;
221	case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
222		le = find_entry(res, *e);
223		if (le != NULL) {
224			/*
225			 * Put the new entry in le, return the
226			 * old entry from le.
227			 */
228			t = *e;
229			*e = le->entry;
230			le->entry = t;
231			/* Make the old entry into a hardlink. */
232			archive_entry_unset_size(*e);
233			archive_entry_copy_hardlink(*e,
234			    archive_entry_pathname(le->canonical));
235			/* If we ran out of links, return the
236			 * final entry as well. */
237			if (le->links == 0) {
238				*f = le->entry;
239				le->entry = NULL;
240			}
241		} else {
242			/*
243			 * If we haven't seen it, tuck it away
244			 * for future use.
245			 */
246			le = insert_entry(res, *e);
247			if (le == NULL)
248				/* XXX We should return an error code XXX */
249				return;
250			le->entry = *e;
251			*e = NULL;
252		}
253		return;
254	default:
255		break;
256	}
257	return;
258}
259
260static struct links_entry *
261find_entry(struct archive_entry_linkresolver *res,
262    struct archive_entry *entry)
263{
264	struct links_entry	*le;
265	size_t			 hash, bucket;
266	dev_t			 dev;
267	int64_t			 ino;
268
269	/* Free a held entry. */
270	if (res->spare != NULL) {
271		archive_entry_free(res->spare->canonical);
272		archive_entry_free(res->spare->entry);
273		free(res->spare);
274		res->spare = NULL;
275	}
276
277	dev = archive_entry_dev(entry);
278	ino = archive_entry_ino64(entry);
279	hash = (size_t)(dev ^ ino);
280
281	/* Try to locate this entry in the links cache. */
282	bucket = hash & (res->number_buckets - 1);
283	for (le = res->buckets[bucket]; le != NULL; le = le->next) {
284		if (le->hash == hash
285		    && dev == archive_entry_dev(le->canonical)
286		    && ino == archive_entry_ino64(le->canonical)) {
287			/*
288			 * Decrement link count each time and release
289			 * the entry if it hits zero.  This saves
290			 * memory and is necessary for detecting
291			 * missed links.
292			 */
293			--le->links;
294			if (le->links > 0)
295				return (le);
296			/* Remove it from this hash bucket. */
297			if (le->previous != NULL)
298				le->previous->next = le->next;
299			if (le->next != NULL)
300				le->next->previous = le->previous;
301			if (res->buckets[bucket] == le)
302				res->buckets[bucket] = le->next;
303			res->number_entries--;
304			/* Defer freeing this entry. */
305			res->spare = le;
306			return (le);
307		}
308	}
309	return (NULL);
310}
311
312static struct links_entry *
313next_entry(struct archive_entry_linkresolver *res, int mode)
314{
315	struct links_entry	*le;
316	size_t			 bucket;
317
318	/* Free a held entry. */
319	if (res->spare != NULL) {
320		archive_entry_free(res->spare->canonical);
321		archive_entry_free(res->spare->entry);
322		free(res->spare);
323		res->spare = NULL;
324	}
325
326	/* Look for next non-empty bucket in the links cache. */
327	for (bucket = 0; bucket < res->number_buckets; bucket++) {
328		for (le = res->buckets[bucket]; le != NULL; le = le->next) {
329			if (le->entry != NULL &&
330			    (mode & NEXT_ENTRY_DEFERRED) == 0)
331				continue;
332			if (le->entry == NULL &&
333			    (mode & NEXT_ENTRY_PARTIAL) == 0)
334				continue;
335			/* Remove it from this hash bucket. */
336			if (le->next != NULL)
337				le->next->previous = le->previous;
338			if (le->previous != NULL)
339				le->previous->next = le->next;
340			else
341				res->buckets[bucket] = le->next;
342			res->number_entries--;
343			/* Defer freeing this entry. */
344			res->spare = le;
345			return (le);
346		}
347	}
348	return (NULL);
349}
350
351static struct links_entry *
352insert_entry(struct archive_entry_linkresolver *res,
353    struct archive_entry *entry)
354{
355	struct links_entry *le;
356	size_t hash, bucket;
357
358	/* Add this entry to the links cache. */
359	le = calloc(1, sizeof(struct links_entry));
360	if (le == NULL)
361		return (NULL);
362	le->canonical = archive_entry_clone(entry);
363
364	/* If the links cache is getting too full, enlarge the hash table. */
365	if (res->number_entries > res->number_buckets * 2)
366		grow_hash(res);
367
368	hash = (size_t)(archive_entry_dev(entry) ^ archive_entry_ino64(entry));
369	bucket = hash & (res->number_buckets - 1);
370
371	/* If we could allocate the entry, record it. */
372	if (res->buckets[bucket] != NULL)
373		res->buckets[bucket]->previous = le;
374	res->number_entries++;
375	le->next = res->buckets[bucket];
376	le->previous = NULL;
377	res->buckets[bucket] = le;
378	le->hash = hash;
379	le->links = archive_entry_nlink(entry) - 1;
380	return (le);
381}
382
383static void
384grow_hash(struct archive_entry_linkresolver *res)
385{
386	struct links_entry *le, **new_buckets;
387	size_t new_size;
388	size_t i, bucket;
389
390	/* Try to enlarge the bucket list. */
391	new_size = res->number_buckets * 2;
392	if (new_size < res->number_buckets)
393		return;
394	new_buckets = calloc(new_size, sizeof(struct links_entry *));
395
396	if (new_buckets == NULL)
397		return;
398
399	for (i = 0; i < res->number_buckets; i++) {
400		while (res->buckets[i] != NULL) {
401			/* Remove entry from old bucket. */
402			le = res->buckets[i];
403			res->buckets[i] = le->next;
404
405			/* Add entry to new bucket. */
406			bucket = le->hash & (new_size - 1);
407
408			if (new_buckets[bucket] != NULL)
409				new_buckets[bucket]->previous = le;
410			le->next = new_buckets[bucket];
411			le->previous = NULL;
412			new_buckets[bucket] = le;
413		}
414	}
415	free(res->buckets);
416	res->buckets = new_buckets;
417	res->number_buckets = new_size;
418}
419
420struct archive_entry *
421archive_entry_partial_links(struct archive_entry_linkresolver *res,
422    unsigned int *links)
423{
424	struct archive_entry	*e;
425	struct links_entry	*le;
426
427	/* Free a held entry. */
428	if (res->spare != NULL) {
429		archive_entry_free(res->spare->canonical);
430		archive_entry_free(res->spare->entry);
431		free(res->spare);
432		res->spare = NULL;
433	}
434
435	le = next_entry(res, NEXT_ENTRY_PARTIAL);
436	if (le != NULL) {
437		e = le->canonical;
438		if (links != NULL)
439			*links = le->links;
440		le->canonical = NULL;
441	} else {
442		e = NULL;
443		if (links != NULL)
444			*links = 0;
445	}
446	return (e);
447}
448