1228753Smm/*-
2228753Smm * Copyright (c) 2003-2007 Tim Kientzle
3228753Smm * All rights reserved.
4228753Smm *
5228753Smm * Redistribution and use in source and binary forms, with or without
6228753Smm * modification, are permitted provided that the following conditions
7228753Smm * are met:
8228753Smm * 1. Redistributions of source code must retain the above copyright
9228753Smm *    notice, this list of conditions and the following disclaimer.
10228753Smm * 2. Redistributions in binary form must reproduce the above copyright
11228753Smm *    notice, this list of conditions and the following disclaimer in the
12228753Smm *    documentation and/or other materials provided with the distribution.
13228753Smm *
14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24228753Smm */
25228753Smm
26228753Smm#include "archive_platform.h"
27228763Smm__FBSDID("$FreeBSD$");
28228753Smm
29228753Smm#ifdef HAVE_SYS_STAT_H
30228753Smm#include <sys/stat.h>
31228753Smm#endif
32228753Smm#ifdef HAVE_ERRNO_H
33228753Smm#include <errno.h>
34228753Smm#endif
35228753Smm#include <stdio.h>
36228753Smm#ifdef HAVE_STDLIB_H
37228753Smm#include <stdlib.h>
38228753Smm#endif
39228753Smm#ifdef HAVE_STRING_H
40228753Smm#include <string.h>
41228753Smm#endif
42228753Smm
43228753Smm#include "archive.h"
44228753Smm#include "archive_entry.h"
45228753Smm
46228753Smm/*
47228753Smm * This is mostly a pretty straightforward hash table implementation.
48228753Smm * The only interesting bit is the different strategies used to
49228753Smm * match up links.  These strategies match those used by various
50228753Smm * archiving formats:
51228753Smm *   tar - content stored with first link, remainder refer back to it.
52228753Smm *       This requires us to match each subsequent link up with the
53228753Smm *       first appearance.
54228753Smm *   cpio - Old cpio just stored body with each link, match-ups were
55228753Smm *       implicit.  This is trivial.
56228753Smm *   new cpio - New cpio only stores body with last link, match-ups
57228753Smm *       are implicit.  This is actually quite tricky; see the notes
58228753Smm *       below.
59228753Smm */
60228753Smm
61228753Smm/* Users pass us a format code, we translate that into a strategy here. */
62228753Smm#define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR	0
63228753Smm#define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1
64228753Smm#define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2
65228753Smm#define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3
66228753Smm
67228753Smm/* Initial size of link cache. */
68228753Smm#define	links_cache_initial_size 1024
69228753Smm
70228753Smmstruct links_entry {
71228753Smm	struct links_entry	*next;
72228753Smm	struct links_entry	*previous;
73228753Smm	struct archive_entry	*canonical;
74228753Smm	struct archive_entry	*entry;
75232153Smm	size_t			 hash;
76232153Smm	unsigned int		 links; /* # links not yet seen */
77228753Smm};
78228753Smm
79228753Smmstruct archive_entry_linkresolver {
80228753Smm	struct links_entry	**buckets;
81228753Smm	struct links_entry	 *spare;
82228753Smm	unsigned long		  number_entries;
83228753Smm	size_t			  number_buckets;
84228753Smm	int			  strategy;
85228753Smm};
86228753Smm
87232153Smm#define	NEXT_ENTRY_DEFERRED	1
88232153Smm#define	NEXT_ENTRY_PARTIAL	2
89232153Smm#define	NEXT_ENTRY_ALL		(NEXT_ENTRY_DEFERRED | NEXT_ENTRY_PARTIAL)
90232153Smm
91228753Smmstatic struct links_entry *find_entry(struct archive_entry_linkresolver *,
92228753Smm		    struct archive_entry *);
93228753Smmstatic void grow_hash(struct archive_entry_linkresolver *);
94228753Smmstatic struct links_entry *insert_entry(struct archive_entry_linkresolver *,
95228753Smm		    struct archive_entry *);
96232153Smmstatic struct links_entry *next_entry(struct archive_entry_linkresolver *,
97232153Smm    int);
98228753Smm
99228753Smmstruct archive_entry_linkresolver *
100228753Smmarchive_entry_linkresolver_new(void)
101228753Smm{
102228753Smm	struct archive_entry_linkresolver *res;
103228753Smm
104232153Smm	/* Check for positive power-of-two */
105232153Smm	if (links_cache_initial_size == 0 ||
106232153Smm	    (links_cache_initial_size & (links_cache_initial_size - 1)) != 0)
107232153Smm		return (NULL);
108232153Smm
109232153Smm	res = calloc(1, sizeof(struct archive_entry_linkresolver));
110228753Smm	if (res == NULL)
111228753Smm		return (NULL);
112228753Smm	res->number_buckets = links_cache_initial_size;
113232153Smm	res->buckets = calloc(res->number_buckets, sizeof(res->buckets[0]));
114228753Smm	if (res->buckets == NULL) {
115228753Smm		free(res);
116228753Smm		return (NULL);
117228753Smm	}
118228753Smm	return (res);
119228753Smm}
120228753Smm
121228753Smmvoid
122228753Smmarchive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
123228753Smm    int fmt)
124228753Smm{
125228753Smm	int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;
126228753Smm
127228753Smm	switch (fmtbase) {
128232153Smm	case ARCHIVE_FORMAT_7ZIP:
129232153Smm	case ARCHIVE_FORMAT_AR:
130232153Smm	case ARCHIVE_FORMAT_ZIP:
131232153Smm		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
132232153Smm		break;
133228753Smm	case ARCHIVE_FORMAT_CPIO:
134228753Smm		switch (fmt) {
135228753Smm		case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
136228753Smm		case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
137228753Smm			res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;
138228753Smm			break;
139228753Smm		default:
140228753Smm			res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
141228753Smm			break;
142228753Smm		}
143228753Smm		break;
144228753Smm	case ARCHIVE_FORMAT_MTREE:
145228753Smm		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE;
146228753Smm		break;
147232153Smm	case ARCHIVE_FORMAT_ISO9660:
148232153Smm	case ARCHIVE_FORMAT_SHAR:
149228753Smm	case ARCHIVE_FORMAT_TAR:
150232153Smm	case ARCHIVE_FORMAT_XAR:
151228753Smm		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
152228753Smm		break;
153228753Smm	default:
154232153Smm		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
155228753Smm		break;
156228753Smm	}
157228753Smm}
158228753Smm
159228753Smmvoid
160228753Smmarchive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
161228753Smm{
162228753Smm	struct links_entry *le;
163228753Smm
164228753Smm	if (res == NULL)
165228753Smm		return;
166228753Smm
167232153Smm	while ((le = next_entry(res, NEXT_ENTRY_ALL)) != NULL)
168232153Smm		archive_entry_free(le->entry);
169232153Smm	free(res->buckets);
170228753Smm	free(res);
171228753Smm}
172228753Smm
173228753Smmvoid
174228753Smmarchive_entry_linkify(struct archive_entry_linkresolver *res,
175228753Smm    struct archive_entry **e, struct archive_entry **f)
176228753Smm{
177228753Smm	struct links_entry *le;
178228753Smm	struct archive_entry *t;
179228753Smm
180228753Smm	*f = NULL; /* Default: Don't return a second entry. */
181228753Smm
182228753Smm	if (*e == NULL) {
183232153Smm		le = next_entry(res, NEXT_ENTRY_DEFERRED);
184228753Smm		if (le != NULL) {
185228753Smm			*e = le->entry;
186228753Smm			le->entry = NULL;
187228753Smm		}
188228753Smm		return;
189228753Smm	}
190228753Smm
191228753Smm	/* If it has only one link, then we're done. */
192228753Smm	if (archive_entry_nlink(*e) == 1)
193228753Smm		return;
194228753Smm	/* Directories, devices never have hardlinks. */
195228753Smm	if (archive_entry_filetype(*e) == AE_IFDIR
196228753Smm	    || archive_entry_filetype(*e) == AE_IFBLK
197228753Smm	    || archive_entry_filetype(*e) == AE_IFCHR)
198228753Smm		return;
199228753Smm
200228753Smm	switch (res->strategy) {
201228753Smm	case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
202228753Smm		le = find_entry(res, *e);
203228753Smm		if (le != NULL) {
204228753Smm			archive_entry_unset_size(*e);
205228753Smm			archive_entry_copy_hardlink(*e,
206228753Smm			    archive_entry_pathname(le->canonical));
207228753Smm		} else
208228753Smm			insert_entry(res, *e);
209228753Smm		return;
210228753Smm	case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE:
211228753Smm		le = find_entry(res, *e);
212228753Smm		if (le != NULL) {
213228753Smm			archive_entry_copy_hardlink(*e,
214228753Smm			    archive_entry_pathname(le->canonical));
215228753Smm		} else
216228753Smm			insert_entry(res, *e);
217228753Smm		return;
218228753Smm	case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
219228753Smm		/* This one is trivial. */
220228753Smm		return;
221228753Smm	case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
222228753Smm		le = find_entry(res, *e);
223228753Smm		if (le != NULL) {
224228753Smm			/*
225228753Smm			 * Put the new entry in le, return the
226228753Smm			 * old entry from le.
227228753Smm			 */
228228753Smm			t = *e;
229228753Smm			*e = le->entry;
230228753Smm			le->entry = t;
231228753Smm			/* Make the old entry into a hardlink. */
232228753Smm			archive_entry_unset_size(*e);
233228753Smm			archive_entry_copy_hardlink(*e,
234228753Smm			    archive_entry_pathname(le->canonical));
235228753Smm			/* If we ran out of links, return the
236228753Smm			 * final entry as well. */
237228753Smm			if (le->links == 0) {
238228753Smm				*f = le->entry;
239228753Smm				le->entry = NULL;
240228753Smm			}
241228753Smm		} else {
242228753Smm			/*
243228753Smm			 * If we haven't seen it, tuck it away
244228753Smm			 * for future use.
245228753Smm			 */
246228753Smm			le = insert_entry(res, *e);
247248616Smm			if (le == NULL)
248248616Smm				/* XXX We should return an error code XXX */
249248616Smm				return;
250228753Smm			le->entry = *e;
251228753Smm			*e = NULL;
252228753Smm		}
253228753Smm		return;
254228753Smm	default:
255228753Smm		break;
256228753Smm	}
257228753Smm	return;
258228753Smm}
259228753Smm
260228753Smmstatic struct links_entry *
261228753Smmfind_entry(struct archive_entry_linkresolver *res,
262228753Smm    struct archive_entry *entry)
263228753Smm{
264228753Smm	struct links_entry	*le;
265232153Smm	size_t			 hash, bucket;
266228753Smm	dev_t			 dev;
267228753Smm	int64_t			 ino;
268228753Smm
269228753Smm	/* Free a held entry. */
270228753Smm	if (res->spare != NULL) {
271228753Smm		archive_entry_free(res->spare->canonical);
272228753Smm		archive_entry_free(res->spare->entry);
273228753Smm		free(res->spare);
274228753Smm		res->spare = NULL;
275228753Smm	}
276228753Smm
277228753Smm	dev = archive_entry_dev(entry);
278228753Smm	ino = archive_entry_ino64(entry);
279232153Smm	hash = (size_t)(dev ^ ino);
280228753Smm
281228753Smm	/* Try to locate this entry in the links cache. */
282232153Smm	bucket = hash & (res->number_buckets - 1);
283228753Smm	for (le = res->buckets[bucket]; le != NULL; le = le->next) {
284228753Smm		if (le->hash == hash
285228753Smm		    && dev == archive_entry_dev(le->canonical)
286228753Smm		    && ino == archive_entry_ino64(le->canonical)) {
287228753Smm			/*
288228753Smm			 * Decrement link count each time and release
289228753Smm			 * the entry if it hits zero.  This saves
290228753Smm			 * memory and is necessary for detecting
291228753Smm			 * missed links.
292228753Smm			 */
293228753Smm			--le->links;
294228753Smm			if (le->links > 0)
295228753Smm				return (le);
296228753Smm			/* Remove it from this hash bucket. */
297228753Smm			if (le->previous != NULL)
298228753Smm				le->previous->next = le->next;
299228753Smm			if (le->next != NULL)
300228753Smm				le->next->previous = le->previous;
301228753Smm			if (res->buckets[bucket] == le)
302228753Smm				res->buckets[bucket] = le->next;
303228753Smm			res->number_entries--;
304228753Smm			/* Defer freeing this entry. */
305228753Smm			res->spare = le;
306228753Smm			return (le);
307228753Smm		}
308228753Smm	}
309228753Smm	return (NULL);
310228753Smm}
311228753Smm
312228753Smmstatic struct links_entry *
313232153Smmnext_entry(struct archive_entry_linkresolver *res, int mode)
314228753Smm{
315228753Smm	struct links_entry	*le;
316228753Smm	size_t			 bucket;
317228753Smm
318228753Smm	/* Free a held entry. */
319228753Smm	if (res->spare != NULL) {
320228753Smm		archive_entry_free(res->spare->canonical);
321232153Smm		archive_entry_free(res->spare->entry);
322228753Smm		free(res->spare);
323228753Smm		res->spare = NULL;
324228753Smm	}
325228753Smm
326228753Smm	/* Look for next non-empty bucket in the links cache. */
327228753Smm	for (bucket = 0; bucket < res->number_buckets; bucket++) {
328232153Smm		for (le = res->buckets[bucket]; le != NULL; le = le->next) {
329232153Smm			if (le->entry != NULL &&
330232153Smm			    (mode & NEXT_ENTRY_DEFERRED) == 0)
331232153Smm				continue;
332232153Smm			if (le->entry == NULL &&
333232153Smm			    (mode & NEXT_ENTRY_PARTIAL) == 0)
334232153Smm				continue;
335228753Smm			/* Remove it from this hash bucket. */
336228753Smm			if (le->next != NULL)
337228753Smm				le->next->previous = le->previous;
338232153Smm			if (le->previous != NULL)
339232153Smm				le->previous->next = le->next;
340232153Smm			else
341232153Smm				res->buckets[bucket] = le->next;
342228753Smm			res->number_entries--;
343228753Smm			/* Defer freeing this entry. */
344228753Smm			res->spare = le;
345228753Smm			return (le);
346228753Smm		}
347228753Smm	}
348228753Smm	return (NULL);
349228753Smm}
350228753Smm
351228753Smmstatic struct links_entry *
352228753Smminsert_entry(struct archive_entry_linkresolver *res,
353228753Smm    struct archive_entry *entry)
354228753Smm{
355228753Smm	struct links_entry *le;
356232153Smm	size_t hash, bucket;
357228753Smm
358228753Smm	/* Add this entry to the links cache. */
359232153Smm	le = calloc(1, sizeof(struct links_entry));
360228753Smm	if (le == NULL)
361228753Smm		return (NULL);
362228753Smm	le->canonical = archive_entry_clone(entry);
363228753Smm
364228753Smm	/* If the links cache is getting too full, enlarge the hash table. */
365228753Smm	if (res->number_entries > res->number_buckets * 2)
366228753Smm		grow_hash(res);
367228753Smm
368238856Smm	hash = (size_t)(archive_entry_dev(entry) ^ archive_entry_ino64(entry));
369232153Smm	bucket = hash & (res->number_buckets - 1);
370228753Smm
371228753Smm	/* If we could allocate the entry, record it. */
372228753Smm	if (res->buckets[bucket] != NULL)
373228753Smm		res->buckets[bucket]->previous = le;
374228753Smm	res->number_entries++;
375228753Smm	le->next = res->buckets[bucket];
376228753Smm	le->previous = NULL;
377228753Smm	res->buckets[bucket] = le;
378228753Smm	le->hash = hash;
379228753Smm	le->links = archive_entry_nlink(entry) - 1;
380228753Smm	return (le);
381228753Smm}
382228753Smm
383228753Smmstatic void
384228753Smmgrow_hash(struct archive_entry_linkresolver *res)
385228753Smm{
386228753Smm	struct links_entry *le, **new_buckets;
387228753Smm	size_t new_size;
388228753Smm	size_t i, bucket;
389228753Smm
390228753Smm	/* Try to enlarge the bucket list. */
391228753Smm	new_size = res->number_buckets * 2;
392232153Smm	if (new_size < res->number_buckets)
393232153Smm		return;
394232153Smm	new_buckets = calloc(new_size, sizeof(struct links_entry *));
395228753Smm
396232153Smm	if (new_buckets == NULL)
397232153Smm		return;
398228753Smm
399232153Smm	for (i = 0; i < res->number_buckets; i++) {
400232153Smm		while (res->buckets[i] != NULL) {
401232153Smm			/* Remove entry from old bucket. */
402232153Smm			le = res->buckets[i];
403232153Smm			res->buckets[i] = le->next;
404228753Smm
405232153Smm			/* Add entry to new bucket. */
406232153Smm			bucket = le->hash & (new_size - 1);
407232153Smm
408232153Smm			if (new_buckets[bucket] != NULL)
409232153Smm				new_buckets[bucket]->previous = le;
410232153Smm			le->next = new_buckets[bucket];
411232153Smm			le->previous = NULL;
412232153Smm			new_buckets[bucket] = le;
413228753Smm		}
414228753Smm	}
415232153Smm	free(res->buckets);
416232153Smm	res->buckets = new_buckets;
417232153Smm	res->number_buckets = new_size;
418228753Smm}
419232153Smm
420232153Smmstruct archive_entry *
421232153Smmarchive_entry_partial_links(struct archive_entry_linkresolver *res,
422232153Smm    unsigned int *links)
423232153Smm{
424232153Smm	struct archive_entry	*e;
425232153Smm	struct links_entry	*le;
426232153Smm
427232153Smm	/* Free a held entry. */
428232153Smm	if (res->spare != NULL) {
429232153Smm		archive_entry_free(res->spare->canonical);
430232153Smm		archive_entry_free(res->spare->entry);
431232153Smm		free(res->spare);
432232153Smm		res->spare = NULL;
433232153Smm	}
434232153Smm
435232153Smm	le = next_entry(res, NEXT_ENTRY_PARTIAL);
436232153Smm	if (le != NULL) {
437232153Smm		e = le->canonical;
438232153Smm		if (links != NULL)
439232153Smm			*links = le->links;
440232153Smm		le->canonical = NULL;
441232153Smm	} else {
442232153Smm		e = NULL;
443232153Smm		if (links != NULL)
444232153Smm			*links = 0;
445232153Smm	}
446232153Smm	return (e);
447232153Smm}
448