1228753Smm/*-
2228753Smm * Copyright (c) 2003-2007 Tim Kientzle
3228753Smm * All rights reserved.
4228753Smm *
5228753Smm * Redistribution and use in source and binary forms, with or without
6228753Smm * modification, are permitted provided that the following conditions
7228753Smm * are met:
8228753Smm * 1. Redistributions of source code must retain the above copyright
9228753Smm *    notice, this list of conditions and the following disclaimer.
10228753Smm * 2. Redistributions in binary form must reproduce the above copyright
11228753Smm *    notice, this list of conditions and the following disclaimer in the
12228753Smm *    documentation and/or other materials provided with the distribution.
13228753Smm *
14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24228753Smm */
25228753Smm
26228753Smm#include "archive_platform.h"
27229592Smm__FBSDID("$FreeBSD$");
28228753Smm
29228753Smm#ifdef HAVE_SYS_STAT_H
30228753Smm#include <sys/stat.h>
31228753Smm#endif
32228753Smm#ifdef HAVE_ERRNO_H
33228753Smm#include <errno.h>
34228753Smm#endif
35228753Smm#include <stdio.h>
36228753Smm#ifdef HAVE_STDLIB_H
37228753Smm#include <stdlib.h>
38228753Smm#endif
39228753Smm#ifdef HAVE_STRING_H
40228753Smm#include <string.h>
41228753Smm#endif
42228753Smm
43228753Smm#include "archive.h"
44228753Smm#include "archive_entry.h"
45228753Smm
46228753Smm/*
47228753Smm * This is mostly a pretty straightforward hash table implementation.
48228753Smm * The only interesting bit is the different strategies used to
49228753Smm * match up links.  These strategies match those used by various
50228753Smm * archiving formats:
51228753Smm *   tar - content stored with first link, remainder refer back to it.
52228753Smm *       This requires us to match each subsequent link up with the
53228753Smm *       first appearance.
54228753Smm *   cpio - Old cpio just stored body with each link, match-ups were
55228753Smm *       implicit.  This is trivial.
56228753Smm *   new cpio - New cpio only stores body with last link, match-ups
57228753Smm *       are implicit.  This is actually quite tricky; see the notes
58228753Smm *       below.
59228753Smm */
60228753Smm
61228753Smm/* Users pass us a format code, we translate that into a strategy here. */
62228753Smm#define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR	0
63228753Smm#define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1
64228753Smm#define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2
65228753Smm#define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3
66228753Smm
67228753Smm/* Initial size of link cache. */
68228753Smm#define	links_cache_initial_size 1024
69228753Smm
70228753Smmstruct links_entry {
71228753Smm	struct links_entry	*next;
72228753Smm	struct links_entry	*previous;
73228753Smm	int			 links; /* # links not yet seen */
74228753Smm	int			 hash;
75228753Smm	struct archive_entry	*canonical;
76228753Smm	struct archive_entry	*entry;
77228753Smm};
78228753Smm
79228753Smmstruct archive_entry_linkresolver {
80228753Smm	struct links_entry	**buckets;
81228753Smm	struct links_entry	 *spare;
82228753Smm	unsigned long		  number_entries;
83228753Smm	size_t			  number_buckets;
84228753Smm	int			  strategy;
85228753Smm};
86228753Smm
87228753Smmstatic struct links_entry *find_entry(struct archive_entry_linkresolver *,
88228753Smm		    struct archive_entry *);
89228753Smmstatic void grow_hash(struct archive_entry_linkresolver *);
90228753Smmstatic struct links_entry *insert_entry(struct archive_entry_linkresolver *,
91228753Smm		    struct archive_entry *);
92228753Smmstatic struct links_entry *next_entry(struct archive_entry_linkresolver *);
93228753Smm
94228753Smmstruct archive_entry_linkresolver *
95228753Smmarchive_entry_linkresolver_new(void)
96228753Smm{
97228753Smm	struct archive_entry_linkresolver *res;
98228753Smm	size_t i;
99228753Smm
100228753Smm	res = malloc(sizeof(struct archive_entry_linkresolver));
101228753Smm	if (res == NULL)
102228753Smm		return (NULL);
103228753Smm	memset(res, 0, sizeof(struct archive_entry_linkresolver));
104228753Smm	res->number_buckets = links_cache_initial_size;
105228753Smm	res->buckets = malloc(res->number_buckets *
106228753Smm	    sizeof(res->buckets[0]));
107228753Smm	if (res->buckets == NULL) {
108228753Smm		free(res);
109228753Smm		return (NULL);
110228753Smm	}
111228753Smm	for (i = 0; i < res->number_buckets; i++)
112228753Smm		res->buckets[i] = NULL;
113228753Smm	return (res);
114228753Smm}
115228753Smm
116228753Smmvoid
117228753Smmarchive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
118228753Smm    int fmt)
119228753Smm{
120228753Smm	int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;
121228753Smm
122228753Smm	switch (fmtbase) {
123228753Smm	case ARCHIVE_FORMAT_CPIO:
124228753Smm		switch (fmt) {
125228753Smm		case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
126228753Smm		case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
127228753Smm			res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;
128228753Smm			break;
129228753Smm		default:
130228753Smm			res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
131228753Smm			break;
132228753Smm		}
133228753Smm		break;
134228753Smm	case ARCHIVE_FORMAT_MTREE:
135228753Smm		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE;
136228753Smm		break;
137228753Smm	case ARCHIVE_FORMAT_TAR:
138228753Smm		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
139228753Smm		break;
140228753Smm	default:
141228753Smm		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
142228753Smm		break;
143228753Smm	}
144228753Smm}
145228753Smm
146228753Smmvoid
147228753Smmarchive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
148228753Smm{
149228753Smm	struct links_entry *le;
150228753Smm
151228753Smm	if (res == NULL)
152228753Smm		return;
153228753Smm
154228753Smm	if (res->buckets != NULL) {
155228753Smm		while ((le = next_entry(res)) != NULL)
156228753Smm			archive_entry_free(le->entry);
157228753Smm		free(res->buckets);
158228753Smm		res->buckets = NULL;
159228753Smm	}
160228753Smm	free(res);
161228753Smm}
162228753Smm
163228753Smmvoid
164228753Smmarchive_entry_linkify(struct archive_entry_linkresolver *res,
165228753Smm    struct archive_entry **e, struct archive_entry **f)
166228753Smm{
167228753Smm	struct links_entry *le;
168228753Smm	struct archive_entry *t;
169228753Smm
170228753Smm	*f = NULL; /* Default: Don't return a second entry. */
171228753Smm
172228753Smm	if (*e == NULL) {
173228753Smm		le = next_entry(res);
174228753Smm		if (le != NULL) {
175228753Smm			*e = le->entry;
176228753Smm			le->entry = NULL;
177228753Smm		}
178228753Smm		return;
179228753Smm	}
180228753Smm
181228753Smm	/* If it has only one link, then we're done. */
182228753Smm	if (archive_entry_nlink(*e) == 1)
183228753Smm		return;
184228753Smm	/* Directories, devices never have hardlinks. */
185228753Smm	if (archive_entry_filetype(*e) == AE_IFDIR
186228753Smm	    || archive_entry_filetype(*e) == AE_IFBLK
187228753Smm	    || archive_entry_filetype(*e) == AE_IFCHR)
188228753Smm		return;
189228753Smm
190228753Smm	switch (res->strategy) {
191228753Smm	case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
192228753Smm		le = find_entry(res, *e);
193228753Smm		if (le != NULL) {
194228753Smm			archive_entry_unset_size(*e);
195228753Smm			archive_entry_copy_hardlink(*e,
196228753Smm			    archive_entry_pathname(le->canonical));
197228753Smm		} else
198228753Smm			insert_entry(res, *e);
199228753Smm		return;
200228753Smm	case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE:
201228753Smm		le = find_entry(res, *e);
202228753Smm		if (le != NULL) {
203228753Smm			archive_entry_copy_hardlink(*e,
204228753Smm			    archive_entry_pathname(le->canonical));
205228753Smm		} else
206228753Smm			insert_entry(res, *e);
207228753Smm		return;
208228753Smm	case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
209228753Smm		/* This one is trivial. */
210228753Smm		return;
211228753Smm	case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
212228753Smm		le = find_entry(res, *e);
213228753Smm		if (le != NULL) {
214228753Smm			/*
215228753Smm			 * Put the new entry in le, return the
216228753Smm			 * old entry from le.
217228753Smm			 */
218228753Smm			t = *e;
219228753Smm			*e = le->entry;
220228753Smm			le->entry = t;
221228753Smm			/* Make the old entry into a hardlink. */
222228753Smm			archive_entry_unset_size(*e);
223228753Smm			archive_entry_copy_hardlink(*e,
224228753Smm			    archive_entry_pathname(le->canonical));
225228753Smm			/* If we ran out of links, return the
226228753Smm			 * final entry as well. */
227228753Smm			if (le->links == 0) {
228228753Smm				*f = le->entry;
229228753Smm				le->entry = NULL;
230228753Smm			}
231228753Smm		} else {
232228753Smm			/*
233228753Smm			 * If we haven't seen it, tuck it away
234228753Smm			 * for future use.
235228753Smm			 */
236228753Smm			le = insert_entry(res, *e);
237228753Smm			le->entry = *e;
238228753Smm			*e = NULL;
239228753Smm		}
240228753Smm		return;
241228753Smm	default:
242228753Smm		break;
243228753Smm	}
244228753Smm	return;
245228753Smm}
246228753Smm
247228753Smmstatic struct links_entry *
248228753Smmfind_entry(struct archive_entry_linkresolver *res,
249228753Smm    struct archive_entry *entry)
250228753Smm{
251228753Smm	struct links_entry	*le;
252228753Smm	int			 hash, bucket;
253228753Smm	dev_t			 dev;
254228753Smm	int64_t			 ino;
255228753Smm
256228753Smm	/* Free a held entry. */
257228753Smm	if (res->spare != NULL) {
258228753Smm		archive_entry_free(res->spare->canonical);
259228753Smm		archive_entry_free(res->spare->entry);
260228753Smm		free(res->spare);
261228753Smm		res->spare = NULL;
262228753Smm	}
263228753Smm
264228753Smm	/* If the links cache overflowed and got flushed, don't bother. */
265228753Smm	if (res->buckets == NULL)
266228753Smm		return (NULL);
267228753Smm
268228753Smm	dev = archive_entry_dev(entry);
269228753Smm	ino = archive_entry_ino64(entry);
270228753Smm	hash = (int)(dev ^ ino);
271228753Smm
272228753Smm	/* Try to locate this entry in the links cache. */
273228753Smm	bucket = hash % res->number_buckets;
274228753Smm	for (le = res->buckets[bucket]; le != NULL; le = le->next) {
275228753Smm		if (le->hash == hash
276228753Smm		    && dev == archive_entry_dev(le->canonical)
277228753Smm		    && ino == archive_entry_ino64(le->canonical)) {
278228753Smm			/*
279228753Smm			 * Decrement link count each time and release
280228753Smm			 * the entry if it hits zero.  This saves
281228753Smm			 * memory and is necessary for detecting
282228753Smm			 * missed links.
283228753Smm			 */
284228753Smm			--le->links;
285228753Smm			if (le->links > 0)
286228753Smm				return (le);
287228753Smm			/* Remove it from this hash bucket. */
288228753Smm			if (le->previous != NULL)
289228753Smm				le->previous->next = le->next;
290228753Smm			if (le->next != NULL)
291228753Smm				le->next->previous = le->previous;
292228753Smm			if (res->buckets[bucket] == le)
293228753Smm				res->buckets[bucket] = le->next;
294228753Smm			res->number_entries--;
295228753Smm			/* Defer freeing this entry. */
296228753Smm			res->spare = le;
297228753Smm			return (le);
298228753Smm		}
299228753Smm	}
300228753Smm	return (NULL);
301228753Smm}
302228753Smm
303228753Smmstatic struct links_entry *
304228753Smmnext_entry(struct archive_entry_linkresolver *res)
305228753Smm{
306228753Smm	struct links_entry	*le;
307228753Smm	size_t			 bucket;
308228753Smm
309228753Smm	/* Free a held entry. */
310228753Smm	if (res->spare != NULL) {
311228753Smm		archive_entry_free(res->spare->canonical);
312228753Smm		free(res->spare);
313228753Smm		res->spare = NULL;
314228753Smm	}
315228753Smm
316228753Smm	/* If the links cache overflowed and got flushed, don't bother. */
317228753Smm	if (res->buckets == NULL)
318228753Smm		return (NULL);
319228753Smm
320228753Smm	/* Look for next non-empty bucket in the links cache. */
321228753Smm	for (bucket = 0; bucket < res->number_buckets; bucket++) {
322228753Smm		le = res->buckets[bucket];
323228753Smm		if (le != NULL) {
324228753Smm			/* Remove it from this hash bucket. */
325228753Smm			if (le->next != NULL)
326228753Smm				le->next->previous = le->previous;
327228753Smm			res->buckets[bucket] = le->next;
328228753Smm			res->number_entries--;
329228753Smm			/* Defer freeing this entry. */
330228753Smm			res->spare = le;
331228753Smm			return (le);
332228753Smm		}
333228753Smm	}
334228753Smm	return (NULL);
335228753Smm}
336228753Smm
337228753Smmstatic struct links_entry *
338228753Smminsert_entry(struct archive_entry_linkresolver *res,
339228753Smm    struct archive_entry *entry)
340228753Smm{
341228753Smm	struct links_entry *le;
342228753Smm	int			 hash, bucket;
343228753Smm
344228753Smm	/* Add this entry to the links cache. */
345228753Smm	le = malloc(sizeof(struct links_entry));
346228753Smm	if (le == NULL)
347228753Smm		return (NULL);
348228753Smm	memset(le, 0, sizeof(*le));
349228753Smm	le->canonical = archive_entry_clone(entry);
350228753Smm
351228753Smm	/* If the links cache is getting too full, enlarge the hash table. */
352228753Smm	if (res->number_entries > res->number_buckets * 2)
353228753Smm		grow_hash(res);
354228753Smm
355228753Smm	hash = archive_entry_dev(entry) ^ archive_entry_ino64(entry);
356228753Smm	bucket = hash % res->number_buckets;
357228753Smm
358228753Smm	/* If we could allocate the entry, record it. */
359228753Smm	if (res->buckets[bucket] != NULL)
360228753Smm		res->buckets[bucket]->previous = le;
361228753Smm	res->number_entries++;
362228753Smm	le->next = res->buckets[bucket];
363228753Smm	le->previous = NULL;
364228753Smm	res->buckets[bucket] = le;
365228753Smm	le->hash = hash;
366228753Smm	le->links = archive_entry_nlink(entry) - 1;
367228753Smm	return (le);
368228753Smm}
369228753Smm
370228753Smmstatic void
371228753Smmgrow_hash(struct archive_entry_linkresolver *res)
372228753Smm{
373228753Smm	struct links_entry *le, **new_buckets;
374228753Smm	size_t new_size;
375228753Smm	size_t i, bucket;
376228753Smm
377228753Smm	/* Try to enlarge the bucket list. */
378228753Smm	new_size = res->number_buckets * 2;
379228753Smm	new_buckets = malloc(new_size * sizeof(struct links_entry *));
380228753Smm
381228753Smm	if (new_buckets != NULL) {
382228753Smm		memset(new_buckets, 0,
383228753Smm		    new_size * sizeof(struct links_entry *));
384228753Smm		for (i = 0; i < res->number_buckets; i++) {
385228753Smm			while (res->buckets[i] != NULL) {
386228753Smm				/* Remove entry from old bucket. */
387228753Smm				le = res->buckets[i];
388228753Smm				res->buckets[i] = le->next;
389228753Smm
390228753Smm				/* Add entry to new bucket. */
391228753Smm				bucket = le->hash % new_size;
392228753Smm
393228753Smm				if (new_buckets[bucket] != NULL)
394228753Smm					new_buckets[bucket]->previous =
395228753Smm					    le;
396228753Smm				le->next = new_buckets[bucket];
397228753Smm				le->previous = NULL;
398228753Smm				new_buckets[bucket] = le;
399228753Smm			}
400228753Smm		}
401228753Smm		free(res->buckets);
402228753Smm		res->buckets = new_buckets;
403228753Smm		res->number_buckets = new_size;
404228753Smm	}
405228753Smm}
406