1228753Smm/*- 2228753Smm * Copyright (c) 2003-2007 Tim Kientzle 3228753Smm * All rights reserved. 4228753Smm * 5228753Smm * Redistribution and use in source and binary forms, with or without 6228753Smm * modification, are permitted provided that the following conditions 7228753Smm * are met: 8228753Smm * 1. Redistributions of source code must retain the above copyright 9228753Smm * notice, this list of conditions and the following disclaimer. 10228753Smm * 2. Redistributions in binary form must reproduce the above copyright 11228753Smm * notice, this list of conditions and the following disclaimer in the 12228753Smm * documentation and/or other materials provided with the distribution. 13228753Smm * 14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24228753Smm */ 25228753Smm 26228753Smm#include "archive_platform.h" 27228763Smm__FBSDID("$FreeBSD$"); 28228753Smm 29228753Smm#ifdef HAVE_SYS_STAT_H 30228753Smm#include <sys/stat.h> 31228753Smm#endif 32228753Smm#ifdef HAVE_ERRNO_H 33228753Smm#include <errno.h> 34228753Smm#endif 35228753Smm#include <stdio.h> 36228753Smm#ifdef HAVE_STDLIB_H 37228753Smm#include <stdlib.h> 38228753Smm#endif 39228753Smm#ifdef HAVE_STRING_H 40228753Smm#include <string.h> 41228753Smm#endif 42228753Smm 43228753Smm#include "archive.h" 44228753Smm#include "archive_entry.h" 45228753Smm 46228753Smm/* 47228753Smm * This is mostly a pretty straightforward hash table implementation. 48228753Smm * The only interesting bit is the different strategies used to 49228753Smm * match up links. These strategies match those used by various 50228753Smm * archiving formats: 51228753Smm * tar - content stored with first link, remainder refer back to it. 52228753Smm * This requires us to match each subsequent link up with the 53228753Smm * first appearance. 54228753Smm * cpio - Old cpio just stored body with each link, match-ups were 55228753Smm * implicit. This is trivial. 56228753Smm * new cpio - New cpio only stores body with last link, match-ups 57228753Smm * are implicit. This is actually quite tricky; see the notes 58228753Smm * below. 59228753Smm */ 60228753Smm 61228753Smm/* Users pass us a format code, we translate that into a strategy here. */ 62228753Smm#define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0 63228753Smm#define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1 64228753Smm#define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2 65228753Smm#define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3 66228753Smm 67228753Smm/* Initial size of link cache. */ 68228753Smm#define links_cache_initial_size 1024 69228753Smm 70228753Smmstruct links_entry { 71228753Smm struct links_entry *next; 72228753Smm struct links_entry *previous; 73228753Smm struct archive_entry *canonical; 74228753Smm struct archive_entry *entry; 75232153Smm size_t hash; 76232153Smm unsigned int links; /* # links not yet seen */ 77228753Smm}; 78228753Smm 79228753Smmstruct archive_entry_linkresolver { 80228753Smm struct links_entry **buckets; 81228753Smm struct links_entry *spare; 82228753Smm unsigned long number_entries; 83228753Smm size_t number_buckets; 84228753Smm int strategy; 85228753Smm}; 86228753Smm 87232153Smm#define NEXT_ENTRY_DEFERRED 1 88232153Smm#define NEXT_ENTRY_PARTIAL 2 89232153Smm#define NEXT_ENTRY_ALL (NEXT_ENTRY_DEFERRED | NEXT_ENTRY_PARTIAL) 90232153Smm 91228753Smmstatic struct links_entry *find_entry(struct archive_entry_linkresolver *, 92228753Smm struct archive_entry *); 93228753Smmstatic void grow_hash(struct archive_entry_linkresolver *); 94228753Smmstatic struct links_entry *insert_entry(struct archive_entry_linkresolver *, 95228753Smm struct archive_entry *); 96232153Smmstatic struct links_entry *next_entry(struct archive_entry_linkresolver *, 97232153Smm int); 98228753Smm 99228753Smmstruct archive_entry_linkresolver * 100228753Smmarchive_entry_linkresolver_new(void) 101228753Smm{ 102228753Smm struct archive_entry_linkresolver *res; 103228753Smm 104232153Smm /* Check for positive power-of-two */ 105232153Smm if (links_cache_initial_size == 0 || 106232153Smm (links_cache_initial_size & (links_cache_initial_size - 1)) != 0) 107232153Smm return (NULL); 108232153Smm 109232153Smm res = calloc(1, sizeof(struct archive_entry_linkresolver)); 110228753Smm if (res == NULL) 111228753Smm return (NULL); 112228753Smm res->number_buckets = links_cache_initial_size; 113232153Smm res->buckets = calloc(res->number_buckets, sizeof(res->buckets[0])); 114228753Smm if (res->buckets == NULL) { 115228753Smm free(res); 116228753Smm return (NULL); 117228753Smm } 118228753Smm return (res); 119228753Smm} 120228753Smm 121228753Smmvoid 122228753Smmarchive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res, 123228753Smm int fmt) 124228753Smm{ 125228753Smm int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK; 126228753Smm 127228753Smm switch (fmtbase) { 128232153Smm case ARCHIVE_FORMAT_7ZIP: 129232153Smm case ARCHIVE_FORMAT_AR: 130232153Smm case ARCHIVE_FORMAT_ZIP: 131232153Smm res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; 132232153Smm break; 133228753Smm case ARCHIVE_FORMAT_CPIO: 134228753Smm switch (fmt) { 135228753Smm case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC: 136228753Smm case ARCHIVE_FORMAT_CPIO_SVR4_CRC: 137228753Smm res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO; 138228753Smm break; 139228753Smm default: 140228753Smm res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; 141228753Smm break; 142228753Smm } 143228753Smm break; 144228753Smm case ARCHIVE_FORMAT_MTREE: 145228753Smm res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE; 146228753Smm break; 147232153Smm case ARCHIVE_FORMAT_ISO9660: 148232153Smm case ARCHIVE_FORMAT_SHAR: 149228753Smm case ARCHIVE_FORMAT_TAR: 150232153Smm case ARCHIVE_FORMAT_XAR: 151228753Smm res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR; 152228753Smm break; 153228753Smm default: 154232153Smm res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; 155228753Smm break; 156228753Smm } 157228753Smm} 158228753Smm 159228753Smmvoid 160228753Smmarchive_entry_linkresolver_free(struct archive_entry_linkresolver *res) 161228753Smm{ 162228753Smm struct links_entry *le; 163228753Smm 164228753Smm if (res == NULL) 165228753Smm return; 166228753Smm 167232153Smm while ((le = next_entry(res, NEXT_ENTRY_ALL)) != NULL) 168232153Smm archive_entry_free(le->entry); 169232153Smm free(res->buckets); 170228753Smm free(res); 171228753Smm} 172228753Smm 173228753Smmvoid 174228753Smmarchive_entry_linkify(struct archive_entry_linkresolver *res, 175228753Smm struct archive_entry **e, struct archive_entry **f) 176228753Smm{ 177228753Smm struct links_entry *le; 178228753Smm struct archive_entry *t; 179228753Smm 180228753Smm *f = NULL; /* Default: Don't return a second entry. */ 181228753Smm 182228753Smm if (*e == NULL) { 183232153Smm le = next_entry(res, NEXT_ENTRY_DEFERRED); 184228753Smm if (le != NULL) { 185228753Smm *e = le->entry; 186228753Smm le->entry = NULL; 187228753Smm } 188228753Smm return; 189228753Smm } 190228753Smm 191228753Smm /* If it has only one link, then we're done. */ 192228753Smm if (archive_entry_nlink(*e) == 1) 193228753Smm return; 194228753Smm /* Directories, devices never have hardlinks. */ 195228753Smm if (archive_entry_filetype(*e) == AE_IFDIR 196228753Smm || archive_entry_filetype(*e) == AE_IFBLK 197228753Smm || archive_entry_filetype(*e) == AE_IFCHR) 198228753Smm return; 199228753Smm 200228753Smm switch (res->strategy) { 201228753Smm case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR: 202228753Smm le = find_entry(res, *e); 203228753Smm if (le != NULL) { 204228753Smm archive_entry_unset_size(*e); 205228753Smm archive_entry_copy_hardlink(*e, 206228753Smm archive_entry_pathname(le->canonical)); 207228753Smm } else 208228753Smm insert_entry(res, *e); 209228753Smm return; 210228753Smm case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE: 211228753Smm le = find_entry(res, *e); 212228753Smm if (le != NULL) { 213228753Smm archive_entry_copy_hardlink(*e, 214228753Smm archive_entry_pathname(le->canonical)); 215228753Smm } else 216228753Smm insert_entry(res, *e); 217228753Smm return; 218228753Smm case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO: 219228753Smm /* This one is trivial. */ 220228753Smm return; 221228753Smm case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO: 222228753Smm le = find_entry(res, *e); 223228753Smm if (le != NULL) { 224228753Smm /* 225228753Smm * Put the new entry in le, return the 226228753Smm * old entry from le. 227228753Smm */ 228228753Smm t = *e; 229228753Smm *e = le->entry; 230228753Smm le->entry = t; 231228753Smm /* Make the old entry into a hardlink. */ 232228753Smm archive_entry_unset_size(*e); 233228753Smm archive_entry_copy_hardlink(*e, 234228753Smm archive_entry_pathname(le->canonical)); 235228753Smm /* If we ran out of links, return the 236228753Smm * final entry as well. */ 237228753Smm if (le->links == 0) { 238228753Smm *f = le->entry; 239228753Smm le->entry = NULL; 240228753Smm } 241228753Smm } else { 242228753Smm /* 243228753Smm * If we haven't seen it, tuck it away 244228753Smm * for future use. 245228753Smm */ 246228753Smm le = insert_entry(res, *e); 247248616Smm if (le == NULL) 248248616Smm /* XXX We should return an error code XXX */ 249248616Smm return; 250228753Smm le->entry = *e; 251228753Smm *e = NULL; 252228753Smm } 253228753Smm return; 254228753Smm default: 255228753Smm break; 256228753Smm } 257228753Smm return; 258228753Smm} 259228753Smm 260228753Smmstatic struct links_entry * 261228753Smmfind_entry(struct archive_entry_linkresolver *res, 262228753Smm struct archive_entry *entry) 263228753Smm{ 264228753Smm struct links_entry *le; 265232153Smm size_t hash, bucket; 266228753Smm dev_t dev; 267228753Smm int64_t ino; 268228753Smm 269228753Smm /* Free a held entry. */ 270228753Smm if (res->spare != NULL) { 271228753Smm archive_entry_free(res->spare->canonical); 272228753Smm archive_entry_free(res->spare->entry); 273228753Smm free(res->spare); 274228753Smm res->spare = NULL; 275228753Smm } 276228753Smm 277228753Smm dev = archive_entry_dev(entry); 278228753Smm ino = archive_entry_ino64(entry); 279232153Smm hash = (size_t)(dev ^ ino); 280228753Smm 281228753Smm /* Try to locate this entry in the links cache. */ 282232153Smm bucket = hash & (res->number_buckets - 1); 283228753Smm for (le = res->buckets[bucket]; le != NULL; le = le->next) { 284228753Smm if (le->hash == hash 285228753Smm && dev == archive_entry_dev(le->canonical) 286228753Smm && ino == archive_entry_ino64(le->canonical)) { 287228753Smm /* 288228753Smm * Decrement link count each time and release 289228753Smm * the entry if it hits zero. This saves 290228753Smm * memory and is necessary for detecting 291228753Smm * missed links. 292228753Smm */ 293228753Smm --le->links; 294228753Smm if (le->links > 0) 295228753Smm return (le); 296228753Smm /* Remove it from this hash bucket. */ 297228753Smm if (le->previous != NULL) 298228753Smm le->previous->next = le->next; 299228753Smm if (le->next != NULL) 300228753Smm le->next->previous = le->previous; 301228753Smm if (res->buckets[bucket] == le) 302228753Smm res->buckets[bucket] = le->next; 303228753Smm res->number_entries--; 304228753Smm /* Defer freeing this entry. */ 305228753Smm res->spare = le; 306228753Smm return (le); 307228753Smm } 308228753Smm } 309228753Smm return (NULL); 310228753Smm} 311228753Smm 312228753Smmstatic struct links_entry * 313232153Smmnext_entry(struct archive_entry_linkresolver *res, int mode) 314228753Smm{ 315228753Smm struct links_entry *le; 316228753Smm size_t bucket; 317228753Smm 318228753Smm /* Free a held entry. */ 319228753Smm if (res->spare != NULL) { 320228753Smm archive_entry_free(res->spare->canonical); 321232153Smm archive_entry_free(res->spare->entry); 322228753Smm free(res->spare); 323228753Smm res->spare = NULL; 324228753Smm } 325228753Smm 326228753Smm /* Look for next non-empty bucket in the links cache. */ 327228753Smm for (bucket = 0; bucket < res->number_buckets; bucket++) { 328232153Smm for (le = res->buckets[bucket]; le != NULL; le = le->next) { 329232153Smm if (le->entry != NULL && 330232153Smm (mode & NEXT_ENTRY_DEFERRED) == 0) 331232153Smm continue; 332232153Smm if (le->entry == NULL && 333232153Smm (mode & NEXT_ENTRY_PARTIAL) == 0) 334232153Smm continue; 335228753Smm /* Remove it from this hash bucket. */ 336228753Smm if (le->next != NULL) 337228753Smm le->next->previous = le->previous; 338232153Smm if (le->previous != NULL) 339232153Smm le->previous->next = le->next; 340232153Smm else 341232153Smm res->buckets[bucket] = le->next; 342228753Smm res->number_entries--; 343228753Smm /* Defer freeing this entry. */ 344228753Smm res->spare = le; 345228753Smm return (le); 346228753Smm } 347228753Smm } 348228753Smm return (NULL); 349228753Smm} 350228753Smm 351228753Smmstatic struct links_entry * 352228753Smminsert_entry(struct archive_entry_linkresolver *res, 353228753Smm struct archive_entry *entry) 354228753Smm{ 355228753Smm struct links_entry *le; 356232153Smm size_t hash, bucket; 357228753Smm 358228753Smm /* Add this entry to the links cache. */ 359232153Smm le = calloc(1, sizeof(struct links_entry)); 360228753Smm if (le == NULL) 361228753Smm return (NULL); 362228753Smm le->canonical = archive_entry_clone(entry); 363228753Smm 364228753Smm /* If the links cache is getting too full, enlarge the hash table. */ 365228753Smm if (res->number_entries > res->number_buckets * 2) 366228753Smm grow_hash(res); 367228753Smm 368238856Smm hash = (size_t)(archive_entry_dev(entry) ^ archive_entry_ino64(entry)); 369232153Smm bucket = hash & (res->number_buckets - 1); 370228753Smm 371228753Smm /* If we could allocate the entry, record it. */ 372228753Smm if (res->buckets[bucket] != NULL) 373228753Smm res->buckets[bucket]->previous = le; 374228753Smm res->number_entries++; 375228753Smm le->next = res->buckets[bucket]; 376228753Smm le->previous = NULL; 377228753Smm res->buckets[bucket] = le; 378228753Smm le->hash = hash; 379228753Smm le->links = archive_entry_nlink(entry) - 1; 380228753Smm return (le); 381228753Smm} 382228753Smm 383228753Smmstatic void 384228753Smmgrow_hash(struct archive_entry_linkresolver *res) 385228753Smm{ 386228753Smm struct links_entry *le, **new_buckets; 387228753Smm size_t new_size; 388228753Smm size_t i, bucket; 389228753Smm 390228753Smm /* Try to enlarge the bucket list. */ 391228753Smm new_size = res->number_buckets * 2; 392232153Smm if (new_size < res->number_buckets) 393232153Smm return; 394232153Smm new_buckets = calloc(new_size, sizeof(struct links_entry *)); 395228753Smm 396232153Smm if (new_buckets == NULL) 397232153Smm return; 398228753Smm 399232153Smm for (i = 0; i < res->number_buckets; i++) { 400232153Smm while (res->buckets[i] != NULL) { 401232153Smm /* Remove entry from old bucket. */ 402232153Smm le = res->buckets[i]; 403232153Smm res->buckets[i] = le->next; 404228753Smm 405232153Smm /* Add entry to new bucket. */ 406232153Smm bucket = le->hash & (new_size - 1); 407232153Smm 408232153Smm if (new_buckets[bucket] != NULL) 409232153Smm new_buckets[bucket]->previous = le; 410232153Smm le->next = new_buckets[bucket]; 411232153Smm le->previous = NULL; 412232153Smm new_buckets[bucket] = le; 413228753Smm } 414228753Smm } 415232153Smm free(res->buckets); 416232153Smm res->buckets = new_buckets; 417232153Smm res->number_buckets = new_size; 418228753Smm} 419232153Smm 420232153Smmstruct archive_entry * 421232153Smmarchive_entry_partial_links(struct archive_entry_linkresolver *res, 422232153Smm unsigned int *links) 423232153Smm{ 424232153Smm struct archive_entry *e; 425232153Smm struct links_entry *le; 426232153Smm 427232153Smm /* Free a held entry. */ 428232153Smm if (res->spare != NULL) { 429232153Smm archive_entry_free(res->spare->canonical); 430232153Smm archive_entry_free(res->spare->entry); 431232153Smm free(res->spare); 432232153Smm res->spare = NULL; 433232153Smm } 434232153Smm 435232153Smm le = next_entry(res, NEXT_ENTRY_PARTIAL); 436232153Smm if (le != NULL) { 437232153Smm e = le->canonical; 438232153Smm if (links != NULL) 439232153Smm *links = le->links; 440232153Smm le->canonical = NULL; 441232153Smm } else { 442232153Smm e = NULL; 443232153Smm if (links != NULL) 444232153Smm *links = 0; 445232153Smm } 446232153Smm return (e); 447232153Smm} 448