1228753Smm/*- 2228753Smm * Copyright (c) 2003-2007 Tim Kientzle 3228753Smm * All rights reserved. 4228753Smm * 5228753Smm * Redistribution and use in source and binary forms, with or without 6228753Smm * modification, are permitted provided that the following conditions 7228753Smm * are met: 8228753Smm * 1. Redistributions of source code must retain the above copyright 9228753Smm * notice, this list of conditions and the following disclaimer. 10228753Smm * 2. Redistributions in binary form must reproduce the above copyright 11228753Smm * notice, this list of conditions and the following disclaimer in the 12228753Smm * documentation and/or other materials provided with the distribution. 13228753Smm * 14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24228753Smm */ 25228753Smm 26228753Smm#include "archive_platform.h" 27229592Smm__FBSDID("$FreeBSD$"); 28228753Smm 29228753Smm#ifdef HAVE_SYS_STAT_H 30228753Smm#include <sys/stat.h> 31228753Smm#endif 32228753Smm#ifdef HAVE_ERRNO_H 33228753Smm#include <errno.h> 34228753Smm#endif 35228753Smm#include <stdio.h> 36228753Smm#ifdef HAVE_STDLIB_H 37228753Smm#include <stdlib.h> 38228753Smm#endif 39228753Smm#ifdef HAVE_STRING_H 40228753Smm#include <string.h> 41228753Smm#endif 42228753Smm 43228753Smm#include "archive.h" 44228753Smm#include "archive_entry.h" 45228753Smm 46228753Smm/* 47228753Smm * This is mostly a pretty straightforward hash table implementation. 48228753Smm * The only interesting bit is the different strategies used to 49228753Smm * match up links. These strategies match those used by various 50228753Smm * archiving formats: 51228753Smm * tar - content stored with first link, remainder refer back to it. 52228753Smm * This requires us to match each subsequent link up with the 53228753Smm * first appearance. 54228753Smm * cpio - Old cpio just stored body with each link, match-ups were 55228753Smm * implicit. This is trivial. 56228753Smm * new cpio - New cpio only stores body with last link, match-ups 57228753Smm * are implicit. This is actually quite tricky; see the notes 58228753Smm * below. 59228753Smm */ 60228753Smm 61228753Smm/* Users pass us a format code, we translate that into a strategy here. */ 62228753Smm#define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0 63228753Smm#define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1 64228753Smm#define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2 65228753Smm#define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3 66228753Smm 67228753Smm/* Initial size of link cache. */ 68228753Smm#define links_cache_initial_size 1024 69228753Smm 70228753Smmstruct links_entry { 71228753Smm struct links_entry *next; 72228753Smm struct links_entry *previous; 73228753Smm int links; /* # links not yet seen */ 74228753Smm int hash; 75228753Smm struct archive_entry *canonical; 76228753Smm struct archive_entry *entry; 77228753Smm}; 78228753Smm 79228753Smmstruct archive_entry_linkresolver { 80228753Smm struct links_entry **buckets; 81228753Smm struct links_entry *spare; 82228753Smm unsigned long number_entries; 83228753Smm size_t number_buckets; 84228753Smm int strategy; 85228753Smm}; 86228753Smm 87228753Smmstatic struct links_entry *find_entry(struct archive_entry_linkresolver *, 88228753Smm struct archive_entry *); 89228753Smmstatic void grow_hash(struct archive_entry_linkresolver *); 90228753Smmstatic struct links_entry *insert_entry(struct archive_entry_linkresolver *, 91228753Smm struct archive_entry *); 92228753Smmstatic struct links_entry *next_entry(struct archive_entry_linkresolver *); 93228753Smm 94228753Smmstruct archive_entry_linkresolver * 95228753Smmarchive_entry_linkresolver_new(void) 96228753Smm{ 97228753Smm struct archive_entry_linkresolver *res; 98228753Smm size_t i; 99228753Smm 100228753Smm res = malloc(sizeof(struct archive_entry_linkresolver)); 101228753Smm if (res == NULL) 102228753Smm return (NULL); 103228753Smm memset(res, 0, sizeof(struct archive_entry_linkresolver)); 104228753Smm res->number_buckets = links_cache_initial_size; 105228753Smm res->buckets = malloc(res->number_buckets * 106228753Smm sizeof(res->buckets[0])); 107228753Smm if (res->buckets == NULL) { 108228753Smm free(res); 109228753Smm return (NULL); 110228753Smm } 111228753Smm for (i = 0; i < res->number_buckets; i++) 112228753Smm res->buckets[i] = NULL; 113228753Smm return (res); 114228753Smm} 115228753Smm 116228753Smmvoid 117228753Smmarchive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res, 118228753Smm int fmt) 119228753Smm{ 120228753Smm int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK; 121228753Smm 122228753Smm switch (fmtbase) { 123228753Smm case ARCHIVE_FORMAT_CPIO: 124228753Smm switch (fmt) { 125228753Smm case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC: 126228753Smm case ARCHIVE_FORMAT_CPIO_SVR4_CRC: 127228753Smm res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO; 128228753Smm break; 129228753Smm default: 130228753Smm res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; 131228753Smm break; 132228753Smm } 133228753Smm break; 134228753Smm case ARCHIVE_FORMAT_MTREE: 135228753Smm res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE; 136228753Smm break; 137228753Smm case ARCHIVE_FORMAT_TAR: 138228753Smm res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR; 139228753Smm break; 140228753Smm default: 141228753Smm res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR; 142228753Smm break; 143228753Smm } 144228753Smm} 145228753Smm 146228753Smmvoid 147228753Smmarchive_entry_linkresolver_free(struct archive_entry_linkresolver *res) 148228753Smm{ 149228753Smm struct links_entry *le; 150228753Smm 151228753Smm if (res == NULL) 152228753Smm return; 153228753Smm 154228753Smm if (res->buckets != NULL) { 155228753Smm while ((le = next_entry(res)) != NULL) 156228753Smm archive_entry_free(le->entry); 157228753Smm free(res->buckets); 158228753Smm res->buckets = NULL; 159228753Smm } 160228753Smm free(res); 161228753Smm} 162228753Smm 163228753Smmvoid 164228753Smmarchive_entry_linkify(struct archive_entry_linkresolver *res, 165228753Smm struct archive_entry **e, struct archive_entry **f) 166228753Smm{ 167228753Smm struct links_entry *le; 168228753Smm struct archive_entry *t; 169228753Smm 170228753Smm *f = NULL; /* Default: Don't return a second entry. */ 171228753Smm 172228753Smm if (*e == NULL) { 173228753Smm le = next_entry(res); 174228753Smm if (le != NULL) { 175228753Smm *e = le->entry; 176228753Smm le->entry = NULL; 177228753Smm } 178228753Smm return; 179228753Smm } 180228753Smm 181228753Smm /* If it has only one link, then we're done. */ 182228753Smm if (archive_entry_nlink(*e) == 1) 183228753Smm return; 184228753Smm /* Directories, devices never have hardlinks. */ 185228753Smm if (archive_entry_filetype(*e) == AE_IFDIR 186228753Smm || archive_entry_filetype(*e) == AE_IFBLK 187228753Smm || archive_entry_filetype(*e) == AE_IFCHR) 188228753Smm return; 189228753Smm 190228753Smm switch (res->strategy) { 191228753Smm case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR: 192228753Smm le = find_entry(res, *e); 193228753Smm if (le != NULL) { 194228753Smm archive_entry_unset_size(*e); 195228753Smm archive_entry_copy_hardlink(*e, 196228753Smm archive_entry_pathname(le->canonical)); 197228753Smm } else 198228753Smm insert_entry(res, *e); 199228753Smm return; 200228753Smm case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE: 201228753Smm le = find_entry(res, *e); 202228753Smm if (le != NULL) { 203228753Smm archive_entry_copy_hardlink(*e, 204228753Smm archive_entry_pathname(le->canonical)); 205228753Smm } else 206228753Smm insert_entry(res, *e); 207228753Smm return; 208228753Smm case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO: 209228753Smm /* This one is trivial. */ 210228753Smm return; 211228753Smm case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO: 212228753Smm le = find_entry(res, *e); 213228753Smm if (le != NULL) { 214228753Smm /* 215228753Smm * Put the new entry in le, return the 216228753Smm * old entry from le. 217228753Smm */ 218228753Smm t = *e; 219228753Smm *e = le->entry; 220228753Smm le->entry = t; 221228753Smm /* Make the old entry into a hardlink. */ 222228753Smm archive_entry_unset_size(*e); 223228753Smm archive_entry_copy_hardlink(*e, 224228753Smm archive_entry_pathname(le->canonical)); 225228753Smm /* If we ran out of links, return the 226228753Smm * final entry as well. */ 227228753Smm if (le->links == 0) { 228228753Smm *f = le->entry; 229228753Smm le->entry = NULL; 230228753Smm } 231228753Smm } else { 232228753Smm /* 233228753Smm * If we haven't seen it, tuck it away 234228753Smm * for future use. 235228753Smm */ 236228753Smm le = insert_entry(res, *e); 237228753Smm le->entry = *e; 238228753Smm *e = NULL; 239228753Smm } 240228753Smm return; 241228753Smm default: 242228753Smm break; 243228753Smm } 244228753Smm return; 245228753Smm} 246228753Smm 247228753Smmstatic struct links_entry * 248228753Smmfind_entry(struct archive_entry_linkresolver *res, 249228753Smm struct archive_entry *entry) 250228753Smm{ 251228753Smm struct links_entry *le; 252228753Smm int hash, bucket; 253228753Smm dev_t dev; 254228753Smm int64_t ino; 255228753Smm 256228753Smm /* Free a held entry. */ 257228753Smm if (res->spare != NULL) { 258228753Smm archive_entry_free(res->spare->canonical); 259228753Smm archive_entry_free(res->spare->entry); 260228753Smm free(res->spare); 261228753Smm res->spare = NULL; 262228753Smm } 263228753Smm 264228753Smm /* If the links cache overflowed and got flushed, don't bother. */ 265228753Smm if (res->buckets == NULL) 266228753Smm return (NULL); 267228753Smm 268228753Smm dev = archive_entry_dev(entry); 269228753Smm ino = archive_entry_ino64(entry); 270228753Smm hash = (int)(dev ^ ino); 271228753Smm 272228753Smm /* Try to locate this entry in the links cache. */ 273228753Smm bucket = hash % res->number_buckets; 274228753Smm for (le = res->buckets[bucket]; le != NULL; le = le->next) { 275228753Smm if (le->hash == hash 276228753Smm && dev == archive_entry_dev(le->canonical) 277228753Smm && ino == archive_entry_ino64(le->canonical)) { 278228753Smm /* 279228753Smm * Decrement link count each time and release 280228753Smm * the entry if it hits zero. This saves 281228753Smm * memory and is necessary for detecting 282228753Smm * missed links. 283228753Smm */ 284228753Smm --le->links; 285228753Smm if (le->links > 0) 286228753Smm return (le); 287228753Smm /* Remove it from this hash bucket. */ 288228753Smm if (le->previous != NULL) 289228753Smm le->previous->next = le->next; 290228753Smm if (le->next != NULL) 291228753Smm le->next->previous = le->previous; 292228753Smm if (res->buckets[bucket] == le) 293228753Smm res->buckets[bucket] = le->next; 294228753Smm res->number_entries--; 295228753Smm /* Defer freeing this entry. */ 296228753Smm res->spare = le; 297228753Smm return (le); 298228753Smm } 299228753Smm } 300228753Smm return (NULL); 301228753Smm} 302228753Smm 303228753Smmstatic struct links_entry * 304228753Smmnext_entry(struct archive_entry_linkresolver *res) 305228753Smm{ 306228753Smm struct links_entry *le; 307228753Smm size_t bucket; 308228753Smm 309228753Smm /* Free a held entry. */ 310228753Smm if (res->spare != NULL) { 311228753Smm archive_entry_free(res->spare->canonical); 312228753Smm free(res->spare); 313228753Smm res->spare = NULL; 314228753Smm } 315228753Smm 316228753Smm /* If the links cache overflowed and got flushed, don't bother. */ 317228753Smm if (res->buckets == NULL) 318228753Smm return (NULL); 319228753Smm 320228753Smm /* Look for next non-empty bucket in the links cache. */ 321228753Smm for (bucket = 0; bucket < res->number_buckets; bucket++) { 322228753Smm le = res->buckets[bucket]; 323228753Smm if (le != NULL) { 324228753Smm /* Remove it from this hash bucket. */ 325228753Smm if (le->next != NULL) 326228753Smm le->next->previous = le->previous; 327228753Smm res->buckets[bucket] = le->next; 328228753Smm res->number_entries--; 329228753Smm /* Defer freeing this entry. */ 330228753Smm res->spare = le; 331228753Smm return (le); 332228753Smm } 333228753Smm } 334228753Smm return (NULL); 335228753Smm} 336228753Smm 337228753Smmstatic struct links_entry * 338228753Smminsert_entry(struct archive_entry_linkresolver *res, 339228753Smm struct archive_entry *entry) 340228753Smm{ 341228753Smm struct links_entry *le; 342228753Smm int hash, bucket; 343228753Smm 344228753Smm /* Add this entry to the links cache. */ 345228753Smm le = malloc(sizeof(struct links_entry)); 346228753Smm if (le == NULL) 347228753Smm return (NULL); 348228753Smm memset(le, 0, sizeof(*le)); 349228753Smm le->canonical = archive_entry_clone(entry); 350228753Smm 351228753Smm /* If the links cache is getting too full, enlarge the hash table. */ 352228753Smm if (res->number_entries > res->number_buckets * 2) 353228753Smm grow_hash(res); 354228753Smm 355228753Smm hash = archive_entry_dev(entry) ^ archive_entry_ino64(entry); 356228753Smm bucket = hash % res->number_buckets; 357228753Smm 358228753Smm /* If we could allocate the entry, record it. */ 359228753Smm if (res->buckets[bucket] != NULL) 360228753Smm res->buckets[bucket]->previous = le; 361228753Smm res->number_entries++; 362228753Smm le->next = res->buckets[bucket]; 363228753Smm le->previous = NULL; 364228753Smm res->buckets[bucket] = le; 365228753Smm le->hash = hash; 366228753Smm le->links = archive_entry_nlink(entry) - 1; 367228753Smm return (le); 368228753Smm} 369228753Smm 370228753Smmstatic void 371228753Smmgrow_hash(struct archive_entry_linkresolver *res) 372228753Smm{ 373228753Smm struct links_entry *le, **new_buckets; 374228753Smm size_t new_size; 375228753Smm size_t i, bucket; 376228753Smm 377228753Smm /* Try to enlarge the bucket list. */ 378228753Smm new_size = res->number_buckets * 2; 379228753Smm new_buckets = malloc(new_size * sizeof(struct links_entry *)); 380228753Smm 381228753Smm if (new_buckets != NULL) { 382228753Smm memset(new_buckets, 0, 383228753Smm new_size * sizeof(struct links_entry *)); 384228753Smm for (i = 0; i < res->number_buckets; i++) { 385228753Smm while (res->buckets[i] != NULL) { 386228753Smm /* Remove entry from old bucket. */ 387228753Smm le = res->buckets[i]; 388228753Smm res->buckets[i] = le->next; 389228753Smm 390228753Smm /* Add entry to new bucket. */ 391228753Smm bucket = le->hash % new_size; 392228753Smm 393228753Smm if (new_buckets[bucket] != NULL) 394228753Smm new_buckets[bucket]->previous = 395228753Smm le; 396228753Smm le->next = new_buckets[bucket]; 397228753Smm le->previous = NULL; 398228753Smm new_buckets[bucket] = le; 399228753Smm } 400228753Smm } 401228753Smm free(res->buckets); 402228753Smm res->buckets = new_buckets; 403228753Smm res->number_buckets = new_size; 404228753Smm } 405228753Smm} 406