1/*- 2 * Copyright (c) 2003-2007 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26#include "archive_platform.h" 27__FBSDID("$FreeBSD$"); 28 29#ifdef HAVE_SYS_STAT_H 30#include <sys/stat.h> 31#endif 32#ifdef HAVE_ERRNO_H 33#include <errno.h> 34#endif 35#include <stdio.h> 36#ifdef HAVE_STDLIB_H 37#include <stdlib.h> 38#endif 39#ifdef HAVE_STRING_H 40#include <string.h> 41#endif 42 43#include "archive.h" 44#include "archive_entry.h" 45 46/* 47 * This is mostly a pretty straightforward hash table implementation. 48 * The only interesting bit is the different strategies used to 49 * match up links. These strategies match those used by various 50 * archiving formats: 51 * tar - content stored with first link, remainder refer back to it. 52 * This requires us to match each subsequent link up with the 53 * first appearance. 54 * cpio - Old cpio just stored body with each link, match-ups were 55 * implicit. This is trivial. 56 * new cpio - New cpio only stores body with last link, match-ups 57 * are implicit. This is actually quite tricky; see the notes 58 * below. 59 */ 60 61/* Users pass us a format code, we translate that into a strategy here. */ 62#define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0 63#define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1 64#define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2 65#define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3 66 67/* Initial size of link cache. */ 68#define links_cache_initial_size 1024 69 70struct links_entry { 71 struct links_entry *next; 72 struct links_entry *previous; 73 struct archive_entry *canonical; 74 struct archive_entry *entry; 75 size_t hash; 76 unsigned int links; /* # links not yet seen */ 77}; 78 79struct archive_entry_linkresolver { 80 struct links_entry **buckets; 81 struct links_entry *spare; 82 unsigned long number_entries; 83 size_t number_buckets; 84 int strategy; 85}; 86 87#define NEXT_ENTRY_DEFERRED 1 88#define NEXT_ENTRY_PARTIAL 2 89#define NEXT_ENTRY_ALL (NEXT_ENTRY_DEFERRED | NEXT_ENTRY_PARTIAL) 90 91static struct links_entry *find_entry(struct archive_entry_linkresolver *, 92 struct archive_entry *); 93static void grow_hash(struct archive_entry_linkresolver *); 94static struct links_entry *insert_entry(struct archive_entry_linkresolver *, 95 struct archive_entry *); 96static struct links_entry *next_entry(struct archive_entry_linkresolver *, 97 int); 98 99struct archive_entry_linkresolver * 100archive_entry_linkresolver_new(void) 101{ 102 struct archive_entry_linkresolver *res; 103 104 /* Check for positive power-of-two */ 105 if (links_cache_initial_size == 0 || 106 (links_cache_initial_size & (links_cache_initial_size - 1)) != 0) 107 return (NULL); 108 109 res = calloc(1, sizeof(struct archive_entry_linkresolver)); 110 if (res == NULL) 111 return (NULL); 112 res->number_buckets = links_cache_initial_size; 113 res->buckets = calloc(res->number_buckets, sizeof(res->buckets[0])); 114 if (res->buckets == NULL) { 115 free(res); 116 return (NULL); 117 } 118 return (res); 119} 120 121void 122archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res, 123 int fmt) 124{ 125 int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK; 126 127 switch (fmtbase) { 128 case ARCHIVE_FORMAT_7ZIP: 129 case ARCHIVE_FORMAT_AR: 130 case ARCHIVE_FORMAT_ZIP: 131 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; 132 break; 133 case ARCHIVE_FORMAT_CPIO: 134 switch (fmt) { 135 case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC: 136 case ARCHIVE_FORMAT_CPIO_SVR4_CRC: 137 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO; 138 break; 139 default: 140 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; 141 break; 142 } 143 break; 144 case ARCHIVE_FORMAT_MTREE: 145 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE; 146 break; 147 case ARCHIVE_FORMAT_ISO9660: 148 case ARCHIVE_FORMAT_SHAR: 149 case ARCHIVE_FORMAT_TAR: 150 case ARCHIVE_FORMAT_XAR: 151 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR; 152 break; 153 default: 154 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; 155 break; 156 } 157} 158 159void 160archive_entry_linkresolver_free(struct archive_entry_linkresolver *res) 161{ 162 struct links_entry *le; 163 164 if (res == NULL) 165 return; 166 167 while ((le = next_entry(res, NEXT_ENTRY_ALL)) != NULL) 168 archive_entry_free(le->entry); 169 free(res->buckets); 170 free(res); 171} 172 173void 174archive_entry_linkify(struct archive_entry_linkresolver *res, 175 struct archive_entry **e, struct archive_entry **f) 176{ 177 struct links_entry *le; 178 struct archive_entry *t; 179 180 *f = NULL; /* Default: Don't return a second entry. */ 181 182 if (*e == NULL) { 183 le = next_entry(res, NEXT_ENTRY_DEFERRED); 184 if (le != NULL) { 185 *e = le->entry; 186 le->entry = NULL; 187 } 188 return; 189 } 190 191 /* If it has only one link, then we're done. */ 192 if (archive_entry_nlink(*e) == 1) 193 return; 194 /* Directories, devices never have hardlinks. */ 195 if (archive_entry_filetype(*e) == AE_IFDIR 196 || archive_entry_filetype(*e) == AE_IFBLK 197 || archive_entry_filetype(*e) == AE_IFCHR) 198 return; 199 200 switch (res->strategy) { 201 case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR: 202 le = find_entry(res, *e); 203 if (le != NULL) { 204 archive_entry_unset_size(*e); 205 archive_entry_copy_hardlink(*e, 206 archive_entry_pathname(le->canonical)); 207 } else 208 insert_entry(res, *e); 209 return; 210 case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE: 211 le = find_entry(res, *e); 212 if (le != NULL) { 213 archive_entry_copy_hardlink(*e, 214 archive_entry_pathname(le->canonical)); 215 } else 216 insert_entry(res, *e); 217 return; 218 case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO: 219 /* This one is trivial. */ 220 return; 221 case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO: 222 le = find_entry(res, *e); 223 if (le != NULL) { 224 /* 225 * Put the new entry in le, return the 226 * old entry from le. 227 */ 228 t = *e; 229 *e = le->entry; 230 le->entry = t; 231 /* Make the old entry into a hardlink. */ 232 archive_entry_unset_size(*e); 233 archive_entry_copy_hardlink(*e, 234 archive_entry_pathname(le->canonical)); 235 /* If we ran out of links, return the 236 * final entry as well. */ 237 if (le->links == 0) { 238 *f = le->entry; 239 le->entry = NULL; 240 } 241 } else { 242 /* 243 * If we haven't seen it, tuck it away 244 * for future use. 245 */ 246 le = insert_entry(res, *e); 247 if (le == NULL) 248 /* XXX We should return an error code XXX */ 249 return; 250 le->entry = *e; 251 *e = NULL; 252 } 253 return; 254 default: 255 break; 256 } 257 return; 258} 259 260static struct links_entry * 261find_entry(struct archive_entry_linkresolver *res, 262 struct archive_entry *entry) 263{ 264 struct links_entry *le; 265 size_t hash, bucket; 266 dev_t dev; 267 int64_t ino; 268 269 /* Free a held entry. */ 270 if (res->spare != NULL) { 271 archive_entry_free(res->spare->canonical); 272 archive_entry_free(res->spare->entry); 273 free(res->spare); 274 res->spare = NULL; 275 } 276 277 dev = archive_entry_dev(entry); 278 ino = archive_entry_ino64(entry); 279 hash = (size_t)(dev ^ ino); 280 281 /* Try to locate this entry in the links cache. */ 282 bucket = hash & (res->number_buckets - 1); 283 for (le = res->buckets[bucket]; le != NULL; le = le->next) { 284 if (le->hash == hash 285 && dev == archive_entry_dev(le->canonical) 286 && ino == archive_entry_ino64(le->canonical)) { 287 /* 288 * Decrement link count each time and release 289 * the entry if it hits zero. This saves 290 * memory and is necessary for detecting 291 * missed links. 292 */ 293 --le->links; 294 if (le->links > 0) 295 return (le); 296 /* Remove it from this hash bucket. */ 297 if (le->previous != NULL) 298 le->previous->next = le->next; 299 if (le->next != NULL) 300 le->next->previous = le->previous; 301 if (res->buckets[bucket] == le) 302 res->buckets[bucket] = le->next; 303 res->number_entries--; 304 /* Defer freeing this entry. */ 305 res->spare = le; 306 return (le); 307 } 308 } 309 return (NULL); 310} 311 312static struct links_entry * 313next_entry(struct archive_entry_linkresolver *res, int mode) 314{ 315 struct links_entry *le; 316 size_t bucket; 317 318 /* Free a held entry. */ 319 if (res->spare != NULL) { 320 archive_entry_free(res->spare->canonical); 321 archive_entry_free(res->spare->entry); 322 free(res->spare); 323 res->spare = NULL; 324 } 325 326 /* Look for next non-empty bucket in the links cache. */ 327 for (bucket = 0; bucket < res->number_buckets; bucket++) { 328 for (le = res->buckets[bucket]; le != NULL; le = le->next) { 329 if (le->entry != NULL && 330 (mode & NEXT_ENTRY_DEFERRED) == 0) 331 continue; 332 if (le->entry == NULL && 333 (mode & NEXT_ENTRY_PARTIAL) == 0) 334 continue; 335 /* Remove it from this hash bucket. */ 336 if (le->next != NULL) 337 le->next->previous = le->previous; 338 if (le->previous != NULL) 339 le->previous->next = le->next; 340 else 341 res->buckets[bucket] = le->next; 342 res->number_entries--; 343 /* Defer freeing this entry. */ 344 res->spare = le; 345 return (le); 346 } 347 } 348 return (NULL); 349} 350 351static struct links_entry * 352insert_entry(struct archive_entry_linkresolver *res, 353 struct archive_entry *entry) 354{ 355 struct links_entry *le; 356 size_t hash, bucket; 357 358 /* Add this entry to the links cache. */ 359 le = calloc(1, sizeof(struct links_entry)); 360 if (le == NULL) 361 return (NULL); 362 le->canonical = archive_entry_clone(entry); 363 364 /* If the links cache is getting too full, enlarge the hash table. */ 365 if (res->number_entries > res->number_buckets * 2) 366 grow_hash(res); 367 368 hash = (size_t)(archive_entry_dev(entry) ^ archive_entry_ino64(entry)); 369 bucket = hash & (res->number_buckets - 1); 370 371 /* If we could allocate the entry, record it. */ 372 if (res->buckets[bucket] != NULL) 373 res->buckets[bucket]->previous = le; 374 res->number_entries++; 375 le->next = res->buckets[bucket]; 376 le->previous = NULL; 377 res->buckets[bucket] = le; 378 le->hash = hash; 379 le->links = archive_entry_nlink(entry) - 1; 380 return (le); 381} 382 383static void 384grow_hash(struct archive_entry_linkresolver *res) 385{ 386 struct links_entry *le, **new_buckets; 387 size_t new_size; 388 size_t i, bucket; 389 390 /* Try to enlarge the bucket list. */ 391 new_size = res->number_buckets * 2; 392 if (new_size < res->number_buckets) 393 return; 394 new_buckets = calloc(new_size, sizeof(struct links_entry *)); 395 396 if (new_buckets == NULL) 397 return; 398 399 for (i = 0; i < res->number_buckets; i++) { 400 while (res->buckets[i] != NULL) { 401 /* Remove entry from old bucket. */ 402 le = res->buckets[i]; 403 res->buckets[i] = le->next; 404 405 /* Add entry to new bucket. */ 406 bucket = le->hash & (new_size - 1); 407 408 if (new_buckets[bucket] != NULL) 409 new_buckets[bucket]->previous = le; 410 le->next = new_buckets[bucket]; 411 le->previous = NULL; 412 new_buckets[bucket] = le; 413 } 414 } 415 free(res->buckets); 416 res->buckets = new_buckets; 417 res->number_buckets = new_size; 418} 419 420struct archive_entry * 421archive_entry_partial_links(struct archive_entry_linkresolver *res, 422 unsigned int *links) 423{ 424 struct archive_entry *e; 425 struct links_entry *le; 426 427 /* Free a held entry. */ 428 if (res->spare != NULL) { 429 archive_entry_free(res->spare->canonical); 430 archive_entry_free(res->spare->entry); 431 free(res->spare); 432 res->spare = NULL; 433 } 434 435 le = next_entry(res, NEXT_ENTRY_PARTIAL); 436 if (le != NULL) { 437 e = le->canonical; 438 if (links != NULL) 439 *links = le->links; 440 le->canonical = NULL; 441 } else { 442 e = NULL; 443 if (links != NULL) 444 *links = 0; 445 } 446 return (e); 447} 448