1/* Functions for dealing with sparse files 2 3 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. 4 5 This program is free software; you can redistribute it and/or modify it 6 under the terms of the GNU General Public License as published by the 7 Free Software Foundation; either version 2, or (at your option) any later 8 version. 9 10 This program is distributed in the hope that it will be useful, but 11 WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 13 Public License for more details. 14 15 You should have received a copy of the GNU General Public License along 16 with this program; if not, write to the Free Software Foundation, Inc., 17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 18 19#include <system.h> 20#include <inttostr.h> 21#include <quotearg.h> 22#include "common.h" 23 24struct tar_sparse_file; 25static bool sparse_select_optab (struct tar_sparse_file *file); 26 27enum sparse_scan_state 28 { 29 scan_begin, 30 scan_block, 31 scan_end 32 }; 33 34struct tar_sparse_optab 35{ 36 bool (*init) (struct tar_sparse_file *); 37 bool (*done) (struct tar_sparse_file *); 38 bool (*sparse_member_p) (struct tar_sparse_file *); 39 bool (*dump_header) (struct tar_sparse_file *); 40 bool (*fixup_header) (struct tar_sparse_file *); 41 bool (*decode_header) (struct tar_sparse_file *); 42 bool (*scan_block) (struct tar_sparse_file *, enum sparse_scan_state, 43 void *); 44 bool (*dump_region) (struct tar_sparse_file *, size_t); 45 bool (*extract_region) (struct tar_sparse_file *, size_t); 46}; 47 48struct tar_sparse_file 49{ 50 int fd; /* File descriptor */ 51 bool seekable; /* Is fd seekable? */ 52 off_t offset; /* Current offset in fd if seekable==false. 
53 Otherwise unused */ 54 off_t dumped_size; /* Number of bytes actually written 55 to the archive */ 56 struct tar_stat_info *stat_info; /* Information about the file */ 57 struct tar_sparse_optab const *optab; /* Operation table */ 58 void *closure; /* Any additional data optab calls might 59 require */ 60}; 61 62/* Dump zeros to file->fd until offset is reached. It is used instead of 63 lseek if the output file is not seekable */ 64static bool 65dump_zeros (struct tar_sparse_file *file, off_t offset) 66{ 67 static char const zero_buf[BLOCKSIZE]; 68 69 if (offset < file->offset) 70 { 71 errno = EINVAL; 72 return false; 73 } 74 75 while (file->offset < offset) 76 { 77 size_t size = (BLOCKSIZE < offset - file->offset 78 ? BLOCKSIZE 79 : offset - file->offset); 80 ssize_t wrbytes; 81 82 wrbytes = write (file->fd, zero_buf, size); 83 if (wrbytes <= 0) 84 { 85 if (wrbytes == 0) 86 errno = EINVAL; 87 return false; 88 } 89 file->offset += wrbytes; 90 } 91 92 return true; 93} 94 95static bool 96tar_sparse_member_p (struct tar_sparse_file *file) 97{ 98 if (file->optab->sparse_member_p) 99 return file->optab->sparse_member_p (file); 100 return false; 101} 102 103static bool 104tar_sparse_init (struct tar_sparse_file *file) 105{ 106 memset (file, 0, sizeof *file); 107 108 if (!sparse_select_optab (file)) 109 return false; 110 111 if (file->optab->init) 112 return file->optab->init (file); 113 114 return true; 115} 116 117static bool 118tar_sparse_done (struct tar_sparse_file *file) 119{ 120 if (file->optab->done) 121 return file->optab->done (file); 122 return true; 123} 124 125static bool 126tar_sparse_scan (struct tar_sparse_file *file, enum sparse_scan_state state, 127 void *block) 128{ 129 if (file->optab->scan_block) 130 return file->optab->scan_block (file, state, block); 131 return true; 132} 133 134static bool 135tar_sparse_dump_region (struct tar_sparse_file *file, size_t i) 136{ 137 if (file->optab->dump_region) 138 return file->optab->dump_region (file, i); 139 
return false; 140} 141 142static bool 143tar_sparse_extract_region (struct tar_sparse_file *file, size_t i) 144{ 145 if (file->optab->extract_region) 146 return file->optab->extract_region (file, i); 147 return false; 148} 149 150static bool 151tar_sparse_dump_header (struct tar_sparse_file *file) 152{ 153 if (file->optab->dump_header) 154 return file->optab->dump_header (file); 155 return false; 156} 157 158static bool 159tar_sparse_decode_header (struct tar_sparse_file *file) 160{ 161 if (file->optab->decode_header) 162 return file->optab->decode_header (file); 163 return true; 164} 165 166static bool 167tar_sparse_fixup_header (struct tar_sparse_file *file) 168{ 169 if (file->optab->fixup_header) 170 return file->optab->fixup_header (file); 171 return true; 172} 173 174 175static bool 176lseek_or_error (struct tar_sparse_file *file, off_t offset) 177{ 178 if (file->seekable 179 ? lseek (file->fd, offset, SEEK_SET) < 0 180 : ! dump_zeros (file, offset)) 181 { 182 seek_diag_details (file->stat_info->orig_file_name, offset); 183 return false; 184 } 185 return true; 186} 187 188/* Takes a blockful of data and basically cruises through it to see if 189 it's made *entirely* of zeros, returning a 0 the instant it finds 190 something that is a nonzero, i.e., useful data. 
*/ 191static bool 192zero_block_p (char const *buffer, size_t size) 193{ 194 while (size--) 195 if (*buffer++) 196 return false; 197 return true; 198} 199 200static void 201sparse_add_map (struct tar_stat_info *st, struct sp_array const *sp) 202{ 203 struct sp_array *sparse_map = st->sparse_map; 204 size_t avail = st->sparse_map_avail; 205 if (avail == st->sparse_map_size) 206 st->sparse_map = sparse_map = 207 x2nrealloc (sparse_map, &st->sparse_map_size, sizeof *sparse_map); 208 sparse_map[avail] = *sp; 209 st->sparse_map_avail = avail + 1; 210} 211 212/* Scan the sparse file and create its map */ 213static bool 214sparse_scan_file (struct tar_sparse_file *file) 215{ 216 struct tar_stat_info *st = file->stat_info; 217 int fd = file->fd; 218 char buffer[BLOCKSIZE]; 219 size_t count; 220 off_t offset = 0; 221 struct sp_array sp = {0, 0}; 222 223 if (!lseek_or_error (file, 0)) 224 return false; 225 226 st->archive_file_size = 0; 227 228 if (!tar_sparse_scan (file, scan_begin, NULL)) 229 return false; 230 231 while ((count = safe_read (fd, buffer, sizeof buffer)) != 0 232 && count != SAFE_READ_ERROR) 233 { 234 /* Analyze the block. 
*/ 235 if (zero_block_p (buffer, count)) 236 { 237 if (sp.numbytes) 238 { 239 sparse_add_map (st, &sp); 240 sp.numbytes = 0; 241 if (!tar_sparse_scan (file, scan_block, NULL)) 242 return false; 243 } 244 } 245 else 246 { 247 if (sp.numbytes == 0) 248 sp.offset = offset; 249 sp.numbytes += count; 250 st->archive_file_size += count; 251 if (!tar_sparse_scan (file, scan_block, buffer)) 252 return false; 253 } 254 255 offset += count; 256 } 257 258 if (sp.numbytes == 0) 259 sp.offset = offset; 260 261 sparse_add_map (st, &sp); 262 st->archive_file_size += count; 263 return tar_sparse_scan (file, scan_end, NULL); 264} 265 266static struct tar_sparse_optab const oldgnu_optab; 267static struct tar_sparse_optab const star_optab; 268static struct tar_sparse_optab const pax_optab; 269 270static bool 271sparse_select_optab (struct tar_sparse_file *file) 272{ 273 switch (current_format == DEFAULT_FORMAT ? archive_format : current_format) 274 { 275 case V7_FORMAT: 276 case USTAR_FORMAT: 277 return false; 278 279 case OLDGNU_FORMAT: 280 case GNU_FORMAT: /*FIXME: This one should disappear? */ 281 file->optab = &oldgnu_optab; 282 break; 283 284 case POSIX_FORMAT: 285 file->optab = &pax_optab; 286 break; 287 288 case STAR_FORMAT: 289 file->optab = &star_optab; 290 break; 291 292 default: 293 return false; 294 } 295 return true; 296} 297 298static bool 299sparse_dump_region (struct tar_sparse_file *file, size_t i) 300{ 301 union block *blk; 302 off_t bytes_left = file->stat_info->sparse_map[i].numbytes; 303 304 if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset)) 305 return false; 306 307 while (bytes_left > 0) 308 { 309 size_t bufsize = (bytes_left > BLOCKSIZE) ? 
BLOCKSIZE : bytes_left; 310 size_t bytes_read; 311 312 blk = find_next_block (); 313 bytes_read = safe_read (file->fd, blk->buffer, bufsize); 314 if (bytes_read == SAFE_READ_ERROR) 315 { 316 read_diag_details (file->stat_info->orig_file_name, 317 (file->stat_info->sparse_map[i].offset 318 + file->stat_info->sparse_map[i].numbytes 319 - bytes_left), 320 bufsize); 321 return false; 322 } 323 324 memset (blk->buffer + bytes_read, 0, BLOCKSIZE - bytes_read); 325 bytes_left -= bytes_read; 326 file->dumped_size += bytes_read; 327 mv_size_left (file->stat_info->archive_file_size - file->dumped_size); 328 set_next_block_after (blk); 329 } 330 331 return true; 332} 333 334static bool 335sparse_extract_region (struct tar_sparse_file *file, size_t i) 336{ 337 size_t write_size; 338 339 if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset)) 340 return false; 341 342 write_size = file->stat_info->sparse_map[i].numbytes; 343 344 if (write_size == 0) 345 { 346 /* Last block of the file is a hole */ 347 if (file->seekable && sys_truncate (file->fd)) 348 truncate_warn (file->stat_info->orig_file_name); 349 } 350 else while (write_size > 0) 351 { 352 size_t count; 353 size_t wrbytes = (write_size > BLOCKSIZE) ? 
BLOCKSIZE : write_size; 354 union block *blk = find_next_block (); 355 if (!blk) 356 { 357 ERROR ((0, 0, _("Unexpected EOF in archive"))); 358 return false; 359 } 360 set_next_block_after (blk); 361 count = full_write (file->fd, blk->buffer, wrbytes); 362 write_size -= count; 363 file->dumped_size += count; 364 mv_size_left (file->stat_info->archive_file_size - file->dumped_size); 365 file->offset += count; 366 if (count != wrbytes) 367 { 368 write_error_details (file->stat_info->orig_file_name, 369 count, wrbytes); 370 return false; 371 } 372 } 373 return true; 374} 375 376 377 378/* Interface functions */ 379enum dump_status 380sparse_dump_file (int fd, struct tar_stat_info *st) 381{ 382 bool rc; 383 struct tar_sparse_file file; 384 385 if (!tar_sparse_init (&file)) 386 return dump_status_not_implemented; 387 388 file.stat_info = st; 389 file.fd = fd; 390 file.seekable = true; /* File *must* be seekable for dump to work */ 391 392 rc = sparse_scan_file (&file); 393 if (rc && file.optab->dump_region) 394 { 395 tar_sparse_dump_header (&file); 396 397 if (fd >= 0) 398 { 399 size_t i; 400 401 mv_begin (file.stat_info); 402 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++) 403 rc = tar_sparse_dump_region (&file, i); 404 mv_end (); 405 } 406 } 407 408 pad_archive (file.stat_info->archive_file_size - file.dumped_size); 409 return (tar_sparse_done (&file) && rc) ? 
dump_status_ok : dump_status_short; 410} 411 412bool 413sparse_member_p (struct tar_stat_info *st) 414{ 415 struct tar_sparse_file file; 416 417 if (!tar_sparse_init (&file)) 418 return false; 419 file.stat_info = st; 420 return tar_sparse_member_p (&file); 421} 422 423bool 424sparse_fixup_header (struct tar_stat_info *st) 425{ 426 struct tar_sparse_file file; 427 428 if (!tar_sparse_init (&file)) 429 return false; 430 file.stat_info = st; 431 return tar_sparse_fixup_header (&file); 432} 433 434enum dump_status 435sparse_extract_file (int fd, struct tar_stat_info *st, off_t *size) 436{ 437 bool rc = true; 438 struct tar_sparse_file file; 439 size_t i; 440 441 if (!tar_sparse_init (&file)) 442 return dump_status_not_implemented; 443 444 file.stat_info = st; 445 file.fd = fd; 446 file.seekable = lseek (fd, 0, SEEK_SET) == 0; 447 file.offset = 0; 448 449 rc = tar_sparse_decode_header (&file); 450 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++) 451 rc = tar_sparse_extract_region (&file, i); 452 *size = file.stat_info->archive_file_size - file.dumped_size; 453 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short; 454} 455 456enum dump_status 457sparse_skip_file (struct tar_stat_info *st) 458{ 459 bool rc = true; 460 struct tar_sparse_file file; 461 462 if (!tar_sparse_init (&file)) 463 return dump_status_not_implemented; 464 465 file.stat_info = st; 466 file.fd = -1; 467 468 rc = tar_sparse_decode_header (&file); 469 skip_file (file.stat_info->archive_file_size - file.dumped_size); 470 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short; 471} 472 473 474static bool 475check_sparse_region (struct tar_sparse_file *file, off_t beg, off_t end) 476{ 477 if (!lseek_or_error (file, beg)) 478 return false; 479 480 while (beg < end) 481 { 482 size_t bytes_read; 483 size_t rdsize = BLOCKSIZE < end - beg ? 
BLOCKSIZE : end - beg; 484 char diff_buffer[BLOCKSIZE]; 485 486 bytes_read = safe_read (file->fd, diff_buffer, rdsize); 487 if (bytes_read == SAFE_READ_ERROR) 488 { 489 read_diag_details (file->stat_info->orig_file_name, 490 beg, 491 rdsize); 492 return false; 493 } 494 if (!zero_block_p (diff_buffer, bytes_read)) 495 { 496 char begbuf[INT_BUFSIZE_BOUND (off_t)]; 497 report_difference (file->stat_info, 498 _("File fragment at %s is not a hole"), 499 offtostr (beg, begbuf)); 500 return false; 501 } 502 503 beg += bytes_read; 504 } 505 return true; 506} 507 508static bool 509check_data_region (struct tar_sparse_file *file, size_t i) 510{ 511 size_t size_left; 512 513 if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset)) 514 return false; 515 size_left = file->stat_info->sparse_map[i].numbytes; 516 mv_size_left (file->stat_info->archive_file_size - file->dumped_size); 517 518 while (size_left > 0) 519 { 520 size_t bytes_read; 521 size_t rdsize = (size_left > BLOCKSIZE) ? BLOCKSIZE : size_left; 522 char diff_buffer[BLOCKSIZE]; 523 524 union block *blk = find_next_block (); 525 if (!blk) 526 { 527 ERROR ((0, 0, _("Unexpected EOF in archive"))); 528 return false; 529 } 530 set_next_block_after (blk); 531 bytes_read = safe_read (file->fd, diff_buffer, rdsize); 532 if (bytes_read == SAFE_READ_ERROR) 533 { 534 read_diag_details (file->stat_info->orig_file_name, 535 (file->stat_info->sparse_map[i].offset 536 + file->stat_info->sparse_map[i].numbytes 537 - size_left), 538 rdsize); 539 return false; 540 } 541 file->dumped_size += bytes_read; 542 size_left -= bytes_read; 543 mv_size_left (file->stat_info->archive_file_size - file->dumped_size); 544 if (memcmp (blk->buffer, diff_buffer, rdsize)) 545 { 546 report_difference (file->stat_info, _("Contents differ")); 547 return false; 548 } 549 } 550 return true; 551} 552 553bool 554sparse_diff_file (int fd, struct tar_stat_info *st) 555{ 556 bool rc = true; 557 struct tar_sparse_file file; 558 size_t i; 559 off_t 
offset = 0; 560 561 if (!tar_sparse_init (&file)) 562 return dump_status_not_implemented; 563 564 file.stat_info = st; 565 file.fd = fd; 566 file.seekable = true; /* File *must* be seekable for compare to work */ 567 568 rc = tar_sparse_decode_header (&file); 569 mv_begin (st); 570 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++) 571 { 572 rc = check_sparse_region (&file, 573 offset, file.stat_info->sparse_map[i].offset) 574 && check_data_region (&file, i); 575 offset = file.stat_info->sparse_map[i].offset 576 + file.stat_info->sparse_map[i].numbytes; 577 } 578 579 if (!rc) 580 skip_file (file.stat_info->archive_file_size - file.dumped_size); 581 mv_end (); 582 583 tar_sparse_done (&file); 584 return rc; 585} 586 587 588/* Old GNU Format. The sparse file information is stored in the 589 oldgnu_header in the following manner: 590 591 The header is marked with type 'S'. Its `size' field contains 592 the cumulative size of all non-empty blocks of the file. The 593 actual file size is stored in `realsize' member of oldgnu_header. 594 595 The map of the file is stored in a list of `struct sparse'. 596 Each struct contains offset to the block of data and its 597 size (both as octal numbers). The first file header contains 598 at most 4 such structs (SPARSES_IN_OLDGNU_HEADER). If the map 599 contains more structs, then the field `isextended' of the main 600 header is set to 1 (binary) and the `struct sparse_header' 601 header follows, containing at most 21 following structs 602 (SPARSES_IN_SPARSE_HEADER). If more structs follow, `isextended' 603 field of the extended header is set and next next extension header 604 follows, etc... 
*/ 605 606enum oldgnu_add_status 607 { 608 add_ok, 609 add_finish, 610 add_fail 611 }; 612 613static bool 614oldgnu_sparse_member_p (struct tar_sparse_file *file __attribute__ ((unused))) 615{ 616 return current_header->header.typeflag == GNUTYPE_SPARSE; 617} 618 619/* Add a sparse item to the sparse file and its obstack */ 620static enum oldgnu_add_status 621oldgnu_add_sparse (struct tar_sparse_file *file, struct sparse *s) 622{ 623 struct sp_array sp; 624 625 if (s->numbytes[0] == '\0') 626 return add_finish; 627 sp.offset = OFF_FROM_HEADER (s->offset); 628 sp.numbytes = SIZE_FROM_HEADER (s->numbytes); 629 if (sp.offset < 0 630 || file->stat_info->stat.st_size < sp.offset + sp.numbytes 631 || file->stat_info->archive_file_size < 0) 632 return add_fail; 633 634 sparse_add_map (file->stat_info, &sp); 635 return add_ok; 636} 637 638static bool 639oldgnu_fixup_header (struct tar_sparse_file *file) 640{ 641 /* NOTE! st_size was initialized from the header 642 which actually contains archived size. 
The following fixes it */ 643 file->stat_info->archive_file_size = file->stat_info->stat.st_size; 644 file->stat_info->stat.st_size = 645 OFF_FROM_HEADER (current_header->oldgnu_header.realsize); 646 return true; 647} 648 649/* Convert old GNU format sparse data to internal representation */ 650static bool 651oldgnu_get_sparse_info (struct tar_sparse_file *file) 652{ 653 size_t i; 654 union block *h = current_header; 655 int ext_p; 656 enum oldgnu_add_status rc; 657 658 file->stat_info->sparse_map_avail = 0; 659 for (i = 0; i < SPARSES_IN_OLDGNU_HEADER; i++) 660 { 661 rc = oldgnu_add_sparse (file, &h->oldgnu_header.sp[i]); 662 if (rc != add_ok) 663 break; 664 } 665 666 for (ext_p = h->oldgnu_header.isextended; 667 rc == add_ok && ext_p; ext_p = h->sparse_header.isextended) 668 { 669 h = find_next_block (); 670 if (!h) 671 { 672 ERROR ((0, 0, _("Unexpected EOF in archive"))); 673 return false; 674 } 675 set_next_block_after (h); 676 for (i = 0; i < SPARSES_IN_SPARSE_HEADER && rc == add_ok; i++) 677 rc = oldgnu_add_sparse (file, &h->sparse_header.sp[i]); 678 } 679 680 if (rc == add_fail) 681 { 682 ERROR ((0, 0, _("%s: invalid sparse archive member"), 683 file->stat_info->orig_file_name)); 684 return false; 685 } 686 return true; 687} 688 689static void 690oldgnu_store_sparse_info (struct tar_sparse_file *file, size_t *pindex, 691 struct sparse *sp, size_t sparse_size) 692{ 693 for (; *pindex < file->stat_info->sparse_map_avail 694 && sparse_size > 0; sparse_size--, sp++, ++*pindex) 695 { 696 OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].offset, 697 sp->offset); 698 SIZE_TO_CHARS (file->stat_info->sparse_map[*pindex].numbytes, 699 sp->numbytes); 700 } 701} 702 703static bool 704oldgnu_dump_header (struct tar_sparse_file *file) 705{ 706 off_t block_ordinal = current_block_ordinal (); 707 union block *blk; 708 size_t i; 709 710 blk = start_header (file->stat_info); 711 blk->header.typeflag = GNUTYPE_SPARSE; 712 if (file->stat_info->sparse_map_avail > 
SPARSES_IN_OLDGNU_HEADER) 713 blk->oldgnu_header.isextended = 1; 714 715 /* Store the real file size */ 716 OFF_TO_CHARS (file->stat_info->stat.st_size, blk->oldgnu_header.realsize); 717 /* Store the effective (shrunken) file size */ 718 OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size); 719 720 i = 0; 721 oldgnu_store_sparse_info (file, &i, 722 blk->oldgnu_header.sp, 723 SPARSES_IN_OLDGNU_HEADER); 724 blk->oldgnu_header.isextended = i < file->stat_info->sparse_map_avail; 725 finish_header (file->stat_info, blk, block_ordinal); 726 727 while (i < file->stat_info->sparse_map_avail) 728 { 729 blk = find_next_block (); 730 memset (blk->buffer, 0, BLOCKSIZE); 731 oldgnu_store_sparse_info (file, &i, 732 blk->sparse_header.sp, 733 SPARSES_IN_SPARSE_HEADER); 734 if (i < file->stat_info->sparse_map_avail) 735 blk->sparse_header.isextended = 1; 736 set_next_block_after (blk); 737 } 738 return true; 739} 740 741static struct tar_sparse_optab const oldgnu_optab = { 742 NULL, /* No init function */ 743 NULL, /* No done function */ 744 oldgnu_sparse_member_p, 745 oldgnu_dump_header, 746 oldgnu_fixup_header, 747 oldgnu_get_sparse_info, 748 NULL, /* No scan_block function */ 749 sparse_dump_region, 750 sparse_extract_region, 751}; 752 753 754/* Star */ 755 756static bool 757star_sparse_member_p (struct tar_sparse_file *file __attribute__ ((unused))) 758{ 759 return current_header->header.typeflag == GNUTYPE_SPARSE; 760} 761 762static bool 763star_fixup_header (struct tar_sparse_file *file) 764{ 765 /* NOTE! st_size was initialized from the header 766 which actually contains archived size. 
The following fixes it */ 767 file->stat_info->archive_file_size = file->stat_info->stat.st_size; 768 file->stat_info->stat.st_size = 769 OFF_FROM_HEADER (current_header->star_in_header.realsize); 770 return true; 771} 772 773/* Convert STAR format sparse data to internal representation */ 774static bool 775star_get_sparse_info (struct tar_sparse_file *file) 776{ 777 size_t i; 778 union block *h = current_header; 779 int ext_p; 780 enum oldgnu_add_status rc = add_ok; 781 782 file->stat_info->sparse_map_avail = 0; 783 784 if (h->star_in_header.prefix[0] == '\0' 785 && h->star_in_header.sp[0].offset[10] != '\0') 786 { 787 /* Old star format */ 788 for (i = 0; i < SPARSES_IN_STAR_HEADER; i++) 789 { 790 rc = oldgnu_add_sparse (file, &h->star_in_header.sp[i]); 791 if (rc != add_ok) 792 break; 793 } 794 ext_p = h->star_in_header.isextended; 795 } 796 else 797 ext_p = 1; 798 799 for (; rc == add_ok && ext_p; ext_p = h->star_ext_header.isextended) 800 { 801 h = find_next_block (); 802 if (!h) 803 { 804 ERROR ((0, 0, _("Unexpected EOF in archive"))); 805 return false; 806 } 807 set_next_block_after (h); 808 for (i = 0; i < SPARSES_IN_STAR_EXT_HEADER && rc == add_ok; i++) 809 rc = oldgnu_add_sparse (file, &h->star_ext_header.sp[i]); 810 } 811 812 if (rc == add_fail) 813 { 814 ERROR ((0, 0, _("%s: invalid sparse archive member"), 815 file->stat_info->orig_file_name)); 816 return false; 817 } 818 return true; 819} 820 821 822static struct tar_sparse_optab const star_optab = { 823 NULL, /* No init function */ 824 NULL, /* No done function */ 825 star_sparse_member_p, 826 NULL, 827 star_fixup_header, 828 star_get_sparse_info, 829 NULL, /* No scan_block function */ 830 NULL, /* No dump region function */ 831 sparse_extract_region, 832}; 833 834 835/* GNU PAX sparse file format. There are several versions: 836 837 * 0.0 838 839 The initial version of sparse format used by tar 1.14-1.15.1. 
   The sparse file map is stored in x header:

   GNU.sparse.size      Real size of the stored file
   GNU.sparse.numblocks Number of blocks in the sparse map
   repeat numblocks times
     GNU.sparse.offset    Offset of the next data block
     GNU.sparse.numbytes  Size of the next data block
   end repeat

   This has been reported as conflicting with the POSIX specs. The reason is
   that offsets and sizes of non-zero data blocks were stored in multiple
   instances of GNU.sparse.offset/GNU.sparse.numbytes variables, whereas
   POSIX requires the latest occurrence of the variable to override all
   previous occurrences.

   To avoid this incompatibility the following two versions were introduced.

   * 0.1

   Used by tar 1.15.2 -- 1.15.91 (alpha releases).

   The sparse file map is stored in x header:

   GNU.sparse.size      Real size of the stored file
   GNU.sparse.numblocks Number of blocks in the sparse map
   GNU.sparse.map       Map of non-null data chunks. A string consisting
                        of comma-separated values "offset,size[,offset,size]..."

   The resulting GNU.sparse.map string can be *very* long. While POSIX does not
   impose any limit on the length of an x header variable, this can confuse
   some tars.

   * 1.0

   Starting from this version, the exact sparse format version is specified
   explicitly in the header using the following variables:

   GNU.sparse.major     Major version
   GNU.sparse.minor     Minor version

   X header keeps the following variables:

   GNU.sparse.name      Real file name of the sparse file
   GNU.sparse.realsize  Real size of the stored file (corresponds to the old
                        GNU.sparse.size variable)

   The name field of the ustar header is constructed using the pattern
   "%d/GNUSparseFile.%p/%f".

   The sparse map itself is stored in the file data block, preceding the actual
   file data. It consists of a series of decimal numbers of arbitrary length,
   delimited by newlines.
The map is padded with nulls to the nearest block 893 boundary. 894 895 The first number gives the number of entries in the map. Following are map 896 entries, each one consisting of two numbers giving the offset and size of 897 the data block it describes. 898 899 The format is designed in such a way that non-posix aware tars and tars not 900 supporting GNU.sparse.* keywords will extract each sparse file in its 901 condensed form with the file map attached and will place it into a separate 902 directory. Then, using a simple program it would be possible to expand the 903 file to its original form even without GNU tar. 904 905 Bu default, v.1.0 archives are created. To use other formats, 906 --sparse-version option is provided. Additionally, v.0.0 can be obtained 907 by deleting GNU.sparse.map from 0.1 format: --sparse-version 0.1 908 --pax-option delete=GNU.sparse.map 909*/ 910 911static bool 912pax_sparse_member_p (struct tar_sparse_file *file) 913{ 914 return file->stat_info->sparse_map_avail > 0 915 || file->stat_info->sparse_major > 0; 916} 917 918static bool 919pax_dump_header_0 (struct tar_sparse_file *file) 920{ 921 off_t block_ordinal = current_block_ordinal (); 922 union block *blk; 923 size_t i; 924 char nbuf[UINTMAX_STRSIZE_BOUND]; 925 struct sp_array *map = file->stat_info->sparse_map; 926 char *save_file_name = NULL; 927 928 /* Store the real file size */ 929 xheader_store ("GNU.sparse.size", file->stat_info, NULL); 930 xheader_store ("GNU.sparse.numblocks", file->stat_info, NULL); 931 932 if (xheader_keyword_deleted_p ("GNU.sparse.map") 933 || tar_sparse_minor == 0) 934 { 935 for (i = 0; i < file->stat_info->sparse_map_avail; i++) 936 { 937 xheader_store ("GNU.sparse.offset", file->stat_info, &i); 938 xheader_store ("GNU.sparse.numbytes", file->stat_info, &i); 939 } 940 } 941 else 942 { 943 xheader_store ("GNU.sparse.name", file->stat_info, NULL); 944 save_file_name = file->stat_info->file_name; 945 file->stat_info->file_name = xheader_format_name 
(file->stat_info, 946 "%d/GNUSparseFile.%p/%f", 0); 947 948 xheader_string_begin (&file->stat_info->xhdr); 949 for (i = 0; i < file->stat_info->sparse_map_avail; i++) 950 { 951 if (i) 952 xheader_string_add (&file->stat_info->xhdr, ","); 953 xheader_string_add (&file->stat_info->xhdr, 954 umaxtostr (map[i].offset, nbuf)); 955 xheader_string_add (&file->stat_info->xhdr, ","); 956 xheader_string_add (&file->stat_info->xhdr, 957 umaxtostr (map[i].numbytes, nbuf)); 958 } 959 if (!xheader_string_end (&file->stat_info->xhdr, 960 "GNU.sparse.map")) 961 { 962 free (file->stat_info->file_name); 963 file->stat_info->file_name = save_file_name; 964 return false; 965 } 966 } 967 blk = start_header (file->stat_info); 968 /* Store the effective (shrunken) file size */ 969 OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size); 970 finish_header (file->stat_info, blk, block_ordinal); 971 if (save_file_name) 972 { 973 free (file->stat_info->file_name); 974 file->stat_info->file_name = save_file_name; 975 } 976 return true; 977} 978 979static bool 980pax_dump_header_1 (struct tar_sparse_file *file) 981{ 982 off_t block_ordinal = current_block_ordinal (); 983 union block *blk; 984 char *p, *q; 985 size_t i; 986 char nbuf[UINTMAX_STRSIZE_BOUND]; 987 off_t size = 0; 988 struct sp_array *map = file->stat_info->sparse_map; 989 char *save_file_name = file->stat_info->file_name; 990 991#define COPY_STRING(b,dst,src) do \ 992 { \ 993 char *endp = b->buffer + BLOCKSIZE; \ 994 char *srcp = src; \ 995 while (*srcp) \ 996 { \ 997 if (dst == endp) \ 998 { \ 999 set_next_block_after (b); \ 1000 b = find_next_block (); \ 1001 dst = b->buffer; \ 1002 endp = b->buffer + BLOCKSIZE; \ 1003 } \ 1004 *dst++ = *srcp++; \ 1005 } \ 1006 } while (0) 1007 1008 /* Compute stored file size */ 1009 p = umaxtostr (file->stat_info->sparse_map_avail, nbuf); 1010 size += strlen (p) + 1; 1011 for (i = 0; i < file->stat_info->sparse_map_avail; i++) 1012 { 1013 p = umaxtostr (map[i].offset, nbuf); 1014 
size += strlen (p) + 1; 1015 p = umaxtostr (map[i].numbytes, nbuf); 1016 size += strlen (p) + 1; 1017 } 1018 size = (size + BLOCKSIZE - 1) / BLOCKSIZE; 1019 file->stat_info->archive_file_size += size * BLOCKSIZE; 1020 file->dumped_size += size * BLOCKSIZE; 1021 1022 /* Store sparse file identification */ 1023 xheader_store ("GNU.sparse.major", file->stat_info, NULL); 1024 xheader_store ("GNU.sparse.minor", file->stat_info, NULL); 1025 xheader_store ("GNU.sparse.name", file->stat_info, NULL); 1026 xheader_store ("GNU.sparse.realsize", file->stat_info, NULL); 1027 1028 file->stat_info->file_name = xheader_format_name (file->stat_info, 1029 "%d/GNUSparseFile.%p/%f", 0); 1030 1031 blk = start_header (file->stat_info); 1032 /* Store the effective (shrunken) file size */ 1033 OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size); 1034 finish_header (file->stat_info, blk, block_ordinal); 1035 free (file->stat_info->file_name); 1036 file->stat_info->file_name = save_file_name; 1037 1038 blk = find_next_block (); 1039 q = blk->buffer; 1040 p = umaxtostr (file->stat_info->sparse_map_avail, nbuf); 1041 COPY_STRING (blk, q, p); 1042 COPY_STRING (blk, q, "\n"); 1043 for (i = 0; i < file->stat_info->sparse_map_avail; i++) 1044 { 1045 p = umaxtostr (map[i].offset, nbuf); 1046 COPY_STRING (blk, q, p); 1047 COPY_STRING (blk, q, "\n"); 1048 p = umaxtostr (map[i].numbytes, nbuf); 1049 COPY_STRING (blk, q, p); 1050 COPY_STRING (blk, q, "\n"); 1051 } 1052 memset (q, 0, BLOCKSIZE - (q - blk->buffer)); 1053 set_next_block_after (blk); 1054 return true; 1055} 1056 1057static bool 1058pax_dump_header (struct tar_sparse_file *file) 1059{ 1060 file->stat_info->sparse_major = tar_sparse_major; 1061 file->stat_info->sparse_minor = tar_sparse_minor; 1062 1063 return (file->stat_info->sparse_major == 0) ? 
1064 pax_dump_header_0 (file) : pax_dump_header_1 (file); 1065} 1066 1067static bool 1068decode_num (uintmax_t *num, char const *arg, uintmax_t maxval) 1069{ 1070 uintmax_t u; 1071 char *arg_lim; 1072 1073 if (!ISDIGIT (*arg)) 1074 return false; 1075 1076 u = strtoumax (arg, &arg_lim, 10); 1077 1078 if (! (u <= maxval && errno != ERANGE) || *arg_lim) 1079 return false; 1080 1081 *num = u; 1082 return true; 1083} 1084 1085static bool 1086pax_decode_header (struct tar_sparse_file *file) 1087{ 1088 if (file->stat_info->sparse_major > 0) 1089 { 1090 uintmax_t u; 1091 char nbuf[UINTMAX_STRSIZE_BOUND]; 1092 union block *blk; 1093 char *p; 1094 size_t i; 1095 1096#define COPY_BUF(b,buf,src) do \ 1097 { \ 1098 char *endp = b->buffer + BLOCKSIZE; \ 1099 char *dst = buf; \ 1100 do \ 1101 { \ 1102 if (dst == buf + UINTMAX_STRSIZE_BOUND -1) \ 1103 { \ 1104 ERROR ((0, 0, _("%s: numeric overflow in sparse archive member"), \ 1105 file->stat_info->orig_file_name)); \ 1106 return false; \ 1107 } \ 1108 if (src == endp) \ 1109 { \ 1110 set_next_block_after (b); \ 1111 file->dumped_size += BLOCKSIZE; \ 1112 b = find_next_block (); \ 1113 src = b->buffer; \ 1114 endp = b->buffer + BLOCKSIZE; \ 1115 } \ 1116 *dst = *src++; \ 1117 } \ 1118 while (*dst++ != '\n'); \ 1119 dst[-1] = 0; \ 1120 } while (0) 1121 1122 set_next_block_after (current_header); 1123 file->dumped_size += BLOCKSIZE; 1124 blk = find_next_block (); 1125 p = blk->buffer; 1126 COPY_BUF (blk,nbuf,p); 1127 if (!decode_num (&u, nbuf, TYPE_MAXIMUM (size_t))) 1128 { 1129 ERROR ((0, 0, _("%s: malformed sparse archive member"), 1130 file->stat_info->orig_file_name)); 1131 return false; 1132 } 1133 file->stat_info->sparse_map_size = u; 1134 file->stat_info->sparse_map = xcalloc (file->stat_info->sparse_map_size, 1135 sizeof (*file->stat_info->sparse_map)); 1136 file->stat_info->sparse_map_avail = 0; 1137 for (i = 0; i < file->stat_info->sparse_map_size; i++) 1138 { 1139 struct sp_array sp; 1140 1141 COPY_BUF (blk,nbuf,p); 1142 
if (!decode_num (&u, nbuf, TYPE_MAXIMUM (off_t))) 1143 { 1144 ERROR ((0, 0, _("%s: malformed sparse archive member"), 1145 file->stat_info->orig_file_name)); 1146 return false; 1147 } 1148 sp.offset = u; 1149 COPY_BUF (blk,nbuf,p); 1150 if (!decode_num (&u, nbuf, TYPE_MAXIMUM (size_t))) 1151 { 1152 ERROR ((0, 0, _("%s: malformed sparse archive member"), 1153 file->stat_info->orig_file_name)); 1154 return false; 1155 } 1156 sp.numbytes = u; 1157 sparse_add_map (file->stat_info, &sp); 1158 } 1159 set_next_block_after (blk); 1160 } 1161 1162 return true; 1163} 1164 1165static struct tar_sparse_optab const pax_optab = { 1166 NULL, /* No init function */ 1167 NULL, /* No done function */ 1168 pax_sparse_member_p, 1169 pax_dump_header, 1170 NULL, 1171 pax_decode_header, 1172 NULL, /* No scan_block function */ 1173 sparse_dump_region, 1174 sparse_extract_region, 1175}; 1176