/* archive_read_support_format_rar5.c revision 349524 */
/*-
 * Copyright (c) 2018 Grzegorz Antoniak (http://antoniak.org)
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24*/ 25 26#include "archive_platform.h" 27#include "archive_endian.h" 28 29#ifdef HAVE_ERRNO_H 30#include <errno.h> 31#endif 32#include <time.h> 33#ifdef HAVE_ZLIB_H 34#include <zlib.h> /* crc32 */ 35#endif 36#ifdef HAVE_LIMITS_H 37#include <limits.h> 38#endif 39 40#include "archive.h" 41#ifndef HAVE_ZLIB_H 42#include "archive_crc32.h" 43#endif 44 45#include "archive_entry.h" 46#include "archive_entry_locale.h" 47#include "archive_ppmd7_private.h" 48#include "archive_entry_private.h" 49 50#ifdef HAVE_BLAKE2_H 51#include <blake2.h> 52#else 53#include "archive_blake2.h" 54#endif 55 56/*#define CHECK_CRC_ON_SOLID_SKIP*/ 57/*#define DONT_FAIL_ON_CRC_ERROR*/ 58/*#define DEBUG*/ 59 60#define rar5_min(a, b) (((a) > (b)) ? (b) : (a)) 61#define rar5_max(a, b) (((a) > (b)) ? (a) : (b)) 62#define rar5_countof(X) ((const ssize_t) (sizeof(X) / sizeof(*X))) 63 64#if defined DEBUG 65#define DEBUG_CODE if(1) 66#else 67#define DEBUG_CODE if(0) 68#endif 69 70/* Real RAR5 magic number is: 71 * 72 * 0x52, 0x61, 0x72, 0x21, 0x1a, 0x07, 0x01, 0x00 73 * "Rar!�����������\x00" 74 * 75 * It's stored in `rar5_signature` after XOR'ing it with 0xA1, because I don't 76 * want to put this magic sequence in each binary that uses libarchive, so 77 * applications that scan through the file for this marker won't trigger on 78 * this "false" one. 79 * 80 * The array itself is decrypted in `rar5_init` function. */ 81 82static unsigned char rar5_signature[] = { 243, 192, 211, 128, 187, 166, 160, 161 }; 83static const ssize_t rar5_signature_size = sizeof(rar5_signature); 84static const size_t g_unpack_window_size = 0x20000; 85 86/* These could have been static const's, but they aren't, because of 87 * Visual Studio. */ 88#define MAX_NAME_IN_CHARS 2048 89#define MAX_NAME_IN_BYTES (4 * MAX_NAME_IN_CHARS) 90 91struct file_header { 92 ssize_t bytes_remaining; 93 ssize_t unpacked_size; 94 int64_t last_offset; /* Used in sanity checks. */ 95 int64_t last_size; /* Used in sanity checks. 
*/ 96 97 uint8_t solid : 1; /* Is this a solid stream? */ 98 uint8_t service : 1; /* Is this file a service data? */ 99 uint8_t eof : 1; /* Did we finish unpacking the file? */ 100 uint8_t dir : 1; /* Is this file entry a directory? */ 101 102 /* Optional time fields. */ 103 uint64_t e_mtime; 104 uint64_t e_ctime; 105 uint64_t e_atime; 106 uint32_t e_unix_ns; 107 108 /* Optional hash fields. */ 109 uint32_t stored_crc32; 110 uint32_t calculated_crc32; 111 uint8_t blake2sp[32]; 112 blake2sp_state b2state; 113 char has_blake2; 114 115 /* Optional redir fields */ 116 uint64_t redir_type; 117 uint64_t redir_flags; 118}; 119 120enum EXTRA { 121 EX_CRYPT = 0x01, 122 EX_HASH = 0x02, 123 EX_HTIME = 0x03, 124 EX_VERSION = 0x04, 125 EX_REDIR = 0x05, 126 EX_UOWNER = 0x06, 127 EX_SUBDATA = 0x07 128}; 129 130#define REDIR_SYMLINK_IS_DIR 1 131 132enum REDIR_TYPE { 133 REDIR_TYPE_NONE = 0, 134 REDIR_TYPE_UNIXSYMLINK = 1, 135 REDIR_TYPE_WINSYMLINK = 2, 136 REDIR_TYPE_JUNCTION = 3, 137 REDIR_TYPE_HARDLINK = 4, 138 REDIR_TYPE_FILECOPY = 5, 139}; 140 141#define OWNER_USER_NAME 0x01 142#define OWNER_GROUP_NAME 0x02 143#define OWNER_USER_UID 0x04 144#define OWNER_GROUP_GID 0x08 145#define OWNER_MAXNAMELEN 256 146 147enum FILTER_TYPE { 148 FILTER_DELTA = 0, /* Generic pattern. */ 149 FILTER_E8 = 1, /* Intel x86 code. */ 150 FILTER_E8E9 = 2, /* Intel x86 code. */ 151 FILTER_ARM = 3, /* ARM code. */ 152 FILTER_AUDIO = 4, /* Audio filter, not used in RARv5. */ 153 FILTER_RGB = 5, /* Color palette, not used in RARv5. */ 154 FILTER_ITANIUM = 6, /* Intel's Itanium, not used in RARv5. */ 155 FILTER_PPM = 7, /* Predictive pattern matching, not used in 156 RARv5. 
*/ 157 FILTER_NONE = 8, 158}; 159 160struct filter_info { 161 int type; 162 int channels; 163 int pos_r; 164 165 int64_t block_start; 166 ssize_t block_length; 167 uint16_t width; 168}; 169 170struct data_ready { 171 char used; 172 const uint8_t* buf; 173 size_t size; 174 int64_t offset; 175}; 176 177struct cdeque { 178 uint16_t beg_pos; 179 uint16_t end_pos; 180 uint16_t cap_mask; 181 uint16_t size; 182 size_t* arr; 183}; 184 185struct decode_table { 186 uint32_t size; 187 int32_t decode_len[16]; 188 uint32_t decode_pos[16]; 189 uint32_t quick_bits; 190 uint8_t quick_len[1 << 10]; 191 uint16_t quick_num[1 << 10]; 192 uint16_t decode_num[306]; 193}; 194 195struct comp_state { 196 /* Flag used to specify if unpacker needs to reinitialize the 197 uncompression context. */ 198 uint8_t initialized : 1; 199 200 /* Flag used when applying filters. */ 201 uint8_t all_filters_applied : 1; 202 203 /* Flag used to skip file context reinitialization, used when unpacker 204 is skipping through different multivolume archives. */ 205 uint8_t switch_multivolume : 1; 206 207 /* Flag used to specify if unpacker has processed the whole data block 208 or just a part of it. */ 209 uint8_t block_parsing_finished : 1; 210 211 int notused : 4; 212 213 int flags; /* Uncompression flags. */ 214 int method; /* Uncompression algorithm method. */ 215 int version; /* Uncompression algorithm version. */ 216 ssize_t window_size; /* Size of window_buf. */ 217 uint8_t* window_buf; /* Circular buffer used during 218 decompression. */ 219 uint8_t* filtered_buf; /* Buffer used when applying filters. */ 220 const uint8_t* block_buf; /* Buffer used when merging blocks. */ 221 size_t window_mask; /* Convenience field; window_size - 1. */ 222 int64_t write_ptr; /* This amount of data has been unpacked 223 in the window buffer. */ 224 int64_t last_write_ptr; /* This amount of data has been stored in 225 the output file. */ 226 int64_t last_unstore_ptr; /* Counter of bytes extracted during 227 unstoring. 
This is separate from 228 last_write_ptr because of how SERVICE 229 base blocks are handled during skipping 230 in solid multiarchive archives. */ 231 int64_t solid_offset; /* Additional offset inside the window 232 buffer, used in unpacking solid 233 archives. */ 234 ssize_t cur_block_size; /* Size of current data block. */ 235 int last_len; /* Flag used in lzss decompression. */ 236 237 /* Decode tables used during lzss uncompression. */ 238 239#define HUFF_BC 20 240 struct decode_table bd; /* huffman bit lengths */ 241#define HUFF_NC 306 242 struct decode_table ld; /* literals */ 243#define HUFF_DC 64 244 struct decode_table dd; /* distances */ 245#define HUFF_LDC 16 246 struct decode_table ldd; /* lower bits of distances */ 247#define HUFF_RC 44 248 struct decode_table rd; /* repeating distances */ 249#define HUFF_TABLE_SIZE (HUFF_NC + HUFF_DC + HUFF_RC + HUFF_LDC) 250 251 /* Circular deque for storing filters. */ 252 struct cdeque filters; 253 int64_t last_block_start; /* Used for sanity checking. */ 254 ssize_t last_block_length; /* Used for sanity checking. */ 255 256 /* Distance cache used during lzss uncompression. */ 257 int dist_cache[4]; 258 259 /* Data buffer stack. */ 260 struct data_ready dready[2]; 261}; 262 263/* Bit reader state. */ 264struct bit_reader { 265 int8_t bit_addr; /* Current bit pointer inside current byte. */ 266 int in_addr; /* Current byte pointer. */ 267}; 268 269/* RARv5 block header structure. Use bf_* functions to get values from 270 * block_flags_u8 field. I.e. bf_byte_count, etc. */ 271struct compressed_block_header { 272 /* block_flags_u8 contain fields encoded in little-endian bitfield: 273 * 274 * - table present flag (shr 7, and 1), 275 * - last block flag (shr 6, and 1), 276 * - byte_count (shr 3, and 7), 277 * - bit_size (shr 0, and 7). 278 */ 279 uint8_t block_flags_u8; 280 uint8_t block_cksum; 281}; 282 283/* RARv5 main header structure. */ 284struct main_header { 285 /* Does the archive contain solid streams? 
*/ 286 uint8_t solid : 1; 287 288 /* If this a multi-file archive? */ 289 uint8_t volume : 1; 290 uint8_t endarc : 1; 291 uint8_t notused : 5; 292 293 unsigned int vol_no; 294}; 295 296struct generic_header { 297 uint8_t split_after : 1; 298 uint8_t split_before : 1; 299 uint8_t padding : 6; 300 int size; 301 int last_header_id; 302}; 303 304struct multivolume { 305 unsigned int expected_vol_no; 306 uint8_t* push_buf; 307}; 308 309/* Main context structure. */ 310struct rar5 { 311 int header_initialized; 312 313 /* Set to 1 if current file is positioned AFTER the magic value 314 * of the archive file. This is used in header reading functions. */ 315 int skipped_magic; 316 317 /* Set to not zero if we're in skip mode (either by calling 318 * rar5_data_skip function or when skipping over solid streams). 319 * Set to 0 when in * extraction mode. This is used during checksum 320 * calculation functions. */ 321 int skip_mode; 322 323 /* Set to not zero if we're in block merging mode (i.e. when switching 324 * to another file in multivolume archive, last block from 1st archive 325 * needs to be merged with 1st block from 2nd archive). This flag 326 * guards against recursive use of the merging function, which doesn't 327 * support recursive calls. */ 328 int merge_mode; 329 330 /* An offset to QuickOpen list. This is not supported by this unpacker, 331 * because we're focusing on streaming interface. QuickOpen is designed 332 * to make things quicker for non-stream interfaces, so it's not our 333 * use case. */ 334 uint64_t qlist_offset; 335 336 /* An offset to additional Recovery data. This is not supported by this 337 * unpacker. Recovery data are additional Reed-Solomon codes that could 338 * be used to calculate bytes that are missing in archive or are 339 * corrupted. */ 340 uint64_t rr_offset; 341 342 /* Various context variables grouped to different structures. 
*/ 343 struct generic_header generic; 344 struct main_header main; 345 struct comp_state cstate; 346 struct file_header file; 347 struct bit_reader bits; 348 struct multivolume vol; 349 350 /* The header of currently processed RARv5 block. Used in main 351 * decompression logic loop. */ 352 struct compressed_block_header last_block_hdr; 353}; 354 355/* Forward function declarations. */ 356 357static int verify_global_checksums(struct archive_read* a); 358static int rar5_read_data_skip(struct archive_read *a); 359static int push_data_ready(struct archive_read* a, struct rar5* rar, 360 const uint8_t* buf, size_t size, int64_t offset); 361 362/* CDE_xxx = Circular Double Ended (Queue) return values. */ 363enum CDE_RETURN_VALUES { 364 CDE_OK, CDE_ALLOC, CDE_PARAM, CDE_OUT_OF_BOUNDS, 365}; 366 367/* Clears the contents of this circular deque. */ 368static void cdeque_clear(struct cdeque* d) { 369 d->size = 0; 370 d->beg_pos = 0; 371 d->end_pos = 0; 372} 373 374/* Creates a new circular deque object. Capacity must be power of 2: 8, 16, 32, 375 * 64, 256, etc. When the user will add another item above current capacity, 376 * the circular deque will overwrite the oldest entry. */ 377static int cdeque_init(struct cdeque* d, int max_capacity_power_of_2) { 378 if(d == NULL || max_capacity_power_of_2 == 0) 379 return CDE_PARAM; 380 381 d->cap_mask = max_capacity_power_of_2 - 1; 382 d->arr = NULL; 383 384 if((max_capacity_power_of_2 & d->cap_mask) > 0) 385 return CDE_PARAM; 386 387 cdeque_clear(d); 388 d->arr = malloc(sizeof(void*) * max_capacity_power_of_2); 389 390 return d->arr ? CDE_OK : CDE_ALLOC; 391} 392 393/* Return the current size (not capacity) of circular deque `d`. */ 394static size_t cdeque_size(struct cdeque* d) { 395 return d->size; 396} 397 398/* Returns the first element of current circular deque. Note that this function 399 * doesn't perform any bounds checking. If you need bounds checking, use 400 * `cdeque_front()` function instead. 
*/ 401static void cdeque_front_fast(struct cdeque* d, void** value) { 402 *value = (void*) d->arr[d->beg_pos]; 403} 404 405/* Returns the first element of current circular deque. This function 406 * performs bounds checking. */ 407static int cdeque_front(struct cdeque* d, void** value) { 408 if(d->size > 0) { 409 cdeque_front_fast(d, value); 410 return CDE_OK; 411 } else 412 return CDE_OUT_OF_BOUNDS; 413} 414 415/* Pushes a new element into the end of this circular deque object. If current 416 * size will exceed capacity, the oldest element will be overwritten. */ 417static int cdeque_push_back(struct cdeque* d, void* item) { 418 if(d == NULL) 419 return CDE_PARAM; 420 421 if(d->size == d->cap_mask + 1) 422 return CDE_OUT_OF_BOUNDS; 423 424 d->arr[d->end_pos] = (size_t) item; 425 d->end_pos = (d->end_pos + 1) & d->cap_mask; 426 d->size++; 427 428 return CDE_OK; 429} 430 431/* Pops a front element of this circular deque object and returns its value. 432 * This function doesn't perform any bounds checking. */ 433static void cdeque_pop_front_fast(struct cdeque* d, void** value) { 434 *value = (void*) d->arr[d->beg_pos]; 435 d->beg_pos = (d->beg_pos + 1) & d->cap_mask; 436 d->size--; 437} 438 439/* Pops a front element of this circular deque object and returns its value. 440 * This function performs bounds checking. */ 441static int cdeque_pop_front(struct cdeque* d, void** value) { 442 if(!d || !value) 443 return CDE_PARAM; 444 445 if(d->size == 0) 446 return CDE_OUT_OF_BOUNDS; 447 448 cdeque_pop_front_fast(d, value); 449 return CDE_OK; 450} 451 452/* Convenience function to cast filter_info** to void **. */ 453static void** cdeque_filter_p(struct filter_info** f) { 454 return (void**) (size_t) f; 455} 456 457/* Convenience function to cast filter_info* to void *. */ 458static void* cdeque_filter(struct filter_info* f) { 459 return (void**) (size_t) f; 460} 461 462/* Destroys this circular deque object. 
Deallocates the memory of the 463 * collection buffer, but doesn't deallocate the memory of any pointer passed 464 * to this deque as a value. */ 465static void cdeque_free(struct cdeque* d) { 466 if(!d) 467 return; 468 469 if(!d->arr) 470 return; 471 472 free(d->arr); 473 474 d->arr = NULL; 475 d->beg_pos = -1; 476 d->end_pos = -1; 477 d->cap_mask = 0; 478} 479 480static inline 481uint8_t bf_bit_size(const struct compressed_block_header* hdr) { 482 return hdr->block_flags_u8 & 7; 483} 484 485static inline 486uint8_t bf_byte_count(const struct compressed_block_header* hdr) { 487 return (hdr->block_flags_u8 >> 3) & 7; 488} 489 490static inline 491uint8_t bf_is_table_present(const struct compressed_block_header* hdr) { 492 return (hdr->block_flags_u8 >> 7) & 1; 493} 494 495static inline struct rar5* get_context(struct archive_read* a) { 496 return (struct rar5*) a->format->data; 497} 498 499/* Convenience functions used by filter implementations. */ 500static void circular_memcpy(uint8_t* dst, uint8_t* window, const uint64_t mask, 501 int64_t start, int64_t end) 502{ 503 if((start & mask) > (end & mask)) { 504 ssize_t len1 = mask + 1 - (start & mask); 505 ssize_t len2 = end & mask; 506 507 memcpy(dst, &window[start & mask], len1); 508 memcpy(dst + len1, window, len2); 509 } else { 510 memcpy(dst, &window[start & mask], (size_t) (end - start)); 511 } 512} 513 514static uint32_t read_filter_data(struct rar5* rar, uint32_t offset) { 515 uint8_t linear_buf[4]; 516 circular_memcpy(linear_buf, rar->cstate.window_buf, 517 rar->cstate.window_mask, offset, offset + 4); 518 return archive_le32dec(linear_buf); 519} 520 521static void write_filter_data(struct rar5* rar, uint32_t offset, 522 uint32_t value) 523{ 524 archive_le32enc(&rar->cstate.filtered_buf[offset], value); 525} 526 527/* Allocates a new filter descriptor and adds it to the filter array. 
*/ 528static struct filter_info* add_new_filter(struct rar5* rar) { 529 struct filter_info* f = 530 (struct filter_info*) calloc(1, sizeof(struct filter_info)); 531 532 if(!f) { 533 return NULL; 534 } 535 536 cdeque_push_back(&rar->cstate.filters, cdeque_filter(f)); 537 return f; 538} 539 540static int run_delta_filter(struct rar5* rar, struct filter_info* flt) { 541 int i; 542 ssize_t dest_pos, src_pos = 0; 543 544 for(i = 0; i < flt->channels; i++) { 545 uint8_t prev_byte = 0; 546 for(dest_pos = i; 547 dest_pos < flt->block_length; 548 dest_pos += flt->channels) 549 { 550 uint8_t byte; 551 552 byte = rar->cstate.window_buf[ 553 (rar->cstate.solid_offset + flt->block_start + 554 src_pos) & rar->cstate.window_mask]; 555 556 prev_byte -= byte; 557 rar->cstate.filtered_buf[dest_pos] = prev_byte; 558 src_pos++; 559 } 560 } 561 562 return ARCHIVE_OK; 563} 564 565static int run_e8e9_filter(struct rar5* rar, struct filter_info* flt, 566 int extended) 567{ 568 const uint32_t file_size = 0x1000000; 569 ssize_t i; 570 571 circular_memcpy(rar->cstate.filtered_buf, 572 rar->cstate.window_buf, rar->cstate.window_mask, 573 rar->cstate.solid_offset + flt->block_start, 574 rar->cstate.solid_offset + flt->block_start + flt->block_length); 575 576 for(i = 0; i < flt->block_length - 4;) { 577 uint8_t b = rar->cstate.window_buf[ 578 (rar->cstate.solid_offset + flt->block_start + 579 i++) & rar->cstate.window_mask]; 580 581 /* 582 * 0xE8 = x86's call <relative_addr_uint32> (function call) 583 * 0xE9 = x86's jmp <relative_addr_uint32> (unconditional jump) 584 */ 585 if(b == 0xE8 || (extended && b == 0xE9)) { 586 587 uint32_t addr; 588 uint32_t offset = (i + flt->block_start) % file_size; 589 590 addr = read_filter_data(rar, 591 (uint32_t)(rar->cstate.solid_offset + 592 flt->block_start + i) & rar->cstate.window_mask); 593 594 if(addr & 0x80000000) { 595 if(((addr + offset) & 0x80000000) == 0) { 596 write_filter_data(rar, (uint32_t)i, 597 addr + file_size); 598 } 599 } else { 600 
if((addr - file_size) & 0x80000000) { 601 uint32_t naddr = addr - offset; 602 write_filter_data(rar, (uint32_t)i, 603 naddr); 604 } 605 } 606 607 i += 4; 608 } 609 } 610 611 return ARCHIVE_OK; 612} 613 614static int run_arm_filter(struct rar5* rar, struct filter_info* flt) { 615 ssize_t i = 0; 616 uint32_t offset; 617 618 circular_memcpy(rar->cstate.filtered_buf, 619 rar->cstate.window_buf, rar->cstate.window_mask, 620 rar->cstate.solid_offset + flt->block_start, 621 rar->cstate.solid_offset + flt->block_start + flt->block_length); 622 623 for(i = 0; i < flt->block_length - 3; i += 4) { 624 uint8_t* b = &rar->cstate.window_buf[ 625 (rar->cstate.solid_offset + 626 flt->block_start + i) & rar->cstate.window_mask]; 627 628 if(b[3] == 0xEB) { 629 /* 0xEB = ARM's BL (branch + link) instruction. */ 630 offset = read_filter_data(rar, 631 (rar->cstate.solid_offset + flt->block_start + i) & 632 rar->cstate.window_mask) & 0x00ffffff; 633 634 offset -= (uint32_t) ((i + flt->block_start) / 4); 635 offset = (offset & 0x00ffffff) | 0xeb000000; 636 write_filter_data(rar, (uint32_t)i, offset); 637 } 638 } 639 640 return ARCHIVE_OK; 641} 642 643static int run_filter(struct archive_read* a, struct filter_info* flt) { 644 int ret; 645 struct rar5* rar = get_context(a); 646 647 free(rar->cstate.filtered_buf); 648 649 rar->cstate.filtered_buf = malloc(flt->block_length); 650 if(!rar->cstate.filtered_buf) { 651 archive_set_error(&a->archive, ENOMEM, 652 "Can't allocate memory for filter data."); 653 return ARCHIVE_FATAL; 654 } 655 656 switch(flt->type) { 657 case FILTER_DELTA: 658 ret = run_delta_filter(rar, flt); 659 break; 660 661 case FILTER_E8: 662 /* fallthrough */ 663 case FILTER_E8E9: 664 ret = run_e8e9_filter(rar, flt, 665 flt->type == FILTER_E8E9); 666 break; 667 668 case FILTER_ARM: 669 ret = run_arm_filter(rar, flt); 670 break; 671 672 default: 673 archive_set_error(&a->archive, 674 ARCHIVE_ERRNO_FILE_FORMAT, 675 "Unsupported filter type: 0x%x", flt->type); 676 return 
ARCHIVE_FATAL; 677 } 678 679 if(ret != ARCHIVE_OK) { 680 /* Filter has failed. */ 681 return ret; 682 } 683 684 if(ARCHIVE_OK != push_data_ready(a, rar, rar->cstate.filtered_buf, 685 flt->block_length, rar->cstate.last_write_ptr)) 686 { 687 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 688 "Stack overflow when submitting unpacked data"); 689 690 return ARCHIVE_FATAL; 691 } 692 693 rar->cstate.last_write_ptr += flt->block_length; 694 return ARCHIVE_OK; 695} 696 697/* The `push_data` function submits the selected data range to the user. 698 * Next call of `use_data` will use the pointer, size and offset arguments 699 * that are specified here. These arguments are pushed to the FIFO stack here, 700 * and popped from the stack by the `use_data` function. */ 701static void push_data(struct archive_read* a, struct rar5* rar, 702 const uint8_t* buf, int64_t idx_begin, int64_t idx_end) 703{ 704 const uint64_t wmask = rar->cstate.window_mask; 705 const ssize_t solid_write_ptr = (rar->cstate.solid_offset + 706 rar->cstate.last_write_ptr) & wmask; 707 708 idx_begin += rar->cstate.solid_offset; 709 idx_end += rar->cstate.solid_offset; 710 711 /* Check if our unpacked data is wrapped inside the window circular 712 * buffer. If it's not wrapped, it can be copied out by using 713 * a single memcpy, but when it's wrapped, we need to copy the first 714 * part with one memcpy, and the second part with another memcpy. */ 715 716 if((idx_begin & wmask) > (idx_end & wmask)) { 717 /* The data is wrapped (begin offset sis bigger than end 718 * offset). */ 719 const ssize_t frag1_size = rar->cstate.window_size - 720 (idx_begin & wmask); 721 const ssize_t frag2_size = idx_end & wmask; 722 723 /* Copy the first part of the buffer first. */ 724 push_data_ready(a, rar, buf + solid_write_ptr, frag1_size, 725 rar->cstate.last_write_ptr); 726 727 /* Copy the second part of the buffer. 
*/ 728 push_data_ready(a, rar, buf, frag2_size, 729 rar->cstate.last_write_ptr + frag1_size); 730 731 rar->cstate.last_write_ptr += frag1_size + frag2_size; 732 } else { 733 /* Data is not wrapped, so we can just use one call to copy the 734 * data. */ 735 push_data_ready(a, rar, 736 buf + solid_write_ptr, (idx_end - idx_begin) & wmask, 737 rar->cstate.last_write_ptr); 738 739 rar->cstate.last_write_ptr += idx_end - idx_begin; 740 } 741} 742 743/* Convenience function that submits the data to the user. It uses the 744 * unpack window buffer as a source location. */ 745static void push_window_data(struct archive_read* a, struct rar5* rar, 746 int64_t idx_begin, int64_t idx_end) 747{ 748 push_data(a, rar, rar->cstate.window_buf, idx_begin, idx_end); 749} 750 751static int apply_filters(struct archive_read* a) { 752 struct filter_info* flt; 753 struct rar5* rar = get_context(a); 754 int ret; 755 756 rar->cstate.all_filters_applied = 0; 757 758 /* Get the first filter that can be applied to our data. The data 759 * needs to be fully unpacked before the filter can be run. */ 760 if(CDE_OK == cdeque_front(&rar->cstate.filters, 761 cdeque_filter_p(&flt))) { 762 /* Check if our unpacked data fully covers this filter's 763 * range. */ 764 if(rar->cstate.write_ptr > flt->block_start && 765 rar->cstate.write_ptr >= flt->block_start + 766 flt->block_length) { 767 /* Check if we have some data pending to be written 768 * right before the filter's start offset. */ 769 if(rar->cstate.last_write_ptr == flt->block_start) { 770 /* Run the filter specified by descriptor 771 * `flt`. */ 772 ret = run_filter(a, flt); 773 if(ret != ARCHIVE_OK) { 774 /* Filter failure, return error. */ 775 return ret; 776 } 777 778 /* Filter descriptor won't be needed anymore 779 * after it's used, * so remove it from the 780 * filter list and free its memory. 
*/ 781 (void) cdeque_pop_front(&rar->cstate.filters, 782 cdeque_filter_p(&flt)); 783 784 free(flt); 785 } else { 786 /* We can't run filters yet, dump the memory 787 * right before the filter. */ 788 push_window_data(a, rar, 789 rar->cstate.last_write_ptr, 790 flt->block_start); 791 } 792 793 /* Return 'filter applied or not needed' state to the 794 * caller. */ 795 return ARCHIVE_RETRY; 796 } 797 } 798 799 rar->cstate.all_filters_applied = 1; 800 return ARCHIVE_OK; 801} 802 803static void dist_cache_push(struct rar5* rar, int value) { 804 int* q = rar->cstate.dist_cache; 805 806 q[3] = q[2]; 807 q[2] = q[1]; 808 q[1] = q[0]; 809 q[0] = value; 810} 811 812static int dist_cache_touch(struct rar5* rar, int idx) { 813 int* q = rar->cstate.dist_cache; 814 int i, dist = q[idx]; 815 816 for(i = idx; i > 0; i--) 817 q[i] = q[i - 1]; 818 819 q[0] = dist; 820 return dist; 821} 822 823static void free_filters(struct rar5* rar) { 824 struct cdeque* d = &rar->cstate.filters; 825 826 /* Free any remaining filters. All filters should be naturally 827 * consumed by the unpacking function, so remaining filters after 828 * unpacking normally mean that unpacking wasn't successful. 829 * But still of course we shouldn't leak memory in such case. */ 830 831 /* cdeque_size() is a fast operation, so we can use it as a loop 832 * expression. */ 833 while(cdeque_size(d) > 0) { 834 struct filter_info* f = NULL; 835 836 /* Pop_front will also decrease the collection's size. */ 837 if (CDE_OK == cdeque_pop_front(d, cdeque_filter_p(&f))) 838 free(f); 839 } 840 841 cdeque_clear(d); 842 843 /* Also clear out the variables needed for sanity checking. 
*/ 844 rar->cstate.last_block_start = 0; 845 rar->cstate.last_block_length = 0; 846} 847 848static void reset_file_context(struct rar5* rar) { 849 memset(&rar->file, 0, sizeof(rar->file)); 850 blake2sp_init(&rar->file.b2state, 32); 851 852 if(rar->main.solid) { 853 rar->cstate.solid_offset += rar->cstate.write_ptr; 854 } else { 855 rar->cstate.solid_offset = 0; 856 } 857 858 rar->cstate.write_ptr = 0; 859 rar->cstate.last_write_ptr = 0; 860 rar->cstate.last_unstore_ptr = 0; 861 862 rar->file.redir_type = REDIR_TYPE_NONE; 863 rar->file.redir_flags = 0; 864 865 free_filters(rar); 866} 867 868static inline int get_archive_read(struct archive* a, 869 struct archive_read** ar) 870{ 871 *ar = (struct archive_read*) a; 872 archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 873 "archive_read_support_format_rar5"); 874 875 return ARCHIVE_OK; 876} 877 878static int read_ahead(struct archive_read* a, size_t how_many, 879 const uint8_t** ptr) 880{ 881 if(!ptr) 882 return 0; 883 884 ssize_t avail = -1; 885 *ptr = __archive_read_ahead(a, how_many, &avail); 886 if(*ptr == NULL) { 887 return 0; 888 } 889 890 return 1; 891} 892 893static int consume(struct archive_read* a, int64_t how_many) { 894 int ret; 895 896 ret = how_many == __archive_read_consume(a, how_many) 897 ? ARCHIVE_OK 898 : ARCHIVE_FATAL; 899 900 return ret; 901} 902 903/** 904 * Read a RAR5 variable sized numeric value. This value will be stored in 905 * `pvalue`. The `pvalue_len` argument points to a variable that will receive 906 * the byte count that was consumed in order to decode the `pvalue` value, plus 907 * one. 908 * 909 * pvalue_len is optional and can be NULL. 910 * 911 * NOTE: if `pvalue_len` is NOT NULL, the caller needs to manually consume 912 * the number of bytes that `pvalue_len` value contains. If the `pvalue_len` 913 * is NULL, this consuming operation is done automatically. 914 * 915 * Returns 1 if *pvalue was successfully read. 916 * Returns 0 if there was an error. 
In this case, *pvalue contains an 917 * invalid value. 918 */ 919 920static int read_var(struct archive_read* a, uint64_t* pvalue, 921 uint64_t* pvalue_len) 922{ 923 uint64_t result = 0; 924 size_t shift, i; 925 const uint8_t* p; 926 uint8_t b; 927 928 /* We will read maximum of 8 bytes. We don't have to handle the 929 * situation to read the RAR5 variable-sized value stored at the end of 930 * the file, because such situation will never happen. */ 931 if(!read_ahead(a, 8, &p)) 932 return 0; 933 934 for(shift = 0, i = 0; i < 8; i++, shift += 7) { 935 b = p[i]; 936 937 /* Strip the MSB from the input byte and add the resulting 938 * number to the `result`. */ 939 result += (b & (uint64_t)0x7F) << shift; 940 941 /* MSB set to 1 means we need to continue decoding process. 942 * MSB set to 0 means we're done. 943 * 944 * This conditional checks for the second case. */ 945 if((b & 0x80) == 0) { 946 if(pvalue) { 947 *pvalue = result; 948 } 949 950 /* If the caller has passed the `pvalue_len` pointer, 951 * store the number of consumed bytes in it and do NOT 952 * consume those bytes, since the caller has all the 953 * information it needs to perform */ 954 if(pvalue_len) { 955 *pvalue_len = 1 + i; 956 } else { 957 /* If the caller did not provide the 958 * `pvalue_len` pointer, it will not have the 959 * possibility to advance the file pointer, 960 * because it will not know how many bytes it 961 * needs to consume. This is why we handle 962 * such situation here automatically. */ 963 if(ARCHIVE_OK != consume(a, 1 + i)) { 964 return 0; 965 } 966 } 967 968 /* End of decoding process, return success. */ 969 return 1; 970 } 971 } 972 973 /* The decoded value takes the maximum number of 8 bytes. 974 * It's a maximum number of bytes, so end decoding process here 975 * even if the first bit of last byte is 1. 
*/ 976 if(pvalue) { 977 *pvalue = result; 978 } 979 980 if(pvalue_len) { 981 *pvalue_len = 9; 982 } else { 983 if(ARCHIVE_OK != consume(a, 9)) { 984 return 0; 985 } 986 } 987 988 return 1; 989} 990 991static int read_var_sized(struct archive_read* a, size_t* pvalue, 992 size_t* pvalue_len) 993{ 994 uint64_t v; 995 uint64_t v_size = 0; 996 997 const int ret = pvalue_len ? read_var(a, &v, &v_size) 998 : read_var(a, &v, NULL); 999 1000 if(ret == 1 && pvalue) { 1001 *pvalue = (size_t) v; 1002 } 1003 1004 if(pvalue_len) { 1005 /* Possible data truncation should be safe. */ 1006 *pvalue_len = (size_t) v_size; 1007 } 1008 1009 return ret; 1010} 1011 1012static int read_bits_32(struct rar5* rar, const uint8_t* p, uint32_t* value) { 1013 uint32_t bits = ((uint32_t) p[rar->bits.in_addr]) << 24; 1014 bits |= p[rar->bits.in_addr + 1] << 16; 1015 bits |= p[rar->bits.in_addr + 2] << 8; 1016 bits |= p[rar->bits.in_addr + 3]; 1017 bits <<= rar->bits.bit_addr; 1018 bits |= p[rar->bits.in_addr + 4] >> (8 - rar->bits.bit_addr); 1019 *value = bits; 1020 return ARCHIVE_OK; 1021} 1022 1023static int read_bits_16(struct rar5* rar, const uint8_t* p, uint16_t* value) { 1024 int bits = (int) ((uint32_t) p[rar->bits.in_addr]) << 16; 1025 bits |= (int) p[rar->bits.in_addr + 1] << 8; 1026 bits |= (int) p[rar->bits.in_addr + 2]; 1027 bits >>= (8 - rar->bits.bit_addr); 1028 *value = bits & 0xffff; 1029 return ARCHIVE_OK; 1030} 1031 1032static void skip_bits(struct rar5* rar, int bits) { 1033 const int new_bits = rar->bits.bit_addr + bits; 1034 rar->bits.in_addr += new_bits >> 3; 1035 rar->bits.bit_addr = new_bits & 7; 1036} 1037 1038/* n = up to 16 */ 1039static int read_consume_bits(struct rar5* rar, const uint8_t* p, int n, 1040 int* value) 1041{ 1042 uint16_t v; 1043 int ret, num; 1044 1045 if(n == 0 || n > 16) { 1046 /* This is a programmer error and should never happen 1047 * in runtime. 
*/ 1048 return ARCHIVE_FATAL; 1049 } 1050 1051 ret = read_bits_16(rar, p, &v); 1052 if(ret != ARCHIVE_OK) 1053 return ret; 1054 1055 num = (int) v; 1056 num >>= 16 - n; 1057 1058 skip_bits(rar, n); 1059 1060 if(value) 1061 *value = num; 1062 1063 return ARCHIVE_OK; 1064} 1065 1066static int read_u32(struct archive_read* a, uint32_t* pvalue) { 1067 const uint8_t* p; 1068 if(!read_ahead(a, 4, &p)) 1069 return 0; 1070 1071 *pvalue = archive_le32dec(p); 1072 return ARCHIVE_OK == consume(a, 4) ? 1 : 0; 1073} 1074 1075static int read_u64(struct archive_read* a, uint64_t* pvalue) { 1076 const uint8_t* p; 1077 if(!read_ahead(a, 8, &p)) 1078 return 0; 1079 1080 *pvalue = archive_le64dec(p); 1081 return ARCHIVE_OK == consume(a, 8) ? 1 : 0; 1082} 1083 1084static int bid_standard(struct archive_read* a) { 1085 const uint8_t* p; 1086 1087 if(!read_ahead(a, rar5_signature_size, &p)) 1088 return -1; 1089 1090 if(!memcmp(rar5_signature, p, rar5_signature_size)) 1091 return 30; 1092 1093 return -1; 1094} 1095 1096static int rar5_bid(struct archive_read* a, int best_bid) { 1097 int my_bid; 1098 1099 if(best_bid > 30) 1100 return -1; 1101 1102 my_bid = bid_standard(a); 1103 if(my_bid > -1) { 1104 return my_bid; 1105 } 1106 1107 return -1; 1108} 1109 1110static int rar5_options(struct archive_read *a, const char *key, 1111 const char *val) { 1112 (void) a; 1113 (void) key; 1114 (void) val; 1115 1116 /* No options supported in this version. Return the ARCHIVE_WARN code 1117 * to signal the options supervisor that the unpacker didn't handle 1118 * setting this option. 
*/ 1119 1120 return ARCHIVE_WARN; 1121} 1122 1123static void init_header(struct archive_read* a) { 1124 a->archive.archive_format = ARCHIVE_FORMAT_RAR_V5; 1125 a->archive.archive_format_name = "RAR5"; 1126} 1127 1128static void init_window_mask(struct rar5* rar) { 1129 if (rar->cstate.window_size) 1130 rar->cstate.window_mask = rar->cstate.window_size - 1; 1131 else 1132 rar->cstate.window_mask = 0; 1133} 1134 1135enum HEADER_FLAGS { 1136 HFL_EXTRA_DATA = 0x0001, 1137 HFL_DATA = 0x0002, 1138 HFL_SKIP_IF_UNKNOWN = 0x0004, 1139 HFL_SPLIT_BEFORE = 0x0008, 1140 HFL_SPLIT_AFTER = 0x0010, 1141 HFL_CHILD = 0x0020, 1142 HFL_INHERITED = 0x0040 1143}; 1144 1145static int process_main_locator_extra_block(struct archive_read* a, 1146 struct rar5* rar) 1147{ 1148 uint64_t locator_flags; 1149 1150 if(!read_var(a, &locator_flags, NULL)) { 1151 return ARCHIVE_EOF; 1152 } 1153 1154 enum LOCATOR_FLAGS { 1155 QLIST = 0x01, RECOVERY = 0x02, 1156 }; 1157 1158 if(locator_flags & QLIST) { 1159 if(!read_var(a, &rar->qlist_offset, NULL)) { 1160 return ARCHIVE_EOF; 1161 } 1162 1163 /* qlist is not used */ 1164 } 1165 1166 if(locator_flags & RECOVERY) { 1167 if(!read_var(a, &rar->rr_offset, NULL)) { 1168 return ARCHIVE_EOF; 1169 } 1170 1171 /* rr is not used */ 1172 } 1173 1174 return ARCHIVE_OK; 1175} 1176 1177static int parse_file_extra_hash(struct archive_read* a, struct rar5* rar, 1178 ssize_t* extra_data_size) 1179{ 1180 size_t hash_type; 1181 size_t value_len; 1182 1183 if(!read_var_sized(a, &hash_type, &value_len)) 1184 return ARCHIVE_EOF; 1185 1186 *extra_data_size -= value_len; 1187 if(ARCHIVE_OK != consume(a, value_len)) { 1188 return ARCHIVE_EOF; 1189 } 1190 1191 enum HASH_TYPE { 1192 BLAKE2sp = 0x00 1193 }; 1194 1195 /* The file uses BLAKE2sp checksum algorithm instead of plain old 1196 * CRC32. 
*/ 1197 if(hash_type == BLAKE2sp) { 1198 const uint8_t* p; 1199 const int hash_size = sizeof(rar->file.blake2sp); 1200 1201 if(!read_ahead(a, hash_size, &p)) 1202 return ARCHIVE_EOF; 1203 1204 rar->file.has_blake2 = 1; 1205 memcpy(&rar->file.blake2sp, p, hash_size); 1206 1207 if(ARCHIVE_OK != consume(a, hash_size)) { 1208 return ARCHIVE_EOF; 1209 } 1210 1211 *extra_data_size -= hash_size; 1212 } else { 1213 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1214 "Unsupported hash type (0x%x)", (int) hash_type); 1215 return ARCHIVE_FATAL; 1216 } 1217 1218 return ARCHIVE_OK; 1219} 1220 1221static uint64_t time_win_to_unix(uint64_t win_time) { 1222 const size_t ns_in_sec = 10000000; 1223 const uint64_t sec_to_unix = 11644473600LL; 1224 return win_time / ns_in_sec - sec_to_unix; 1225} 1226 1227static int parse_htime_item(struct archive_read* a, char unix_time, 1228 uint64_t* where, ssize_t* extra_data_size) 1229{ 1230 if(unix_time) { 1231 uint32_t time_val; 1232 if(!read_u32(a, &time_val)) 1233 return ARCHIVE_EOF; 1234 1235 *extra_data_size -= 4; 1236 *where = (uint64_t) time_val; 1237 } else { 1238 uint64_t windows_time; 1239 if(!read_u64(a, &windows_time)) 1240 return ARCHIVE_EOF; 1241 1242 *where = time_win_to_unix(windows_time); 1243 *extra_data_size -= 8; 1244 } 1245 1246 return ARCHIVE_OK; 1247} 1248 1249static int parse_file_extra_version(struct archive_read* a, 1250 struct archive_entry* e, ssize_t* extra_data_size) 1251{ 1252 size_t flags = 0; 1253 size_t version = 0; 1254 size_t value_len = 0; 1255 struct archive_string version_string; 1256 struct archive_string name_utf8_string; 1257 1258 /* Flags are ignored. 
*/ 1259 if(!read_var_sized(a, &flags, &value_len)) 1260 return ARCHIVE_EOF; 1261 1262 *extra_data_size -= value_len; 1263 if(ARCHIVE_OK != consume(a, value_len)) 1264 return ARCHIVE_EOF; 1265 1266 if(!read_var_sized(a, &version, &value_len)) 1267 return ARCHIVE_EOF; 1268 1269 *extra_data_size -= value_len; 1270 if(ARCHIVE_OK != consume(a, value_len)) 1271 return ARCHIVE_EOF; 1272 1273 /* extra_data_size should be zero here. */ 1274 1275 const char* cur_filename = archive_entry_pathname_utf8(e); 1276 if(cur_filename == NULL) { 1277 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 1278 "Version entry without file name"); 1279 return ARCHIVE_FATAL; 1280 } 1281 1282 archive_string_init(&version_string); 1283 archive_string_init(&name_utf8_string); 1284 1285 /* Prepare a ;123 suffix for the filename, where '123' is the version 1286 * value of this file. */ 1287 archive_string_sprintf(&version_string, ";%zu", version); 1288 1289 /* Build the new filename. */ 1290 archive_strcat(&name_utf8_string, cur_filename); 1291 archive_strcat(&name_utf8_string, version_string.s); 1292 1293 /* Apply the new filename into this file's context. */ 1294 archive_entry_update_pathname_utf8(e, name_utf8_string.s); 1295 1296 /* Free buffers. 
*/ 1297 archive_string_free(&version_string); 1298 archive_string_free(&name_utf8_string); 1299 return ARCHIVE_OK; 1300} 1301 1302static int parse_file_extra_htime(struct archive_read* a, 1303 struct archive_entry* e, struct rar5* rar, ssize_t* extra_data_size) 1304{ 1305 char unix_time = 0; 1306 size_t flags; 1307 size_t value_len; 1308 1309 enum HTIME_FLAGS { 1310 IS_UNIX = 0x01, 1311 HAS_MTIME = 0x02, 1312 HAS_CTIME = 0x04, 1313 HAS_ATIME = 0x08, 1314 HAS_UNIX_NS = 0x10, 1315 }; 1316 1317 if(!read_var_sized(a, &flags, &value_len)) 1318 return ARCHIVE_EOF; 1319 1320 *extra_data_size -= value_len; 1321 if(ARCHIVE_OK != consume(a, value_len)) { 1322 return ARCHIVE_EOF; 1323 } 1324 1325 unix_time = flags & IS_UNIX; 1326 1327 if(flags & HAS_MTIME) { 1328 parse_htime_item(a, unix_time, &rar->file.e_mtime, 1329 extra_data_size); 1330 archive_entry_set_mtime(e, rar->file.e_mtime, 0); 1331 } 1332 1333 if(flags & HAS_CTIME) { 1334 parse_htime_item(a, unix_time, &rar->file.e_ctime, 1335 extra_data_size); 1336 archive_entry_set_ctime(e, rar->file.e_ctime, 0); 1337 } 1338 1339 if(flags & HAS_ATIME) { 1340 parse_htime_item(a, unix_time, &rar->file.e_atime, 1341 extra_data_size); 1342 archive_entry_set_atime(e, rar->file.e_atime, 0); 1343 } 1344 1345 if(flags & HAS_UNIX_NS) { 1346 if(!read_u32(a, &rar->file.e_unix_ns)) 1347 return ARCHIVE_EOF; 1348 1349 *extra_data_size -= 4; 1350 } 1351 1352 return ARCHIVE_OK; 1353} 1354 1355static int parse_file_extra_redir(struct archive_read* a, 1356 struct archive_entry* e, struct rar5* rar, ssize_t* extra_data_size) 1357{ 1358 uint64_t value_size = 0; 1359 size_t target_size = 0; 1360 char target_utf8_buf[MAX_NAME_IN_BYTES]; 1361 const uint8_t* p; 1362 1363 if(!read_var(a, &rar->file.redir_type, &value_size)) 1364 return ARCHIVE_EOF; 1365 if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1366 return ARCHIVE_EOF; 1367 *extra_data_size -= value_size; 1368 1369 if(!read_var(a, &rar->file.redir_flags, &value_size)) 1370 return ARCHIVE_EOF; 
1371 if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1372 return ARCHIVE_EOF; 1373 *extra_data_size -= value_size; 1374 1375 if(!read_var_sized(a, &target_size, NULL)) 1376 return ARCHIVE_EOF; 1377 *extra_data_size -= target_size + 1; 1378 1379 if(!read_ahead(a, target_size, &p)) 1380 return ARCHIVE_EOF; 1381 1382 if(target_size > (MAX_NAME_IN_CHARS - 1)) { 1383 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1384 "Link target is too long"); 1385 return ARCHIVE_FATAL; 1386 } 1387 1388 if(target_size == 0) { 1389 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1390 "No link target specified"); 1391 return ARCHIVE_FATAL; 1392 } 1393 1394 memcpy(target_utf8_buf, p, target_size); 1395 target_utf8_buf[target_size] = 0; 1396 1397 if(ARCHIVE_OK != consume(a, (int64_t)target_size)) 1398 return ARCHIVE_EOF; 1399 1400 switch(rar->file.redir_type) { 1401 case REDIR_TYPE_UNIXSYMLINK: 1402 case REDIR_TYPE_WINSYMLINK: 1403 archive_entry_set_filetype(e, AE_IFLNK); 1404 archive_entry_update_symlink_utf8(e, target_utf8_buf); 1405 if (rar->file.redir_flags & REDIR_SYMLINK_IS_DIR) { 1406 archive_entry_set_symlink_type(e, 1407 AE_SYMLINK_TYPE_DIRECTORY); 1408 } else { 1409 archive_entry_set_symlink_type(e, 1410 AE_SYMLINK_TYPE_FILE); 1411 } 1412 break; 1413 1414 case REDIR_TYPE_HARDLINK: 1415 archive_entry_set_filetype(e, AE_IFREG); 1416 archive_entry_update_hardlink_utf8(e, target_utf8_buf); 1417 break; 1418 1419 default: 1420 /* Unknown redir type, skip it. 
*/ 1421 break; 1422 } 1423 return ARCHIVE_OK; 1424} 1425 1426static int parse_file_extra_owner(struct archive_read* a, 1427 struct archive_entry* e, ssize_t* extra_data_size) 1428{ 1429 uint64_t flags = 0; 1430 uint64_t value_size = 0; 1431 uint64_t id = 0; 1432 size_t name_len = 0; 1433 size_t name_size = 0; 1434 char namebuf[OWNER_MAXNAMELEN]; 1435 const uint8_t* p; 1436 1437 if(!read_var(a, &flags, &value_size)) 1438 return ARCHIVE_EOF; 1439 if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1440 return ARCHIVE_EOF; 1441 *extra_data_size -= value_size; 1442 1443 if ((flags & OWNER_USER_NAME) != 0) { 1444 if(!read_var_sized(a, &name_size, NULL)) 1445 return ARCHIVE_EOF; 1446 *extra_data_size -= name_size + 1; 1447 1448 if(!read_ahead(a, name_size, &p)) 1449 return ARCHIVE_EOF; 1450 1451 if (name_size >= OWNER_MAXNAMELEN) { 1452 name_len = OWNER_MAXNAMELEN - 1; 1453 } else { 1454 name_len = name_size; 1455 } 1456 1457 memcpy(namebuf, p, name_len); 1458 namebuf[name_len] = 0; 1459 if(ARCHIVE_OK != consume(a, (int64_t)name_size)) 1460 return ARCHIVE_EOF; 1461 1462 archive_entry_set_uname(e, namebuf); 1463 } 1464 if ((flags & OWNER_GROUP_NAME) != 0) { 1465 if(!read_var_sized(a, &name_size, NULL)) 1466 return ARCHIVE_EOF; 1467 *extra_data_size -= name_size + 1; 1468 1469 if(!read_ahead(a, name_size, &p)) 1470 return ARCHIVE_EOF; 1471 1472 if (name_size >= OWNER_MAXNAMELEN) { 1473 name_len = OWNER_MAXNAMELEN - 1; 1474 } else { 1475 name_len = name_size; 1476 } 1477 1478 memcpy(namebuf, p, name_len); 1479 namebuf[name_len] = 0; 1480 if(ARCHIVE_OK != consume(a, (int64_t)name_size)) 1481 return ARCHIVE_EOF; 1482 1483 archive_entry_set_gname(e, namebuf); 1484 } 1485 if ((flags & OWNER_USER_UID) != 0) { 1486 if(!read_var(a, &id, &value_size)) 1487 return ARCHIVE_EOF; 1488 if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1489 return ARCHIVE_EOF; 1490 *extra_data_size -= value_size; 1491 1492 archive_entry_set_uid(e, (la_int64_t)id); 1493 } 1494 if ((flags & OWNER_GROUP_GID) 
!= 0) { 1495 if(!read_var(a, &id, &value_size)) 1496 return ARCHIVE_EOF; 1497 if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1498 return ARCHIVE_EOF; 1499 *extra_data_size -= value_size; 1500 1501 archive_entry_set_gid(e, (la_int64_t)id); 1502 } 1503 return ARCHIVE_OK; 1504} 1505 1506static int process_head_file_extra(struct archive_read* a, 1507 struct archive_entry* e, struct rar5* rar, ssize_t extra_data_size) 1508{ 1509 size_t extra_field_size; 1510 size_t extra_field_id = 0; 1511 int ret = ARCHIVE_FATAL; 1512 size_t var_size; 1513 1514 while(extra_data_size > 0) { 1515 if(!read_var_sized(a, &extra_field_size, &var_size)) 1516 return ARCHIVE_EOF; 1517 1518 extra_data_size -= var_size; 1519 if(ARCHIVE_OK != consume(a, var_size)) { 1520 return ARCHIVE_EOF; 1521 } 1522 1523 if(!read_var_sized(a, &extra_field_id, &var_size)) 1524 return ARCHIVE_EOF; 1525 1526 extra_data_size -= var_size; 1527 if(ARCHIVE_OK != consume(a, var_size)) { 1528 return ARCHIVE_EOF; 1529 } 1530 1531 switch(extra_field_id) { 1532 case EX_HASH: 1533 ret = parse_file_extra_hash(a, rar, 1534 &extra_data_size); 1535 break; 1536 case EX_HTIME: 1537 ret = parse_file_extra_htime(a, e, rar, 1538 &extra_data_size); 1539 break; 1540 case EX_REDIR: 1541 ret = parse_file_extra_redir(a, e, rar, 1542 &extra_data_size); 1543 break; 1544 case EX_UOWNER: 1545 ret = parse_file_extra_owner(a, e, 1546 &extra_data_size); 1547 break; 1548 case EX_VERSION: 1549 ret = parse_file_extra_version(a, e, 1550 &extra_data_size); 1551 break; 1552 case EX_CRYPT: 1553 /* fallthrough */ 1554 case EX_SUBDATA: 1555 /* fallthrough */ 1556 default: 1557 /* Skip unsupported entry. */ 1558 return consume(a, extra_data_size); 1559 } 1560 } 1561 1562 if(ret != ARCHIVE_OK) { 1563 /* Attribute not implemented. 
*/ 1564 return ret; 1565 } 1566 1567 return ARCHIVE_OK; 1568} 1569 1570static int process_head_file(struct archive_read* a, struct rar5* rar, 1571 struct archive_entry* entry, size_t block_flags) 1572{ 1573 ssize_t extra_data_size = 0; 1574 size_t data_size = 0; 1575 size_t file_flags = 0; 1576 size_t file_attr = 0; 1577 size_t compression_info = 0; 1578 size_t host_os = 0; 1579 size_t name_size = 0; 1580 uint64_t unpacked_size, window_size; 1581 uint32_t mtime = 0, crc = 0; 1582 int c_method = 0, c_version = 0; 1583 char name_utf8_buf[MAX_NAME_IN_BYTES]; 1584 const uint8_t* p; 1585 1586 archive_entry_clear(entry); 1587 1588 /* Do not reset file context if we're switching archives. */ 1589 if(!rar->cstate.switch_multivolume) { 1590 reset_file_context(rar); 1591 } 1592 1593 if(block_flags & HFL_EXTRA_DATA) { 1594 size_t edata_size = 0; 1595 if(!read_var_sized(a, &edata_size, NULL)) 1596 return ARCHIVE_EOF; 1597 1598 /* Intentional type cast from unsigned to signed. */ 1599 extra_data_size = (ssize_t) edata_size; 1600 } 1601 1602 if(block_flags & HFL_DATA) { 1603 if(!read_var_sized(a, &data_size, NULL)) 1604 return ARCHIVE_EOF; 1605 1606 rar->file.bytes_remaining = data_size; 1607 } else { 1608 rar->file.bytes_remaining = 0; 1609 1610 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1611 "no data found in file/service block"); 1612 return ARCHIVE_FATAL; 1613 } 1614 1615 enum FILE_FLAGS { 1616 DIRECTORY = 0x0001, UTIME = 0x0002, CRC32 = 0x0004, 1617 UNKNOWN_UNPACKED_SIZE = 0x0008, 1618 }; 1619 1620 enum FILE_ATTRS { 1621 ATTR_READONLY = 0x1, ATTR_HIDDEN = 0x2, ATTR_SYSTEM = 0x4, 1622 ATTR_DIRECTORY = 0x10, 1623 }; 1624 1625 enum COMP_INFO_FLAGS { 1626 SOLID = 0x0040, 1627 }; 1628 1629 if(!read_var_sized(a, &file_flags, NULL)) 1630 return ARCHIVE_EOF; 1631 1632 if(!read_var(a, &unpacked_size, NULL)) 1633 return ARCHIVE_EOF; 1634 1635 if(file_flags & UNKNOWN_UNPACKED_SIZE) { 1636 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 1637 "Files with unknown 
unpacked size are not supported"); 1638 return ARCHIVE_FATAL; 1639 } 1640 1641 rar->file.dir = (uint8_t) ((file_flags & DIRECTORY) > 0); 1642 1643 if(!read_var_sized(a, &file_attr, NULL)) 1644 return ARCHIVE_EOF; 1645 1646 if(file_flags & UTIME) { 1647 if(!read_u32(a, &mtime)) 1648 return ARCHIVE_EOF; 1649 } 1650 1651 if(file_flags & CRC32) { 1652 if(!read_u32(a, &crc)) 1653 return ARCHIVE_EOF; 1654 } 1655 1656 if(!read_var_sized(a, &compression_info, NULL)) 1657 return ARCHIVE_EOF; 1658 1659 c_method = (int) (compression_info >> 7) & 0x7; 1660 c_version = (int) (compression_info & 0x3f); 1661 1662 /* RAR5 seems to limit the dictionary size to 64MB. */ 1663 window_size = (rar->file.dir > 0) ? 1664 0 : 1665 g_unpack_window_size << ((compression_info >> 10) & 15); 1666 rar->cstate.method = c_method; 1667 rar->cstate.version = c_version + 50; 1668 1669 /* Check if window_size is a sane value. Also, if the file is not 1670 * declared as a directory, disallow window_size == 0. */ 1671 if(window_size > (64 * 1024 * 1024) || 1672 (rar->file.dir == 0 && window_size == 0)) 1673 { 1674 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1675 "Declared dictionary size is not supported."); 1676 return ARCHIVE_FATAL; 1677 } 1678 1679 /* Values up to 64M should fit into ssize_t on every 1680 * architecture. 
*/ 1681 rar->cstate.window_size = (ssize_t) window_size; 1682 init_window_mask(rar); 1683 1684 rar->file.solid = (compression_info & SOLID) > 0; 1685 rar->file.service = 0; 1686 1687 if(!read_var_sized(a, &host_os, NULL)) 1688 return ARCHIVE_EOF; 1689 1690 enum HOST_OS { 1691 HOST_WINDOWS = 0, 1692 HOST_UNIX = 1, 1693 }; 1694 1695 if(host_os == HOST_WINDOWS) { 1696 /* Host OS is Windows */ 1697 1698 __LA_MODE_T mode; 1699 1700 if(file_attr & ATTR_DIRECTORY) { 1701 if (file_attr & ATTR_READONLY) { 1702 mode = 0555 | AE_IFDIR; 1703 } else { 1704 mode = 0755 | AE_IFDIR; 1705 } 1706 } else { 1707 if (file_attr & ATTR_READONLY) { 1708 mode = 0444 | AE_IFREG; 1709 } else { 1710 mode = 0644 | AE_IFREG; 1711 } 1712 } 1713 1714 archive_entry_set_mode(entry, mode); 1715 1716 if (file_attr & (ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM)) { 1717 char *fflags_text, *ptr; 1718 /* allocate for "rdonly,hidden,system," */ 1719 fflags_text = malloc(22 * sizeof(char)); 1720 if (fflags_text != NULL) { 1721 ptr = fflags_text; 1722 if (file_attr & ATTR_READONLY) { 1723 strcpy(ptr, "rdonly,"); 1724 ptr = ptr + 7; 1725 } 1726 if (file_attr & ATTR_HIDDEN) { 1727 strcpy(ptr, "hidden,"); 1728 ptr = ptr + 7; 1729 } 1730 if (file_attr & ATTR_SYSTEM) { 1731 strcpy(ptr, "system,"); 1732 ptr = ptr + 7; 1733 } 1734 if (ptr > fflags_text) { 1735 /* Delete trailing comma */ 1736 *(ptr - 1) = '\0'; 1737 archive_entry_copy_fflags_text(entry, 1738 fflags_text); 1739 } 1740 free(fflags_text); 1741 } 1742 } 1743 } else if(host_os == HOST_UNIX) { 1744 /* Host OS is Unix */ 1745 archive_entry_set_mode(entry, (__LA_MODE_T) file_attr); 1746 } else { 1747 /* Unknown host OS */ 1748 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1749 "Unsupported Host OS: 0x%x", (int) host_os); 1750 1751 return ARCHIVE_FATAL; 1752 } 1753 1754 if(!read_var_sized(a, &name_size, NULL)) 1755 return ARCHIVE_EOF; 1756 1757 if(!read_ahead(a, name_size, &p)) 1758 return ARCHIVE_EOF; 1759 1760 if(name_size > (MAX_NAME_IN_CHARS 
- 1)) { 1761 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1762 "Filename is too long"); 1763 1764 return ARCHIVE_FATAL; 1765 } 1766 1767 if(name_size == 0) { 1768 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1769 "No filename specified"); 1770 1771 return ARCHIVE_FATAL; 1772 } 1773 1774 memcpy(name_utf8_buf, p, name_size); 1775 name_utf8_buf[name_size] = 0; 1776 if(ARCHIVE_OK != consume(a, name_size)) { 1777 return ARCHIVE_EOF; 1778 } 1779 1780 archive_entry_update_pathname_utf8(entry, name_utf8_buf); 1781 1782 if(extra_data_size > 0) { 1783 int ret = process_head_file_extra(a, entry, rar, 1784 extra_data_size); 1785 1786 /* Sanity check. */ 1787 if(extra_data_size < 0) { 1788 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 1789 "File extra data size is not zero"); 1790 return ARCHIVE_FATAL; 1791 } 1792 1793 if(ret != ARCHIVE_OK) 1794 return ret; 1795 } 1796 1797 if((file_flags & UNKNOWN_UNPACKED_SIZE) == 0) { 1798 rar->file.unpacked_size = (ssize_t) unpacked_size; 1799 if(rar->file.redir_type == REDIR_TYPE_NONE) 1800 archive_entry_set_size(entry, unpacked_size); 1801 } 1802 1803 if(file_flags & UTIME) { 1804 archive_entry_set_mtime(entry, (time_t) mtime, 0); 1805 } 1806 1807 if(file_flags & CRC32) { 1808 rar->file.stored_crc32 = crc; 1809 } 1810 1811 if(!rar->cstate.switch_multivolume) { 1812 /* Do not reinitialize unpacking state if we're switching 1813 * archives. */ 1814 rar->cstate.block_parsing_finished = 1; 1815 rar->cstate.all_filters_applied = 1; 1816 rar->cstate.initialized = 0; 1817 } 1818 1819 if(rar->generic.split_before > 0) { 1820 /* If now we're standing on a header that has a 'split before' 1821 * mark, it means we're standing on a 'continuation' file 1822 * header. Signal the caller that if it wants to move to 1823 * another file, it must call rar5_read_header() function 1824 * again. 
*/ 1825 1826 return ARCHIVE_RETRY; 1827 } else { 1828 return ARCHIVE_OK; 1829 } 1830} 1831 1832static int process_head_service(struct archive_read* a, struct rar5* rar, 1833 struct archive_entry* entry, size_t block_flags) 1834{ 1835 /* Process this SERVICE block the same way as FILE blocks. */ 1836 int ret = process_head_file(a, rar, entry, block_flags); 1837 if(ret != ARCHIVE_OK) 1838 return ret; 1839 1840 rar->file.service = 1; 1841 1842 /* But skip the data part automatically. It's no use for the user 1843 * anyway. It contains only service data, not even needed to 1844 * properly unpack the file. */ 1845 ret = rar5_read_data_skip(a); 1846 if(ret != ARCHIVE_OK) 1847 return ret; 1848 1849 /* After skipping, try parsing another block automatically. */ 1850 return ARCHIVE_RETRY; 1851} 1852 1853static int process_head_main(struct archive_read* a, struct rar5* rar, 1854 struct archive_entry* entry, size_t block_flags) 1855{ 1856 (void) entry; 1857 1858 int ret; 1859 size_t extra_data_size = 0; 1860 size_t extra_field_size = 0; 1861 size_t extra_field_id = 0; 1862 size_t archive_flags = 0; 1863 1864 if(block_flags & HFL_EXTRA_DATA) { 1865 if(!read_var_sized(a, &extra_data_size, NULL)) 1866 return ARCHIVE_EOF; 1867 } else { 1868 extra_data_size = 0; 1869 } 1870 1871 if(!read_var_sized(a, &archive_flags, NULL)) { 1872 return ARCHIVE_EOF; 1873 } 1874 1875 enum MAIN_FLAGS { 1876 VOLUME = 0x0001, /* multi-volume archive */ 1877 VOLUME_NUMBER = 0x0002, /* volume number, first vol doesn't 1878 * have it */ 1879 SOLID = 0x0004, /* solid archive */ 1880 PROTECT = 0x0008, /* contains Recovery info */ 1881 LOCK = 0x0010, /* readonly flag, not used */ 1882 }; 1883 1884 rar->main.volume = (archive_flags & VOLUME) > 0; 1885 rar->main.solid = (archive_flags & SOLID) > 0; 1886 1887 if(archive_flags & VOLUME_NUMBER) { 1888 size_t v = 0; 1889 if(!read_var_sized(a, &v, NULL)) { 1890 return ARCHIVE_EOF; 1891 } 1892 1893 if (v > UINT_MAX) { 1894 archive_set_error(&a->archive, 1895 
ARCHIVE_ERRNO_FILE_FORMAT, 1896 "Invalid volume number"); 1897 return ARCHIVE_FATAL; 1898 } 1899 1900 rar->main.vol_no = (unsigned int) v; 1901 } else { 1902 rar->main.vol_no = 0; 1903 } 1904 1905 if(rar->vol.expected_vol_no > 0 && 1906 rar->main.vol_no != rar->vol.expected_vol_no) 1907 { 1908 /* Returning EOF instead of FATAL because of strange 1909 * libarchive behavior. When opening multiple files via 1910 * archive_read_open_filenames(), after reading up the whole 1911 * last file, the __archive_read_ahead function wraps up to 1912 * the first archive instead of returning EOF. */ 1913 return ARCHIVE_EOF; 1914 } 1915 1916 if(extra_data_size == 0) { 1917 /* Early return. */ 1918 return ARCHIVE_OK; 1919 } 1920 1921 if(!read_var_sized(a, &extra_field_size, NULL)) { 1922 return ARCHIVE_EOF; 1923 } 1924 1925 if(!read_var_sized(a, &extra_field_id, NULL)) { 1926 return ARCHIVE_EOF; 1927 } 1928 1929 if(extra_field_size == 0) { 1930 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1931 "Invalid extra field size"); 1932 return ARCHIVE_FATAL; 1933 } 1934 1935 enum MAIN_EXTRA { 1936 // Just one attribute here. 1937 LOCATOR = 0x01, 1938 }; 1939 1940 switch(extra_field_id) { 1941 case LOCATOR: 1942 ret = process_main_locator_extra_block(a, rar); 1943 if(ret != ARCHIVE_OK) { 1944 /* Error while parsing main locator extra 1945 * block. */ 1946 return ret; 1947 } 1948 1949 break; 1950 default: 1951 archive_set_error(&a->archive, 1952 ARCHIVE_ERRNO_FILE_FORMAT, 1953 "Unsupported extra type (0x%x)", 1954 (int) extra_field_id); 1955 return ARCHIVE_FATAL; 1956 } 1957 1958 return ARCHIVE_OK; 1959} 1960 1961static int skip_unprocessed_bytes(struct archive_read* a) { 1962 struct rar5* rar = get_context(a); 1963 int ret; 1964 1965 if(rar->file.bytes_remaining) { 1966 /* Use different skipping method in block merging mode than in 1967 * normal mode. 
If merge mode is active, rar5_read_data_skip 1968 * can't be used, because it could allow recursive use of 1969 * merge_block() * function, and this function doesn't support 1970 * recursive use. */ 1971 if(rar->merge_mode) { 1972 /* Discard whole merged block. This is valid in solid 1973 * mode as well, because the code will discard blocks 1974 * only if those blocks are safe to discard (i.e. 1975 * they're not FILE blocks). */ 1976 ret = consume(a, rar->file.bytes_remaining); 1977 if(ret != ARCHIVE_OK) { 1978 return ret; 1979 } 1980 rar->file.bytes_remaining = 0; 1981 } else { 1982 /* If we're not in merge mode, use safe skipping code. 1983 * This will ensure we'll handle solid archives 1984 * properly. */ 1985 ret = rar5_read_data_skip(a); 1986 if(ret != ARCHIVE_OK) { 1987 return ret; 1988 } 1989 } 1990 } 1991 1992 return ARCHIVE_OK; 1993} 1994 1995static int scan_for_signature(struct archive_read* a); 1996 1997/* Base block processing function. A 'base block' is a RARv5 header block 1998 * that tells the reader what kind of data is stored inside the block. 1999 * 2000 * From the birds-eye view a RAR file looks file this: 2001 * 2002 * <magic><base_block_1><base_block_2>...<base_block_n> 2003 * 2004 * There are a few types of base blocks. Those types are specified inside 2005 * the 'switch' statement in this function. For example purposes, I'll write 2006 * how a standard RARv5 file could look like here: 2007 * 2008 * <magic><MAIN><FILE><FILE><FILE><SERVICE><ENDARC> 2009 * 2010 * The structure above could describe an archive file with 3 files in it, 2011 * one service "QuickOpen" block (that is ignored by this parser), and an 2012 * end of file base block marker. 
2013 * 2014 * If the file is stored in multiple archive files ("multiarchive"), it might 2015 * look like this: 2016 * 2017 * .part01.rar: <magic><MAIN><FILE><ENDARC> 2018 * .part02.rar: <magic><MAIN><FILE><ENDARC> 2019 * .part03.rar: <magic><MAIN><FILE><ENDARC> 2020 * 2021 * This example could describe 3 RAR files that contain ONE archived file. 2022 * Or it could describe 3 RAR files that contain 3 different files. Or 3 2023 * RAR files than contain 2 files. It all depends what metadata is stored in 2024 * the headers of <FILE> blocks. 2025 * 2026 * Each <FILE> block contains info about its size, the name of the file it's 2027 * storing inside, and whether this FILE block is a continuation block of 2028 * previous archive ('split before'), and is this FILE block should be 2029 * continued in another archive ('split after'). By parsing the 'split before' 2030 * and 'split after' flags, we're able to tell if multiple <FILE> base blocks 2031 * are describing one file, or multiple files (with the same filename, for 2032 * example). 2033 * 2034 * One thing to note is that if we're parsing the first <FILE> block, and 2035 * we see 'split after' flag, then we need to jump over to another <FILE> 2036 * block to be able to decompress rest of the data. To do this, we need 2037 * to skip the <ENDARC> block, then switch to another file, then skip the 2038 * <magic> block, <MAIN> block, and then we're standing on the proper 2039 * <FILE> block. 2040 */ 2041 2042static int process_base_block(struct archive_read* a, 2043 struct archive_entry* entry) 2044{ 2045 struct rar5* rar = get_context(a); 2046 uint32_t hdr_crc, computed_crc; 2047 size_t raw_hdr_size = 0, hdr_size_len, hdr_size; 2048 size_t header_id = 0; 2049 size_t header_flags = 0; 2050 const uint8_t* p; 2051 int ret; 2052 2053 /* Skip any unprocessed data for this file. */ 2054 ret = skip_unprocessed_bytes(a); 2055 if(ret != ARCHIVE_OK) 2056 return ret; 2057 2058 /* Read the expected CRC32 checksum. 
*/ 2059 if(!read_u32(a, &hdr_crc)) { 2060 return ARCHIVE_EOF; 2061 } 2062 2063 /* Read header size. */ 2064 if(!read_var_sized(a, &raw_hdr_size, &hdr_size_len)) { 2065 return ARCHIVE_EOF; 2066 } 2067 2068 /* Sanity check, maximum header size for RAR5 is 2MB. */ 2069 if(raw_hdr_size > (2 * 1024 * 1024)) { 2070 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2071 "Base block header is too large"); 2072 2073 return ARCHIVE_FATAL; 2074 } 2075 2076 hdr_size = raw_hdr_size + hdr_size_len; 2077 2078 /* Read the whole header data into memory, maximum memory use here is 2079 * 2MB. */ 2080 if(!read_ahead(a, hdr_size, &p)) { 2081 return ARCHIVE_EOF; 2082 } 2083 2084 /* Verify the CRC32 of the header data. */ 2085 computed_crc = (uint32_t) crc32(0, p, (int) hdr_size); 2086 if(computed_crc != hdr_crc) { 2087 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2088 "Header CRC error"); 2089 2090 return ARCHIVE_FATAL; 2091 } 2092 2093 /* If the checksum is OK, we proceed with parsing. */ 2094 if(ARCHIVE_OK != consume(a, hdr_size_len)) { 2095 return ARCHIVE_EOF; 2096 } 2097 2098 if(!read_var_sized(a, &header_id, NULL)) 2099 return ARCHIVE_EOF; 2100 2101 if(!read_var_sized(a, &header_flags, NULL)) 2102 return ARCHIVE_EOF; 2103 2104 rar->generic.split_after = (header_flags & HFL_SPLIT_AFTER) > 0; 2105 rar->generic.split_before = (header_flags & HFL_SPLIT_BEFORE) > 0; 2106 rar->generic.size = (int)hdr_size; 2107 rar->generic.last_header_id = (int)header_id; 2108 rar->main.endarc = 0; 2109 2110 /* Those are possible header ids in RARv5. */ 2111 enum HEADER_TYPE { 2112 HEAD_MARK = 0x00, HEAD_MAIN = 0x01, HEAD_FILE = 0x02, 2113 HEAD_SERVICE = 0x03, HEAD_CRYPT = 0x04, HEAD_ENDARC = 0x05, 2114 HEAD_UNKNOWN = 0xff, 2115 }; 2116 2117 switch(header_id) { 2118 case HEAD_MAIN: 2119 ret = process_head_main(a, rar, entry, header_flags); 2120 2121 /* Main header doesn't have any files in it, so it's 2122 * pointless to return to the caller. 
Retry to next 2123 * header, which should be HEAD_FILE/HEAD_SERVICE. */ 2124 if(ret == ARCHIVE_OK) 2125 return ARCHIVE_RETRY; 2126 2127 return ret; 2128 case HEAD_SERVICE: 2129 ret = process_head_service(a, rar, entry, header_flags); 2130 return ret; 2131 case HEAD_FILE: 2132 ret = process_head_file(a, rar, entry, header_flags); 2133 return ret; 2134 case HEAD_CRYPT: 2135 archive_set_error(&a->archive, 2136 ARCHIVE_ERRNO_FILE_FORMAT, 2137 "Encryption is not supported"); 2138 return ARCHIVE_FATAL; 2139 case HEAD_ENDARC: 2140 rar->main.endarc = 1; 2141 2142 /* After encountering an end of file marker, we need 2143 * to take into consideration if this archive is 2144 * continued in another file (i.e. is it part01.rar: 2145 * is there a part02.rar?) */ 2146 if(rar->main.volume) { 2147 /* In case there is part02.rar, position the 2148 * read pointer in a proper place, so we can 2149 * resume parsing. */ 2150 ret = scan_for_signature(a); 2151 if(ret == ARCHIVE_FATAL) { 2152 return ARCHIVE_EOF; 2153 } else { 2154 if(rar->vol.expected_vol_no == 2155 UINT_MAX) { 2156 archive_set_error(&a->archive, 2157 ARCHIVE_ERRNO_FILE_FORMAT, 2158 "Header error"); 2159 return ARCHIVE_FATAL; 2160 } 2161 2162 rar->vol.expected_vol_no = 2163 rar->main.vol_no + 1; 2164 return ARCHIVE_OK; 2165 } 2166 } else { 2167 return ARCHIVE_EOF; 2168 } 2169 case HEAD_MARK: 2170 return ARCHIVE_EOF; 2171 default: 2172 if((header_flags & HFL_SKIP_IF_UNKNOWN) == 0) { 2173 archive_set_error(&a->archive, 2174 ARCHIVE_ERRNO_FILE_FORMAT, 2175 "Header type error"); 2176 return ARCHIVE_FATAL; 2177 } else { 2178 /* If the block is marked as 'skip if unknown', 2179 * do as the flag says: skip the block 2180 * instead on failing on it. */ 2181 return ARCHIVE_RETRY; 2182 } 2183 } 2184 2185#if !defined WIN32 2186 // Not reached. 
2187 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 2188 "Internal unpacker error"); 2189 return ARCHIVE_FATAL; 2190#endif 2191} 2192 2193static int skip_base_block(struct archive_read* a) { 2194 int ret; 2195 struct rar5* rar = get_context(a); 2196 2197 /* Create a new local archive_entry structure that will be operated on 2198 * by header reader; operations on this archive_entry will be discarded. 2199 */ 2200 struct archive_entry* entry = archive_entry_new(); 2201 ret = process_base_block(a, entry); 2202 2203 /* Discard operations on this archive_entry structure. */ 2204 archive_entry_free(entry); 2205 if(ret == ARCHIVE_FATAL) 2206 return ret; 2207 2208 if(rar->generic.last_header_id == 2 && rar->generic.split_before > 0) 2209 return ARCHIVE_OK; 2210 2211 if(ret == ARCHIVE_OK) 2212 return ARCHIVE_RETRY; 2213 else 2214 return ret; 2215} 2216 2217static int rar5_read_header(struct archive_read *a, 2218 struct archive_entry *entry) 2219{ 2220 struct rar5* rar = get_context(a); 2221 int ret; 2222 2223 if(rar->header_initialized == 0) { 2224 init_header(a); 2225 rar->header_initialized = 1; 2226 } 2227 2228 if(rar->skipped_magic == 0) { 2229 if(ARCHIVE_OK != consume(a, rar5_signature_size)) { 2230 return ARCHIVE_EOF; 2231 } 2232 2233 rar->skipped_magic = 1; 2234 } 2235 2236 do { 2237 ret = process_base_block(a, entry); 2238 } while(ret == ARCHIVE_RETRY || 2239 (rar->main.endarc > 0 && ret == ARCHIVE_OK)); 2240 2241 return ret; 2242} 2243 2244static void init_unpack(struct rar5* rar) { 2245 rar->file.calculated_crc32 = 0; 2246 init_window_mask(rar); 2247 2248 free(rar->cstate.window_buf); 2249 free(rar->cstate.filtered_buf); 2250 2251 if(rar->cstate.window_size > 0) { 2252 rar->cstate.window_buf = calloc(1, rar->cstate.window_size); 2253 rar->cstate.filtered_buf = calloc(1, rar->cstate.window_size); 2254 } else { 2255 rar->cstate.window_buf = NULL; 2256 rar->cstate.filtered_buf = NULL; 2257 } 2258 2259 rar->cstate.write_ptr = 0; 2260 
rar->cstate.last_write_ptr = 0; 2261 2262 memset(&rar->cstate.bd, 0, sizeof(rar->cstate.bd)); 2263 memset(&rar->cstate.ld, 0, sizeof(rar->cstate.ld)); 2264 memset(&rar->cstate.dd, 0, sizeof(rar->cstate.dd)); 2265 memset(&rar->cstate.ldd, 0, sizeof(rar->cstate.ldd)); 2266 memset(&rar->cstate.rd, 0, sizeof(rar->cstate.rd)); 2267} 2268 2269static void update_crc(struct rar5* rar, const uint8_t* p, size_t to_read) { 2270 int verify_crc; 2271 2272 if(rar->skip_mode) { 2273#if defined CHECK_CRC_ON_SOLID_SKIP 2274 verify_crc = 1; 2275#else 2276 verify_crc = 0; 2277#endif 2278 } else 2279 verify_crc = 1; 2280 2281 if(verify_crc) { 2282 /* Don't update CRC32 if the file doesn't have the 2283 * `stored_crc32` info filled in. */ 2284 if(rar->file.stored_crc32 > 0) { 2285 rar->file.calculated_crc32 = 2286 crc32(rar->file.calculated_crc32, p, to_read); 2287 } 2288 2289 /* Check if the file uses an optional BLAKE2sp checksum 2290 * algorithm. */ 2291 if(rar->file.has_blake2 > 0) { 2292 /* Return value of the `update` function is always 0, 2293 * so we can explicitly ignore it here. */ 2294 (void) blake2sp_update(&rar->file.b2state, p, to_read); 2295 } 2296 } 2297} 2298 2299static int create_decode_tables(uint8_t* bit_length, 2300 struct decode_table* table, int size) 2301{ 2302 int code, upper_limit = 0, i, lc[16]; 2303 uint32_t decode_pos_clone[rar5_countof(table->decode_pos)]; 2304 ssize_t cur_len, quick_data_size; 2305 2306 memset(&lc, 0, sizeof(lc)); 2307 memset(table->decode_num, 0, sizeof(table->decode_num)); 2308 table->size = size; 2309 table->quick_bits = size == HUFF_NC ? 
10 : 7; 2310 2311 for(i = 0; i < size; i++) { 2312 lc[bit_length[i] & 15]++; 2313 } 2314 2315 lc[0] = 0; 2316 table->decode_pos[0] = 0; 2317 table->decode_len[0] = 0; 2318 2319 for(i = 1; i < 16; i++) { 2320 upper_limit += lc[i]; 2321 2322 table->decode_len[i] = upper_limit << (16 - i); 2323 table->decode_pos[i] = table->decode_pos[i - 1] + lc[i - 1]; 2324 2325 upper_limit <<= 1; 2326 } 2327 2328 memcpy(decode_pos_clone, table->decode_pos, sizeof(decode_pos_clone)); 2329 2330 for(i = 0; i < size; i++) { 2331 uint8_t clen = bit_length[i] & 15; 2332 if(clen > 0) { 2333 int last_pos = decode_pos_clone[clen]; 2334 table->decode_num[last_pos] = i; 2335 decode_pos_clone[clen]++; 2336 } 2337 } 2338 2339 quick_data_size = (int64_t)1 << table->quick_bits; 2340 cur_len = 1; 2341 for(code = 0; code < quick_data_size; code++) { 2342 int bit_field = code << (16 - table->quick_bits); 2343 int dist, pos; 2344 2345 while(cur_len < rar5_countof(table->decode_len) && 2346 bit_field >= table->decode_len[cur_len]) { 2347 cur_len++; 2348 } 2349 2350 table->quick_len[code] = (uint8_t) cur_len; 2351 2352 dist = bit_field - table->decode_len[cur_len - 1]; 2353 dist >>= (16 - cur_len); 2354 2355 pos = table->decode_pos[cur_len & 15] + dist; 2356 if(cur_len < rar5_countof(table->decode_pos) && pos < size) { 2357 table->quick_num[code] = table->decode_num[pos]; 2358 } else { 2359 table->quick_num[code] = 0; 2360 } 2361 } 2362 2363 return ARCHIVE_OK; 2364} 2365 2366static int decode_number(struct archive_read* a, struct decode_table* table, 2367 const uint8_t* p, uint16_t* num) 2368{ 2369 int i, bits, dist; 2370 uint16_t bitfield; 2371 uint32_t pos; 2372 struct rar5* rar = get_context(a); 2373 2374 if(ARCHIVE_OK != read_bits_16(rar, p, &bitfield)) { 2375 return ARCHIVE_EOF; 2376 } 2377 2378 bitfield &= 0xfffe; 2379 2380 if(bitfield < table->decode_len[table->quick_bits]) { 2381 int code = bitfield >> (16 - table->quick_bits); 2382 skip_bits(rar, table->quick_len[code]); 2383 *num = 
table->quick_num[code]; 2384 return ARCHIVE_OK; 2385 } 2386 2387 bits = 15; 2388 2389 for(i = table->quick_bits + 1; i < 15; i++) { 2390 if(bitfield < table->decode_len[i]) { 2391 bits = i; 2392 break; 2393 } 2394 } 2395 2396 skip_bits(rar, bits); 2397 2398 dist = bitfield - table->decode_len[bits - 1]; 2399 dist >>= (16 - bits); 2400 pos = table->decode_pos[bits] + dist; 2401 2402 if(pos >= table->size) 2403 pos = 0; 2404 2405 *num = table->decode_num[pos]; 2406 return ARCHIVE_OK; 2407} 2408 2409/* Reads and parses Huffman tables from the beginning of the block. */ 2410static int parse_tables(struct archive_read* a, struct rar5* rar, 2411 const uint8_t* p) 2412{ 2413 int ret, value, i, w, idx = 0; 2414 uint8_t bit_length[HUFF_BC], 2415 table[HUFF_TABLE_SIZE], 2416 nibble_mask = 0xF0, 2417 nibble_shift = 4; 2418 2419 enum { ESCAPE = 15 }; 2420 2421 /* The data for table generation is compressed using a simple RLE-like 2422 * algorithm when storing zeroes, so we need to unpack it first. */ 2423 for(w = 0, i = 0; w < HUFF_BC;) { 2424 if(i >= rar->cstate.cur_block_size) { 2425 /* Truncated data, can't continue. */ 2426 archive_set_error(&a->archive, 2427 ARCHIVE_ERRNO_FILE_FORMAT, 2428 "Truncated data in huffman tables"); 2429 return ARCHIVE_FATAL; 2430 } 2431 2432 value = (p[i] & nibble_mask) >> nibble_shift; 2433 2434 if(nibble_mask == 0x0F) 2435 ++i; 2436 2437 nibble_mask ^= 0xFF; 2438 nibble_shift ^= 4; 2439 2440 /* Values smaller than 15 is data, so we write it directly. 2441 * Value 15 is a flag telling us that we need to unpack more 2442 * bytes. */ 2443 if(value == ESCAPE) { 2444 value = (p[i] & nibble_mask) >> nibble_shift; 2445 if(nibble_mask == 0x0F) 2446 ++i; 2447 nibble_mask ^= 0xFF; 2448 nibble_shift ^= 4; 2449 2450 if(value == 0) { 2451 /* We sometimes need to write the actual value 2452 * of 15, so this case handles that. */ 2453 bit_length[w++] = ESCAPE; 2454 } else { 2455 int k; 2456 2457 /* Fill zeroes. 
*/ 2458 for(k = 0; (k < value + 2) && (w < HUFF_BC); 2459 k++) { 2460 bit_length[w++] = 0; 2461 } 2462 } 2463 } else { 2464 bit_length[w++] = value; 2465 } 2466 } 2467 2468 rar->bits.in_addr = i; 2469 rar->bits.bit_addr = nibble_shift ^ 4; 2470 2471 ret = create_decode_tables(bit_length, &rar->cstate.bd, HUFF_BC); 2472 if(ret != ARCHIVE_OK) { 2473 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2474 "Decoding huffman tables failed"); 2475 return ARCHIVE_FATAL; 2476 } 2477 2478 for(i = 0; i < HUFF_TABLE_SIZE;) { 2479 uint16_t num; 2480 2481 if((rar->bits.in_addr + 6) >= rar->cstate.cur_block_size) { 2482 /* Truncated data, can't continue. */ 2483 archive_set_error(&a->archive, 2484 ARCHIVE_ERRNO_FILE_FORMAT, 2485 "Truncated data in huffman tables (#2)"); 2486 return ARCHIVE_FATAL; 2487 } 2488 2489 ret = decode_number(a, &rar->cstate.bd, p, &num); 2490 if(ret != ARCHIVE_OK) { 2491 archive_set_error(&a->archive, 2492 ARCHIVE_ERRNO_FILE_FORMAT, 2493 "Decoding huffman tables failed"); 2494 return ARCHIVE_FATAL; 2495 } 2496 2497 if(num < 16) { 2498 /* 0..15: store directly */ 2499 table[i] = (uint8_t) num; 2500 i++; 2501 continue; 2502 } 2503 2504 if(num < 18) { 2505 /* 16..17: repeat previous code */ 2506 uint16_t n; 2507 if(ARCHIVE_OK != read_bits_16(rar, p, &n)) 2508 return ARCHIVE_EOF; 2509 2510 if(num == 16) { 2511 n >>= 13; 2512 n += 3; 2513 skip_bits(rar, 3); 2514 } else { 2515 n >>= 9; 2516 n += 11; 2517 skip_bits(rar, 7); 2518 } 2519 2520 if(i > 0) { 2521 while(n-- > 0 && i < HUFF_TABLE_SIZE) { 2522 table[i] = table[i - 1]; 2523 i++; 2524 } 2525 } else { 2526 archive_set_error(&a->archive, 2527 ARCHIVE_ERRNO_FILE_FORMAT, 2528 "Unexpected error when decoding " 2529 "huffman tables"); 2530 return ARCHIVE_FATAL; 2531 } 2532 2533 continue; 2534 } 2535 2536 /* other codes: fill with zeroes `n` times */ 2537 uint16_t n; 2538 if(ARCHIVE_OK != read_bits_16(rar, p, &n)) 2539 return ARCHIVE_EOF; 2540 2541 if(num == 18) { 2542 n >>= 13; 2543 n += 3; 2544 
skip_bits(rar, 3); 2545 } else { 2546 n >>= 9; 2547 n += 11; 2548 skip_bits(rar, 7); 2549 } 2550 2551 while(n-- > 0 && i < HUFF_TABLE_SIZE) 2552 table[i++] = 0; 2553 } 2554 2555 ret = create_decode_tables(&table[idx], &rar->cstate.ld, HUFF_NC); 2556 if(ret != ARCHIVE_OK) { 2557 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2558 "Failed to create literal table"); 2559 return ARCHIVE_FATAL; 2560 } 2561 2562 idx += HUFF_NC; 2563 2564 ret = create_decode_tables(&table[idx], &rar->cstate.dd, HUFF_DC); 2565 if(ret != ARCHIVE_OK) { 2566 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2567 "Failed to create distance table"); 2568 return ARCHIVE_FATAL; 2569 } 2570 2571 idx += HUFF_DC; 2572 2573 ret = create_decode_tables(&table[idx], &rar->cstate.ldd, HUFF_LDC); 2574 if(ret != ARCHIVE_OK) { 2575 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2576 "Failed to create lower bits of distances table"); 2577 return ARCHIVE_FATAL; 2578 } 2579 2580 idx += HUFF_LDC; 2581 2582 ret = create_decode_tables(&table[idx], &rar->cstate.rd, HUFF_RC); 2583 if(ret != ARCHIVE_OK) { 2584 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2585 "Failed to create repeating distances table"); 2586 return ARCHIVE_FATAL; 2587 } 2588 2589 return ARCHIVE_OK; 2590} 2591 2592/* Parses the block header, verifies its CRC byte, and saves the header 2593 * fields inside the `hdr` pointer. */ 2594static int parse_block_header(struct archive_read* a, const uint8_t* p, 2595 ssize_t* block_size, struct compressed_block_header* hdr) 2596{ 2597 memcpy(hdr, p, sizeof(struct compressed_block_header)); 2598 2599 if(bf_byte_count(hdr) > 2) { 2600 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2601 "Unsupported block header size (was %d, max is 2)", 2602 bf_byte_count(hdr)); 2603 return ARCHIVE_FATAL; 2604 } 2605 2606 /* This should probably use bit reader interface in order to be more 2607 * future-proof. 
*/ 2608 *block_size = 0; 2609 switch(bf_byte_count(hdr)) { 2610 /* 1-byte block size */ 2611 case 0: 2612 *block_size = *(const uint8_t*) &p[2]; 2613 break; 2614 2615 /* 2-byte block size */ 2616 case 1: 2617 *block_size = archive_le16dec(&p[2]); 2618 break; 2619 2620 /* 3-byte block size */ 2621 case 2: 2622 *block_size = archive_le32dec(&p[2]); 2623 *block_size &= 0x00FFFFFF; 2624 break; 2625 2626 /* Other block sizes are not supported. This case is not 2627 * reached, because we have an 'if' guard before the switch 2628 * that makes sure of it. */ 2629 default: 2630 return ARCHIVE_FATAL; 2631 } 2632 2633 /* Verify the block header checksum. 0x5A is a magic value and is 2634 * always * constant. */ 2635 uint8_t calculated_cksum = 0x5A 2636 ^ (uint8_t) hdr->block_flags_u8 2637 ^ (uint8_t) *block_size 2638 ^ (uint8_t) (*block_size >> 8) 2639 ^ (uint8_t) (*block_size >> 16); 2640 2641 if(calculated_cksum != hdr->block_cksum) { 2642 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2643 "Block checksum error: got 0x%x, expected 0x%x", 2644 hdr->block_cksum, calculated_cksum); 2645 2646 return ARCHIVE_FATAL; 2647 } 2648 2649 return ARCHIVE_OK; 2650} 2651 2652/* Convenience function used during filter processing. */ 2653static int parse_filter_data(struct rar5* rar, const uint8_t* p, 2654 uint32_t* filter_data) 2655{ 2656 int i, bytes; 2657 uint32_t data = 0; 2658 2659 if(ARCHIVE_OK != read_consume_bits(rar, p, 2, &bytes)) 2660 return ARCHIVE_EOF; 2661 2662 bytes++; 2663 2664 for(i = 0; i < bytes; i++) { 2665 uint16_t byte; 2666 2667 if(ARCHIVE_OK != read_bits_16(rar, p, &byte)) { 2668 return ARCHIVE_EOF; 2669 } 2670 2671 /* Cast to uint32_t will ensure the shift operation will not 2672 * produce undefined result. */ 2673 data += ((uint32_t) byte >> 8) << (i * 8); 2674 skip_bits(rar, 8); 2675 } 2676 2677 *filter_data = data; 2678 return ARCHIVE_OK; 2679} 2680 2681/* Function is used during sanity checking. 
*/ 2682static int is_valid_filter_block_start(struct rar5* rar, 2683 uint32_t start) 2684{ 2685 const int64_t block_start = (ssize_t) start + rar->cstate.write_ptr; 2686 const int64_t last_bs = rar->cstate.last_block_start; 2687 const ssize_t last_bl = rar->cstate.last_block_length; 2688 2689 if(last_bs == 0 || last_bl == 0) { 2690 /* We didn't have any filters yet, so accept this offset. */ 2691 return 1; 2692 } 2693 2694 if(block_start >= last_bs + last_bl) { 2695 /* Current offset is bigger than last block's end offset, so 2696 * accept current offset. */ 2697 return 1; 2698 } 2699 2700 /* Any other case is not a normal situation and we should fail. */ 2701 return 0; 2702} 2703 2704/* The function will create a new filter, read its parameters from the input 2705 * stream and add it to the filter collection. */ 2706static int parse_filter(struct archive_read* ar, const uint8_t* p) { 2707 uint32_t block_start, block_length; 2708 uint16_t filter_type; 2709 struct rar5* rar = get_context(ar); 2710 2711 /* Read the parameters from the input stream. */ 2712 if(ARCHIVE_OK != parse_filter_data(rar, p, &block_start)) 2713 return ARCHIVE_EOF; 2714 2715 if(ARCHIVE_OK != parse_filter_data(rar, p, &block_length)) 2716 return ARCHIVE_EOF; 2717 2718 if(ARCHIVE_OK != read_bits_16(rar, p, &filter_type)) 2719 return ARCHIVE_EOF; 2720 2721 filter_type >>= 13; 2722 skip_bits(rar, 3); 2723 2724 /* Perform some sanity checks on this filter parameters. Note that we 2725 * allow only DELTA, E8/E9 and ARM filters here, because rest of 2726 * filters are not used in RARv5. */ 2727 2728 if(block_length < 4 || 2729 block_length > 0x400000 || 2730 filter_type > FILTER_ARM || 2731 !is_valid_filter_block_start(rar, block_start)) 2732 { 2733 archive_set_error(&ar->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2734 "Invalid filter encountered"); 2735 return ARCHIVE_FATAL; 2736 } 2737 2738 /* Allocate a new filter. 
*/ 2739 struct filter_info* filt = add_new_filter(rar); 2740 if(filt == NULL) { 2741 archive_set_error(&ar->archive, ENOMEM, 2742 "Can't allocate memory for a filter descriptor."); 2743 return ARCHIVE_FATAL; 2744 } 2745 2746 filt->type = filter_type; 2747 filt->block_start = rar->cstate.write_ptr + block_start; 2748 filt->block_length = block_length; 2749 2750 rar->cstate.last_block_start = filt->block_start; 2751 rar->cstate.last_block_length = filt->block_length; 2752 2753 /* Read some more data in case this is a DELTA filter. Other filter 2754 * types don't require any additional data over what was already 2755 * read. */ 2756 if(filter_type == FILTER_DELTA) { 2757 int channels; 2758 2759 if(ARCHIVE_OK != read_consume_bits(rar, p, 5, &channels)) 2760 return ARCHIVE_EOF; 2761 2762 filt->channels = channels + 1; 2763 } 2764 2765 return ARCHIVE_OK; 2766} 2767 2768static int decode_code_length(struct rar5* rar, const uint8_t* p, 2769 uint16_t code) 2770{ 2771 int lbits, length = 2; 2772 if(code < 8) { 2773 lbits = 0; 2774 length += code; 2775 } else { 2776 lbits = code / 4 - 1; 2777 length += (4 | (code & 3)) << lbits; 2778 } 2779 2780 if(lbits > 0) { 2781 int add; 2782 2783 if(ARCHIVE_OK != read_consume_bits(rar, p, lbits, &add)) 2784 return -1; 2785 2786 length += add; 2787 } 2788 2789 return length; 2790} 2791 2792static int copy_string(struct archive_read* a, int len, int dist) { 2793 struct rar5* rar = get_context(a); 2794 const uint64_t cmask = rar->cstate.window_mask; 2795 const uint64_t write_ptr = rar->cstate.write_ptr + 2796 rar->cstate.solid_offset; 2797 int i; 2798 2799 if (rar->cstate.window_buf == NULL) 2800 return ARCHIVE_FATAL; 2801 2802 /* The unpacker spends most of the time in this function. It would be 2803 * a good idea to introduce some optimizations here. 2804 * 2805 * Just remember that this loop treats buffers that overlap differently 2806 * than buffers that do not overlap. This is why a simple memcpy(3) 2807 * call will not be enough. 
*/ 2808 2809 for(i = 0; i < len; i++) { 2810 const ssize_t write_idx = (write_ptr + i) & cmask; 2811 const ssize_t read_idx = (write_ptr + i - dist) & cmask; 2812 rar->cstate.window_buf[write_idx] = 2813 rar->cstate.window_buf[read_idx]; 2814 } 2815 2816 rar->cstate.write_ptr += len; 2817 return ARCHIVE_OK; 2818} 2819 2820static int do_uncompress_block(struct archive_read* a, const uint8_t* p) { 2821 struct rar5* rar = get_context(a); 2822 uint16_t num; 2823 int ret; 2824 2825 const uint64_t cmask = rar->cstate.window_mask; 2826 const struct compressed_block_header* hdr = &rar->last_block_hdr; 2827 const uint8_t bit_size = 1 + bf_bit_size(hdr); 2828 2829 while(1) { 2830 if(rar->cstate.write_ptr - rar->cstate.last_write_ptr > 2831 (rar->cstate.window_size >> 1)) { 2832 /* Don't allow growing data by more than half of the 2833 * window size at a time. In such case, break the loop; 2834 * next call to this function will continue processing 2835 * from this moment. */ 2836 break; 2837 } 2838 2839 if(rar->bits.in_addr > rar->cstate.cur_block_size - 1 || 2840 (rar->bits.in_addr == rar->cstate.cur_block_size - 1 && 2841 rar->bits.bit_addr >= bit_size)) 2842 { 2843 /* If the program counter is here, it means the 2844 * function has finished processing the block. */ 2845 rar->cstate.block_parsing_finished = 1; 2846 break; 2847 } 2848 2849 /* Decode the next literal. */ 2850 if(ARCHIVE_OK != decode_number(a, &rar->cstate.ld, p, &num)) { 2851 return ARCHIVE_EOF; 2852 } 2853 2854 /* Num holds a decompression literal, or 'command code'. 2855 * 2856 * - Values lower than 256 are just bytes. Those codes 2857 * can be stored in the output buffer directly. 2858 * 2859 * - Code 256 defines a new filter, which is later used to 2860 * ransform the data block accordingly to the filter type. 2861 * The data block needs to be fully uncompressed first. 
2862 * 2863 * - Code bigger than 257 and smaller than 262 define 2864 * a repetition pattern that should be copied from 2865 * an already uncompressed chunk of data. 2866 */ 2867 2868 if(num < 256) { 2869 /* Directly store the byte. */ 2870 int64_t write_idx = rar->cstate.solid_offset + 2871 rar->cstate.write_ptr++; 2872 2873 rar->cstate.window_buf[write_idx & cmask] = 2874 (uint8_t) num; 2875 continue; 2876 } else if(num >= 262) { 2877 uint16_t dist_slot; 2878 int len = decode_code_length(rar, p, num - 262), 2879 dbits, 2880 dist = 1; 2881 2882 if(len == -1) { 2883 archive_set_error(&a->archive, 2884 ARCHIVE_ERRNO_PROGRAMMER, 2885 "Failed to decode the code length"); 2886 2887 return ARCHIVE_FATAL; 2888 } 2889 2890 if(ARCHIVE_OK != decode_number(a, &rar->cstate.dd, p, 2891 &dist_slot)) 2892 { 2893 archive_set_error(&a->archive, 2894 ARCHIVE_ERRNO_PROGRAMMER, 2895 "Failed to decode the distance slot"); 2896 2897 return ARCHIVE_FATAL; 2898 } 2899 2900 if(dist_slot < 4) { 2901 dbits = 0; 2902 dist += dist_slot; 2903 } else { 2904 dbits = dist_slot / 2 - 1; 2905 2906 /* Cast to uint32_t will make sure the shift 2907 * left operation won't produce undefined 2908 * result. Then, the uint32_t type will 2909 * be implicitly casted to int. */ 2910 dist += (uint32_t) (2 | 2911 (dist_slot & 1)) << dbits; 2912 } 2913 2914 if(dbits > 0) { 2915 if(dbits >= 4) { 2916 uint32_t add = 0; 2917 uint16_t low_dist; 2918 2919 if(dbits > 4) { 2920 if(ARCHIVE_OK != read_bits_32( 2921 rar, p, &add)) { 2922 /* Return EOF if we 2923 * can't read more 2924 * data. 
*/ 2925 return ARCHIVE_EOF; 2926 } 2927 2928 skip_bits(rar, dbits - 4); 2929 add = (add >> ( 2930 36 - dbits)) << 4; 2931 dist += add; 2932 } 2933 2934 if(ARCHIVE_OK != decode_number(a, 2935 &rar->cstate.ldd, p, &low_dist)) 2936 { 2937 archive_set_error(&a->archive, 2938 ARCHIVE_ERRNO_PROGRAMMER, 2939 "Failed to decode the " 2940 "distance slot"); 2941 2942 return ARCHIVE_FATAL; 2943 } 2944 2945 if(dist >= INT_MAX - low_dist - 1) { 2946 /* This only happens in 2947 * invalid archives. */ 2948 archive_set_error(&a->archive, 2949 ARCHIVE_ERRNO_FILE_FORMAT, 2950 "Distance pointer " 2951 "overflow"); 2952 return ARCHIVE_FATAL; 2953 } 2954 2955 dist += low_dist; 2956 } else { 2957 /* dbits is one of [0,1,2,3] */ 2958 int add; 2959 2960 if(ARCHIVE_OK != read_consume_bits(rar, 2961 p, dbits, &add)) { 2962 /* Return EOF if we can't read 2963 * more data. */ 2964 return ARCHIVE_EOF; 2965 } 2966 2967 dist += add; 2968 } 2969 } 2970 2971 if(dist > 0x100) { 2972 len++; 2973 2974 if(dist > 0x2000) { 2975 len++; 2976 2977 if(dist > 0x40000) { 2978 len++; 2979 } 2980 } 2981 } 2982 2983 dist_cache_push(rar, dist); 2984 rar->cstate.last_len = len; 2985 2986 if(ARCHIVE_OK != copy_string(a, len, dist)) 2987 return ARCHIVE_FATAL; 2988 2989 continue; 2990 } else if(num == 256) { 2991 /* Create a filter. 
*/ 2992 ret = parse_filter(a, p); 2993 if(ret != ARCHIVE_OK) 2994 return ret; 2995 2996 continue; 2997 } else if(num == 257) { 2998 if(rar->cstate.last_len != 0) { 2999 if(ARCHIVE_OK != copy_string(a, 3000 rar->cstate.last_len, 3001 rar->cstate.dist_cache[0])) 3002 { 3003 return ARCHIVE_FATAL; 3004 } 3005 } 3006 3007 continue; 3008 } else if(num < 262) { 3009 const int idx = num - 258; 3010 const int dist = dist_cache_touch(rar, idx); 3011 3012 uint16_t len_slot; 3013 int len; 3014 3015 if(ARCHIVE_OK != decode_number(a, &rar->cstate.rd, p, 3016 &len_slot)) { 3017 return ARCHIVE_FATAL; 3018 } 3019 3020 len = decode_code_length(rar, p, len_slot); 3021 rar->cstate.last_len = len; 3022 3023 if(ARCHIVE_OK != copy_string(a, len, dist)) 3024 return ARCHIVE_FATAL; 3025 3026 continue; 3027 } 3028 3029 /* The program counter shouldn't reach here. */ 3030 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 3031 "Unsupported block code: 0x%x", num); 3032 3033 return ARCHIVE_FATAL; 3034 } 3035 3036 return ARCHIVE_OK; 3037} 3038 3039/* Binary search for the RARv5 signature. */ 3040static int scan_for_signature(struct archive_read* a) { 3041 const uint8_t* p; 3042 const int chunk_size = 512; 3043 ssize_t i; 3044 3045 /* If we're here, it means we're on an 'unknown territory' data. 3046 * There's no indication what kind of data we're reading here. 3047 * It could be some text comment, any kind of binary data, 3048 * digital sign, dragons, etc. 3049 * 3050 * We want to find a valid RARv5 magic header inside this unknown 3051 * data. */ 3052 3053 /* Is it possible in libarchive to just skip everything until the 3054 * end of the file? If so, it would be a better approach than the 3055 * current implementation of this function. 
*/ 3056 3057 while(1) { 3058 if(!read_ahead(a, chunk_size, &p)) 3059 return ARCHIVE_EOF; 3060 3061 for(i = 0; i < chunk_size - rar5_signature_size; i++) { 3062 if(memcmp(&p[i], rar5_signature, 3063 rar5_signature_size) == 0) { 3064 /* Consume the number of bytes we've used to 3065 * search for the signature, as well as the 3066 * number of bytes used by the signature 3067 * itself. After this we should be standing 3068 * on a valid base block header. */ 3069 (void) consume(a, i + rar5_signature_size); 3070 return ARCHIVE_OK; 3071 } 3072 } 3073 3074 consume(a, chunk_size); 3075 } 3076 3077 return ARCHIVE_FATAL; 3078} 3079 3080/* This function will switch the multivolume archive file to another file, 3081 * i.e. from part03 to part 04. */ 3082static int advance_multivolume(struct archive_read* a) { 3083 int lret; 3084 struct rar5* rar = get_context(a); 3085 3086 /* A small state machine that will skip unnecessary data, needed to 3087 * switch from one multivolume to another. Such skipping is needed if 3088 * we want to be an stream-oriented (instead of file-oriented) 3089 * unpacker. 3090 * 3091 * The state machine starts with `rar->main.endarc` == 0. It also 3092 * assumes that current stream pointer points to some base block 3093 * header. 3094 * 3095 * The `endarc` field is being set when the base block parsing 3096 * function encounters the 'end of archive' marker. 3097 */ 3098 3099 while(1) { 3100 if(rar->main.endarc == 1) { 3101 int looping = 1; 3102 3103 rar->main.endarc = 0; 3104 3105 while(looping) { 3106 lret = skip_base_block(a); 3107 switch(lret) { 3108 case ARCHIVE_RETRY: 3109 /* Continue looping. */ 3110 break; 3111 case ARCHIVE_OK: 3112 /* Break loop. */ 3113 looping = 0; 3114 break; 3115 default: 3116 /* Forward any errors to the 3117 * caller. */ 3118 return lret; 3119 } 3120 } 3121 3122 break; 3123 } else { 3124 /* Skip current base block. In order to properly skip 3125 * it, we really need to simply parse it and discard 3126 * the results. 
*/ 3127 3128 lret = skip_base_block(a); 3129 if(lret == ARCHIVE_FATAL || lret == ARCHIVE_FAILED) 3130 return lret; 3131 3132 /* The `skip_base_block` function tells us if we 3133 * should continue with skipping, or we should stop 3134 * skipping. We're trying to skip everything up to 3135 * a base FILE block. */ 3136 3137 if(lret != ARCHIVE_RETRY) { 3138 /* If there was an error during skipping, or we 3139 * have just skipped a FILE base block... */ 3140 3141 if(rar->main.endarc == 0) { 3142 return lret; 3143 } else { 3144 continue; 3145 } 3146 } 3147 } 3148 } 3149 3150 return ARCHIVE_OK; 3151} 3152 3153/* Merges the partial block from the first multivolume archive file, and 3154 * partial block from the second multivolume archive file. The result is 3155 * a chunk of memory containing the whole block, and the stream pointer 3156 * is advanced to the next block in the second multivolume archive file. */ 3157static int merge_block(struct archive_read* a, ssize_t block_size, 3158 const uint8_t** p) 3159{ 3160 struct rar5* rar = get_context(a); 3161 ssize_t cur_block_size, partial_offset = 0; 3162 const uint8_t* lp; 3163 int ret; 3164 3165 if(rar->merge_mode) { 3166 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 3167 "Recursive merge is not allowed"); 3168 3169 return ARCHIVE_FATAL; 3170 } 3171 3172 /* Set a flag that we're in the switching mode. */ 3173 rar->cstate.switch_multivolume = 1; 3174 3175 /* Reallocate the memory which will hold the whole block. */ 3176 if(rar->vol.push_buf) 3177 free((void*) rar->vol.push_buf); 3178 3179 /* Increasing the allocation block by 8 is due to bit reading functions, 3180 * which are using additional 2 or 4 bytes. Allocating the block size 3181 * by exact value would make bit reader perform reads from invalid 3182 * memory block when reading the last byte from the buffer. 
*/ 3183 rar->vol.push_buf = malloc(block_size + 8); 3184 if(!rar->vol.push_buf) { 3185 archive_set_error(&a->archive, ENOMEM, 3186 "Can't allocate memory for a merge block buffer."); 3187 return ARCHIVE_FATAL; 3188 } 3189 3190 /* Valgrind complains if the extension block for bit reader is not 3191 * initialized, so initialize it. */ 3192 memset(&rar->vol.push_buf[block_size], 0, 8); 3193 3194 /* A single block can span across multiple multivolume archive files, 3195 * so we use a loop here. This loop will consume enough multivolume 3196 * archive files until the whole block is read. */ 3197 3198 while(1) { 3199 /* Get the size of current block chunk in this multivolume 3200 * archive file and read it. */ 3201 cur_block_size = rar5_min(rar->file.bytes_remaining, 3202 block_size - partial_offset); 3203 3204 if(cur_block_size == 0) { 3205 archive_set_error(&a->archive, 3206 ARCHIVE_ERRNO_FILE_FORMAT, 3207 "Encountered block size == 0 during block merge"); 3208 return ARCHIVE_FATAL; 3209 } 3210 3211 if(!read_ahead(a, cur_block_size, &lp)) 3212 return ARCHIVE_EOF; 3213 3214 /* Sanity check; there should never be a situation where this 3215 * function reads more data than the block's size. */ 3216 if(partial_offset + cur_block_size > block_size) { 3217 archive_set_error(&a->archive, 3218 ARCHIVE_ERRNO_PROGRAMMER, 3219 "Consumed too much data when merging blocks."); 3220 return ARCHIVE_FATAL; 3221 } 3222 3223 /* Merge previous block chunk with current block chunk, 3224 * or create first block chunk if this is our first 3225 * iteration. */ 3226 memcpy(&rar->vol.push_buf[partial_offset], lp, cur_block_size); 3227 3228 /* Advance the stream read pointer by this block chunk size. */ 3229 if(ARCHIVE_OK != consume(a, cur_block_size)) 3230 return ARCHIVE_EOF; 3231 3232 /* Update the pointers. `partial_offset` contains information 3233 * about the sum of merged block chunks. 
*/ 3234 partial_offset += cur_block_size; 3235 rar->file.bytes_remaining -= cur_block_size; 3236 3237 /* If `partial_offset` is the same as `block_size`, this means 3238 * we've merged all block chunks and we have a valid full 3239 * block. */ 3240 if(partial_offset == block_size) { 3241 break; 3242 } 3243 3244 /* If we don't have any bytes to read, this means we should 3245 * switch to another multivolume archive file. */ 3246 if(rar->file.bytes_remaining == 0) { 3247 rar->merge_mode++; 3248 ret = advance_multivolume(a); 3249 rar->merge_mode--; 3250 if(ret != ARCHIVE_OK) { 3251 return ret; 3252 } 3253 } 3254 } 3255 3256 *p = rar->vol.push_buf; 3257 3258 /* If we're here, we can resume unpacking by processing the block 3259 * pointed to by the `*p` memory pointer. */ 3260 3261 return ARCHIVE_OK; 3262} 3263 3264static int process_block(struct archive_read* a) { 3265 const uint8_t* p; 3266 struct rar5* rar = get_context(a); 3267 int ret; 3268 3269 /* If we don't have any data to be processed, this most probably means 3270 * we need to switch to the next volume. */ 3271 if(rar->main.volume && rar->file.bytes_remaining == 0) { 3272 ret = advance_multivolume(a); 3273 if(ret != ARCHIVE_OK) 3274 return ret; 3275 } 3276 3277 if(rar->cstate.block_parsing_finished) { 3278 ssize_t block_size; 3279 3280 /* The header size won't be bigger than 6 bytes. */ 3281 if(!read_ahead(a, 6, &p)) { 3282 /* Failed to prefetch data block header. */ 3283 return ARCHIVE_EOF; 3284 } 3285 3286 /* 3287 * Read block_size by parsing block header. Validate the header 3288 * by calculating CRC byte stored inside the header. Size of 3289 * the header is not constant (block size can be stored either 3290 * in 1 or 2 bytes), that's why block size is left out from the 3291 * `compressed_block_header` structure and returned by 3292 * `parse_block_header` as the second argument. 
*/ 3293 3294 ret = parse_block_header(a, p, &block_size, 3295 &rar->last_block_hdr); 3296 if(ret != ARCHIVE_OK) { 3297 return ret; 3298 } 3299 3300 /* Skip block header. Next data is huffman tables, 3301 * if present. */ 3302 ssize_t to_skip = sizeof(struct compressed_block_header) + 3303 bf_byte_count(&rar->last_block_hdr) + 1; 3304 3305 if(ARCHIVE_OK != consume(a, to_skip)) 3306 return ARCHIVE_EOF; 3307 3308 rar->file.bytes_remaining -= to_skip; 3309 3310 /* The block size gives information about the whole block size, 3311 * but the block could be stored in split form when using 3312 * multi-volume archives. In this case, the block size will be 3313 * bigger than the actual data stored in this file. Remaining 3314 * part of the data will be in another file. */ 3315 3316 ssize_t cur_block_size = 3317 rar5_min(rar->file.bytes_remaining, block_size); 3318 3319 if(block_size > rar->file.bytes_remaining) { 3320 /* If current blocks' size is bigger than our data 3321 * size, this means we have a multivolume archive. 3322 * In this case, skip all base headers until the end 3323 * of the file, proceed to next "partXXX.rar" volume, 3324 * find its signature, skip all headers up to the first 3325 * FILE base header, and continue from there. 3326 * 3327 * Note that `merge_block` will update the `rar` 3328 * context structure quite extensively. */ 3329 3330 ret = merge_block(a, block_size, &p); 3331 if(ret != ARCHIVE_OK) { 3332 return ret; 3333 } 3334 3335 cur_block_size = block_size; 3336 3337 /* Current stream pointer should be now directly 3338 * *after* the block that spanned through multiple 3339 * archive files. `p` pointer should have the data of 3340 * the *whole* block (merged from partial blocks 3341 * stored in multiple archives files). */ 3342 } else { 3343 rar->cstate.switch_multivolume = 0; 3344 3345 /* Read the whole block size into memory. This can take 3346 * up to 8 megabytes of memory in theoretical cases. 
3347 * Might be worth to optimize this and use a standard 3348 * chunk of 4kb's. */ 3349 if(!read_ahead(a, 4 + cur_block_size, &p)) { 3350 /* Failed to prefetch block data. */ 3351 return ARCHIVE_EOF; 3352 } 3353 } 3354 3355 rar->cstate.block_buf = p; 3356 rar->cstate.cur_block_size = cur_block_size; 3357 rar->cstate.block_parsing_finished = 0; 3358 3359 rar->bits.in_addr = 0; 3360 rar->bits.bit_addr = 0; 3361 3362 if(bf_is_table_present(&rar->last_block_hdr)) { 3363 /* Load Huffman tables. */ 3364 ret = parse_tables(a, rar, p); 3365 if(ret != ARCHIVE_OK) { 3366 /* Error during decompression of Huffman 3367 * tables. */ 3368 return ret; 3369 } 3370 } 3371 } else { 3372 /* Block parsing not finished, reuse previous memory buffer. */ 3373 p = rar->cstate.block_buf; 3374 } 3375 3376 /* Uncompress the block, or a part of it, depending on how many bytes 3377 * will be generated by uncompressing the block. 3378 * 3379 * In case too many bytes will be generated, calling this function 3380 * again will resume the uncompression operation. */ 3381 ret = do_uncompress_block(a, p); 3382 if(ret != ARCHIVE_OK) { 3383 return ret; 3384 } 3385 3386 if(rar->cstate.block_parsing_finished && 3387 rar->cstate.switch_multivolume == 0 && 3388 rar->cstate.cur_block_size > 0) 3389 { 3390 /* If we're processing a normal block, consume the whole 3391 * block. We can do this because we've already read the whole 3392 * block to memory. */ 3393 if(ARCHIVE_OK != consume(a, rar->cstate.cur_block_size)) 3394 return ARCHIVE_FATAL; 3395 3396 rar->file.bytes_remaining -= rar->cstate.cur_block_size; 3397 } else if(rar->cstate.switch_multivolume) { 3398 /* Don't consume the block if we're doing multivolume 3399 * processing. The volume switching function will consume 3400 * the proper count of bytes instead. */ 3401 rar->cstate.switch_multivolume = 0; 3402 } 3403 3404 return ARCHIVE_OK; 3405} 3406 3407/* Pops the `buf`, `size` and `offset` from the "data ready" stack. 
3408 * 3409 * Returns ARCHIVE_OK when those arguments can be used, ARCHIVE_RETRY 3410 * when there is no data on the stack. */ 3411static int use_data(struct rar5* rar, const void** buf, size_t* size, 3412 int64_t* offset) 3413{ 3414 int i; 3415 3416 for(i = 0; i < rar5_countof(rar->cstate.dready); i++) { 3417 struct data_ready *d = &rar->cstate.dready[i]; 3418 3419 if(d->used) { 3420 if(buf) *buf = d->buf; 3421 if(size) *size = d->size; 3422 if(offset) *offset = d->offset; 3423 3424 d->used = 0; 3425 return ARCHIVE_OK; 3426 } 3427 } 3428 3429 return ARCHIVE_RETRY; 3430} 3431 3432/* Pushes the `buf`, `size` and `offset` arguments to the rar->cstate.dready 3433 * FIFO stack. Those values will be popped from this stack by the `use_data` 3434 * function. */ 3435static int push_data_ready(struct archive_read* a, struct rar5* rar, 3436 const uint8_t* buf, size_t size, int64_t offset) 3437{ 3438 int i; 3439 3440 /* Don't push if we're in skip mode. This is needed because solid 3441 * streams need full processing even if we're skipping data. After 3442 * fully processing the stream, we need to discard the generated bytes, 3443 * because we're interested only in the side effect: building up the 3444 * internal window circular buffer. This window buffer will be used 3445 * later during unpacking of requested data. */ 3446 if(rar->skip_mode) 3447 return ARCHIVE_OK; 3448 3449 /* Sanity check. */ 3450 if(offset != rar->file.last_offset + rar->file.last_size) { 3451 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 3452 "Sanity check error: output stream is not continuous"); 3453 return ARCHIVE_FATAL; 3454 } 3455 3456 for(i = 0; i < rar5_countof(rar->cstate.dready); i++) { 3457 struct data_ready* d = &rar->cstate.dready[i]; 3458 if(!d->used) { 3459 d->used = 1; 3460 d->buf = buf; 3461 d->size = size; 3462 d->offset = offset; 3463 3464 /* These fields are used only in sanity checking. 
*/ 3465 rar->file.last_offset = offset; 3466 rar->file.last_size = size; 3467 3468 /* Calculate the checksum of this new block before 3469 * submitting data to libarchive's engine. */ 3470 update_crc(rar, d->buf, d->size); 3471 3472 return ARCHIVE_OK; 3473 } 3474 } 3475 3476 /* Program counter will reach this code if the `rar->cstate.data_ready` 3477 * stack will be filled up so that no new entries will be allowed. The 3478 * code shouldn't allow such situation to occur. So we treat this case 3479 * as an internal error. */ 3480 3481 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 3482 "Error: premature end of data_ready stack"); 3483 return ARCHIVE_FATAL; 3484} 3485 3486/* This function uncompresses the data that is stored in the <FILE> base 3487 * block. 3488 * 3489 * The FILE base block looks like this: 3490 * 3491 * <header><huffman tables><block_1><block_2>...<block_n> 3492 * 3493 * The <header> is a block header, that is parsed in parse_block_header(). 3494 * It's a "compressed_block_header" structure, containing metadata needed 3495 * to know when we should stop looking for more <block_n> blocks. 3496 * 3497 * <huffman tables> contain data needed to set up the huffman tables, needed 3498 * for the actual decompression. 3499 * 3500 * Each <block_n> consists of series of literals: 3501 * 3502 * <literal><literal><literal>...<literal> 3503 * 3504 * Those literals generate the uncompression data. They operate on a circular 3505 * buffer, sometimes writing raw data into it, sometimes referencing 3506 * some previous data inside this buffer, and sometimes declaring a filter 3507 * that will need to be executed on the data stored in the circular buffer. 3508 * It all depends on the literal that is used. 3509 * 3510 * Sometimes blocks produce output data, sometimes they don't. For example, for 3511 * some huge files that use lots of filters, sometimes a block is filled with 3512 * only filter declaration literals. 
Such blocks won't produce any data in the 3513 * circular buffer. 3514 * 3515 * Sometimes blocks will produce 4 bytes of data, and sometimes 1 megabyte, 3516 * because a literal can reference previously decompressed data. For example, 3517 * there can be a literal that says: 'append a byte 0xFE here', and after 3518 * it another literal can say 'append 1 megabyte of data from circular buffer 3519 * offset 0x12345'. This is how RAR format handles compressing repeated 3520 * patterns. 3521 * 3522 * The RAR compressor creates those literals and the actual efficiency of 3523 * compression depends on what those literals are. The literals can also 3524 * be seen as a kind of a non-turing-complete virtual machine that simply 3525 * tells the decompressor what it should do. 3526 * */ 3527 3528static int do_uncompress_file(struct archive_read* a) { 3529 struct rar5* rar = get_context(a); 3530 int ret; 3531 int64_t max_end_pos; 3532 3533 if(!rar->cstate.initialized) { 3534 /* Don't perform full context reinitialization if we're 3535 * processing a solid archive. */ 3536 if(!rar->main.solid || !rar->cstate.window_buf) { 3537 init_unpack(rar); 3538 } 3539 3540 rar->cstate.initialized = 1; 3541 } 3542 3543 if(rar->cstate.all_filters_applied == 1) { 3544 /* We use while(1) here, but standard case allows for just 1 3545 * iteration. The loop will iterate if process_block() didn't 3546 * generate any data at all. This can happen if the block 3547 * contains only filter definitions (this is common in big 3548 * files). */ 3549 while(1) { 3550 ret = process_block(a); 3551 if(ret == ARCHIVE_EOF || ret == ARCHIVE_FATAL) 3552 return ret; 3553 3554 if(rar->cstate.last_write_ptr == 3555 rar->cstate.write_ptr) { 3556 /* The block didn't generate any new data, 3557 * so just process a new block. */ 3558 continue; 3559 } 3560 3561 /* The block has generated some new data, so break 3562 * the loop. */ 3563 break; 3564 } 3565 } 3566 3567 /* Try to run filters. 
If filters won't be applied, it means that 3568 * insufficient data was generated. */ 3569 ret = apply_filters(a); 3570 if(ret == ARCHIVE_RETRY) { 3571 return ARCHIVE_OK; 3572 } else if(ret == ARCHIVE_FATAL) { 3573 return ARCHIVE_FATAL; 3574 } 3575 3576 /* If apply_filters() will return ARCHIVE_OK, we can continue here. */ 3577 3578 if(cdeque_size(&rar->cstate.filters) > 0) { 3579 /* Check if we can write something before hitting first 3580 * filter. */ 3581 struct filter_info* flt; 3582 3583 /* Get the block_start offset from the first filter. */ 3584 if(CDE_OK != cdeque_front(&rar->cstate.filters, 3585 cdeque_filter_p(&flt))) 3586 { 3587 archive_set_error(&a->archive, 3588 ARCHIVE_ERRNO_PROGRAMMER, 3589 "Can't read first filter"); 3590 return ARCHIVE_FATAL; 3591 } 3592 3593 max_end_pos = rar5_min(flt->block_start, 3594 rar->cstate.write_ptr); 3595 } else { 3596 /* There are no filters defined, or all filters were applied. 3597 * This means we can just store the data without any 3598 * postprocessing. */ 3599 max_end_pos = rar->cstate.write_ptr; 3600 } 3601 3602 if(max_end_pos == rar->cstate.last_write_ptr) { 3603 /* We can't write anything yet. The block uncompression 3604 * function did not generate enough data, and no filter can be 3605 * applied. At the same time we don't have any data that can be 3606 * stored without filter postprocessing. This means we need to 3607 * wait for more data to be generated, so we can apply the 3608 * filters. 3609 * 3610 * Signal the caller that we need more data to be able to do 3611 * anything. 3612 */ 3613 return ARCHIVE_RETRY; 3614 } else { 3615 /* We can write the data before hitting the first filter. 3616 * So let's do it. The push_window_data() function will 3617 * effectively return the selected data block to the user 3618 * application. 
*/ 3619 push_window_data(a, rar, rar->cstate.last_write_ptr, 3620 max_end_pos); 3621 rar->cstate.last_write_ptr = max_end_pos; 3622 } 3623 3624 return ARCHIVE_OK; 3625} 3626 3627static int uncompress_file(struct archive_read* a) { 3628 int ret; 3629 3630 while(1) { 3631 /* Sometimes the uncompression function will return a 3632 * 'retry' signal. If this will happen, we have to retry 3633 * the function. */ 3634 ret = do_uncompress_file(a); 3635 if(ret != ARCHIVE_RETRY) 3636 return ret; 3637 } 3638} 3639 3640 3641static int do_unstore_file(struct archive_read* a, 3642 struct rar5* rar, const void** buf, size_t* size, int64_t* offset) 3643{ 3644 const uint8_t* p; 3645 3646 if(rar->file.bytes_remaining == 0 && rar->main.volume > 0 && 3647 rar->generic.split_after > 0) 3648 { 3649 int ret; 3650 3651 rar->cstate.switch_multivolume = 1; 3652 ret = advance_multivolume(a); 3653 rar->cstate.switch_multivolume = 0; 3654 3655 if(ret != ARCHIVE_OK) { 3656 /* Failed to advance to next multivolume archive 3657 * file. 
*/ 3658 return ret; 3659 } 3660 } 3661 3662 size_t to_read = rar5_min(rar->file.bytes_remaining, 64 * 1024); 3663 if(to_read == 0) { 3664 return ARCHIVE_EOF; 3665 } 3666 3667 if(!read_ahead(a, to_read, &p)) { 3668 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 3669 "I/O error when unstoring file"); 3670 return ARCHIVE_FATAL; 3671 } 3672 3673 if(ARCHIVE_OK != consume(a, to_read)) { 3674 return ARCHIVE_EOF; 3675 } 3676 3677 if(buf) *buf = p; 3678 if(size) *size = to_read; 3679 if(offset) *offset = rar->cstate.last_unstore_ptr; 3680 3681 rar->file.bytes_remaining -= to_read; 3682 rar->cstate.last_unstore_ptr += to_read; 3683 3684 update_crc(rar, p, to_read); 3685 return ARCHIVE_OK; 3686} 3687 3688static int do_unpack(struct archive_read* a, struct rar5* rar, 3689 const void** buf, size_t* size, int64_t* offset) 3690{ 3691 enum COMPRESSION_METHOD { 3692 STORE = 0, FASTEST = 1, FAST = 2, NORMAL = 3, GOOD = 4, 3693 BEST = 5 3694 }; 3695 3696 if(rar->file.service > 0) { 3697 return do_unstore_file(a, rar, buf, size, offset); 3698 } else { 3699 switch(rar->cstate.method) { 3700 case STORE: 3701 return do_unstore_file(a, rar, buf, size, 3702 offset); 3703 case FASTEST: 3704 /* fallthrough */ 3705 case FAST: 3706 /* fallthrough */ 3707 case NORMAL: 3708 /* fallthrough */ 3709 case GOOD: 3710 /* fallthrough */ 3711 case BEST: 3712 return uncompress_file(a); 3713 default: 3714 archive_set_error(&a->archive, 3715 ARCHIVE_ERRNO_FILE_FORMAT, 3716 "Compression method not supported: 0x%x", 3717 rar->cstate.method); 3718 3719 return ARCHIVE_FATAL; 3720 } 3721 } 3722 3723#if !defined WIN32 3724 /* Not reached. */ 3725 return ARCHIVE_OK; 3726#endif 3727} 3728 3729static int verify_checksums(struct archive_read* a) { 3730 int verify_crc; 3731 struct rar5* rar = get_context(a); 3732 3733 /* Check checksums only when actually unpacking the data. 
There's no 3734 * need to calculate checksum when we're skipping data in solid archives 3735 * (skipping in solid archives is the same thing as unpacking compressed 3736 * data and discarding the result). */ 3737 3738 if(!rar->skip_mode) { 3739 /* Always check checksums if we're not in skip mode */ 3740 verify_crc = 1; 3741 } else { 3742 /* We can override the logic above with a compile-time option 3743 * NO_CRC_ON_SOLID_SKIP. This option is used during debugging, 3744 * and it will check checksums of unpacked data even when 3745 * we're skipping it. */ 3746 3747#if defined CHECK_CRC_ON_SOLID_SKIP 3748 /* Debug case */ 3749 verify_crc = 1; 3750#else 3751 /* Normal case */ 3752 verify_crc = 0; 3753#endif 3754 } 3755 3756 if(verify_crc) { 3757 /* During unpacking, on each unpacked block we're calling the 3758 * update_crc() function. Since we are here, the unpacking 3759 * process is already over and we can check if calculated 3760 * checksum (CRC32 or BLAKE2sp) is the same as what is stored 3761 * in the archive. */ 3762 if(rar->file.stored_crc32 > 0) { 3763 /* Check CRC32 only when the file contains a CRC32 3764 * value for this file. */ 3765 3766 if(rar->file.calculated_crc32 != 3767 rar->file.stored_crc32) { 3768 /* Checksums do not match; the unpacked file 3769 * is corrupted. 
*/ 3770 3771 DEBUG_CODE { 3772 printf("Checksum error: CRC32 " 3773 "(was: %08x, expected: %08x)\n", 3774 rar->file.calculated_crc32, 3775 rar->file.stored_crc32); 3776 } 3777 3778#ifndef DONT_FAIL_ON_CRC_ERROR 3779 archive_set_error(&a->archive, 3780 ARCHIVE_ERRNO_FILE_FORMAT, 3781 "Checksum error: CRC32"); 3782 return ARCHIVE_FATAL; 3783#endif 3784 } else { 3785 DEBUG_CODE { 3786 printf("Checksum OK: CRC32 " 3787 "(%08x/%08x)\n", 3788 rar->file.stored_crc32, 3789 rar->file.calculated_crc32); 3790 } 3791 } 3792 } 3793 3794 if(rar->file.has_blake2 > 0) { 3795 /* BLAKE2sp is an optional checksum algorithm that is 3796 * added to RARv5 archives when using the `-htb` switch 3797 * during creation of archive. 3798 * 3799 * We now finalize the hash calculation by calling the 3800 * `final` function. This will generate the final hash 3801 * value we can use to compare it with the BLAKE2sp 3802 * checksum that is stored in the archive. 3803 * 3804 * The return value of this `final` function is not 3805 * very helpful, as it guards only against improper use. 3806 * This is why we're explicitly ignoring it. */ 3807 3808 uint8_t b2_buf[32]; 3809 (void) blake2sp_final(&rar->file.b2state, b2_buf, 32); 3810 3811 if(memcmp(&rar->file.blake2sp, b2_buf, 32) != 0) { 3812#ifndef DONT_FAIL_ON_CRC_ERROR 3813 archive_set_error(&a->archive, 3814 ARCHIVE_ERRNO_FILE_FORMAT, 3815 "Checksum error: BLAKE2"); 3816 3817 return ARCHIVE_FATAL; 3818#endif 3819 } 3820 } 3821 } 3822 3823 /* Finalization for this file has been successfully completed. */ 3824 return ARCHIVE_OK; 3825} 3826 3827static int verify_global_checksums(struct archive_read* a) { 3828 return verify_checksums(a); 3829} 3830 3831static int rar5_read_data(struct archive_read *a, const void **buff, 3832 size_t *size, int64_t *offset) { 3833 int ret; 3834 struct rar5* rar = get_context(a); 3835 3836 if(rar->file.dir > 0) { 3837 /* Don't process any data if this file entry was declared 3838 * as a directory. 
This is needed, because entries marked as 3839 * directory doesn't have any dictionary buffer allocated, so 3840 * it's impossible to perform any decompression. */ 3841 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 3842 "Can't decompress an entry marked as a directory"); 3843 return ARCHIVE_FAILED; 3844 } 3845 3846 if(!rar->skip_mode && (rar->cstate.last_write_ptr > rar->file.unpacked_size)) { 3847 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 3848 "Unpacker has written too many bytes"); 3849 return ARCHIVE_FATAL; 3850 } 3851 3852 ret = use_data(rar, buff, size, offset); 3853 if(ret == ARCHIVE_OK) { 3854 return ret; 3855 } 3856 3857 if(rar->file.eof == 1) { 3858 return ARCHIVE_EOF; 3859 } 3860 3861 ret = do_unpack(a, rar, buff, size, offset); 3862 if(ret != ARCHIVE_OK) { 3863 return ret; 3864 } 3865 3866 if(rar->file.bytes_remaining == 0 && 3867 rar->cstate.last_write_ptr == rar->file.unpacked_size) 3868 { 3869 /* If all bytes of current file were processed, run 3870 * finalization. 3871 * 3872 * Finalization will check checksum against proper values. If 3873 * some of the checksums will not match, we'll return an error 3874 * value in the last `archive_read_data` call to signal an error 3875 * to the user. */ 3876 3877 rar->file.eof = 1; 3878 return verify_global_checksums(a); 3879 } 3880 3881 return ARCHIVE_OK; 3882} 3883 3884static int rar5_read_data_skip(struct archive_read *a) { 3885 struct rar5* rar = get_context(a); 3886 3887 if(rar->main.solid) { 3888 /* In solid archives, instead of skipping the data, we need to 3889 * extract it, and dispose the result. The side effect of this 3890 * operation will be setting up the initial window buffer state 3891 * needed to be able to extract the selected file. */ 3892 3893 int ret; 3894 3895 /* Make sure to process all blocks in the compressed stream. */ 3896 while(rar->file.bytes_remaining > 0) { 3897 /* Setting the "skip mode" will allow us to skip 3898 * checksum checks during data skipping. 
Checking the 3899 * checksum of skipped data isn't really necessary and 3900 * it's only slowing things down. 3901 * 3902 * This is incremented instead of setting to 1 because 3903 * this data skipping function can be called 3904 * recursively. */ 3905 rar->skip_mode++; 3906 3907 /* We're disposing 1 block of data, so we use triple 3908 * NULLs in arguments. */ 3909 ret = rar5_read_data(a, NULL, NULL, NULL); 3910 3911 /* Turn off "skip mode". */ 3912 rar->skip_mode--; 3913 3914 if(ret < 0 || ret == ARCHIVE_EOF) { 3915 /* Propagate any potential error conditions 3916 * to the caller. */ 3917 return ret; 3918 } 3919 } 3920 } else { 3921 /* In standard archives, we can just jump over the compressed 3922 * stream. Each file in non-solid archives starts from an empty 3923 * window buffer. */ 3924 3925 if(ARCHIVE_OK != consume(a, rar->file.bytes_remaining)) { 3926 return ARCHIVE_FATAL; 3927 } 3928 3929 rar->file.bytes_remaining = 0; 3930 } 3931 3932 return ARCHIVE_OK; 3933} 3934 3935static int64_t rar5_seek_data(struct archive_read *a, int64_t offset, 3936 int whence) 3937{ 3938 (void) a; 3939 (void) offset; 3940 (void) whence; 3941 3942 /* We're a streaming unpacker, and we don't support seeking. */ 3943 3944 return ARCHIVE_FATAL; 3945} 3946 3947static int rar5_cleanup(struct archive_read *a) { 3948 struct rar5* rar = get_context(a); 3949 3950 free(rar->cstate.window_buf); 3951 free(rar->cstate.filtered_buf); 3952 3953 free(rar->vol.push_buf); 3954 3955 free_filters(rar); 3956 cdeque_free(&rar->cstate.filters); 3957 3958 free(rar); 3959 a->format->data = NULL; 3960 3961 return ARCHIVE_OK; 3962} 3963 3964static int rar5_capabilities(struct archive_read * a) { 3965 (void) a; 3966 return 0; 3967} 3968 3969static int rar5_has_encrypted_entries(struct archive_read *_a) { 3970 (void) _a; 3971 3972 /* Unsupported for now. 
*/ 3973 return ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED; 3974} 3975 3976static int rar5_init(struct rar5* rar) { 3977 ssize_t i; 3978 3979 memset(rar, 0, sizeof(struct rar5)); 3980 3981 /* Decrypt the magic signature pattern. Check the comment near the 3982 * `rar5_signature` symbol to read the rationale behind this. */ 3983 3984 if(rar5_signature[0] == 243) { 3985 for(i = 0; i < rar5_signature_size; i++) { 3986 rar5_signature[i] ^= 0xA1; 3987 } 3988 } 3989 3990 if(CDE_OK != cdeque_init(&rar->cstate.filters, 8192)) 3991 return ARCHIVE_FATAL; 3992 3993 return ARCHIVE_OK; 3994} 3995 3996int archive_read_support_format_rar5(struct archive *_a) { 3997 struct archive_read* ar; 3998 int ret; 3999 struct rar5* rar; 4000 4001 if(ARCHIVE_OK != (ret = get_archive_read(_a, &ar))) 4002 return ret; 4003 4004 rar = malloc(sizeof(*rar)); 4005 if(rar == NULL) { 4006 archive_set_error(&ar->archive, ENOMEM, 4007 "Can't allocate rar5 data"); 4008 return ARCHIVE_FATAL; 4009 } 4010 4011 if(ARCHIVE_OK != rar5_init(rar)) { 4012 archive_set_error(&ar->archive, ENOMEM, 4013 "Can't allocate rar5 filter buffer"); 4014 return ARCHIVE_FATAL; 4015 } 4016 4017 ret = __archive_read_register_format(ar, 4018 rar, 4019 "rar5", 4020 rar5_bid, 4021 rar5_options, 4022 rar5_read_header, 4023 rar5_read_data, 4024 rar5_read_data_skip, 4025 rar5_seek_data, 4026 rar5_cleanup, 4027 rar5_capabilities, 4028 rar5_has_encrypted_entries); 4029 4030 if(ret != ARCHIVE_OK) { 4031 (void) rar5_cleanup(ar); 4032 } 4033 4034 return ret; 4035} 4036