archive_read_support_format_rar5.c revision 358090
1/*- 2* Copyright (c) 2018 Grzegorz Antoniak (http://antoniak.org) 3* All rights reserved. 4* 5* Redistribution and use in source and binary forms, with or without 6* modification, are permitted provided that the following conditions 7* are met: 8* 1. Redistributions of source code must retain the above copyright 9* notice, this list of conditions and the following disclaimer. 10* 2. Redistributions in binary form must reproduce the above copyright 11* notice, this list of conditions and the following disclaimer in the 12* documentation and/or other materials provided with the distribution. 13* 14* THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17* IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24*/ 25 26#include "archive_platform.h" 27#include "archive_endian.h" 28 29#ifdef HAVE_ERRNO_H 30#include <errno.h> 31#endif 32#include <time.h> 33#ifdef HAVE_ZLIB_H 34#include <zlib.h> /* crc32 */ 35#endif 36#ifdef HAVE_LIMITS_H 37#include <limits.h> 38#endif 39 40#include "archive.h" 41#ifndef HAVE_ZLIB_H 42#include "archive_crc32.h" 43#endif 44 45#include "archive_entry.h" 46#include "archive_entry_locale.h" 47#include "archive_ppmd7_private.h" 48#include "archive_entry_private.h" 49 50#ifdef HAVE_BLAKE2_H 51#include <blake2.h> 52#else 53#include "archive_blake2.h" 54#endif 55 56/*#define CHECK_CRC_ON_SOLID_SKIP*/ 57/*#define DONT_FAIL_ON_CRC_ERROR*/ 58/*#define DEBUG*/ 59 60#define rar5_min(a, b) (((a) > (b)) ? (b) : (a)) 61#define rar5_max(a, b) (((a) > (b)) ? (a) : (b)) 62#define rar5_countof(X) ((const ssize_t) (sizeof(X) / sizeof(*X))) 63 64#if defined DEBUG 65#define DEBUG_CODE if(1) 66#define LOG(...) do { printf("rar5: " __VA_ARGS__); puts(""); } while(0) 67#else 68#define DEBUG_CODE if(0) 69#endif 70 71/* Real RAR5 magic number is: 72 * 73 * 0x52, 0x61, 0x72, 0x21, 0x1a, 0x07, 0x01, 0x00 74 * "Rar!�����������\x00" 75 * 76 * Retrieved with `rar5_signature()` by XOR'ing it with 0xA1, because I don't 77 * want to put this magic sequence in each binary that uses libarchive, so 78 * applications that scan through the file for this marker won't trigger on 79 * this "false" one. 80 * 81 * The array itself is decrypted in `rar5_init` function. */ 82 83static unsigned char rar5_signature_xor[] = { 243, 192, 211, 128, 187, 166, 160, 161 }; 84static const size_t g_unpack_window_size = 0x20000; 85 86/* These could have been static const's, but they aren't, because of 87 * Visual Studio. */ 88#define MAX_NAME_IN_CHARS 2048 89#define MAX_NAME_IN_BYTES (4 * MAX_NAME_IN_CHARS) 90 91struct file_header { 92 ssize_t bytes_remaining; 93 ssize_t unpacked_size; 94 int64_t last_offset; /* Used in sanity checks. */ 95 int64_t last_size; /* Used in sanity checks. 
*/ 96 97 uint8_t solid : 1; /* Is this a solid stream? */ 98 uint8_t service : 1; /* Is this file a service data? */ 99 uint8_t eof : 1; /* Did we finish unpacking the file? */ 100 uint8_t dir : 1; /* Is this file entry a directory? */ 101 102 /* Optional time fields. */ 103 uint64_t e_mtime; 104 uint64_t e_ctime; 105 uint64_t e_atime; 106 uint32_t e_unix_ns; 107 108 /* Optional hash fields. */ 109 uint32_t stored_crc32; 110 uint32_t calculated_crc32; 111 uint8_t blake2sp[32]; 112 blake2sp_state b2state; 113 char has_blake2; 114 115 /* Optional redir fields */ 116 uint64_t redir_type; 117 uint64_t redir_flags; 118 119 ssize_t solid_window_size; /* Used in file format check. */ 120}; 121 122enum EXTRA { 123 EX_CRYPT = 0x01, 124 EX_HASH = 0x02, 125 EX_HTIME = 0x03, 126 EX_VERSION = 0x04, 127 EX_REDIR = 0x05, 128 EX_UOWNER = 0x06, 129 EX_SUBDATA = 0x07 130}; 131 132#define REDIR_SYMLINK_IS_DIR 1 133 134enum REDIR_TYPE { 135 REDIR_TYPE_NONE = 0, 136 REDIR_TYPE_UNIXSYMLINK = 1, 137 REDIR_TYPE_WINSYMLINK = 2, 138 REDIR_TYPE_JUNCTION = 3, 139 REDIR_TYPE_HARDLINK = 4, 140 REDIR_TYPE_FILECOPY = 5, 141}; 142 143#define OWNER_USER_NAME 0x01 144#define OWNER_GROUP_NAME 0x02 145#define OWNER_USER_UID 0x04 146#define OWNER_GROUP_GID 0x08 147#define OWNER_MAXNAMELEN 256 148 149enum FILTER_TYPE { 150 FILTER_DELTA = 0, /* Generic pattern. */ 151 FILTER_E8 = 1, /* Intel x86 code. */ 152 FILTER_E8E9 = 2, /* Intel x86 code. */ 153 FILTER_ARM = 3, /* ARM code. */ 154 FILTER_AUDIO = 4, /* Audio filter, not used in RARv5. */ 155 FILTER_RGB = 5, /* Color palette, not used in RARv5. */ 156 FILTER_ITANIUM = 6, /* Intel's Itanium, not used in RARv5. */ 157 FILTER_PPM = 7, /* Predictive pattern matching, not used in 158 RARv5. 
*/ 159 FILTER_NONE = 8, 160}; 161 162struct filter_info { 163 int type; 164 int channels; 165 int pos_r; 166 167 int64_t block_start; 168 ssize_t block_length; 169 uint16_t width; 170}; 171 172struct data_ready { 173 char used; 174 const uint8_t* buf; 175 size_t size; 176 int64_t offset; 177}; 178 179struct cdeque { 180 uint16_t beg_pos; 181 uint16_t end_pos; 182 uint16_t cap_mask; 183 uint16_t size; 184 size_t* arr; 185}; 186 187struct decode_table { 188 uint32_t size; 189 int32_t decode_len[16]; 190 uint32_t decode_pos[16]; 191 uint32_t quick_bits; 192 uint8_t quick_len[1 << 10]; 193 uint16_t quick_num[1 << 10]; 194 uint16_t decode_num[306]; 195}; 196 197struct comp_state { 198 /* Flag used to specify if unpacker needs to reinitialize the 199 uncompression context. */ 200 uint8_t initialized : 1; 201 202 /* Flag used when applying filters. */ 203 uint8_t all_filters_applied : 1; 204 205 /* Flag used to skip file context reinitialization, used when unpacker 206 is skipping through different multivolume archives. */ 207 uint8_t switch_multivolume : 1; 208 209 /* Flag used to specify if unpacker has processed the whole data block 210 or just a part of it. */ 211 uint8_t block_parsing_finished : 1; 212 213 signed int notused : 4; 214 215 int flags; /* Uncompression flags. */ 216 int method; /* Uncompression algorithm method. */ 217 int version; /* Uncompression algorithm version. */ 218 ssize_t window_size; /* Size of window_buf. */ 219 uint8_t* window_buf; /* Circular buffer used during 220 decompression. */ 221 uint8_t* filtered_buf; /* Buffer used when applying filters. */ 222 const uint8_t* block_buf; /* Buffer used when merging blocks. */ 223 size_t window_mask; /* Convenience field; window_size - 1. */ 224 int64_t write_ptr; /* This amount of data has been unpacked 225 in the window buffer. */ 226 int64_t last_write_ptr; /* This amount of data has been stored in 227 the output file. 
*/ 228 int64_t last_unstore_ptr; /* Counter of bytes extracted during 229 unstoring. This is separate from 230 last_write_ptr because of how SERVICE 231 base blocks are handled during skipping 232 in solid multiarchive archives. */ 233 int64_t solid_offset; /* Additional offset inside the window 234 buffer, used in unpacking solid 235 archives. */ 236 ssize_t cur_block_size; /* Size of current data block. */ 237 int last_len; /* Flag used in lzss decompression. */ 238 239 /* Decode tables used during lzss uncompression. */ 240 241#define HUFF_BC 20 242 struct decode_table bd; /* huffman bit lengths */ 243#define HUFF_NC 306 244 struct decode_table ld; /* literals */ 245#define HUFF_DC 64 246 struct decode_table dd; /* distances */ 247#define HUFF_LDC 16 248 struct decode_table ldd; /* lower bits of distances */ 249#define HUFF_RC 44 250 struct decode_table rd; /* repeating distances */ 251#define HUFF_TABLE_SIZE (HUFF_NC + HUFF_DC + HUFF_RC + HUFF_LDC) 252 253 /* Circular deque for storing filters. */ 254 struct cdeque filters; 255 int64_t last_block_start; /* Used for sanity checking. */ 256 ssize_t last_block_length; /* Used for sanity checking. */ 257 258 /* Distance cache used during lzss uncompression. */ 259 int dist_cache[4]; 260 261 /* Data buffer stack. */ 262 struct data_ready dready[2]; 263}; 264 265/* Bit reader state. */ 266struct bit_reader { 267 int8_t bit_addr; /* Current bit pointer inside current byte. */ 268 int in_addr; /* Current byte pointer. */ 269}; 270 271/* RARv5 block header structure. Use bf_* functions to get values from 272 * block_flags_u8 field. I.e. bf_byte_count, etc. */ 273struct compressed_block_header { 274 /* block_flags_u8 contain fields encoded in little-endian bitfield: 275 * 276 * - table present flag (shr 7, and 1), 277 * - last block flag (shr 6, and 1), 278 * - byte_count (shr 3, and 7), 279 * - bit_size (shr 0, and 7). 
280 */ 281 uint8_t block_flags_u8; 282 uint8_t block_cksum; 283}; 284 285/* RARv5 main header structure. */ 286struct main_header { 287 /* Does the archive contain solid streams? */ 288 uint8_t solid : 1; 289 290 /* If this a multi-file archive? */ 291 uint8_t volume : 1; 292 uint8_t endarc : 1; 293 uint8_t notused : 5; 294 295 unsigned int vol_no; 296}; 297 298struct generic_header { 299 uint8_t split_after : 1; 300 uint8_t split_before : 1; 301 uint8_t padding : 6; 302 int size; 303 int last_header_id; 304}; 305 306struct multivolume { 307 unsigned int expected_vol_no; 308 uint8_t* push_buf; 309}; 310 311/* Main context structure. */ 312struct rar5 { 313 int header_initialized; 314 315 /* Set to 1 if current file is positioned AFTER the magic value 316 * of the archive file. This is used in header reading functions. */ 317 int skipped_magic; 318 319 /* Set to not zero if we're in skip mode (either by calling 320 * rar5_data_skip function or when skipping over solid streams). 321 * Set to 0 when in * extraction mode. This is used during checksum 322 * calculation functions. */ 323 int skip_mode; 324 325 /* Set to not zero if we're in block merging mode (i.e. when switching 326 * to another file in multivolume archive, last block from 1st archive 327 * needs to be merged with 1st block from 2nd archive). This flag 328 * guards against recursive use of the merging function, which doesn't 329 * support recursive calls. */ 330 int merge_mode; 331 332 /* An offset to QuickOpen list. This is not supported by this unpacker, 333 * because we're focusing on streaming interface. QuickOpen is designed 334 * to make things quicker for non-stream interfaces, so it's not our 335 * use case. */ 336 uint64_t qlist_offset; 337 338 /* An offset to additional Recovery data. This is not supported by this 339 * unpacker. Recovery data are additional Reed-Solomon codes that could 340 * be used to calculate bytes that are missing in archive or are 341 * corrupted. 
*/ 342 uint64_t rr_offset; 343 344 /* Various context variables grouped to different structures. */ 345 struct generic_header generic; 346 struct main_header main; 347 struct comp_state cstate; 348 struct file_header file; 349 struct bit_reader bits; 350 struct multivolume vol; 351 352 /* The header of currently processed RARv5 block. Used in main 353 * decompression logic loop. */ 354 struct compressed_block_header last_block_hdr; 355}; 356 357/* Forward function declarations. */ 358 359static void rar5_signature(char *buf); 360static int verify_global_checksums(struct archive_read* a); 361static int rar5_read_data_skip(struct archive_read *a); 362static int push_data_ready(struct archive_read* a, struct rar5* rar, 363 const uint8_t* buf, size_t size, int64_t offset); 364 365/* CDE_xxx = Circular Double Ended (Queue) return values. */ 366enum CDE_RETURN_VALUES { 367 CDE_OK, CDE_ALLOC, CDE_PARAM, CDE_OUT_OF_BOUNDS, 368}; 369 370/* Clears the contents of this circular deque. */ 371static void cdeque_clear(struct cdeque* d) { 372 d->size = 0; 373 d->beg_pos = 0; 374 d->end_pos = 0; 375} 376 377/* Creates a new circular deque object. Capacity must be power of 2: 8, 16, 32, 378 * 64, 256, etc. When the user will add another item above current capacity, 379 * the circular deque will overwrite the oldest entry. */ 380static int cdeque_init(struct cdeque* d, int max_capacity_power_of_2) { 381 if(d == NULL || max_capacity_power_of_2 == 0) 382 return CDE_PARAM; 383 384 d->cap_mask = max_capacity_power_of_2 - 1; 385 d->arr = NULL; 386 387 if((max_capacity_power_of_2 & d->cap_mask) != 0) 388 return CDE_PARAM; 389 390 cdeque_clear(d); 391 d->arr = malloc(sizeof(void*) * max_capacity_power_of_2); 392 393 return d->arr ? CDE_OK : CDE_ALLOC; 394} 395 396/* Return the current size (not capacity) of circular deque `d`. */ 397static size_t cdeque_size(struct cdeque* d) { 398 return d->size; 399} 400 401/* Returns the first element of current circular deque. 
Note that this function 402 * doesn't perform any bounds checking. If you need bounds checking, use 403 * `cdeque_front()` function instead. */ 404static void cdeque_front_fast(struct cdeque* d, void** value) { 405 *value = (void*) d->arr[d->beg_pos]; 406} 407 408/* Returns the first element of current circular deque. This function 409 * performs bounds checking. */ 410static int cdeque_front(struct cdeque* d, void** value) { 411 if(d->size > 0) { 412 cdeque_front_fast(d, value); 413 return CDE_OK; 414 } else 415 return CDE_OUT_OF_BOUNDS; 416} 417 418/* Pushes a new element into the end of this circular deque object. If current 419 * size will exceed capacity, the oldest element will be overwritten. */ 420static int cdeque_push_back(struct cdeque* d, void* item) { 421 if(d == NULL) 422 return CDE_PARAM; 423 424 if(d->size == d->cap_mask + 1) 425 return CDE_OUT_OF_BOUNDS; 426 427 d->arr[d->end_pos] = (size_t) item; 428 d->end_pos = (d->end_pos + 1) & d->cap_mask; 429 d->size++; 430 431 return CDE_OK; 432} 433 434/* Pops a front element of this circular deque object and returns its value. 435 * This function doesn't perform any bounds checking. */ 436static void cdeque_pop_front_fast(struct cdeque* d, void** value) { 437 *value = (void*) d->arr[d->beg_pos]; 438 d->beg_pos = (d->beg_pos + 1) & d->cap_mask; 439 d->size--; 440} 441 442/* Pops a front element of this circular deque object and returns its value. 443 * This function performs bounds checking. */ 444static int cdeque_pop_front(struct cdeque* d, void** value) { 445 if(!d || !value) 446 return CDE_PARAM; 447 448 if(d->size == 0) 449 return CDE_OUT_OF_BOUNDS; 450 451 cdeque_pop_front_fast(d, value); 452 return CDE_OK; 453} 454 455/* Convenience function to cast filter_info** to void **. */ 456static void** cdeque_filter_p(struct filter_info** f) { 457 return (void**) (size_t) f; 458} 459 460/* Convenience function to cast filter_info* to void *. 
*/ 461static void* cdeque_filter(struct filter_info* f) { 462 return (void**) (size_t) f; 463} 464 465/* Destroys this circular deque object. Deallocates the memory of the 466 * collection buffer, but doesn't deallocate the memory of any pointer passed 467 * to this deque as a value. */ 468static void cdeque_free(struct cdeque* d) { 469 if(!d) 470 return; 471 472 if(!d->arr) 473 return; 474 475 free(d->arr); 476 477 d->arr = NULL; 478 d->beg_pos = -1; 479 d->end_pos = -1; 480 d->cap_mask = 0; 481} 482 483static inline 484uint8_t bf_bit_size(const struct compressed_block_header* hdr) { 485 return hdr->block_flags_u8 & 7; 486} 487 488static inline 489uint8_t bf_byte_count(const struct compressed_block_header* hdr) { 490 return (hdr->block_flags_u8 >> 3) & 7; 491} 492 493static inline 494uint8_t bf_is_table_present(const struct compressed_block_header* hdr) { 495 return (hdr->block_flags_u8 >> 7) & 1; 496} 497 498static inline struct rar5* get_context(struct archive_read* a) { 499 return (struct rar5*) a->format->data; 500} 501 502/* Convenience functions used by filter implementations. 
*/ 503static void circular_memcpy(uint8_t* dst, uint8_t* window, const uint64_t mask, 504 int64_t start, int64_t end) 505{ 506 if((start & mask) > (end & mask)) { 507 ssize_t len1 = mask + 1 - (start & mask); 508 ssize_t len2 = end & mask; 509 510 memcpy(dst, &window[start & mask], len1); 511 memcpy(dst + len1, window, len2); 512 } else { 513 memcpy(dst, &window[start & mask], (size_t) (end - start)); 514 } 515} 516 517static uint32_t read_filter_data(struct rar5* rar, uint32_t offset) { 518 uint8_t linear_buf[4]; 519 circular_memcpy(linear_buf, rar->cstate.window_buf, 520 rar->cstate.window_mask, offset, offset + 4); 521 return archive_le32dec(linear_buf); 522} 523 524static void write_filter_data(struct rar5* rar, uint32_t offset, 525 uint32_t value) 526{ 527 archive_le32enc(&rar->cstate.filtered_buf[offset], value); 528} 529 530/* Allocates a new filter descriptor and adds it to the filter array. */ 531static struct filter_info* add_new_filter(struct rar5* rar) { 532 struct filter_info* f = 533 (struct filter_info*) calloc(1, sizeof(struct filter_info)); 534 535 if(!f) { 536 return NULL; 537 } 538 539 cdeque_push_back(&rar->cstate.filters, cdeque_filter(f)); 540 return f; 541} 542 543static int run_delta_filter(struct rar5* rar, struct filter_info* flt) { 544 int i; 545 ssize_t dest_pos, src_pos = 0; 546 547 for(i = 0; i < flt->channels; i++) { 548 uint8_t prev_byte = 0; 549 for(dest_pos = i; 550 dest_pos < flt->block_length; 551 dest_pos += flt->channels) 552 { 553 uint8_t byte; 554 555 byte = rar->cstate.window_buf[ 556 (rar->cstate.solid_offset + flt->block_start + 557 src_pos) & rar->cstate.window_mask]; 558 559 prev_byte -= byte; 560 rar->cstate.filtered_buf[dest_pos] = prev_byte; 561 src_pos++; 562 } 563 } 564 565 return ARCHIVE_OK; 566} 567 568static int run_e8e9_filter(struct rar5* rar, struct filter_info* flt, 569 int extended) 570{ 571 const uint32_t file_size = 0x1000000; 572 ssize_t i; 573 574 circular_memcpy(rar->cstate.filtered_buf, 575 
rar->cstate.window_buf, rar->cstate.window_mask, 576 rar->cstate.solid_offset + flt->block_start, 577 rar->cstate.solid_offset + flt->block_start + flt->block_length); 578 579 for(i = 0; i < flt->block_length - 4;) { 580 uint8_t b = rar->cstate.window_buf[ 581 (rar->cstate.solid_offset + flt->block_start + 582 i++) & rar->cstate.window_mask]; 583 584 /* 585 * 0xE8 = x86's call <relative_addr_uint32> (function call) 586 * 0xE9 = x86's jmp <relative_addr_uint32> (unconditional jump) 587 */ 588 if(b == 0xE8 || (extended && b == 0xE9)) { 589 590 uint32_t addr; 591 uint32_t offset = (i + flt->block_start) % file_size; 592 593 addr = read_filter_data(rar, 594 (uint32_t)(rar->cstate.solid_offset + 595 flt->block_start + i) & rar->cstate.window_mask); 596 597 if(addr & 0x80000000) { 598 if(((addr + offset) & 0x80000000) == 0) { 599 write_filter_data(rar, (uint32_t)i, 600 addr + file_size); 601 } 602 } else { 603 if((addr - file_size) & 0x80000000) { 604 uint32_t naddr = addr - offset; 605 write_filter_data(rar, (uint32_t)i, 606 naddr); 607 } 608 } 609 610 i += 4; 611 } 612 } 613 614 return ARCHIVE_OK; 615} 616 617static int run_arm_filter(struct rar5* rar, struct filter_info* flt) { 618 ssize_t i = 0; 619 uint32_t offset; 620 621 circular_memcpy(rar->cstate.filtered_buf, 622 rar->cstate.window_buf, rar->cstate.window_mask, 623 rar->cstate.solid_offset + flt->block_start, 624 rar->cstate.solid_offset + flt->block_start + flt->block_length); 625 626 for(i = 0; i < flt->block_length - 3; i += 4) { 627 uint8_t* b = &rar->cstate.window_buf[ 628 (rar->cstate.solid_offset + 629 flt->block_start + i + 3) & rar->cstate.window_mask]; 630 631 if(*b == 0xEB) { 632 /* 0xEB = ARM's BL (branch + link) instruction. 
*/ 633 offset = read_filter_data(rar, 634 (rar->cstate.solid_offset + flt->block_start + i) & 635 rar->cstate.window_mask) & 0x00ffffff; 636 637 offset -= (uint32_t) ((i + flt->block_start) / 4); 638 offset = (offset & 0x00ffffff) | 0xeb000000; 639 write_filter_data(rar, (uint32_t)i, offset); 640 } 641 } 642 643 return ARCHIVE_OK; 644} 645 646static int run_filter(struct archive_read* a, struct filter_info* flt) { 647 int ret; 648 struct rar5* rar = get_context(a); 649 650 free(rar->cstate.filtered_buf); 651 652 rar->cstate.filtered_buf = malloc(flt->block_length); 653 if(!rar->cstate.filtered_buf) { 654 archive_set_error(&a->archive, ENOMEM, 655 "Can't allocate memory for filter data."); 656 return ARCHIVE_FATAL; 657 } 658 659 switch(flt->type) { 660 case FILTER_DELTA: 661 ret = run_delta_filter(rar, flt); 662 break; 663 664 case FILTER_E8: 665 /* fallthrough */ 666 case FILTER_E8E9: 667 ret = run_e8e9_filter(rar, flt, 668 flt->type == FILTER_E8E9); 669 break; 670 671 case FILTER_ARM: 672 ret = run_arm_filter(rar, flt); 673 break; 674 675 default: 676 archive_set_error(&a->archive, 677 ARCHIVE_ERRNO_FILE_FORMAT, 678 "Unsupported filter type: 0x%x", flt->type); 679 return ARCHIVE_FATAL; 680 } 681 682 if(ret != ARCHIVE_OK) { 683 /* Filter has failed. */ 684 return ret; 685 } 686 687 if(ARCHIVE_OK != push_data_ready(a, rar, rar->cstate.filtered_buf, 688 flt->block_length, rar->cstate.last_write_ptr)) 689 { 690 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 691 "Stack overflow when submitting unpacked data"); 692 693 return ARCHIVE_FATAL; 694 } 695 696 rar->cstate.last_write_ptr += flt->block_length; 697 return ARCHIVE_OK; 698} 699 700/* The `push_data` function submits the selected data range to the user. 701 * Next call of `use_data` will use the pointer, size and offset arguments 702 * that are specified here. These arguments are pushed to the FIFO stack here, 703 * and popped from the stack by the `use_data` function. 
*/ 704static void push_data(struct archive_read* a, struct rar5* rar, 705 const uint8_t* buf, int64_t idx_begin, int64_t idx_end) 706{ 707 const uint64_t wmask = rar->cstate.window_mask; 708 const ssize_t solid_write_ptr = (rar->cstate.solid_offset + 709 rar->cstate.last_write_ptr) & wmask; 710 711 idx_begin += rar->cstate.solid_offset; 712 idx_end += rar->cstate.solid_offset; 713 714 /* Check if our unpacked data is wrapped inside the window circular 715 * buffer. If it's not wrapped, it can be copied out by using 716 * a single memcpy, but when it's wrapped, we need to copy the first 717 * part with one memcpy, and the second part with another memcpy. */ 718 719 if((idx_begin & wmask) > (idx_end & wmask)) { 720 /* The data is wrapped (begin offset sis bigger than end 721 * offset). */ 722 const ssize_t frag1_size = rar->cstate.window_size - 723 (idx_begin & wmask); 724 const ssize_t frag2_size = idx_end & wmask; 725 726 /* Copy the first part of the buffer first. */ 727 push_data_ready(a, rar, buf + solid_write_ptr, frag1_size, 728 rar->cstate.last_write_ptr); 729 730 /* Copy the second part of the buffer. */ 731 push_data_ready(a, rar, buf, frag2_size, 732 rar->cstate.last_write_ptr + frag1_size); 733 734 rar->cstate.last_write_ptr += frag1_size + frag2_size; 735 } else { 736 /* Data is not wrapped, so we can just use one call to copy the 737 * data. */ 738 push_data_ready(a, rar, 739 buf + solid_write_ptr, (idx_end - idx_begin) & wmask, 740 rar->cstate.last_write_ptr); 741 742 rar->cstate.last_write_ptr += idx_end - idx_begin; 743 } 744} 745 746/* Convenience function that submits the data to the user. It uses the 747 * unpack window buffer as a source location. 
*/ 748static void push_window_data(struct archive_read* a, struct rar5* rar, 749 int64_t idx_begin, int64_t idx_end) 750{ 751 push_data(a, rar, rar->cstate.window_buf, idx_begin, idx_end); 752} 753 754static int apply_filters(struct archive_read* a) { 755 struct filter_info* flt; 756 struct rar5* rar = get_context(a); 757 int ret; 758 759 rar->cstate.all_filters_applied = 0; 760 761 /* Get the first filter that can be applied to our data. The data 762 * needs to be fully unpacked before the filter can be run. */ 763 if(CDE_OK == cdeque_front(&rar->cstate.filters, 764 cdeque_filter_p(&flt))) { 765 /* Check if our unpacked data fully covers this filter's 766 * range. */ 767 if(rar->cstate.write_ptr > flt->block_start && 768 rar->cstate.write_ptr >= flt->block_start + 769 flt->block_length) { 770 /* Check if we have some data pending to be written 771 * right before the filter's start offset. */ 772 if(rar->cstate.last_write_ptr == flt->block_start) { 773 /* Run the filter specified by descriptor 774 * `flt`. */ 775 ret = run_filter(a, flt); 776 if(ret != ARCHIVE_OK) { 777 /* Filter failure, return error. */ 778 return ret; 779 } 780 781 /* Filter descriptor won't be needed anymore 782 * after it's used, * so remove it from the 783 * filter list and free its memory. */ 784 (void) cdeque_pop_front(&rar->cstate.filters, 785 cdeque_filter_p(&flt)); 786 787 free(flt); 788 } else { 789 /* We can't run filters yet, dump the memory 790 * right before the filter. */ 791 push_window_data(a, rar, 792 rar->cstate.last_write_ptr, 793 flt->block_start); 794 } 795 796 /* Return 'filter applied or not needed' state to the 797 * caller. 
*/ 798 return ARCHIVE_RETRY; 799 } 800 } 801 802 rar->cstate.all_filters_applied = 1; 803 return ARCHIVE_OK; 804} 805 806static void dist_cache_push(struct rar5* rar, int value) { 807 int* q = rar->cstate.dist_cache; 808 809 q[3] = q[2]; 810 q[2] = q[1]; 811 q[1] = q[0]; 812 q[0] = value; 813} 814 815static int dist_cache_touch(struct rar5* rar, int idx) { 816 int* q = rar->cstate.dist_cache; 817 int i, dist = q[idx]; 818 819 for(i = idx; i > 0; i--) 820 q[i] = q[i - 1]; 821 822 q[0] = dist; 823 return dist; 824} 825 826static void free_filters(struct rar5* rar) { 827 struct cdeque* d = &rar->cstate.filters; 828 829 /* Free any remaining filters. All filters should be naturally 830 * consumed by the unpacking function, so remaining filters after 831 * unpacking normally mean that unpacking wasn't successful. 832 * But still of course we shouldn't leak memory in such case. */ 833 834 /* cdeque_size() is a fast operation, so we can use it as a loop 835 * expression. */ 836 while(cdeque_size(d) > 0) { 837 struct filter_info* f = NULL; 838 839 /* Pop_front will also decrease the collection's size. */ 840 if (CDE_OK == cdeque_pop_front(d, cdeque_filter_p(&f))) 841 free(f); 842 } 843 844 cdeque_clear(d); 845 846 /* Also clear out the variables needed for sanity checking. 
*/ 847 rar->cstate.last_block_start = 0; 848 rar->cstate.last_block_length = 0; 849} 850 851static void reset_file_context(struct rar5* rar) { 852 memset(&rar->file, 0, sizeof(rar->file)); 853 blake2sp_init(&rar->file.b2state, 32); 854 855 if(rar->main.solid) { 856 rar->cstate.solid_offset += rar->cstate.write_ptr; 857 } else { 858 rar->cstate.solid_offset = 0; 859 } 860 861 rar->cstate.write_ptr = 0; 862 rar->cstate.last_write_ptr = 0; 863 rar->cstate.last_unstore_ptr = 0; 864 865 rar->file.redir_type = REDIR_TYPE_NONE; 866 rar->file.redir_flags = 0; 867 868 free_filters(rar); 869} 870 871static inline int get_archive_read(struct archive* a, 872 struct archive_read** ar) 873{ 874 *ar = (struct archive_read*) a; 875 archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 876 "archive_read_support_format_rar5"); 877 878 return ARCHIVE_OK; 879} 880 881static int read_ahead(struct archive_read* a, size_t how_many, 882 const uint8_t** ptr) 883{ 884 ssize_t avail = -1; 885 if(!ptr) 886 return 0; 887 888 *ptr = __archive_read_ahead(a, how_many, &avail); 889 if(*ptr == NULL) { 890 return 0; 891 } 892 893 return 1; 894} 895 896static int consume(struct archive_read* a, int64_t how_many) { 897 int ret; 898 899 ret = how_many == __archive_read_consume(a, how_many) 900 ? ARCHIVE_OK 901 : ARCHIVE_FATAL; 902 903 return ret; 904} 905 906/** 907 * Read a RAR5 variable sized numeric value. This value will be stored in 908 * `pvalue`. The `pvalue_len` argument points to a variable that will receive 909 * the byte count that was consumed in order to decode the `pvalue` value, plus 910 * one. 911 * 912 * pvalue_len is optional and can be NULL. 913 * 914 * NOTE: if `pvalue_len` is NOT NULL, the caller needs to manually consume 915 * the number of bytes that `pvalue_len` value contains. If the `pvalue_len` 916 * is NULL, this consuming operation is done automatically. 917 * 918 * Returns 1 if *pvalue was successfully read. 919 * Returns 0 if there was an error. 
In this case, *pvalue contains an 920 * invalid value. 921 */ 922 923static int read_var(struct archive_read* a, uint64_t* pvalue, 924 uint64_t* pvalue_len) 925{ 926 uint64_t result = 0; 927 size_t shift, i; 928 const uint8_t* p; 929 uint8_t b; 930 931 /* We will read maximum of 8 bytes. We don't have to handle the 932 * situation to read the RAR5 variable-sized value stored at the end of 933 * the file, because such situation will never happen. */ 934 if(!read_ahead(a, 8, &p)) 935 return 0; 936 937 for(shift = 0, i = 0; i < 8; i++, shift += 7) { 938 b = p[i]; 939 940 /* Strip the MSB from the input byte and add the resulting 941 * number to the `result`. */ 942 result += (b & (uint64_t)0x7F) << shift; 943 944 /* MSB set to 1 means we need to continue decoding process. 945 * MSB set to 0 means we're done. 946 * 947 * This conditional checks for the second case. */ 948 if((b & 0x80) == 0) { 949 if(pvalue) { 950 *pvalue = result; 951 } 952 953 /* If the caller has passed the `pvalue_len` pointer, 954 * store the number of consumed bytes in it and do NOT 955 * consume those bytes, since the caller has all the 956 * information it needs to perform */ 957 if(pvalue_len) { 958 *pvalue_len = 1 + i; 959 } else { 960 /* If the caller did not provide the 961 * `pvalue_len` pointer, it will not have the 962 * possibility to advance the file pointer, 963 * because it will not know how many bytes it 964 * needs to consume. This is why we handle 965 * such situation here automatically. */ 966 if(ARCHIVE_OK != consume(a, 1 + i)) { 967 return 0; 968 } 969 } 970 971 /* End of decoding process, return success. */ 972 return 1; 973 } 974 } 975 976 /* The decoded value takes the maximum number of 8 bytes. 977 * It's a maximum number of bytes, so end decoding process here 978 * even if the first bit of last byte is 1. 
*/ 979 if(pvalue) { 980 *pvalue = result; 981 } 982 983 if(pvalue_len) { 984 *pvalue_len = 9; 985 } else { 986 if(ARCHIVE_OK != consume(a, 9)) { 987 return 0; 988 } 989 } 990 991 return 1; 992} 993 994static int read_var_sized(struct archive_read* a, size_t* pvalue, 995 size_t* pvalue_len) 996{ 997 uint64_t v; 998 uint64_t v_size = 0; 999 1000 const int ret = pvalue_len ? read_var(a, &v, &v_size) 1001 : read_var(a, &v, NULL); 1002 1003 if(ret == 1 && pvalue) { 1004 *pvalue = (size_t) v; 1005 } 1006 1007 if(pvalue_len) { 1008 /* Possible data truncation should be safe. */ 1009 *pvalue_len = (size_t) v_size; 1010 } 1011 1012 return ret; 1013} 1014 1015static int read_bits_32(struct rar5* rar, const uint8_t* p, uint32_t* value) { 1016 uint32_t bits = ((uint32_t) p[rar->bits.in_addr]) << 24; 1017 bits |= p[rar->bits.in_addr + 1] << 16; 1018 bits |= p[rar->bits.in_addr + 2] << 8; 1019 bits |= p[rar->bits.in_addr + 3]; 1020 bits <<= rar->bits.bit_addr; 1021 bits |= p[rar->bits.in_addr + 4] >> (8 - rar->bits.bit_addr); 1022 *value = bits; 1023 return ARCHIVE_OK; 1024} 1025 1026static int read_bits_16(struct rar5* rar, const uint8_t* p, uint16_t* value) { 1027 int bits = (int) ((uint32_t) p[rar->bits.in_addr]) << 16; 1028 bits |= (int) p[rar->bits.in_addr + 1] << 8; 1029 bits |= (int) p[rar->bits.in_addr + 2]; 1030 bits >>= (8 - rar->bits.bit_addr); 1031 *value = bits & 0xffff; 1032 return ARCHIVE_OK; 1033} 1034 1035static void skip_bits(struct rar5* rar, int bits) { 1036 const int new_bits = rar->bits.bit_addr + bits; 1037 rar->bits.in_addr += new_bits >> 3; 1038 rar->bits.bit_addr = new_bits & 7; 1039} 1040 1041/* n = up to 16 */ 1042static int read_consume_bits(struct rar5* rar, const uint8_t* p, int n, 1043 int* value) 1044{ 1045 uint16_t v; 1046 int ret, num; 1047 1048 if(n == 0 || n > 16) { 1049 /* This is a programmer error and should never happen 1050 * in runtime. 
*/ 1051 return ARCHIVE_FATAL; 1052 } 1053 1054 ret = read_bits_16(rar, p, &v); 1055 if(ret != ARCHIVE_OK) 1056 return ret; 1057 1058 num = (int) v; 1059 num >>= 16 - n; 1060 1061 skip_bits(rar, n); 1062 1063 if(value) 1064 *value = num; 1065 1066 return ARCHIVE_OK; 1067} 1068 1069static int read_u32(struct archive_read* a, uint32_t* pvalue) { 1070 const uint8_t* p; 1071 if(!read_ahead(a, 4, &p)) 1072 return 0; 1073 1074 *pvalue = archive_le32dec(p); 1075 return ARCHIVE_OK == consume(a, 4) ? 1 : 0; 1076} 1077 1078static int read_u64(struct archive_read* a, uint64_t* pvalue) { 1079 const uint8_t* p; 1080 if(!read_ahead(a, 8, &p)) 1081 return 0; 1082 1083 *pvalue = archive_le64dec(p); 1084 return ARCHIVE_OK == consume(a, 8) ? 1 : 0; 1085} 1086 1087static int bid_standard(struct archive_read* a) { 1088 const uint8_t* p; 1089 char signature[sizeof(rar5_signature_xor)]; 1090 1091 rar5_signature(signature); 1092 1093 if(!read_ahead(a, sizeof(rar5_signature_xor), &p)) 1094 return -1; 1095 1096 if(!memcmp(signature, p, sizeof(rar5_signature_xor))) 1097 return 30; 1098 1099 return -1; 1100} 1101 1102static int rar5_bid(struct archive_read* a, int best_bid) { 1103 int my_bid; 1104 1105 if(best_bid > 30) 1106 return -1; 1107 1108 my_bid = bid_standard(a); 1109 if(my_bid > -1) { 1110 return my_bid; 1111 } 1112 1113 return -1; 1114} 1115 1116static int rar5_options(struct archive_read *a, const char *key, 1117 const char *val) { 1118 (void) a; 1119 (void) key; 1120 (void) val; 1121 1122 /* No options supported in this version. Return the ARCHIVE_WARN code 1123 * to signal the options supervisor that the unpacker didn't handle 1124 * setting this option. 
*/ 1125 1126 return ARCHIVE_WARN; 1127} 1128 1129static void init_header(struct archive_read* a) { 1130 a->archive.archive_format = ARCHIVE_FORMAT_RAR_V5; 1131 a->archive.archive_format_name = "RAR5"; 1132} 1133 1134static void init_window_mask(struct rar5* rar) { 1135 if (rar->cstate.window_size) 1136 rar->cstate.window_mask = rar->cstate.window_size - 1; 1137 else 1138 rar->cstate.window_mask = 0; 1139} 1140 1141enum HEADER_FLAGS { 1142 HFL_EXTRA_DATA = 0x0001, 1143 HFL_DATA = 0x0002, 1144 HFL_SKIP_IF_UNKNOWN = 0x0004, 1145 HFL_SPLIT_BEFORE = 0x0008, 1146 HFL_SPLIT_AFTER = 0x0010, 1147 HFL_CHILD = 0x0020, 1148 HFL_INHERITED = 0x0040 1149}; 1150 1151static int process_main_locator_extra_block(struct archive_read* a, 1152 struct rar5* rar) 1153{ 1154 uint64_t locator_flags; 1155 1156 enum LOCATOR_FLAGS { 1157 QLIST = 0x01, RECOVERY = 0x02, 1158 }; 1159 1160 if(!read_var(a, &locator_flags, NULL)) { 1161 return ARCHIVE_EOF; 1162 } 1163 1164 if(locator_flags & QLIST) { 1165 if(!read_var(a, &rar->qlist_offset, NULL)) { 1166 return ARCHIVE_EOF; 1167 } 1168 1169 /* qlist is not used */ 1170 } 1171 1172 if(locator_flags & RECOVERY) { 1173 if(!read_var(a, &rar->rr_offset, NULL)) { 1174 return ARCHIVE_EOF; 1175 } 1176 1177 /* rr is not used */ 1178 } 1179 1180 return ARCHIVE_OK; 1181} 1182 1183static int parse_file_extra_hash(struct archive_read* a, struct rar5* rar, 1184 ssize_t* extra_data_size) 1185{ 1186 size_t hash_type = 0; 1187 size_t value_len; 1188 1189 enum HASH_TYPE { 1190 BLAKE2sp = 0x00 1191 }; 1192 1193 if(!read_var_sized(a, &hash_type, &value_len)) 1194 return ARCHIVE_EOF; 1195 1196 *extra_data_size -= value_len; 1197 if(ARCHIVE_OK != consume(a, value_len)) { 1198 return ARCHIVE_EOF; 1199 } 1200 1201 /* The file uses BLAKE2sp checksum algorithm instead of plain old 1202 * CRC32. 
*/ 1203 if(hash_type == BLAKE2sp) { 1204 const uint8_t* p; 1205 const int hash_size = sizeof(rar->file.blake2sp); 1206 1207 if(!read_ahead(a, hash_size, &p)) 1208 return ARCHIVE_EOF; 1209 1210 rar->file.has_blake2 = 1; 1211 memcpy(&rar->file.blake2sp, p, hash_size); 1212 1213 if(ARCHIVE_OK != consume(a, hash_size)) { 1214 return ARCHIVE_EOF; 1215 } 1216 1217 *extra_data_size -= hash_size; 1218 } else { 1219 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1220 "Unsupported hash type (0x%x)", (int) hash_type); 1221 return ARCHIVE_FATAL; 1222 } 1223 1224 return ARCHIVE_OK; 1225} 1226 1227static uint64_t time_win_to_unix(uint64_t win_time) { 1228 const size_t ns_in_sec = 10000000; 1229 const uint64_t sec_to_unix = 11644473600LL; 1230 return win_time / ns_in_sec - sec_to_unix; 1231} 1232 1233static int parse_htime_item(struct archive_read* a, char unix_time, 1234 uint64_t* where, ssize_t* extra_data_size) 1235{ 1236 if(unix_time) { 1237 uint32_t time_val; 1238 if(!read_u32(a, &time_val)) 1239 return ARCHIVE_EOF; 1240 1241 *extra_data_size -= 4; 1242 *where = (uint64_t) time_val; 1243 } else { 1244 uint64_t windows_time; 1245 if(!read_u64(a, &windows_time)) 1246 return ARCHIVE_EOF; 1247 1248 *where = time_win_to_unix(windows_time); 1249 *extra_data_size -= 8; 1250 } 1251 1252 return ARCHIVE_OK; 1253} 1254 1255static int parse_file_extra_version(struct archive_read* a, 1256 struct archive_entry* e, ssize_t* extra_data_size) 1257{ 1258 size_t flags = 0; 1259 size_t version = 0; 1260 size_t value_len = 0; 1261 struct archive_string version_string; 1262 struct archive_string name_utf8_string; 1263 const char* cur_filename; 1264 1265 /* Flags are ignored. 
*/ 1266 if(!read_var_sized(a, &flags, &value_len)) 1267 return ARCHIVE_EOF; 1268 1269 *extra_data_size -= value_len; 1270 if(ARCHIVE_OK != consume(a, value_len)) 1271 return ARCHIVE_EOF; 1272 1273 if(!read_var_sized(a, &version, &value_len)) 1274 return ARCHIVE_EOF; 1275 1276 *extra_data_size -= value_len; 1277 if(ARCHIVE_OK != consume(a, value_len)) 1278 return ARCHIVE_EOF; 1279 1280 /* extra_data_size should be zero here. */ 1281 1282 cur_filename = archive_entry_pathname_utf8(e); 1283 if(cur_filename == NULL) { 1284 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 1285 "Version entry without file name"); 1286 return ARCHIVE_FATAL; 1287 } 1288 1289 archive_string_init(&version_string); 1290 archive_string_init(&name_utf8_string); 1291 1292 /* Prepare a ;123 suffix for the filename, where '123' is the version 1293 * value of this file. */ 1294 archive_string_sprintf(&version_string, ";%zu", version); 1295 1296 /* Build the new filename. */ 1297 archive_strcat(&name_utf8_string, cur_filename); 1298 archive_strcat(&name_utf8_string, version_string.s); 1299 1300 /* Apply the new filename into this file's context. */ 1301 archive_entry_update_pathname_utf8(e, name_utf8_string.s); 1302 1303 /* Free buffers. 
*/ 1304 archive_string_free(&version_string); 1305 archive_string_free(&name_utf8_string); 1306 return ARCHIVE_OK; 1307} 1308 1309static int parse_file_extra_htime(struct archive_read* a, 1310 struct archive_entry* e, struct rar5* rar, ssize_t* extra_data_size) 1311{ 1312 char unix_time = 0; 1313 size_t flags = 0; 1314 size_t value_len; 1315 1316 enum HTIME_FLAGS { 1317 IS_UNIX = 0x01, 1318 HAS_MTIME = 0x02, 1319 HAS_CTIME = 0x04, 1320 HAS_ATIME = 0x08, 1321 HAS_UNIX_NS = 0x10, 1322 }; 1323 1324 if(!read_var_sized(a, &flags, &value_len)) 1325 return ARCHIVE_EOF; 1326 1327 *extra_data_size -= value_len; 1328 if(ARCHIVE_OK != consume(a, value_len)) { 1329 return ARCHIVE_EOF; 1330 } 1331 1332 unix_time = flags & IS_UNIX; 1333 1334 if(flags & HAS_MTIME) { 1335 parse_htime_item(a, unix_time, &rar->file.e_mtime, 1336 extra_data_size); 1337 archive_entry_set_mtime(e, rar->file.e_mtime, 0); 1338 } 1339 1340 if(flags & HAS_CTIME) { 1341 parse_htime_item(a, unix_time, &rar->file.e_ctime, 1342 extra_data_size); 1343 archive_entry_set_ctime(e, rar->file.e_ctime, 0); 1344 } 1345 1346 if(flags & HAS_ATIME) { 1347 parse_htime_item(a, unix_time, &rar->file.e_atime, 1348 extra_data_size); 1349 archive_entry_set_atime(e, rar->file.e_atime, 0); 1350 } 1351 1352 if(flags & HAS_UNIX_NS) { 1353 if(!read_u32(a, &rar->file.e_unix_ns)) 1354 return ARCHIVE_EOF; 1355 1356 *extra_data_size -= 4; 1357 } 1358 1359 return ARCHIVE_OK; 1360} 1361 1362static int parse_file_extra_redir(struct archive_read* a, 1363 struct archive_entry* e, struct rar5* rar, ssize_t* extra_data_size) 1364{ 1365 uint64_t value_size = 0; 1366 size_t target_size = 0; 1367 char target_utf8_buf[MAX_NAME_IN_BYTES]; 1368 const uint8_t* p; 1369 1370 if(!read_var(a, &rar->file.redir_type, &value_size)) 1371 return ARCHIVE_EOF; 1372 if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1373 return ARCHIVE_EOF; 1374 *extra_data_size -= value_size; 1375 1376 if(!read_var(a, &rar->file.redir_flags, &value_size)) 1377 return 
ARCHIVE_EOF; 1378 if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1379 return ARCHIVE_EOF; 1380 *extra_data_size -= value_size; 1381 1382 if(!read_var_sized(a, &target_size, NULL)) 1383 return ARCHIVE_EOF; 1384 *extra_data_size -= target_size + 1; 1385 1386 if(!read_ahead(a, target_size, &p)) 1387 return ARCHIVE_EOF; 1388 1389 if(target_size > (MAX_NAME_IN_CHARS - 1)) { 1390 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1391 "Link target is too long"); 1392 return ARCHIVE_FATAL; 1393 } 1394 1395 if(target_size == 0) { 1396 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1397 "No link target specified"); 1398 return ARCHIVE_FATAL; 1399 } 1400 1401 memcpy(target_utf8_buf, p, target_size); 1402 target_utf8_buf[target_size] = 0; 1403 1404 if(ARCHIVE_OK != consume(a, (int64_t)target_size)) 1405 return ARCHIVE_EOF; 1406 1407 switch(rar->file.redir_type) { 1408 case REDIR_TYPE_UNIXSYMLINK: 1409 case REDIR_TYPE_WINSYMLINK: 1410 archive_entry_set_filetype(e, AE_IFLNK); 1411 archive_entry_update_symlink_utf8(e, target_utf8_buf); 1412 if (rar->file.redir_flags & REDIR_SYMLINK_IS_DIR) { 1413 archive_entry_set_symlink_type(e, 1414 AE_SYMLINK_TYPE_DIRECTORY); 1415 } else { 1416 archive_entry_set_symlink_type(e, 1417 AE_SYMLINK_TYPE_FILE); 1418 } 1419 break; 1420 1421 case REDIR_TYPE_HARDLINK: 1422 archive_entry_set_filetype(e, AE_IFREG); 1423 archive_entry_update_hardlink_utf8(e, target_utf8_buf); 1424 break; 1425 1426 default: 1427 /* Unknown redir type, skip it. 
*/ 1428 break; 1429 } 1430 return ARCHIVE_OK; 1431} 1432 1433static int parse_file_extra_owner(struct archive_read* a, 1434 struct archive_entry* e, ssize_t* extra_data_size) 1435{ 1436 uint64_t flags = 0; 1437 uint64_t value_size = 0; 1438 uint64_t id = 0; 1439 size_t name_len = 0; 1440 size_t name_size = 0; 1441 char namebuf[OWNER_MAXNAMELEN]; 1442 const uint8_t* p; 1443 1444 if(!read_var(a, &flags, &value_size)) 1445 return ARCHIVE_EOF; 1446 if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1447 return ARCHIVE_EOF; 1448 *extra_data_size -= value_size; 1449 1450 if ((flags & OWNER_USER_NAME) != 0) { 1451 if(!read_var_sized(a, &name_size, NULL)) 1452 return ARCHIVE_EOF; 1453 *extra_data_size -= name_size + 1; 1454 1455 if(!read_ahead(a, name_size, &p)) 1456 return ARCHIVE_EOF; 1457 1458 if (name_size >= OWNER_MAXNAMELEN) { 1459 name_len = OWNER_MAXNAMELEN - 1; 1460 } else { 1461 name_len = name_size; 1462 } 1463 1464 memcpy(namebuf, p, name_len); 1465 namebuf[name_len] = 0; 1466 if(ARCHIVE_OK != consume(a, (int64_t)name_size)) 1467 return ARCHIVE_EOF; 1468 1469 archive_entry_set_uname(e, namebuf); 1470 } 1471 if ((flags & OWNER_GROUP_NAME) != 0) { 1472 if(!read_var_sized(a, &name_size, NULL)) 1473 return ARCHIVE_EOF; 1474 *extra_data_size -= name_size + 1; 1475 1476 if(!read_ahead(a, name_size, &p)) 1477 return ARCHIVE_EOF; 1478 1479 if (name_size >= OWNER_MAXNAMELEN) { 1480 name_len = OWNER_MAXNAMELEN - 1; 1481 } else { 1482 name_len = name_size; 1483 } 1484 1485 memcpy(namebuf, p, name_len); 1486 namebuf[name_len] = 0; 1487 if(ARCHIVE_OK != consume(a, (int64_t)name_size)) 1488 return ARCHIVE_EOF; 1489 1490 archive_entry_set_gname(e, namebuf); 1491 } 1492 if ((flags & OWNER_USER_UID) != 0) { 1493 if(!read_var(a, &id, &value_size)) 1494 return ARCHIVE_EOF; 1495 if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1496 return ARCHIVE_EOF; 1497 *extra_data_size -= value_size; 1498 1499 archive_entry_set_uid(e, (la_int64_t)id); 1500 } 1501 if ((flags & OWNER_GROUP_GID) 
!= 0) { 1502 if(!read_var(a, &id, &value_size)) 1503 return ARCHIVE_EOF; 1504 if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1505 return ARCHIVE_EOF; 1506 *extra_data_size -= value_size; 1507 1508 archive_entry_set_gid(e, (la_int64_t)id); 1509 } 1510 return ARCHIVE_OK; 1511} 1512 1513static int process_head_file_extra(struct archive_read* a, 1514 struct archive_entry* e, struct rar5* rar, ssize_t extra_data_size) 1515{ 1516 size_t extra_field_size; 1517 size_t extra_field_id = 0; 1518 int ret = ARCHIVE_FATAL; 1519 size_t var_size; 1520 1521 while(extra_data_size > 0) { 1522 if(!read_var_sized(a, &extra_field_size, &var_size)) 1523 return ARCHIVE_EOF; 1524 1525 extra_data_size -= var_size; 1526 if(ARCHIVE_OK != consume(a, var_size)) { 1527 return ARCHIVE_EOF; 1528 } 1529 1530 if(!read_var_sized(a, &extra_field_id, &var_size)) 1531 return ARCHIVE_EOF; 1532 1533 extra_data_size -= var_size; 1534 if(ARCHIVE_OK != consume(a, var_size)) { 1535 return ARCHIVE_EOF; 1536 } 1537 1538 switch(extra_field_id) { 1539 case EX_HASH: 1540 ret = parse_file_extra_hash(a, rar, 1541 &extra_data_size); 1542 break; 1543 case EX_HTIME: 1544 ret = parse_file_extra_htime(a, e, rar, 1545 &extra_data_size); 1546 break; 1547 case EX_REDIR: 1548 ret = parse_file_extra_redir(a, e, rar, 1549 &extra_data_size); 1550 break; 1551 case EX_UOWNER: 1552 ret = parse_file_extra_owner(a, e, 1553 &extra_data_size); 1554 break; 1555 case EX_VERSION: 1556 ret = parse_file_extra_version(a, e, 1557 &extra_data_size); 1558 break; 1559 case EX_CRYPT: 1560 /* fallthrough */ 1561 case EX_SUBDATA: 1562 /* fallthrough */ 1563 default: 1564 /* Skip unsupported entry. */ 1565 return consume(a, extra_data_size); 1566 } 1567 } 1568 1569 if(ret != ARCHIVE_OK) { 1570 /* Attribute not implemented. 
*/ 1571 return ret; 1572 } 1573 1574 return ARCHIVE_OK; 1575} 1576 1577static int process_head_file(struct archive_read* a, struct rar5* rar, 1578 struct archive_entry* entry, size_t block_flags) 1579{ 1580 ssize_t extra_data_size = 0; 1581 size_t data_size = 0; 1582 size_t file_flags = 0; 1583 size_t file_attr = 0; 1584 size_t compression_info = 0; 1585 size_t host_os = 0; 1586 size_t name_size = 0; 1587 uint64_t unpacked_size, window_size; 1588 uint32_t mtime = 0, crc = 0; 1589 int c_method = 0, c_version = 0; 1590 char name_utf8_buf[MAX_NAME_IN_BYTES]; 1591 const uint8_t* p; 1592 1593 enum FILE_FLAGS { 1594 DIRECTORY = 0x0001, UTIME = 0x0002, CRC32 = 0x0004, 1595 UNKNOWN_UNPACKED_SIZE = 0x0008, 1596 }; 1597 1598 enum FILE_ATTRS { 1599 ATTR_READONLY = 0x1, ATTR_HIDDEN = 0x2, ATTR_SYSTEM = 0x4, 1600 ATTR_DIRECTORY = 0x10, 1601 }; 1602 1603 enum COMP_INFO_FLAGS { 1604 SOLID = 0x0040, 1605 }; 1606 1607 enum HOST_OS { 1608 HOST_WINDOWS = 0, 1609 HOST_UNIX = 1, 1610 }; 1611 1612 archive_entry_clear(entry); 1613 1614 /* Do not reset file context if we're switching archives. */ 1615 if(!rar->cstate.switch_multivolume) { 1616 reset_file_context(rar); 1617 } 1618 1619 if(block_flags & HFL_EXTRA_DATA) { 1620 size_t edata_size = 0; 1621 if(!read_var_sized(a, &edata_size, NULL)) 1622 return ARCHIVE_EOF; 1623 1624 /* Intentional type cast from unsigned to signed. 
*/ 1625 extra_data_size = (ssize_t) edata_size; 1626 } 1627 1628 if(block_flags & HFL_DATA) { 1629 if(!read_var_sized(a, &data_size, NULL)) 1630 return ARCHIVE_EOF; 1631 1632 rar->file.bytes_remaining = data_size; 1633 } else { 1634 rar->file.bytes_remaining = 0; 1635 1636 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1637 "no data found in file/service block"); 1638 return ARCHIVE_FATAL; 1639 } 1640 1641 if(!read_var_sized(a, &file_flags, NULL)) 1642 return ARCHIVE_EOF; 1643 1644 if(!read_var(a, &unpacked_size, NULL)) 1645 return ARCHIVE_EOF; 1646 1647 if(file_flags & UNKNOWN_UNPACKED_SIZE) { 1648 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 1649 "Files with unknown unpacked size are not supported"); 1650 return ARCHIVE_FATAL; 1651 } 1652 1653 rar->file.dir = (uint8_t) ((file_flags & DIRECTORY) > 0); 1654 1655 if(!read_var_sized(a, &file_attr, NULL)) 1656 return ARCHIVE_EOF; 1657 1658 if(file_flags & UTIME) { 1659 if(!read_u32(a, &mtime)) 1660 return ARCHIVE_EOF; 1661 } 1662 1663 if(file_flags & CRC32) { 1664 if(!read_u32(a, &crc)) 1665 return ARCHIVE_EOF; 1666 } 1667 1668 if(!read_var_sized(a, &compression_info, NULL)) 1669 return ARCHIVE_EOF; 1670 1671 c_method = (int) (compression_info >> 7) & 0x7; 1672 c_version = (int) (compression_info & 0x3f); 1673 1674 /* RAR5 seems to limit the dictionary size to 64MB. */ 1675 window_size = (rar->file.dir > 0) ? 1676 0 : 1677 g_unpack_window_size << ((compression_info >> 10) & 15); 1678 rar->cstate.method = c_method; 1679 rar->cstate.version = c_version + 50; 1680 rar->file.solid = (compression_info & SOLID) > 0; 1681 1682 /* Archives which declare solid files without initializing the window 1683 * buffer first are invalid. 
*/ 1684 1685 if(rar->file.solid > 0 && rar->cstate.window_buf == NULL) { 1686 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1687 "Declared solid file, but no window buffer " 1688 "initialized yet."); 1689 return ARCHIVE_FATAL; 1690 } 1691 1692 /* Check if window_size is a sane value. Also, if the file is not 1693 * declared as a directory, disallow window_size == 0. */ 1694 if(window_size > (64 * 1024 * 1024) || 1695 (rar->file.dir == 0 && window_size == 0)) 1696 { 1697 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1698 "Declared dictionary size is not supported."); 1699 return ARCHIVE_FATAL; 1700 } 1701 1702 if(rar->file.solid > 0) { 1703 /* Re-check if current window size is the same as previous 1704 * window size (for solid files only). */ 1705 if(rar->file.solid_window_size > 0 && 1706 rar->file.solid_window_size != (ssize_t) window_size) 1707 { 1708 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1709 "Window size for this solid file doesn't match " 1710 "the window size used in previous solid file. "); 1711 return ARCHIVE_FATAL; 1712 } 1713 } 1714 1715 /* If we're currently switching volumes, ignore the new definition of 1716 * window_size. */ 1717 if(rar->cstate.switch_multivolume == 0) { 1718 /* Values up to 64M should fit into ssize_t on every 1719 * architecture. */ 1720 rar->cstate.window_size = (ssize_t) window_size; 1721 } 1722 1723 if(rar->file.solid > 0 && rar->file.solid_window_size == 0) { 1724 /* Solid files have to have the same window_size across 1725 whole archive. Remember the window_size parameter 1726 for first solid file found. 
*/ 1727 rar->file.solid_window_size = rar->cstate.window_size; 1728 } 1729 1730 init_window_mask(rar); 1731 1732 rar->file.service = 0; 1733 1734 if(!read_var_sized(a, &host_os, NULL)) 1735 return ARCHIVE_EOF; 1736 1737 if(host_os == HOST_WINDOWS) { 1738 /* Host OS is Windows */ 1739 1740 __LA_MODE_T mode; 1741 1742 if(file_attr & ATTR_DIRECTORY) { 1743 if (file_attr & ATTR_READONLY) { 1744 mode = 0555 | AE_IFDIR; 1745 } else { 1746 mode = 0755 | AE_IFDIR; 1747 } 1748 } else { 1749 if (file_attr & ATTR_READONLY) { 1750 mode = 0444 | AE_IFREG; 1751 } else { 1752 mode = 0644 | AE_IFREG; 1753 } 1754 } 1755 1756 archive_entry_set_mode(entry, mode); 1757 1758 if (file_attr & (ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM)) { 1759 char *fflags_text, *ptr; 1760 /* allocate for "rdonly,hidden,system," */ 1761 fflags_text = malloc(22 * sizeof(char)); 1762 if (fflags_text != NULL) { 1763 ptr = fflags_text; 1764 if (file_attr & ATTR_READONLY) { 1765 strcpy(ptr, "rdonly,"); 1766 ptr = ptr + 7; 1767 } 1768 if (file_attr & ATTR_HIDDEN) { 1769 strcpy(ptr, "hidden,"); 1770 ptr = ptr + 7; 1771 } 1772 if (file_attr & ATTR_SYSTEM) { 1773 strcpy(ptr, "system,"); 1774 ptr = ptr + 7; 1775 } 1776 if (ptr > fflags_text) { 1777 /* Delete trailing comma */ 1778 *(ptr - 1) = '\0'; 1779 archive_entry_copy_fflags_text(entry, 1780 fflags_text); 1781 } 1782 free(fflags_text); 1783 } 1784 } 1785 } else if(host_os == HOST_UNIX) { 1786 /* Host OS is Unix */ 1787 archive_entry_set_mode(entry, (__LA_MODE_T) file_attr); 1788 } else { 1789 /* Unknown host OS */ 1790 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1791 "Unsupported Host OS: 0x%x", (int) host_os); 1792 1793 return ARCHIVE_FATAL; 1794 } 1795 1796 if(!read_var_sized(a, &name_size, NULL)) 1797 return ARCHIVE_EOF; 1798 1799 if(!read_ahead(a, name_size, &p)) 1800 return ARCHIVE_EOF; 1801 1802 if(name_size > (MAX_NAME_IN_CHARS - 1)) { 1803 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1804 "Filename is too long"); 1805 1806 
return ARCHIVE_FATAL; 1807 } 1808 1809 if(name_size == 0) { 1810 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1811 "No filename specified"); 1812 1813 return ARCHIVE_FATAL; 1814 } 1815 1816 memcpy(name_utf8_buf, p, name_size); 1817 name_utf8_buf[name_size] = 0; 1818 if(ARCHIVE_OK != consume(a, name_size)) { 1819 return ARCHIVE_EOF; 1820 } 1821 1822 archive_entry_update_pathname_utf8(entry, name_utf8_buf); 1823 1824 if(extra_data_size > 0) { 1825 int ret = process_head_file_extra(a, entry, rar, 1826 extra_data_size); 1827 1828 /* 1829 * TODO: rewrite or remove useless sanity check 1830 * as extra_data_size is not passed as a pointer 1831 * 1832 if(extra_data_size < 0) { 1833 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 1834 "File extra data size is not zero"); 1835 return ARCHIVE_FATAL; 1836 } 1837 */ 1838 1839 if(ret != ARCHIVE_OK) 1840 return ret; 1841 } 1842 1843 if((file_flags & UNKNOWN_UNPACKED_SIZE) == 0) { 1844 rar->file.unpacked_size = (ssize_t) unpacked_size; 1845 if(rar->file.redir_type == REDIR_TYPE_NONE) 1846 archive_entry_set_size(entry, unpacked_size); 1847 } 1848 1849 if(file_flags & UTIME) { 1850 archive_entry_set_mtime(entry, (time_t) mtime, 0); 1851 } 1852 1853 if(file_flags & CRC32) { 1854 rar->file.stored_crc32 = crc; 1855 } 1856 1857 if(!rar->cstate.switch_multivolume) { 1858 /* Do not reinitialize unpacking state if we're switching 1859 * archives. */ 1860 rar->cstate.block_parsing_finished = 1; 1861 rar->cstate.all_filters_applied = 1; 1862 rar->cstate.initialized = 0; 1863 } 1864 1865 if(rar->generic.split_before > 0) { 1866 /* If now we're standing on a header that has a 'split before' 1867 * mark, it means we're standing on a 'continuation' file 1868 * header. Signal the caller that if it wants to move to 1869 * another file, it must call rar5_read_header() function 1870 * again. 
*/ 1871 1872 return ARCHIVE_RETRY; 1873 } else { 1874 return ARCHIVE_OK; 1875 } 1876} 1877 1878static int process_head_service(struct archive_read* a, struct rar5* rar, 1879 struct archive_entry* entry, size_t block_flags) 1880{ 1881 /* Process this SERVICE block the same way as FILE blocks. */ 1882 int ret = process_head_file(a, rar, entry, block_flags); 1883 if(ret != ARCHIVE_OK) 1884 return ret; 1885 1886 rar->file.service = 1; 1887 1888 /* But skip the data part automatically. It's no use for the user 1889 * anyway. It contains only service data, not even needed to 1890 * properly unpack the file. */ 1891 ret = rar5_read_data_skip(a); 1892 if(ret != ARCHIVE_OK) 1893 return ret; 1894 1895 /* After skipping, try parsing another block automatically. */ 1896 return ARCHIVE_RETRY; 1897} 1898 1899static int process_head_main(struct archive_read* a, struct rar5* rar, 1900 struct archive_entry* entry, size_t block_flags) 1901{ 1902 int ret; 1903 size_t extra_data_size = 0; 1904 size_t extra_field_size = 0; 1905 size_t extra_field_id = 0; 1906 size_t archive_flags = 0; 1907 1908 enum MAIN_FLAGS { 1909 VOLUME = 0x0001, /* multi-volume archive */ 1910 VOLUME_NUMBER = 0x0002, /* volume number, first vol doesn't 1911 * have it */ 1912 SOLID = 0x0004, /* solid archive */ 1913 PROTECT = 0x0008, /* contains Recovery info */ 1914 LOCK = 0x0010, /* readonly flag, not used */ 1915 }; 1916 1917 enum MAIN_EXTRA { 1918 // Just one attribute here. 
1919 LOCATOR = 0x01, 1920 }; 1921 1922 (void) entry; 1923 1924 if(block_flags & HFL_EXTRA_DATA) { 1925 if(!read_var_sized(a, &extra_data_size, NULL)) 1926 return ARCHIVE_EOF; 1927 } else { 1928 extra_data_size = 0; 1929 } 1930 1931 if(!read_var_sized(a, &archive_flags, NULL)) { 1932 return ARCHIVE_EOF; 1933 } 1934 1935 rar->main.volume = (archive_flags & VOLUME) > 0; 1936 rar->main.solid = (archive_flags & SOLID) > 0; 1937 1938 if(archive_flags & VOLUME_NUMBER) { 1939 size_t v = 0; 1940 if(!read_var_sized(a, &v, NULL)) { 1941 return ARCHIVE_EOF; 1942 } 1943 1944 if (v > UINT_MAX) { 1945 archive_set_error(&a->archive, 1946 ARCHIVE_ERRNO_FILE_FORMAT, 1947 "Invalid volume number"); 1948 return ARCHIVE_FATAL; 1949 } 1950 1951 rar->main.vol_no = (unsigned int) v; 1952 } else { 1953 rar->main.vol_no = 0; 1954 } 1955 1956 if(rar->vol.expected_vol_no > 0 && 1957 rar->main.vol_no != rar->vol.expected_vol_no) 1958 { 1959 /* Returning EOF instead of FATAL because of strange 1960 * libarchive behavior. When opening multiple files via 1961 * archive_read_open_filenames(), after reading up the whole 1962 * last file, the __archive_read_ahead function wraps up to 1963 * the first archive instead of returning EOF. */ 1964 return ARCHIVE_EOF; 1965 } 1966 1967 if(extra_data_size == 0) { 1968 /* Early return. */ 1969 return ARCHIVE_OK; 1970 } 1971 1972 if(!read_var_sized(a, &extra_field_size, NULL)) { 1973 return ARCHIVE_EOF; 1974 } 1975 1976 if(!read_var_sized(a, &extra_field_id, NULL)) { 1977 return ARCHIVE_EOF; 1978 } 1979 1980 if(extra_field_size == 0) { 1981 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1982 "Invalid extra field size"); 1983 return ARCHIVE_FATAL; 1984 } 1985 1986 switch(extra_field_id) { 1987 case LOCATOR: 1988 ret = process_main_locator_extra_block(a, rar); 1989 if(ret != ARCHIVE_OK) { 1990 /* Error while parsing main locator extra 1991 * block. 
*/ 1992 return ret; 1993 } 1994 1995 break; 1996 default: 1997 archive_set_error(&a->archive, 1998 ARCHIVE_ERRNO_FILE_FORMAT, 1999 "Unsupported extra type (0x%x)", 2000 (int) extra_field_id); 2001 return ARCHIVE_FATAL; 2002 } 2003 2004 return ARCHIVE_OK; 2005} 2006 2007static int skip_unprocessed_bytes(struct archive_read* a) { 2008 struct rar5* rar = get_context(a); 2009 int ret; 2010 2011 if(rar->file.bytes_remaining) { 2012 /* Use different skipping method in block merging mode than in 2013 * normal mode. If merge mode is active, rar5_read_data_skip 2014 * can't be used, because it could allow recursive use of 2015 * merge_block() * function, and this function doesn't support 2016 * recursive use. */ 2017 if(rar->merge_mode) { 2018 /* Discard whole merged block. This is valid in solid 2019 * mode as well, because the code will discard blocks 2020 * only if those blocks are safe to discard (i.e. 2021 * they're not FILE blocks). */ 2022 ret = consume(a, rar->file.bytes_remaining); 2023 if(ret != ARCHIVE_OK) { 2024 return ret; 2025 } 2026 rar->file.bytes_remaining = 0; 2027 } else { 2028 /* If we're not in merge mode, use safe skipping code. 2029 * This will ensure we'll handle solid archives 2030 * properly. */ 2031 ret = rar5_read_data_skip(a); 2032 if(ret != ARCHIVE_OK) { 2033 return ret; 2034 } 2035 } 2036 } 2037 2038 return ARCHIVE_OK; 2039} 2040 2041static int scan_for_signature(struct archive_read* a); 2042 2043/* Base block processing function. A 'base block' is a RARv5 header block 2044 * that tells the reader what kind of data is stored inside the block. 2045 * 2046 * From the birds-eye view a RAR file looks file this: 2047 * 2048 * <magic><base_block_1><base_block_2>...<base_block_n> 2049 * 2050 * There are a few types of base blocks. Those types are specified inside 2051 * the 'switch' statement in this function. 
For example purposes, I'll write
 * what a standard RARv5 file could look like here:
 *
 * <magic><MAIN><FILE><FILE><FILE><SERVICE><ENDARC>
 *
 * The structure above could describe an archive file with 3 files in it,
 * one service "QuickOpen" block (that is ignored by this parser), and an
 * end of file base block marker.
 *
 * If the file is stored in multiple archive files ("multiarchive"), it might
 * look like this:
 *
 * .part01.rar: <magic><MAIN><FILE><ENDARC>
 * .part02.rar: <magic><MAIN><FILE><ENDARC>
 * .part03.rar: <magic><MAIN><FILE><ENDARC>
 *
 * This example could describe 3 RAR files that contain ONE archived file.
 * Or it could describe 3 RAR files that contain 3 different files. Or 3
 * RAR files that contain 2 files. It all depends on what metadata is stored
 * in the headers of <FILE> blocks.
 *
 * Each <FILE> block contains info about its size, the name of the file it's
 * storing inside, whether this FILE block is a continuation block of the
 * previous archive ('split before'), and whether this FILE block should be
 * continued in another archive ('split after'). By parsing the 'split before'
 * and 'split after' flags, we're able to tell if multiple <FILE> base blocks
 * are describing one file, or multiple files (with the same filename, for
 * example).
 *
 * One thing to note is that if we're parsing the first <FILE> block, and
 * we see 'split after' flag, then we need to jump over to another <FILE>
 * block to be able to decompress rest of the data. To do this, we need
 * to skip the <ENDARC> block, then switch to another file, then skip the
 * <magic> block, <MAIN> block, and then we're standing on the proper
 * <FILE> block.
2086 */ 2087 2088static int process_base_block(struct archive_read* a, 2089 struct archive_entry* entry) 2090{ 2091 const size_t SMALLEST_RAR5_BLOCK_SIZE = 3; 2092 2093 struct rar5* rar = get_context(a); 2094 uint32_t hdr_crc, computed_crc; 2095 size_t raw_hdr_size = 0, hdr_size_len, hdr_size; 2096 size_t header_id = 0; 2097 size_t header_flags = 0; 2098 const uint8_t* p; 2099 int ret; 2100 2101 enum HEADER_TYPE { 2102 HEAD_MARK = 0x00, HEAD_MAIN = 0x01, HEAD_FILE = 0x02, 2103 HEAD_SERVICE = 0x03, HEAD_CRYPT = 0x04, HEAD_ENDARC = 0x05, 2104 HEAD_UNKNOWN = 0xff, 2105 }; 2106 2107 /* Skip any unprocessed data for this file. */ 2108 ret = skip_unprocessed_bytes(a); 2109 if(ret != ARCHIVE_OK) 2110 return ret; 2111 2112 /* Read the expected CRC32 checksum. */ 2113 if(!read_u32(a, &hdr_crc)) { 2114 return ARCHIVE_EOF; 2115 } 2116 2117 /* Read header size. */ 2118 if(!read_var_sized(a, &raw_hdr_size, &hdr_size_len)) { 2119 return ARCHIVE_EOF; 2120 } 2121 2122 hdr_size = raw_hdr_size + hdr_size_len; 2123 2124 /* Sanity check, maximum header size for RAR5 is 2MB. */ 2125 if(hdr_size > (2 * 1024 * 1024)) { 2126 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2127 "Base block header is too large"); 2128 2129 return ARCHIVE_FATAL; 2130 } 2131 2132 /* Additional sanity checks to weed out invalid files. */ 2133 if(raw_hdr_size == 0 || hdr_size_len == 0 || 2134 hdr_size < SMALLEST_RAR5_BLOCK_SIZE) 2135 { 2136 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2137 "Too small block encountered (%zu bytes)", 2138 raw_hdr_size); 2139 2140 return ARCHIVE_FATAL; 2141 } 2142 2143 /* Read the whole header data into memory, maximum memory use here is 2144 * 2MB. */ 2145 if(!read_ahead(a, hdr_size, &p)) { 2146 return ARCHIVE_EOF; 2147 } 2148 2149 /* Verify the CRC32 of the header data. 
*/ 2150 computed_crc = (uint32_t) crc32(0, p, (int) hdr_size); 2151 if(computed_crc != hdr_crc) { 2152 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2153 "Header CRC error"); 2154 2155 return ARCHIVE_FATAL; 2156 } 2157 2158 /* If the checksum is OK, we proceed with parsing. */ 2159 if(ARCHIVE_OK != consume(a, hdr_size_len)) { 2160 return ARCHIVE_EOF; 2161 } 2162 2163 if(!read_var_sized(a, &header_id, NULL)) 2164 return ARCHIVE_EOF; 2165 2166 if(!read_var_sized(a, &header_flags, NULL)) 2167 return ARCHIVE_EOF; 2168 2169 rar->generic.split_after = (header_flags & HFL_SPLIT_AFTER) > 0; 2170 rar->generic.split_before = (header_flags & HFL_SPLIT_BEFORE) > 0; 2171 rar->generic.size = (int)hdr_size; 2172 rar->generic.last_header_id = (int)header_id; 2173 rar->main.endarc = 0; 2174 2175 /* Those are possible header ids in RARv5. */ 2176 switch(header_id) { 2177 case HEAD_MAIN: 2178 ret = process_head_main(a, rar, entry, header_flags); 2179 2180 /* Main header doesn't have any files in it, so it's 2181 * pointless to return to the caller. Retry to next 2182 * header, which should be HEAD_FILE/HEAD_SERVICE. */ 2183 if(ret == ARCHIVE_OK) 2184 return ARCHIVE_RETRY; 2185 2186 return ret; 2187 case HEAD_SERVICE: 2188 ret = process_head_service(a, rar, entry, header_flags); 2189 return ret; 2190 case HEAD_FILE: 2191 ret = process_head_file(a, rar, entry, header_flags); 2192 return ret; 2193 case HEAD_CRYPT: 2194 archive_set_error(&a->archive, 2195 ARCHIVE_ERRNO_FILE_FORMAT, 2196 "Encryption is not supported"); 2197 return ARCHIVE_FATAL; 2198 case HEAD_ENDARC: 2199 rar->main.endarc = 1; 2200 2201 /* After encountering an end of file marker, we need 2202 * to take into consideration if this archive is 2203 * continued in another file (i.e. is it part01.rar: 2204 * is there a part02.rar?) */ 2205 if(rar->main.volume) { 2206 /* In case there is part02.rar, position the 2207 * read pointer in a proper place, so we can 2208 * resume parsing. 
*/ 2209 ret = scan_for_signature(a); 2210 if(ret == ARCHIVE_FATAL) { 2211 return ARCHIVE_EOF; 2212 } else { 2213 if(rar->vol.expected_vol_no == 2214 UINT_MAX) { 2215 archive_set_error(&a->archive, 2216 ARCHIVE_ERRNO_FILE_FORMAT, 2217 "Header error"); 2218 return ARCHIVE_FATAL; 2219 } 2220 2221 rar->vol.expected_vol_no = 2222 rar->main.vol_no + 1; 2223 return ARCHIVE_OK; 2224 } 2225 } else { 2226 return ARCHIVE_EOF; 2227 } 2228 case HEAD_MARK: 2229 return ARCHIVE_EOF; 2230 default: 2231 if((header_flags & HFL_SKIP_IF_UNKNOWN) == 0) { 2232 archive_set_error(&a->archive, 2233 ARCHIVE_ERRNO_FILE_FORMAT, 2234 "Header type error"); 2235 return ARCHIVE_FATAL; 2236 } else { 2237 /* If the block is marked as 'skip if unknown', 2238 * do as the flag says: skip the block 2239 * instead on failing on it. */ 2240 return ARCHIVE_RETRY; 2241 } 2242 } 2243 2244#if !defined WIN32 2245 // Not reached. 2246 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 2247 "Internal unpacker error"); 2248 return ARCHIVE_FATAL; 2249#endif 2250} 2251 2252static int skip_base_block(struct archive_read* a) { 2253 int ret; 2254 struct rar5* rar = get_context(a); 2255 2256 /* Create a new local archive_entry structure that will be operated on 2257 * by header reader; operations on this archive_entry will be discarded. 2258 */ 2259 struct archive_entry* entry = archive_entry_new(); 2260 ret = process_base_block(a, entry); 2261 2262 /* Discard operations on this archive_entry structure. 
*/ 2263 archive_entry_free(entry); 2264 if(ret == ARCHIVE_FATAL) 2265 return ret; 2266 2267 if(rar->generic.last_header_id == 2 && rar->generic.split_before > 0) 2268 return ARCHIVE_OK; 2269 2270 if(ret == ARCHIVE_OK) 2271 return ARCHIVE_RETRY; 2272 else 2273 return ret; 2274} 2275 2276static int rar5_read_header(struct archive_read *a, 2277 struct archive_entry *entry) 2278{ 2279 struct rar5* rar = get_context(a); 2280 int ret; 2281 2282 if(rar->header_initialized == 0) { 2283 init_header(a); 2284 rar->header_initialized = 1; 2285 } 2286 2287 if(rar->skipped_magic == 0) { 2288 if(ARCHIVE_OK != consume(a, sizeof(rar5_signature_xor))) { 2289 return ARCHIVE_EOF; 2290 } 2291 2292 rar->skipped_magic = 1; 2293 } 2294 2295 do { 2296 ret = process_base_block(a, entry); 2297 } while(ret == ARCHIVE_RETRY || 2298 (rar->main.endarc > 0 && ret == ARCHIVE_OK)); 2299 2300 return ret; 2301} 2302 2303static void init_unpack(struct rar5* rar) { 2304 rar->file.calculated_crc32 = 0; 2305 init_window_mask(rar); 2306 2307 free(rar->cstate.window_buf); 2308 free(rar->cstate.filtered_buf); 2309 2310 if(rar->cstate.window_size > 0) { 2311 rar->cstate.window_buf = calloc(1, rar->cstate.window_size); 2312 rar->cstate.filtered_buf = calloc(1, rar->cstate.window_size); 2313 } else { 2314 rar->cstate.window_buf = NULL; 2315 rar->cstate.filtered_buf = NULL; 2316 } 2317 2318 rar->cstate.write_ptr = 0; 2319 rar->cstate.last_write_ptr = 0; 2320 2321 memset(&rar->cstate.bd, 0, sizeof(rar->cstate.bd)); 2322 memset(&rar->cstate.ld, 0, sizeof(rar->cstate.ld)); 2323 memset(&rar->cstate.dd, 0, sizeof(rar->cstate.dd)); 2324 memset(&rar->cstate.ldd, 0, sizeof(rar->cstate.ldd)); 2325 memset(&rar->cstate.rd, 0, sizeof(rar->cstate.rd)); 2326} 2327 2328static void update_crc(struct rar5* rar, const uint8_t* p, size_t to_read) { 2329 int verify_crc; 2330 2331 if(rar->skip_mode) { 2332#if defined CHECK_CRC_ON_SOLID_SKIP 2333 verify_crc = 1; 2334#else 2335 verify_crc = 0; 2336#endif 2337 } else 2338 verify_crc = 
1; 2339 2340 if(verify_crc) { 2341 /* Don't update CRC32 if the file doesn't have the 2342 * `stored_crc32` info filled in. */ 2343 if(rar->file.stored_crc32 > 0) { 2344 rar->file.calculated_crc32 = 2345 crc32(rar->file.calculated_crc32, p, to_read); 2346 } 2347 2348 /* Check if the file uses an optional BLAKE2sp checksum 2349 * algorithm. */ 2350 if(rar->file.has_blake2 > 0) { 2351 /* Return value of the `update` function is always 0, 2352 * so we can explicitly ignore it here. */ 2353 (void) blake2sp_update(&rar->file.b2state, p, to_read); 2354 } 2355 } 2356} 2357 2358static int create_decode_tables(uint8_t* bit_length, 2359 struct decode_table* table, int size) 2360{ 2361 int code, upper_limit = 0, i, lc[16]; 2362 uint32_t decode_pos_clone[rar5_countof(table->decode_pos)]; 2363 ssize_t cur_len, quick_data_size; 2364 2365 memset(&lc, 0, sizeof(lc)); 2366 memset(table->decode_num, 0, sizeof(table->decode_num)); 2367 table->size = size; 2368 table->quick_bits = size == HUFF_NC ? 10 : 7; 2369 2370 for(i = 0; i < size; i++) { 2371 lc[bit_length[i] & 15]++; 2372 } 2373 2374 lc[0] = 0; 2375 table->decode_pos[0] = 0; 2376 table->decode_len[0] = 0; 2377 2378 for(i = 1; i < 16; i++) { 2379 upper_limit += lc[i]; 2380 2381 table->decode_len[i] = upper_limit << (16 - i); 2382 table->decode_pos[i] = table->decode_pos[i - 1] + lc[i - 1]; 2383 2384 upper_limit <<= 1; 2385 } 2386 2387 memcpy(decode_pos_clone, table->decode_pos, sizeof(decode_pos_clone)); 2388 2389 for(i = 0; i < size; i++) { 2390 uint8_t clen = bit_length[i] & 15; 2391 if(clen > 0) { 2392 int last_pos = decode_pos_clone[clen]; 2393 table->decode_num[last_pos] = i; 2394 decode_pos_clone[clen]++; 2395 } 2396 } 2397 2398 quick_data_size = (int64_t)1 << table->quick_bits; 2399 cur_len = 1; 2400 for(code = 0; code < quick_data_size; code++) { 2401 int bit_field = code << (16 - table->quick_bits); 2402 int dist, pos; 2403 2404 while(cur_len < rar5_countof(table->decode_len) && 2405 bit_field >= 
table->decode_len[cur_len]) { 2406 cur_len++; 2407 } 2408 2409 table->quick_len[code] = (uint8_t) cur_len; 2410 2411 dist = bit_field - table->decode_len[cur_len - 1]; 2412 dist >>= (16 - cur_len); 2413 2414 pos = table->decode_pos[cur_len & 15] + dist; 2415 if(cur_len < rar5_countof(table->decode_pos) && pos < size) { 2416 table->quick_num[code] = table->decode_num[pos]; 2417 } else { 2418 table->quick_num[code] = 0; 2419 } 2420 } 2421 2422 return ARCHIVE_OK; 2423} 2424 2425static int decode_number(struct archive_read* a, struct decode_table* table, 2426 const uint8_t* p, uint16_t* num) 2427{ 2428 int i, bits, dist; 2429 uint16_t bitfield; 2430 uint32_t pos; 2431 struct rar5* rar = get_context(a); 2432 2433 if(ARCHIVE_OK != read_bits_16(rar, p, &bitfield)) { 2434 return ARCHIVE_EOF; 2435 } 2436 2437 bitfield &= 0xfffe; 2438 2439 if(bitfield < table->decode_len[table->quick_bits]) { 2440 int code = bitfield >> (16 - table->quick_bits); 2441 skip_bits(rar, table->quick_len[code]); 2442 *num = table->quick_num[code]; 2443 return ARCHIVE_OK; 2444 } 2445 2446 bits = 15; 2447 2448 for(i = table->quick_bits + 1; i < 15; i++) { 2449 if(bitfield < table->decode_len[i]) { 2450 bits = i; 2451 break; 2452 } 2453 } 2454 2455 skip_bits(rar, bits); 2456 2457 dist = bitfield - table->decode_len[bits - 1]; 2458 dist >>= (16 - bits); 2459 pos = table->decode_pos[bits] + dist; 2460 2461 if(pos >= table->size) 2462 pos = 0; 2463 2464 *num = table->decode_num[pos]; 2465 return ARCHIVE_OK; 2466} 2467 2468/* Reads and parses Huffman tables from the beginning of the block. 
*/ 2469static int parse_tables(struct archive_read* a, struct rar5* rar, 2470 const uint8_t* p) 2471{ 2472 int ret, value, i, w, idx = 0; 2473 uint8_t bit_length[HUFF_BC], 2474 table[HUFF_TABLE_SIZE], 2475 nibble_mask = 0xF0, 2476 nibble_shift = 4; 2477 2478 enum { ESCAPE = 15 }; 2479 2480 /* The data for table generation is compressed using a simple RLE-like 2481 * algorithm when storing zeroes, so we need to unpack it first. */ 2482 for(w = 0, i = 0; w < HUFF_BC;) { 2483 if(i >= rar->cstate.cur_block_size) { 2484 /* Truncated data, can't continue. */ 2485 archive_set_error(&a->archive, 2486 ARCHIVE_ERRNO_FILE_FORMAT, 2487 "Truncated data in huffman tables"); 2488 return ARCHIVE_FATAL; 2489 } 2490 2491 value = (p[i] & nibble_mask) >> nibble_shift; 2492 2493 if(nibble_mask == 0x0F) 2494 ++i; 2495 2496 nibble_mask ^= 0xFF; 2497 nibble_shift ^= 4; 2498 2499 /* Values smaller than 15 is data, so we write it directly. 2500 * Value 15 is a flag telling us that we need to unpack more 2501 * bytes. */ 2502 if(value == ESCAPE) { 2503 value = (p[i] & nibble_mask) >> nibble_shift; 2504 if(nibble_mask == 0x0F) 2505 ++i; 2506 nibble_mask ^= 0xFF; 2507 nibble_shift ^= 4; 2508 2509 if(value == 0) { 2510 /* We sometimes need to write the actual value 2511 * of 15, so this case handles that. */ 2512 bit_length[w++] = ESCAPE; 2513 } else { 2514 int k; 2515 2516 /* Fill zeroes. 
*/ 2517 for(k = 0; (k < value + 2) && (w < HUFF_BC); 2518 k++) { 2519 bit_length[w++] = 0; 2520 } 2521 } 2522 } else { 2523 bit_length[w++] = value; 2524 } 2525 } 2526 2527 rar->bits.in_addr = i; 2528 rar->bits.bit_addr = nibble_shift ^ 4; 2529 2530 ret = create_decode_tables(bit_length, &rar->cstate.bd, HUFF_BC); 2531 if(ret != ARCHIVE_OK) { 2532 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2533 "Decoding huffman tables failed"); 2534 return ARCHIVE_FATAL; 2535 } 2536 2537 for(i = 0; i < HUFF_TABLE_SIZE;) { 2538 uint16_t num; 2539 2540 if((rar->bits.in_addr + 6) >= rar->cstate.cur_block_size) { 2541 /* Truncated data, can't continue. */ 2542 archive_set_error(&a->archive, 2543 ARCHIVE_ERRNO_FILE_FORMAT, 2544 "Truncated data in huffman tables (#2)"); 2545 return ARCHIVE_FATAL; 2546 } 2547 2548 ret = decode_number(a, &rar->cstate.bd, p, &num); 2549 if(ret != ARCHIVE_OK) { 2550 archive_set_error(&a->archive, 2551 ARCHIVE_ERRNO_FILE_FORMAT, 2552 "Decoding huffman tables failed"); 2553 return ARCHIVE_FATAL; 2554 } 2555 2556 if(num < 16) { 2557 /* 0..15: store directly */ 2558 table[i] = (uint8_t) num; 2559 i++; 2560 } else if(num < 18) { 2561 /* 16..17: repeat previous code */ 2562 uint16_t n; 2563 2564 if(ARCHIVE_OK != read_bits_16(rar, p, &n)) 2565 return ARCHIVE_EOF; 2566 2567 if(num == 16) { 2568 n >>= 13; 2569 n += 3; 2570 skip_bits(rar, 3); 2571 } else { 2572 n >>= 9; 2573 n += 11; 2574 skip_bits(rar, 7); 2575 } 2576 2577 if(i > 0) { 2578 while(n-- > 0 && i < HUFF_TABLE_SIZE) { 2579 table[i] = table[i - 1]; 2580 i++; 2581 } 2582 } else { 2583 archive_set_error(&a->archive, 2584 ARCHIVE_ERRNO_FILE_FORMAT, 2585 "Unexpected error when decoding " 2586 "huffman tables"); 2587 return ARCHIVE_FATAL; 2588 } 2589 } else { 2590 /* other codes: fill with zeroes `n` times */ 2591 uint16_t n; 2592 2593 if(ARCHIVE_OK != read_bits_16(rar, p, &n)) 2594 return ARCHIVE_EOF; 2595 2596 if(num == 18) { 2597 n >>= 13; 2598 n += 3; 2599 skip_bits(rar, 3); 2600 } else { 
2601 n >>= 9; 2602 n += 11; 2603 skip_bits(rar, 7); 2604 } 2605 2606 while(n-- > 0 && i < HUFF_TABLE_SIZE) 2607 table[i++] = 0; 2608 } 2609 } 2610 2611 ret = create_decode_tables(&table[idx], &rar->cstate.ld, HUFF_NC); 2612 if(ret != ARCHIVE_OK) { 2613 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2614 "Failed to create literal table"); 2615 return ARCHIVE_FATAL; 2616 } 2617 2618 idx += HUFF_NC; 2619 2620 ret = create_decode_tables(&table[idx], &rar->cstate.dd, HUFF_DC); 2621 if(ret != ARCHIVE_OK) { 2622 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2623 "Failed to create distance table"); 2624 return ARCHIVE_FATAL; 2625 } 2626 2627 idx += HUFF_DC; 2628 2629 ret = create_decode_tables(&table[idx], &rar->cstate.ldd, HUFF_LDC); 2630 if(ret != ARCHIVE_OK) { 2631 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2632 "Failed to create lower bits of distances table"); 2633 return ARCHIVE_FATAL; 2634 } 2635 2636 idx += HUFF_LDC; 2637 2638 ret = create_decode_tables(&table[idx], &rar->cstate.rd, HUFF_RC); 2639 if(ret != ARCHIVE_OK) { 2640 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2641 "Failed to create repeating distances table"); 2642 return ARCHIVE_FATAL; 2643 } 2644 2645 return ARCHIVE_OK; 2646} 2647 2648/* Parses the block header, verifies its CRC byte, and saves the header 2649 * fields inside the `hdr` pointer. */ 2650static int parse_block_header(struct archive_read* a, const uint8_t* p, 2651 ssize_t* block_size, struct compressed_block_header* hdr) 2652{ 2653 uint8_t calculated_cksum; 2654 memcpy(hdr, p, sizeof(struct compressed_block_header)); 2655 2656 if(bf_byte_count(hdr) > 2) { 2657 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2658 "Unsupported block header size (was %d, max is 2)", 2659 bf_byte_count(hdr)); 2660 return ARCHIVE_FATAL; 2661 } 2662 2663 /* This should probably use bit reader interface in order to be more 2664 * future-proof. 
*/ 2665 *block_size = 0; 2666 switch(bf_byte_count(hdr)) { 2667 /* 1-byte block size */ 2668 case 0: 2669 *block_size = *(const uint8_t*) &p[2]; 2670 break; 2671 2672 /* 2-byte block size */ 2673 case 1: 2674 *block_size = archive_le16dec(&p[2]); 2675 break; 2676 2677 /* 3-byte block size */ 2678 case 2: 2679 *block_size = archive_le32dec(&p[2]); 2680 *block_size &= 0x00FFFFFF; 2681 break; 2682 2683 /* Other block sizes are not supported. This case is not 2684 * reached, because we have an 'if' guard before the switch 2685 * that makes sure of it. */ 2686 default: 2687 return ARCHIVE_FATAL; 2688 } 2689 2690 /* Verify the block header checksum. 0x5A is a magic value and is 2691 * always * constant. */ 2692 calculated_cksum = 0x5A 2693 ^ (uint8_t) hdr->block_flags_u8 2694 ^ (uint8_t) *block_size 2695 ^ (uint8_t) (*block_size >> 8) 2696 ^ (uint8_t) (*block_size >> 16); 2697 2698 if(calculated_cksum != hdr->block_cksum) { 2699 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2700 "Block checksum error: got 0x%x, expected 0x%x", 2701 hdr->block_cksum, calculated_cksum); 2702 2703 return ARCHIVE_FATAL; 2704 } 2705 2706 return ARCHIVE_OK; 2707} 2708 2709/* Convenience function used during filter processing. */ 2710static int parse_filter_data(struct rar5* rar, const uint8_t* p, 2711 uint32_t* filter_data) 2712{ 2713 int i, bytes; 2714 uint32_t data = 0; 2715 2716 if(ARCHIVE_OK != read_consume_bits(rar, p, 2, &bytes)) 2717 return ARCHIVE_EOF; 2718 2719 bytes++; 2720 2721 for(i = 0; i < bytes; i++) { 2722 uint16_t byte; 2723 2724 if(ARCHIVE_OK != read_bits_16(rar, p, &byte)) { 2725 return ARCHIVE_EOF; 2726 } 2727 2728 /* Cast to uint32_t will ensure the shift operation will not 2729 * produce undefined result. */ 2730 data += ((uint32_t) byte >> 8) << (i * 8); 2731 skip_bits(rar, 8); 2732 } 2733 2734 *filter_data = data; 2735 return ARCHIVE_OK; 2736} 2737 2738/* Function is used during sanity checking. 
*/ 2739static int is_valid_filter_block_start(struct rar5* rar, 2740 uint32_t start) 2741{ 2742 const int64_t block_start = (ssize_t) start + rar->cstate.write_ptr; 2743 const int64_t last_bs = rar->cstate.last_block_start; 2744 const ssize_t last_bl = rar->cstate.last_block_length; 2745 2746 if(last_bs == 0 || last_bl == 0) { 2747 /* We didn't have any filters yet, so accept this offset. */ 2748 return 1; 2749 } 2750 2751 if(block_start >= last_bs + last_bl) { 2752 /* Current offset is bigger than last block's end offset, so 2753 * accept current offset. */ 2754 return 1; 2755 } 2756 2757 /* Any other case is not a normal situation and we should fail. */ 2758 return 0; 2759} 2760 2761/* The function will create a new filter, read its parameters from the input 2762 * stream and add it to the filter collection. */ 2763static int parse_filter(struct archive_read* ar, const uint8_t* p) { 2764 uint32_t block_start, block_length; 2765 uint16_t filter_type; 2766 struct filter_info* filt = NULL; 2767 struct rar5* rar = get_context(ar); 2768 2769 /* Read the parameters from the input stream. */ 2770 if(ARCHIVE_OK != parse_filter_data(rar, p, &block_start)) 2771 return ARCHIVE_EOF; 2772 2773 if(ARCHIVE_OK != parse_filter_data(rar, p, &block_length)) 2774 return ARCHIVE_EOF; 2775 2776 if(ARCHIVE_OK != read_bits_16(rar, p, &filter_type)) 2777 return ARCHIVE_EOF; 2778 2779 filter_type >>= 13; 2780 skip_bits(rar, 3); 2781 2782 /* Perform some sanity checks on this filter parameters. Note that we 2783 * allow only DELTA, E8/E9 and ARM filters here, because rest of 2784 * filters are not used in RARv5. */ 2785 2786 if(block_length < 4 || 2787 block_length > 0x400000 || 2788 filter_type > FILTER_ARM || 2789 !is_valid_filter_block_start(rar, block_start)) 2790 { 2791 archive_set_error(&ar->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2792 "Invalid filter encountered"); 2793 return ARCHIVE_FATAL; 2794 } 2795 2796 /* Allocate a new filter. 
*/ 2797 filt = add_new_filter(rar); 2798 if(filt == NULL) { 2799 archive_set_error(&ar->archive, ENOMEM, 2800 "Can't allocate memory for a filter descriptor."); 2801 return ARCHIVE_FATAL; 2802 } 2803 2804 filt->type = filter_type; 2805 filt->block_start = rar->cstate.write_ptr + block_start; 2806 filt->block_length = block_length; 2807 2808 rar->cstate.last_block_start = filt->block_start; 2809 rar->cstate.last_block_length = filt->block_length; 2810 2811 /* Read some more data in case this is a DELTA filter. Other filter 2812 * types don't require any additional data over what was already 2813 * read. */ 2814 if(filter_type == FILTER_DELTA) { 2815 int channels; 2816 2817 if(ARCHIVE_OK != read_consume_bits(rar, p, 5, &channels)) 2818 return ARCHIVE_EOF; 2819 2820 filt->channels = channels + 1; 2821 } 2822 2823 return ARCHIVE_OK; 2824} 2825 2826static int decode_code_length(struct rar5* rar, const uint8_t* p, 2827 uint16_t code) 2828{ 2829 int lbits, length = 2; 2830 if(code < 8) { 2831 lbits = 0; 2832 length += code; 2833 } else { 2834 lbits = code / 4 - 1; 2835 length += (4 | (code & 3)) << lbits; 2836 } 2837 2838 if(lbits > 0) { 2839 int add; 2840 2841 if(ARCHIVE_OK != read_consume_bits(rar, p, lbits, &add)) 2842 return -1; 2843 2844 length += add; 2845 } 2846 2847 return length; 2848} 2849 2850static int copy_string(struct archive_read* a, int len, int dist) { 2851 struct rar5* rar = get_context(a); 2852 const uint64_t cmask = rar->cstate.window_mask; 2853 const uint64_t write_ptr = rar->cstate.write_ptr + 2854 rar->cstate.solid_offset; 2855 int i; 2856 2857 if (rar->cstate.window_buf == NULL) 2858 return ARCHIVE_FATAL; 2859 2860 /* The unpacker spends most of the time in this function. It would be 2861 * a good idea to introduce some optimizations here. 2862 * 2863 * Just remember that this loop treats buffers that overlap differently 2864 * than buffers that do not overlap. This is why a simple memcpy(3) 2865 * call will not be enough. 
*/ 2866 2867 for(i = 0; i < len; i++) { 2868 const ssize_t write_idx = (write_ptr + i) & cmask; 2869 const ssize_t read_idx = (write_ptr + i - dist) & cmask; 2870 rar->cstate.window_buf[write_idx] = 2871 rar->cstate.window_buf[read_idx]; 2872 } 2873 2874 rar->cstate.write_ptr += len; 2875 return ARCHIVE_OK; 2876} 2877 2878static int do_uncompress_block(struct archive_read* a, const uint8_t* p) { 2879 struct rar5* rar = get_context(a); 2880 uint16_t num; 2881 int ret; 2882 2883 const uint64_t cmask = rar->cstate.window_mask; 2884 const struct compressed_block_header* hdr = &rar->last_block_hdr; 2885 const uint8_t bit_size = 1 + bf_bit_size(hdr); 2886 2887 while(1) { 2888 if(rar->cstate.write_ptr - rar->cstate.last_write_ptr > 2889 (rar->cstate.window_size >> 1)) { 2890 /* Don't allow growing data by more than half of the 2891 * window size at a time. In such case, break the loop; 2892 * next call to this function will continue processing 2893 * from this moment. */ 2894 break; 2895 } 2896 2897 if(rar->bits.in_addr > rar->cstate.cur_block_size - 1 || 2898 (rar->bits.in_addr == rar->cstate.cur_block_size - 1 && 2899 rar->bits.bit_addr >= bit_size)) 2900 { 2901 /* If the program counter is here, it means the 2902 * function has finished processing the block. */ 2903 rar->cstate.block_parsing_finished = 1; 2904 break; 2905 } 2906 2907 /* Decode the next literal. */ 2908 if(ARCHIVE_OK != decode_number(a, &rar->cstate.ld, p, &num)) { 2909 return ARCHIVE_EOF; 2910 } 2911 2912 /* Num holds a decompression literal, or 'command code'. 2913 * 2914 * - Values lower than 256 are just bytes. Those codes 2915 * can be stored in the output buffer directly. 2916 * 2917 * - Code 256 defines a new filter, which is later used to 2918 * ransform the data block accordingly to the filter type. 2919 * The data block needs to be fully uncompressed first. 
2920 * 2921 * - Code bigger than 257 and smaller than 262 define 2922 * a repetition pattern that should be copied from 2923 * an already uncompressed chunk of data. 2924 */ 2925 2926 if(num < 256) { 2927 /* Directly store the byte. */ 2928 int64_t write_idx = rar->cstate.solid_offset + 2929 rar->cstate.write_ptr++; 2930 2931 rar->cstate.window_buf[write_idx & cmask] = 2932 (uint8_t) num; 2933 continue; 2934 } else if(num >= 262) { 2935 uint16_t dist_slot; 2936 int len = decode_code_length(rar, p, num - 262), 2937 dbits, 2938 dist = 1; 2939 2940 if(len == -1) { 2941 archive_set_error(&a->archive, 2942 ARCHIVE_ERRNO_PROGRAMMER, 2943 "Failed to decode the code length"); 2944 2945 return ARCHIVE_FATAL; 2946 } 2947 2948 if(ARCHIVE_OK != decode_number(a, &rar->cstate.dd, p, 2949 &dist_slot)) 2950 { 2951 archive_set_error(&a->archive, 2952 ARCHIVE_ERRNO_PROGRAMMER, 2953 "Failed to decode the distance slot"); 2954 2955 return ARCHIVE_FATAL; 2956 } 2957 2958 if(dist_slot < 4) { 2959 dbits = 0; 2960 dist += dist_slot; 2961 } else { 2962 dbits = dist_slot / 2 - 1; 2963 2964 /* Cast to uint32_t will make sure the shift 2965 * left operation won't produce undefined 2966 * result. Then, the uint32_t type will 2967 * be implicitly casted to int. */ 2968 dist += (uint32_t) (2 | 2969 (dist_slot & 1)) << dbits; 2970 } 2971 2972 if(dbits > 0) { 2973 if(dbits >= 4) { 2974 uint32_t add = 0; 2975 uint16_t low_dist; 2976 2977 if(dbits > 4) { 2978 if(ARCHIVE_OK != read_bits_32( 2979 rar, p, &add)) { 2980 /* Return EOF if we 2981 * can't read more 2982 * data. 
*/ 2983 return ARCHIVE_EOF; 2984 } 2985 2986 skip_bits(rar, dbits - 4); 2987 add = (add >> ( 2988 36 - dbits)) << 4; 2989 dist += add; 2990 } 2991 2992 if(ARCHIVE_OK != decode_number(a, 2993 &rar->cstate.ldd, p, &low_dist)) 2994 { 2995 archive_set_error(&a->archive, 2996 ARCHIVE_ERRNO_PROGRAMMER, 2997 "Failed to decode the " 2998 "distance slot"); 2999 3000 return ARCHIVE_FATAL; 3001 } 3002 3003 if(dist >= INT_MAX - low_dist - 1) { 3004 /* This only happens in 3005 * invalid archives. */ 3006 archive_set_error(&a->archive, 3007 ARCHIVE_ERRNO_FILE_FORMAT, 3008 "Distance pointer " 3009 "overflow"); 3010 return ARCHIVE_FATAL; 3011 } 3012 3013 dist += low_dist; 3014 } else { 3015 /* dbits is one of [0,1,2,3] */ 3016 int add; 3017 3018 if(ARCHIVE_OK != read_consume_bits(rar, 3019 p, dbits, &add)) { 3020 /* Return EOF if we can't read 3021 * more data. */ 3022 return ARCHIVE_EOF; 3023 } 3024 3025 dist += add; 3026 } 3027 } 3028 3029 if(dist > 0x100) { 3030 len++; 3031 3032 if(dist > 0x2000) { 3033 len++; 3034 3035 if(dist > 0x40000) { 3036 len++; 3037 } 3038 } 3039 } 3040 3041 dist_cache_push(rar, dist); 3042 rar->cstate.last_len = len; 3043 3044 if(ARCHIVE_OK != copy_string(a, len, dist)) 3045 return ARCHIVE_FATAL; 3046 3047 continue; 3048 } else if(num == 256) { 3049 /* Create a filter. 
*/ 3050 ret = parse_filter(a, p); 3051 if(ret != ARCHIVE_OK) 3052 return ret; 3053 3054 continue; 3055 } else if(num == 257) { 3056 if(rar->cstate.last_len != 0) { 3057 if(ARCHIVE_OK != copy_string(a, 3058 rar->cstate.last_len, 3059 rar->cstate.dist_cache[0])) 3060 { 3061 return ARCHIVE_FATAL; 3062 } 3063 } 3064 3065 continue; 3066 } else { 3067 /* num < 262 */ 3068 const int idx = num - 258; 3069 const int dist = dist_cache_touch(rar, idx); 3070 3071 uint16_t len_slot; 3072 int len; 3073 3074 if(ARCHIVE_OK != decode_number(a, &rar->cstate.rd, p, 3075 &len_slot)) { 3076 return ARCHIVE_FATAL; 3077 } 3078 3079 len = decode_code_length(rar, p, len_slot); 3080 rar->cstate.last_len = len; 3081 3082 if(ARCHIVE_OK != copy_string(a, len, dist)) 3083 return ARCHIVE_FATAL; 3084 3085 continue; 3086 } 3087 3088 /* The program counter shouldn't reach here. */ 3089 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 3090 "Unsupported block code: 0x%x", num); 3091 3092 return ARCHIVE_FATAL; 3093 } 3094 3095 return ARCHIVE_OK; 3096} 3097 3098/* Binary search for the RARv5 signature. */ 3099static int scan_for_signature(struct archive_read* a) { 3100 const uint8_t* p; 3101 const int chunk_size = 512; 3102 ssize_t i; 3103 char signature[sizeof(rar5_signature_xor)]; 3104 3105 /* If we're here, it means we're on an 'unknown territory' data. 3106 * There's no indication what kind of data we're reading here. 3107 * It could be some text comment, any kind of binary data, 3108 * digital sign, dragons, etc. 3109 * 3110 * We want to find a valid RARv5 magic header inside this unknown 3111 * data. */ 3112 3113 /* Is it possible in libarchive to just skip everything until the 3114 * end of the file? If so, it would be a better approach than the 3115 * current implementation of this function. 
*/ 3116 3117 rar5_signature(signature); 3118 3119 while(1) { 3120 if(!read_ahead(a, chunk_size, &p)) 3121 return ARCHIVE_EOF; 3122 3123 for(i = 0; i < chunk_size - (int)sizeof(rar5_signature_xor); 3124 i++) { 3125 if(memcmp(&p[i], signature, 3126 sizeof(rar5_signature_xor)) == 0) { 3127 /* Consume the number of bytes we've used to 3128 * search for the signature, as well as the 3129 * number of bytes used by the signature 3130 * itself. After this we should be standing 3131 * on a valid base block header. */ 3132 (void) consume(a, 3133 i + sizeof(rar5_signature_xor)); 3134 return ARCHIVE_OK; 3135 } 3136 } 3137 3138 consume(a, chunk_size); 3139 } 3140 3141 return ARCHIVE_FATAL; 3142} 3143 3144/* This function will switch the multivolume archive file to another file, 3145 * i.e. from part03 to part 04. */ 3146static int advance_multivolume(struct archive_read* a) { 3147 int lret; 3148 struct rar5* rar = get_context(a); 3149 3150 /* A small state machine that will skip unnecessary data, needed to 3151 * switch from one multivolume to another. Such skipping is needed if 3152 * we want to be an stream-oriented (instead of file-oriented) 3153 * unpacker. 3154 * 3155 * The state machine starts with `rar->main.endarc` == 0. It also 3156 * assumes that current stream pointer points to some base block 3157 * header. 3158 * 3159 * The `endarc` field is being set when the base block parsing 3160 * function encounters the 'end of archive' marker. 3161 */ 3162 3163 while(1) { 3164 if(rar->main.endarc == 1) { 3165 int looping = 1; 3166 3167 rar->main.endarc = 0; 3168 3169 while(looping) { 3170 lret = skip_base_block(a); 3171 switch(lret) { 3172 case ARCHIVE_RETRY: 3173 /* Continue looping. */ 3174 break; 3175 case ARCHIVE_OK: 3176 /* Break loop. */ 3177 looping = 0; 3178 break; 3179 default: 3180 /* Forward any errors to the 3181 * caller. */ 3182 return lret; 3183 } 3184 } 3185 3186 break; 3187 } else { 3188 /* Skip current base block. 
In order to properly skip 3189 * it, we really need to simply parse it and discard 3190 * the results. */ 3191 3192 lret = skip_base_block(a); 3193 if(lret == ARCHIVE_FATAL || lret == ARCHIVE_FAILED) 3194 return lret; 3195 3196 /* The `skip_base_block` function tells us if we 3197 * should continue with skipping, or we should stop 3198 * skipping. We're trying to skip everything up to 3199 * a base FILE block. */ 3200 3201 if(lret != ARCHIVE_RETRY) { 3202 /* If there was an error during skipping, or we 3203 * have just skipped a FILE base block... */ 3204 3205 if(rar->main.endarc == 0) { 3206 return lret; 3207 } else { 3208 continue; 3209 } 3210 } 3211 } 3212 } 3213 3214 return ARCHIVE_OK; 3215} 3216 3217/* Merges the partial block from the first multivolume archive file, and 3218 * partial block from the second multivolume archive file. The result is 3219 * a chunk of memory containing the whole block, and the stream pointer 3220 * is advanced to the next block in the second multivolume archive file. */ 3221static int merge_block(struct archive_read* a, ssize_t block_size, 3222 const uint8_t** p) 3223{ 3224 struct rar5* rar = get_context(a); 3225 ssize_t cur_block_size, partial_offset = 0; 3226 const uint8_t* lp; 3227 int ret; 3228 3229 if(rar->merge_mode) { 3230 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 3231 "Recursive merge is not allowed"); 3232 3233 return ARCHIVE_FATAL; 3234 } 3235 3236 /* Set a flag that we're in the switching mode. */ 3237 rar->cstate.switch_multivolume = 1; 3238 3239 /* Reallocate the memory which will hold the whole block. */ 3240 if(rar->vol.push_buf) 3241 free((void*) rar->vol.push_buf); 3242 3243 /* Increasing the allocation block by 8 is due to bit reading functions, 3244 * which are using additional 2 or 4 bytes. Allocating the block size 3245 * by exact value would make bit reader perform reads from invalid 3246 * memory block when reading the last byte from the buffer. 
*/ 3247 rar->vol.push_buf = malloc(block_size + 8); 3248 if(!rar->vol.push_buf) { 3249 archive_set_error(&a->archive, ENOMEM, 3250 "Can't allocate memory for a merge block buffer."); 3251 return ARCHIVE_FATAL; 3252 } 3253 3254 /* Valgrind complains if the extension block for bit reader is not 3255 * initialized, so initialize it. */ 3256 memset(&rar->vol.push_buf[block_size], 0, 8); 3257 3258 /* A single block can span across multiple multivolume archive files, 3259 * so we use a loop here. This loop will consume enough multivolume 3260 * archive files until the whole block is read. */ 3261 3262 while(1) { 3263 /* Get the size of current block chunk in this multivolume 3264 * archive file and read it. */ 3265 cur_block_size = rar5_min(rar->file.bytes_remaining, 3266 block_size - partial_offset); 3267 3268 if(cur_block_size == 0) { 3269 archive_set_error(&a->archive, 3270 ARCHIVE_ERRNO_FILE_FORMAT, 3271 "Encountered block size == 0 during block merge"); 3272 return ARCHIVE_FATAL; 3273 } 3274 3275 if(!read_ahead(a, cur_block_size, &lp)) 3276 return ARCHIVE_EOF; 3277 3278 /* Sanity check; there should never be a situation where this 3279 * function reads more data than the block's size. */ 3280 if(partial_offset + cur_block_size > block_size) { 3281 archive_set_error(&a->archive, 3282 ARCHIVE_ERRNO_PROGRAMMER, 3283 "Consumed too much data when merging blocks."); 3284 return ARCHIVE_FATAL; 3285 } 3286 3287 /* Merge previous block chunk with current block chunk, 3288 * or create first block chunk if this is our first 3289 * iteration. */ 3290 memcpy(&rar->vol.push_buf[partial_offset], lp, cur_block_size); 3291 3292 /* Advance the stream read pointer by this block chunk size. */ 3293 if(ARCHIVE_OK != consume(a, cur_block_size)) 3294 return ARCHIVE_EOF; 3295 3296 /* Update the pointers. `partial_offset` contains information 3297 * about the sum of merged block chunks. 
*/ 3298 partial_offset += cur_block_size; 3299 rar->file.bytes_remaining -= cur_block_size; 3300 3301 /* If `partial_offset` is the same as `block_size`, this means 3302 * we've merged all block chunks and we have a valid full 3303 * block. */ 3304 if(partial_offset == block_size) { 3305 break; 3306 } 3307 3308 /* If we don't have any bytes to read, this means we should 3309 * switch to another multivolume archive file. */ 3310 if(rar->file.bytes_remaining == 0) { 3311 rar->merge_mode++; 3312 ret = advance_multivolume(a); 3313 rar->merge_mode--; 3314 if(ret != ARCHIVE_OK) { 3315 return ret; 3316 } 3317 } 3318 } 3319 3320 *p = rar->vol.push_buf; 3321 3322 /* If we're here, we can resume unpacking by processing the block 3323 * pointed to by the `*p` memory pointer. */ 3324 3325 return ARCHIVE_OK; 3326} 3327 3328static int process_block(struct archive_read* a) { 3329 const uint8_t* p; 3330 struct rar5* rar = get_context(a); 3331 int ret; 3332 3333 /* If we don't have any data to be processed, this most probably means 3334 * we need to switch to the next volume. */ 3335 if(rar->main.volume && rar->file.bytes_remaining == 0) { 3336 ret = advance_multivolume(a); 3337 if(ret != ARCHIVE_OK) 3338 return ret; 3339 } 3340 3341 if(rar->cstate.block_parsing_finished) { 3342 ssize_t block_size; 3343 ssize_t to_skip; 3344 ssize_t cur_block_size; 3345 3346 /* The header size won't be bigger than 6 bytes. */ 3347 if(!read_ahead(a, 6, &p)) { 3348 /* Failed to prefetch data block header. */ 3349 return ARCHIVE_EOF; 3350 } 3351 3352 /* 3353 * Read block_size by parsing block header. Validate the header 3354 * by calculating CRC byte stored inside the header. Size of 3355 * the header is not constant (block size can be stored either 3356 * in 1 or 2 bytes), that's why block size is left out from the 3357 * `compressed_block_header` structure and returned by 3358 * `parse_block_header` as the second argument. 
*/ 3359 3360 ret = parse_block_header(a, p, &block_size, 3361 &rar->last_block_hdr); 3362 if(ret != ARCHIVE_OK) { 3363 return ret; 3364 } 3365 3366 /* Skip block header. Next data is huffman tables, 3367 * if present. */ 3368 to_skip = sizeof(struct compressed_block_header) + 3369 bf_byte_count(&rar->last_block_hdr) + 1; 3370 3371 if(ARCHIVE_OK != consume(a, to_skip)) 3372 return ARCHIVE_EOF; 3373 3374 rar->file.bytes_remaining -= to_skip; 3375 3376 /* The block size gives information about the whole block size, 3377 * but the block could be stored in split form when using 3378 * multi-volume archives. In this case, the block size will be 3379 * bigger than the actual data stored in this file. Remaining 3380 * part of the data will be in another file. */ 3381 3382 cur_block_size = 3383 rar5_min(rar->file.bytes_remaining, block_size); 3384 3385 if(block_size > rar->file.bytes_remaining) { 3386 /* If current blocks' size is bigger than our data 3387 * size, this means we have a multivolume archive. 3388 * In this case, skip all base headers until the end 3389 * of the file, proceed to next "partXXX.rar" volume, 3390 * find its signature, skip all headers up to the first 3391 * FILE base header, and continue from there. 3392 * 3393 * Note that `merge_block` will update the `rar` 3394 * context structure quite extensively. */ 3395 3396 ret = merge_block(a, block_size, &p); 3397 if(ret != ARCHIVE_OK) { 3398 return ret; 3399 } 3400 3401 cur_block_size = block_size; 3402 3403 /* Current stream pointer should be now directly 3404 * *after* the block that spanned through multiple 3405 * archive files. `p` pointer should have the data of 3406 * the *whole* block (merged from partial blocks 3407 * stored in multiple archives files). */ 3408 } else { 3409 rar->cstate.switch_multivolume = 0; 3410 3411 /* Read the whole block size into memory. This can take 3412 * up to 8 megabytes of memory in theoretical cases. 
3413 * Might be worth to optimize this and use a standard 3414 * chunk of 4kb's. */ 3415 if(!read_ahead(a, 4 + cur_block_size, &p)) { 3416 /* Failed to prefetch block data. */ 3417 return ARCHIVE_EOF; 3418 } 3419 } 3420 3421 rar->cstate.block_buf = p; 3422 rar->cstate.cur_block_size = cur_block_size; 3423 rar->cstate.block_parsing_finished = 0; 3424 3425 rar->bits.in_addr = 0; 3426 rar->bits.bit_addr = 0; 3427 3428 if(bf_is_table_present(&rar->last_block_hdr)) { 3429 /* Load Huffman tables. */ 3430 ret = parse_tables(a, rar, p); 3431 if(ret != ARCHIVE_OK) { 3432 /* Error during decompression of Huffman 3433 * tables. */ 3434 return ret; 3435 } 3436 } 3437 } else { 3438 /* Block parsing not finished, reuse previous memory buffer. */ 3439 p = rar->cstate.block_buf; 3440 } 3441 3442 /* Uncompress the block, or a part of it, depending on how many bytes 3443 * will be generated by uncompressing the block. 3444 * 3445 * In case too many bytes will be generated, calling this function 3446 * again will resume the uncompression operation. */ 3447 ret = do_uncompress_block(a, p); 3448 if(ret != ARCHIVE_OK) { 3449 return ret; 3450 } 3451 3452 if(rar->cstate.block_parsing_finished && 3453 rar->cstate.switch_multivolume == 0 && 3454 rar->cstate.cur_block_size > 0) 3455 { 3456 /* If we're processing a normal block, consume the whole 3457 * block. We can do this because we've already read the whole 3458 * block to memory. */ 3459 if(ARCHIVE_OK != consume(a, rar->cstate.cur_block_size)) 3460 return ARCHIVE_FATAL; 3461 3462 rar->file.bytes_remaining -= rar->cstate.cur_block_size; 3463 } else if(rar->cstate.switch_multivolume) { 3464 /* Don't consume the block if we're doing multivolume 3465 * processing. The volume switching function will consume 3466 * the proper count of bytes instead. */ 3467 rar->cstate.switch_multivolume = 0; 3468 } 3469 3470 return ARCHIVE_OK; 3471} 3472 3473/* Pops the `buf`, `size` and `offset` from the "data ready" stack. 
3474 * 3475 * Returns ARCHIVE_OK when those arguments can be used, ARCHIVE_RETRY 3476 * when there is no data on the stack. */ 3477static int use_data(struct rar5* rar, const void** buf, size_t* size, 3478 int64_t* offset) 3479{ 3480 int i; 3481 3482 for(i = 0; i < rar5_countof(rar->cstate.dready); i++) { 3483 struct data_ready *d = &rar->cstate.dready[i]; 3484 3485 if(d->used) { 3486 if(buf) *buf = d->buf; 3487 if(size) *size = d->size; 3488 if(offset) *offset = d->offset; 3489 3490 d->used = 0; 3491 return ARCHIVE_OK; 3492 } 3493 } 3494 3495 return ARCHIVE_RETRY; 3496} 3497 3498/* Pushes the `buf`, `size` and `offset` arguments to the rar->cstate.dready 3499 * FIFO stack. Those values will be popped from this stack by the `use_data` 3500 * function. */ 3501static int push_data_ready(struct archive_read* a, struct rar5* rar, 3502 const uint8_t* buf, size_t size, int64_t offset) 3503{ 3504 int i; 3505 3506 /* Don't push if we're in skip mode. This is needed because solid 3507 * streams need full processing even if we're skipping data. After 3508 * fully processing the stream, we need to discard the generated bytes, 3509 * because we're interested only in the side effect: building up the 3510 * internal window circular buffer. This window buffer will be used 3511 * later during unpacking of requested data. */ 3512 if(rar->skip_mode) 3513 return ARCHIVE_OK; 3514 3515 /* Sanity check. */ 3516 if(offset != rar->file.last_offset + rar->file.last_size) { 3517 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 3518 "Sanity check error: output stream is not continuous"); 3519 return ARCHIVE_FATAL; 3520 } 3521 3522 for(i = 0; i < rar5_countof(rar->cstate.dready); i++) { 3523 struct data_ready* d = &rar->cstate.dready[i]; 3524 if(!d->used) { 3525 d->used = 1; 3526 d->buf = buf; 3527 d->size = size; 3528 d->offset = offset; 3529 3530 /* These fields are used only in sanity checking. 
*/ 3531 rar->file.last_offset = offset; 3532 rar->file.last_size = size; 3533 3534 /* Calculate the checksum of this new block before 3535 * submitting data to libarchive's engine. */ 3536 update_crc(rar, d->buf, d->size); 3537 3538 return ARCHIVE_OK; 3539 } 3540 } 3541 3542 /* Program counter will reach this code if the `rar->cstate.data_ready` 3543 * stack will be filled up so that no new entries will be allowed. The 3544 * code shouldn't allow such situation to occur. So we treat this case 3545 * as an internal error. */ 3546 3547 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 3548 "Error: premature end of data_ready stack"); 3549 return ARCHIVE_FATAL; 3550} 3551 3552/* This function uncompresses the data that is stored in the <FILE> base 3553 * block. 3554 * 3555 * The FILE base block looks like this: 3556 * 3557 * <header><huffman tables><block_1><block_2>...<block_n> 3558 * 3559 * The <header> is a block header, that is parsed in parse_block_header(). 3560 * It's a "compressed_block_header" structure, containing metadata needed 3561 * to know when we should stop looking for more <block_n> blocks. 3562 * 3563 * <huffman tables> contain data needed to set up the huffman tables, needed 3564 * for the actual decompression. 3565 * 3566 * Each <block_n> consists of series of literals: 3567 * 3568 * <literal><literal><literal>...<literal> 3569 * 3570 * Those literals generate the uncompression data. They operate on a circular 3571 * buffer, sometimes writing raw data into it, sometimes referencing 3572 * some previous data inside this buffer, and sometimes declaring a filter 3573 * that will need to be executed on the data stored in the circular buffer. 3574 * It all depends on the literal that is used. 3575 * 3576 * Sometimes blocks produce output data, sometimes they don't. For example, for 3577 * some huge files that use lots of filters, sometimes a block is filled with 3578 * only filter declaration literals. 
Such blocks won't produce any data in the 3579 * circular buffer. 3580 * 3581 * Sometimes blocks will produce 4 bytes of data, and sometimes 1 megabyte, 3582 * because a literal can reference previously decompressed data. For example, 3583 * there can be a literal that says: 'append a byte 0xFE here', and after 3584 * it another literal can say 'append 1 megabyte of data from circular buffer 3585 * offset 0x12345'. This is how RAR format handles compressing repeated 3586 * patterns. 3587 * 3588 * The RAR compressor creates those literals and the actual efficiency of 3589 * compression depends on what those literals are. The literals can also 3590 * be seen as a kind of a non-turing-complete virtual machine that simply 3591 * tells the decompressor what it should do. 3592 * */ 3593 3594static int do_uncompress_file(struct archive_read* a) { 3595 struct rar5* rar = get_context(a); 3596 int ret; 3597 int64_t max_end_pos; 3598 3599 if(!rar->cstate.initialized) { 3600 /* Don't perform full context reinitialization if we're 3601 * processing a solid archive. */ 3602 if(!rar->main.solid || !rar->cstate.window_buf) { 3603 init_unpack(rar); 3604 } 3605 3606 rar->cstate.initialized = 1; 3607 } 3608 3609 if(rar->cstate.all_filters_applied == 1) { 3610 /* We use while(1) here, but standard case allows for just 1 3611 * iteration. The loop will iterate if process_block() didn't 3612 * generate any data at all. This can happen if the block 3613 * contains only filter definitions (this is common in big 3614 * files). */ 3615 while(1) { 3616 ret = process_block(a); 3617 if(ret == ARCHIVE_EOF || ret == ARCHIVE_FATAL) 3618 return ret; 3619 3620 if(rar->cstate.last_write_ptr == 3621 rar->cstate.write_ptr) { 3622 /* The block didn't generate any new data, 3623 * so just process a new block. */ 3624 continue; 3625 } 3626 3627 /* The block has generated some new data, so break 3628 * the loop. */ 3629 break; 3630 } 3631 } 3632 3633 /* Try to run filters. 
If filters won't be applied, it means that 3634 * insufficient data was generated. */ 3635 ret = apply_filters(a); 3636 if(ret == ARCHIVE_RETRY) { 3637 return ARCHIVE_OK; 3638 } else if(ret == ARCHIVE_FATAL) { 3639 return ARCHIVE_FATAL; 3640 } 3641 3642 /* If apply_filters() will return ARCHIVE_OK, we can continue here. */ 3643 3644 if(cdeque_size(&rar->cstate.filters) > 0) { 3645 /* Check if we can write something before hitting first 3646 * filter. */ 3647 struct filter_info* flt; 3648 3649 /* Get the block_start offset from the first filter. */ 3650 if(CDE_OK != cdeque_front(&rar->cstate.filters, 3651 cdeque_filter_p(&flt))) 3652 { 3653 archive_set_error(&a->archive, 3654 ARCHIVE_ERRNO_PROGRAMMER, 3655 "Can't read first filter"); 3656 return ARCHIVE_FATAL; 3657 } 3658 3659 max_end_pos = rar5_min(flt->block_start, 3660 rar->cstate.write_ptr); 3661 } else { 3662 /* There are no filters defined, or all filters were applied. 3663 * This means we can just store the data without any 3664 * postprocessing. */ 3665 max_end_pos = rar->cstate.write_ptr; 3666 } 3667 3668 if(max_end_pos == rar->cstate.last_write_ptr) { 3669 /* We can't write anything yet. The block uncompression 3670 * function did not generate enough data, and no filter can be 3671 * applied. At the same time we don't have any data that can be 3672 * stored without filter postprocessing. This means we need to 3673 * wait for more data to be generated, so we can apply the 3674 * filters. 3675 * 3676 * Signal the caller that we need more data to be able to do 3677 * anything. 3678 */ 3679 return ARCHIVE_RETRY; 3680 } else { 3681 /* We can write the data before hitting the first filter. 3682 * So let's do it. The push_window_data() function will 3683 * effectively return the selected data block to the user 3684 * application. 
*/ 3685 push_window_data(a, rar, rar->cstate.last_write_ptr, 3686 max_end_pos); 3687 rar->cstate.last_write_ptr = max_end_pos; 3688 } 3689 3690 return ARCHIVE_OK; 3691} 3692 3693static int uncompress_file(struct archive_read* a) { 3694 int ret; 3695 3696 while(1) { 3697 /* Sometimes the uncompression function will return a 3698 * 'retry' signal. If this will happen, we have to retry 3699 * the function. */ 3700 ret = do_uncompress_file(a); 3701 if(ret != ARCHIVE_RETRY) 3702 return ret; 3703 } 3704} 3705 3706 3707static int do_unstore_file(struct archive_read* a, 3708 struct rar5* rar, const void** buf, size_t* size, int64_t* offset) 3709{ 3710 size_t to_read; 3711 const uint8_t* p; 3712 3713 if(rar->file.bytes_remaining == 0 && rar->main.volume > 0 && 3714 rar->generic.split_after > 0) 3715 { 3716 int ret; 3717 3718 rar->cstate.switch_multivolume = 1; 3719 ret = advance_multivolume(a); 3720 rar->cstate.switch_multivolume = 0; 3721 3722 if(ret != ARCHIVE_OK) { 3723 /* Failed to advance to next multivolume archive 3724 * file. 
*/ 3725 return ret; 3726 } 3727 } 3728 3729 to_read = rar5_min(rar->file.bytes_remaining, 64 * 1024); 3730 if(to_read == 0) { 3731 return ARCHIVE_EOF; 3732 } 3733 3734 if(!read_ahead(a, to_read, &p)) { 3735 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 3736 "I/O error when unstoring file"); 3737 return ARCHIVE_FATAL; 3738 } 3739 3740 if(ARCHIVE_OK != consume(a, to_read)) { 3741 return ARCHIVE_EOF; 3742 } 3743 3744 if(buf) *buf = p; 3745 if(size) *size = to_read; 3746 if(offset) *offset = rar->cstate.last_unstore_ptr; 3747 3748 rar->file.bytes_remaining -= to_read; 3749 rar->cstate.last_unstore_ptr += to_read; 3750 3751 update_crc(rar, p, to_read); 3752 return ARCHIVE_OK; 3753} 3754 3755static int do_unpack(struct archive_read* a, struct rar5* rar, 3756 const void** buf, size_t* size, int64_t* offset) 3757{ 3758 enum COMPRESSION_METHOD { 3759 STORE = 0, FASTEST = 1, FAST = 2, NORMAL = 3, GOOD = 4, 3760 BEST = 5 3761 }; 3762 3763 if(rar->file.service > 0) { 3764 return do_unstore_file(a, rar, buf, size, offset); 3765 } else { 3766 switch(rar->cstate.method) { 3767 case STORE: 3768 return do_unstore_file(a, rar, buf, size, 3769 offset); 3770 case FASTEST: 3771 /* fallthrough */ 3772 case FAST: 3773 /* fallthrough */ 3774 case NORMAL: 3775 /* fallthrough */ 3776 case GOOD: 3777 /* fallthrough */ 3778 case BEST: 3779 return uncompress_file(a); 3780 default: 3781 archive_set_error(&a->archive, 3782 ARCHIVE_ERRNO_FILE_FORMAT, 3783 "Compression method not supported: 0x%x", 3784 rar->cstate.method); 3785 3786 return ARCHIVE_FATAL; 3787 } 3788 } 3789 3790#if !defined WIN32 3791 /* Not reached. */ 3792 return ARCHIVE_OK; 3793#endif 3794} 3795 3796static int verify_checksums(struct archive_read* a) { 3797 int verify_crc; 3798 struct rar5* rar = get_context(a); 3799 3800 /* Check checksums only when actually unpacking the data. 
There's no 3801 * need to calculate checksum when we're skipping data in solid archives 3802 * (skipping in solid archives is the same thing as unpacking compressed 3803 * data and discarding the result). */ 3804 3805 if(!rar->skip_mode) { 3806 /* Always check checksums if we're not in skip mode */ 3807 verify_crc = 1; 3808 } else { 3809 /* We can override the logic above with a compile-time option 3810 * NO_CRC_ON_SOLID_SKIP. This option is used during debugging, 3811 * and it will check checksums of unpacked data even when 3812 * we're skipping it. */ 3813 3814#if defined CHECK_CRC_ON_SOLID_SKIP 3815 /* Debug case */ 3816 verify_crc = 1; 3817#else 3818 /* Normal case */ 3819 verify_crc = 0; 3820#endif 3821 } 3822 3823 if(verify_crc) { 3824 /* During unpacking, on each unpacked block we're calling the 3825 * update_crc() function. Since we are here, the unpacking 3826 * process is already over and we can check if calculated 3827 * checksum (CRC32 or BLAKE2sp) is the same as what is stored 3828 * in the archive. */ 3829 if(rar->file.stored_crc32 > 0) { 3830 /* Check CRC32 only when the file contains a CRC32 3831 * value for this file. */ 3832 3833 if(rar->file.calculated_crc32 != 3834 rar->file.stored_crc32) { 3835 /* Checksums do not match; the unpacked file 3836 * is corrupted. 
*/ 3837 3838 DEBUG_CODE { 3839 printf("Checksum error: CRC32 " 3840 "(was: %08x, expected: %08x)\n", 3841 rar->file.calculated_crc32, 3842 rar->file.stored_crc32); 3843 } 3844 3845#ifndef DONT_FAIL_ON_CRC_ERROR 3846 archive_set_error(&a->archive, 3847 ARCHIVE_ERRNO_FILE_FORMAT, 3848 "Checksum error: CRC32"); 3849 return ARCHIVE_FATAL; 3850#endif 3851 } else { 3852 DEBUG_CODE { 3853 printf("Checksum OK: CRC32 " 3854 "(%08x/%08x)\n", 3855 rar->file.stored_crc32, 3856 rar->file.calculated_crc32); 3857 } 3858 } 3859 } 3860 3861 if(rar->file.has_blake2 > 0) { 3862 /* BLAKE2sp is an optional checksum algorithm that is 3863 * added to RARv5 archives when using the `-htb` switch 3864 * during creation of archive. 3865 * 3866 * We now finalize the hash calculation by calling the 3867 * `final` function. This will generate the final hash 3868 * value we can use to compare it with the BLAKE2sp 3869 * checksum that is stored in the archive. 3870 * 3871 * The return value of this `final` function is not 3872 * very helpful, as it guards only against improper use. 3873 * This is why we're explicitly ignoring it. */ 3874 3875 uint8_t b2_buf[32]; 3876 (void) blake2sp_final(&rar->file.b2state, b2_buf, 32); 3877 3878 if(memcmp(&rar->file.blake2sp, b2_buf, 32) != 0) { 3879#ifndef DONT_FAIL_ON_CRC_ERROR 3880 archive_set_error(&a->archive, 3881 ARCHIVE_ERRNO_FILE_FORMAT, 3882 "Checksum error: BLAKE2"); 3883 3884 return ARCHIVE_FATAL; 3885#endif 3886 } 3887 } 3888 } 3889 3890 /* Finalization for this file has been successfully completed. */ 3891 return ARCHIVE_OK; 3892} 3893 3894static int verify_global_checksums(struct archive_read* a) { 3895 return verify_checksums(a); 3896} 3897 3898/* 3899 * Decryption function for the magic signature pattern. Check the comment near 3900 * the `rar5_signature_xor` symbol to read the rationale behind this. 
3901 */ 3902static void rar5_signature(char *buf) { 3903 size_t i; 3904 3905 for(i = 0; i < sizeof(rar5_signature_xor); i++) { 3906 buf[i] = rar5_signature_xor[i] ^ 0xA1; 3907 } 3908} 3909 3910static int rar5_read_data(struct archive_read *a, const void **buff, 3911 size_t *size, int64_t *offset) { 3912 int ret; 3913 struct rar5* rar = get_context(a); 3914 3915 if(rar->file.dir > 0) { 3916 /* Don't process any data if this file entry was declared 3917 * as a directory. This is needed, because entries marked as 3918 * directory doesn't have any dictionary buffer allocated, so 3919 * it's impossible to perform any decompression. */ 3920 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 3921 "Can't decompress an entry marked as a directory"); 3922 return ARCHIVE_FAILED; 3923 } 3924 3925 if(!rar->skip_mode && (rar->cstate.last_write_ptr > rar->file.unpacked_size)) { 3926 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 3927 "Unpacker has written too many bytes"); 3928 return ARCHIVE_FATAL; 3929 } 3930 3931 ret = use_data(rar, buff, size, offset); 3932 if(ret == ARCHIVE_OK) { 3933 return ret; 3934 } 3935 3936 if(rar->file.eof == 1) { 3937 return ARCHIVE_EOF; 3938 } 3939 3940 ret = do_unpack(a, rar, buff, size, offset); 3941 if(ret != ARCHIVE_OK) { 3942 return ret; 3943 } 3944 3945 if(rar->file.bytes_remaining == 0 && 3946 rar->cstate.last_write_ptr == rar->file.unpacked_size) 3947 { 3948 /* If all bytes of current file were processed, run 3949 * finalization. 3950 * 3951 * Finalization will check checksum against proper values. If 3952 * some of the checksums will not match, we'll return an error 3953 * value in the last `archive_read_data` call to signal an error 3954 * to the user. 
*/ 3955 3956 rar->file.eof = 1; 3957 return verify_global_checksums(a); 3958 } 3959 3960 return ARCHIVE_OK; 3961} 3962 3963static int rar5_read_data_skip(struct archive_read *a) { 3964 struct rar5* rar = get_context(a); 3965 3966 if(rar->main.solid) { 3967 /* In solid archives, instead of skipping the data, we need to 3968 * extract it, and dispose the result. The side effect of this 3969 * operation will be setting up the initial window buffer state 3970 * needed to be able to extract the selected file. */ 3971 3972 int ret; 3973 3974 /* Make sure to process all blocks in the compressed stream. */ 3975 while(rar->file.bytes_remaining > 0) { 3976 /* Setting the "skip mode" will allow us to skip 3977 * checksum checks during data skipping. Checking the 3978 * checksum of skipped data isn't really necessary and 3979 * it's only slowing things down. 3980 * 3981 * This is incremented instead of setting to 1 because 3982 * this data skipping function can be called 3983 * recursively. */ 3984 rar->skip_mode++; 3985 3986 /* We're disposing 1 block of data, so we use triple 3987 * NULLs in arguments. */ 3988 ret = rar5_read_data(a, NULL, NULL, NULL); 3989 3990 /* Turn off "skip mode". */ 3991 rar->skip_mode--; 3992 3993 if(ret < 0 || ret == ARCHIVE_EOF) { 3994 /* Propagate any potential error conditions 3995 * to the caller. */ 3996 return ret; 3997 } 3998 } 3999 } else { 4000 /* In standard archives, we can just jump over the compressed 4001 * stream. Each file in non-solid archives starts from an empty 4002 * window buffer. */ 4003 4004 if(ARCHIVE_OK != consume(a, rar->file.bytes_remaining)) { 4005 return ARCHIVE_FATAL; 4006 } 4007 4008 rar->file.bytes_remaining = 0; 4009 } 4010 4011 return ARCHIVE_OK; 4012} 4013 4014static int64_t rar5_seek_data(struct archive_read *a, int64_t offset, 4015 int whence) 4016{ 4017 (void) a; 4018 (void) offset; 4019 (void) whence; 4020 4021 /* We're a streaming unpacker, and we don't support seeking. 
*/ 4022 4023 return ARCHIVE_FATAL; 4024} 4025 4026static int rar5_cleanup(struct archive_read *a) { 4027 struct rar5* rar = get_context(a); 4028 4029 free(rar->cstate.window_buf); 4030 free(rar->cstate.filtered_buf); 4031 4032 free(rar->vol.push_buf); 4033 4034 free_filters(rar); 4035 cdeque_free(&rar->cstate.filters); 4036 4037 free(rar); 4038 a->format->data = NULL; 4039 4040 return ARCHIVE_OK; 4041} 4042 4043static int rar5_capabilities(struct archive_read * a) { 4044 (void) a; 4045 return 0; 4046} 4047 4048static int rar5_has_encrypted_entries(struct archive_read *_a) { 4049 (void) _a; 4050 4051 /* Unsupported for now. */ 4052 return ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED; 4053} 4054 4055static int rar5_init(struct rar5* rar) { 4056 memset(rar, 0, sizeof(struct rar5)); 4057 4058 if(CDE_OK != cdeque_init(&rar->cstate.filters, 8192)) 4059 return ARCHIVE_FATAL; 4060 4061 return ARCHIVE_OK; 4062} 4063 4064int archive_read_support_format_rar5(struct archive *_a) { 4065 struct archive_read* ar; 4066 int ret; 4067 struct rar5* rar; 4068 4069 if(ARCHIVE_OK != (ret = get_archive_read(_a, &ar))) 4070 return ret; 4071 4072 rar = malloc(sizeof(*rar)); 4073 if(rar == NULL) { 4074 archive_set_error(&ar->archive, ENOMEM, 4075 "Can't allocate rar5 data"); 4076 return ARCHIVE_FATAL; 4077 } 4078 4079 if(ARCHIVE_OK != rar5_init(rar)) { 4080 archive_set_error(&ar->archive, ENOMEM, 4081 "Can't allocate rar5 filter buffer"); 4082 return ARCHIVE_FATAL; 4083 } 4084 4085 ret = __archive_read_register_format(ar, 4086 rar, 4087 "rar5", 4088 rar5_bid, 4089 rar5_options, 4090 rar5_read_header, 4091 rar5_read_data, 4092 rar5_read_data_skip, 4093 rar5_seek_data, 4094 rar5_cleanup, 4095 rar5_capabilities, 4096 rar5_has_encrypted_entries); 4097 4098 if(ret != ARCHIVE_OK) { 4099 (void) rar5_cleanup(ar); 4100 } 4101 4102 return ret; 4103} 4104