/*-
 * Copyright (c) 2018 Grzegorz Antoniak (http://antoniak.org)
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24339640Smm*/ 25339640Smm 26339640Smm#include "archive_platform.h" 27342360Smm#include "archive_endian.h" 28339640Smm 29339640Smm#ifdef HAVE_ERRNO_H 30339640Smm#include <errno.h> 31339640Smm#endif 32339640Smm#include <time.h> 33339640Smm#ifdef HAVE_ZLIB_H 34339640Smm#include <zlib.h> /* crc32 */ 35339640Smm#endif 36348607Smm#ifdef HAVE_LIMITS_H 37348607Smm#include <limits.h> 38348607Smm#endif 39339640Smm 40339640Smm#include "archive.h" 41339640Smm#ifndef HAVE_ZLIB_H 42339640Smm#include "archive_crc32.h" 43339640Smm#endif 44339640Smm 45339640Smm#include "archive_entry.h" 46339640Smm#include "archive_entry_locale.h" 47339640Smm#include "archive_ppmd7_private.h" 48339640Smm#include "archive_entry_private.h" 49339640Smm 50339640Smm#ifdef HAVE_BLAKE2_H 51339640Smm#include <blake2.h> 52339640Smm#else 53339640Smm#include "archive_blake2.h" 54339640Smm#endif 55339640Smm 56339640Smm/*#define CHECK_CRC_ON_SOLID_SKIP*/ 57339640Smm/*#define DONT_FAIL_ON_CRC_ERROR*/ 58339640Smm/*#define DEBUG*/ 59339640Smm 60339640Smm#define rar5_min(a, b) (((a) > (b)) ? (b) : (a)) 61339640Smm#define rar5_max(a, b) (((a) > (b)) ? (a) : (b)) 62339640Smm#define rar5_countof(X) ((const ssize_t) (sizeof(X) / sizeof(*X))) 63339640Smm 64339640Smm#if defined DEBUG 65339640Smm#define DEBUG_CODE if(1) 66358088Smm#define LOG(...) do { printf("rar5: " __VA_ARGS__); puts(""); } while(0) 67339640Smm#else 68339640Smm#define DEBUG_CODE if(0) 69339640Smm#endif 70339640Smm 71339640Smm/* Real RAR5 magic number is: 72339640Smm * 73339640Smm * 0x52, 0x61, 0x72, 0x21, 0x1a, 0x07, 0x01, 0x00 74339640Smm * "Rar!�����������\x00" 75339640Smm * 76358088Smm * Retrieved with `rar5_signature()` by XOR'ing it with 0xA1, because I don't 77339640Smm * want to put this magic sequence in each binary that uses libarchive, so 78339640Smm * applications that scan through the file for this marker won't trigger on 79339640Smm * this "false" one. 80339640Smm * 81339640Smm * The array itself is decrypted in `rar5_init` function. 
*/ 82339640Smm 83358088Smmstatic unsigned char rar5_signature_xor[] = { 243, 192, 211, 128, 187, 166, 160, 161 }; 84339746Smmstatic const size_t g_unpack_window_size = 0x20000; 85339640Smm 86348607Smm/* These could have been static const's, but they aren't, because of 87348607Smm * Visual Studio. */ 88348607Smm#define MAX_NAME_IN_CHARS 2048 89348607Smm#define MAX_NAME_IN_BYTES (4 * MAX_NAME_IN_CHARS) 90348607Smm 91339640Smmstruct file_header { 92348607Smm ssize_t bytes_remaining; 93348607Smm ssize_t unpacked_size; 94348607Smm int64_t last_offset; /* Used in sanity checks. */ 95348607Smm int64_t last_size; /* Used in sanity checks. */ 96339640Smm 97348607Smm uint8_t solid : 1; /* Is this a solid stream? */ 98348607Smm uint8_t service : 1; /* Is this file a service data? */ 99348607Smm uint8_t eof : 1; /* Did we finish unpacking the file? */ 100348607Smm uint8_t dir : 1; /* Is this file entry a directory? */ 101339640Smm 102348607Smm /* Optional time fields. */ 103348607Smm uint64_t e_mtime; 104348607Smm uint64_t e_ctime; 105348607Smm uint64_t e_atime; 106348607Smm uint32_t e_unix_ns; 107339640Smm 108348607Smm /* Optional hash fields. */ 109348607Smm uint32_t stored_crc32; 110348607Smm uint32_t calculated_crc32; 111348607Smm uint8_t blake2sp[32]; 112348607Smm blake2sp_state b2state; 113348607Smm char has_blake2; 114348607Smm 115348607Smm /* Optional redir fields */ 116348607Smm uint64_t redir_type; 117348607Smm uint64_t redir_flags; 118358088Smm 119358088Smm ssize_t solid_window_size; /* Used in file format check. 
*/ 120339640Smm}; 121339640Smm 122348607Smmenum EXTRA { 123348607Smm EX_CRYPT = 0x01, 124348607Smm EX_HASH = 0x02, 125348607Smm EX_HTIME = 0x03, 126348607Smm EX_VERSION = 0x04, 127348607Smm EX_REDIR = 0x05, 128348607Smm EX_UOWNER = 0x06, 129348607Smm EX_SUBDATA = 0x07 130348607Smm}; 131348607Smm 132348607Smm#define REDIR_SYMLINK_IS_DIR 1 133348607Smm 134348607Smmenum REDIR_TYPE { 135348607Smm REDIR_TYPE_NONE = 0, 136348607Smm REDIR_TYPE_UNIXSYMLINK = 1, 137348607Smm REDIR_TYPE_WINSYMLINK = 2, 138348607Smm REDIR_TYPE_JUNCTION = 3, 139348607Smm REDIR_TYPE_HARDLINK = 4, 140348607Smm REDIR_TYPE_FILECOPY = 5, 141348607Smm}; 142348607Smm 143348607Smm#define OWNER_USER_NAME 0x01 144348607Smm#define OWNER_GROUP_NAME 0x02 145348607Smm#define OWNER_USER_UID 0x04 146348607Smm#define OWNER_GROUP_GID 0x08 147348607Smm#define OWNER_MAXNAMELEN 256 148348607Smm 149339640Smmenum FILTER_TYPE { 150348607Smm FILTER_DELTA = 0, /* Generic pattern. */ 151348607Smm FILTER_E8 = 1, /* Intel x86 code. */ 152348607Smm FILTER_E8E9 = 2, /* Intel x86 code. */ 153348607Smm FILTER_ARM = 3, /* ARM code. */ 154348607Smm FILTER_AUDIO = 4, /* Audio filter, not used in RARv5. */ 155348607Smm FILTER_RGB = 5, /* Color palette, not used in RARv5. */ 156348607Smm FILTER_ITANIUM = 6, /* Intel's Itanium, not used in RARv5. */ 157348607Smm FILTER_PPM = 7, /* Predictive pattern matching, not used in 158348607Smm RARv5. 
*/ 159348607Smm FILTER_NONE = 8, 160339640Smm}; 161339640Smm 162339640Smmstruct filter_info { 163348607Smm int type; 164348607Smm int channels; 165348607Smm int pos_r; 166339640Smm 167348607Smm int64_t block_start; 168348607Smm ssize_t block_length; 169348607Smm uint16_t width; 170339640Smm}; 171339640Smm 172339640Smmstruct data_ready { 173348607Smm char used; 174348607Smm const uint8_t* buf; 175348607Smm size_t size; 176348607Smm int64_t offset; 177339640Smm}; 178339640Smm 179339640Smmstruct cdeque { 180348607Smm uint16_t beg_pos; 181348607Smm uint16_t end_pos; 182348607Smm uint16_t cap_mask; 183348607Smm uint16_t size; 184348607Smm size_t* arr; 185339640Smm}; 186339640Smm 187339640Smmstruct decode_table { 188348607Smm uint32_t size; 189348607Smm int32_t decode_len[16]; 190348607Smm uint32_t decode_pos[16]; 191348607Smm uint32_t quick_bits; 192348607Smm uint8_t quick_len[1 << 10]; 193348607Smm uint16_t quick_num[1 << 10]; 194348607Smm uint16_t decode_num[306]; 195339640Smm}; 196339640Smm 197339640Smmstruct comp_state { 198348607Smm /* Flag used to specify if unpacker needs to reinitialize the 199348607Smm uncompression context. */ 200348607Smm uint8_t initialized : 1; 201339640Smm 202348607Smm /* Flag used when applying filters. */ 203348607Smm uint8_t all_filters_applied : 1; 204339640Smm 205348607Smm /* Flag used to skip file context reinitialization, used when unpacker 206348607Smm is skipping through different multivolume archives. */ 207348607Smm uint8_t switch_multivolume : 1; 208339640Smm 209348607Smm /* Flag used to specify if unpacker has processed the whole data block 210348607Smm or just a part of it. */ 211348607Smm uint8_t block_parsing_finished : 1; 212339640Smm 213358088Smm signed int notused : 4; 214339640Smm 215348607Smm int flags; /* Uncompression flags. */ 216348607Smm int method; /* Uncompression algorithm method. */ 217348607Smm int version; /* Uncompression algorithm version. */ 218348607Smm ssize_t window_size; /* Size of window_buf. 
*/ 219348607Smm uint8_t* window_buf; /* Circular buffer used during 220348607Smm decompression. */ 221348607Smm uint8_t* filtered_buf; /* Buffer used when applying filters. */ 222348607Smm const uint8_t* block_buf; /* Buffer used when merging blocks. */ 223348607Smm size_t window_mask; /* Convenience field; window_size - 1. */ 224348607Smm int64_t write_ptr; /* This amount of data has been unpacked 225348607Smm in the window buffer. */ 226348607Smm int64_t last_write_ptr; /* This amount of data has been stored in 227348607Smm the output file. */ 228348607Smm int64_t last_unstore_ptr; /* Counter of bytes extracted during 229348607Smm unstoring. This is separate from 230348607Smm last_write_ptr because of how SERVICE 231348607Smm base blocks are handled during skipping 232348607Smm in solid multiarchive archives. */ 233348607Smm int64_t solid_offset; /* Additional offset inside the window 234348607Smm buffer, used in unpacking solid 235348607Smm archives. */ 236348607Smm ssize_t cur_block_size; /* Size of current data block. */ 237348607Smm int last_len; /* Flag used in lzss decompression. */ 238339640Smm 239348607Smm /* Decode tables used during lzss uncompression. */ 240339640Smm 241339640Smm#define HUFF_BC 20 242348607Smm struct decode_table bd; /* huffman bit lengths */ 243339640Smm#define HUFF_NC 306 244348607Smm struct decode_table ld; /* literals */ 245339640Smm#define HUFF_DC 64 246348607Smm struct decode_table dd; /* distances */ 247339640Smm#define HUFF_LDC 16 248348607Smm struct decode_table ldd; /* lower bits of distances */ 249339640Smm#define HUFF_RC 44 250348607Smm struct decode_table rd; /* repeating distances */ 251339640Smm#define HUFF_TABLE_SIZE (HUFF_NC + HUFF_DC + HUFF_RC + HUFF_LDC) 252339640Smm 253348607Smm /* Circular deque for storing filters. */ 254348607Smm struct cdeque filters; 255348607Smm int64_t last_block_start; /* Used for sanity checking. */ 256348607Smm ssize_t last_block_length; /* Used for sanity checking. 
*/ 257339640Smm 258348607Smm /* Distance cache used during lzss uncompression. */ 259348607Smm int dist_cache[4]; 260339640Smm 261348607Smm /* Data buffer stack. */ 262348607Smm struct data_ready dready[2]; 263339640Smm}; 264339640Smm 265339640Smm/* Bit reader state. */ 266339640Smmstruct bit_reader { 267348607Smm int8_t bit_addr; /* Current bit pointer inside current byte. */ 268348607Smm int in_addr; /* Current byte pointer. */ 269339640Smm}; 270339640Smm 271342360Smm/* RARv5 block header structure. Use bf_* functions to get values from 272342360Smm * block_flags_u8 field. I.e. bf_byte_count, etc. */ 273339640Smmstruct compressed_block_header { 274348607Smm /* block_flags_u8 contain fields encoded in little-endian bitfield: 275348607Smm * 276348607Smm * - table present flag (shr 7, and 1), 277348607Smm * - last block flag (shr 6, and 1), 278348607Smm * - byte_count (shr 3, and 7), 279348607Smm * - bit_size (shr 0, and 7). 280348607Smm */ 281348607Smm uint8_t block_flags_u8; 282348607Smm uint8_t block_cksum; 283339640Smm}; 284339640Smm 285339640Smm/* RARv5 main header structure. */ 286339640Smmstruct main_header { 287348607Smm /* Does the archive contain solid streams? */ 288348607Smm uint8_t solid : 1; 289339640Smm 290348607Smm /* If this a multi-file archive? */ 291348607Smm uint8_t volume : 1; 292348607Smm uint8_t endarc : 1; 293348607Smm uint8_t notused : 5; 294339640Smm 295348607Smm unsigned int vol_no; 296339640Smm}; 297339640Smm 298339640Smmstruct generic_header { 299348607Smm uint8_t split_after : 1; 300348607Smm uint8_t split_before : 1; 301348607Smm uint8_t padding : 6; 302348607Smm int size; 303348607Smm int last_header_id; 304339640Smm}; 305339640Smm 306339640Smmstruct multivolume { 307348607Smm unsigned int expected_vol_no; 308348607Smm uint8_t* push_buf; 309339640Smm}; 310339640Smm 311339640Smm/* Main context structure. 
*/ 312339640Smmstruct rar5 { 313348607Smm int header_initialized; 314339640Smm 315348607Smm /* Set to 1 if current file is positioned AFTER the magic value 316348607Smm * of the archive file. This is used in header reading functions. */ 317348607Smm int skipped_magic; 318339640Smm 319348607Smm /* Set to not zero if we're in skip mode (either by calling 320348607Smm * rar5_data_skip function or when skipping over solid streams). 321348607Smm * Set to 0 when in * extraction mode. This is used during checksum 322348607Smm * calculation functions. */ 323348607Smm int skip_mode; 324339640Smm 325348607Smm /* Set to not zero if we're in block merging mode (i.e. when switching 326348607Smm * to another file in multivolume archive, last block from 1st archive 327348607Smm * needs to be merged with 1st block from 2nd archive). This flag 328348607Smm * guards against recursive use of the merging function, which doesn't 329348607Smm * support recursive calls. */ 330348607Smm int merge_mode; 331339640Smm 332348607Smm /* An offset to QuickOpen list. This is not supported by this unpacker, 333348607Smm * because we're focusing on streaming interface. QuickOpen is designed 334348607Smm * to make things quicker for non-stream interfaces, so it's not our 335348607Smm * use case. */ 336348607Smm uint64_t qlist_offset; 337339640Smm 338348607Smm /* An offset to additional Recovery data. This is not supported by this 339348607Smm * unpacker. Recovery data are additional Reed-Solomon codes that could 340348607Smm * be used to calculate bytes that are missing in archive or are 341348607Smm * corrupted. */ 342348607Smm uint64_t rr_offset; 343339640Smm 344348607Smm /* Various context variables grouped to different structures. 
*/ 345348607Smm struct generic_header generic; 346348607Smm struct main_header main; 347348607Smm struct comp_state cstate; 348348607Smm struct file_header file; 349348607Smm struct bit_reader bits; 350348607Smm struct multivolume vol; 351348607Smm 352348607Smm /* The header of currently processed RARv5 block. Used in main 353348607Smm * decompression logic loop. */ 354348607Smm struct compressed_block_header last_block_hdr; 355339640Smm}; 356339640Smm 357339640Smm/* Forward function declarations. */ 358339640Smm 359358088Smmstatic void rar5_signature(char *buf); 360339640Smmstatic int verify_global_checksums(struct archive_read* a); 361339640Smmstatic int rar5_read_data_skip(struct archive_read *a); 362339640Smmstatic int push_data_ready(struct archive_read* a, struct rar5* rar, 363348607Smm const uint8_t* buf, size_t size, int64_t offset); 364339640Smm 365339640Smm/* CDE_xxx = Circular Double Ended (Queue) return values. */ 366339640Smmenum CDE_RETURN_VALUES { 367348607Smm CDE_OK, CDE_ALLOC, CDE_PARAM, CDE_OUT_OF_BOUNDS, 368339640Smm}; 369339640Smm 370339640Smm/* Clears the contents of this circular deque. */ 371339640Smmstatic void cdeque_clear(struct cdeque* d) { 372348607Smm d->size = 0; 373348607Smm d->beg_pos = 0; 374348607Smm d->end_pos = 0; 375339640Smm} 376339640Smm 377339640Smm/* Creates a new circular deque object. Capacity must be power of 2: 8, 16, 32, 378339640Smm * 64, 256, etc. When the user will add another item above current capacity, 379339640Smm * the circular deque will overwrite the oldest entry. 
*/ 380339640Smmstatic int cdeque_init(struct cdeque* d, int max_capacity_power_of_2) { 381348607Smm if(d == NULL || max_capacity_power_of_2 == 0) 382348607Smm return CDE_PARAM; 383339640Smm 384348607Smm d->cap_mask = max_capacity_power_of_2 - 1; 385348607Smm d->arr = NULL; 386339640Smm 387358088Smm if((max_capacity_power_of_2 & d->cap_mask) != 0) 388348607Smm return CDE_PARAM; 389339640Smm 390348607Smm cdeque_clear(d); 391348607Smm d->arr = malloc(sizeof(void*) * max_capacity_power_of_2); 392339640Smm 393348607Smm return d->arr ? CDE_OK : CDE_ALLOC; 394339640Smm} 395339640Smm 396339640Smm/* Return the current size (not capacity) of circular deque `d`. */ 397339640Smmstatic size_t cdeque_size(struct cdeque* d) { 398348607Smm return d->size; 399339640Smm} 400339640Smm 401339640Smm/* Returns the first element of current circular deque. Note that this function 402339640Smm * doesn't perform any bounds checking. If you need bounds checking, use 403339640Smm * `cdeque_front()` function instead. */ 404339640Smmstatic void cdeque_front_fast(struct cdeque* d, void** value) { 405348607Smm *value = (void*) d->arr[d->beg_pos]; 406339640Smm} 407339640Smm 408339640Smm/* Returns the first element of current circular deque. This function 409339640Smm * performs bounds checking. */ 410339640Smmstatic int cdeque_front(struct cdeque* d, void** value) { 411348607Smm if(d->size > 0) { 412348607Smm cdeque_front_fast(d, value); 413348607Smm return CDE_OK; 414348607Smm } else 415348607Smm return CDE_OUT_OF_BOUNDS; 416339640Smm} 417339640Smm 418339640Smm/* Pushes a new element into the end of this circular deque object. If current 419339640Smm * size will exceed capacity, the oldest element will be overwritten. 
*/ 420339640Smmstatic int cdeque_push_back(struct cdeque* d, void* item) { 421348607Smm if(d == NULL) 422348607Smm return CDE_PARAM; 423339640Smm 424348607Smm if(d->size == d->cap_mask + 1) 425348607Smm return CDE_OUT_OF_BOUNDS; 426339640Smm 427348607Smm d->arr[d->end_pos] = (size_t) item; 428348607Smm d->end_pos = (d->end_pos + 1) & d->cap_mask; 429348607Smm d->size++; 430339640Smm 431348607Smm return CDE_OK; 432339640Smm} 433339640Smm 434339640Smm/* Pops a front element of this circular deque object and returns its value. 435339640Smm * This function doesn't perform any bounds checking. */ 436339640Smmstatic void cdeque_pop_front_fast(struct cdeque* d, void** value) { 437348607Smm *value = (void*) d->arr[d->beg_pos]; 438348607Smm d->beg_pos = (d->beg_pos + 1) & d->cap_mask; 439348607Smm d->size--; 440339640Smm} 441339640Smm 442342360Smm/* Pops a front element of this circular deque object and returns its value. 443339640Smm * This function performs bounds checking. */ 444339640Smmstatic int cdeque_pop_front(struct cdeque* d, void** value) { 445348607Smm if(!d || !value) 446348607Smm return CDE_PARAM; 447339640Smm 448348607Smm if(d->size == 0) 449348607Smm return CDE_OUT_OF_BOUNDS; 450339640Smm 451348607Smm cdeque_pop_front_fast(d, value); 452348607Smm return CDE_OK; 453339640Smm} 454339640Smm 455342360Smm/* Convenience function to cast filter_info** to void **. */ 456339640Smmstatic void** cdeque_filter_p(struct filter_info** f) { 457348607Smm return (void**) (size_t) f; 458339640Smm} 459339640Smm 460342360Smm/* Convenience function to cast filter_info* to void *. */ 461339640Smmstatic void* cdeque_filter(struct filter_info* f) { 462348607Smm return (void**) (size_t) f; 463339640Smm} 464339640Smm 465348607Smm/* Destroys this circular deque object. Deallocates the memory of the 466348607Smm * collection buffer, but doesn't deallocate the memory of any pointer passed 467348607Smm * to this deque as a value. 
*/ 468339640Smmstatic void cdeque_free(struct cdeque* d) { 469348607Smm if(!d) 470348607Smm return; 471339640Smm 472348607Smm if(!d->arr) 473348607Smm return; 474339640Smm 475348607Smm free(d->arr); 476339640Smm 477348607Smm d->arr = NULL; 478348607Smm d->beg_pos = -1; 479348607Smm d->end_pos = -1; 480348607Smm d->cap_mask = 0; 481339640Smm} 482339640Smm 483342360Smmstatic inline 484342360Smmuint8_t bf_bit_size(const struct compressed_block_header* hdr) { 485348607Smm return hdr->block_flags_u8 & 7; 486342360Smm} 487342360Smm 488342360Smmstatic inline 489342360Smmuint8_t bf_byte_count(const struct compressed_block_header* hdr) { 490348607Smm return (hdr->block_flags_u8 >> 3) & 7; 491342360Smm} 492342360Smm 493342360Smmstatic inline 494342360Smmuint8_t bf_is_table_present(const struct compressed_block_header* hdr) { 495348607Smm return (hdr->block_flags_u8 >> 7) & 1; 496342360Smm} 497342360Smm 498339640Smmstatic inline struct rar5* get_context(struct archive_read* a) { 499348607Smm return (struct rar5*) a->format->data; 500339640Smm} 501339640Smm 502342360Smm/* Convenience functions used by filter implementations. 
*/ 503348607Smmstatic void circular_memcpy(uint8_t* dst, uint8_t* window, const uint64_t mask, 504348607Smm int64_t start, int64_t end) 505348607Smm{ 506348607Smm if((start & mask) > (end & mask)) { 507348607Smm ssize_t len1 = mask + 1 - (start & mask); 508348607Smm ssize_t len2 = end & mask; 509339640Smm 510348607Smm memcpy(dst, &window[start & mask], len1); 511348607Smm memcpy(dst + len1, window, len2); 512348607Smm } else { 513348607Smm memcpy(dst, &window[start & mask], (size_t) (end - start)); 514348607Smm } 515348607Smm} 516348607Smm 517339640Smmstatic uint32_t read_filter_data(struct rar5* rar, uint32_t offset) { 518348607Smm uint8_t linear_buf[4]; 519348607Smm circular_memcpy(linear_buf, rar->cstate.window_buf, 520348607Smm rar->cstate.window_mask, offset, offset + 4); 521348607Smm return archive_le32dec(linear_buf); 522339640Smm} 523339640Smm 524339640Smmstatic void write_filter_data(struct rar5* rar, uint32_t offset, 525348607Smm uint32_t value) 526339640Smm{ 527348607Smm archive_le32enc(&rar->cstate.filtered_buf[offset], value); 528339640Smm} 529339640Smm 530339640Smm/* Allocates a new filter descriptor and adds it to the filter array. 
*/ 531339640Smmstatic struct filter_info* add_new_filter(struct rar5* rar) { 532348607Smm struct filter_info* f = 533348607Smm (struct filter_info*) calloc(1, sizeof(struct filter_info)); 534339640Smm 535348607Smm if(!f) { 536348607Smm return NULL; 537348607Smm } 538339640Smm 539348607Smm cdeque_push_back(&rar->cstate.filters, cdeque_filter(f)); 540348607Smm return f; 541339640Smm} 542339640Smm 543339640Smmstatic int run_delta_filter(struct rar5* rar, struct filter_info* flt) { 544348607Smm int i; 545348607Smm ssize_t dest_pos, src_pos = 0; 546339640Smm 547348607Smm for(i = 0; i < flt->channels; i++) { 548348607Smm uint8_t prev_byte = 0; 549348607Smm for(dest_pos = i; 550348607Smm dest_pos < flt->block_length; 551348607Smm dest_pos += flt->channels) 552348607Smm { 553348607Smm uint8_t byte; 554339640Smm 555348607Smm byte = rar->cstate.window_buf[ 556348607Smm (rar->cstate.solid_offset + flt->block_start + 557348607Smm src_pos) & rar->cstate.window_mask]; 558339640Smm 559348607Smm prev_byte -= byte; 560348607Smm rar->cstate.filtered_buf[dest_pos] = prev_byte; 561348607Smm src_pos++; 562348607Smm } 563348607Smm } 564339640Smm 565348607Smm return ARCHIVE_OK; 566339640Smm} 567339640Smm 568339640Smmstatic int run_e8e9_filter(struct rar5* rar, struct filter_info* flt, 569348607Smm int extended) 570339640Smm{ 571348607Smm const uint32_t file_size = 0x1000000; 572348607Smm ssize_t i; 573339640Smm 574348607Smm circular_memcpy(rar->cstate.filtered_buf, 575348607Smm rar->cstate.window_buf, rar->cstate.window_mask, 576348607Smm rar->cstate.solid_offset + flt->block_start, 577348607Smm rar->cstate.solid_offset + flt->block_start + flt->block_length); 578339640Smm 579348607Smm for(i = 0; i < flt->block_length - 4;) { 580348607Smm uint8_t b = rar->cstate.window_buf[ 581348607Smm (rar->cstate.solid_offset + flt->block_start + 582348607Smm i++) & rar->cstate.window_mask]; 583339640Smm 584348607Smm /* 585348607Smm * 0xE8 = x86's call <relative_addr_uint32> (function call) 
586348607Smm * 0xE9 = x86's jmp <relative_addr_uint32> (unconditional jump) 587348607Smm */ 588348607Smm if(b == 0xE8 || (extended && b == 0xE9)) { 589339640Smm 590348607Smm uint32_t addr; 591348607Smm uint32_t offset = (i + flt->block_start) % file_size; 592339640Smm 593348607Smm addr = read_filter_data(rar, 594348607Smm (uint32_t)(rar->cstate.solid_offset + 595348607Smm flt->block_start + i) & rar->cstate.window_mask); 596339640Smm 597348607Smm if(addr & 0x80000000) { 598348607Smm if(((addr + offset) & 0x80000000) == 0) { 599348607Smm write_filter_data(rar, (uint32_t)i, 600348607Smm addr + file_size); 601348607Smm } 602348607Smm } else { 603348607Smm if((addr - file_size) & 0x80000000) { 604348607Smm uint32_t naddr = addr - offset; 605348607Smm write_filter_data(rar, (uint32_t)i, 606348607Smm naddr); 607348607Smm } 608348607Smm } 609339640Smm 610348607Smm i += 4; 611348607Smm } 612348607Smm } 613339640Smm 614348607Smm return ARCHIVE_OK; 615339640Smm} 616339640Smm 617339640Smmstatic int run_arm_filter(struct rar5* rar, struct filter_info* flt) { 618348607Smm ssize_t i = 0; 619348607Smm uint32_t offset; 620339640Smm 621348607Smm circular_memcpy(rar->cstate.filtered_buf, 622348607Smm rar->cstate.window_buf, rar->cstate.window_mask, 623348607Smm rar->cstate.solid_offset + flt->block_start, 624348607Smm rar->cstate.solid_offset + flt->block_start + flt->block_length); 625339640Smm 626348607Smm for(i = 0; i < flt->block_length - 3; i += 4) { 627348607Smm uint8_t* b = &rar->cstate.window_buf[ 628348607Smm (rar->cstate.solid_offset + 629349900Smm flt->block_start + i + 3) & rar->cstate.window_mask]; 630339640Smm 631349900Smm if(*b == 0xEB) { 632348607Smm /* 0xEB = ARM's BL (branch + link) instruction. 
*/ 633348607Smm offset = read_filter_data(rar, 634348607Smm (rar->cstate.solid_offset + flt->block_start + i) & 635348607Smm rar->cstate.window_mask) & 0x00ffffff; 636339640Smm 637348607Smm offset -= (uint32_t) ((i + flt->block_start) / 4); 638348607Smm offset = (offset & 0x00ffffff) | 0xeb000000; 639348607Smm write_filter_data(rar, (uint32_t)i, offset); 640348607Smm } 641348607Smm } 642339640Smm 643348607Smm return ARCHIVE_OK; 644339640Smm} 645339640Smm 646339640Smmstatic int run_filter(struct archive_read* a, struct filter_info* flt) { 647348607Smm int ret; 648348607Smm struct rar5* rar = get_context(a); 649339640Smm 650348607Smm free(rar->cstate.filtered_buf); 651339640Smm 652348607Smm rar->cstate.filtered_buf = malloc(flt->block_length); 653348607Smm if(!rar->cstate.filtered_buf) { 654348607Smm archive_set_error(&a->archive, ENOMEM, 655348607Smm "Can't allocate memory for filter data."); 656348607Smm return ARCHIVE_FATAL; 657348607Smm } 658339640Smm 659348607Smm switch(flt->type) { 660348607Smm case FILTER_DELTA: 661348607Smm ret = run_delta_filter(rar, flt); 662348607Smm break; 663339640Smm 664348607Smm case FILTER_E8: 665348607Smm /* fallthrough */ 666348607Smm case FILTER_E8E9: 667348607Smm ret = run_e8e9_filter(rar, flt, 668348607Smm flt->type == FILTER_E8E9); 669348607Smm break; 670339640Smm 671348607Smm case FILTER_ARM: 672348607Smm ret = run_arm_filter(rar, flt); 673348607Smm break; 674339640Smm 675348607Smm default: 676348607Smm archive_set_error(&a->archive, 677348607Smm ARCHIVE_ERRNO_FILE_FORMAT, 678348607Smm "Unsupported filter type: 0x%x", flt->type); 679348607Smm return ARCHIVE_FATAL; 680348607Smm } 681339640Smm 682348607Smm if(ret != ARCHIVE_OK) { 683348607Smm /* Filter has failed. 
*/ 684348607Smm return ret; 685348607Smm } 686339640Smm 687348607Smm if(ARCHIVE_OK != push_data_ready(a, rar, rar->cstate.filtered_buf, 688348607Smm flt->block_length, rar->cstate.last_write_ptr)) 689348607Smm { 690348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 691348607Smm "Stack overflow when submitting unpacked data"); 692339640Smm 693348607Smm return ARCHIVE_FATAL; 694348607Smm } 695339640Smm 696348607Smm rar->cstate.last_write_ptr += flt->block_length; 697348607Smm return ARCHIVE_OK; 698339640Smm} 699339640Smm 700339640Smm/* The `push_data` function submits the selected data range to the user. 701339640Smm * Next call of `use_data` will use the pointer, size and offset arguments 702339640Smm * that are specified here. These arguments are pushed to the FIFO stack here, 703339640Smm * and popped from the stack by the `use_data` function. */ 704339640Smmstatic void push_data(struct archive_read* a, struct rar5* rar, 705348607Smm const uint8_t* buf, int64_t idx_begin, int64_t idx_end) 706339640Smm{ 707348607Smm const uint64_t wmask = rar->cstate.window_mask; 708348607Smm const ssize_t solid_write_ptr = (rar->cstate.solid_offset + 709348607Smm rar->cstate.last_write_ptr) & wmask; 710339640Smm 711348607Smm idx_begin += rar->cstate.solid_offset; 712348607Smm idx_end += rar->cstate.solid_offset; 713339640Smm 714348607Smm /* Check if our unpacked data is wrapped inside the window circular 715348607Smm * buffer. If it's not wrapped, it can be copied out by using 716348607Smm * a single memcpy, but when it's wrapped, we need to copy the first 717348607Smm * part with one memcpy, and the second part with another memcpy. */ 718339640Smm 719348607Smm if((idx_begin & wmask) > (idx_end & wmask)) { 720348607Smm /* The data is wrapped (begin offset sis bigger than end 721348607Smm * offset). 
*/ 722348607Smm const ssize_t frag1_size = rar->cstate.window_size - 723348607Smm (idx_begin & wmask); 724348607Smm const ssize_t frag2_size = idx_end & wmask; 725339640Smm 726348607Smm /* Copy the first part of the buffer first. */ 727348607Smm push_data_ready(a, rar, buf + solid_write_ptr, frag1_size, 728348607Smm rar->cstate.last_write_ptr); 729339640Smm 730348607Smm /* Copy the second part of the buffer. */ 731348607Smm push_data_ready(a, rar, buf, frag2_size, 732348607Smm rar->cstate.last_write_ptr + frag1_size); 733339640Smm 734348607Smm rar->cstate.last_write_ptr += frag1_size + frag2_size; 735348607Smm } else { 736348607Smm /* Data is not wrapped, so we can just use one call to copy the 737348607Smm * data. */ 738348607Smm push_data_ready(a, rar, 739348607Smm buf + solid_write_ptr, (idx_end - idx_begin) & wmask, 740348607Smm rar->cstate.last_write_ptr); 741339640Smm 742348607Smm rar->cstate.last_write_ptr += idx_end - idx_begin; 743348607Smm } 744339640Smm} 745339640Smm 746342360Smm/* Convenience function that submits the data to the user. It uses the 747339640Smm * unpack window buffer as a source location. */ 748339640Smmstatic void push_window_data(struct archive_read* a, struct rar5* rar, 749348607Smm int64_t idx_begin, int64_t idx_end) 750339640Smm{ 751348607Smm push_data(a, rar, rar->cstate.window_buf, idx_begin, idx_end); 752339640Smm} 753339640Smm 754339640Smmstatic int apply_filters(struct archive_read* a) { 755348607Smm struct filter_info* flt; 756348607Smm struct rar5* rar = get_context(a); 757348607Smm int ret; 758339640Smm 759348607Smm rar->cstate.all_filters_applied = 0; 760339640Smm 761348607Smm /* Get the first filter that can be applied to our data. The data 762348607Smm * needs to be fully unpacked before the filter can be run. */ 763348607Smm if(CDE_OK == cdeque_front(&rar->cstate.filters, 764348607Smm cdeque_filter_p(&flt))) { 765348607Smm /* Check if our unpacked data fully covers this filter's 766348607Smm * range. 
*/ 767348607Smm if(rar->cstate.write_ptr > flt->block_start && 768348607Smm rar->cstate.write_ptr >= flt->block_start + 769348607Smm flt->block_length) { 770348607Smm /* Check if we have some data pending to be written 771348607Smm * right before the filter's start offset. */ 772348607Smm if(rar->cstate.last_write_ptr == flt->block_start) { 773348607Smm /* Run the filter specified by descriptor 774348607Smm * `flt`. */ 775348607Smm ret = run_filter(a, flt); 776348607Smm if(ret != ARCHIVE_OK) { 777348607Smm /* Filter failure, return error. */ 778348607Smm return ret; 779348607Smm } 780339640Smm 781348607Smm /* Filter descriptor won't be needed anymore 782348607Smm * after it's used, * so remove it from the 783348607Smm * filter list and free its memory. */ 784348607Smm (void) cdeque_pop_front(&rar->cstate.filters, 785348607Smm cdeque_filter_p(&flt)); 786339640Smm 787348607Smm free(flt); 788348607Smm } else { 789348607Smm /* We can't run filters yet, dump the memory 790348607Smm * right before the filter. */ 791348607Smm push_window_data(a, rar, 792348607Smm rar->cstate.last_write_ptr, 793348607Smm flt->block_start); 794348607Smm } 795339640Smm 796348607Smm /* Return 'filter applied or not needed' state to the 797348607Smm * caller. 
*/ 798348607Smm return ARCHIVE_RETRY; 799348607Smm } 800348607Smm } 801339640Smm 802348607Smm rar->cstate.all_filters_applied = 1; 803348607Smm return ARCHIVE_OK; 804339640Smm} 805339640Smm 806339640Smmstatic void dist_cache_push(struct rar5* rar, int value) { 807348607Smm int* q = rar->cstate.dist_cache; 808339640Smm 809348607Smm q[3] = q[2]; 810348607Smm q[2] = q[1]; 811348607Smm q[1] = q[0]; 812348607Smm q[0] = value; 813339640Smm} 814339640Smm 815342360Smmstatic int dist_cache_touch(struct rar5* rar, int idx) { 816348607Smm int* q = rar->cstate.dist_cache; 817348607Smm int i, dist = q[idx]; 818339640Smm 819348607Smm for(i = idx; i > 0; i--) 820348607Smm q[i] = q[i - 1]; 821339640Smm 822348607Smm q[0] = dist; 823348607Smm return dist; 824339640Smm} 825339640Smm 826339640Smmstatic void free_filters(struct rar5* rar) { 827348607Smm struct cdeque* d = &rar->cstate.filters; 828339640Smm 829348607Smm /* Free any remaining filters. All filters should be naturally 830348607Smm * consumed by the unpacking function, so remaining filters after 831348607Smm * unpacking normally mean that unpacking wasn't successful. 832348607Smm * But still of course we shouldn't leak memory in such case. */ 833339640Smm 834348607Smm /* cdeque_size() is a fast operation, so we can use it as a loop 835348607Smm * expression. */ 836348607Smm while(cdeque_size(d) > 0) { 837348607Smm struct filter_info* f = NULL; 838339640Smm 839348607Smm /* Pop_front will also decrease the collection's size. */ 840348607Smm if (CDE_OK == cdeque_pop_front(d, cdeque_filter_p(&f))) 841348607Smm free(f); 842348607Smm } 843339640Smm 844348607Smm cdeque_clear(d); 845339640Smm 846348607Smm /* Also clear out the variables needed for sanity checking. 
*/ 847348607Smm rar->cstate.last_block_start = 0; 848348607Smm rar->cstate.last_block_length = 0; 849339640Smm} 850339640Smm 851339640Smmstatic void reset_file_context(struct rar5* rar) { 852348607Smm memset(&rar->file, 0, sizeof(rar->file)); 853348607Smm blake2sp_init(&rar->file.b2state, 32); 854339640Smm 855348607Smm if(rar->main.solid) { 856348607Smm rar->cstate.solid_offset += rar->cstate.write_ptr; 857348607Smm } else { 858348607Smm rar->cstate.solid_offset = 0; 859348607Smm } 860339640Smm 861348607Smm rar->cstate.write_ptr = 0; 862348607Smm rar->cstate.last_write_ptr = 0; 863348607Smm rar->cstate.last_unstore_ptr = 0; 864339640Smm 865348607Smm rar->file.redir_type = REDIR_TYPE_NONE; 866348607Smm rar->file.redir_flags = 0; 867348607Smm 868348607Smm free_filters(rar); 869339640Smm} 870339640Smm 871339640Smmstatic inline int get_archive_read(struct archive* a, 872348607Smm struct archive_read** ar) 873339640Smm{ 874348607Smm *ar = (struct archive_read*) a; 875348607Smm archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 876348607Smm "archive_read_support_format_rar5"); 877339640Smm 878348607Smm return ARCHIVE_OK; 879339640Smm} 880339640Smm 881339640Smmstatic int read_ahead(struct archive_read* a, size_t how_many, 882348607Smm const uint8_t** ptr) 883339640Smm{ 884358088Smm ssize_t avail = -1; 885348607Smm if(!ptr) 886348607Smm return 0; 887339640Smm 888348607Smm *ptr = __archive_read_ahead(a, how_many, &avail); 889348607Smm if(*ptr == NULL) { 890348607Smm return 0; 891348607Smm } 892339640Smm 893348607Smm return 1; 894339640Smm} 895339640Smm 896339640Smmstatic int consume(struct archive_read* a, int64_t how_many) { 897348607Smm int ret; 898339640Smm 899348607Smm ret = how_many == __archive_read_consume(a, how_many) 900348607Smm ? ARCHIVE_OK 901348607Smm : ARCHIVE_FATAL; 902339640Smm 903348607Smm return ret; 904339640Smm} 905339640Smm 906339640Smm/** 907339640Smm * Read a RAR5 variable sized numeric value. 
This value will be stored in 908339640Smm * `pvalue`. The `pvalue_len` argument points to a variable that will receive 909339640Smm * the byte count that was consumed in order to decode the `pvalue` value, plus 910339640Smm * one. 911339640Smm * 912339640Smm * pvalue_len is optional and can be NULL. 913339640Smm * 914339640Smm * NOTE: if `pvalue_len` is NOT NULL, the caller needs to manually consume 915339640Smm * the number of bytes that `pvalue_len` value contains. If the `pvalue_len` 916339640Smm * is NULL, this consuming operation is done automatically. 917339640Smm * 918339640Smm * Returns 1 if *pvalue was successfully read. 919339640Smm * Returns 0 if there was an error. In this case, *pvalue contains an 920339640Smm * invalid value. 921339640Smm */ 922339640Smm 923339640Smmstatic int read_var(struct archive_read* a, uint64_t* pvalue, 924348607Smm uint64_t* pvalue_len) 925339640Smm{ 926348607Smm uint64_t result = 0; 927348607Smm size_t shift, i; 928348607Smm const uint8_t* p; 929348607Smm uint8_t b; 930339640Smm 931348607Smm /* We will read maximum of 8 bytes. We don't have to handle the 932348607Smm * situation to read the RAR5 variable-sized value stored at the end of 933348607Smm * the file, because such situation will never happen. */ 934348607Smm if(!read_ahead(a, 8, &p)) 935348607Smm return 0; 936339640Smm 937348607Smm for(shift = 0, i = 0; i < 8; i++, shift += 7) { 938348607Smm b = p[i]; 939339640Smm 940348607Smm /* Strip the MSB from the input byte and add the resulting 941348607Smm * number to the `result`. */ 942348607Smm result += (b & (uint64_t)0x7F) << shift; 943339640Smm 944348607Smm /* MSB set to 1 means we need to continue decoding process. 945348607Smm * MSB set to 0 means we're done. 946348607Smm * 947348607Smm * This conditional checks for the second case. 
*/ 948348607Smm if((b & 0x80) == 0) { 949348607Smm if(pvalue) { 950348607Smm *pvalue = result; 951348607Smm } 952339640Smm 953348607Smm /* If the caller has passed the `pvalue_len` pointer, 954348607Smm * store the number of consumed bytes in it and do NOT 955348607Smm * consume those bytes, since the caller has all the 956348607Smm * information it needs to perform */ 957348607Smm if(pvalue_len) { 958348607Smm *pvalue_len = 1 + i; 959348607Smm } else { 960348607Smm /* If the caller did not provide the 961348607Smm * `pvalue_len` pointer, it will not have the 962348607Smm * possibility to advance the file pointer, 963348607Smm * because it will not know how many bytes it 964348607Smm * needs to consume. This is why we handle 965348607Smm * such situation here automatically. */ 966348607Smm if(ARCHIVE_OK != consume(a, 1 + i)) { 967348607Smm return 0; 968348607Smm } 969348607Smm } 970339640Smm 971348607Smm /* End of decoding process, return success. */ 972348607Smm return 1; 973348607Smm } 974348607Smm } 975339640Smm 976348607Smm /* The decoded value takes the maximum number of 8 bytes. 977348607Smm * It's a maximum number of bytes, so end decoding process here 978348607Smm * even if the first bit of last byte is 1. */ 979348607Smm if(pvalue) { 980348607Smm *pvalue = result; 981348607Smm } 982339640Smm 983348607Smm if(pvalue_len) { 984348607Smm *pvalue_len = 9; 985348607Smm } else { 986348607Smm if(ARCHIVE_OK != consume(a, 9)) { 987348607Smm return 0; 988348607Smm } 989348607Smm } 990339640Smm 991348607Smm return 1; 992339640Smm} 993339640Smm 994339640Smmstatic int read_var_sized(struct archive_read* a, size_t* pvalue, 995348607Smm size_t* pvalue_len) 996339640Smm{ 997348607Smm uint64_t v; 998348607Smm uint64_t v_size = 0; 999339640Smm 1000348607Smm const int ret = pvalue_len ? 
read_var(a, &v, &v_size) 1001348607Smm : read_var(a, &v, NULL); 1002339640Smm 1003348607Smm if(ret == 1 && pvalue) { 1004348607Smm *pvalue = (size_t) v; 1005348607Smm } 1006339640Smm 1007348607Smm if(pvalue_len) { 1008348607Smm /* Possible data truncation should be safe. */ 1009348607Smm *pvalue_len = (size_t) v_size; 1010348607Smm } 1011339640Smm 1012348607Smm return ret; 1013339640Smm} 1014339640Smm 1015339640Smmstatic int read_bits_32(struct rar5* rar, const uint8_t* p, uint32_t* value) { 1016348607Smm uint32_t bits = ((uint32_t) p[rar->bits.in_addr]) << 24; 1017348607Smm bits |= p[rar->bits.in_addr + 1] << 16; 1018348607Smm bits |= p[rar->bits.in_addr + 2] << 8; 1019348607Smm bits |= p[rar->bits.in_addr + 3]; 1020348607Smm bits <<= rar->bits.bit_addr; 1021348607Smm bits |= p[rar->bits.in_addr + 4] >> (8 - rar->bits.bit_addr); 1022348607Smm *value = bits; 1023348607Smm return ARCHIVE_OK; 1024339640Smm} 1025339640Smm 1026339640Smmstatic int read_bits_16(struct rar5* rar, const uint8_t* p, uint16_t* value) { 1027348607Smm int bits = (int) ((uint32_t) p[rar->bits.in_addr]) << 16; 1028348607Smm bits |= (int) p[rar->bits.in_addr + 1] << 8; 1029348607Smm bits |= (int) p[rar->bits.in_addr + 2]; 1030348607Smm bits >>= (8 - rar->bits.bit_addr); 1031348607Smm *value = bits & 0xffff; 1032348607Smm return ARCHIVE_OK; 1033339640Smm} 1034339640Smm 1035339640Smmstatic void skip_bits(struct rar5* rar, int bits) { 1036348607Smm const int new_bits = rar->bits.bit_addr + bits; 1037348607Smm rar->bits.in_addr += new_bits >> 3; 1038348607Smm rar->bits.bit_addr = new_bits & 7; 1039339640Smm} 1040339640Smm 1041339640Smm/* n = up to 16 */ 1042339640Smmstatic int read_consume_bits(struct rar5* rar, const uint8_t* p, int n, 1043348607Smm int* value) 1044339640Smm{ 1045348607Smm uint16_t v; 1046348607Smm int ret, num; 1047339640Smm 1048348607Smm if(n == 0 || n > 16) { 1049348607Smm /* This is a programmer error and should never happen 1050348607Smm * in runtime. 
*/ 1051348607Smm return ARCHIVE_FATAL; 1052348607Smm } 1053339640Smm 1054348607Smm ret = read_bits_16(rar, p, &v); 1055348607Smm if(ret != ARCHIVE_OK) 1056348607Smm return ret; 1057339640Smm 1058348607Smm num = (int) v; 1059348607Smm num >>= 16 - n; 1060339640Smm 1061348607Smm skip_bits(rar, n); 1062339640Smm 1063348607Smm if(value) 1064348607Smm *value = num; 1065339640Smm 1066348607Smm return ARCHIVE_OK; 1067339640Smm} 1068339640Smm 1069339640Smmstatic int read_u32(struct archive_read* a, uint32_t* pvalue) { 1070348607Smm const uint8_t* p; 1071348607Smm if(!read_ahead(a, 4, &p)) 1072348607Smm return 0; 1073339640Smm 1074348607Smm *pvalue = archive_le32dec(p); 1075348607Smm return ARCHIVE_OK == consume(a, 4) ? 1 : 0; 1076339640Smm} 1077339640Smm 1078339640Smmstatic int read_u64(struct archive_read* a, uint64_t* pvalue) { 1079348607Smm const uint8_t* p; 1080348607Smm if(!read_ahead(a, 8, &p)) 1081348607Smm return 0; 1082339640Smm 1083348607Smm *pvalue = archive_le64dec(p); 1084348607Smm return ARCHIVE_OK == consume(a, 8) ? 
1 : 0; 1085339640Smm} 1086339640Smm 1087339640Smmstatic int bid_standard(struct archive_read* a) { 1088348607Smm const uint8_t* p; 1089358088Smm char signature[sizeof(rar5_signature_xor)]; 1090339640Smm 1091358088Smm rar5_signature(signature); 1092358088Smm 1093358088Smm if(!read_ahead(a, sizeof(rar5_signature_xor), &p)) 1094348607Smm return -1; 1095339640Smm 1096358088Smm if(!memcmp(signature, p, sizeof(rar5_signature_xor))) 1097348607Smm return 30; 1098339640Smm 1099348607Smm return -1; 1100339640Smm} 1101339640Smm 1102339640Smmstatic int rar5_bid(struct archive_read* a, int best_bid) { 1103348607Smm int my_bid; 1104339640Smm 1105348607Smm if(best_bid > 30) 1106348607Smm return -1; 1107339640Smm 1108348607Smm my_bid = bid_standard(a); 1109348607Smm if(my_bid > -1) { 1110348607Smm return my_bid; 1111348607Smm } 1112339640Smm 1113348607Smm return -1; 1114339640Smm} 1115339640Smm 1116348607Smmstatic int rar5_options(struct archive_read *a, const char *key, 1117348607Smm const char *val) { 1118348607Smm (void) a; 1119348607Smm (void) key; 1120348607Smm (void) val; 1121339640Smm 1122348607Smm /* No options supported in this version. Return the ARCHIVE_WARN code 1123348607Smm * to signal the options supervisor that the unpacker didn't handle 1124348607Smm * setting this option. 
*/ 1125339640Smm 1126348607Smm return ARCHIVE_WARN; 1127339640Smm} 1128339640Smm 1129339640Smmstatic void init_header(struct archive_read* a) { 1130348607Smm a->archive.archive_format = ARCHIVE_FORMAT_RAR_V5; 1131348607Smm a->archive.archive_format_name = "RAR5"; 1132339640Smm} 1133339640Smm 1134349524Smmstatic void init_window_mask(struct rar5* rar) { 1135349524Smm if (rar->cstate.window_size) 1136349524Smm rar->cstate.window_mask = rar->cstate.window_size - 1; 1137349524Smm else 1138349524Smm rar->cstate.window_mask = 0; 1139349524Smm} 1140349524Smm 1141339640Smmenum HEADER_FLAGS { 1142348607Smm HFL_EXTRA_DATA = 0x0001, 1143348607Smm HFL_DATA = 0x0002, 1144348607Smm HFL_SKIP_IF_UNKNOWN = 0x0004, 1145348607Smm HFL_SPLIT_BEFORE = 0x0008, 1146348607Smm HFL_SPLIT_AFTER = 0x0010, 1147348607Smm HFL_CHILD = 0x0020, 1148348607Smm HFL_INHERITED = 0x0040 1149339640Smm}; 1150339640Smm 1151339640Smmstatic int process_main_locator_extra_block(struct archive_read* a, 1152348607Smm struct rar5* rar) 1153339640Smm{ 1154348607Smm uint64_t locator_flags; 1155339640Smm 1156358088Smm enum LOCATOR_FLAGS { 1157358088Smm QLIST = 0x01, RECOVERY = 0x02, 1158358088Smm }; 1159358088Smm 1160348607Smm if(!read_var(a, &locator_flags, NULL)) { 1161348607Smm return ARCHIVE_EOF; 1162348607Smm } 1163339640Smm 1164348607Smm if(locator_flags & QLIST) { 1165348607Smm if(!read_var(a, &rar->qlist_offset, NULL)) { 1166348607Smm return ARCHIVE_EOF; 1167348607Smm } 1168339640Smm 1169348607Smm /* qlist is not used */ 1170348607Smm } 1171339640Smm 1172348607Smm if(locator_flags & RECOVERY) { 1173348607Smm if(!read_var(a, &rar->rr_offset, NULL)) { 1174348607Smm return ARCHIVE_EOF; 1175348607Smm } 1176339640Smm 1177348607Smm /* rr is not used */ 1178348607Smm } 1179339640Smm 1180348607Smm return ARCHIVE_OK; 1181339640Smm} 1182339640Smm 1183339640Smmstatic int parse_file_extra_hash(struct archive_read* a, struct rar5* rar, 1184348607Smm ssize_t* extra_data_size) 1185339640Smm{ 1186358088Smm size_t hash_type = 
/* Converts a timestamp counted in 1/10,000,000-second units to seconds
 * and subtracts 11644473600 seconds from it (presumably the Windows
 * FILETIME to Unix epoch conversion -- confirm against the format spec).
 * Sub-second precision is discarded. The arithmetic is unsigned, so
 * inputs below the offset wrap around. */
static uint64_t time_win_to_unix(uint64_t win_time) {
	const uint64_t ticks_per_sec = 10000000;
	const uint64_t epoch_delta_sec = 11644473600LL;
	const uint64_t whole_seconds = win_time / ticks_per_sec;

	return whole_seconds - epoch_delta_sec;
}
{ 1244348607Smm uint64_t windows_time; 1245348607Smm if(!read_u64(a, &windows_time)) 1246348607Smm return ARCHIVE_EOF; 1247339640Smm 1248348607Smm *where = time_win_to_unix(windows_time); 1249348607Smm *extra_data_size -= 8; 1250348607Smm } 1251339640Smm 1252348607Smm return ARCHIVE_OK; 1253339640Smm} 1254339640Smm 1255348607Smmstatic int parse_file_extra_version(struct archive_read* a, 1256348607Smm struct archive_entry* e, ssize_t* extra_data_size) 1257348607Smm{ 1258348607Smm size_t flags = 0; 1259348607Smm size_t version = 0; 1260348607Smm size_t value_len = 0; 1261348607Smm struct archive_string version_string; 1262348607Smm struct archive_string name_utf8_string; 1263358088Smm const char* cur_filename; 1264348607Smm 1265348607Smm /* Flags are ignored. */ 1266348607Smm if(!read_var_sized(a, &flags, &value_len)) 1267348607Smm return ARCHIVE_EOF; 1268348607Smm 1269348607Smm *extra_data_size -= value_len; 1270348607Smm if(ARCHIVE_OK != consume(a, value_len)) 1271348607Smm return ARCHIVE_EOF; 1272348607Smm 1273348607Smm if(!read_var_sized(a, &version, &value_len)) 1274348607Smm return ARCHIVE_EOF; 1275348607Smm 1276348607Smm *extra_data_size -= value_len; 1277348607Smm if(ARCHIVE_OK != consume(a, value_len)) 1278348607Smm return ARCHIVE_EOF; 1279348607Smm 1280348607Smm /* extra_data_size should be zero here. */ 1281348607Smm 1282358088Smm cur_filename = archive_entry_pathname_utf8(e); 1283348607Smm if(cur_filename == NULL) { 1284348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 1285348607Smm "Version entry without file name"); 1286348607Smm return ARCHIVE_FATAL; 1287348607Smm } 1288348607Smm 1289348607Smm archive_string_init(&version_string); 1290348607Smm archive_string_init(&name_utf8_string); 1291348607Smm 1292348607Smm /* Prepare a ;123 suffix for the filename, where '123' is the version 1293348607Smm * value of this file. 
*/ 1294348607Smm archive_string_sprintf(&version_string, ";%zu", version); 1295348607Smm 1296348607Smm /* Build the new filename. */ 1297348607Smm archive_strcat(&name_utf8_string, cur_filename); 1298348607Smm archive_strcat(&name_utf8_string, version_string.s); 1299348607Smm 1300348607Smm /* Apply the new filename into this file's context. */ 1301348607Smm archive_entry_update_pathname_utf8(e, name_utf8_string.s); 1302348607Smm 1303348607Smm /* Free buffers. */ 1304348607Smm archive_string_free(&version_string); 1305348607Smm archive_string_free(&name_utf8_string); 1306348607Smm return ARCHIVE_OK; 1307348607Smm} 1308348607Smm 1309339640Smmstatic int parse_file_extra_htime(struct archive_read* a, 1310348607Smm struct archive_entry* e, struct rar5* rar, ssize_t* extra_data_size) 1311339640Smm{ 1312348607Smm char unix_time = 0; 1313358088Smm size_t flags = 0; 1314348607Smm size_t value_len; 1315339640Smm 1316348607Smm enum HTIME_FLAGS { 1317348607Smm IS_UNIX = 0x01, 1318348607Smm HAS_MTIME = 0x02, 1319348607Smm HAS_CTIME = 0x04, 1320348607Smm HAS_ATIME = 0x08, 1321348607Smm HAS_UNIX_NS = 0x10, 1322348607Smm }; 1323339640Smm 1324348607Smm if(!read_var_sized(a, &flags, &value_len)) 1325348607Smm return ARCHIVE_EOF; 1326339640Smm 1327348607Smm *extra_data_size -= value_len; 1328348607Smm if(ARCHIVE_OK != consume(a, value_len)) { 1329348607Smm return ARCHIVE_EOF; 1330348607Smm } 1331339640Smm 1332348607Smm unix_time = flags & IS_UNIX; 1333339640Smm 1334348607Smm if(flags & HAS_MTIME) { 1335348607Smm parse_htime_item(a, unix_time, &rar->file.e_mtime, 1336348607Smm extra_data_size); 1337348607Smm archive_entry_set_mtime(e, rar->file.e_mtime, 0); 1338348607Smm } 1339339640Smm 1340348607Smm if(flags & HAS_CTIME) { 1341348607Smm parse_htime_item(a, unix_time, &rar->file.e_ctime, 1342348607Smm extra_data_size); 1343348607Smm archive_entry_set_ctime(e, rar->file.e_ctime, 0); 1344348607Smm } 1345339640Smm 1346348607Smm if(flags & HAS_ATIME) { 1347348607Smm parse_htime_item(a, 
unix_time, &rar->file.e_atime, 1348348607Smm extra_data_size); 1349348607Smm archive_entry_set_atime(e, rar->file.e_atime, 0); 1350348607Smm } 1351339640Smm 1352348607Smm if(flags & HAS_UNIX_NS) { 1353348607Smm if(!read_u32(a, &rar->file.e_unix_ns)) 1354348607Smm return ARCHIVE_EOF; 1355339640Smm 1356348607Smm *extra_data_size -= 4; 1357348607Smm } 1358339640Smm 1359348607Smm return ARCHIVE_OK; 1360339640Smm} 1361339640Smm 1362348607Smmstatic int parse_file_extra_redir(struct archive_read* a, 1363348607Smm struct archive_entry* e, struct rar5* rar, ssize_t* extra_data_size) 1364339640Smm{ 1365348607Smm uint64_t value_size = 0; 1366348607Smm size_t target_size = 0; 1367348607Smm char target_utf8_buf[MAX_NAME_IN_BYTES]; 1368348607Smm const uint8_t* p; 1369339640Smm 1370348607Smm if(!read_var(a, &rar->file.redir_type, &value_size)) 1371348607Smm return ARCHIVE_EOF; 1372348607Smm if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1373348607Smm return ARCHIVE_EOF; 1374348607Smm *extra_data_size -= value_size; 1375339640Smm 1376348607Smm if(!read_var(a, &rar->file.redir_flags, &value_size)) 1377348607Smm return ARCHIVE_EOF; 1378348607Smm if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1379348607Smm return ARCHIVE_EOF; 1380348607Smm *extra_data_size -= value_size; 1381339640Smm 1382348607Smm if(!read_var_sized(a, &target_size, NULL)) 1383348607Smm return ARCHIVE_EOF; 1384348607Smm *extra_data_size -= target_size + 1; 1385339640Smm 1386348607Smm if(!read_ahead(a, target_size, &p)) 1387348607Smm return ARCHIVE_EOF; 1388339640Smm 1389348607Smm if(target_size > (MAX_NAME_IN_CHARS - 1)) { 1390348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1391348607Smm "Link target is too long"); 1392348607Smm return ARCHIVE_FATAL; 1393348607Smm } 1394339640Smm 1395348607Smm if(target_size == 0) { 1396348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1397348607Smm "No link target specified"); 1398348607Smm return ARCHIVE_FATAL; 1399348607Smm } 
1400339640Smm 1401348607Smm memcpy(target_utf8_buf, p, target_size); 1402348607Smm target_utf8_buf[target_size] = 0; 1403339640Smm 1404348607Smm if(ARCHIVE_OK != consume(a, (int64_t)target_size)) 1405348607Smm return ARCHIVE_EOF; 1406348607Smm 1407348607Smm switch(rar->file.redir_type) { 1408348607Smm case REDIR_TYPE_UNIXSYMLINK: 1409348607Smm case REDIR_TYPE_WINSYMLINK: 1410348607Smm archive_entry_set_filetype(e, AE_IFLNK); 1411348607Smm archive_entry_update_symlink_utf8(e, target_utf8_buf); 1412348607Smm if (rar->file.redir_flags & REDIR_SYMLINK_IS_DIR) { 1413348607Smm archive_entry_set_symlink_type(e, 1414348607Smm AE_SYMLINK_TYPE_DIRECTORY); 1415348607Smm } else { 1416348607Smm archive_entry_set_symlink_type(e, 1417348607Smm AE_SYMLINK_TYPE_FILE); 1418348607Smm } 1419348607Smm break; 1420348607Smm 1421348607Smm case REDIR_TYPE_HARDLINK: 1422348607Smm archive_entry_set_filetype(e, AE_IFREG); 1423348607Smm archive_entry_update_hardlink_utf8(e, target_utf8_buf); 1424348607Smm break; 1425348607Smm 1426348607Smm default: 1427348607Smm /* Unknown redir type, skip it. 
*/ 1428348607Smm break; 1429348607Smm } 1430348607Smm return ARCHIVE_OK; 1431339640Smm} 1432339640Smm 1433348607Smmstatic int parse_file_extra_owner(struct archive_read* a, 1434348607Smm struct archive_entry* e, ssize_t* extra_data_size) 1435348607Smm{ 1436348607Smm uint64_t flags = 0; 1437348607Smm uint64_t value_size = 0; 1438348607Smm uint64_t id = 0; 1439348607Smm size_t name_len = 0; 1440348607Smm size_t name_size = 0; 1441348607Smm char namebuf[OWNER_MAXNAMELEN]; 1442348607Smm const uint8_t* p; 1443348607Smm 1444348607Smm if(!read_var(a, &flags, &value_size)) 1445348607Smm return ARCHIVE_EOF; 1446348607Smm if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1447348607Smm return ARCHIVE_EOF; 1448348607Smm *extra_data_size -= value_size; 1449348607Smm 1450348607Smm if ((flags & OWNER_USER_NAME) != 0) { 1451348607Smm if(!read_var_sized(a, &name_size, NULL)) 1452348607Smm return ARCHIVE_EOF; 1453348607Smm *extra_data_size -= name_size + 1; 1454348607Smm 1455348607Smm if(!read_ahead(a, name_size, &p)) 1456348607Smm return ARCHIVE_EOF; 1457348607Smm 1458348607Smm if (name_size >= OWNER_MAXNAMELEN) { 1459348607Smm name_len = OWNER_MAXNAMELEN - 1; 1460348607Smm } else { 1461348607Smm name_len = name_size; 1462348607Smm } 1463348607Smm 1464348607Smm memcpy(namebuf, p, name_len); 1465348607Smm namebuf[name_len] = 0; 1466348607Smm if(ARCHIVE_OK != consume(a, (int64_t)name_size)) 1467348607Smm return ARCHIVE_EOF; 1468348607Smm 1469348607Smm archive_entry_set_uname(e, namebuf); 1470348607Smm } 1471348607Smm if ((flags & OWNER_GROUP_NAME) != 0) { 1472348607Smm if(!read_var_sized(a, &name_size, NULL)) 1473348607Smm return ARCHIVE_EOF; 1474348607Smm *extra_data_size -= name_size + 1; 1475348607Smm 1476348607Smm if(!read_ahead(a, name_size, &p)) 1477348607Smm return ARCHIVE_EOF; 1478348607Smm 1479348607Smm if (name_size >= OWNER_MAXNAMELEN) { 1480348607Smm name_len = OWNER_MAXNAMELEN - 1; 1481348607Smm } else { 1482348607Smm name_len = name_size; 1483348607Smm } 1484348607Smm 
1485348607Smm memcpy(namebuf, p, name_len); 1486348607Smm namebuf[name_len] = 0; 1487348607Smm if(ARCHIVE_OK != consume(a, (int64_t)name_size)) 1488348607Smm return ARCHIVE_EOF; 1489348607Smm 1490348607Smm archive_entry_set_gname(e, namebuf); 1491348607Smm } 1492348607Smm if ((flags & OWNER_USER_UID) != 0) { 1493348607Smm if(!read_var(a, &id, &value_size)) 1494348607Smm return ARCHIVE_EOF; 1495348607Smm if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1496348607Smm return ARCHIVE_EOF; 1497348607Smm *extra_data_size -= value_size; 1498348607Smm 1499348607Smm archive_entry_set_uid(e, (la_int64_t)id); 1500348607Smm } 1501348607Smm if ((flags & OWNER_GROUP_GID) != 0) { 1502348607Smm if(!read_var(a, &id, &value_size)) 1503348607Smm return ARCHIVE_EOF; 1504348607Smm if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1505348607Smm return ARCHIVE_EOF; 1506348607Smm *extra_data_size -= value_size; 1507348607Smm 1508348607Smm archive_entry_set_gid(e, (la_int64_t)id); 1509348607Smm } 1510348607Smm return ARCHIVE_OK; 1511348607Smm} 1512348607Smm 1513348607Smmstatic int process_head_file_extra(struct archive_read* a, 1514348607Smm struct archive_entry* e, struct rar5* rar, ssize_t extra_data_size) 1515348607Smm{ 1516348607Smm size_t extra_field_size; 1517348607Smm size_t extra_field_id = 0; 1518348607Smm int ret = ARCHIVE_FATAL; 1519348607Smm size_t var_size; 1520348607Smm 1521348607Smm while(extra_data_size > 0) { 1522348607Smm if(!read_var_sized(a, &extra_field_size, &var_size)) 1523348607Smm return ARCHIVE_EOF; 1524348607Smm 1525348607Smm extra_data_size -= var_size; 1526348607Smm if(ARCHIVE_OK != consume(a, var_size)) { 1527348607Smm return ARCHIVE_EOF; 1528348607Smm } 1529348607Smm 1530348607Smm if(!read_var_sized(a, &extra_field_id, &var_size)) 1531348607Smm return ARCHIVE_EOF; 1532348607Smm 1533348607Smm extra_data_size -= var_size; 1534348607Smm if(ARCHIVE_OK != consume(a, var_size)) { 1535348607Smm return ARCHIVE_EOF; 1536348607Smm } 1537348607Smm 1538348607Smm 
switch(extra_field_id) { 1539348607Smm case EX_HASH: 1540348607Smm ret = parse_file_extra_hash(a, rar, 1541348607Smm &extra_data_size); 1542348607Smm break; 1543348607Smm case EX_HTIME: 1544348607Smm ret = parse_file_extra_htime(a, e, rar, 1545348607Smm &extra_data_size); 1546348607Smm break; 1547348607Smm case EX_REDIR: 1548348607Smm ret = parse_file_extra_redir(a, e, rar, 1549348607Smm &extra_data_size); 1550348607Smm break; 1551348607Smm case EX_UOWNER: 1552348607Smm ret = parse_file_extra_owner(a, e, 1553348607Smm &extra_data_size); 1554348607Smm break; 1555348607Smm case EX_VERSION: 1556348607Smm ret = parse_file_extra_version(a, e, 1557348607Smm &extra_data_size); 1558348607Smm break; 1559348607Smm case EX_CRYPT: 1560348607Smm /* fallthrough */ 1561348607Smm case EX_SUBDATA: 1562348607Smm /* fallthrough */ 1563348607Smm default: 1564348607Smm /* Skip unsupported entry. */ 1565348607Smm return consume(a, extra_data_size); 1566348607Smm } 1567348607Smm } 1568348607Smm 1569348607Smm if(ret != ARCHIVE_OK) { 1570348607Smm /* Attribute not implemented. 
*/ 1571348607Smm return ret; 1572348607Smm } 1573348607Smm 1574348607Smm return ARCHIVE_OK; 1575348607Smm} 1576348607Smm 1577339640Smmstatic int process_head_file(struct archive_read* a, struct rar5* rar, 1578348607Smm struct archive_entry* entry, size_t block_flags) 1579339640Smm{ 1580348607Smm ssize_t extra_data_size = 0; 1581348607Smm size_t data_size = 0; 1582348607Smm size_t file_flags = 0; 1583348607Smm size_t file_attr = 0; 1584348607Smm size_t compression_info = 0; 1585348607Smm size_t host_os = 0; 1586348607Smm size_t name_size = 0; 1587348607Smm uint64_t unpacked_size, window_size; 1588348607Smm uint32_t mtime = 0, crc = 0; 1589348607Smm int c_method = 0, c_version = 0; 1590348607Smm char name_utf8_buf[MAX_NAME_IN_BYTES]; 1591348607Smm const uint8_t* p; 1592339640Smm 1593358088Smm enum FILE_FLAGS { 1594358088Smm DIRECTORY = 0x0001, UTIME = 0x0002, CRC32 = 0x0004, 1595358088Smm UNKNOWN_UNPACKED_SIZE = 0x0008, 1596358088Smm }; 1597358088Smm 1598358088Smm enum FILE_ATTRS { 1599358088Smm ATTR_READONLY = 0x1, ATTR_HIDDEN = 0x2, ATTR_SYSTEM = 0x4, 1600358088Smm ATTR_DIRECTORY = 0x10, 1601358088Smm }; 1602358088Smm 1603358088Smm enum COMP_INFO_FLAGS { 1604358088Smm SOLID = 0x0040, 1605358088Smm }; 1606358088Smm 1607358088Smm enum HOST_OS { 1608358088Smm HOST_WINDOWS = 0, 1609358088Smm HOST_UNIX = 1, 1610358088Smm }; 1611358088Smm 1612348607Smm archive_entry_clear(entry); 1613339640Smm 1614348607Smm /* Do not reset file context if we're switching archives. */ 1615348607Smm if(!rar->cstate.switch_multivolume) { 1616348607Smm reset_file_context(rar); 1617348607Smm } 1618339640Smm 1619348607Smm if(block_flags & HFL_EXTRA_DATA) { 1620348607Smm size_t edata_size = 0; 1621348607Smm if(!read_var_sized(a, &edata_size, NULL)) 1622348607Smm return ARCHIVE_EOF; 1623339640Smm 1624348607Smm /* Intentional type cast from unsigned to signed. 
*/ 1625348607Smm extra_data_size = (ssize_t) edata_size; 1626348607Smm } 1627339640Smm 1628348607Smm if(block_flags & HFL_DATA) { 1629348607Smm if(!read_var_sized(a, &data_size, NULL)) 1630348607Smm return ARCHIVE_EOF; 1631339640Smm 1632348607Smm rar->file.bytes_remaining = data_size; 1633348607Smm } else { 1634348607Smm rar->file.bytes_remaining = 0; 1635339640Smm 1636348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1637348607Smm "no data found in file/service block"); 1638348607Smm return ARCHIVE_FATAL; 1639348607Smm } 1640339640Smm 1641348607Smm if(!read_var_sized(a, &file_flags, NULL)) 1642348607Smm return ARCHIVE_EOF; 1643339640Smm 1644348607Smm if(!read_var(a, &unpacked_size, NULL)) 1645348607Smm return ARCHIVE_EOF; 1646339640Smm 1647348607Smm if(file_flags & UNKNOWN_UNPACKED_SIZE) { 1648348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 1649348607Smm "Files with unknown unpacked size are not supported"); 1650348607Smm return ARCHIVE_FATAL; 1651348607Smm } 1652339640Smm 1653348607Smm rar->file.dir = (uint8_t) ((file_flags & DIRECTORY) > 0); 1654339640Smm 1655348607Smm if(!read_var_sized(a, &file_attr, NULL)) 1656348607Smm return ARCHIVE_EOF; 1657339640Smm 1658348607Smm if(file_flags & UTIME) { 1659348607Smm if(!read_u32(a, &mtime)) 1660348607Smm return ARCHIVE_EOF; 1661348607Smm } 1662339640Smm 1663348607Smm if(file_flags & CRC32) { 1664348607Smm if(!read_u32(a, &crc)) 1665348607Smm return ARCHIVE_EOF; 1666348607Smm } 1667339640Smm 1668348607Smm if(!read_var_sized(a, &compression_info, NULL)) 1669348607Smm return ARCHIVE_EOF; 1670339640Smm 1671348607Smm c_method = (int) (compression_info >> 7) & 0x7; 1672348607Smm c_version = (int) (compression_info & 0x3f); 1673339640Smm 1674348607Smm /* RAR5 seems to limit the dictionary size to 64MB. */ 1675348607Smm window_size = (rar->file.dir > 0) ? 
1676348607Smm 0 : 1677348607Smm g_unpack_window_size << ((compression_info >> 10) & 15); 1678348607Smm rar->cstate.method = c_method; 1679348607Smm rar->cstate.version = c_version + 50; 1680358088Smm rar->file.solid = (compression_info & SOLID) > 0; 1681339640Smm 1682358088Smm /* Archives which declare solid files without initializing the window 1683358088Smm * buffer first are invalid. */ 1684358088Smm 1685358088Smm if(rar->file.solid > 0 && rar->cstate.window_buf == NULL) { 1686358088Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1687358088Smm "Declared solid file, but no window buffer " 1688358088Smm "initialized yet."); 1689358088Smm return ARCHIVE_FATAL; 1690358088Smm } 1691358088Smm 1692348607Smm /* Check if window_size is a sane value. Also, if the file is not 1693348607Smm * declared as a directory, disallow window_size == 0. */ 1694348607Smm if(window_size > (64 * 1024 * 1024) || 1695348607Smm (rar->file.dir == 0 && window_size == 0)) 1696348607Smm { 1697348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1698348607Smm "Declared dictionary size is not supported."); 1699348607Smm return ARCHIVE_FATAL; 1700348607Smm } 1701339640Smm 1702358088Smm if(rar->file.solid > 0) { 1703358088Smm /* Re-check if current window size is the same as previous 1704358088Smm * window size (for solid files only). */ 1705358088Smm if(rar->file.solid_window_size > 0 && 1706358088Smm rar->file.solid_window_size != (ssize_t) window_size) 1707358088Smm { 1708358088Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1709358088Smm "Window size for this solid file doesn't match " 1710358088Smm "the window size used in previous solid file. "); 1711358088Smm return ARCHIVE_FATAL; 1712358088Smm } 1713358088Smm } 1714358088Smm 1715358088Smm /* If we're currently switching volumes, ignore the new definition of 1716358088Smm * window_size. 
*/ 1717358088Smm if(rar->cstate.switch_multivolume == 0) { 1718358088Smm /* Values up to 64M should fit into ssize_t on every 1719358088Smm * architecture. */ 1720358088Smm rar->cstate.window_size = (ssize_t) window_size; 1721358088Smm } 1722358088Smm 1723358088Smm if(rar->file.solid > 0 && rar->file.solid_window_size == 0) { 1724358088Smm /* Solid files have to have the same window_size across 1725358088Smm whole archive. Remember the window_size parameter 1726358088Smm for first solid file found. */ 1727358088Smm rar->file.solid_window_size = rar->cstate.window_size; 1728358088Smm } 1729358088Smm 1730349524Smm init_window_mask(rar); 1731339640Smm 1732348607Smm rar->file.service = 0; 1733339640Smm 1734348607Smm if(!read_var_sized(a, &host_os, NULL)) 1735348607Smm return ARCHIVE_EOF; 1736339640Smm 1737348607Smm if(host_os == HOST_WINDOWS) { 1738348607Smm /* Host OS is Windows */ 1739339640Smm 1740348607Smm __LA_MODE_T mode; 1741339640Smm 1742348607Smm if(file_attr & ATTR_DIRECTORY) { 1743348607Smm if (file_attr & ATTR_READONLY) { 1744348607Smm mode = 0555 | AE_IFDIR; 1745348607Smm } else { 1746348607Smm mode = 0755 | AE_IFDIR; 1747348607Smm } 1748348607Smm } else { 1749348607Smm if (file_attr & ATTR_READONLY) { 1750348607Smm mode = 0444 | AE_IFREG; 1751348607Smm } else { 1752348607Smm mode = 0644 | AE_IFREG; 1753348607Smm } 1754348607Smm } 1755339640Smm 1756348607Smm archive_entry_set_mode(entry, mode); 1757339640Smm 1758348607Smm if (file_attr & (ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM)) { 1759348607Smm char *fflags_text, *ptr; 1760348607Smm /* allocate for "rdonly,hidden,system," */ 1761348607Smm fflags_text = malloc(22 * sizeof(char)); 1762348607Smm if (fflags_text != NULL) { 1763348607Smm ptr = fflags_text; 1764348607Smm if (file_attr & ATTR_READONLY) { 1765348607Smm strcpy(ptr, "rdonly,"); 1766348607Smm ptr = ptr + 7; 1767348607Smm } 1768348607Smm if (file_attr & ATTR_HIDDEN) { 1769348607Smm strcpy(ptr, "hidden,"); 1770348607Smm ptr = ptr + 7; 1771348607Smm 
} 1772348607Smm if (file_attr & ATTR_SYSTEM) { 1773348607Smm strcpy(ptr, "system,"); 1774348607Smm ptr = ptr + 7; 1775348607Smm } 1776348607Smm if (ptr > fflags_text) { 1777348607Smm /* Delete trailing comma */ 1778348607Smm *(ptr - 1) = '\0'; 1779348607Smm archive_entry_copy_fflags_text(entry, 1780348607Smm fflags_text); 1781348607Smm } 1782348607Smm free(fflags_text); 1783348607Smm } 1784348607Smm } 1785348607Smm } else if(host_os == HOST_UNIX) { 1786348607Smm /* Host OS is Unix */ 1787348607Smm archive_entry_set_mode(entry, (__LA_MODE_T) file_attr); 1788348607Smm } else { 1789348607Smm /* Unknown host OS */ 1790348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1791348607Smm "Unsupported Host OS: 0x%x", (int) host_os); 1792339640Smm 1793348607Smm return ARCHIVE_FATAL; 1794348607Smm } 1795339640Smm 1796348607Smm if(!read_var_sized(a, &name_size, NULL)) 1797348607Smm return ARCHIVE_EOF; 1798339640Smm 1799348607Smm if(!read_ahead(a, name_size, &p)) 1800348607Smm return ARCHIVE_EOF; 1801339640Smm 1802348607Smm if(name_size > (MAX_NAME_IN_CHARS - 1)) { 1803348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1804348607Smm "Filename is too long"); 1805339640Smm 1806348607Smm return ARCHIVE_FATAL; 1807348607Smm } 1808339640Smm 1809348607Smm if(name_size == 0) { 1810348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1811348607Smm "No filename specified"); 1812339640Smm 1813348607Smm return ARCHIVE_FATAL; 1814348607Smm } 1815339640Smm 1816348607Smm memcpy(name_utf8_buf, p, name_size); 1817348607Smm name_utf8_buf[name_size] = 0; 1818348607Smm if(ARCHIVE_OK != consume(a, name_size)) { 1819348607Smm return ARCHIVE_EOF; 1820348607Smm } 1821339640Smm 1822348607Smm archive_entry_update_pathname_utf8(entry, name_utf8_buf); 1823339640Smm 1824348607Smm if(extra_data_size > 0) { 1825348607Smm int ret = process_head_file_extra(a, entry, rar, 1826348607Smm extra_data_size); 1827339640Smm 1828358088Smm /* 1829358088Smm * TODO: rewrite 
or remove useless sanity check 1830358088Smm * as extra_data_size is not passed as a pointer 1831358088Smm * 1832348607Smm if(extra_data_size < 0) { 1833348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 1834348607Smm "File extra data size is not zero"); 1835348607Smm return ARCHIVE_FATAL; 1836348607Smm } 1837358088Smm */ 1838339640Smm 1839348607Smm if(ret != ARCHIVE_OK) 1840348607Smm return ret; 1841348607Smm } 1842339640Smm 1843348607Smm if((file_flags & UNKNOWN_UNPACKED_SIZE) == 0) { 1844348607Smm rar->file.unpacked_size = (ssize_t) unpacked_size; 1845348607Smm if(rar->file.redir_type == REDIR_TYPE_NONE) 1846348607Smm archive_entry_set_size(entry, unpacked_size); 1847348607Smm } 1848339640Smm 1849348607Smm if(file_flags & UTIME) { 1850348607Smm archive_entry_set_mtime(entry, (time_t) mtime, 0); 1851348607Smm } 1852348607Smm 1853348607Smm if(file_flags & CRC32) { 1854348607Smm rar->file.stored_crc32 = crc; 1855348607Smm } 1856348607Smm 1857348607Smm if(!rar->cstate.switch_multivolume) { 1858348607Smm /* Do not reinitialize unpacking state if we're switching 1859348607Smm * archives. */ 1860348607Smm rar->cstate.block_parsing_finished = 1; 1861348607Smm rar->cstate.all_filters_applied = 1; 1862348607Smm rar->cstate.initialized = 0; 1863348607Smm } 1864348607Smm 1865348607Smm if(rar->generic.split_before > 0) { 1866348607Smm /* If now we're standing on a header that has a 'split before' 1867348607Smm * mark, it means we're standing on a 'continuation' file 1868348607Smm * header. Signal the caller that if it wants to move to 1869348607Smm * another file, it must call rar5_read_header() function 1870348607Smm * again. 
*/ 1871348607Smm 1872348607Smm return ARCHIVE_RETRY; 1873348607Smm } else { 1874348607Smm return ARCHIVE_OK; 1875348607Smm } 1876339640Smm} 1877339640Smm 1878339640Smmstatic int process_head_service(struct archive_read* a, struct rar5* rar, 1879348607Smm struct archive_entry* entry, size_t block_flags) 1880339640Smm{ 1881348607Smm /* Process this SERVICE block the same way as FILE blocks. */ 1882348607Smm int ret = process_head_file(a, rar, entry, block_flags); 1883348607Smm if(ret != ARCHIVE_OK) 1884348607Smm return ret; 1885339640Smm 1886348607Smm rar->file.service = 1; 1887339640Smm 1888348607Smm /* But skip the data part automatically. It's no use for the user 1889348607Smm * anyway. It contains only service data, not even needed to 1890348607Smm * properly unpack the file. */ 1891348607Smm ret = rar5_read_data_skip(a); 1892348607Smm if(ret != ARCHIVE_OK) 1893348607Smm return ret; 1894339640Smm 1895348607Smm /* After skipping, try parsing another block automatically. */ 1896348607Smm return ARCHIVE_RETRY; 1897339640Smm} 1898339640Smm 1899339640Smmstatic int process_head_main(struct archive_read* a, struct rar5* rar, 1900348607Smm struct archive_entry* entry, size_t block_flags) 1901339640Smm{ 1902348607Smm int ret; 1903348607Smm size_t extra_data_size = 0; 1904348607Smm size_t extra_field_size = 0; 1905348607Smm size_t extra_field_id = 0; 1906348607Smm size_t archive_flags = 0; 1907339640Smm 1908358088Smm enum MAIN_FLAGS { 1909358088Smm VOLUME = 0x0001, /* multi-volume archive */ 1910358088Smm VOLUME_NUMBER = 0x0002, /* volume number, first vol doesn't 1911358088Smm * have it */ 1912358088Smm SOLID = 0x0004, /* solid archive */ 1913358088Smm PROTECT = 0x0008, /* contains Recovery info */ 1914358088Smm LOCK = 0x0010, /* readonly flag, not used */ 1915358088Smm }; 1916358088Smm 1917358088Smm enum MAIN_EXTRA { 1918358088Smm // Just one attribute here. 
1919358088Smm LOCATOR = 0x01, 1920358088Smm }; 1921358088Smm 1922358088Smm (void) entry; 1923358088Smm 1924348607Smm if(block_flags & HFL_EXTRA_DATA) { 1925348607Smm if(!read_var_sized(a, &extra_data_size, NULL)) 1926348607Smm return ARCHIVE_EOF; 1927348607Smm } else { 1928348607Smm extra_data_size = 0; 1929348607Smm } 1930339640Smm 1931348607Smm if(!read_var_sized(a, &archive_flags, NULL)) { 1932348607Smm return ARCHIVE_EOF; 1933348607Smm } 1934339640Smm 1935348607Smm rar->main.volume = (archive_flags & VOLUME) > 0; 1936348607Smm rar->main.solid = (archive_flags & SOLID) > 0; 1937339640Smm 1938348607Smm if(archive_flags & VOLUME_NUMBER) { 1939348607Smm size_t v = 0; 1940348607Smm if(!read_var_sized(a, &v, NULL)) { 1941348607Smm return ARCHIVE_EOF; 1942348607Smm } 1943339640Smm 1944348607Smm if (v > UINT_MAX) { 1945348607Smm archive_set_error(&a->archive, 1946348607Smm ARCHIVE_ERRNO_FILE_FORMAT, 1947348607Smm "Invalid volume number"); 1948348607Smm return ARCHIVE_FATAL; 1949348607Smm } 1950339640Smm 1951348607Smm rar->main.vol_no = (unsigned int) v; 1952348607Smm } else { 1953348607Smm rar->main.vol_no = 0; 1954348607Smm } 1955339640Smm 1956348607Smm if(rar->vol.expected_vol_no > 0 && 1957348607Smm rar->main.vol_no != rar->vol.expected_vol_no) 1958348607Smm { 1959348607Smm /* Returning EOF instead of FATAL because of strange 1960348607Smm * libarchive behavior. When opening multiple files via 1961348607Smm * archive_read_open_filenames(), after reading up the whole 1962348607Smm * last file, the __archive_read_ahead function wraps up to 1963348607Smm * the first archive instead of returning EOF. */ 1964348607Smm return ARCHIVE_EOF; 1965348607Smm } 1966339640Smm 1967348607Smm if(extra_data_size == 0) { 1968348607Smm /* Early return. 
*/ 1969348607Smm return ARCHIVE_OK; 1970348607Smm } 1971339640Smm 1972348607Smm if(!read_var_sized(a, &extra_field_size, NULL)) { 1973348607Smm return ARCHIVE_EOF; 1974348607Smm } 1975339640Smm 1976348607Smm if(!read_var_sized(a, &extra_field_id, NULL)) { 1977348607Smm return ARCHIVE_EOF; 1978348607Smm } 1979339640Smm 1980348607Smm if(extra_field_size == 0) { 1981348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1982348607Smm "Invalid extra field size"); 1983348607Smm return ARCHIVE_FATAL; 1984348607Smm } 1985339640Smm 1986348607Smm switch(extra_field_id) { 1987348607Smm case LOCATOR: 1988348607Smm ret = process_main_locator_extra_block(a, rar); 1989348607Smm if(ret != ARCHIVE_OK) { 1990348607Smm /* Error while parsing main locator extra 1991348607Smm * block. */ 1992348607Smm return ret; 1993348607Smm } 1994339640Smm 1995348607Smm break; 1996348607Smm default: 1997348607Smm archive_set_error(&a->archive, 1998348607Smm ARCHIVE_ERRNO_FILE_FORMAT, 1999348607Smm "Unsupported extra type (0x%x)", 2000348607Smm (int) extra_field_id); 2001348607Smm return ARCHIVE_FATAL; 2002348607Smm } 2003348607Smm 2004348607Smm return ARCHIVE_OK; 2005339640Smm} 2006339640Smm 2007348607Smmstatic int skip_unprocessed_bytes(struct archive_read* a) { 2008348607Smm struct rar5* rar = get_context(a); 2009348607Smm int ret; 2010348607Smm 2011348607Smm if(rar->file.bytes_remaining) { 2012348607Smm /* Use different skipping method in block merging mode than in 2013348607Smm * normal mode. If merge mode is active, rar5_read_data_skip 2014348607Smm * can't be used, because it could allow recursive use of 2015348607Smm * merge_block() * function, and this function doesn't support 2016348607Smm * recursive use. */ 2017348607Smm if(rar->merge_mode) { 2018348607Smm /* Discard whole merged block. This is valid in solid 2019348607Smm * mode as well, because the code will discard blocks 2020348607Smm * only if those blocks are safe to discard (i.e. 
2021348607Smm * they're not FILE blocks). */ 2022348607Smm ret = consume(a, rar->file.bytes_remaining); 2023348607Smm if(ret != ARCHIVE_OK) { 2024348607Smm return ret; 2025348607Smm } 2026348607Smm rar->file.bytes_remaining = 0; 2027348607Smm } else { 2028348607Smm /* If we're not in merge mode, use safe skipping code. 2029348607Smm * This will ensure we'll handle solid archives 2030348607Smm * properly. */ 2031348607Smm ret = rar5_read_data_skip(a); 2032348607Smm if(ret != ARCHIVE_OK) { 2033348607Smm return ret; 2034348607Smm } 2035348607Smm } 2036348607Smm } 2037348607Smm 2038348607Smm return ARCHIVE_OK; 2039348607Smm} 2040348607Smm 2041339640Smmstatic int scan_for_signature(struct archive_read* a); 2042339640Smm 2043339640Smm/* Base block processing function. A 'base block' is a RARv5 header block 2044339640Smm * that tells the reader what kind of data is stored inside the block. 2045339640Smm * 2046339640Smm * From a bird's-eye view a RAR file looks like this: 2047339640Smm * 2048339640Smm * <magic><base_block_1><base_block_2>...<base_block_n> 2049339640Smm * 2050339640Smm * There are a few types of base blocks. Those types are specified inside 2051339640Smm * the 'switch' statement in this function. For example purposes, I'll write 2052339640Smm * what a standard RARv5 file could look like here: 2053339640Smm * 2054339640Smm * <magic><MAIN><FILE><FILE><FILE><SERVICE><ENDARC> 2055339640Smm * 2056339640Smm * The structure above could describe an archive file with 3 files in it, 2057339640Smm * one service "QuickOpen" block (that is ignored by this parser), and an 2058339640Smm * end of file base block marker.
2059339640Smm * 2060339640Smm * If the file is stored in multiple archive files ("multiarchive"), it might 2061339640Smm * look like this: 2062339640Smm * 2063339640Smm * .part01.rar: <magic><MAIN><FILE><ENDARC> 2064339640Smm * .part02.rar: <magic><MAIN><FILE><ENDARC> 2065339640Smm * .part03.rar: <magic><MAIN><FILE><ENDARC> 2066339640Smm * 2067339640Smm * This example could describe 3 RAR files that contain ONE archived file. 2068339640Smm * Or it could describe 3 RAR files that contain 3 different files. Or 3 2069339640Smm * RAR files that contain 2 files. It all depends on what metadata is stored in 2070339640Smm * the headers of <FILE> blocks. 2071339640Smm * 2072339640Smm * Each <FILE> block contains info about its size, the name of the file it's 2073339640Smm * storing inside, and whether this FILE block is a continuation block of 2074339640Smm * previous archive ('split before'), and whether this FILE block should be 2075339640Smm * continued in another archive ('split after'). By parsing the 'split before' 2076339640Smm * and 'split after' flags, we're able to tell if multiple <FILE> base blocks 2077339640Smm * are describing one file, or multiple files (with the same filename, for 2078339640Smm * example). 2079339640Smm * 2080339640Smm * One thing to note is that if we're parsing the first <FILE> block, and 2081339640Smm * we see the 'split after' flag, then we need to jump over to another <FILE> 2082339640Smm * block to be able to decompress the rest of the data. To do this, we need 2083339640Smm * to skip the <ENDARC> block, then switch to another file, then skip the 2084339640Smm * <magic> block, <MAIN> block, and then we're standing on the proper 2085339640Smm * <FILE> block.
2086339640Smm */ 2087339640Smm 2088339640Smmstatic int process_base_block(struct archive_read* a, 2089348607Smm struct archive_entry* entry) 2090339640Smm{ 2091358088Smm const size_t SMALLEST_RAR5_BLOCK_SIZE = 3; 2092358088Smm 2093348607Smm struct rar5* rar = get_context(a); 2094348607Smm uint32_t hdr_crc, computed_crc; 2095348607Smm size_t raw_hdr_size = 0, hdr_size_len, hdr_size; 2096348607Smm size_t header_id = 0; 2097348607Smm size_t header_flags = 0; 2098348607Smm const uint8_t* p; 2099348607Smm int ret; 2100339640Smm 2101358088Smm enum HEADER_TYPE { 2102358088Smm HEAD_MARK = 0x00, HEAD_MAIN = 0x01, HEAD_FILE = 0x02, 2103358088Smm HEAD_SERVICE = 0x03, HEAD_CRYPT = 0x04, HEAD_ENDARC = 0x05, 2104358088Smm HEAD_UNKNOWN = 0xff, 2105358088Smm }; 2106358088Smm 2107348607Smm /* Skip any unprocessed data for this file. */ 2108348607Smm ret = skip_unprocessed_bytes(a); 2109348607Smm if(ret != ARCHIVE_OK) 2110348607Smm return ret; 2111339640Smm 2112348607Smm /* Read the expected CRC32 checksum. */ 2113348607Smm if(!read_u32(a, &hdr_crc)) { 2114348607Smm return ARCHIVE_EOF; 2115348607Smm } 2116339640Smm 2117348607Smm /* Read header size. */ 2118348607Smm if(!read_var_sized(a, &raw_hdr_size, &hdr_size_len)) { 2119348607Smm return ARCHIVE_EOF; 2120348607Smm } 2121339640Smm 2122358088Smm hdr_size = raw_hdr_size + hdr_size_len; 2123358088Smm 2124348607Smm /* Sanity check, maximum header size for RAR5 is 2MB. */ 2125358088Smm if(hdr_size > (2 * 1024 * 1024)) { 2126348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2127348607Smm "Base block header is too large"); 2128339640Smm 2129348607Smm return ARCHIVE_FATAL; 2130348607Smm } 2131339640Smm 2132358088Smm /* Additional sanity checks to weed out invalid files. 
*/ 2133358088Smm if(raw_hdr_size == 0 || hdr_size_len == 0 || 2134358088Smm hdr_size < SMALLEST_RAR5_BLOCK_SIZE) 2135358088Smm { 2136358088Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2137358088Smm "Too small block encountered (%zu bytes)", 2138358088Smm raw_hdr_size); 2139339640Smm 2140358088Smm return ARCHIVE_FATAL; 2141358088Smm } 2142358088Smm 2143348607Smm /* Read the whole header data into memory, maximum memory use here is 2144348607Smm * 2MB. */ 2145348607Smm if(!read_ahead(a, hdr_size, &p)) { 2146348607Smm return ARCHIVE_EOF; 2147348607Smm } 2148339640Smm 2149348607Smm /* Verify the CRC32 of the header data. */ 2150348607Smm computed_crc = (uint32_t) crc32(0, p, (int) hdr_size); 2151348607Smm if(computed_crc != hdr_crc) { 2152348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2153348607Smm "Header CRC error"); 2154339640Smm 2155348607Smm return ARCHIVE_FATAL; 2156348607Smm } 2157339640Smm 2158348607Smm /* If the checksum is OK, we proceed with parsing. */ 2159348607Smm if(ARCHIVE_OK != consume(a, hdr_size_len)) { 2160348607Smm return ARCHIVE_EOF; 2161348607Smm } 2162339640Smm 2163348607Smm if(!read_var_sized(a, &header_id, NULL)) 2164348607Smm return ARCHIVE_EOF; 2165339640Smm 2166348607Smm if(!read_var_sized(a, &header_flags, NULL)) 2167348607Smm return ARCHIVE_EOF; 2168339640Smm 2169348607Smm rar->generic.split_after = (header_flags & HFL_SPLIT_AFTER) > 0; 2170348607Smm rar->generic.split_before = (header_flags & HFL_SPLIT_BEFORE) > 0; 2171348607Smm rar->generic.size = (int)hdr_size; 2172348607Smm rar->generic.last_header_id = (int)header_id; 2173348607Smm rar->main.endarc = 0; 2174339640Smm 2175348607Smm /* Those are possible header ids in RARv5. */ 2176348607Smm switch(header_id) { 2177348607Smm case HEAD_MAIN: 2178348607Smm ret = process_head_main(a, rar, entry, header_flags); 2179339640Smm 2180348607Smm /* Main header doesn't have any files in it, so it's 2181348607Smm * pointless to return to the caller. 
Retry to next 2182348607Smm * header, which should be HEAD_FILE/HEAD_SERVICE. */ 2183348607Smm if(ret == ARCHIVE_OK) 2184348607Smm return ARCHIVE_RETRY; 2185339640Smm 2186348607Smm return ret; 2187348607Smm case HEAD_SERVICE: 2188348607Smm ret = process_head_service(a, rar, entry, header_flags); 2189348607Smm return ret; 2190348607Smm case HEAD_FILE: 2191348607Smm ret = process_head_file(a, rar, entry, header_flags); 2192348607Smm return ret; 2193348607Smm case HEAD_CRYPT: 2194348607Smm archive_set_error(&a->archive, 2195348607Smm ARCHIVE_ERRNO_FILE_FORMAT, 2196348607Smm "Encryption is not supported"); 2197348607Smm return ARCHIVE_FATAL; 2198348607Smm case HEAD_ENDARC: 2199348607Smm rar->main.endarc = 1; 2200339640Smm 2201348607Smm /* After encountering an end of file marker, we need 2202348607Smm * to take into consideration if this archive is 2203348607Smm * continued in another file (i.e. is it part01.rar: 2204348607Smm * is there a part02.rar?) */ 2205348607Smm if(rar->main.volume) { 2206348607Smm /* In case there is part02.rar, position the 2207348607Smm * read pointer in a proper place, so we can 2208348607Smm * resume parsing. 
*/ 2209348607Smm ret = scan_for_signature(a); 2210348607Smm if(ret == ARCHIVE_FATAL) { 2211348607Smm return ARCHIVE_EOF; 2212348607Smm } else { 2213348607Smm if(rar->vol.expected_vol_no == 2214348607Smm UINT_MAX) { 2215348607Smm archive_set_error(&a->archive, 2216348607Smm ARCHIVE_ERRNO_FILE_FORMAT, 2217348607Smm "Header error"); 2218348607Smm return ARCHIVE_FATAL; 2219348607Smm } 2220339640Smm 2221348607Smm rar->vol.expected_vol_no = 2222348607Smm rar->main.vol_no + 1; 2223348607Smm return ARCHIVE_OK; 2224348607Smm } 2225348607Smm } else { 2226348607Smm return ARCHIVE_EOF; 2227348607Smm } 2228348607Smm case HEAD_MARK: 2229348607Smm return ARCHIVE_EOF; 2230348607Smm default: 2231348607Smm if((header_flags & HFL_SKIP_IF_UNKNOWN) == 0) { 2232348607Smm archive_set_error(&a->archive, 2233348607Smm ARCHIVE_ERRNO_FILE_FORMAT, 2234348607Smm "Header type error"); 2235348607Smm return ARCHIVE_FATAL; 2236348607Smm } else { 2237348607Smm /* If the block is marked as 'skip if unknown', 2238348607Smm * do as the flag says: skip the block 2239348607Smm * instead on failing on it. */ 2240348607Smm return ARCHIVE_RETRY; 2241348607Smm } 2242348607Smm } 2243339640Smm 2244339640Smm#if !defined WIN32 2245348607Smm // Not reached. 2246348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 2247348607Smm "Internal unpacker error"); 2248348607Smm return ARCHIVE_FATAL; 2249339640Smm#endif 2250339640Smm} 2251339640Smm 2252339640Smmstatic int skip_base_block(struct archive_read* a) { 2253348607Smm int ret; 2254348607Smm struct rar5* rar = get_context(a); 2255339640Smm 2256348607Smm /* Create a new local archive_entry structure that will be operated on 2257348607Smm * by header reader; operations on this archive_entry will be discarded. 2258348607Smm */ 2259348607Smm struct archive_entry* entry = archive_entry_new(); 2260348607Smm ret = process_base_block(a, entry); 2261339640Smm 2262348607Smm /* Discard operations on this archive_entry structure. 
*/ 2263348607Smm archive_entry_free(entry); 2264348607Smm if(ret == ARCHIVE_FATAL) 2265348607Smm return ret; 2266344673Smm 2267348607Smm if(rar->generic.last_header_id == 2 && rar->generic.split_before > 0) 2268348607Smm return ARCHIVE_OK; 2269339640Smm 2270348607Smm if(ret == ARCHIVE_OK) 2271348607Smm return ARCHIVE_RETRY; 2272348607Smm else 2273348607Smm return ret; 2274339640Smm} 2275339640Smm 2276339640Smmstatic int rar5_read_header(struct archive_read *a, 2277348607Smm struct archive_entry *entry) 2278339640Smm{ 2279348607Smm struct rar5* rar = get_context(a); 2280348607Smm int ret; 2281339640Smm 2282348607Smm if(rar->header_initialized == 0) { 2283348607Smm init_header(a); 2284348607Smm rar->header_initialized = 1; 2285348607Smm } 2286339640Smm 2287348607Smm if(rar->skipped_magic == 0) { 2288358088Smm if(ARCHIVE_OK != consume(a, sizeof(rar5_signature_xor))) { 2289348607Smm return ARCHIVE_EOF; 2290348607Smm } 2291339640Smm 2292348607Smm rar->skipped_magic = 1; 2293348607Smm } 2294339640Smm 2295348607Smm do { 2296348607Smm ret = process_base_block(a, entry); 2297348607Smm } while(ret == ARCHIVE_RETRY || 2298348607Smm (rar->main.endarc > 0 && ret == ARCHIVE_OK)); 2299339640Smm 2300348607Smm return ret; 2301339640Smm} 2302339640Smm 2303339640Smmstatic void init_unpack(struct rar5* rar) { 2304348607Smm rar->file.calculated_crc32 = 0; 2305349524Smm init_window_mask(rar); 2306339640Smm 2307348607Smm free(rar->cstate.window_buf); 2308348607Smm free(rar->cstate.filtered_buf); 2309339640Smm 2310348607Smm if(rar->cstate.window_size > 0) { 2311348607Smm rar->cstate.window_buf = calloc(1, rar->cstate.window_size); 2312348607Smm rar->cstate.filtered_buf = calloc(1, rar->cstate.window_size); 2313348607Smm } else { 2314348607Smm rar->cstate.window_buf = NULL; 2315348607Smm rar->cstate.filtered_buf = NULL; 2316348607Smm } 2317339640Smm 2318348607Smm rar->cstate.write_ptr = 0; 2319348607Smm rar->cstate.last_write_ptr = 0; 2320339640Smm 2321348607Smm memset(&rar->cstate.bd, 0, 
sizeof(rar->cstate.bd)); 2322348607Smm memset(&rar->cstate.ld, 0, sizeof(rar->cstate.ld)); 2323348607Smm memset(&rar->cstate.dd, 0, sizeof(rar->cstate.dd)); 2324348607Smm memset(&rar->cstate.ldd, 0, sizeof(rar->cstate.ldd)); 2325348607Smm memset(&rar->cstate.rd, 0, sizeof(rar->cstate.rd)); 2326339640Smm} 2327339640Smm 2328339640Smmstatic void update_crc(struct rar5* rar, const uint8_t* p, size_t to_read) { 2329339640Smm int verify_crc; 2330339640Smm 2331348607Smm if(rar->skip_mode) { 2332339640Smm#if defined CHECK_CRC_ON_SOLID_SKIP 2333348607Smm verify_crc = 1; 2334339640Smm#else 2335348607Smm verify_crc = 0; 2336339640Smm#endif 2337348607Smm } else 2338348607Smm verify_crc = 1; 2339339640Smm 2340348607Smm if(verify_crc) { 2341348607Smm /* Don't update CRC32 if the file doesn't have the 2342348607Smm * `stored_crc32` info filled in. */ 2343348607Smm if(rar->file.stored_crc32 > 0) { 2344348607Smm rar->file.calculated_crc32 = 2345348607Smm crc32(rar->file.calculated_crc32, p, to_read); 2346348607Smm } 2347339640Smm 2348348607Smm /* Check if the file uses an optional BLAKE2sp checksum 2349348607Smm * algorithm. */ 2350348607Smm if(rar->file.has_blake2 > 0) { 2351348607Smm /* Return value of the `update` function is always 0, 2352348607Smm * so we can explicitly ignore it here. */ 2353348607Smm (void) blake2sp_update(&rar->file.b2state, p, to_read); 2354348607Smm } 2355348607Smm } 2356339640Smm} 2357339640Smm 2358339640Smmstatic int create_decode_tables(uint8_t* bit_length, 2359348607Smm struct decode_table* table, int size) 2360339640Smm{ 2361348607Smm int code, upper_limit = 0, i, lc[16]; 2362348607Smm uint32_t decode_pos_clone[rar5_countof(table->decode_pos)]; 2363348607Smm ssize_t cur_len, quick_data_size; 2364339640Smm 2365348607Smm memset(&lc, 0, sizeof(lc)); 2366348607Smm memset(table->decode_num, 0, sizeof(table->decode_num)); 2367348607Smm table->size = size; 2368348607Smm table->quick_bits = size == HUFF_NC ? 
10 : 7; 2369339640Smm 2370348607Smm for(i = 0; i < size; i++) { 2371348607Smm lc[bit_length[i] & 15]++; 2372348607Smm } 2373339640Smm 2374348607Smm lc[0] = 0; 2375348607Smm table->decode_pos[0] = 0; 2376348607Smm table->decode_len[0] = 0; 2377339640Smm 2378348607Smm for(i = 1; i < 16; i++) { 2379348607Smm upper_limit += lc[i]; 2380339640Smm 2381348607Smm table->decode_len[i] = upper_limit << (16 - i); 2382348607Smm table->decode_pos[i] = table->decode_pos[i - 1] + lc[i - 1]; 2383339640Smm 2384348607Smm upper_limit <<= 1; 2385348607Smm } 2386339640Smm 2387348607Smm memcpy(decode_pos_clone, table->decode_pos, sizeof(decode_pos_clone)); 2388339640Smm 2389348607Smm for(i = 0; i < size; i++) { 2390348607Smm uint8_t clen = bit_length[i] & 15; 2391348607Smm if(clen > 0) { 2392348607Smm int last_pos = decode_pos_clone[clen]; 2393348607Smm table->decode_num[last_pos] = i; 2394348607Smm decode_pos_clone[clen]++; 2395348607Smm } 2396348607Smm } 2397339640Smm 2398348607Smm quick_data_size = (int64_t)1 << table->quick_bits; 2399348607Smm cur_len = 1; 2400348607Smm for(code = 0; code < quick_data_size; code++) { 2401348607Smm int bit_field = code << (16 - table->quick_bits); 2402348607Smm int dist, pos; 2403339640Smm 2404348607Smm while(cur_len < rar5_countof(table->decode_len) && 2405348607Smm bit_field >= table->decode_len[cur_len]) { 2406348607Smm cur_len++; 2407348607Smm } 2408339640Smm 2409348607Smm table->quick_len[code] = (uint8_t) cur_len; 2410339640Smm 2411348607Smm dist = bit_field - table->decode_len[cur_len - 1]; 2412348607Smm dist >>= (16 - cur_len); 2413339640Smm 2414348607Smm pos = table->decode_pos[cur_len & 15] + dist; 2415348607Smm if(cur_len < rar5_countof(table->decode_pos) && pos < size) { 2416348607Smm table->quick_num[code] = table->decode_num[pos]; 2417348607Smm } else { 2418348607Smm table->quick_num[code] = 0; 2419348607Smm } 2420348607Smm } 2421339640Smm 2422348607Smm return ARCHIVE_OK; 2423339640Smm} 2424339640Smm 2425339640Smmstatic int 
decode_number(struct archive_read* a, struct decode_table* table, 2426348607Smm const uint8_t* p, uint16_t* num) 2427339640Smm{ 2428348607Smm int i, bits, dist; 2429348607Smm uint16_t bitfield; 2430348607Smm uint32_t pos; 2431348607Smm struct rar5* rar = get_context(a); 2432339640Smm 2433348607Smm if(ARCHIVE_OK != read_bits_16(rar, p, &bitfield)) { 2434348607Smm return ARCHIVE_EOF; 2435348607Smm } 2436339640Smm 2437348607Smm bitfield &= 0xfffe; 2438339640Smm 2439348607Smm if(bitfield < table->decode_len[table->quick_bits]) { 2440348607Smm int code = bitfield >> (16 - table->quick_bits); 2441348607Smm skip_bits(rar, table->quick_len[code]); 2442348607Smm *num = table->quick_num[code]; 2443348607Smm return ARCHIVE_OK; 2444348607Smm } 2445339640Smm 2446348607Smm bits = 15; 2447339640Smm 2448348607Smm for(i = table->quick_bits + 1; i < 15; i++) { 2449348607Smm if(bitfield < table->decode_len[i]) { 2450348607Smm bits = i; 2451348607Smm break; 2452348607Smm } 2453348607Smm } 2454339640Smm 2455348607Smm skip_bits(rar, bits); 2456339640Smm 2457348607Smm dist = bitfield - table->decode_len[bits - 1]; 2458348607Smm dist >>= (16 - bits); 2459348607Smm pos = table->decode_pos[bits] + dist; 2460339640Smm 2461348607Smm if(pos >= table->size) 2462348607Smm pos = 0; 2463339640Smm 2464348607Smm *num = table->decode_num[pos]; 2465348607Smm return ARCHIVE_OK; 2466339640Smm} 2467339640Smm 2468339640Smm/* Reads and parses Huffman tables from the beginning of the block. 
*/ 2469339640Smmstatic int parse_tables(struct archive_read* a, struct rar5* rar, 2470348607Smm const uint8_t* p) 2471339640Smm{ 2472348607Smm int ret, value, i, w, idx = 0; 2473348607Smm uint8_t bit_length[HUFF_BC], 2474348607Smm table[HUFF_TABLE_SIZE], 2475348607Smm nibble_mask = 0xF0, 2476348607Smm nibble_shift = 4; 2477339640Smm 2478348607Smm enum { ESCAPE = 15 }; 2479339640Smm 2480348607Smm /* The data for table generation is compressed using a simple RLE-like 2481348607Smm * algorithm when storing zeroes, so we need to unpack it first. */ 2482348607Smm for(w = 0, i = 0; w < HUFF_BC;) { 2483348607Smm if(i >= rar->cstate.cur_block_size) { 2484348607Smm /* Truncated data, can't continue. */ 2485348607Smm archive_set_error(&a->archive, 2486348607Smm ARCHIVE_ERRNO_FILE_FORMAT, 2487348607Smm "Truncated data in huffman tables"); 2488348607Smm return ARCHIVE_FATAL; 2489348607Smm } 2490339640Smm 2491348607Smm value = (p[i] & nibble_mask) >> nibble_shift; 2492339640Smm 2493348607Smm if(nibble_mask == 0x0F) 2494348607Smm ++i; 2495339640Smm 2496348607Smm nibble_mask ^= 0xFF; 2497348607Smm nibble_shift ^= 4; 2498339640Smm 2499348607Smm /* Values smaller than 15 is data, so we write it directly. 2500348607Smm * Value 15 is a flag telling us that we need to unpack more 2501348607Smm * bytes. */ 2502348607Smm if(value == ESCAPE) { 2503348607Smm value = (p[i] & nibble_mask) >> nibble_shift; 2504348607Smm if(nibble_mask == 0x0F) 2505348607Smm ++i; 2506348607Smm nibble_mask ^= 0xFF; 2507348607Smm nibble_shift ^= 4; 2508339640Smm 2509348607Smm if(value == 0) { 2510348607Smm /* We sometimes need to write the actual value 2511348607Smm * of 15, so this case handles that. */ 2512348607Smm bit_length[w++] = ESCAPE; 2513348607Smm } else { 2514348607Smm int k; 2515339640Smm 2516348607Smm /* Fill zeroes. 
*/ 2517348607Smm for(k = 0; (k < value + 2) && (w < HUFF_BC); 2518348607Smm k++) { 2519348607Smm bit_length[w++] = 0; 2520348607Smm } 2521348607Smm } 2522348607Smm } else { 2523348607Smm bit_length[w++] = value; 2524348607Smm } 2525348607Smm } 2526339640Smm 2527348607Smm rar->bits.in_addr = i; 2528348607Smm rar->bits.bit_addr = nibble_shift ^ 4; 2529339640Smm 2530348607Smm ret = create_decode_tables(bit_length, &rar->cstate.bd, HUFF_BC); 2531348607Smm if(ret != ARCHIVE_OK) { 2532348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2533348607Smm "Decoding huffman tables failed"); 2534348607Smm return ARCHIVE_FATAL; 2535348607Smm } 2536339640Smm 2537348607Smm for(i = 0; i < HUFF_TABLE_SIZE;) { 2538348607Smm uint16_t num; 2539339640Smm 2540348607Smm if((rar->bits.in_addr + 6) >= rar->cstate.cur_block_size) { 2541348607Smm /* Truncated data, can't continue. */ 2542348607Smm archive_set_error(&a->archive, 2543348607Smm ARCHIVE_ERRNO_FILE_FORMAT, 2544348607Smm "Truncated data in huffman tables (#2)"); 2545348607Smm return ARCHIVE_FATAL; 2546348607Smm } 2547339640Smm 2548348607Smm ret = decode_number(a, &rar->cstate.bd, p, &num); 2549348607Smm if(ret != ARCHIVE_OK) { 2550348607Smm archive_set_error(&a->archive, 2551348607Smm ARCHIVE_ERRNO_FILE_FORMAT, 2552348607Smm "Decoding huffman tables failed"); 2553348607Smm return ARCHIVE_FATAL; 2554348607Smm } 2555339640Smm 2556348607Smm if(num < 16) { 2557348607Smm /* 0..15: store directly */ 2558348607Smm table[i] = (uint8_t) num; 2559348607Smm i++; 2560358088Smm } else if(num < 18) { 2561348607Smm /* 16..17: repeat previous code */ 2562348607Smm uint16_t n; 2563358088Smm 2564348607Smm if(ARCHIVE_OK != read_bits_16(rar, p, &n)) 2565348607Smm return ARCHIVE_EOF; 2566339640Smm 2567348607Smm if(num == 16) { 2568348607Smm n >>= 13; 2569348607Smm n += 3; 2570348607Smm skip_bits(rar, 3); 2571348607Smm } else { 2572348607Smm n >>= 9; 2573348607Smm n += 11; 2574348607Smm skip_bits(rar, 7); 2575348607Smm } 2576339640Smm 
2577348607Smm if(i > 0) { 2578348607Smm while(n-- > 0 && i < HUFF_TABLE_SIZE) { 2579348607Smm table[i] = table[i - 1]; 2580348607Smm i++; 2581348607Smm } 2582348607Smm } else { 2583348607Smm archive_set_error(&a->archive, 2584348607Smm ARCHIVE_ERRNO_FILE_FORMAT, 2585348607Smm "Unexpected error when decoding " 2586348607Smm "huffman tables"); 2587348607Smm return ARCHIVE_FATAL; 2588348607Smm } 2589358088Smm } else { 2590358088Smm /* other codes: fill with zeroes `n` times */ 2591358088Smm uint16_t n; 2592339640Smm 2593358088Smm if(ARCHIVE_OK != read_bits_16(rar, p, &n)) 2594358088Smm return ARCHIVE_EOF; 2595339640Smm 2596358088Smm if(num == 18) { 2597358088Smm n >>= 13; 2598358088Smm n += 3; 2599358088Smm skip_bits(rar, 3); 2600358088Smm } else { 2601358088Smm n >>= 9; 2602358088Smm n += 11; 2603358088Smm skip_bits(rar, 7); 2604358088Smm } 2605339640Smm 2606358088Smm while(n-- > 0 && i < HUFF_TABLE_SIZE) 2607358088Smm table[i++] = 0; 2608348607Smm } 2609348607Smm } 2610339640Smm 2611348607Smm ret = create_decode_tables(&table[idx], &rar->cstate.ld, HUFF_NC); 2612348607Smm if(ret != ARCHIVE_OK) { 2613348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2614348607Smm "Failed to create literal table"); 2615348607Smm return ARCHIVE_FATAL; 2616348607Smm } 2617339640Smm 2618348607Smm idx += HUFF_NC; 2619339640Smm 2620348607Smm ret = create_decode_tables(&table[idx], &rar->cstate.dd, HUFF_DC); 2621348607Smm if(ret != ARCHIVE_OK) { 2622348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2623348607Smm "Failed to create distance table"); 2624348607Smm return ARCHIVE_FATAL; 2625348607Smm } 2626339640Smm 2627348607Smm idx += HUFF_DC; 2628339640Smm 2629348607Smm ret = create_decode_tables(&table[idx], &rar->cstate.ldd, HUFF_LDC); 2630348607Smm if(ret != ARCHIVE_OK) { 2631348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2632348607Smm "Failed to create lower bits of distances table"); 2633348607Smm return ARCHIVE_FATAL; 
2634348607Smm } 2635339640Smm 2636348607Smm idx += HUFF_LDC; 2637348607Smm 2638348607Smm ret = create_decode_tables(&table[idx], &rar->cstate.rd, HUFF_RC); 2639348607Smm if(ret != ARCHIVE_OK) { 2640348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2641348607Smm "Failed to create repeating distances table"); 2642348607Smm return ARCHIVE_FATAL; 2643348607Smm } 2644348607Smm 2645348607Smm return ARCHIVE_OK; 2646339640Smm} 2647339640Smm 2648339640Smm/* Parses the block header, verifies its CRC byte, and saves the header 2649339640Smm * fields inside the `hdr` pointer. */ 2650339640Smmstatic int parse_block_header(struct archive_read* a, const uint8_t* p, 2651348607Smm ssize_t* block_size, struct compressed_block_header* hdr) 2652339640Smm{ 2653358088Smm uint8_t calculated_cksum; 2654348607Smm memcpy(hdr, p, sizeof(struct compressed_block_header)); 2655339640Smm 2656348607Smm if(bf_byte_count(hdr) > 2) { 2657348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2658348607Smm "Unsupported block header size (was %d, max is 2)", 2659348607Smm bf_byte_count(hdr)); 2660348607Smm return ARCHIVE_FATAL; 2661348607Smm } 2662339640Smm 2663348607Smm /* This should probably use bit reader interface in order to be more 2664348607Smm * future-proof. */ 2665348607Smm *block_size = 0; 2666348607Smm switch(bf_byte_count(hdr)) { 2667348607Smm /* 1-byte block size */ 2668348607Smm case 0: 2669348607Smm *block_size = *(const uint8_t*) &p[2]; 2670348607Smm break; 2671339640Smm 2672348607Smm /* 2-byte block size */ 2673348607Smm case 1: 2674348607Smm *block_size = archive_le16dec(&p[2]); 2675348607Smm break; 2676339640Smm 2677348607Smm /* 3-byte block size */ 2678348607Smm case 2: 2679348607Smm *block_size = archive_le32dec(&p[2]); 2680348607Smm *block_size &= 0x00FFFFFF; 2681348607Smm break; 2682339640Smm 2683348607Smm /* Other block sizes are not supported. 
This case is not 2684348607Smm * reached, because we have an 'if' guard before the switch 2685348607Smm * that makes sure of it. */ 2686348607Smm default: 2687348607Smm return ARCHIVE_FATAL; 2688348607Smm } 2689339640Smm 2690348607Smm /* Verify the block header checksum. 0x5A is a magic value and is 2691348607Smm * always * constant. */ 2692358088Smm calculated_cksum = 0x5A 2693348607Smm ^ (uint8_t) hdr->block_flags_u8 2694348607Smm ^ (uint8_t) *block_size 2695348607Smm ^ (uint8_t) (*block_size >> 8) 2696348607Smm ^ (uint8_t) (*block_size >> 16); 2697339640Smm 2698348607Smm if(calculated_cksum != hdr->block_cksum) { 2699348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2700348607Smm "Block checksum error: got 0x%x, expected 0x%x", 2701348607Smm hdr->block_cksum, calculated_cksum); 2702339640Smm 2703348607Smm return ARCHIVE_FATAL; 2704348607Smm } 2705339640Smm 2706348607Smm return ARCHIVE_OK; 2707339640Smm} 2708339640Smm 2709342360Smm/* Convenience function used during filter processing. */ 2710339640Smmstatic int parse_filter_data(struct rar5* rar, const uint8_t* p, 2711348607Smm uint32_t* filter_data) 2712339640Smm{ 2713348607Smm int i, bytes; 2714348607Smm uint32_t data = 0; 2715339640Smm 2716348607Smm if(ARCHIVE_OK != read_consume_bits(rar, p, 2, &bytes)) 2717348607Smm return ARCHIVE_EOF; 2718339640Smm 2719348607Smm bytes++; 2720339640Smm 2721348607Smm for(i = 0; i < bytes; i++) { 2722348607Smm uint16_t byte; 2723339640Smm 2724348607Smm if(ARCHIVE_OK != read_bits_16(rar, p, &byte)) { 2725348607Smm return ARCHIVE_EOF; 2726348607Smm } 2727339640Smm 2728348607Smm /* Cast to uint32_t will ensure the shift operation will not 2729348607Smm * produce undefined result. */ 2730348607Smm data += ((uint32_t) byte >> 8) << (i * 8); 2731348607Smm skip_bits(rar, 8); 2732348607Smm } 2733339640Smm 2734348607Smm *filter_data = data; 2735348607Smm return ARCHIVE_OK; 2736339640Smm} 2737339640Smm 2738339640Smm/* Function is used during sanity checking. 
*/ 2739339640Smmstatic int is_valid_filter_block_start(struct rar5* rar, 2740348607Smm uint32_t start) 2741339640Smm{ 2742348607Smm const int64_t block_start = (ssize_t) start + rar->cstate.write_ptr; 2743348607Smm const int64_t last_bs = rar->cstate.last_block_start; 2744348607Smm const ssize_t last_bl = rar->cstate.last_block_length; 2745339640Smm 2746348607Smm if(last_bs == 0 || last_bl == 0) { 2747348607Smm /* We didn't have any filters yet, so accept this offset. */ 2748348607Smm return 1; 2749348607Smm } 2750339640Smm 2751348607Smm if(block_start >= last_bs + last_bl) { 2752348607Smm /* Current offset is bigger than last block's end offset, so 2753348607Smm * accept current offset. */ 2754348607Smm return 1; 2755348607Smm } 2756339640Smm 2757348607Smm /* Any other case is not a normal situation and we should fail. */ 2758348607Smm return 0; 2759339640Smm} 2760339640Smm 2761339640Smm/* The function will create a new filter, read its parameters from the input 2762339640Smm * stream and add it to the filter collection. */ 2763339640Smmstatic int parse_filter(struct archive_read* ar, const uint8_t* p) { 2764348607Smm uint32_t block_start, block_length; 2765348607Smm uint16_t filter_type; 2766358088Smm struct filter_info* filt = NULL; 2767348607Smm struct rar5* rar = get_context(ar); 2768339640Smm 2769348607Smm /* Read the parameters from the input stream. */ 2770348607Smm if(ARCHIVE_OK != parse_filter_data(rar, p, &block_start)) 2771348607Smm return ARCHIVE_EOF; 2772339640Smm 2773348607Smm if(ARCHIVE_OK != parse_filter_data(rar, p, &block_length)) 2774348607Smm return ARCHIVE_EOF; 2775339640Smm 2776348607Smm if(ARCHIVE_OK != read_bits_16(rar, p, &filter_type)) 2777348607Smm return ARCHIVE_EOF; 2778339640Smm 2779348607Smm filter_type >>= 13; 2780348607Smm skip_bits(rar, 3); 2781339640Smm 2782348607Smm /* Perform some sanity checks on this filter parameters. 
Note that we 2783348607Smm * allow only DELTA, E8/E9 and ARM filters here, because rest of 2784348607Smm * filters are not used in RARv5. */ 2785339640Smm 2786348607Smm if(block_length < 4 || 2787348607Smm block_length > 0x400000 || 2788348607Smm filter_type > FILTER_ARM || 2789348607Smm !is_valid_filter_block_start(rar, block_start)) 2790348607Smm { 2791348607Smm archive_set_error(&ar->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2792348607Smm "Invalid filter encountered"); 2793348607Smm return ARCHIVE_FATAL; 2794348607Smm } 2795339640Smm 2796348607Smm /* Allocate a new filter. */ 2797358088Smm filt = add_new_filter(rar); 2798348607Smm if(filt == NULL) { 2799348607Smm archive_set_error(&ar->archive, ENOMEM, 2800348607Smm "Can't allocate memory for a filter descriptor."); 2801348607Smm return ARCHIVE_FATAL; 2802348607Smm } 2803339640Smm 2804348607Smm filt->type = filter_type; 2805348607Smm filt->block_start = rar->cstate.write_ptr + block_start; 2806348607Smm filt->block_length = block_length; 2807339640Smm 2808348607Smm rar->cstate.last_block_start = filt->block_start; 2809348607Smm rar->cstate.last_block_length = filt->block_length; 2810339640Smm 2811348607Smm /* Read some more data in case this is a DELTA filter. Other filter 2812348607Smm * types don't require any additional data over what was already 2813348607Smm * read. 
*/ 2814348607Smm if(filter_type == FILTER_DELTA) { 2815348607Smm int channels; 2816339640Smm 2817348607Smm if(ARCHIVE_OK != read_consume_bits(rar, p, 5, &channels)) 2818348607Smm return ARCHIVE_EOF; 2819339640Smm 2820348607Smm filt->channels = channels + 1; 2821348607Smm } 2822339640Smm 2823348607Smm return ARCHIVE_OK; 2824339640Smm} 2825339640Smm 2826339640Smmstatic int decode_code_length(struct rar5* rar, const uint8_t* p, 2827348607Smm uint16_t code) 2828339640Smm{ 2829348607Smm int lbits, length = 2; 2830348607Smm if(code < 8) { 2831348607Smm lbits = 0; 2832348607Smm length += code; 2833348607Smm } else { 2834348607Smm lbits = code / 4 - 1; 2835348607Smm length += (4 | (code & 3)) << lbits; 2836348607Smm } 2837339640Smm 2838348607Smm if(lbits > 0) { 2839348607Smm int add; 2840339640Smm 2841348607Smm if(ARCHIVE_OK != read_consume_bits(rar, p, lbits, &add)) 2842348607Smm return -1; 2843339640Smm 2844348607Smm length += add; 2845348607Smm } 2846339640Smm 2847348607Smm return length; 2848339640Smm} 2849339640Smm 2850339640Smmstatic int copy_string(struct archive_read* a, int len, int dist) { 2851348607Smm struct rar5* rar = get_context(a); 2852348607Smm const uint64_t cmask = rar->cstate.window_mask; 2853348607Smm const uint64_t write_ptr = rar->cstate.write_ptr + 2854348607Smm rar->cstate.solid_offset; 2855348607Smm int i; 2856339640Smm 2857348607Smm if (rar->cstate.window_buf == NULL) 2858348607Smm return ARCHIVE_FATAL; 2859339640Smm 2860348607Smm /* The unpacker spends most of the time in this function. It would be 2861348607Smm * a good idea to introduce some optimizations here. 2862348607Smm * 2863348607Smm * Just remember that this loop treats buffers that overlap differently 2864348607Smm * than buffers that do not overlap. This is why a simple memcpy(3) 2865348607Smm * call will not be enough. 
*/ 2866339640Smm 2867348607Smm for(i = 0; i < len; i++) { 2868348607Smm const ssize_t write_idx = (write_ptr + i) & cmask; 2869348607Smm const ssize_t read_idx = (write_ptr + i - dist) & cmask; 2870348607Smm rar->cstate.window_buf[write_idx] = 2871348607Smm rar->cstate.window_buf[read_idx]; 2872348607Smm } 2873348607Smm 2874348607Smm rar->cstate.write_ptr += len; 2875348607Smm return ARCHIVE_OK; 2876339640Smm} 2877339640Smm 2878339640Smmstatic int do_uncompress_block(struct archive_read* a, const uint8_t* p) { 2879348607Smm struct rar5* rar = get_context(a); 2880348607Smm uint16_t num; 2881348607Smm int ret; 2882339640Smm 2883348607Smm const uint64_t cmask = rar->cstate.window_mask; 2884348607Smm const struct compressed_block_header* hdr = &rar->last_block_hdr; 2885348607Smm const uint8_t bit_size = 1 + bf_bit_size(hdr); 2886339640Smm 2887348607Smm while(1) { 2888348607Smm if(rar->cstate.write_ptr - rar->cstate.last_write_ptr > 2889348607Smm (rar->cstate.window_size >> 1)) { 2890348607Smm /* Don't allow growing data by more than half of the 2891348607Smm * window size at a time. In such case, break the loop; 2892348607Smm * next call to this function will continue processing 2893348607Smm * from this moment. */ 2894348607Smm break; 2895348607Smm } 2896339640Smm 2897348607Smm if(rar->bits.in_addr > rar->cstate.cur_block_size - 1 || 2898348607Smm (rar->bits.in_addr == rar->cstate.cur_block_size - 1 && 2899348607Smm rar->bits.bit_addr >= bit_size)) 2900348607Smm { 2901348607Smm /* If the program counter is here, it means the 2902348607Smm * function has finished processing the block. */ 2903348607Smm rar->cstate.block_parsing_finished = 1; 2904348607Smm break; 2905348607Smm } 2906339640Smm 2907348607Smm /* Decode the next literal. */ 2908348607Smm if(ARCHIVE_OK != decode_number(a, &rar->cstate.ld, p, &num)) { 2909348607Smm return ARCHIVE_EOF; 2910348607Smm } 2911339640Smm 2912348607Smm /* Num holds a decompression literal, or 'command code'. 
2913348607Smm * 2914348607Smm * - Values lower than 256 are just bytes. Those codes 2915348607Smm * can be stored in the output buffer directly. 2916348607Smm * 2917349524Smm * - Code 256 defines a new filter, which is later used to 2918348607Smm * ransform the data block accordingly to the filter type. 2919348607Smm * The data block needs to be fully uncompressed first. 2920348607Smm * 2921348607Smm * - Code bigger than 257 and smaller than 262 define 2922348607Smm * a repetition pattern that should be copied from 2923348607Smm * an already uncompressed chunk of data. 2924348607Smm */ 2925339640Smm 2926348607Smm if(num < 256) { 2927348607Smm /* Directly store the byte. */ 2928348607Smm int64_t write_idx = rar->cstate.solid_offset + 2929348607Smm rar->cstate.write_ptr++; 2930339640Smm 2931348607Smm rar->cstate.window_buf[write_idx & cmask] = 2932348607Smm (uint8_t) num; 2933348607Smm continue; 2934348607Smm } else if(num >= 262) { 2935348607Smm uint16_t dist_slot; 2936348607Smm int len = decode_code_length(rar, p, num - 262), 2937348607Smm dbits, 2938348607Smm dist = 1; 2939339640Smm 2940348607Smm if(len == -1) { 2941348607Smm archive_set_error(&a->archive, 2942348607Smm ARCHIVE_ERRNO_PROGRAMMER, 2943348607Smm "Failed to decode the code length"); 2944339640Smm 2945348607Smm return ARCHIVE_FATAL; 2946348607Smm } 2947339640Smm 2948348607Smm if(ARCHIVE_OK != decode_number(a, &rar->cstate.dd, p, 2949348607Smm &dist_slot)) 2950348607Smm { 2951348607Smm archive_set_error(&a->archive, 2952348607Smm ARCHIVE_ERRNO_PROGRAMMER, 2953348607Smm "Failed to decode the distance slot"); 2954339640Smm 2955348607Smm return ARCHIVE_FATAL; 2956348607Smm } 2957339640Smm 2958348607Smm if(dist_slot < 4) { 2959348607Smm dbits = 0; 2960348607Smm dist += dist_slot; 2961348607Smm } else { 2962348607Smm dbits = dist_slot / 2 - 1; 2963339640Smm 2964348607Smm /* Cast to uint32_t will make sure the shift 2965348607Smm * left operation won't produce undefined 2966348607Smm * result. 
Then, the uint32_t type will 2967348607Smm * be implicitly casted to int. */ 2968348607Smm dist += (uint32_t) (2 | 2969348607Smm (dist_slot & 1)) << dbits; 2970348607Smm } 2971339640Smm 2972348607Smm if(dbits > 0) { 2973348607Smm if(dbits >= 4) { 2974348607Smm uint32_t add = 0; 2975348607Smm uint16_t low_dist; 2976339640Smm 2977348607Smm if(dbits > 4) { 2978348607Smm if(ARCHIVE_OK != read_bits_32( 2979348607Smm rar, p, &add)) { 2980348607Smm /* Return EOF if we 2981348607Smm * can't read more 2982348607Smm * data. */ 2983348607Smm return ARCHIVE_EOF; 2984348607Smm } 2985339640Smm 2986348607Smm skip_bits(rar, dbits - 4); 2987348607Smm add = (add >> ( 2988348607Smm 36 - dbits)) << 4; 2989348607Smm dist += add; 2990348607Smm } 2991339640Smm 2992348607Smm if(ARCHIVE_OK != decode_number(a, 2993348607Smm &rar->cstate.ldd, p, &low_dist)) 2994348607Smm { 2995348607Smm archive_set_error(&a->archive, 2996348607Smm ARCHIVE_ERRNO_PROGRAMMER, 2997348607Smm "Failed to decode the " 2998348607Smm "distance slot"); 2999339640Smm 3000348607Smm return ARCHIVE_FATAL; 3001348607Smm } 3002339640Smm 3003348607Smm if(dist >= INT_MAX - low_dist - 1) { 3004348607Smm /* This only happens in 3005348607Smm * invalid archives. */ 3006348607Smm archive_set_error(&a->archive, 3007348607Smm ARCHIVE_ERRNO_FILE_FORMAT, 3008348607Smm "Distance pointer " 3009348607Smm "overflow"); 3010348607Smm return ARCHIVE_FATAL; 3011348607Smm } 3012339640Smm 3013348607Smm dist += low_dist; 3014348607Smm } else { 3015348607Smm /* dbits is one of [0,1,2,3] */ 3016348607Smm int add; 3017339640Smm 3018348607Smm if(ARCHIVE_OK != read_consume_bits(rar, 3019348607Smm p, dbits, &add)) { 3020348607Smm /* Return EOF if we can't read 3021348607Smm * more data. 
*/ 3022348607Smm return ARCHIVE_EOF; 3023348607Smm } 3024339640Smm 3025348607Smm dist += add; 3026348607Smm } 3027348607Smm } 3028339640Smm 3029348607Smm if(dist > 0x100) { 3030348607Smm len++; 3031339640Smm 3032348607Smm if(dist > 0x2000) { 3033348607Smm len++; 3034339640Smm 3035348607Smm if(dist > 0x40000) { 3036348607Smm len++; 3037348607Smm } 3038348607Smm } 3039348607Smm } 3040339640Smm 3041348607Smm dist_cache_push(rar, dist); 3042348607Smm rar->cstate.last_len = len; 3043339640Smm 3044348607Smm if(ARCHIVE_OK != copy_string(a, len, dist)) 3045348607Smm return ARCHIVE_FATAL; 3046339640Smm 3047348607Smm continue; 3048348607Smm } else if(num == 256) { 3049348607Smm /* Create a filter. */ 3050348607Smm ret = parse_filter(a, p); 3051348607Smm if(ret != ARCHIVE_OK) 3052348607Smm return ret; 3053339640Smm 3054348607Smm continue; 3055348607Smm } else if(num == 257) { 3056348607Smm if(rar->cstate.last_len != 0) { 3057348607Smm if(ARCHIVE_OK != copy_string(a, 3058348607Smm rar->cstate.last_len, 3059348607Smm rar->cstate.dist_cache[0])) 3060348607Smm { 3061348607Smm return ARCHIVE_FATAL; 3062348607Smm } 3063348607Smm } 3064339640Smm 3065348607Smm continue; 3066358088Smm } else { 3067358088Smm /* num < 262 */ 3068348607Smm const int idx = num - 258; 3069348607Smm const int dist = dist_cache_touch(rar, idx); 3070339640Smm 3071348607Smm uint16_t len_slot; 3072348607Smm int len; 3073339640Smm 3074348607Smm if(ARCHIVE_OK != decode_number(a, &rar->cstate.rd, p, 3075348607Smm &len_slot)) { 3076348607Smm return ARCHIVE_FATAL; 3077348607Smm } 3078339640Smm 3079348607Smm len = decode_code_length(rar, p, len_slot); 3080348607Smm rar->cstate.last_len = len; 3081339640Smm 3082348607Smm if(ARCHIVE_OK != copy_string(a, len, dist)) 3083348607Smm return ARCHIVE_FATAL; 3084339640Smm 3085348607Smm continue; 3086348607Smm } 3087348607Smm } 3088339640Smm 3089348607Smm return ARCHIVE_OK; 3090339640Smm} 3091339640Smm 3092339640Smm/* Binary search for the RARv5 signature. 
*/ 3093339640Smmstatic int scan_for_signature(struct archive_read* a) { 3094348607Smm const uint8_t* p; 3095348607Smm const int chunk_size = 512; 3096348607Smm ssize_t i; 3097358088Smm char signature[sizeof(rar5_signature_xor)]; 3098339640Smm 3099348607Smm /* If we're here, it means we're on an 'unknown territory' data. 3100348607Smm * There's no indication what kind of data we're reading here. 3101348607Smm * It could be some text comment, any kind of binary data, 3102348607Smm * digital sign, dragons, etc. 3103348607Smm * 3104348607Smm * We want to find a valid RARv5 magic header inside this unknown 3105348607Smm * data. */ 3106339640Smm 3107348607Smm /* Is it possible in libarchive to just skip everything until the 3108348607Smm * end of the file? If so, it would be a better approach than the 3109348607Smm * current implementation of this function. */ 3110339640Smm 3111358088Smm rar5_signature(signature); 3112358088Smm 3113348607Smm while(1) { 3114348607Smm if(!read_ahead(a, chunk_size, &p)) 3115348607Smm return ARCHIVE_EOF; 3116339640Smm 3117358088Smm for(i = 0; i < chunk_size - (int)sizeof(rar5_signature_xor); 3118358088Smm i++) { 3119358088Smm if(memcmp(&p[i], signature, 3120358088Smm sizeof(rar5_signature_xor)) == 0) { 3121348607Smm /* Consume the number of bytes we've used to 3122348607Smm * search for the signature, as well as the 3123348607Smm * number of bytes used by the signature 3124348607Smm * itself. After this we should be standing 3125348607Smm * on a valid base block header. */ 3126358088Smm (void) consume(a, 3127358088Smm i + sizeof(rar5_signature_xor)); 3128348607Smm return ARCHIVE_OK; 3129348607Smm } 3130348607Smm } 3131339640Smm 3132348607Smm consume(a, chunk_size); 3133348607Smm } 3134339640Smm 3135348607Smm return ARCHIVE_FATAL; 3136339640Smm} 3137339640Smm 3138339640Smm/* This function will switch the multivolume archive file to another file, 3139339640Smm * i.e. from part03 to part 04. 
*/ 3140339640Smmstatic int advance_multivolume(struct archive_read* a) { 3141348607Smm int lret; 3142348607Smm struct rar5* rar = get_context(a); 3143339640Smm 3144348607Smm /* A small state machine that will skip unnecessary data, needed to 3145348607Smm * switch from one multivolume to another. Such skipping is needed if 3146348607Smm * we want to be an stream-oriented (instead of file-oriented) 3147348607Smm * unpacker. 3148348607Smm * 3149348607Smm * The state machine starts with `rar->main.endarc` == 0. It also 3150348607Smm * assumes that current stream pointer points to some base block 3151348607Smm * header. 3152348607Smm * 3153348607Smm * The `endarc` field is being set when the base block parsing 3154348607Smm * function encounters the 'end of archive' marker. 3155348607Smm */ 3156339640Smm 3157348607Smm while(1) { 3158348607Smm if(rar->main.endarc == 1) { 3159348607Smm int looping = 1; 3160339640Smm 3161348607Smm rar->main.endarc = 0; 3162339640Smm 3163348607Smm while(looping) { 3164348607Smm lret = skip_base_block(a); 3165348607Smm switch(lret) { 3166348607Smm case ARCHIVE_RETRY: 3167348607Smm /* Continue looping. */ 3168348607Smm break; 3169348607Smm case ARCHIVE_OK: 3170348607Smm /* Break loop. */ 3171348607Smm looping = 0; 3172348607Smm break; 3173348607Smm default: 3174348607Smm /* Forward any errors to the 3175348607Smm * caller. */ 3176348607Smm return lret; 3177348607Smm } 3178348607Smm } 3179339640Smm 3180348607Smm break; 3181348607Smm } else { 3182348607Smm /* Skip current base block. In order to properly skip 3183348607Smm * it, we really need to simply parse it and discard 3184348607Smm * the results. */ 3185339640Smm 3186348607Smm lret = skip_base_block(a); 3187348607Smm if(lret == ARCHIVE_FATAL || lret == ARCHIVE_FAILED) 3188348607Smm return lret; 3189339640Smm 3190348607Smm /* The `skip_base_block` function tells us if we 3191348607Smm * should continue with skipping, or we should stop 3192348607Smm * skipping. 
We're trying to skip everything up to 3193348607Smm * a base FILE block. */ 3194348607Smm 3195348607Smm if(lret != ARCHIVE_RETRY) { 3196348607Smm /* If there was an error during skipping, or we 3197348607Smm * have just skipped a FILE base block... */ 3198348607Smm 3199348607Smm if(rar->main.endarc == 0) { 3200348607Smm return lret; 3201348607Smm } else { 3202348607Smm continue; 3203348607Smm } 3204348607Smm } 3205348607Smm } 3206348607Smm } 3207348607Smm 3208348607Smm return ARCHIVE_OK; 3209339640Smm} 3210339640Smm 3211339640Smm/* Merges the partial block from the first multivolume archive file, and 3212339640Smm * partial block from the second multivolume archive file. The result is 3213339640Smm * a chunk of memory containing the whole block, and the stream pointer 3214339640Smm * is advanced to the next block in the second multivolume archive file. */ 3215339640Smmstatic int merge_block(struct archive_read* a, ssize_t block_size, 3216348607Smm const uint8_t** p) 3217339640Smm{ 3218348607Smm struct rar5* rar = get_context(a); 3219348607Smm ssize_t cur_block_size, partial_offset = 0; 3220348607Smm const uint8_t* lp; 3221348607Smm int ret; 3222339640Smm 3223348607Smm if(rar->merge_mode) { 3224348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 3225348607Smm "Recursive merge is not allowed"); 3226339640Smm 3227348607Smm return ARCHIVE_FATAL; 3228348607Smm } 3229339640Smm 3230348607Smm /* Set a flag that we're in the switching mode. */ 3231348607Smm rar->cstate.switch_multivolume = 1; 3232339640Smm 3233348607Smm /* Reallocate the memory which will hold the whole block. */ 3234348607Smm if(rar->vol.push_buf) 3235348607Smm free((void*) rar->vol.push_buf); 3236344673Smm 3237348607Smm /* Increasing the allocation block by 8 is due to bit reading functions, 3238348607Smm * which are using additional 2 or 4 bytes. 
Allocating the block size 3239348607Smm * by exact value would make bit reader perform reads from invalid 3240348607Smm * memory block when reading the last byte from the buffer. */ 3241348607Smm rar->vol.push_buf = malloc(block_size + 8); 3242348607Smm if(!rar->vol.push_buf) { 3243348607Smm archive_set_error(&a->archive, ENOMEM, 3244348607Smm "Can't allocate memory for a merge block buffer."); 3245348607Smm return ARCHIVE_FATAL; 3246348607Smm } 3247339640Smm 3248348607Smm /* Valgrind complains if the extension block for bit reader is not 3249348607Smm * initialized, so initialize it. */ 3250348607Smm memset(&rar->vol.push_buf[block_size], 0, 8); 3251339640Smm 3252348607Smm /* A single block can span across multiple multivolume archive files, 3253348607Smm * so we use a loop here. This loop will consume enough multivolume 3254348607Smm * archive files until the whole block is read. */ 3255342360Smm 3256348607Smm while(1) { 3257348607Smm /* Get the size of current block chunk in this multivolume 3258348607Smm * archive file and read it. */ 3259348607Smm cur_block_size = rar5_min(rar->file.bytes_remaining, 3260348607Smm block_size - partial_offset); 3261339640Smm 3262348607Smm if(cur_block_size == 0) { 3263348607Smm archive_set_error(&a->archive, 3264348607Smm ARCHIVE_ERRNO_FILE_FORMAT, 3265348607Smm "Encountered block size == 0 during block merge"); 3266348607Smm return ARCHIVE_FATAL; 3267348607Smm } 3268339640Smm 3269348607Smm if(!read_ahead(a, cur_block_size, &lp)) 3270348607Smm return ARCHIVE_EOF; 3271339640Smm 3272348607Smm /* Sanity check; there should never be a situation where this 3273348607Smm * function reads more data than the block's size. 
*/ 3274348607Smm if(partial_offset + cur_block_size > block_size) { 3275348607Smm archive_set_error(&a->archive, 3276348607Smm ARCHIVE_ERRNO_PROGRAMMER, 3277348607Smm "Consumed too much data when merging blocks."); 3278348607Smm return ARCHIVE_FATAL; 3279348607Smm } 3280339640Smm 3281348607Smm /* Merge previous block chunk with current block chunk, 3282348607Smm * or create first block chunk if this is our first 3283348607Smm * iteration. */ 3284348607Smm memcpy(&rar->vol.push_buf[partial_offset], lp, cur_block_size); 3285339640Smm 3286348607Smm /* Advance the stream read pointer by this block chunk size. */ 3287348607Smm if(ARCHIVE_OK != consume(a, cur_block_size)) 3288348607Smm return ARCHIVE_EOF; 3289339640Smm 3290348607Smm /* Update the pointers. `partial_offset` contains information 3291348607Smm * about the sum of merged block chunks. */ 3292348607Smm partial_offset += cur_block_size; 3293348607Smm rar->file.bytes_remaining -= cur_block_size; 3294339640Smm 3295348607Smm /* If `partial_offset` is the same as `block_size`, this means 3296348607Smm * we've merged all block chunks and we have a valid full 3297348607Smm * block. */ 3298348607Smm if(partial_offset == block_size) { 3299348607Smm break; 3300348607Smm } 3301339640Smm 3302348607Smm /* If we don't have any bytes to read, this means we should 3303348607Smm * switch to another multivolume archive file. */ 3304348607Smm if(rar->file.bytes_remaining == 0) { 3305348607Smm rar->merge_mode++; 3306348607Smm ret = advance_multivolume(a); 3307348607Smm rar->merge_mode--; 3308348607Smm if(ret != ARCHIVE_OK) { 3309348607Smm return ret; 3310348607Smm } 3311348607Smm } 3312348607Smm } 3313339640Smm 3314348607Smm *p = rar->vol.push_buf; 3315348607Smm 3316348607Smm /* If we're here, we can resume unpacking by processing the block 3317348607Smm * pointed to by the `*p` memory pointer. 
*/ 3318348607Smm 3319348607Smm return ARCHIVE_OK; 3320339640Smm} 3321339640Smm 3322339640Smmstatic int process_block(struct archive_read* a) { 3323348607Smm const uint8_t* p; 3324348607Smm struct rar5* rar = get_context(a); 3325348607Smm int ret; 3326339640Smm 3327348607Smm /* If we don't have any data to be processed, this most probably means 3328348607Smm * we need to switch to the next volume. */ 3329348607Smm if(rar->main.volume && rar->file.bytes_remaining == 0) { 3330348607Smm ret = advance_multivolume(a); 3331348607Smm if(ret != ARCHIVE_OK) 3332348607Smm return ret; 3333348607Smm } 3334339640Smm 3335348607Smm if(rar->cstate.block_parsing_finished) { 3336348607Smm ssize_t block_size; 3337358088Smm ssize_t to_skip; 3338358088Smm ssize_t cur_block_size; 3339339640Smm 3340348607Smm /* The header size won't be bigger than 6 bytes. */ 3341348607Smm if(!read_ahead(a, 6, &p)) { 3342348607Smm /* Failed to prefetch data block header. */ 3343348607Smm return ARCHIVE_EOF; 3344348607Smm } 3345339640Smm 3346348607Smm /* 3347348607Smm * Read block_size by parsing block header. Validate the header 3348348607Smm * by calculating CRC byte stored inside the header. Size of 3349348607Smm * the header is not constant (block size can be stored either 3350348607Smm * in 1 or 2 bytes), that's why block size is left out from the 3351348607Smm * `compressed_block_header` structure and returned by 3352348607Smm * `parse_block_header` as the second argument. */ 3353339640Smm 3354348607Smm ret = parse_block_header(a, p, &block_size, 3355348607Smm &rar->last_block_hdr); 3356348607Smm if(ret != ARCHIVE_OK) { 3357348607Smm return ret; 3358348607Smm } 3359339640Smm 3360348607Smm /* Skip block header. Next data is huffman tables, 3361348607Smm * if present. 
*/ 3362358088Smm to_skip = sizeof(struct compressed_block_header) + 3363348607Smm bf_byte_count(&rar->last_block_hdr) + 1; 3364339640Smm 3365348607Smm if(ARCHIVE_OK != consume(a, to_skip)) 3366348607Smm return ARCHIVE_EOF; 3367339640Smm 3368348607Smm rar->file.bytes_remaining -= to_skip; 3369339640Smm 3370348607Smm /* The block size gives information about the whole block size, 3371348607Smm * but the block could be stored in split form when using 3372348607Smm * multi-volume archives. In this case, the block size will be 3373348607Smm * bigger than the actual data stored in this file. Remaining 3374348607Smm * part of the data will be in another file. */ 3375339640Smm 3376358088Smm cur_block_size = 3377348607Smm rar5_min(rar->file.bytes_remaining, block_size); 3378339640Smm 3379348607Smm if(block_size > rar->file.bytes_remaining) { 3380348607Smm /* If current blocks' size is bigger than our data 3381348607Smm * size, this means we have a multivolume archive. 3382348607Smm * In this case, skip all base headers until the end 3383348607Smm * of the file, proceed to next "partXXX.rar" volume, 3384348607Smm * find its signature, skip all headers up to the first 3385348607Smm * FILE base header, and continue from there. 3386348607Smm * 3387348607Smm * Note that `merge_block` will update the `rar` 3388348607Smm * context structure quite extensively. */ 3389339640Smm 3390348607Smm ret = merge_block(a, block_size, &p); 3391348607Smm if(ret != ARCHIVE_OK) { 3392348607Smm return ret; 3393348607Smm } 3394339640Smm 3395348607Smm cur_block_size = block_size; 3396339640Smm 3397348607Smm /* Current stream pointer should be now directly 3398348607Smm * *after* the block that spanned through multiple 3399348607Smm * archive files. `p` pointer should have the data of 3400348607Smm * the *whole* block (merged from partial blocks 3401348607Smm * stored in multiple archives files). 
*/ 3402348607Smm } else { 3403348607Smm rar->cstate.switch_multivolume = 0; 3404339640Smm 3405348607Smm /* Read the whole block size into memory. This can take 3406348607Smm * up to 8 megabytes of memory in theoretical cases. 3407348607Smm * Might be worth to optimize this and use a standard 3408348607Smm * chunk of 4kb's. */ 3409348607Smm if(!read_ahead(a, 4 + cur_block_size, &p)) { 3410348607Smm /* Failed to prefetch block data. */ 3411348607Smm return ARCHIVE_EOF; 3412348607Smm } 3413348607Smm } 3414339640Smm 3415348607Smm rar->cstate.block_buf = p; 3416348607Smm rar->cstate.cur_block_size = cur_block_size; 3417348607Smm rar->cstate.block_parsing_finished = 0; 3418339640Smm 3419348607Smm rar->bits.in_addr = 0; 3420348607Smm rar->bits.bit_addr = 0; 3421339640Smm 3422348607Smm if(bf_is_table_present(&rar->last_block_hdr)) { 3423348607Smm /* Load Huffman tables. */ 3424348607Smm ret = parse_tables(a, rar, p); 3425348607Smm if(ret != ARCHIVE_OK) { 3426348607Smm /* Error during decompression of Huffman 3427348607Smm * tables. */ 3428348607Smm return ret; 3429348607Smm } 3430348607Smm } 3431348607Smm } else { 3432348607Smm /* Block parsing not finished, reuse previous memory buffer. */ 3433348607Smm p = rar->cstate.block_buf; 3434348607Smm } 3435339640Smm 3436348607Smm /* Uncompress the block, or a part of it, depending on how many bytes 3437348607Smm * will be generated by uncompressing the block. 3438348607Smm * 3439348607Smm * In case too many bytes will be generated, calling this function 3440348607Smm * again will resume the uncompression operation. */ 3441348607Smm ret = do_uncompress_block(a, p); 3442348607Smm if(ret != ARCHIVE_OK) { 3443348607Smm return ret; 3444348607Smm } 3445339640Smm 3446348607Smm if(rar->cstate.block_parsing_finished && 3447348607Smm rar->cstate.switch_multivolume == 0 && 3448348607Smm rar->cstate.cur_block_size > 0) 3449348607Smm { 3450348607Smm /* If we're processing a normal block, consume the whole 3451348607Smm * block. 
We can do this because we've already read the whole 3452348607Smm * block to memory. */ 3453348607Smm if(ARCHIVE_OK != consume(a, rar->cstate.cur_block_size)) 3454348607Smm return ARCHIVE_FATAL; 3455339640Smm 3456348607Smm rar->file.bytes_remaining -= rar->cstate.cur_block_size; 3457348607Smm } else if(rar->cstate.switch_multivolume) { 3458348607Smm /* Don't consume the block if we're doing multivolume 3459348607Smm * processing. The volume switching function will consume 3460348607Smm * the proper count of bytes instead. */ 3461348607Smm rar->cstate.switch_multivolume = 0; 3462348607Smm } 3463339640Smm 3464348607Smm return ARCHIVE_OK; 3465339640Smm} 3466339640Smm 3467339640Smm/* Pops the `buf`, `size` and `offset` from the "data ready" stack. 3468339640Smm * 3469339640Smm * Returns ARCHIVE_OK when those arguments can be used, ARCHIVE_RETRY 3470339640Smm * when there is no data on the stack. */ 3471339640Smmstatic int use_data(struct rar5* rar, const void** buf, size_t* size, 3472348607Smm int64_t* offset) 3473339640Smm{ 3474348607Smm int i; 3475339640Smm 3476348607Smm for(i = 0; i < rar5_countof(rar->cstate.dready); i++) { 3477348607Smm struct data_ready *d = &rar->cstate.dready[i]; 3478339640Smm 3479348607Smm if(d->used) { 3480348607Smm if(buf) *buf = d->buf; 3481348607Smm if(size) *size = d->size; 3482348607Smm if(offset) *offset = d->offset; 3483339640Smm 3484348607Smm d->used = 0; 3485348607Smm return ARCHIVE_OK; 3486348607Smm } 3487348607Smm } 3488339640Smm 3489348607Smm return ARCHIVE_RETRY; 3490339640Smm} 3491339640Smm 3492339640Smm/* Pushes the `buf`, `size` and `offset` arguments to the rar->cstate.dready 3493339640Smm * FIFO stack. Those values will be popped from this stack by the `use_data` 3494339640Smm * function. 
*/ 3495339640Smmstatic int push_data_ready(struct archive_read* a, struct rar5* rar, 3496348607Smm const uint8_t* buf, size_t size, int64_t offset) 3497339640Smm{ 3498348607Smm int i; 3499339640Smm 3500348607Smm /* Don't push if we're in skip mode. This is needed because solid 3501348607Smm * streams need full processing even if we're skipping data. After 3502348607Smm * fully processing the stream, we need to discard the generated bytes, 3503348607Smm * because we're interested only in the side effect: building up the 3504348607Smm * internal window circular buffer. This window buffer will be used 3505348607Smm * later during unpacking of requested data. */ 3506348607Smm if(rar->skip_mode) 3507348607Smm return ARCHIVE_OK; 3508339640Smm 3509348607Smm /* Sanity check. */ 3510348607Smm if(offset != rar->file.last_offset + rar->file.last_size) { 3511348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 3512348607Smm "Sanity check error: output stream is not continuous"); 3513348607Smm return ARCHIVE_FATAL; 3514348607Smm } 3515339640Smm 3516348607Smm for(i = 0; i < rar5_countof(rar->cstate.dready); i++) { 3517348607Smm struct data_ready* d = &rar->cstate.dready[i]; 3518348607Smm if(!d->used) { 3519348607Smm d->used = 1; 3520348607Smm d->buf = buf; 3521348607Smm d->size = size; 3522348607Smm d->offset = offset; 3523339640Smm 3524348607Smm /* These fields are used only in sanity checking. */ 3525348607Smm rar->file.last_offset = offset; 3526348607Smm rar->file.last_size = size; 3527339640Smm 3528348607Smm /* Calculate the checksum of this new block before 3529348607Smm * submitting data to libarchive's engine. */ 3530348607Smm update_crc(rar, d->buf, d->size); 3531339640Smm 3532348607Smm return ARCHIVE_OK; 3533348607Smm } 3534348607Smm } 3535339640Smm 3536348607Smm /* Program counter will reach this code if the `rar->cstate.data_ready` 3537348607Smm * stack will be filled up so that no new entries will be allowed. 
The 3538348607Smm * code shouldn't allow such situation to occur. So we treat this case 3539348607Smm * as an internal error. */ 3540339640Smm 3541348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 3542348607Smm "Error: premature end of data_ready stack"); 3543348607Smm return ARCHIVE_FATAL; 3544339640Smm} 3545339640Smm 3546339640Smm/* This function uncompresses the data that is stored in the <FILE> base 3547339640Smm * block. 3548339640Smm * 3549339640Smm * The FILE base block looks like this: 3550339640Smm * 3551339640Smm * <header><huffman tables><block_1><block_2>...<block_n> 3552339640Smm * 3553339640Smm * The <header> is a block header, that is parsed in parse_block_header(). 3554339640Smm * It's a "compressed_block_header" structure, containing metadata needed 3555339640Smm * to know when we should stop looking for more <block_n> blocks. 3556339640Smm * 3557339640Smm * <huffman tables> contain data needed to set up the huffman tables, needed 3558339640Smm * for the actual decompression. 3559339640Smm * 3560339640Smm * Each <block_n> consists of series of literals: 3561339640Smm * 3562339640Smm * <literal><literal><literal>...<literal> 3563339640Smm * 3564339640Smm * Those literals generate the uncompression data. They operate on a circular 3565339640Smm * buffer, sometimes writing raw data into it, sometimes referencing 3566339640Smm * some previous data inside this buffer, and sometimes declaring a filter 3567339640Smm * that will need to be executed on the data stored in the circular buffer. 3568339640Smm * It all depends on the literal that is used. 3569339640Smm * 3570339640Smm * Sometimes blocks produce output data, sometimes they don't. For example, for 3571339640Smm * some huge files that use lots of filters, sometimes a block is filled with 3572339640Smm * only filter declaration literals. Such blocks won't produce any data in the 3573339640Smm * circular buffer. 
3574339640Smm * 3575339640Smm * Sometimes blocks will produce 4 bytes of data, and sometimes 1 megabyte, 3576339640Smm * because a literal can reference previously decompressed data. For example, 3577339640Smm * there can be a literal that says: 'append a byte 0xFE here', and after 3578339640Smm * it another literal can say 'append 1 megabyte of data from circular buffer 3579339640Smm * offset 0x12345'. This is how RAR format handles compressing repeated 3580339640Smm * patterns. 3581339640Smm * 3582339640Smm * The RAR compressor creates those literals and the actual efficiency of 3583339640Smm * compression depends on what those literals are. The literals can also 3584339640Smm * be seen as a kind of a non-turing-complete virtual machine that simply 3585339640Smm * tells the decompressor what it should do. 3586339640Smm * */ 3587339640Smm 3588339640Smmstatic int do_uncompress_file(struct archive_read* a) { 3589348607Smm struct rar5* rar = get_context(a); 3590348607Smm int ret; 3591348607Smm int64_t max_end_pos; 3592339640Smm 3593348607Smm if(!rar->cstate.initialized) { 3594348607Smm /* Don't perform full context reinitialization if we're 3595348607Smm * processing a solid archive. */ 3596348607Smm if(!rar->main.solid || !rar->cstate.window_buf) { 3597348607Smm init_unpack(rar); 3598348607Smm } 3599339640Smm 3600348607Smm rar->cstate.initialized = 1; 3601348607Smm } 3602339640Smm 3603348607Smm if(rar->cstate.all_filters_applied == 1) { 3604348607Smm /* We use while(1) here, but standard case allows for just 1 3605348607Smm * iteration. The loop will iterate if process_block() didn't 3606348607Smm * generate any data at all. This can happen if the block 3607348607Smm * contains only filter definitions (this is common in big 3608348607Smm * files). 
*/ 3609348607Smm while(1) { 3610348607Smm ret = process_block(a); 3611348607Smm if(ret == ARCHIVE_EOF || ret == ARCHIVE_FATAL) 3612348607Smm return ret; 3613339640Smm 3614348607Smm if(rar->cstate.last_write_ptr == 3615348607Smm rar->cstate.write_ptr) { 3616348607Smm /* The block didn't generate any new data, 3617348607Smm * so just process a new block. */ 3618348607Smm continue; 3619348607Smm } 3620339640Smm 3621348607Smm /* The block has generated some new data, so break 3622348607Smm * the loop. */ 3623348607Smm break; 3624348607Smm } 3625348607Smm } 3626339640Smm 3627348607Smm /* Try to run filters. If filters won't be applied, it means that 3628348607Smm * insufficient data was generated. */ 3629348607Smm ret = apply_filters(a); 3630348607Smm if(ret == ARCHIVE_RETRY) { 3631348607Smm return ARCHIVE_OK; 3632348607Smm } else if(ret == ARCHIVE_FATAL) { 3633348607Smm return ARCHIVE_FATAL; 3634348607Smm } 3635339640Smm 3636348607Smm /* If apply_filters() will return ARCHIVE_OK, we can continue here. */ 3637339640Smm 3638348607Smm if(cdeque_size(&rar->cstate.filters) > 0) { 3639348607Smm /* Check if we can write something before hitting first 3640348607Smm * filter. */ 3641348607Smm struct filter_info* flt; 3642339640Smm 3643348607Smm /* Get the block_start offset from the first filter. */ 3644348607Smm if(CDE_OK != cdeque_front(&rar->cstate.filters, 3645348607Smm cdeque_filter_p(&flt))) 3646348607Smm { 3647348607Smm archive_set_error(&a->archive, 3648348607Smm ARCHIVE_ERRNO_PROGRAMMER, 3649348607Smm "Can't read first filter"); 3650348607Smm return ARCHIVE_FATAL; 3651348607Smm } 3652339640Smm 3653348607Smm max_end_pos = rar5_min(flt->block_start, 3654348607Smm rar->cstate.write_ptr); 3655348607Smm } else { 3656348607Smm /* There are no filters defined, or all filters were applied. 3657348607Smm * This means we can just store the data without any 3658348607Smm * postprocessing. 
*/ 3659348607Smm max_end_pos = rar->cstate.write_ptr; 3660348607Smm } 3661339640Smm 3662348607Smm if(max_end_pos == rar->cstate.last_write_ptr) { 3663348607Smm /* We can't write anything yet. The block uncompression 3664348607Smm * function did not generate enough data, and no filter can be 3665348607Smm * applied. At the same time we don't have any data that can be 3666348607Smm * stored without filter postprocessing. This means we need to 3667348607Smm * wait for more data to be generated, so we can apply the 3668348607Smm * filters. 3669348607Smm * 3670348607Smm * Signal the caller that we need more data to be able to do 3671348607Smm * anything. 3672348607Smm */ 3673348607Smm return ARCHIVE_RETRY; 3674348607Smm } else { 3675348607Smm /* We can write the data before hitting the first filter. 3676348607Smm * So let's do it. The push_window_data() function will 3677348607Smm * effectively return the selected data block to the user 3678348607Smm * application. */ 3679348607Smm push_window_data(a, rar, rar->cstate.last_write_ptr, 3680348607Smm max_end_pos); 3681348607Smm rar->cstate.last_write_ptr = max_end_pos; 3682348607Smm } 3683339640Smm 3684348607Smm return ARCHIVE_OK; 3685339640Smm} 3686339640Smm 3687339640Smmstatic int uncompress_file(struct archive_read* a) { 3688348607Smm int ret; 3689339640Smm 3690348607Smm while(1) { 3691348607Smm /* Sometimes the uncompression function will return a 3692348607Smm * 'retry' signal. If this will happen, we have to retry 3693348607Smm * the function. 
*/ 3694348607Smm ret = do_uncompress_file(a); 3695348607Smm if(ret != ARCHIVE_RETRY) 3696348607Smm return ret; 3697348607Smm } 3698339640Smm} 3699339640Smm 3700339640Smm 3701339640Smmstatic int do_unstore_file(struct archive_read* a, 3702348607Smm struct rar5* rar, const void** buf, size_t* size, int64_t* offset) 3703339640Smm{ 3704358088Smm size_t to_read; 3705348607Smm const uint8_t* p; 3706339640Smm 3707348607Smm if(rar->file.bytes_remaining == 0 && rar->main.volume > 0 && 3708348607Smm rar->generic.split_after > 0) 3709348607Smm { 3710348607Smm int ret; 3711339640Smm 3712348607Smm rar->cstate.switch_multivolume = 1; 3713348607Smm ret = advance_multivolume(a); 3714348607Smm rar->cstate.switch_multivolume = 0; 3715339640Smm 3716348607Smm if(ret != ARCHIVE_OK) { 3717348607Smm /* Failed to advance to next multivolume archive 3718348607Smm * file. */ 3719348607Smm return ret; 3720348607Smm } 3721348607Smm } 3722339640Smm 3723358088Smm to_read = rar5_min(rar->file.bytes_remaining, 64 * 1024); 3724348607Smm if(to_read == 0) { 3725348607Smm return ARCHIVE_EOF; 3726348607Smm } 3727339640Smm 3728348607Smm if(!read_ahead(a, to_read, &p)) { 3729348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 3730348607Smm "I/O error when unstoring file"); 3731348607Smm return ARCHIVE_FATAL; 3732348607Smm } 3733339640Smm 3734348607Smm if(ARCHIVE_OK != consume(a, to_read)) { 3735348607Smm return ARCHIVE_EOF; 3736348607Smm } 3737339640Smm 3738348607Smm if(buf) *buf = p; 3739348607Smm if(size) *size = to_read; 3740348607Smm if(offset) *offset = rar->cstate.last_unstore_ptr; 3741339640Smm 3742348607Smm rar->file.bytes_remaining -= to_read; 3743348607Smm rar->cstate.last_unstore_ptr += to_read; 3744339640Smm 3745348607Smm update_crc(rar, p, to_read); 3746348607Smm return ARCHIVE_OK; 3747339640Smm} 3748339640Smm 3749339640Smmstatic int do_unpack(struct archive_read* a, struct rar5* rar, 3750348607Smm const void** buf, size_t* size, int64_t* offset) 3751339640Smm{ 3752348607Smm 
enum COMPRESSION_METHOD { 3753348607Smm STORE = 0, FASTEST = 1, FAST = 2, NORMAL = 3, GOOD = 4, 3754348607Smm BEST = 5 3755348607Smm }; 3756339640Smm 3757348607Smm if(rar->file.service > 0) { 3758348607Smm return do_unstore_file(a, rar, buf, size, offset); 3759348607Smm } else { 3760348607Smm switch(rar->cstate.method) { 3761348607Smm case STORE: 3762348607Smm return do_unstore_file(a, rar, buf, size, 3763348607Smm offset); 3764348607Smm case FASTEST: 3765348607Smm /* fallthrough */ 3766348607Smm case FAST: 3767348607Smm /* fallthrough */ 3768348607Smm case NORMAL: 3769348607Smm /* fallthrough */ 3770348607Smm case GOOD: 3771348607Smm /* fallthrough */ 3772348607Smm case BEST: 3773348607Smm return uncompress_file(a); 3774348607Smm default: 3775348607Smm archive_set_error(&a->archive, 3776348607Smm ARCHIVE_ERRNO_FILE_FORMAT, 3777348607Smm "Compression method not supported: 0x%x", 3778348607Smm rar->cstate.method); 3779339640Smm 3780348607Smm return ARCHIVE_FATAL; 3781348607Smm } 3782348607Smm } 3783339640Smm 3784339640Smm#if !defined WIN32 3785348607Smm /* Not reached. */ 3786348607Smm return ARCHIVE_OK; 3787339640Smm#endif 3788339640Smm} 3789339640Smm 3790339640Smmstatic int verify_checksums(struct archive_read* a) { 3791348607Smm int verify_crc; 3792348607Smm struct rar5* rar = get_context(a); 3793339640Smm 3794348607Smm /* Check checksums only when actually unpacking the data. There's no 3795348607Smm * need to calculate checksum when we're skipping data in solid archives 3796348607Smm * (skipping in solid archives is the same thing as unpacking compressed 3797348607Smm * data and discarding the result). */ 3798339640Smm 3799348607Smm if(!rar->skip_mode) { 3800348607Smm /* Always check checksums if we're not in skip mode */ 3801348607Smm verify_crc = 1; 3802348607Smm } else { 3803348607Smm /* We can override the logic above with a compile-time option 3804348607Smm * NO_CRC_ON_SOLID_SKIP. 
This option is used during debugging, 3805348607Smm * and it will check checksums of unpacked data even when 3806348607Smm * we're skipping it. */ 3807339640Smm 3808339640Smm#if defined CHECK_CRC_ON_SOLID_SKIP 3809348607Smm /* Debug case */ 3810348607Smm verify_crc = 1; 3811339640Smm#else 3812348607Smm /* Normal case */ 3813348607Smm verify_crc = 0; 3814339640Smm#endif 3815348607Smm } 3816339640Smm 3817348607Smm if(verify_crc) { 3818348607Smm /* During unpacking, on each unpacked block we're calling the 3819348607Smm * update_crc() function. Since we are here, the unpacking 3820348607Smm * process is already over and we can check if calculated 3821348607Smm * checksum (CRC32 or BLAKE2sp) is the same as what is stored 3822348607Smm * in the archive. */ 3823348607Smm if(rar->file.stored_crc32 > 0) { 3824348607Smm /* Check CRC32 only when the file contains a CRC32 3825348607Smm * value for this file. */ 3826339640Smm 3827348607Smm if(rar->file.calculated_crc32 != 3828348607Smm rar->file.stored_crc32) { 3829348607Smm /* Checksums do not match; the unpacked file 3830348607Smm * is corrupted. 
*/ 3831339640Smm 3832348607Smm DEBUG_CODE { 3833348607Smm printf("Checksum error: CRC32 " 3834368707Smm "(was: %08" PRIx32 ", expected: %08" PRIx32 ")\n", 3835348607Smm rar->file.calculated_crc32, 3836348607Smm rar->file.stored_crc32); 3837348607Smm } 3838339640Smm 3839339640Smm#ifndef DONT_FAIL_ON_CRC_ERROR 3840348607Smm archive_set_error(&a->archive, 3841348607Smm ARCHIVE_ERRNO_FILE_FORMAT, 3842348607Smm "Checksum error: CRC32"); 3843348607Smm return ARCHIVE_FATAL; 3844339640Smm#endif 3845348607Smm } else { 3846348607Smm DEBUG_CODE { 3847348607Smm printf("Checksum OK: CRC32 " 3848368707Smm "(%08" PRIx32 "/%08" PRIx32 ")\n", 3849348607Smm rar->file.stored_crc32, 3850348607Smm rar->file.calculated_crc32); 3851348607Smm } 3852348607Smm } 3853348607Smm } 3854339640Smm 3855348607Smm if(rar->file.has_blake2 > 0) { 3856348607Smm /* BLAKE2sp is an optional checksum algorithm that is 3857348607Smm * added to RARv5 archives when using the `-htb` switch 3858348607Smm * during creation of archive. 3859348607Smm * 3860348607Smm * We now finalize the hash calculation by calling the 3861348607Smm * `final` function. This will generate the final hash 3862348607Smm * value we can use to compare it with the BLAKE2sp 3863348607Smm * checksum that is stored in the archive. 3864348607Smm * 3865348607Smm * The return value of this `final` function is not 3866348607Smm * very helpful, as it guards only against improper use. 3867348607Smm * This is why we're explicitly ignoring it. 
*/ 3868339640Smm 3869348607Smm uint8_t b2_buf[32]; 3870348607Smm (void) blake2sp_final(&rar->file.b2state, b2_buf, 32); 3871339640Smm 3872348607Smm if(memcmp(&rar->file.blake2sp, b2_buf, 32) != 0) { 3873339640Smm#ifndef DONT_FAIL_ON_CRC_ERROR 3874348607Smm archive_set_error(&a->archive, 3875348607Smm ARCHIVE_ERRNO_FILE_FORMAT, 3876348607Smm "Checksum error: BLAKE2"); 3877339640Smm 3878348607Smm return ARCHIVE_FATAL; 3879339640Smm#endif 3880348607Smm } 3881348607Smm } 3882348607Smm } 3883339640Smm 3884348607Smm /* Finalization for this file has been successfully completed. */ 3885348607Smm return ARCHIVE_OK; 3886339640Smm} 3887339640Smm 3888339640Smmstatic int verify_global_checksums(struct archive_read* a) { 3889348607Smm return verify_checksums(a); 3890339640Smm} 3891339640Smm 3892358088Smm/* 3893358088Smm * Decryption function for the magic signature pattern. Check the comment near 3894358088Smm * the `rar5_signature_xor` symbol to read the rationale behind this. 3895358088Smm */ 3896358088Smmstatic void rar5_signature(char *buf) { 3897358088Smm size_t i; 3898358088Smm 3899358088Smm for(i = 0; i < sizeof(rar5_signature_xor); i++) { 3900358088Smm buf[i] = rar5_signature_xor[i] ^ 0xA1; 3901358088Smm } 3902358088Smm} 3903358088Smm 3904339640Smmstatic int rar5_read_data(struct archive_read *a, const void **buff, 3905348607Smm size_t *size, int64_t *offset) { 3906348607Smm int ret; 3907348607Smm struct rar5* rar = get_context(a); 3908339640Smm 3909368707Smm if (size) 3910368707Smm *size = 0; 3911368707Smm 3912348607Smm if(rar->file.dir > 0) { 3913348607Smm /* Don't process any data if this file entry was declared 3914348607Smm * as a directory. This is needed, because entries marked as 3915348607Smm * directory doesn't have any dictionary buffer allocated, so 3916348607Smm * it's impossible to perform any decompression. 
*/ 3917348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 3918348607Smm "Can't decompress an entry marked as a directory"); 3919348607Smm return ARCHIVE_FAILED; 3920348607Smm } 3921339640Smm 3922348607Smm if(!rar->skip_mode && (rar->cstate.last_write_ptr > rar->file.unpacked_size)) { 3923348607Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 3924348607Smm "Unpacker has written too many bytes"); 3925348607Smm return ARCHIVE_FATAL; 3926348607Smm } 3927339640Smm 3928348607Smm ret = use_data(rar, buff, size, offset); 3929348607Smm if(ret == ARCHIVE_OK) { 3930348607Smm return ret; 3931348607Smm } 3932342360Smm 3933348607Smm if(rar->file.eof == 1) { 3934348607Smm return ARCHIVE_EOF; 3935348607Smm } 3936339640Smm 3937348607Smm ret = do_unpack(a, rar, buff, size, offset); 3938348607Smm if(ret != ARCHIVE_OK) { 3939348607Smm return ret; 3940348607Smm } 3941339640Smm 3942348607Smm if(rar->file.bytes_remaining == 0 && 3943348607Smm rar->cstate.last_write_ptr == rar->file.unpacked_size) 3944348607Smm { 3945348607Smm /* If all bytes of current file were processed, run 3946348607Smm * finalization. 3947348607Smm * 3948348607Smm * Finalization will check checksum against proper values. If 3949348607Smm * some of the checksums will not match, we'll return an error 3950348607Smm * value in the last `archive_read_data` call to signal an error 3951348607Smm * to the user. */ 3952339640Smm 3953348607Smm rar->file.eof = 1; 3954348607Smm return verify_global_checksums(a); 3955348607Smm } 3956348607Smm 3957348607Smm return ARCHIVE_OK; 3958339640Smm} 3959339640Smm 3960339640Smmstatic int rar5_read_data_skip(struct archive_read *a) { 3961348607Smm struct rar5* rar = get_context(a); 3962339640Smm 3963348607Smm if(rar->main.solid) { 3964348607Smm /* In solid archives, instead of skipping the data, we need to 3965348607Smm * extract it, and dispose the result. 
The side effect of this 3966348607Smm * operation will be setting up the initial window buffer state 3967348607Smm * needed to be able to extract the selected file. */ 3968339640Smm 3969348607Smm int ret; 3970339640Smm 3971348607Smm /* Make sure to process all blocks in the compressed stream. */ 3972348607Smm while(rar->file.bytes_remaining > 0) { 3973348607Smm /* Setting the "skip mode" will allow us to skip 3974348607Smm * checksum checks during data skipping. Checking the 3975348607Smm * checksum of skipped data isn't really necessary and 3976348607Smm * it's only slowing things down. 3977348607Smm * 3978348607Smm * This is incremented instead of setting to 1 because 3979348607Smm * this data skipping function can be called 3980348607Smm * recursively. */ 3981348607Smm rar->skip_mode++; 3982339640Smm 3983348607Smm /* We're disposing 1 block of data, so we use triple 3984348607Smm * NULLs in arguments. */ 3985348607Smm ret = rar5_read_data(a, NULL, NULL, NULL); 3986339640Smm 3987348607Smm /* Turn off "skip mode". */ 3988348607Smm rar->skip_mode--; 3989339640Smm 3990349524Smm if(ret < 0 || ret == ARCHIVE_EOF) { 3991348607Smm /* Propagate any potential error conditions 3992348607Smm * to the caller. */ 3993348607Smm return ret; 3994348607Smm } 3995348607Smm } 3996348607Smm } else { 3997348607Smm /* In standard archives, we can just jump over the compressed 3998348607Smm * stream. Each file in non-solid archives starts from an empty 3999348607Smm * window buffer. 
*/ 4000339640Smm 4001348607Smm if(ARCHIVE_OK != consume(a, rar->file.bytes_remaining)) { 4002348607Smm return ARCHIVE_FATAL; 4003348607Smm } 4004339640Smm 4005348607Smm rar->file.bytes_remaining = 0; 4006348607Smm } 4007339640Smm 4008348607Smm return ARCHIVE_OK; 4009339640Smm} 4010339640Smm 4011339640Smmstatic int64_t rar5_seek_data(struct archive_read *a, int64_t offset, 4012348607Smm int whence) 4013339640Smm{ 4014348607Smm (void) a; 4015348607Smm (void) offset; 4016348607Smm (void) whence; 4017339640Smm 4018348607Smm /* We're a streaming unpacker, and we don't support seeking. */ 4019339640Smm 4020348607Smm return ARCHIVE_FATAL; 4021339640Smm} 4022339640Smm 4023339640Smmstatic int rar5_cleanup(struct archive_read *a) { 4024348607Smm struct rar5* rar = get_context(a); 4025339640Smm 4026348607Smm free(rar->cstate.window_buf); 4027348607Smm free(rar->cstate.filtered_buf); 4028339640Smm 4029348607Smm free(rar->vol.push_buf); 4030339640Smm 4031348607Smm free_filters(rar); 4032348607Smm cdeque_free(&rar->cstate.filters); 4033339640Smm 4034348607Smm free(rar); 4035348607Smm a->format->data = NULL; 4036339640Smm 4037348607Smm return ARCHIVE_OK; 4038339640Smm} 4039339640Smm 4040339640Smmstatic int rar5_capabilities(struct archive_read * a) { 4041348607Smm (void) a; 4042348607Smm return 0; 4043339640Smm} 4044339640Smm 4045339640Smmstatic int rar5_has_encrypted_entries(struct archive_read *_a) { 4046348607Smm (void) _a; 4047339640Smm 4048348607Smm /* Unsupported for now. 
*/ 4049348607Smm return ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED; 4050339640Smm} 4051339640Smm 4052339640Smmstatic int rar5_init(struct rar5* rar) { 4053348607Smm memset(rar, 0, sizeof(struct rar5)); 4054339640Smm 4055348607Smm if(CDE_OK != cdeque_init(&rar->cstate.filters, 8192)) 4056348607Smm return ARCHIVE_FATAL; 4057339640Smm 4058348607Smm return ARCHIVE_OK; 4059339640Smm} 4060339640Smm 4061339640Smmint archive_read_support_format_rar5(struct archive *_a) { 4062348607Smm struct archive_read* ar; 4063348607Smm int ret; 4064348607Smm struct rar5* rar; 4065339640Smm 4066348607Smm if(ARCHIVE_OK != (ret = get_archive_read(_a, &ar))) 4067348607Smm return ret; 4068339640Smm 4069348607Smm rar = malloc(sizeof(*rar)); 4070348607Smm if(rar == NULL) { 4071348607Smm archive_set_error(&ar->archive, ENOMEM, 4072348607Smm "Can't allocate rar5 data"); 4073348607Smm return ARCHIVE_FATAL; 4074348607Smm } 4075339640Smm 4076348607Smm if(ARCHIVE_OK != rar5_init(rar)) { 4077348607Smm archive_set_error(&ar->archive, ENOMEM, 4078348607Smm "Can't allocate rar5 filter buffer"); 4079370535Sgit2svn free(rar); 4080348607Smm return ARCHIVE_FATAL; 4081348607Smm } 4082339640Smm 4083348607Smm ret = __archive_read_register_format(ar, 4084348607Smm rar, 4085348607Smm "rar5", 4086348607Smm rar5_bid, 4087348607Smm rar5_options, 4088348607Smm rar5_read_header, 4089348607Smm rar5_read_data, 4090348607Smm rar5_read_data_skip, 4091348607Smm rar5_seek_data, 4092348607Smm rar5_cleanup, 4093348607Smm rar5_capabilities, 4094348607Smm rar5_has_encrypted_entries); 4095339640Smm 4096348607Smm if(ret != ARCHIVE_OK) { 4097348607Smm (void) rar5_cleanup(ar); 4098348607Smm } 4099339640Smm 4100348607Smm return ret; 4101339640Smm} 4102