1229159Sadrian/* 2229159Sadrian * .xz Stream decoder 3229159Sadrian * 4229159Sadrian * Author: Lasse Collin <lasse.collin@tukaani.org> 5229159Sadrian * 6229159Sadrian * This file has been put into the public domain. 7229159Sadrian * You can do whatever you want with this file. 8229159Sadrian */ 9229159Sadrian 10229159Sadrian#include "xz_private.h" 11229159Sadrian#include "xz_stream.h" 12229159Sadrian 13262764Sdelphij#ifdef XZ_USE_CRC64 14262764Sdelphij# define IS_CRC64(check_type) ((check_type) == XZ_CHECK_CRC64) 15262764Sdelphij#else 16262764Sdelphij# define IS_CRC64(check_type) false 17262764Sdelphij#endif 18262764Sdelphij 19229159Sadrian/* Hash used to validate the Index field */ 20229159Sadrianstruct xz_dec_hash { 21229159Sadrian vli_type unpadded; 22229159Sadrian vli_type uncompressed; 23229159Sadrian uint32_t crc32; 24229159Sadrian}; 25229159Sadrian 26229159Sadrianstruct xz_dec { 27229159Sadrian /* Position in dec_main() */ 28229159Sadrian enum { 29229159Sadrian SEQ_STREAM_HEADER, 30229159Sadrian SEQ_BLOCK_START, 31229159Sadrian SEQ_BLOCK_HEADER, 32229159Sadrian SEQ_BLOCK_UNCOMPRESS, 33229159Sadrian SEQ_BLOCK_PADDING, 34229159Sadrian SEQ_BLOCK_CHECK, 35229159Sadrian SEQ_INDEX, 36229159Sadrian SEQ_INDEX_PADDING, 37229159Sadrian SEQ_INDEX_CRC32, 38229159Sadrian SEQ_STREAM_FOOTER 39229159Sadrian } sequence; 40229159Sadrian 41229159Sadrian /* Position in variable-length integers and Check fields */ 42229159Sadrian uint32_t pos; 43229159Sadrian 44229159Sadrian /* Variable-length integer decoded by dec_vli() */ 45229159Sadrian vli_type vli; 46229159Sadrian 47229159Sadrian /* Saved in_pos and out_pos */ 48229159Sadrian size_t in_start; 49229159Sadrian size_t out_start; 50229159Sadrian 51262764Sdelphij#ifdef XZ_USE_CRC64 52262764Sdelphij /* CRC32 or CRC64 value in Block or CRC32 value in Index */ 53262764Sdelphij uint64_t crc; 54262764Sdelphij#else 55229159Sadrian /* CRC32 value in Block or Index */ 56262764Sdelphij uint32_t crc; 57262764Sdelphij#endif 58229159Sadrian 59229159Sadrian /* Type of the integrity check calculated from uncompressed data */ 60229159Sadrian enum xz_check check_type; 61229159Sadrian 62229159Sadrian /* Operation mode */ 63229159Sadrian enum xz_mode mode; 64229159Sadrian 65229159Sadrian /* 66229159Sadrian * True if the next call to xz_dec_run() is allowed to return 67229159Sadrian * XZ_BUF_ERROR. 68229159Sadrian */ 69229159Sadrian bool allow_buf_error; 70229159Sadrian 71229159Sadrian /* Information stored in Block Header */ 72229159Sadrian struct { 73229159Sadrian /* 74229159Sadrian * Value stored in the Compressed Size field, or 75229159Sadrian * VLI_UNKNOWN if Compressed Size is not present. 76229159Sadrian */ 77229159Sadrian vli_type compressed; 78229159Sadrian 79229159Sadrian /* 80229159Sadrian * Value stored in the Uncompressed Size field, or 81229159Sadrian * VLI_UNKNOWN if Uncompressed Size is not present. 82229159Sadrian */ 83229159Sadrian vli_type uncompressed; 84229159Sadrian 85229159Sadrian /* Size of the Block Header field */ 86229159Sadrian uint32_t size; 87229159Sadrian } block_header; 88229159Sadrian 89229159Sadrian /* Information collected when decoding Blocks */ 90229159Sadrian struct { 91229159Sadrian /* Observed compressed size of the current Block */ 92229159Sadrian vli_type compressed; 93229159Sadrian 94229159Sadrian /* Observed uncompressed size of the current Block */ 95229159Sadrian vli_type uncompressed; 96229159Sadrian 97229159Sadrian /* Number of Blocks decoded so far */ 98229159Sadrian vli_type count; 99229159Sadrian 100229159Sadrian /* 101229159Sadrian * Hash calculated from the Block sizes. This is used to 102229159Sadrian * validate the Index field. 103229159Sadrian */ 104229159Sadrian struct xz_dec_hash hash; 105229159Sadrian } block; 106229159Sadrian 107229159Sadrian /* Variables needed when verifying the Index field */ 108229159Sadrian struct { 109229159Sadrian /* Position in dec_index() */ 110229159Sadrian enum { 111229159Sadrian SEQ_INDEX_COUNT, 112229159Sadrian SEQ_INDEX_UNPADDED, 113229159Sadrian SEQ_INDEX_UNCOMPRESSED 114229159Sadrian } sequence; 115229159Sadrian 116229159Sadrian /* Size of the Index in bytes */ 117229159Sadrian vli_type size; 118229159Sadrian 119229159Sadrian /* Number of Records (matches block.count in valid files) */ 120229159Sadrian vli_type count; 121229159Sadrian 122229159Sadrian /* 123229159Sadrian * Hash calculated from the Records (matches block.hash in 124229159Sadrian * valid files). 125229159Sadrian */ 126229159Sadrian struct xz_dec_hash hash; 127229159Sadrian } index; 128229159Sadrian 129229159Sadrian /* 130229159Sadrian * Temporary buffer needed to hold Stream Header, Block Header, 131229159Sadrian * and Stream Footer. The Block Header is the biggest (1 KiB) 132229159Sadrian * so we reserve space according to that. buf[] has to be aligned 133229159Sadrian * to a multiple of four bytes; the size_t variables before it 134229159Sadrian * should guarantee this. 135229159Sadrian */ 136229159Sadrian struct { 137229159Sadrian size_t pos; 138229159Sadrian size_t size; 139229159Sadrian uint8_t buf[1024]; 140229159Sadrian } temp; 141229159Sadrian 142229159Sadrian struct xz_dec_lzma2 *lzma2; 143229159Sadrian 144229159Sadrian#ifdef XZ_DEC_BCJ 145229159Sadrian struct xz_dec_bcj *bcj; 146229159Sadrian bool bcj_active; 147229159Sadrian#endif 148229159Sadrian}; 149229159Sadrian 150229159Sadrian#ifdef XZ_DEC_ANY_CHECK 151229159Sadrian/* Sizes of the Check field with different Check IDs */ 152229159Sadrianstatic const uint8_t check_sizes[16] = { 153229159Sadrian 0, 154229159Sadrian 4, 4, 4, 155229159Sadrian 8, 8, 8, 156229159Sadrian 16, 16, 16, 157229159Sadrian 32, 32, 32, 158229159Sadrian 64, 64, 64 159229159Sadrian}; 160229159Sadrian#endif 161229159Sadrian 162229159Sadrian/* 163229159Sadrian * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller 164229159Sadrian * must have set s->temp.pos to indicate how much data we are supposed 165229159Sadrian * to copy into s->temp.buf. Return true once s->temp.pos has reached 166229159Sadrian * s->temp.size. 167229159Sadrian */ 168229159Sadrianstatic bool fill_temp(struct xz_dec *s, struct xz_buf *b) 169229159Sadrian{ 170229159Sadrian size_t copy_size = min_t(size_t, 171229159Sadrian b->in_size - b->in_pos, s->temp.size - s->temp.pos); 172229159Sadrian 173229159Sadrian memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size); 174229159Sadrian b->in_pos += copy_size; 175229159Sadrian s->temp.pos += copy_size; 176229159Sadrian 177229159Sadrian if (s->temp.pos == s->temp.size) { 178229159Sadrian s->temp.pos = 0; 179229159Sadrian return true; 180229159Sadrian } 181229159Sadrian 182229159Sadrian return false; 183229159Sadrian} 184229159Sadrian 185229159Sadrian/* Decode a variable-length integer (little-endian base-128 encoding) */ 186229159Sadrianstatic enum xz_ret dec_vli(struct xz_dec *s, const uint8_t *in, 187229159Sadrian size_t *in_pos, size_t in_size) 188229159Sadrian{ 189229159Sadrian uint8_t byte; 190229159Sadrian 191229159Sadrian if (s->pos == 0) 192229159Sadrian s->vli = 0; 193229159Sadrian 194229159Sadrian while (*in_pos < in_size) { 195229159Sadrian byte = in[*in_pos]; 196229159Sadrian ++*in_pos; 197229159Sadrian 198229159Sadrian s->vli |= (vli_type)(byte & 0x7F) << s->pos; 199229159Sadrian 200229159Sadrian if ((byte & 0x80) == 0) { 201229159Sadrian /* Don't allow non-minimal encodings. */ 202229159Sadrian if (byte == 0 && s->pos != 0) 203229159Sadrian return XZ_DATA_ERROR; 204229159Sadrian 205229159Sadrian s->pos = 0; 206229159Sadrian return XZ_STREAM_END; 207229159Sadrian } 208229159Sadrian 209229159Sadrian s->pos += 7; 210229159Sadrian if (s->pos == 7 * VLI_BYTES_MAX) 211229159Sadrian return XZ_DATA_ERROR; 212229159Sadrian } 213229159Sadrian 214229159Sadrian return XZ_OK; 215229159Sadrian} 216229159Sadrian 217229159Sadrian/* 218229159Sadrian * Decode the Compressed Data field from a Block. Update and validate 219229159Sadrian * the observed compressed and uncompressed sizes of the Block so that 220229159Sadrian * they don't exceed the values possibly stored in the Block Header 221229159Sadrian * (validation assumes that no integer overflow occurs, since vli_type 222262764Sdelphij * is normally uint64_t). Update the CRC32 or CRC64 value if presence of 223262764Sdelphij * the CRC32 or CRC64 field was indicated in Stream Header. 224229159Sadrian * 225229159Sadrian * Once the decoding is finished, validate that the observed sizes match 226229159Sadrian * the sizes possibly stored in the Block Header. Update the hash and 227229159Sadrian * Block count, which are later used to validate the Index field. 228229159Sadrian */ 229229159Sadrianstatic enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b) 230229159Sadrian{ 231229159Sadrian enum xz_ret ret; 232229159Sadrian 233229159Sadrian s->in_start = b->in_pos; 234229159Sadrian s->out_start = b->out_pos; 235229159Sadrian 236229159Sadrian#ifdef XZ_DEC_BCJ 237229159Sadrian if (s->bcj_active) 238229159Sadrian ret = xz_dec_bcj_run(s->bcj, s->lzma2, b); 239229159Sadrian else 240229159Sadrian#endif 241229159Sadrian ret = xz_dec_lzma2_run(s->lzma2, b); 242229159Sadrian 243229159Sadrian s->block.compressed += b->in_pos - s->in_start; 244229159Sadrian s->block.uncompressed += b->out_pos - s->out_start; 245229159Sadrian 246229159Sadrian /* 247229159Sadrian * There is no need to separately check for VLI_UNKNOWN, since 248229159Sadrian * the observed sizes are always smaller than VLI_UNKNOWN. 249229159Sadrian */ 250229159Sadrian if (s->block.compressed > s->block_header.compressed 251229159Sadrian || s->block.uncompressed 252229159Sadrian > s->block_header.uncompressed) 253229159Sadrian return XZ_DATA_ERROR; 254229159Sadrian 255229159Sadrian if (s->check_type == XZ_CHECK_CRC32) 256262764Sdelphij s->crc = xz_crc32(b->out + s->out_start, 257262764Sdelphij b->out_pos - s->out_start, s->crc); 258262764Sdelphij#ifdef XZ_USE_CRC64 259262764Sdelphij else if (s->check_type == XZ_CHECK_CRC64) 260262764Sdelphij s->crc = xz_crc64(b->out + s->out_start, 261262764Sdelphij b->out_pos - s->out_start, s->crc); 262262764Sdelphij#endif 263229159Sadrian 264229159Sadrian if (ret == XZ_STREAM_END) { 265229159Sadrian if (s->block_header.compressed != VLI_UNKNOWN 266229159Sadrian && s->block_header.compressed 267229159Sadrian != s->block.compressed) 268229159Sadrian return XZ_DATA_ERROR; 269229159Sadrian 270229159Sadrian if (s->block_header.uncompressed != VLI_UNKNOWN 271229159Sadrian && s->block_header.uncompressed 272229159Sadrian != s->block.uncompressed) 273229159Sadrian return XZ_DATA_ERROR; 274229159Sadrian 275229159Sadrian s->block.hash.unpadded += s->block_header.size 276229159Sadrian + s->block.compressed; 277229159Sadrian 278229159Sadrian#ifdef XZ_DEC_ANY_CHECK 279229159Sadrian s->block.hash.unpadded += check_sizes[s->check_type]; 280229159Sadrian#else 281229159Sadrian if (s->check_type == XZ_CHECK_CRC32) 282229159Sadrian s->block.hash.unpadded += 4; 283262764Sdelphij else if (IS_CRC64(s->check_type)) 284262764Sdelphij s->block.hash.unpadded += 8; 285229159Sadrian#endif 286229159Sadrian 287229159Sadrian s->block.hash.uncompressed += s->block.uncompressed; 288229159Sadrian s->block.hash.crc32 = xz_crc32( 289229159Sadrian (const uint8_t *)&s->block.hash, 290229159Sadrian sizeof(s->block.hash), s->block.hash.crc32); 291229159Sadrian 292229159Sadrian ++s->block.count; 293229159Sadrian } 294229159Sadrian 295229159Sadrian return ret; 296229159Sadrian} 297229159Sadrian 298229159Sadrian/* Update the Index size and the CRC32 value. */ 299229159Sadrianstatic void index_update(struct xz_dec *s, const struct xz_buf *b) 300229159Sadrian{ 301229159Sadrian size_t in_used = b->in_pos - s->in_start; 302229159Sadrian s->index.size += in_used; 303262764Sdelphij s->crc = xz_crc32(b->in + s->in_start, in_used, s->crc); 304229159Sadrian} 305229159Sadrian 306229159Sadrian/* 307229159Sadrian * Decode the Number of Records, Unpadded Size, and Uncompressed Size 308229159Sadrian * fields from the Index field. That is, Index Padding and CRC32 are not 309229159Sadrian * decoded by this function. 310229159Sadrian * 311229159Sadrian * This can return XZ_OK (more input needed), XZ_STREAM_END (everything 312229159Sadrian * successfully decoded), or XZ_DATA_ERROR (input is corrupt). 313229159Sadrian */ 314229159Sadrianstatic enum xz_ret dec_index(struct xz_dec *s, struct xz_buf *b) 315229159Sadrian{ 316229159Sadrian enum xz_ret ret; 317229159Sadrian 318229159Sadrian do { 319229159Sadrian ret = dec_vli(s, b->in, &b->in_pos, b->in_size); 320229159Sadrian if (ret != XZ_STREAM_END) { 321229159Sadrian index_update(s, b); 322229159Sadrian return ret; 323229159Sadrian } 324229159Sadrian 325229159Sadrian switch (s->index.sequence) { 326229159Sadrian case SEQ_INDEX_COUNT: 327229159Sadrian s->index.count = s->vli; 328229159Sadrian 329229159Sadrian /* 330229159Sadrian * Validate that the Number of Records field 331229159Sadrian * indicates the same number of Records as 332229159Sadrian * there were Blocks in the Stream. 333229159Sadrian */ 334229159Sadrian if (s->index.count != s->block.count) 335229159Sadrian return XZ_DATA_ERROR; 336229159Sadrian 337229159Sadrian s->index.sequence = SEQ_INDEX_UNPADDED; 338229159Sadrian break; 339229159Sadrian 340229159Sadrian case SEQ_INDEX_UNPADDED: 341229159Sadrian s->index.hash.unpadded += s->vli; 342229159Sadrian s->index.sequence = SEQ_INDEX_UNCOMPRESSED; 343229159Sadrian break; 344229159Sadrian 345229159Sadrian case SEQ_INDEX_UNCOMPRESSED: 346229159Sadrian s->index.hash.uncompressed += s->vli; 347229159Sadrian s->index.hash.crc32 = xz_crc32( 348229159Sadrian (const uint8_t *)&s->index.hash, 349229159Sadrian sizeof(s->index.hash), 350229159Sadrian s->index.hash.crc32); 351229159Sadrian --s->index.count; 352229159Sadrian s->index.sequence = SEQ_INDEX_UNPADDED; 353229159Sadrian break; 354229159Sadrian } 355229159Sadrian } while (s->index.count > 0); 356229159Sadrian 357229159Sadrian return XZ_STREAM_END; 358229159Sadrian} 359229159Sadrian 360229159Sadrian/* 361262764Sdelphij * Validate that the next four or eight input bytes match the value 362262764Sdelphij * of s->crc. s->pos must be zero when starting to validate the first byte. 363262764Sdelphij * The "bits" argument allows using the same code for both CRC32 and CRC64. 364229159Sadrian */ 365262764Sdelphijstatic enum xz_ret crc_validate(struct xz_dec *s, struct xz_buf *b, 366262764Sdelphij uint32_t bits) 367229159Sadrian{ 368229159Sadrian do { 369229159Sadrian if (b->in_pos == b->in_size) 370229159Sadrian return XZ_OK; 371229159Sadrian 372262764Sdelphij if (((s->crc >> s->pos) & 0xFF) != b->in[b->in_pos++]) 373229159Sadrian return XZ_DATA_ERROR; 374229159Sadrian 375229159Sadrian s->pos += 8; 376229159Sadrian 377262764Sdelphij } while (s->pos < bits); 378229159Sadrian 379262764Sdelphij s->crc = 0; 380229159Sadrian s->pos = 0; 381229159Sadrian 382229159Sadrian return XZ_STREAM_END; 383229159Sadrian} 384229159Sadrian 385229159Sadrian#ifdef XZ_DEC_ANY_CHECK 386229159Sadrian/* 387229159Sadrian * Skip over the Check field when the Check ID is not supported. 388229159Sadrian * Returns true once the whole Check field has been skipped over. 389229159Sadrian */ 390229159Sadrianstatic bool check_skip(struct xz_dec *s, struct xz_buf *b) 391229159Sadrian{ 392229159Sadrian while (s->pos < check_sizes[s->check_type]) { 393229159Sadrian if (b->in_pos == b->in_size) 394229159Sadrian return false; 395229159Sadrian 396229159Sadrian ++b->in_pos; 397229159Sadrian ++s->pos; 398229159Sadrian } 399229159Sadrian 400229159Sadrian s->pos = 0; 401229159Sadrian 402229159Sadrian return true; 403229159Sadrian} 404229159Sadrian#endif 405229159Sadrian 406229159Sadrian/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */ 407229159Sadrianstatic enum xz_ret dec_stream_header(struct xz_dec *s) 408229159Sadrian{ 409229159Sadrian if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE)) 410229159Sadrian return XZ_FORMAT_ERROR; 411229159Sadrian 412229159Sadrian if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0) 413229159Sadrian != get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2)) 414229159Sadrian return XZ_DATA_ERROR; 415229159Sadrian 416229159Sadrian if (s->temp.buf[HEADER_MAGIC_SIZE] != 0) 417229159Sadrian return XZ_OPTIONS_ERROR; 418229159Sadrian 419229159Sadrian /* 420262764Sdelphij * Of integrity checks, we support none (Check ID = 0), 421262764Sdelphij * CRC32 (Check ID = 1), and optionally CRC64 (Check ID = 4). 422262764Sdelphij * However, if XZ_DEC_ANY_CHECK is defined, we will accept other 423262764Sdelphij * check types too, but then the check won't be verified and 424262764Sdelphij * a warning (XZ_UNSUPPORTED_CHECK) will be given. 425229159Sadrian */ 426229159Sadrian s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1]; 427229159Sadrian 428229159Sadrian#ifdef XZ_DEC_ANY_CHECK 429229159Sadrian if (s->check_type > XZ_CHECK_MAX) 430229159Sadrian return XZ_OPTIONS_ERROR; 431229159Sadrian 432262764Sdelphij if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type)) 433229159Sadrian return XZ_UNSUPPORTED_CHECK; 434229159Sadrian#else 435262764Sdelphij if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type)) 436229159Sadrian return XZ_OPTIONS_ERROR; 437229159Sadrian#endif 438229159Sadrian 439229159Sadrian return XZ_OK; 440229159Sadrian} 441229159Sadrian 442229159Sadrian/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */ 443229159Sadrianstatic enum xz_ret dec_stream_footer(struct xz_dec *s) 444229159Sadrian{ 445229159Sadrian if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE)) 446229159Sadrian return XZ_DATA_ERROR; 447229159Sadrian 448229159Sadrian if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf)) 449229159Sadrian return XZ_DATA_ERROR; 450229159Sadrian 451229159Sadrian /* 452229159Sadrian * Validate Backward Size. Note that we never added the size of the 453229159Sadrian * Index CRC32 field to s->index.size, thus we use s->index.size / 4 454229159Sadrian * instead of s->index.size / 4 - 1. 455229159Sadrian */ 456229159Sadrian if ((s->index.size >> 2) != get_le32(s->temp.buf + 4)) 457229159Sadrian return XZ_DATA_ERROR; 458229159Sadrian 459229159Sadrian if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type) 460229159Sadrian return XZ_DATA_ERROR; 461229159Sadrian 462229159Sadrian /* 463229159Sadrian * Use XZ_STREAM_END instead of XZ_OK to be more convenient 464229159Sadrian * for the caller. 465229159Sadrian */ 466229159Sadrian return XZ_STREAM_END; 467229159Sadrian} 468229159Sadrian 469229159Sadrian/* Decode the Block Header and initialize the filter chain. */ 470229159Sadrianstatic enum xz_ret dec_block_header(struct xz_dec *s) 471229159Sadrian{ 472229159Sadrian enum xz_ret ret; 473229159Sadrian 474229159Sadrian /* 475229159Sadrian * Validate the CRC32. We know that the temp buffer is at least 476229159Sadrian * eight bytes so this is safe. 477229159Sadrian */ 478229159Sadrian s->temp.size -= 4; 479229159Sadrian if (xz_crc32(s->temp.buf, s->temp.size, 0) 480229159Sadrian != get_le32(s->temp.buf + s->temp.size)) 481229159Sadrian return XZ_DATA_ERROR; 482229159Sadrian 483229159Sadrian s->temp.pos = 2; 484229159Sadrian 485229159Sadrian /* 486229159Sadrian * Catch unsupported Block Flags. We support only one or two filters 487229159Sadrian * in the chain, so we catch that with the same test. 488229159Sadrian */ 489229159Sadrian#ifdef XZ_DEC_BCJ 490229159Sadrian if (s->temp.buf[1] & 0x3E) 491229159Sadrian#else 492229159Sadrian if (s->temp.buf[1] & 0x3F) 493229159Sadrian#endif 494229159Sadrian return XZ_OPTIONS_ERROR; 495229159Sadrian 496229159Sadrian /* Compressed Size */ 497229159Sadrian if (s->temp.buf[1] & 0x40) { 498229159Sadrian if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size) 499229159Sadrian != XZ_STREAM_END) 500229159Sadrian return XZ_DATA_ERROR; 501229159Sadrian 502229159Sadrian s->block_header.compressed = s->vli; 503229159Sadrian } else { 504229159Sadrian s->block_header.compressed = VLI_UNKNOWN; 505229159Sadrian } 506229159Sadrian 507229159Sadrian /* Uncompressed Size */ 508229159Sadrian if (s->temp.buf[1] & 0x80) { 509229159Sadrian if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size) 510229159Sadrian != XZ_STREAM_END) 511229159Sadrian return XZ_DATA_ERROR; 512229159Sadrian 513229159Sadrian s->block_header.uncompressed = s->vli; 514229159Sadrian } else { 515229159Sadrian s->block_header.uncompressed = VLI_UNKNOWN; 516229159Sadrian } 517229159Sadrian 518229159Sadrian#ifdef XZ_DEC_BCJ 519229159Sadrian /* If there are two filters, the first one must be a BCJ filter. */ 520229159Sadrian s->bcj_active = s->temp.buf[1] & 0x01; 521229159Sadrian if (s->bcj_active) { 522229159Sadrian if (s->temp.size - s->temp.pos < 2) 523229159Sadrian return XZ_OPTIONS_ERROR; 524229159Sadrian 525229159Sadrian ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]); 526229159Sadrian if (ret != XZ_OK) 527229159Sadrian return ret; 528229159Sadrian 529229159Sadrian /* 530229159Sadrian * We don't support custom start offset, 531229159Sadrian * so Size of Properties must be zero. 532229159Sadrian */ 533229159Sadrian if (s->temp.buf[s->temp.pos++] != 0x00) 534229159Sadrian return XZ_OPTIONS_ERROR; 535229159Sadrian } 536229159Sadrian#endif 537229159Sadrian 538229159Sadrian /* Valid Filter Flags always take at least two bytes. */ 539229159Sadrian if (s->temp.size - s->temp.pos < 2) 540229159Sadrian return XZ_DATA_ERROR; 541229159Sadrian 542229159Sadrian /* Filter ID = LZMA2 */ 543229159Sadrian if (s->temp.buf[s->temp.pos++] != 0x21) 544229159Sadrian return XZ_OPTIONS_ERROR; 545229159Sadrian 546229159Sadrian /* Size of Properties = 1-byte Filter Properties */ 547229159Sadrian if (s->temp.buf[s->temp.pos++] != 0x01) 548229159Sadrian return XZ_OPTIONS_ERROR; 549229159Sadrian 550229159Sadrian /* Filter Properties contains LZMA2 dictionary size. */ 551229159Sadrian if (s->temp.size - s->temp.pos < 1) 552229159Sadrian return XZ_DATA_ERROR; 553229159Sadrian 554229159Sadrian ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]); 555229159Sadrian if (ret != XZ_OK) 556229159Sadrian return ret; 557229159Sadrian 558229159Sadrian /* The rest must be Header Padding. */ 559229159Sadrian while (s->temp.pos < s->temp.size) 560229159Sadrian if (s->temp.buf[s->temp.pos++] != 0x00) 561229159Sadrian return XZ_OPTIONS_ERROR; 562229159Sadrian 563229159Sadrian s->temp.pos = 0; 564229159Sadrian s->block.compressed = 0; 565229159Sadrian s->block.uncompressed = 0; 566229159Sadrian 567229159Sadrian return XZ_OK; 568229159Sadrian} 569229159Sadrian 570229159Sadrianstatic enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b) 571229159Sadrian{ 572229159Sadrian enum xz_ret ret; 573229159Sadrian 574229159Sadrian /* 575229159Sadrian * Store the start position for the case when we are in the middle 576229159Sadrian * of the Index field. 577229159Sadrian */ 578229159Sadrian s->in_start = b->in_pos; 579229159Sadrian 580229159Sadrian while (true) { 581229159Sadrian switch (s->sequence) { 582229159Sadrian case SEQ_STREAM_HEADER: 583229159Sadrian /* 584229159Sadrian * Stream Header is copied to s->temp, and then 585229159Sadrian * decoded from there. This way if the caller 586229159Sadrian * gives us only little input at a time, we can 587229159Sadrian * still keep the Stream Header decoding code 588229159Sadrian * simple. Similar approach is used in many places 589229159Sadrian * in this file. 590229159Sadrian */ 591229159Sadrian if (!fill_temp(s, b)) 592229159Sadrian return XZ_OK; 593229159Sadrian 594229159Sadrian /* 595229159Sadrian * If dec_stream_header() returns 596229159Sadrian * XZ_UNSUPPORTED_CHECK, it is still possible 597229159Sadrian * to continue decoding if working in multi-call 598229159Sadrian * mode. Thus, update s->sequence before calling 599229159Sadrian * dec_stream_header(). 600229159Sadrian */ 601229159Sadrian s->sequence = SEQ_BLOCK_START; 602229159Sadrian 603229159Sadrian ret = dec_stream_header(s); 604229159Sadrian if (ret != XZ_OK) 605229159Sadrian return ret; 606229159Sadrian 607229159Sadrian case SEQ_BLOCK_START: 608229159Sadrian /* We need one byte of input to continue. */ 609229159Sadrian if (b->in_pos == b->in_size) 610229159Sadrian return XZ_OK; 611229159Sadrian 612229159Sadrian /* See if this is the beginning of the Index field. */ 613229159Sadrian if (b->in[b->in_pos] == 0) { 614229159Sadrian s->in_start = b->in_pos++; 615229159Sadrian s->sequence = SEQ_INDEX; 616229159Sadrian break; 617229159Sadrian } 618229159Sadrian 619229159Sadrian /* 620229159Sadrian * Calculate the size of the Block Header and 621229159Sadrian * prepare to decode it. 622229159Sadrian */ 623229159Sadrian s->block_header.size 624229159Sadrian = ((uint32_t)b->in[b->in_pos] + 1) * 4; 625229159Sadrian 626229159Sadrian s->temp.size = s->block_header.size; 627229159Sadrian s->temp.pos = 0; 628229159Sadrian s->sequence = SEQ_BLOCK_HEADER; 629229159Sadrian 630229159Sadrian case SEQ_BLOCK_HEADER: 631229159Sadrian if (!fill_temp(s, b)) 632229159Sadrian return XZ_OK; 633229159Sadrian 634229159Sadrian ret = dec_block_header(s); 635229159Sadrian if (ret != XZ_OK) 636229159Sadrian return ret; 637229159Sadrian 638229159Sadrian s->sequence = SEQ_BLOCK_UNCOMPRESS; 639229159Sadrian 640229159Sadrian case SEQ_BLOCK_UNCOMPRESS: 641229159Sadrian ret = dec_block(s, b); 642229159Sadrian if (ret != XZ_STREAM_END) 643229159Sadrian return ret; 644229159Sadrian 645229159Sadrian s->sequence = SEQ_BLOCK_PADDING; 646229159Sadrian 647229159Sadrian case SEQ_BLOCK_PADDING: 648229159Sadrian /* 649229159Sadrian * Size of Compressed Data + Block Padding 650229159Sadrian * must be a multiple of four. We don't need 651229159Sadrian * s->block.compressed for anything else 652229159Sadrian * anymore, so we use it here to test the size 653229159Sadrian * of the Block Padding field. 654229159Sadrian */ 655229159Sadrian while (s->block.compressed & 3) { 656229159Sadrian if (b->in_pos == b->in_size) 657229159Sadrian return XZ_OK; 658229159Sadrian 659229159Sadrian if (b->in[b->in_pos++] != 0) 660229159Sadrian return XZ_DATA_ERROR; 661229159Sadrian 662229159Sadrian ++s->block.compressed; 663229159Sadrian } 664229159Sadrian 665229159Sadrian s->sequence = SEQ_BLOCK_CHECK; 666229159Sadrian 667229159Sadrian case SEQ_BLOCK_CHECK: 668229159Sadrian if (s->check_type == XZ_CHECK_CRC32) { 669262764Sdelphij ret = crc_validate(s, b, 32); 670229159Sadrian if (ret != XZ_STREAM_END) 671229159Sadrian return ret; 672229159Sadrian } 673262764Sdelphij else if (IS_CRC64(s->check_type)) { 674262764Sdelphij ret = crc_validate(s, b, 64); 675262764Sdelphij if (ret != XZ_STREAM_END) 676262764Sdelphij return ret; 677262764Sdelphij } 678229159Sadrian#ifdef XZ_DEC_ANY_CHECK 679229159Sadrian else if (!check_skip(s, b)) { 680229159Sadrian return XZ_OK; 681229159Sadrian } 682229159Sadrian#endif 683229159Sadrian 684229159Sadrian s->sequence = SEQ_BLOCK_START; 685229159Sadrian break; 686229159Sadrian 687229159Sadrian case SEQ_INDEX: 688229159Sadrian ret = dec_index(s, b); 689229159Sadrian if (ret != XZ_STREAM_END) 690229159Sadrian return ret; 691229159Sadrian 692229159Sadrian s->sequence = SEQ_INDEX_PADDING; 693229159Sadrian 694229159Sadrian case SEQ_INDEX_PADDING: 695229159Sadrian while ((s->index.size + (b->in_pos - s->in_start)) 696229159Sadrian & 3) { 697229159Sadrian if (b->in_pos == b->in_size) { 698229159Sadrian index_update(s, b); 699229159Sadrian return XZ_OK; 700229159Sadrian } 701229159Sadrian 702229159Sadrian if (b->in[b->in_pos++] != 0) 703229159Sadrian return XZ_DATA_ERROR; 704229159Sadrian } 705229159Sadrian 706229159Sadrian /* Finish the CRC32 value and Index size. */ 707229159Sadrian index_update(s, b); 708229159Sadrian 709229159Sadrian /* Compare the hashes to validate the Index field. */ 710229159Sadrian if (!memeq(&s->block.hash, &s->index.hash, 711229159Sadrian sizeof(s->block.hash))) 712229159Sadrian return XZ_DATA_ERROR; 713229159Sadrian 714229159Sadrian s->sequence = SEQ_INDEX_CRC32; 715229159Sadrian 716229159Sadrian case SEQ_INDEX_CRC32: 717262764Sdelphij ret = crc_validate(s, b, 32); 718229159Sadrian if (ret != XZ_STREAM_END) 719229159Sadrian return ret; 720229159Sadrian 721229159Sadrian s->temp.size = STREAM_HEADER_SIZE; 722229159Sadrian s->sequence = SEQ_STREAM_FOOTER; 723229159Sadrian 724229159Sadrian case SEQ_STREAM_FOOTER: 725229159Sadrian if (!fill_temp(s, b)) 726229159Sadrian return XZ_OK; 727229159Sadrian 728229159Sadrian return dec_stream_footer(s); 729229159Sadrian } 730229159Sadrian } 731229159Sadrian 732229159Sadrian /* Never reached */ 733229159Sadrian} 734229159Sadrian 735229159Sadrian/* 736229159Sadrian * xz_dec_run() is a wrapper for dec_main() to handle some special cases in 737229159Sadrian * multi-call and single-call decoding. 738229159Sadrian * 739229159Sadrian * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we 740229159Sadrian * are not going to make any progress anymore. This is to prevent the caller 741229159Sadrian * from calling us infinitely when the input file is truncated or otherwise 742229159Sadrian * corrupt. Since zlib-style API allows that the caller fills the input buffer 743229159Sadrian * only when the decoder doesn't produce any new output, we have to be careful 744229159Sadrian * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only 745229159Sadrian * after the second consecutive call to xz_dec_run() that makes no progress. 746229159Sadrian * 747229159Sadrian * In single-call mode, if we couldn't decode everything and no error 748229159Sadrian * occurred, either the input is truncated or the output buffer is too small. 749229159Sadrian * Since we know that the last input byte never produces any output, we know 750229159Sadrian * that if all the input was consumed and decoding wasn't finished, the file 751229159Sadrian * must be corrupt. Otherwise the output buffer has to be too small or the 752229159Sadrian * file is corrupt in a way that decoding it produces too big output. 753229159Sadrian * 754229159Sadrian * If single-call decoding fails, we reset b->in_pos and b->out_pos back to 755229159Sadrian * their original values. This is because with some filter chains there won't 756229159Sadrian * be any valid uncompressed data in the output buffer unless the decoding 757229159Sadrian * actually succeeds (that's the price to pay of using the output buffer as 758229159Sadrian * the workspace). 759229159Sadrian */ 760229159SadrianXZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b) 761229159Sadrian{ 762229159Sadrian size_t in_start; 763229159Sadrian size_t out_start; 764229159Sadrian enum xz_ret ret; 765229159Sadrian 766229159Sadrian if (DEC_IS_SINGLE(s->mode)) 767229159Sadrian xz_dec_reset(s); 768229159Sadrian 769229159Sadrian in_start = b->in_pos; 770229159Sadrian out_start = b->out_pos; 771229159Sadrian ret = dec_main(s, b); 772229159Sadrian 773229159Sadrian if (DEC_IS_SINGLE(s->mode)) { 774229159Sadrian if (ret == XZ_OK) 775229159Sadrian ret = b->in_pos == b->in_size 776229159Sadrian ? XZ_DATA_ERROR : XZ_BUF_ERROR; 777229159Sadrian 778229159Sadrian if (ret != XZ_STREAM_END) { 779229159Sadrian b->in_pos = in_start; 780229159Sadrian b->out_pos = out_start; 781229159Sadrian } 782229159Sadrian 783229159Sadrian } else if (ret == XZ_OK && in_start == b->in_pos 784229159Sadrian && out_start == b->out_pos) { 785229159Sadrian if (s->allow_buf_error) 786229159Sadrian ret = XZ_BUF_ERROR; 787229159Sadrian 788229159Sadrian s->allow_buf_error = true; 789229159Sadrian } else { 790229159Sadrian s->allow_buf_error = false; 791229159Sadrian } 792229159Sadrian 793229159Sadrian return ret; 794229159Sadrian} 795229159Sadrian 796229159SadrianXZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max) 797229159Sadrian{ 798229159Sadrian struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL); 799229159Sadrian if (s == NULL) 800229159Sadrian return NULL; 801229159Sadrian 802229159Sadrian s->mode = mode; 803229159Sadrian 804229159Sadrian#ifdef XZ_DEC_BCJ 805229159Sadrian s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode)); 806229159Sadrian if (s->bcj == NULL) 807229159Sadrian goto error_bcj; 808229159Sadrian#endif 809229159Sadrian 810229159Sadrian s->lzma2 = xz_dec_lzma2_create(mode, dict_max); 811229159Sadrian if (s->lzma2 == NULL) 812229159Sadrian goto error_lzma2; 813229159Sadrian 814229159Sadrian xz_dec_reset(s); 815229159Sadrian return s; 816229159Sadrian 817229159Sadrianerror_lzma2: 818229159Sadrian#ifdef XZ_DEC_BCJ 819229159Sadrian xz_dec_bcj_end(s->bcj); 820229159Sadrianerror_bcj: 821229159Sadrian#endif 822229159Sadrian kfree(s); 823229159Sadrian return NULL; 824229159Sadrian} 825229159Sadrian 826229159SadrianXZ_EXTERN void xz_dec_reset(struct xz_dec *s) 827229159Sadrian{ 828229159Sadrian s->sequence = SEQ_STREAM_HEADER; 829229159Sadrian s->allow_buf_error = false; 830229159Sadrian s->pos = 0; 831262764Sdelphij s->crc = 0; 832229159Sadrian memzero(&s->block, sizeof(s->block)); 833229159Sadrian memzero(&s->index, sizeof(s->index)); 834229159Sadrian s->temp.pos = 0; 835229159Sadrian s->temp.size = STREAM_HEADER_SIZE; 836229159Sadrian} 837229159Sadrian 838229159SadrianXZ_EXTERN void xz_dec_end(struct xz_dec *s) 839229159Sadrian{ 840229159Sadrian if (s != NULL) { 841229159Sadrian xz_dec_lzma2_end(s->lzma2); 842229159Sadrian#ifdef XZ_DEC_BCJ 843229159Sadrian xz_dec_bcj_end(s->bcj); 844229159Sadrian#endif 845229159Sadrian kfree(s); 846229159Sadrian } 847229159Sadrian} 848