xz_dec_stream.c revision 229159
1229159Sadrian/* 2229159Sadrian * .xz Stream decoder 3229159Sadrian * 4229159Sadrian * Author: Lasse Collin <lasse.collin@tukaani.org> 5229159Sadrian * 6229159Sadrian * This file has been put into the public domain. 7229159Sadrian * You can do whatever you want with this file. 8229159Sadrian */ 9229159Sadrian 10229159Sadrian#include "xz_private.h" 11229159Sadrian#include "xz_stream.h" 12229159Sadrian 13229159Sadrian/* Hash used to validate the Index field */ 14229159Sadrianstruct xz_dec_hash { 15229159Sadrian vli_type unpadded; 16229159Sadrian vli_type uncompressed; 17229159Sadrian uint32_t crc32; 18229159Sadrian}; 19229159Sadrian 20229159Sadrianstruct xz_dec { 21229159Sadrian /* Position in dec_main() */ 22229159Sadrian enum { 23229159Sadrian SEQ_STREAM_HEADER, 24229159Sadrian SEQ_BLOCK_START, 25229159Sadrian SEQ_BLOCK_HEADER, 26229159Sadrian SEQ_BLOCK_UNCOMPRESS, 27229159Sadrian SEQ_BLOCK_PADDING, 28229159Sadrian SEQ_BLOCK_CHECK, 29229159Sadrian SEQ_INDEX, 30229159Sadrian SEQ_INDEX_PADDING, 31229159Sadrian SEQ_INDEX_CRC32, 32229159Sadrian SEQ_STREAM_FOOTER 33229159Sadrian } sequence; 34229159Sadrian 35229159Sadrian /* Position in variable-length integers and Check fields */ 36229159Sadrian uint32_t pos; 37229159Sadrian 38229159Sadrian /* Variable-length integer decoded by dec_vli() */ 39229159Sadrian vli_type vli; 40229159Sadrian 41229159Sadrian /* Saved in_pos and out_pos */ 42229159Sadrian size_t in_start; 43229159Sadrian size_t out_start; 44229159Sadrian 45229159Sadrian /* CRC32 value in Block or Index */ 46229159Sadrian uint32_t crc32; 47229159Sadrian 48229159Sadrian /* Type of the integrity check calculated from uncompressed data */ 49229159Sadrian enum xz_check check_type; 50229159Sadrian 51229159Sadrian /* Operation mode */ 52229159Sadrian enum xz_mode mode; 53229159Sadrian 54229159Sadrian /* 55229159Sadrian * True if the next call to xz_dec_run() is allowed to return 56229159Sadrian * XZ_BUF_ERROR. 57229159Sadrian */ 58229159Sadrian bool allow_buf_error; 59229159Sadrian 60229159Sadrian /* Information stored in Block Header */ 61229159Sadrian struct { 62229159Sadrian /* 63229159Sadrian * Value stored in the Compressed Size field, or 64229159Sadrian * VLI_UNKNOWN if Compressed Size is not present. 65229159Sadrian */ 66229159Sadrian vli_type compressed; 67229159Sadrian 68229159Sadrian /* 69229159Sadrian * Value stored in the Uncompressed Size field, or 70229159Sadrian * VLI_UNKNOWN if Uncompressed Size is not present. 71229159Sadrian */ 72229159Sadrian vli_type uncompressed; 73229159Sadrian 74229159Sadrian /* Size of the Block Header field */ 75229159Sadrian uint32_t size; 76229159Sadrian } block_header; 77229159Sadrian 78229159Sadrian /* Information collected when decoding Blocks */ 79229159Sadrian struct { 80229159Sadrian /* Observed compressed size of the current Block */ 81229159Sadrian vli_type compressed; 82229159Sadrian 83229159Sadrian /* Observed uncompressed size of the current Block */ 84229159Sadrian vli_type uncompressed; 85229159Sadrian 86229159Sadrian /* Number of Blocks decoded so far */ 87229159Sadrian vli_type count; 88229159Sadrian 89229159Sadrian /* 90229159Sadrian * Hash calculated from the Block sizes. This is used to 91229159Sadrian * validate the Index field. 92229159Sadrian */ 93229159Sadrian struct xz_dec_hash hash; 94229159Sadrian } block; 95229159Sadrian 96229159Sadrian /* Variables needed when verifying the Index field */ 97229159Sadrian struct { 98229159Sadrian /* Position in dec_index() */ 99229159Sadrian enum { 100229159Sadrian SEQ_INDEX_COUNT, 101229159Sadrian SEQ_INDEX_UNPADDED, 102229159Sadrian SEQ_INDEX_UNCOMPRESSED 103229159Sadrian } sequence; 104229159Sadrian 105229159Sadrian /* Size of the Index in bytes */ 106229159Sadrian vli_type size; 107229159Sadrian 108229159Sadrian /* Number of Records (matches block.count in valid files) */ 109229159Sadrian vli_type count; 110229159Sadrian 111229159Sadrian /* 112229159Sadrian * Hash calculated from the Records (matches block.hash in 113229159Sadrian * valid files). 114229159Sadrian */ 115229159Sadrian struct xz_dec_hash hash; 116229159Sadrian } index; 117229159Sadrian 118229159Sadrian /* 119229159Sadrian * Temporary buffer needed to hold Stream Header, Block Header, 120229159Sadrian * and Stream Footer. The Block Header is the biggest (1 KiB) 121229159Sadrian * so we reserve space according to that. buf[] has to be aligned 122229159Sadrian * to a multiple of four bytes; the size_t variables before it 123229159Sadrian * should guarantee this. 124229159Sadrian */ 125229159Sadrian struct { 126229159Sadrian size_t pos; 127229159Sadrian size_t size; 128229159Sadrian uint8_t buf[1024]; 129229159Sadrian } temp; 130229159Sadrian 131229159Sadrian struct xz_dec_lzma2 *lzma2; 132229159Sadrian 133229159Sadrian#ifdef XZ_DEC_BCJ 134229159Sadrian struct xz_dec_bcj *bcj; 135229159Sadrian bool bcj_active; 136229159Sadrian#endif 137229159Sadrian}; 138229159Sadrian 139229159Sadrian#ifdef XZ_DEC_ANY_CHECK 140229159Sadrian/* Sizes of the Check field with different Check IDs */ 141229159Sadrianstatic const uint8_t check_sizes[16] = { 142229159Sadrian 0, 143229159Sadrian 4, 4, 4, 144229159Sadrian 8, 8, 8, 145229159Sadrian 16, 16, 16, 146229159Sadrian 32, 32, 32, 147229159Sadrian 64, 64, 64 148229159Sadrian}; 149229159Sadrian#endif 150229159Sadrian 151229159Sadrian/* 152229159Sadrian * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller 153229159Sadrian * must have set s->temp.pos to indicate how much data we are supposed 154229159Sadrian * to copy into s->temp.buf. Return true once s->temp.pos has reached 155229159Sadrian * s->temp.size. 156229159Sadrian */ 157229159Sadrianstatic bool fill_temp(struct xz_dec *s, struct xz_buf *b) 158229159Sadrian{ 159229159Sadrian size_t copy_size = min_t(size_t, 160229159Sadrian b->in_size - b->in_pos, s->temp.size - s->temp.pos); 161229159Sadrian 162229159Sadrian memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size); 163229159Sadrian b->in_pos += copy_size; 164229159Sadrian s->temp.pos += copy_size; 165229159Sadrian 166229159Sadrian if (s->temp.pos == s->temp.size) { 167229159Sadrian s->temp.pos = 0; 168229159Sadrian return true; 169229159Sadrian } 170229159Sadrian 171229159Sadrian return false; 172229159Sadrian} 173229159Sadrian 174229159Sadrian/* Decode a variable-length integer (little-endian base-128 encoding) */ 175229159Sadrianstatic enum xz_ret dec_vli(struct xz_dec *s, const uint8_t *in, 176229159Sadrian size_t *in_pos, size_t in_size) 177229159Sadrian{ 178229159Sadrian uint8_t byte; 179229159Sadrian 180229159Sadrian if (s->pos == 0) 181229159Sadrian s->vli = 0; 182229159Sadrian 183229159Sadrian while (*in_pos < in_size) { 184229159Sadrian byte = in[*in_pos]; 185229159Sadrian ++*in_pos; 186229159Sadrian 187229159Sadrian s->vli |= (vli_type)(byte & 0x7F) << s->pos; 188229159Sadrian 189229159Sadrian if ((byte & 0x80) == 0) { 190229159Sadrian /* Don't allow non-minimal encodings. */ 191229159Sadrian if (byte == 0 && s->pos != 0) 192229159Sadrian return XZ_DATA_ERROR; 193229159Sadrian 194229159Sadrian s->pos = 0; 195229159Sadrian return XZ_STREAM_END; 196229159Sadrian } 197229159Sadrian 198229159Sadrian s->pos += 7; 199229159Sadrian if (s->pos == 7 * VLI_BYTES_MAX) 200229159Sadrian return XZ_DATA_ERROR; 201229159Sadrian } 202229159Sadrian 203229159Sadrian return XZ_OK; 204229159Sadrian} 205229159Sadrian 206229159Sadrian/* 207229159Sadrian * Decode the Compressed Data field from a Block. Update and validate 208229159Sadrian * the observed compressed and uncompressed sizes of the Block so that 209229159Sadrian * they don't exceed the values possibly stored in the Block Header 210229159Sadrian * (validation assumes that no integer overflow occurs, since vli_type 211229159Sadrian * is normally uint64_t). Update the CRC32 if presence of the CRC32 212229159Sadrian * field was indicated in Stream Header. 213229159Sadrian * 214229159Sadrian * Once the decoding is finished, validate that the observed sizes match 215229159Sadrian * the sizes possibly stored in the Block Header. Update the hash and 216229159Sadrian * Block count, which are later used to validate the Index field. 217229159Sadrian */ 218229159Sadrianstatic enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b) 219229159Sadrian{ 220229159Sadrian enum xz_ret ret; 221229159Sadrian 222229159Sadrian s->in_start = b->in_pos; 223229159Sadrian s->out_start = b->out_pos; 224229159Sadrian 225229159Sadrian#ifdef XZ_DEC_BCJ 226229159Sadrian if (s->bcj_active) 227229159Sadrian ret = xz_dec_bcj_run(s->bcj, s->lzma2, b); 228229159Sadrian else 229229159Sadrian#endif 230229159Sadrian ret = xz_dec_lzma2_run(s->lzma2, b); 231229159Sadrian 232229159Sadrian s->block.compressed += b->in_pos - s->in_start; 233229159Sadrian s->block.uncompressed += b->out_pos - s->out_start; 234229159Sadrian 235229159Sadrian /* 236229159Sadrian * There is no need to separately check for VLI_UNKNOWN, since 237229159Sadrian * the observed sizes are always smaller than VLI_UNKNOWN. 238229159Sadrian */ 239229159Sadrian if (s->block.compressed > s->block_header.compressed 240229159Sadrian || s->block.uncompressed 241229159Sadrian > s->block_header.uncompressed) 242229159Sadrian return XZ_DATA_ERROR; 243229159Sadrian 244229159Sadrian if (s->check_type == XZ_CHECK_CRC32) 245229159Sadrian s->crc32 = xz_crc32(b->out + s->out_start, 246229159Sadrian b->out_pos - s->out_start, s->crc32); 247229159Sadrian 248229159Sadrian if (ret == XZ_STREAM_END) { 249229159Sadrian if (s->block_header.compressed != VLI_UNKNOWN 250229159Sadrian && s->block_header.compressed 251229159Sadrian != s->block.compressed) 252229159Sadrian return XZ_DATA_ERROR; 253229159Sadrian 254229159Sadrian if (s->block_header.uncompressed != VLI_UNKNOWN 255229159Sadrian && s->block_header.uncompressed 256229159Sadrian != s->block.uncompressed) 257229159Sadrian return XZ_DATA_ERROR; 258229159Sadrian 259229159Sadrian s->block.hash.unpadded += s->block_header.size 260229159Sadrian + s->block.compressed; 261229159Sadrian 262229159Sadrian#ifdef XZ_DEC_ANY_CHECK 263229159Sadrian s->block.hash.unpadded += check_sizes[s->check_type]; 264229159Sadrian#else 265229159Sadrian if (s->check_type == XZ_CHECK_CRC32) 266229159Sadrian s->block.hash.unpadded += 4; 267229159Sadrian#endif 268229159Sadrian 269229159Sadrian s->block.hash.uncompressed += s->block.uncompressed; 270229159Sadrian s->block.hash.crc32 = xz_crc32( 271229159Sadrian (const uint8_t *)&s->block.hash, 272229159Sadrian sizeof(s->block.hash), s->block.hash.crc32); 273229159Sadrian 274229159Sadrian ++s->block.count; 275229159Sadrian } 276229159Sadrian 277229159Sadrian return ret; 278229159Sadrian} 279229159Sadrian 280229159Sadrian/* Update the Index size and the CRC32 value. */ 281229159Sadrianstatic void index_update(struct xz_dec *s, const struct xz_buf *b) 282229159Sadrian{ 283229159Sadrian size_t in_used = b->in_pos - s->in_start; 284229159Sadrian s->index.size += in_used; 285229159Sadrian s->crc32 = xz_crc32(b->in + s->in_start, in_used, s->crc32); 286229159Sadrian} 287229159Sadrian 288229159Sadrian/* 289229159Sadrian * Decode the Number of Records, Unpadded Size, and Uncompressed Size 290229159Sadrian * fields from the Index field. That is, Index Padding and CRC32 are not 291229159Sadrian * decoded by this function. 292229159Sadrian * 293229159Sadrian * This can return XZ_OK (more input needed), XZ_STREAM_END (everything 294229159Sadrian * successfully decoded), or XZ_DATA_ERROR (input is corrupt). 295229159Sadrian */ 296229159Sadrianstatic enum xz_ret dec_index(struct xz_dec *s, struct xz_buf *b) 297229159Sadrian{ 298229159Sadrian enum xz_ret ret; 299229159Sadrian 300229159Sadrian do { 301229159Sadrian ret = dec_vli(s, b->in, &b->in_pos, b->in_size); 302229159Sadrian if (ret != XZ_STREAM_END) { 303229159Sadrian index_update(s, b); 304229159Sadrian return ret; 305229159Sadrian } 306229159Sadrian 307229159Sadrian switch (s->index.sequence) { 308229159Sadrian case SEQ_INDEX_COUNT: 309229159Sadrian s->index.count = s->vli; 310229159Sadrian 311229159Sadrian /* 312229159Sadrian * Validate that the Number of Records field 313229159Sadrian * indicates the same number of Records as 314229159Sadrian * there were Blocks in the Stream. 315229159Sadrian */ 316229159Sadrian if (s->index.count != s->block.count) 317229159Sadrian return XZ_DATA_ERROR; 318229159Sadrian 319229159Sadrian s->index.sequence = SEQ_INDEX_UNPADDED; 320229159Sadrian break; 321229159Sadrian 322229159Sadrian case SEQ_INDEX_UNPADDED: 323229159Sadrian s->index.hash.unpadded += s->vli; 324229159Sadrian s->index.sequence = SEQ_INDEX_UNCOMPRESSED; 325229159Sadrian break; 326229159Sadrian 327229159Sadrian case SEQ_INDEX_UNCOMPRESSED: 328229159Sadrian s->index.hash.uncompressed += s->vli; 329229159Sadrian s->index.hash.crc32 = xz_crc32( 330229159Sadrian (const uint8_t *)&s->index.hash, 331229159Sadrian sizeof(s->index.hash), 332229159Sadrian s->index.hash.crc32); 333229159Sadrian --s->index.count; 334229159Sadrian s->index.sequence = SEQ_INDEX_UNPADDED; 335229159Sadrian break; 336229159Sadrian } 337229159Sadrian } while (s->index.count > 0); 338229159Sadrian 339229159Sadrian return XZ_STREAM_END; 340229159Sadrian} 341229159Sadrian 342229159Sadrian/* 343229159Sadrian * Validate that the next four input bytes match the value of s->crc32. 344229159Sadrian * s->pos must be zero when starting to validate the first byte. 345229159Sadrian */ 346229159Sadrianstatic enum xz_ret crc32_validate(struct xz_dec *s, struct xz_buf *b) 347229159Sadrian{ 348229159Sadrian do { 349229159Sadrian if (b->in_pos == b->in_size) 350229159Sadrian return XZ_OK; 351229159Sadrian 352229159Sadrian if (((s->crc32 >> s->pos) & 0xFF) != b->in[b->in_pos++]) 353229159Sadrian return XZ_DATA_ERROR; 354229159Sadrian 355229159Sadrian s->pos += 8; 356229159Sadrian 357229159Sadrian } while (s->pos < 32); 358229159Sadrian 359229159Sadrian s->crc32 = 0; 360229159Sadrian s->pos = 0; 361229159Sadrian 362229159Sadrian return XZ_STREAM_END; 363229159Sadrian} 364229159Sadrian 365229159Sadrian#ifdef XZ_DEC_ANY_CHECK 366229159Sadrian/* 367229159Sadrian * Skip over the Check field when the Check ID is not supported. 368229159Sadrian * Returns true once the whole Check field has been skipped over. 369229159Sadrian */ 370229159Sadrianstatic bool check_skip(struct xz_dec *s, struct xz_buf *b) 371229159Sadrian{ 372229159Sadrian while (s->pos < check_sizes[s->check_type]) { 373229159Sadrian if (b->in_pos == b->in_size) 374229159Sadrian return false; 375229159Sadrian 376229159Sadrian ++b->in_pos; 377229159Sadrian ++s->pos; 378229159Sadrian } 379229159Sadrian 380229159Sadrian s->pos = 0; 381229159Sadrian 382229159Sadrian return true; 383229159Sadrian} 384229159Sadrian#endif 385229159Sadrian 386229159Sadrian/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */ 387229159Sadrianstatic enum xz_ret dec_stream_header(struct xz_dec *s) 388229159Sadrian{ 389229159Sadrian if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE)) 390229159Sadrian return XZ_FORMAT_ERROR; 391229159Sadrian 392229159Sadrian if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0) 393229159Sadrian != get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2)) 394229159Sadrian return XZ_DATA_ERROR; 395229159Sadrian 396229159Sadrian if (s->temp.buf[HEADER_MAGIC_SIZE] != 0) 397229159Sadrian return XZ_OPTIONS_ERROR; 398229159Sadrian 399229159Sadrian /* 400229159Sadrian * Of integrity checks, we support only none (Check ID = 0) and 401229159Sadrian * CRC32 (Check ID = 1). However, if XZ_DEC_ANY_CHECK is defined, 402229159Sadrian * we will accept other check types too, but then the check won't 403229159Sadrian * be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given. 404229159Sadrian */ 405229159Sadrian s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1]; 406229159Sadrian 407229159Sadrian#ifdef XZ_DEC_ANY_CHECK 408229159Sadrian if (s->check_type > XZ_CHECK_MAX) 409229159Sadrian return XZ_OPTIONS_ERROR; 410229159Sadrian 411229159Sadrian if (s->check_type > XZ_CHECK_CRC32) 412229159Sadrian return XZ_UNSUPPORTED_CHECK; 413229159Sadrian#else 414229159Sadrian if (s->check_type > XZ_CHECK_CRC32) 415229159Sadrian return XZ_OPTIONS_ERROR; 416229159Sadrian#endif 417229159Sadrian 418229159Sadrian return XZ_OK; 419229159Sadrian} 420229159Sadrian 421229159Sadrian/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */ 422229159Sadrianstatic enum xz_ret dec_stream_footer(struct xz_dec *s) 423229159Sadrian{ 424229159Sadrian if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE)) 425229159Sadrian return XZ_DATA_ERROR; 426229159Sadrian 427229159Sadrian if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf)) 428229159Sadrian return XZ_DATA_ERROR; 429229159Sadrian 430229159Sadrian /* 431229159Sadrian * Validate Backward Size. Note that we never added the size of the 432229159Sadrian * Index CRC32 field to s->index.size, thus we use s->index.size / 4 433229159Sadrian * instead of s->index.size / 4 - 1. 434229159Sadrian */ 435229159Sadrian if ((s->index.size >> 2) != get_le32(s->temp.buf + 4)) 436229159Sadrian return XZ_DATA_ERROR; 437229159Sadrian 438229159Sadrian if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type) 439229159Sadrian return XZ_DATA_ERROR; 440229159Sadrian 441229159Sadrian /* 442229159Sadrian * Use XZ_STREAM_END instead of XZ_OK to be more convenient 443229159Sadrian * for the caller. 444229159Sadrian */ 445229159Sadrian return XZ_STREAM_END; 446229159Sadrian} 447229159Sadrian 448229159Sadrian/* Decode the Block Header and initialize the filter chain. */ 449229159Sadrianstatic enum xz_ret dec_block_header(struct xz_dec *s) 450229159Sadrian{ 451229159Sadrian enum xz_ret ret; 452229159Sadrian 453229159Sadrian /* 454229159Sadrian * Validate the CRC32. We know that the temp buffer is at least 455229159Sadrian * eight bytes so this is safe. 456229159Sadrian */ 457229159Sadrian s->temp.size -= 4; 458229159Sadrian if (xz_crc32(s->temp.buf, s->temp.size, 0) 459229159Sadrian != get_le32(s->temp.buf + s->temp.size)) 460229159Sadrian return XZ_DATA_ERROR; 461229159Sadrian 462229159Sadrian s->temp.pos = 2; 463229159Sadrian 464229159Sadrian /* 465229159Sadrian * Catch unsupported Block Flags. We support only one or two filters 466229159Sadrian * in the chain, so we catch that with the same test. 467229159Sadrian */ 468229159Sadrian#ifdef XZ_DEC_BCJ 469229159Sadrian if (s->temp.buf[1] & 0x3E) 470229159Sadrian#else 471229159Sadrian if (s->temp.buf[1] & 0x3F) 472229159Sadrian#endif 473229159Sadrian return XZ_OPTIONS_ERROR; 474229159Sadrian 475229159Sadrian /* Compressed Size */ 476229159Sadrian if (s->temp.buf[1] & 0x40) { 477229159Sadrian if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size) 478229159Sadrian != XZ_STREAM_END) 479229159Sadrian return XZ_DATA_ERROR; 480229159Sadrian 481229159Sadrian s->block_header.compressed = s->vli; 482229159Sadrian } else { 483229159Sadrian s->block_header.compressed = VLI_UNKNOWN; 484229159Sadrian } 485229159Sadrian 486229159Sadrian /* Uncompressed Size */ 487229159Sadrian if (s->temp.buf[1] & 0x80) { 488229159Sadrian if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size) 489229159Sadrian != XZ_STREAM_END) 490229159Sadrian return XZ_DATA_ERROR; 491229159Sadrian 492229159Sadrian s->block_header.uncompressed = s->vli; 493229159Sadrian } else { 494229159Sadrian s->block_header.uncompressed = VLI_UNKNOWN; 495229159Sadrian } 496229159Sadrian 497229159Sadrian#ifdef XZ_DEC_BCJ 498229159Sadrian /* If there are two filters, the first one must be a BCJ filter. */ 499229159Sadrian s->bcj_active = s->temp.buf[1] & 0x01; 500229159Sadrian if (s->bcj_active) { 501229159Sadrian if (s->temp.size - s->temp.pos < 2) 502229159Sadrian return XZ_OPTIONS_ERROR; 503229159Sadrian 504229159Sadrian ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]); 505229159Sadrian if (ret != XZ_OK) 506229159Sadrian return ret; 507229159Sadrian 508229159Sadrian /* 509229159Sadrian * We don't support custom start offset, 510229159Sadrian * so Size of Properties must be zero. 511229159Sadrian */ 512229159Sadrian if (s->temp.buf[s->temp.pos++] != 0x00) 513229159Sadrian return XZ_OPTIONS_ERROR; 514229159Sadrian } 515229159Sadrian#endif 516229159Sadrian 517229159Sadrian /* Valid Filter Flags always take at least two bytes. */ 518229159Sadrian if (s->temp.size - s->temp.pos < 2) 519229159Sadrian return XZ_DATA_ERROR; 520229159Sadrian 521229159Sadrian /* Filter ID = LZMA2 */ 522229159Sadrian if (s->temp.buf[s->temp.pos++] != 0x21) 523229159Sadrian return XZ_OPTIONS_ERROR; 524229159Sadrian 525229159Sadrian /* Size of Properties = 1-byte Filter Properties */ 526229159Sadrian if (s->temp.buf[s->temp.pos++] != 0x01) 527229159Sadrian return XZ_OPTIONS_ERROR; 528229159Sadrian 529229159Sadrian /* Filter Properties contains LZMA2 dictionary size. */ 530229159Sadrian if (s->temp.size - s->temp.pos < 1) 531229159Sadrian return XZ_DATA_ERROR; 532229159Sadrian 533229159Sadrian ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]); 534229159Sadrian if (ret != XZ_OK) 535229159Sadrian return ret; 536229159Sadrian 537229159Sadrian /* The rest must be Header Padding. */ 538229159Sadrian while (s->temp.pos < s->temp.size) 539229159Sadrian if (s->temp.buf[s->temp.pos++] != 0x00) 540229159Sadrian return XZ_OPTIONS_ERROR; 541229159Sadrian 542229159Sadrian s->temp.pos = 0; 543229159Sadrian s->block.compressed = 0; 544229159Sadrian s->block.uncompressed = 0; 545229159Sadrian 546229159Sadrian return XZ_OK; 547229159Sadrian} 548229159Sadrian 549229159Sadrianstatic enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b) 550229159Sadrian{ 551229159Sadrian enum xz_ret ret; 552229159Sadrian 553229159Sadrian /* 554229159Sadrian * Store the start position for the case when we are in the middle 555229159Sadrian * of the Index field. 556229159Sadrian */ 557229159Sadrian s->in_start = b->in_pos; 558229159Sadrian 559229159Sadrian while (true) { 560229159Sadrian switch (s->sequence) { 561229159Sadrian case SEQ_STREAM_HEADER: 562229159Sadrian /* 563229159Sadrian * Stream Header is copied to s->temp, and then 564229159Sadrian * decoded from there. This way if the caller 565229159Sadrian * gives us only little input at a time, we can 566229159Sadrian * still keep the Stream Header decoding code 567229159Sadrian * simple. Similar approach is used in many places 568229159Sadrian * in this file. 569229159Sadrian */ 570229159Sadrian if (!fill_temp(s, b)) 571229159Sadrian return XZ_OK; 572229159Sadrian 573229159Sadrian /* 574229159Sadrian * If dec_stream_header() returns 575229159Sadrian * XZ_UNSUPPORTED_CHECK, it is still possible 576229159Sadrian * to continue decoding if working in multi-call 577229159Sadrian * mode. Thus, update s->sequence before calling 578229159Sadrian * dec_stream_header(). 579229159Sadrian */ 580229159Sadrian s->sequence = SEQ_BLOCK_START; 581229159Sadrian 582229159Sadrian ret = dec_stream_header(s); 583229159Sadrian if (ret != XZ_OK) 584229159Sadrian return ret; 585229159Sadrian 586229159Sadrian case SEQ_BLOCK_START: 587229159Sadrian /* We need one byte of input to continue. */ 588229159Sadrian if (b->in_pos == b->in_size) 589229159Sadrian return XZ_OK; 590229159Sadrian 591229159Sadrian /* See if this is the beginning of the Index field. */ 592229159Sadrian if (b->in[b->in_pos] == 0) { 593229159Sadrian s->in_start = b->in_pos++; 594229159Sadrian s->sequence = SEQ_INDEX; 595229159Sadrian break; 596229159Sadrian } 597229159Sadrian 598229159Sadrian /* 599229159Sadrian * Calculate the size of the Block Header and 600229159Sadrian * prepare to decode it. 601229159Sadrian */ 602229159Sadrian s->block_header.size 603229159Sadrian = ((uint32_t)b->in[b->in_pos] + 1) * 4; 604229159Sadrian 605229159Sadrian s->temp.size = s->block_header.size; 606229159Sadrian s->temp.pos = 0; 607229159Sadrian s->sequence = SEQ_BLOCK_HEADER; 608229159Sadrian 609229159Sadrian case SEQ_BLOCK_HEADER: 610229159Sadrian if (!fill_temp(s, b)) 611229159Sadrian return XZ_OK; 612229159Sadrian 613229159Sadrian ret = dec_block_header(s); 614229159Sadrian if (ret != XZ_OK) 615229159Sadrian return ret; 616229159Sadrian 617229159Sadrian s->sequence = SEQ_BLOCK_UNCOMPRESS; 618229159Sadrian 619229159Sadrian case SEQ_BLOCK_UNCOMPRESS: 620229159Sadrian ret = dec_block(s, b); 621229159Sadrian if (ret != XZ_STREAM_END) 622229159Sadrian return ret; 623229159Sadrian 624229159Sadrian s->sequence = SEQ_BLOCK_PADDING; 625229159Sadrian 626229159Sadrian case SEQ_BLOCK_PADDING: 627229159Sadrian /* 628229159Sadrian * Size of Compressed Data + Block Padding 629229159Sadrian * must be a multiple of four. We don't need 630229159Sadrian * s->block.compressed for anything else 631229159Sadrian * anymore, so we use it here to test the size 632229159Sadrian * of the Block Padding field. 633229159Sadrian */ 634229159Sadrian while (s->block.compressed & 3) { 635229159Sadrian if (b->in_pos == b->in_size) 636229159Sadrian return XZ_OK; 637229159Sadrian 638229159Sadrian if (b->in[b->in_pos++] != 0) 639229159Sadrian return XZ_DATA_ERROR; 640229159Sadrian 641229159Sadrian ++s->block.compressed; 642229159Sadrian } 643229159Sadrian 644229159Sadrian s->sequence = SEQ_BLOCK_CHECK; 645229159Sadrian 646229159Sadrian case SEQ_BLOCK_CHECK: 647229159Sadrian if (s->check_type == XZ_CHECK_CRC32) { 648229159Sadrian ret = crc32_validate(s, b); 649229159Sadrian if (ret != XZ_STREAM_END) 650229159Sadrian return ret; 651229159Sadrian } 652229159Sadrian#ifdef XZ_DEC_ANY_CHECK 653229159Sadrian else if (!check_skip(s, b)) { 654229159Sadrian return XZ_OK; 655229159Sadrian } 656229159Sadrian#endif 657229159Sadrian 658229159Sadrian s->sequence = SEQ_BLOCK_START; 659229159Sadrian break; 660229159Sadrian 661229159Sadrian case SEQ_INDEX: 662229159Sadrian ret = dec_index(s, b); 663229159Sadrian if (ret != XZ_STREAM_END) 664229159Sadrian return ret; 665229159Sadrian 666229159Sadrian s->sequence = SEQ_INDEX_PADDING; 667229159Sadrian 668229159Sadrian case SEQ_INDEX_PADDING: 669229159Sadrian while ((s->index.size + (b->in_pos - s->in_start)) 670229159Sadrian & 3) { 671229159Sadrian if (b->in_pos == b->in_size) { 672229159Sadrian index_update(s, b); 673229159Sadrian return XZ_OK; 674229159Sadrian } 675229159Sadrian 676229159Sadrian if (b->in[b->in_pos++] != 0) 677229159Sadrian return XZ_DATA_ERROR; 678229159Sadrian } 679229159Sadrian 680229159Sadrian /* Finish the CRC32 value and Index size. */ 681229159Sadrian index_update(s, b); 682229159Sadrian 683229159Sadrian /* Compare the hashes to validate the Index field. */ 684229159Sadrian if (!memeq(&s->block.hash, &s->index.hash, 685229159Sadrian sizeof(s->block.hash))) 686229159Sadrian return XZ_DATA_ERROR; 687229159Sadrian 688229159Sadrian s->sequence = SEQ_INDEX_CRC32; 689229159Sadrian 690229159Sadrian case SEQ_INDEX_CRC32: 691229159Sadrian ret = crc32_validate(s, b); 692229159Sadrian if (ret != XZ_STREAM_END) 693229159Sadrian return ret; 694229159Sadrian 695229159Sadrian s->temp.size = STREAM_HEADER_SIZE; 696229159Sadrian s->sequence = SEQ_STREAM_FOOTER; 697229159Sadrian 698229159Sadrian case SEQ_STREAM_FOOTER: 699229159Sadrian if (!fill_temp(s, b)) 700229159Sadrian return XZ_OK; 701229159Sadrian 702229159Sadrian return dec_stream_footer(s); 703229159Sadrian } 704229159Sadrian } 705229159Sadrian 706229159Sadrian /* Never reached */ 707229159Sadrian} 708229159Sadrian 709229159Sadrian/* 710229159Sadrian * xz_dec_run() is a wrapper for dec_main() to handle some special cases in 711229159Sadrian * multi-call and single-call decoding. 712229159Sadrian * 713229159Sadrian * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we 714229159Sadrian * are not going to make any progress anymore. This is to prevent the caller 715229159Sadrian * from calling us infinitely when the input file is truncated or otherwise 716229159Sadrian * corrupt. Since zlib-style API allows that the caller fills the input buffer 717229159Sadrian * only when the decoder doesn't produce any new output, we have to be careful 718229159Sadrian * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only 719229159Sadrian * after the second consecutive call to xz_dec_run() that makes no progress. 720229159Sadrian * 721229159Sadrian * In single-call mode, if we couldn't decode everything and no error 722229159Sadrian * occurred, either the input is truncated or the output buffer is too small. 723229159Sadrian * Since we know that the last input byte never produces any output, we know 724229159Sadrian * that if all the input was consumed and decoding wasn't finished, the file 725229159Sadrian * must be corrupt. Otherwise the output buffer has to be too small or the 726229159Sadrian * file is corrupt in a way that decoding it produces too big output. 727229159Sadrian * 728229159Sadrian * If single-call decoding fails, we reset b->in_pos and b->out_pos back to 729229159Sadrian * their original values. This is because with some filter chains there won't 730229159Sadrian * be any valid uncompressed data in the output buffer unless the decoding 731229159Sadrian * actually succeeds (that's the price to pay of using the output buffer as 732229159Sadrian * the workspace). 733229159Sadrian */ 734229159SadrianXZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b) 735229159Sadrian{ 736229159Sadrian size_t in_start; 737229159Sadrian size_t out_start; 738229159Sadrian enum xz_ret ret; 739229159Sadrian 740229159Sadrian if (DEC_IS_SINGLE(s->mode)) 741229159Sadrian xz_dec_reset(s); 742229159Sadrian 743229159Sadrian in_start = b->in_pos; 744229159Sadrian out_start = b->out_pos; 745229159Sadrian ret = dec_main(s, b); 746229159Sadrian 747229159Sadrian if (DEC_IS_SINGLE(s->mode)) { 748229159Sadrian if (ret == XZ_OK) 749229159Sadrian ret = b->in_pos == b->in_size 750229159Sadrian ? XZ_DATA_ERROR : XZ_BUF_ERROR; 751229159Sadrian 752229159Sadrian if (ret != XZ_STREAM_END) { 753229159Sadrian b->in_pos = in_start; 754229159Sadrian b->out_pos = out_start; 755229159Sadrian } 756229159Sadrian 757229159Sadrian } else if (ret == XZ_OK && in_start == b->in_pos 758229159Sadrian && out_start == b->out_pos) { 759229159Sadrian if (s->allow_buf_error) 760229159Sadrian ret = XZ_BUF_ERROR; 761229159Sadrian 762229159Sadrian s->allow_buf_error = true; 763229159Sadrian } else { 764229159Sadrian s->allow_buf_error = false; 765229159Sadrian } 766229159Sadrian 767229159Sadrian return ret; 768229159Sadrian} 769229159Sadrian 770229159SadrianXZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max) 771229159Sadrian{ 772229159Sadrian struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL); 773229159Sadrian if (s == NULL) 774229159Sadrian return NULL; 775229159Sadrian 776229159Sadrian s->mode = mode; 777229159Sadrian 778229159Sadrian#ifdef XZ_DEC_BCJ 779229159Sadrian s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode)); 780229159Sadrian if (s->bcj == NULL) 781229159Sadrian goto error_bcj; 782229159Sadrian#endif 783229159Sadrian 784229159Sadrian s->lzma2 = xz_dec_lzma2_create(mode, dict_max); 785229159Sadrian if (s->lzma2 == NULL) 786229159Sadrian goto error_lzma2; 787229159Sadrian 788229159Sadrian xz_dec_reset(s); 789229159Sadrian return s; 790229159Sadrian 791229159Sadrianerror_lzma2: 792229159Sadrian#ifdef XZ_DEC_BCJ 793229159Sadrian xz_dec_bcj_end(s->bcj); 794229159Sadrianerror_bcj: 795229159Sadrian#endif 796229159Sadrian kfree(s); 797229159Sadrian return NULL; 798229159Sadrian} 799229159Sadrian 800229159SadrianXZ_EXTERN void xz_dec_reset(struct xz_dec *s) 801229159Sadrian{ 802229159Sadrian s->sequence = SEQ_STREAM_HEADER; 803229159Sadrian s->allow_buf_error = false; 804229159Sadrian s->pos = 0; 805229159Sadrian s->crc32 = 0; 806229159Sadrian memzero(&s->block, sizeof(s->block)); 807229159Sadrian memzero(&s->index, sizeof(s->index)); 808229159Sadrian s->temp.pos = 0; 809229159Sadrian s->temp.size = STREAM_HEADER_SIZE; 810229159Sadrian} 811229159Sadrian 812229159SadrianXZ_EXTERN void xz_dec_end(struct xz_dec *s) 813229159Sadrian{ 814229159Sadrian if (s != NULL) { 815229159Sadrian xz_dec_lzma2_end(s->lzma2); 816229159Sadrian#ifdef XZ_DEC_BCJ 817229159Sadrian xz_dec_bcj_end(s->bcj); 818229159Sadrian#endif 819229159Sadrian kfree(s); 820229159Sadrian } 821229159Sadrian} 822