/* vi: set sw=4 ts=4: */
/*
 * Small lzma decompression implementation.
 * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
 *
 * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
 * Copyright (C) 1999-2005 Igor Pavlov
 *
 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
 */

#include "libbb.h"
#include "unarchive.h"

#if ENABLE_FEATURE_LZMA_FAST
# define speed_inline ALWAYS_INLINE
#else
# define speed_inline
#endif


typedef struct {
	int fd;
	uint8_t *ptr;

/* Was keeping rc on the stack in unlzma and allocating the buffer separately,
 * but with the buffer "attached to" the allocated rc the code is smaller: */
	/* uint8_t *buffer; */
#define RC_BUFFER ((uint8_t*)(rc+1))

	uint8_t *buffer_end;

/* Had provisions for a variable-size buffer, but we don't need it here */
	/* int buffer_size; */
#define RC_BUFFER_SIZE 0x10000

	uint32_t code;
	uint32_t range;
	uint32_t bound;
} rc_t;

#define RC_TOP_BITS 24
#define RC_MOVE_BITS 5
#define RC_MODEL_TOTAL_BITS 11


/* Called twice: once at startup and once in rc_normalize() */
static void rc_read(rc_t *rc)
{
	int buffer_size = safe_read(rc->fd, RC_BUFFER, RC_BUFFER_SIZE);
	if (buffer_size <= 0)
		bb_error_msg_and_die("unexpected EOF");
	rc->ptr = RC_BUFFER;
	rc->buffer_end = RC_BUFFER + buffer_size;
}

/* Called once */
static rc_t* rc_init(int fd) /*, int buffer_size) */
{
	int i;
	rc_t *rc;

	rc = xmalloc(sizeof(rc_t) + RC_BUFFER_SIZE);

	rc->fd = fd;
	/* rc->buffer_size = buffer_size; */
	rc->buffer_end = RC_BUFFER + RC_BUFFER_SIZE;
	rc->ptr = rc->buffer_end;

	rc->code = 0;
	rc->range = 0xFFFFFFFF;
	for (i = 0; i < 5; i++) {
		if (rc->ptr >= rc->buffer_end)
			rc_read(rc);
		rc->code = (rc->code << 8) | *rc->ptr++;
	}
	return rc;
}

/* Called once */
static ALWAYS_INLINE void rc_free(rc_t *rc)
{
	if (ENABLE_FEATURE_CLEAN_UP)
		free(rc);
}

/* Called twice, but one callsite is in speed_inline'd rc_is_bit_0_helper() */
static void rc_do_normalize(rc_t *rc)
{
	if (rc->ptr >= rc->buffer_end)
		rc_read(rc);
	rc->range <<= 8;
	rc->code = (rc->code << 8) | *rc->ptr++;
}
static ALWAYS_INLINE void rc_normalize(rc_t *rc)
{
	if (rc->range < (1 << RC_TOP_BITS)) {
		rc_do_normalize(rc);
	}
}
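
/* How the range coder works, in brief (an explanatory note added here,
 * not part of the LZMA SDK comments): the decoder keeps an interval of
 * width 'range' and a point 'code' inside it. To decode one bit whose
 * 11-bit probability-of-zero is *p, the interval is split at
 *     bound = (range >> RC_MODEL_TOTAL_BITS) * (*p)
 * and the half containing 'code' selects the bit. Normalization above
 * keeps range >= 2^RC_TOP_BITS, so the split never loses the 11 bits
 * of probability precision; each normalization shifts one more input
 * byte into 'code'.
 */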

/* rc_is_bit_0 is called 9 times */
/* Why does rc_is_bit_0_helper exist?
 * Because we want to always expose (rc->code < rc->bound) to the optimizer.
 * Thus rc_is_bit_0 is always inlined, and rc_is_bit_0_helper is inlined
 * only if we compile for speed.
 */
static speed_inline uint32_t rc_is_bit_0_helper(rc_t *rc, uint16_t *p)
{
	rc_normalize(rc);
	rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS);
	return rc->bound;
}
static ALWAYS_INLINE int rc_is_bit_0(rc_t *rc, uint16_t *p)
{
	uint32_t t = rc_is_bit_0_helper(rc, p);
	return rc->code < t;
}

/* Called ~10 times, but very small, thus inlined */
static speed_inline void rc_update_bit_0(rc_t *rc, uint16_t *p)
{
	rc->range = rc->bound;
	*p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS;
}
static speed_inline void rc_update_bit_1(rc_t *rc, uint16_t *p)
{
	rc->range -= rc->bound;
	rc->code -= rc->bound;
	*p -= *p >> RC_MOVE_BITS;
}

/* Called 4 times in unlzma loop */
static int rc_get_bit(rc_t *rc, uint16_t *p, int *symbol)
{
	if (rc_is_bit_0(rc, p)) {
		rc_update_bit_0(rc, p);
		*symbol *= 2;
		return 0;
	} else {
		rc_update_bit_1(rc, p);
		*symbol = *symbol * 2 + 1;
		return 1;
	}
}

/* Called once */
static ALWAYS_INLINE int rc_direct_bit(rc_t *rc)
{
	rc_normalize(rc);
	rc->range >>= 1;
	if (rc->code >= rc->range) {
		rc->code -= rc->range;
		return 1;
	}
	return 0;
}

/* Called twice */
static speed_inline void
rc_bit_tree_decode(rc_t *rc, uint16_t *p, int num_levels, int *symbol)
{
	int i = num_levels;

	*symbol = 1;
	while (i--)
		rc_get_bit(rc, p + *symbol, symbol);
	*symbol -= 1 << num_levels;
}


typedef struct {
	uint8_t pos;
	uint32_t dict_size;
	uint64_t dst_size;
} __attribute__ ((packed)) lzma_header_t;
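
/* Note (added for clarity): this matches the classic 13-byte .lzma
 * header: one properties byte encoding lc/lp/pb as pos = (pb*5 + lp)*9 + lc,
 * a little-endian 32-bit dictionary size, and a little-endian 64-bit
 * uncompressed size. Example: the common properties byte 0x5D = 93
 * decodes as lc = 93%9 = 3, lp = (93/9)%5 = 0, pb = (93/9)/5 = 2.
 */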

/* #defines would force the compiler to compute/optimize each expression
 * at every usage. Have a heart and use an enum instead. */
enum {
	LZMA_BASE_SIZE = 1846,
	LZMA_LIT_SIZE = 768,

	LZMA_NUM_POS_BITS_MAX = 4,

	LZMA_LEN_NUM_LOW_BITS = 3,
	LZMA_LEN_NUM_MID_BITS = 3,
	LZMA_LEN_NUM_HIGH_BITS = 8,

	LZMA_LEN_CHOICE = 0,
	LZMA_LEN_CHOICE_2 = (LZMA_LEN_CHOICE + 1),
	LZMA_LEN_LOW = (LZMA_LEN_CHOICE_2 + 1),
	LZMA_LEN_MID = (LZMA_LEN_LOW
			+ (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))),
	LZMA_LEN_HIGH = (LZMA_LEN_MID
			+ (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))),
	LZMA_NUM_LEN_PROBS = (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)),

	LZMA_NUM_STATES = 12,
	LZMA_NUM_LIT_STATES = 7,

	LZMA_START_POS_MODEL_INDEX = 4,
	LZMA_END_POS_MODEL_INDEX = 14,
	LZMA_NUM_FULL_DISTANCES = (1 << (LZMA_END_POS_MODEL_INDEX >> 1)),

	LZMA_NUM_POS_SLOT_BITS = 6,
	LZMA_NUM_LEN_TO_POS_STATES = 4,

	LZMA_NUM_ALIGN_BITS = 4,

	LZMA_MATCH_MIN_LEN = 2,

	LZMA_IS_MATCH = 0,
	LZMA_IS_REP = (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)),
	LZMA_IS_REP_G0 = (LZMA_IS_REP + LZMA_NUM_STATES),
	LZMA_IS_REP_G1 = (LZMA_IS_REP_G0 + LZMA_NUM_STATES),
	LZMA_IS_REP_G2 = (LZMA_IS_REP_G1 + LZMA_NUM_STATES),
	LZMA_IS_REP_0_LONG = (LZMA_IS_REP_G2 + LZMA_NUM_STATES),
	LZMA_POS_SLOT = (LZMA_IS_REP_0_LONG
			+ (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)),
	LZMA_SPEC_POS = (LZMA_POS_SLOT
			+ (LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)),
	LZMA_ALIGN = (LZMA_SPEC_POS
			+ LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX),
	LZMA_LEN_CODER = (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)),
	LZMA_REP_LEN_CODER = (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS),
	LZMA_LITERAL = (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS),
};


USE_DESKTOP(long long) int
unpack_lzma_stream(int src_fd, int dst_fd)
{
	USE_DESKTOP(long long total_written = 0;)
	lzma_header_t header;
	int lc, pb, lp;
	uint32_t pos_state_mask;
	uint32_t literal_pos_mask;
	uint32_t pos;
	uint16_t *p;
	uint16_t *prob;
	uint16_t *prob_lit;
	int num_bits;
	int num_probs;
	rc_t *rc;
	int i, mi;
	uint8_t *buffer;
	uint8_t previous_byte = 0;
	size_t buffer_pos = 0, global_pos = 0;
	int len = 0;
	int state = 0;
	uint32_t rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1;

	xread(src_fd, &header, sizeof(header));

	if (header.pos >= (9 * 5 * 5))
		bb_error_msg_and_die("bad header");
	/* Unpack the properties byte: pos = (pb*5 + lp)*9 + lc */
	mi = header.pos / 9;
	lc = header.pos % 9;
	pb = mi / 5;
	lp = mi % 5;
	pos_state_mask = (1 << pb) - 1;
	literal_pos_mask = (1 << lp) - 1;

	header.dict_size = SWAP_LE32(header.dict_size);
	header.dst_size = SWAP_LE64(header.dst_size);

	if (header.dict_size == 0)
		header.dict_size = 1;

	buffer = xmalloc(MIN(header.dst_size, header.dict_size));

	num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp));
	p = xmalloc(num_probs * sizeof(*p));
	/* LZMA_LITERAL == LZMA_BASE_SIZE == 1846 by construction of the enum */
	num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp));
	for (i = 0; i < num_probs; i++)
		p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1;

	rc = rc_init(src_fd); /*, RC_BUFFER_SIZE); */

	while (global_pos + buffer_pos < header.dst_size) {
		int pos_state = (buffer_pos + global_pos) & pos_state_mask;

		prob = p + LZMA_IS_MATCH
				+ (state << LZMA_NUM_POS_BITS_MAX) + pos_state;
		if (rc_is_bit_0(rc, prob)) {
			mi = 1;
			rc_update_bit_0(rc, prob);
			prob = (p + LZMA_LITERAL + (LZMA_LIT_SIZE
				* ((((buffer_pos + global_pos) & literal_pos_mask) << lc)
				+ (previous_byte >> (8 - lc)))));
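
			/* Explanatory note (not in the original comments): right
			 * after a match, the byte at distance rep0 is a strong
			 * predictor of the next literal, so the "matched" decoder
			 * below mixes its bits into the probability index until
			 * the decoded bit prefix diverges from it. */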
			if (state >= LZMA_NUM_LIT_STATES) {
				int match_byte;

				/* buffer_pos - rep0 may underflow; unsigned wraparound
				 * plus dict_size brings pos back into the window */
				pos = buffer_pos - rep0;
				while (pos >= header.dict_size)
					pos += header.dict_size;
				match_byte = buffer[pos];
				do {
					int bit;

					match_byte <<= 1;
					bit = match_byte & 0x100;
					prob_lit = prob + 0x100 + bit + mi;
					if (rc_get_bit(rc, prob_lit, &mi)) {
						if (!bit)
							break;
					} else {
						if (bit)
							break;
					}
				} while (mi < 0x100);
			}
			while (mi < 0x100) {
				prob_lit = prob + mi;
				rc_get_bit(rc, prob_lit, &mi);
			}
			previous_byte = (uint8_t) mi;

			buffer[buffer_pos++] = previous_byte;
			if (buffer_pos == header.dict_size) {
				buffer_pos = 0;
				global_pos += header.dict_size;
				if (full_write(dst_fd, buffer, header.dict_size) != header.dict_size)
					goto bad;
				USE_DESKTOP(total_written += header.dict_size;)
			}
			if (state < 4)
				state = 0;
			else if (state < 10)
				state -= 3;
			else
				state -= 6;
		} else {
			int offset;
			uint16_t *prob_len;

			rc_update_bit_1(rc, prob);
			prob = p + LZMA_IS_REP + state;
			if (rc_is_bit_0(rc, prob)) {
				rc_update_bit_0(rc, prob);
				rep3 = rep2;
				rep2 = rep1;
				rep1 = rep0;
				state = state < LZMA_NUM_LIT_STATES ? 0 : 3;
				prob = p + LZMA_LEN_CODER;
			} else {
				rc_update_bit_1(rc, prob);
				prob = p + LZMA_IS_REP_G0 + state;
				if (rc_is_bit_0(rc, prob)) {
					rc_update_bit_0(rc, prob);
					prob = (p + LZMA_IS_REP_0_LONG
						+ (state << LZMA_NUM_POS_BITS_MAX) + pos_state);
					if (rc_is_bit_0(rc, prob)) {
						rc_update_bit_0(rc, prob);

						/* "Short rep": copy one byte from distance rep0 */
						state = state < LZMA_NUM_LIT_STATES ? 9 : 11;
						pos = buffer_pos - rep0;
						while (pos >= header.dict_size)
							pos += header.dict_size;
						previous_byte = buffer[pos];
						buffer[buffer_pos++] = previous_byte;
						if (buffer_pos == header.dict_size) {
							buffer_pos = 0;
							global_pos += header.dict_size;
							if (full_write(dst_fd, buffer, header.dict_size) != header.dict_size)
								goto bad;
							USE_DESKTOP(total_written += header.dict_size;)
						}
						continue;
					} else {
						rc_update_bit_1(rc, prob);
					}
				} else {
					uint32_t distance;

					rc_update_bit_1(rc, prob);
					prob = p + LZMA_IS_REP_G1 + state;
					if (rc_is_bit_0(rc, prob)) {
						rc_update_bit_0(rc, prob);
						distance = rep1;
					} else {
						rc_update_bit_1(rc, prob);
						prob = p + LZMA_IS_REP_G2 + state;
						if (rc_is_bit_0(rc, prob)) {
							rc_update_bit_0(rc, prob);
							distance = rep2;
						} else {
							rc_update_bit_1(rc, prob);
							distance = rep3;
							rep3 = rep2;
						}
						rep2 = rep1;
					}
					rep1 = rep0;
					rep0 = distance;
				}
				state = state < LZMA_NUM_LIT_STATES ? 8 : 11;
				prob = p + LZMA_REP_LEN_CODER;
			}
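
			/* Length decoding (note added for clarity): match lengths
			 * come in three ranges -- a 3-bit "low" tree for 2..9, a
			 * 3-bit "mid" tree for 10..17, and an 8-bit "high" tree
			 * for 18..273. Two choice bits select the range; 'offset'
			 * restores the range base before LZMA_MATCH_MIN_LEN is
			 * added further down. */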
			prob_len = prob + LZMA_LEN_CHOICE;
			if (rc_is_bit_0(rc, prob_len)) {
				rc_update_bit_0(rc, prob_len);
				prob_len = (prob + LZMA_LEN_LOW
					+ (pos_state << LZMA_LEN_NUM_LOW_BITS));
				offset = 0;
				num_bits = LZMA_LEN_NUM_LOW_BITS;
			} else {
				rc_update_bit_1(rc, prob_len);
				prob_len = prob + LZMA_LEN_CHOICE_2;
				if (rc_is_bit_0(rc, prob_len)) {
					rc_update_bit_0(rc, prob_len);
					prob_len = (prob + LZMA_LEN_MID
						+ (pos_state << LZMA_LEN_NUM_MID_BITS));
					offset = 1 << LZMA_LEN_NUM_LOW_BITS;
					num_bits = LZMA_LEN_NUM_MID_BITS;
				} else {
					rc_update_bit_1(rc, prob_len);
					prob_len = prob + LZMA_LEN_HIGH;
					offset = ((1 << LZMA_LEN_NUM_LOW_BITS)
						+ (1 << LZMA_LEN_NUM_MID_BITS));
					num_bits = LZMA_LEN_NUM_HIGH_BITS;
				}
			}
			rc_bit_tree_decode(rc, prob_len, num_bits, &len);
			len += offset;

			if (state < 4) {
				/* A fresh match (not a rep): decode a new distance */
				int pos_slot;

				state += LZMA_NUM_LIT_STATES;
				prob = p + LZMA_POS_SLOT +
					((len < LZMA_NUM_LEN_TO_POS_STATES ? len :
					  LZMA_NUM_LEN_TO_POS_STATES - 1)
						<< LZMA_NUM_POS_SLOT_BITS);
				rc_bit_tree_decode(rc, prob, LZMA_NUM_POS_SLOT_BITS,
						&pos_slot);
				if (pos_slot >= LZMA_START_POS_MODEL_INDEX) {
					num_bits = (pos_slot >> 1) - 1;
					rep0 = 2 | (pos_slot & 1);
					if (pos_slot < LZMA_END_POS_MODEL_INDEX) {
						rep0 <<= num_bits;
						prob = p + LZMA_SPEC_POS + rep0 - pos_slot - 1;
					} else {
						/* The middle distance bits come straight from
						 * the stream; only the low 4 "align" bits stay
						 * probability-modeled */
						num_bits -= LZMA_NUM_ALIGN_BITS;
						while (num_bits--)
							rep0 = (rep0 << 1) | rc_direct_bit(rc);
						prob = p + LZMA_ALIGN;
						rep0 <<= LZMA_NUM_ALIGN_BITS;
						num_bits = LZMA_NUM_ALIGN_BITS;
					}
					i = 1;
					mi = 1;
					while (num_bits--) {
						if (rc_get_bit(rc, prob + mi, &mi))
							rep0 |= i;
						i <<= 1;
					}
				} else
					rep0 = pos_slot;
				if (++rep0 == 0)
					break; /* distance 0xFFFFFFFF: end-of-stream marker */
			}

			len += LZMA_MATCH_MIN_LEN;

			do {
				pos = buffer_pos - rep0;
				while (pos >= header.dict_size)
					pos += header.dict_size;
				previous_byte = buffer[pos];
				buffer[buffer_pos++] = previous_byte;
				if (buffer_pos == header.dict_size) {
					buffer_pos = 0;
					global_pos += header.dict_size;
					if (full_write(dst_fd, buffer, header.dict_size) != header.dict_size)
						goto bad;
					USE_DESKTOP(total_written += header.dict_size;)
				}
				len--;
			} while (len != 0 && buffer_pos < header.dst_size);
		}
	}

	if (full_write(dst_fd, buffer, buffer_pos) != buffer_pos) {
 bad:
		rc_free(rc);
		return -1;
	}
	rc_free(rc);
	USE_DESKTOP(total_written += buffer_pos;)
	/* Without DESKTOP this expands to "return + 0;", i.e. "return 0;" */
	return USE_DESKTOP(total_written) + 0;
}
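
/* Usage sketch (hypothetical, added for illustration -- the file names and
 * the direct call below are assumptions, not part of this file; inside
 * busybox this function is normally reached through the lzma/unlzma
 * applet plumbing rather than called directly):
 *
 *   int src = xopen("data.lzma", O_RDONLY);
 *   int dst = xopen("data", O_WRONLY | O_CREAT | O_TRUNC);
 *   if (unpack_lzma_stream(src, dst) < 0)
 *       bb_error_msg_and_die("corrupted lzma stream");
 */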