/* vi: set sw=4 ts=4: */
/*
 * Small lzma deflate implementation.
 * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
 *
 * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
 * Copyright (C) 1999-2005  Igor Pavlov
 *
 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
 */

#include "libbb.h"
#include "unarchive.h"

#if ENABLE_FEATURE_LZMA_FAST
#  define speed_inline ALWAYS_INLINE
#else
#  define speed_inline
#endif


typedef struct {
	int fd;
	uint8_t *ptr;

/* We used to keep rc on the stack in unlzma and allocate the buffer
 * separately, but the code is smaller with the buffer 'attached to'
 * the allocated rc: */
	/* uint8_t *buffer; */
#define RC_BUFFER ((uint8_t*)(rc+1))

	uint8_t *buffer_end;

/* Had provisions for a variable-sized buffer, but we don't need that here */
	/* int buffer_size; */
#define RC_BUFFER_SIZE 0x10000

	uint32_t code;
	uint32_t range;
	uint32_t bound;
} rc_t;

#define RC_TOP_BITS 24
#define RC_MOVE_BITS 5
#define RC_MODEL_TOTAL_BITS 11

/* Called twice: once at startup and once in rc_normalize() */
static void rc_read(rc_t * rc)
{
	int buffer_size = safe_read(rc->fd, RC_BUFFER, RC_BUFFER_SIZE);
	if (buffer_size <= 0)
		bb_error_msg_and_die("unexpected EOF");
	rc->ptr = RC_BUFFER;
	rc->buffer_end = RC_BUFFER + buffer_size;
}

/* Called once */
static rc_t* rc_init(int fd) /*, int buffer_size) */
{
	int i;
	rc_t* rc;

	rc = xmalloc(sizeof(rc_t) + RC_BUFFER_SIZE);

	rc->fd = fd;
	/* rc->buffer_size = buffer_size; */
	rc->buffer_end = RC_BUFFER + RC_BUFFER_SIZE;
	rc->ptr = rc->buffer_end;

	rc->code = 0;
	rc->range = 0xFFFFFFFF;
	for (i = 0; i < 5; i++) {
		if (rc->ptr >= rc->buffer_end)
			rc_read(rc);
		rc->code = (rc->code << 8) | *rc->ptr++;
	}
	return rc;
}

/* Called once */
static ALWAYS_INLINE void rc_free(rc_t * rc)
{
	if (ENABLE_FEATURE_CLEAN_UP)
		free(rc);
}

/* Called twice, but one callsite is in speed_inline'd rc_is_bit_0_helper() */
static void rc_do_normalize(rc_t * rc)
{
	if (rc->ptr >= rc->buffer_end)
		rc_read(rc);
	rc->range <<= 8;
	rc->code = (rc->code << 8) | *rc->ptr++;
}
static ALWAYS_INLINE void rc_normalize(rc_t * rc)
{
	if (rc->range < (1 << RC_TOP_BITS)) {
		rc_do_normalize(rc);
	}
}
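/* Together these keep the invariant that range >= 2^24 whenever a bit
 * is decoded, so bound = p * (range >> 11) retains enough precision. */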

/* rc_is_bit_0 is called 9 times */
/* Why does rc_is_bit_0_helper exist?
 * Because we always want to expose (rc->code < rc->bound) to the optimizer.
 * Thus rc_is_bit_0 is always inlined, and rc_is_bit_0_helper is inlined
 * only if we compile for speed.
 */
static speed_inline uint32_t rc_is_bit_0_helper(rc_t * rc, uint16_t * p)
{
	rc_normalize(rc);
	rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS);
	return rc->bound;
}
static ALWAYS_INLINE int rc_is_bit_0(rc_t * rc, uint16_t * p)
{
	uint32_t t = rc_is_bit_0_helper(rc, p);
	return rc->code < t;
}

/* Called ~10 times, but very small, thus inlined */
static speed_inline void rc_update_bit_0(rc_t * rc, uint16_t * p)
{
	rc->range = rc->bound;
	*p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS;
}
static speed_inline void rc_update_bit_1(rc_t * rc, uint16_t * p)
{
	rc->range -= rc->bound;
	rc->code -= rc->bound;
	*p -= *p >> RC_MOVE_BITS;
}
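/* Worked example of the adaptive update: with *p = 1024 and
 * RC_MOVE_BITS = 5, a decoded 0-bit gives *p += (2048 - 1024) >> 5,
 * i.e. 1024 -> 1056, while a decoded 1-bit gives *p -= 1024 >> 5,
 * i.e. 1024 -> 992. */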

/* Called 4 times in unlzma loop */
static int rc_get_bit(rc_t * rc, uint16_t * p, int *symbol)
{
	if (rc_is_bit_0(rc, p)) {
		rc_update_bit_0(rc, p);
		*symbol *= 2;
		return 0;
	} else {
		rc_update_bit_1(rc, p);
		*symbol = *symbol * 2 + 1;
		return 1;
	}
}

/* Called once */
static ALWAYS_INLINE int rc_direct_bit(rc_t * rc)
{
	rc_normalize(rc);
	rc->range >>= 1;
	if (rc->code >= rc->range) {
		rc->code -= rc->range;
		return 1;
	}
	return 0;
}

/* Called twice */
static speed_inline void
rc_bit_tree_decode(rc_t * rc, uint16_t * p, int num_levels, int *symbol)
{
	int i = num_levels;

	*symbol = 1;
	while (i--)
		rc_get_bit(rc, p + *symbol, symbol);
	*symbol -= 1 << num_levels;
}
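/* This walks a complete binary tree of 2^num_levels - 1 probabilities
 * stored from p[1] on: *symbol is the tree index with each decoded bit
 * shifted in from the right, and subtracting 1 << num_levels at the end
 * turns the leaf index into the plain value 0 .. 2^num_levels - 1. */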


typedef struct {
	uint8_t pos;
	uint32_t dict_size;
	uint64_t dst_size;
} __attribute__ ((packed)) lzma_header_t;
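/* This mirrors the fixed 13-byte .lzma file header: one properties
 * byte (pos), a 32-bit little-endian dictionary size and a 64-bit
 * little-endian uncompressed size. The properties byte packs the three
 * model parameters as pos = (pb * 5 + lp) * 9 + lc. */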


/* #defines would force the compiler to compute/optimize each one at each
 * usage. Have a heart and use an enum instead. */
enum {
	LZMA_BASE_SIZE = 1846,
	LZMA_LIT_SIZE  = 768,

	LZMA_NUM_POS_BITS_MAX = 4,

	LZMA_LEN_NUM_LOW_BITS  = 3,
	LZMA_LEN_NUM_MID_BITS  = 3,
	LZMA_LEN_NUM_HIGH_BITS = 8,

	LZMA_LEN_CHOICE     = 0,
	LZMA_LEN_CHOICE_2   = (LZMA_LEN_CHOICE + 1),
	LZMA_LEN_LOW        = (LZMA_LEN_CHOICE_2 + 1),
	LZMA_LEN_MID        = (LZMA_LEN_LOW
	                      + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))),
	LZMA_LEN_HIGH       = (LZMA_LEN_MID
	                      + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))),
	LZMA_NUM_LEN_PROBS  = (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)),

	LZMA_NUM_STATES     = 12,
	LZMA_NUM_LIT_STATES = 7,

	LZMA_START_POS_MODEL_INDEX = 4,
	LZMA_END_POS_MODEL_INDEX   = 14,
	LZMA_NUM_FULL_DISTANCES    = (1 << (LZMA_END_POS_MODEL_INDEX >> 1)),

	LZMA_NUM_POS_SLOT_BITS = 6,
	LZMA_NUM_LEN_TO_POS_STATES = 4,

	LZMA_NUM_ALIGN_BITS = 4,

	LZMA_MATCH_MIN_LEN  = 2,

	LZMA_IS_MATCH       = 0,
	LZMA_IS_REP         = (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)),
	LZMA_IS_REP_G0      = (LZMA_IS_REP + LZMA_NUM_STATES),
	LZMA_IS_REP_G1      = (LZMA_IS_REP_G0 + LZMA_NUM_STATES),
	LZMA_IS_REP_G2      = (LZMA_IS_REP_G1 + LZMA_NUM_STATES),
	LZMA_IS_REP_0_LONG  = (LZMA_IS_REP_G2 + LZMA_NUM_STATES),
	LZMA_POS_SLOT       = (LZMA_IS_REP_0_LONG
	                      + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)),
	LZMA_SPEC_POS       = (LZMA_POS_SLOT
	                      + (LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)),
	LZMA_ALIGN          = (LZMA_SPEC_POS
	                      + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX),
	LZMA_LEN_CODER      = (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)),
	LZMA_REP_LEN_CODER  = (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS),
	LZMA_LITERAL        = (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS),
};
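/* Note: LZMA_BASE_SIZE (1846) is just LZMA_LITERAL written out as a
 * number, i.e. the count of all fixed probability slots; the literal
 * coder needs LZMA_LIT_SIZE << (lc + lp) slots on top of that. For the
 * common lc=3, lp=0 this totals 1846 + 768 * 8 = 7990 probabilities. */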


USE_DESKTOP(long long) int
unpack_lzma_stream(int src_fd, int dst_fd)
{
	USE_DESKTOP(long long total_written = 0;)
	lzma_header_t header;
	int lc, pb, lp;
	uint32_t pos_state_mask;
	uint32_t literal_pos_mask;
	uint32_t pos;
	uint16_t *p;
	uint16_t *prob;
	uint16_t *prob_lit;
	int num_bits;
	int num_probs;
	rc_t *rc;
	int i, mi;
	uint8_t *buffer;
	uint8_t previous_byte = 0;
	size_t buffer_pos = 0, global_pos = 0;
	int len = 0;
	int state = 0;
	uint32_t rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1;

	xread(src_fd, &header, sizeof(header));

	if (header.pos >= (9 * 5 * 5))
		bb_error_msg_and_die("bad header");
	mi = header.pos / 9;
	lc = header.pos % 9;
	pb = mi / 5;
	lp = mi % 5;
	pos_state_mask = (1 << pb) - 1;
	literal_pos_mask = (1 << lp) - 1;
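	/* Example: the default encoder properties byte 0x5D (93) decodes as
	 * lc = 93 % 9 = 3, mi = 93 / 9 = 10, pb = 10 / 5 = 2, lp = 10 % 5 = 0,
	 * giving pos_state_mask = 3 and literal_pos_mask = 0. */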

	header.dict_size = SWAP_LE32(header.dict_size);
	header.dst_size = SWAP_LE64(header.dst_size);

	if (header.dict_size == 0)
		header.dict_size = 1;

	buffer = xmalloc(MIN(header.dst_size, header.dict_size));

	num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp));
	p = xmalloc(num_probs * sizeof(*p));
	num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp));
	for (i = 0; i < num_probs; i++)
		p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1;
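	/* (1 << RC_MODEL_TOTAL_BITS) >> 1 = 1024: every context starts out
	 * rating 0 and 1 as equally likely. */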

	rc = rc_init(src_fd); /*, RC_BUFFER_SIZE); */

	while (global_pos + buffer_pos < header.dst_size) {
		int pos_state = (buffer_pos + global_pos) & pos_state_mask;

		prob = p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state;
		if (rc_is_bit_0(rc, prob)) {
			mi = 1;
			rc_update_bit_0(rc, prob);
			prob = (p + LZMA_LITERAL + (LZMA_LIT_SIZE
					* ((((buffer_pos + global_pos) & literal_pos_mask) << lc)
					+ (previous_byte >> (8 - lc)))));

			if (state >= LZMA_NUM_LIT_STATES) {
				int match_byte;

				pos = buffer_pos - rep0;
				while (pos >= header.dict_size)
					pos += header.dict_size;
				match_byte = buffer[pos];
				do {
					int bit;

					match_byte <<= 1;
					bit = match_byte & 0x100;
					prob_lit = prob + 0x100 + bit + mi;
					if (rc_get_bit(rc, prob_lit, &mi)) {
						if (!bit)
							break;
					} else {
						if (bit)
							break;
					}
				} while (mi < 0x100);
			}
			while (mi < 0x100) {
				prob_lit = prob + mi;
				rc_get_bit(rc, prob_lit, &mi);
			}
			previous_byte = (uint8_t) mi;

			buffer[buffer_pos++] = previous_byte;
			if (buffer_pos == header.dict_size) {
				buffer_pos = 0;
				global_pos += header.dict_size;
				if (full_write(dst_fd, buffer, header.dict_size) != header.dict_size)
					goto bad;
				USE_DESKTOP(total_written += header.dict_size;)
			}
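			/* LZMA state machine: states 0..6 mean "the previous symbol
			 * was a literal"; the update below maps the state back toward
			 * that literal group after emitting another literal. */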
			if (state < 4)
				state = 0;
			else if (state < 10)
				state -= 3;
			else
				state -= 6;
		} else {
			int offset;
			uint16_t *prob_len;

			rc_update_bit_1(rc, prob);
			prob = p + LZMA_IS_REP + state;
			if (rc_is_bit_0(rc, prob)) {
				rc_update_bit_0(rc, prob);
				rep3 = rep2;
				rep2 = rep1;
				rep1 = rep0;
				state = state < LZMA_NUM_LIT_STATES ? 0 : 3;
				prob = p + LZMA_LEN_CODER;
			} else {
				rc_update_bit_1(rc, prob);
				prob = p + LZMA_IS_REP_G0 + state;
				if (rc_is_bit_0(rc, prob)) {
					rc_update_bit_0(rc, prob);
					prob = (p + LZMA_IS_REP_0_LONG
							+ (state << LZMA_NUM_POS_BITS_MAX) + pos_state);
					if (rc_is_bit_0(rc, prob)) {
						rc_update_bit_0(rc, prob);

						state = state < LZMA_NUM_LIT_STATES ? 9 : 11;
						pos = buffer_pos - rep0;
						while (pos >= header.dict_size)
							pos += header.dict_size;
						previous_byte = buffer[pos];
						buffer[buffer_pos++] = previous_byte;
						if (buffer_pos == header.dict_size) {
							buffer_pos = 0;
							global_pos += header.dict_size;
							if (full_write(dst_fd, buffer, header.dict_size) != header.dict_size)
								goto bad;
							USE_DESKTOP(total_written += header.dict_size;)
						}
						continue;
					} else {
						rc_update_bit_1(rc, prob);
					}
				} else {
					uint32_t distance;

					rc_update_bit_1(rc, prob);
					prob = p + LZMA_IS_REP_G1 + state;
					if (rc_is_bit_0(rc, prob)) {
						rc_update_bit_0(rc, prob);
						distance = rep1;
					} else {
						rc_update_bit_1(rc, prob);
						prob = p + LZMA_IS_REP_G2 + state;
						if (rc_is_bit_0(rc, prob)) {
							rc_update_bit_0(rc, prob);
							distance = rep2;
						} else {
							rc_update_bit_1(rc, prob);
							distance = rep3;
							rep3 = rep2;
						}
						rep2 = rep1;
					}
					rep1 = rep0;
					rep0 = distance;
				}
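				/* rep0..rep3 act as a small move-to-front history of
				 * recent match distances: the distance just reused is
				 * pulled to the front (rep0) and the others shift down,
				 * keeping repeated distances cheap to code. */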
				state = state < LZMA_NUM_LIT_STATES ? 8 : 11;
				prob = p + LZMA_REP_LEN_CODER;
			}

			prob_len = prob + LZMA_LEN_CHOICE;
			if (rc_is_bit_0(rc, prob_len)) {
				rc_update_bit_0(rc, prob_len);
				prob_len = (prob + LZMA_LEN_LOW
							+ (pos_state << LZMA_LEN_NUM_LOW_BITS));
				offset = 0;
				num_bits = LZMA_LEN_NUM_LOW_BITS;
			} else {
				rc_update_bit_1(rc, prob_len);
				prob_len = prob + LZMA_LEN_CHOICE_2;
				if (rc_is_bit_0(rc, prob_len)) {
					rc_update_bit_0(rc, prob_len);
					prob_len = (prob + LZMA_LEN_MID
								+ (pos_state << LZMA_LEN_NUM_MID_BITS));
					offset = 1 << LZMA_LEN_NUM_LOW_BITS;
					num_bits = LZMA_LEN_NUM_MID_BITS;
				} else {
					rc_update_bit_1(rc, prob_len);
					prob_len = prob + LZMA_LEN_HIGH;
					offset = ((1 << LZMA_LEN_NUM_LOW_BITS)
							  + (1 << LZMA_LEN_NUM_MID_BITS));
					num_bits = LZMA_LEN_NUM_HIGH_BITS;
				}
			}
			rc_bit_tree_decode(rc, prob_len, num_bits, &len);
			len += offset;
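			/* len is now 0..271; once LZMA_MATCH_MIN_LEN is added below,
			 * the three coders cover match lengths 2..9 (low), 10..17
			 * (mid) and 18..273 (high). */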

			if (state < 4) {
				int pos_slot;

				state += LZMA_NUM_LIT_STATES;
				prob = p + LZMA_POS_SLOT +
					((len < LZMA_NUM_LEN_TO_POS_STATES ?
					  len : LZMA_NUM_LEN_TO_POS_STATES - 1)
					 << LZMA_NUM_POS_SLOT_BITS);
				rc_bit_tree_decode(rc, prob, LZMA_NUM_POS_SLOT_BITS, &pos_slot);
				if (pos_slot >= LZMA_START_POS_MODEL_INDEX) {
					num_bits = (pos_slot >> 1) - 1;
					rep0 = 2 | (pos_slot & 1);
					if (pos_slot < LZMA_END_POS_MODEL_INDEX) {
						rep0 <<= num_bits;
						prob = p + LZMA_SPEC_POS + rep0 - pos_slot - 1;
					} else {
						num_bits -= LZMA_NUM_ALIGN_BITS;
						while (num_bits--)
							rep0 = (rep0 << 1) | rc_direct_bit(rc);
						prob = p + LZMA_ALIGN;
						rep0 <<= LZMA_NUM_ALIGN_BITS;
						num_bits = LZMA_NUM_ALIGN_BITS;
					}
					i = 1;
					mi = 1;
					while (num_bits--) {
						if (rc_get_bit(rc, prob + mi, &mi))
							rep0 |= i;
						i <<= 1;
					}
				} else
					rep0 = pos_slot;
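				/* ++rep0 below turns the decoded value into a 1-based
				 * distance; a decoded 0xFFFFFFFF (rep0 wraps to 0) is the
				 * encoder's end-of-stream marker, so stop decoding. */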
				if (++rep0 == 0)
					break;
			}

			len += LZMA_MATCH_MIN_LEN;

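			/* Copy the match byte-by-byte through the circular dictionary
			 * buffer; overlapping copies (e.g. rep0 == 1 repeating the
			 * previous byte) work naturally this way. */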
			do {
				pos = buffer_pos - rep0;
				while (pos >= header.dict_size)
					pos += header.dict_size;
				previous_byte = buffer[pos];
				buffer[buffer_pos++] = previous_byte;
				if (buffer_pos == header.dict_size) {
					buffer_pos = 0;
					global_pos += header.dict_size;
					if (full_write(dst_fd, buffer, header.dict_size) != header.dict_size)
						goto bad;
					USE_DESKTOP(total_written += header.dict_size;)
				}
				len--;
			} while (len != 0 && buffer_pos < header.dst_size);
		}
	}


	if (full_write(dst_fd, buffer, buffer_pos) != buffer_pos) {
 bad:
		rc_free(rc);
		return -1;
	}
	rc_free(rc);
	USE_DESKTOP(total_written += buffer_pos;)
	return USE_DESKTOP(total_written) + 0;
}

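/* A hypothetical caller, for illustration only (not part of this file):
 *
 *   int src = xopen("foo.lzma", O_RDONLY);
 *   if (unpack_lzma_stream(src, STDOUT_FILENO) < 0)
 *       bb_error_msg_and_die("lzma: write error");
 *   close(src);
 *
 * The return value is the number of bytes written when DESKTOP is
 * enabled, 0 otherwise, and -1 on a short write; corrupt or truncated
 * input dies via bb_error_msg_and_die() instead. */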