1// SPDX-License-Identifier: 0BSD
2
3///////////////////////////////////////////////////////////////////////////////
4//
5/// \file       lzma_decoder.c
6/// \brief      LZMA decoder
7///
8//  Authors:    Igor Pavlov
9//              Lasse Collin
10//              Jia Tan
11//
12///////////////////////////////////////////////////////////////////////////////
13
14#include "lz_decoder.h"
15#include "lzma_common.h"
16#include "lzma_decoder.h"
17#include "range_decoder.h"
18
19// The macros unroll loops with switch statements.
20// Silence warnings about missing fall-through comments.
21#if TUKLIB_GNUC_REQ(7, 0)
22#	pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
23#endif
24
25// Minimum number of input bytes to safely decode one LZMA symbol.
26// The worst case is that we decode 22 bits using probabilities and 26
27// direct bits. This may decode at maximum 20 bytes of input.
28#define LZMA_IN_REQUIRED 20
29
30
31// Macros for (somewhat) size-optimized code.
32// This is used to decode the match length (how many bytes must be repeated
33// from the dictionary). This version is used in the Resumable mode and
34// does not unroll any loops.
35#define len_decode(target, ld, pos_state, seq) \
36do { \
37case seq ## _CHOICE: \
38	rc_if_0_safe(ld.choice, seq ## _CHOICE) { \
39		rc_update_0(ld.choice); \
40		probs = ld.low[pos_state];\
41		limit = LEN_LOW_SYMBOLS; \
42		target = MATCH_LEN_MIN; \
43	} else { \
44		rc_update_1(ld.choice); \
45case seq ## _CHOICE2: \
46		rc_if_0_safe(ld.choice2, seq ## _CHOICE2) { \
47			rc_update_0(ld.choice2); \
48			probs = ld.mid[pos_state]; \
49			limit = LEN_MID_SYMBOLS; \
50			target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
51		} else { \
52			rc_update_1(ld.choice2); \
53			probs = ld.high; \
54			limit = LEN_HIGH_SYMBOLS; \
55			target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \
56					+ LEN_MID_SYMBOLS; \
57		} \
58	} \
59	symbol = 1; \
60case seq ## _BITTREE: \
61	do { \
62		rc_bit_safe(probs[symbol], , , seq ## _BITTREE); \
63	} while (symbol < limit); \
64	target += symbol - limit; \
65} while (0)
66
67
68// This is the faster version of the match length decoder that does not
69// worry about being resumable. It unrolls the bittree decoding loop.
70#define len_decode_fast(target, ld, pos_state) \
71do { \
72	symbol = 1; \
73	rc_if_0(ld.choice) { \
74		rc_update_0(ld.choice); \
75		rc_bittree3(ld.low[pos_state], \
76				-LEN_LOW_SYMBOLS + MATCH_LEN_MIN); \
77		target = symbol; \
78	} else { \
79		rc_update_1(ld.choice); \
80		rc_if_0(ld.choice2) { \
81			rc_update_0(ld.choice2); \
82			rc_bittree3(ld.mid[pos_state], -LEN_MID_SYMBOLS \
83					+ MATCH_LEN_MIN + LEN_LOW_SYMBOLS); \
84			target = symbol; \
85		} else { \
86			rc_update_1(ld.choice2); \
87			rc_bittree8(ld.high, -LEN_HIGH_SYMBOLS \
88					+ MATCH_LEN_MIN \
89					+ LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS); \
90			target = symbol; \
91		} \
92	} \
93} while (0)
94
95
96/// Length decoder probabilities; see comments in lzma_common.h.
97typedef struct {
98	probability choice;
99	probability choice2;
100	probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
101	probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
102	probability high[LEN_HIGH_SYMBOLS];
103} lzma_length_decoder;
104
105
106typedef struct {
107	///////////////////
108	// Probabilities //
109	///////////////////
110
111	/// Literals; see comments in lzma_common.h.
112	probability literal[LITERAL_CODERS_MAX * LITERAL_CODER_SIZE];
113
114	/// If 1, it's a match. Otherwise it's a single 8-bit literal.
115	probability is_match[STATES][POS_STATES_MAX];
116
117	/// If 1, it's a repeated match. The distance is one of rep0 .. rep3.
118	probability is_rep[STATES];
119
120	/// If 0, distance of a repeated match is rep0.
121	/// Otherwise check is_rep1.
122	probability is_rep0[STATES];
123
124	/// If 0, distance of a repeated match is rep1.
125	/// Otherwise check is_rep2.
126	probability is_rep1[STATES];
127
128	/// If 0, distance of a repeated match is rep2. Otherwise it is rep3.
129	probability is_rep2[STATES];
130
131	/// If 1, the repeated match has length of one byte. Otherwise
132	/// the length is decoded from rep_len_decoder.
133	probability is_rep0_long[STATES][POS_STATES_MAX];
134
135	/// Probability tree for the highest two bits of the match distance.
136	/// There is a separate probability tree for match lengths of
137	/// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
138	probability dist_slot[DIST_STATES][DIST_SLOTS];
139
140	/// Probability trees for additional bits for match distance when the
141	/// distance is in the range [4, 127].
142	probability pos_special[FULL_DISTANCES - DIST_MODEL_END];
143
144	/// Probability tree for the lowest four bits of a match distance
145	/// that is equal to or greater than 128.
146	probability pos_align[ALIGN_SIZE];
147
148	/// Length of a normal match
149	lzma_length_decoder match_len_decoder;
150
151	/// Length of a repeated match
152	lzma_length_decoder rep_len_decoder;
153
154	///////////////////
155	// Decoder state //
156	///////////////////
157
158	// Range coder
159	lzma_range_decoder rc;
160
161	// Types of the most recently seen LZMA symbols
162	lzma_lzma_state state;
163
164	uint32_t rep0;      ///< Distance of the latest match
165	uint32_t rep1;      ///< Distance of second latest match
166	uint32_t rep2;      ///< Distance of third latest match
167	uint32_t rep3;      ///< Distance of fourth latest match
168
169	uint32_t pos_mask; // (1U << pb) - 1
170	uint32_t literal_context_bits;
171	uint32_t literal_mask;
172
173	/// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of
174	/// payload marker is expected.
175	lzma_vli uncompressed_size;
176
177	/// True if end of payload marker (EOPM) is allowed even when
178	/// uncompressed_size is known; false if EOPM must not be present.
179	/// This is ignored if uncompressed_size == LZMA_VLI_UNKNOWN.
180	bool allow_eopm;
181
182	////////////////////////////////
183	// State of incomplete symbol //
184	////////////////////////////////
185
186	/// Position where to continue the decoder loop
187	enum {
188		SEQ_NORMALIZE,
189		SEQ_IS_MATCH,
190		SEQ_LITERAL,
191		SEQ_LITERAL_MATCHED,
192		SEQ_LITERAL_WRITE,
193		SEQ_IS_REP,
194		SEQ_MATCH_LEN_CHOICE,
195		SEQ_MATCH_LEN_CHOICE2,
196		SEQ_MATCH_LEN_BITTREE,
197		SEQ_DIST_SLOT,
198		SEQ_DIST_MODEL,
199		SEQ_DIRECT,
200		SEQ_ALIGN,
201		SEQ_EOPM,
202		SEQ_IS_REP0,
203		SEQ_SHORTREP,
204		SEQ_IS_REP0_LONG,
205		SEQ_IS_REP1,
206		SEQ_IS_REP2,
207		SEQ_REP_LEN_CHOICE,
208		SEQ_REP_LEN_CHOICE2,
209		SEQ_REP_LEN_BITTREE,
210		SEQ_COPY,
211	} sequence;
212
213	/// Base of the current probability tree
214	probability *probs;
215
216	/// Symbol being decoded. This is also used as an index variable in
217	/// bittree decoders: probs[symbol]
218	uint32_t symbol;
219
220	/// Used as a loop termination condition on bittree decoders and
221	/// direct bits decoder.
222	uint32_t limit;
223
224	/// Matched literal decoder: 0x100 or 0 to help avoiding branches.
225	/// Bittree reverse decoders: Offset of the next bit: 1 << offset
226	uint32_t offset;
227
228	/// If decoding a literal: match byte.
229	/// If decoding a match: length of the match.
230	uint32_t len;
231} lzma_lzma1_decoder;
232
233
234static lzma_ret
235lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
236		const uint8_t *restrict in,
237		size_t *restrict in_pos, size_t in_size)
238{
239	lzma_lzma1_decoder *restrict coder = coder_ptr;
240
241	////////////////////
242	// Initialization //
243	////////////////////
244
245	{
246		const lzma_ret ret = rc_read_init(
247				&coder->rc, in, in_pos, in_size);
248		if (ret != LZMA_STREAM_END)
249			return ret;
250	}
251
252	///////////////
253	// Variables //
254	///////////////
255
256	// Making local copies of often-used variables improves both
257	// speed and readability.
258
259	lzma_dict dict = *dictptr;
260
261	const size_t dict_start = dict.pos;
262
263	// Range decoder
264	rc_to_local(coder->rc, *in_pos, LZMA_IN_REQUIRED);
265
266	// State
267	uint32_t state = coder->state;
268	uint32_t rep0 = coder->rep0;
269	uint32_t rep1 = coder->rep1;
270	uint32_t rep2 = coder->rep2;
271	uint32_t rep3 = coder->rep3;
272
273	const uint32_t pos_mask = coder->pos_mask;
274
275	// These variables are actually needed only if we last time ran
276	// out of input in the middle of the decoder loop.
277	probability *probs = coder->probs;
278	uint32_t symbol = coder->symbol;
279	uint32_t limit = coder->limit;
280	uint32_t offset = coder->offset;
281	uint32_t len = coder->len;
282
283	const uint32_t literal_mask = coder->literal_mask;
284	const uint32_t literal_context_bits = coder->literal_context_bits;
285
286	// Temporary variables
287	uint32_t pos_state = dict.pos & pos_mask;
288
289	lzma_ret ret = LZMA_OK;
290
291	// This is true when the next LZMA symbol is allowed to be EOPM.
292	// That is, if this is false, then EOPM is considered
293	// an invalid symbol and we will return LZMA_DATA_ERROR.
294	//
295	// EOPM is always required (not just allowed) when
296	// the uncompressed size isn't known. When uncompressed size
297	// is known, eopm_is_valid may be set to true later.
298	bool eopm_is_valid = coder->uncompressed_size == LZMA_VLI_UNKNOWN;
299
300	// If uncompressed size is known and there is enough output space
301	// to decode all the data, limit the available buffer space so that
302	// the main loop won't try to decode past the end of the stream.
303	bool might_finish_without_eopm = false;
304	if (coder->uncompressed_size != LZMA_VLI_UNKNOWN
305			&& coder->uncompressed_size <= dict.limit - dict.pos) {
306		dict.limit = dict.pos + (size_t)(coder->uncompressed_size);
307		might_finish_without_eopm = true;
308	}
309
310	// The main decoder loop. The "switch" is used to resume the decoder at
311	// correct location. Once resumed, the "switch" is no longer used.
312	// The decoder loops is split into two modes:
313	//
314	// 1 - Non-resumable mode (fast). This is used when it is guaranteed
315	//     there is enough input to decode the next symbol. If the output
316	//     limit is reached, then the decoder loop will save the place
317	//     for the resumable mode to continue. This mode is not used if
318	//     HAVE_SMALL is defined. This is faster than Resumable mode
319	//     because it reduces the number of branches needed and allows
320	//     for more compiler optimizations.
321	//
322	// 2 - Resumable mode (slow). This is used when a previous decoder
323	//     loop did not have enough space in the input or output buffers
324	//     to complete. It uses sequence enum values to set remind
325	//     coder->sequence where to resume in the decoder loop. This
326	//     is the only mode used when HAVE_SMALL is defined.
327
328	switch (coder->sequence)
329	while (true) {
330		// Calculate new pos_state. This is skipped on the first loop
331		// since we already calculated it when setting up the local
332		// variables.
333		pos_state = dict.pos & pos_mask;
334
335#ifndef HAVE_SMALL
336
337		///////////////////////////////
338		// Non-resumable Mode (fast) //
339		///////////////////////////////
340
341		// Go to Resumable mode (1) if there is not enough input to
342		// safely decode any possible LZMA symbol or (2) if the
343		// dictionary is full, which may need special checks that
344		// are only done in the Resumable mode.
345		if (unlikely(!rc_is_fast_allowed()
346				|| dict.pos == dict.limit))
347			goto slow;
348
349		// Decode the first bit from the next LZMA symbol.
350		// If the bit is a 0, then we handle it as a literal.
351		// If the bit is a 1, then it is a match of previously
352		// decoded data.
353		rc_if_0(coder->is_match[state][pos_state]) {
354			/////////////////////
355			// Decode literal. //
356			/////////////////////
357
358			// Update the RC that we have decoded a 0.
359			rc_update_0(coder->is_match[state][pos_state]);
360
361			// Get the correct probability array from lp and
362			// lc params.
363			probs = literal_subcoder(coder->literal,
364					literal_context_bits, literal_mask,
365					dict.pos, dict_get0(&dict));
366
367			if (is_literal_state(state)) {
368				update_literal_normal(state);
369
370				// Decode literal without match byte.
371				rc_bittree8(probs, 0);
372			} else {
373				update_literal_matched(state);
374
375				// Decode literal with match byte.
376				rc_matched_literal(probs,
377						dict_get(&dict, rep0));
378			}
379
380			// Write decoded literal to dictionary
381			dict_put(&dict, symbol);
382			continue;
383		}
384
385		///////////////////
386		// Decode match. //
387		///////////////////
388
389		// Instead of a new byte we are going to decode a
390		// distance-length pair. The distance represents how far
391		// back in the dictionary to begin copying. The length
392		// represents how many bytes to copy.
393
394		rc_update_1(coder->is_match[state][pos_state]);
395
396		rc_if_0(coder->is_rep[state]) {
397			///////////////////
398			// Simple match. //
399			///////////////////
400
401			// Not a repeated match. In this case,
402			// the length (how many bytes to copy) must be
403			// decoded first. Then, the distance (where to
404			// start copying) is decoded.
405			//
406			// This is also how we know when we are done
407			// decoding. If the distance decodes to UINT32_MAX,
408			// then we know to stop decoding (end of payload
409			// marker).
410
411			rc_update_0(coder->is_rep[state]);
412			update_match(state);
413
414			// The latest three match distances are kept in
415			// memory in case there are repeated matches.
416			rep3 = rep2;
417			rep2 = rep1;
418			rep1 = rep0;
419
420			// Decode the length of the match.
421			len_decode_fast(len, coder->match_len_decoder,
422					pos_state);
423
424			// Next, decode the distance into rep0.
425
426			// The next 6 bits determine how to decode the
427			// rest of the distance.
428			probs = coder->dist_slot[get_dist_state(len)];
429
430			rc_bittree6(probs, -DIST_SLOTS);
431			assert(symbol <= 63);
432
433			if (symbol < DIST_MODEL_START) {
434				// If the decoded symbol is < DIST_MODEL_START
435				// then we use its value directly as the
436				// match distance. No other bits are needed.
437				// The only possible distance values
438				// are [0, 3].
439				rep0 = symbol;
440			} else {
441				// Use the first two bits of symbol as the
442				// highest bits of the match distance.
443
444				// "limit" represents the number of low bits
445				// to decode.
446				limit = (symbol >> 1) - 1;
447				assert(limit >= 1 && limit <= 30);
448				rep0 = 2 + (symbol & 1);
449
450				if (symbol < DIST_MODEL_END) {
451					// When symbol is > DIST_MODEL_START,
452					// but symbol < DIST_MODEL_END, then
453					// it can decode distances between
454					// [4, 127].
455					assert(limit <= 5);
456					rep0 <<= limit;
457					assert(rep0 <= 96);
458
459					// -1 is fine, because we start
460					// decoding at probs[1], not probs[0].
461					// NOTE: This violates the C standard,
462					// since we are doing pointer
463					// arithmetic past the beginning of
464					// the array.
465					assert((int32_t)(rep0 - symbol - 1)
466							>= -1);
467					assert((int32_t)(rep0 - symbol - 1)
468							<= 82);
469					probs = coder->pos_special + rep0
470							- symbol - 1;
471					symbol = 1;
472					offset = 1;
473
474					// Variable number (1-5) of bits
475					// from a reverse bittree. This
476					// isn't worth manual unrolling.
477					//
478					// NOTE: Making one or many of the
479					// variables (probs, symbol, offset,
480					// or limit) local here (instead of
481					// using those declared outside the
482					// main loop) can affect code size
483					// and performance which isn't a
484					// surprise but it's not so clear
485					// what is the best.
486					do {
487						rc_bit_add_if_1(probs,
488								rep0, offset);
489						offset <<= 1;
490					} while (--limit > 0);
491				} else {
492					// The distance is >= 128. Decode the
493					// lower bits without probabilities
494					// except the lowest four bits.
495					assert(symbol >= 14);
496					assert(limit >= 6);
497
498					limit -= ALIGN_BITS;
499					assert(limit >= 2);
500
501					rc_direct(rep0, limit);
502
503					// Decode the lowest four bits using
504					// probabilities.
505					rep0 <<= ALIGN_BITS;
506					rc_bittree_rev4(coder->pos_align);
507					rep0 += symbol;
508
509					// If the end of payload marker (EOPM)
510					// is detected, jump to the safe code.
511					// The EOPM handling isn't speed
512					// critical at all.
513					//
514					// A final normalization is needed
515					// after the EOPM (there can be a
516					// dummy byte to read in some cases).
517					// If the normalization was done here
518					// in the fast code, it would need to
519					// be taken into account in the value
520					// of LZMA_IN_REQUIRED. Using the
521					// safe code allows keeping
522					// LZMA_IN_REQUIRED as 20 instead of
523					// 21.
524					if (rep0 == UINT32_MAX)
525						goto eopm;
526				}
527			}
528
529			// Validate the distance we just decoded.
530			if (unlikely(!dict_is_distance_valid(&dict, rep0))) {
531				ret = LZMA_DATA_ERROR;
532				goto out;
533			}
534
535		} else {
536			rc_update_1(coder->is_rep[state]);
537
538			/////////////////////
539			// Repeated match. //
540			/////////////////////
541
542			// The match distance is a value that we have decoded
543			// recently. The latest four match distances are
544			// available as rep0, rep1, rep2 and rep3. We will
545			// now decode which of them is the new distance.
546			//
547			// There cannot be a match if we haven't produced
548			// any output, so check that first.
549			if (unlikely(!dict_is_distance_valid(&dict, 0))) {
550				ret = LZMA_DATA_ERROR;
551				goto out;
552			}
553
554			rc_if_0(coder->is_rep0[state]) {
555				rc_update_0(coder->is_rep0[state]);
556				// The distance is rep0.
557
558				// Decode the next bit to determine if 1 byte
559				// should be copied from rep0 distance or
560				// if the number of bytes needs to be decoded.
561
562				// If the next bit is 0, then it is a
563				// "Short Rep Match" and only 1 bit is copied.
564				// Otherwise, the length of the match is
565				// decoded after the "else" statement.
566				rc_if_0(coder->is_rep0_long[state][pos_state]) {
567					rc_update_0(coder->is_rep0_long[
568							state][pos_state]);
569
570					update_short_rep(state);
571					dict_put(&dict, dict_get(&dict, rep0));
572					continue;
573				}
574
575				// Repeating more than one byte at
576				// distance of rep0.
577				rc_update_1(coder->is_rep0_long[
578						state][pos_state]);
579
580			} else {
581				rc_update_1(coder->is_rep0[state]);
582
583				// The distance is rep1, rep2 or rep3. Once
584				// we find out which one of these three, it
585				// is stored to rep0 and rep1, rep2 and rep3
586				// are updated accordingly. There is no
587				// "Short Rep Match" option, so the length
588				// of the match must always be decoded next.
589				rc_if_0(coder->is_rep1[state]) {
590					// The distance is rep1.
591					rc_update_0(coder->is_rep1[state]);
592
593					const uint32_t distance = rep1;
594					rep1 = rep0;
595					rep0 = distance;
596
597				} else {
598					rc_update_1(coder->is_rep1[state]);
599
600					rc_if_0(coder->is_rep2[state]) {
601						// The distance is rep2.
602						rc_update_0(coder->is_rep2[
603								state]);
604
605						const uint32_t distance = rep2;
606						rep2 = rep1;
607						rep1 = rep0;
608						rep0 = distance;
609
610					} else {
611						// The distance is rep3.
612						rc_update_1(coder->is_rep2[
613								state]);
614
615						const uint32_t distance = rep3;
616						rep3 = rep2;
617						rep2 = rep1;
618						rep1 = rep0;
619						rep0 = distance;
620					}
621				}
622			}
623
624			update_long_rep(state);
625
626			// Decode the length of the repeated match.
627			len_decode_fast(len, coder->rep_len_decoder,
628					pos_state);
629		}
630
631		/////////////////////////////////
632		// Repeat from history buffer. //
633		/////////////////////////////////
634
635		// The length is always between these limits. There is no way
636		// to trigger the algorithm to set len outside this range.
637		assert(len >= MATCH_LEN_MIN);
638		assert(len <= MATCH_LEN_MAX);
639
640		// Repeat len bytes from distance of rep0.
641		if (unlikely(dict_repeat(&dict, rep0, &len))) {
642			coder->sequence = SEQ_COPY;
643			goto out;
644		}
645
646		continue;
647
648slow:
649#endif
650	///////////////////////////
651	// Resumable Mode (slow) //
652	///////////////////////////
653
654	// This is very similar to Non-resumable Mode, so most of the
655	// comments are not repeated. The main differences are:
656	// - case labels are used to resume at the correct location.
657	// - Loops are not unrolled.
658	// - Range coder macros take an extra sequence argument
659	//   so they can save to coder->sequence the location to
660	//   resume in case there is not enough input.
661	case SEQ_NORMALIZE:
662	case SEQ_IS_MATCH:
663		if (unlikely(might_finish_without_eopm
664				&& dict.pos == dict.limit)) {
665			// In rare cases there is a useless byte that needs
666			// to be read anyway.
667			rc_normalize_safe(SEQ_NORMALIZE);
668
669			// If the range decoder state is such that we can
670			// be at the end of the LZMA stream, then the
671			// decoding is finished.
672			if (rc_is_finished(rc)) {
673				ret = LZMA_STREAM_END;
674				goto out;
675			}
676
677			// If the caller hasn't allowed EOPM to be present
678			// together with known uncompressed size, then the
679			// LZMA stream is corrupt.
680			if (!coder->allow_eopm) {
681				ret = LZMA_DATA_ERROR;
682				goto out;
683			}
684
685			// Otherwise continue decoding with the expectation
686			// that the next LZMA symbol is EOPM.
687			eopm_is_valid = true;
688		}
689
690		rc_if_0_safe(coder->is_match[state][pos_state], SEQ_IS_MATCH) {
691			/////////////////////
692			// Decode literal. //
693			/////////////////////
694
695			rc_update_0(coder->is_match[state][pos_state]);
696
697			probs = literal_subcoder(coder->literal,
698					literal_context_bits, literal_mask,
699					dict.pos, dict_get0(&dict));
700			symbol = 1;
701
702			if (is_literal_state(state)) {
703				update_literal_normal(state);
704
705				// Decode literal without match byte.
706				// The "slow" version does not unroll
707				// the loop.
708	case SEQ_LITERAL:
709				do {
710					rc_bit_safe(probs[symbol], , ,
711							SEQ_LITERAL);
712				} while (symbol < (1 << 8));
713			} else {
714				update_literal_matched(state);
715
716				// Decode literal with match byte.
717				len = (uint32_t)(dict_get(&dict, rep0)) << 1;
718
719				offset = 0x100;
720
721	case SEQ_LITERAL_MATCHED:
722				do {
723					const uint32_t match_bit
724							= len & offset;
725					const uint32_t subcoder_index
726							= offset + match_bit
727							+ symbol;
728
729					rc_bit_safe(probs[subcoder_index],
730							offset &= ~match_bit,
731							offset &= match_bit,
732							SEQ_LITERAL_MATCHED);
733
734					// It seems to be faster to do this
735					// here instead of putting it to the
736					// beginning of the loop and then
737					// putting the "case" in the middle
738					// of the loop.
739					len <<= 1;
740
741				} while (symbol < (1 << 8));
742			}
743
744	case SEQ_LITERAL_WRITE:
745			if (dict_put_safe(&dict, symbol)) {
746				coder->sequence = SEQ_LITERAL_WRITE;
747				goto out;
748			}
749
750			continue;
751		}
752
753		///////////////////
754		// Decode match. //
755		///////////////////
756
757		rc_update_1(coder->is_match[state][pos_state]);
758
759	case SEQ_IS_REP:
760		rc_if_0_safe(coder->is_rep[state], SEQ_IS_REP) {
761			///////////////////
762			// Simple match. //
763			///////////////////
764
765			rc_update_0(coder->is_rep[state]);
766			update_match(state);
767
768			rep3 = rep2;
769			rep2 = rep1;
770			rep1 = rep0;
771
772			len_decode(len, coder->match_len_decoder,
773					pos_state, SEQ_MATCH_LEN);
774
775			probs = coder->dist_slot[get_dist_state(len)];
776			symbol = 1;
777
778	case SEQ_DIST_SLOT:
779			do {
780				rc_bit_safe(probs[symbol], , , SEQ_DIST_SLOT);
781			} while (symbol < DIST_SLOTS);
782
783			symbol -= DIST_SLOTS;
784			assert(symbol <= 63);
785
786			if (symbol < DIST_MODEL_START) {
787				rep0 = symbol;
788			} else {
789				limit = (symbol >> 1) - 1;
790				assert(limit >= 1 && limit <= 30);
791				rep0 = 2 + (symbol & 1);
792
793				if (symbol < DIST_MODEL_END) {
794					assert(limit <= 5);
795					rep0 <<= limit;
796					assert(rep0 <= 96);
797					// -1 is fine, because we start
798					// decoding at probs[1], not probs[0].
799					// NOTE: This violates the C standard,
800					// since we are doing pointer
801					// arithmetic past the beginning of
802					// the array.
803					assert((int32_t)(rep0 - symbol - 1)
804							>= -1);
805					assert((int32_t)(rep0 - symbol - 1)
806							<= 82);
807					probs = coder->pos_special + rep0
808							- symbol - 1;
809					symbol = 1;
810					offset = 0;
811	case SEQ_DIST_MODEL:
812					do {
813						rc_bit_safe(probs[symbol], ,
814							rep0 += 1U << offset,
815							SEQ_DIST_MODEL);
816					} while (++offset < limit);
817				} else {
818					assert(symbol >= 14);
819					assert(limit >= 6);
820					limit -= ALIGN_BITS;
821					assert(limit >= 2);
822	case SEQ_DIRECT:
823					rc_direct_safe(rep0, limit,
824							SEQ_DIRECT);
825
826					rep0 <<= ALIGN_BITS;
827					symbol = 0;
828					offset = 1;
829	case SEQ_ALIGN:
830					do {
831						rc_bit_last_safe(
832							coder->pos_align[
833								offset
834								+ symbol],
835							,
836							symbol += offset,
837							SEQ_ALIGN);
838						offset <<= 1;
839					} while (offset < ALIGN_SIZE);
840
841					rep0 += symbol;
842
843					if (rep0 == UINT32_MAX) {
844						// End of payload marker was
845						// found. It may only be
846						// present if
847						//   - uncompressed size is
848						//     unknown or
849						//   - after known uncompressed
850						//     size amount of bytes has
851						//     been decompressed and
852						//     caller has indicated
853						//     that EOPM might be used
854						//     (it's not allowed in
855						//     LZMA2).
856#ifndef HAVE_SMALL
857eopm:
858#endif
859						if (!eopm_is_valid) {
860							ret = LZMA_DATA_ERROR;
861							goto out;
862						}
863
864	case SEQ_EOPM:
865						// LZMA1 stream with
866						// end-of-payload marker.
867						rc_normalize_safe(SEQ_EOPM);
868						ret = rc_is_finished(rc)
869							? LZMA_STREAM_END
870							: LZMA_DATA_ERROR;
871						goto out;
872					}
873				}
874			}
875
876			if (unlikely(!dict_is_distance_valid(&dict, rep0))) {
877				ret = LZMA_DATA_ERROR;
878				goto out;
879			}
880
881		} else {
882			/////////////////////
883			// Repeated match. //
884			/////////////////////
885
886			rc_update_1(coder->is_rep[state]);
887
888			if (unlikely(!dict_is_distance_valid(&dict, 0))) {
889				ret = LZMA_DATA_ERROR;
890				goto out;
891			}
892
893	case SEQ_IS_REP0:
894			rc_if_0_safe(coder->is_rep0[state], SEQ_IS_REP0) {
895				rc_update_0(coder->is_rep0[state]);
896
897	case SEQ_IS_REP0_LONG:
898				rc_if_0_safe(coder->is_rep0_long
899						[state][pos_state],
900						SEQ_IS_REP0_LONG) {
901					rc_update_0(coder->is_rep0_long[
902							state][pos_state]);
903
904					update_short_rep(state);
905
906	case SEQ_SHORTREP:
907					if (dict_put_safe(&dict,
908							dict_get(&dict,
909							rep0))) {
910						coder->sequence = SEQ_SHORTREP;
911						goto out;
912					}
913
914					continue;
915				}
916
917				rc_update_1(coder->is_rep0_long[
918						state][pos_state]);
919
920			} else {
921				rc_update_1(coder->is_rep0[state]);
922
923	case SEQ_IS_REP1:
924				rc_if_0_safe(coder->is_rep1[state], SEQ_IS_REP1) {
925					rc_update_0(coder->is_rep1[state]);
926
927					const uint32_t distance = rep1;
928					rep1 = rep0;
929					rep0 = distance;
930
931				} else {
932					rc_update_1(coder->is_rep1[state]);
933	case SEQ_IS_REP2:
934					rc_if_0_safe(coder->is_rep2[state],
935							SEQ_IS_REP2) {
936						rc_update_0(coder->is_rep2[
937								state]);
938
939						const uint32_t distance = rep2;
940						rep2 = rep1;
941						rep1 = rep0;
942						rep0 = distance;
943
944					} else {
945						rc_update_1(coder->is_rep2[
946								state]);
947
948						const uint32_t distance = rep3;
949						rep3 = rep2;
950						rep2 = rep1;
951						rep1 = rep0;
952						rep0 = distance;
953					}
954				}
955			}
956
957			update_long_rep(state);
958
959			len_decode(len, coder->rep_len_decoder,
960					pos_state, SEQ_REP_LEN);
961		}
962
963		/////////////////////////////////
964		// Repeat from history buffer. //
965		/////////////////////////////////
966
967		assert(len >= MATCH_LEN_MIN);
968		assert(len <= MATCH_LEN_MAX);
969
970	case SEQ_COPY:
971		if (unlikely(dict_repeat(&dict, rep0, &len))) {
972			coder->sequence = SEQ_COPY;
973			goto out;
974		}
975	}
976
977out:
978	// Save state
979
980	// NOTE: Must not copy dict.limit.
981	dictptr->pos = dict.pos;
982	dictptr->full = dict.full;
983
984	rc_from_local(coder->rc, *in_pos);
985
986	coder->state = state;
987	coder->rep0 = rep0;
988	coder->rep1 = rep1;
989	coder->rep2 = rep2;
990	coder->rep3 = rep3;
991
992	coder->probs = probs;
993	coder->symbol = symbol;
994	coder->limit = limit;
995	coder->offset = offset;
996	coder->len = len;
997
998	// Update the remaining amount of uncompressed data if uncompressed
999	// size was known.
1000	if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) {
1001		coder->uncompressed_size -= dict.pos - dict_start;
1002
1003		// If we have gotten all the output but the decoder wants
1004		// to write more output, the file is corrupt. There are
1005		// three SEQ values where output is produced.
1006		if (coder->uncompressed_size == 0 && ret == LZMA_OK
1007				&& (coder->sequence == SEQ_LITERAL_WRITE
1008					|| coder->sequence == SEQ_SHORTREP
1009					|| coder->sequence == SEQ_COPY))
1010			ret = LZMA_DATA_ERROR;
1011	}
1012
1013	if (ret == LZMA_STREAM_END) {
1014		// Reset the range decoder so that it is ready to reinitialize
1015		// for a new LZMA2 chunk.
1016		rc_reset(coder->rc);
1017		coder->sequence = SEQ_IS_MATCH;
1018	}
1019
1020	return ret;
1021}
1022
1023
1024static void
1025lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size,
1026		bool allow_eopm)
1027{
1028	lzma_lzma1_decoder *coder = coder_ptr;
1029	coder->uncompressed_size = uncompressed_size;
1030	coder->allow_eopm = allow_eopm;
1031}
1032
1033
1034static void
1035lzma_decoder_reset(void *coder_ptr, const void *opt)
1036{
1037	lzma_lzma1_decoder *coder = coder_ptr;
1038	const lzma_options_lzma *options = opt;
1039
1040	// NOTE: We assume that lc/lp/pb are valid since they were
1041	// successfully decoded with lzma_lzma_decode_properties().
1042
1043	// Calculate pos_mask. We don't need pos_bits as is for anything.
1044	coder->pos_mask = (1U << options->pb) - 1;
1045
1046	// Initialize the literal decoder.
1047	literal_init(coder->literal, options->lc, options->lp);
1048
1049	coder->literal_context_bits = options->lc;
1050	coder->literal_mask = literal_mask_calc(options->lc, options->lp);
1051
1052	// State
1053	coder->state = STATE_LIT_LIT;
1054	coder->rep0 = 0;
1055	coder->rep1 = 0;
1056	coder->rep2 = 0;
1057	coder->rep3 = 0;
1058	coder->pos_mask = (1U << options->pb) - 1;
1059
1060	// Range decoder
1061	rc_reset(coder->rc);
1062
1063	// Bit and bittree decoders
1064	for (uint32_t i = 0; i < STATES; ++i) {
1065		for (uint32_t j = 0; j <= coder->pos_mask; ++j) {
1066			bit_reset(coder->is_match[i][j]);
1067			bit_reset(coder->is_rep0_long[i][j]);
1068		}
1069
1070		bit_reset(coder->is_rep[i]);
1071		bit_reset(coder->is_rep0[i]);
1072		bit_reset(coder->is_rep1[i]);
1073		bit_reset(coder->is_rep2[i]);
1074	}
1075
1076	for (uint32_t i = 0; i < DIST_STATES; ++i)
1077		bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS);
1078
1079	for (uint32_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i)
1080		bit_reset(coder->pos_special[i]);
1081
1082	bittree_reset(coder->pos_align, ALIGN_BITS);
1083
1084	// Len decoders (also bit/bittree)
1085	const uint32_t num_pos_states = 1U << options->pb;
1086	bit_reset(coder->match_len_decoder.choice);
1087	bit_reset(coder->match_len_decoder.choice2);
1088	bit_reset(coder->rep_len_decoder.choice);
1089	bit_reset(coder->rep_len_decoder.choice2);
1090
1091	for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {
1092		bittree_reset(coder->match_len_decoder.low[pos_state],
1093				LEN_LOW_BITS);
1094		bittree_reset(coder->match_len_decoder.mid[pos_state],
1095				LEN_MID_BITS);
1096
1097		bittree_reset(coder->rep_len_decoder.low[pos_state],
1098				LEN_LOW_BITS);
1099		bittree_reset(coder->rep_len_decoder.mid[pos_state],
1100				LEN_MID_BITS);
1101	}
1102
1103	bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS);
1104	bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS);
1105
1106	coder->sequence = SEQ_IS_MATCH;
1107	coder->probs = NULL;
1108	coder->symbol = 0;
1109	coder->limit = 0;
1110	coder->offset = 0;
1111	coder->len = 0;
1112
1113	return;
1114}
1115
1116
1117extern lzma_ret
1118lzma_lzma_decoder_create(lzma_lz_decoder *lz, const lzma_allocator *allocator,
1119		const lzma_options_lzma *options, lzma_lz_options *lz_options)
1120{
1121	if (lz->coder == NULL) {
1122		lz->coder = lzma_alloc(sizeof(lzma_lzma1_decoder), allocator);
1123		if (lz->coder == NULL)
1124			return LZMA_MEM_ERROR;
1125
1126		lz->code = &lzma_decode;
1127		lz->reset = &lzma_decoder_reset;
1128		lz->set_uncompressed = &lzma_decoder_uncompressed;
1129	}
1130
1131	// All dictionary sizes are OK here. LZ decoder will take care of
1132	// the special cases.
1133	lz_options->dict_size = options->dict_size;
1134	lz_options->preset_dict = options->preset_dict;
1135	lz_options->preset_dict_size = options->preset_dict_size;
1136
1137	return LZMA_OK;
1138}
1139
1140
1141/// Allocate and initialize LZMA decoder. This is used only via LZ
1142/// initialization (lzma_lzma_decoder_init() passes function pointer to
1143/// the LZ initialization).
1144static lzma_ret
1145lzma_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator,
1146		lzma_vli id, const void *options, lzma_lz_options *lz_options)
1147{
1148	if (!is_lclppb_valid(options))
1149		return LZMA_PROG_ERROR;
1150
1151	lzma_vli uncomp_size = LZMA_VLI_UNKNOWN;
1152	bool allow_eopm = true;
1153
1154	if (id == LZMA_FILTER_LZMA1EXT) {
1155		const lzma_options_lzma *opt = options;
1156
1157		// Only one flag is supported.
1158		if (opt->ext_flags & ~LZMA_LZMA1EXT_ALLOW_EOPM)
1159			return LZMA_OPTIONS_ERROR;
1160
1161		// FIXME? Using lzma_vli instead of uint64_t is weird because
1162		// this has nothing to do with .xz headers and variable-length
1163		// integer encoding. On the other hand, using LZMA_VLI_UNKNOWN
1164		// instead of UINT64_MAX is clearer when unknown size is
1165		// meant. A problem with using lzma_vli is that now we
1166		// allow > LZMA_VLI_MAX which is fine in this file but
1167		// it's still confusing. Note that alone_decoder.c also
1168		// allows > LZMA_VLI_MAX when setting uncompressed size.
1169		uncomp_size = opt->ext_size_low
1170				+ ((uint64_t)(opt->ext_size_high) << 32);
1171		allow_eopm = (opt->ext_flags & LZMA_LZMA1EXT_ALLOW_EOPM) != 0
1172				|| uncomp_size == LZMA_VLI_UNKNOWN;
1173	}
1174
1175	return_if_error(lzma_lzma_decoder_create(
1176			lz, allocator, options, lz_options));
1177
1178	lzma_decoder_reset(lz->coder, options);
1179	lzma_decoder_uncompressed(lz->coder, uncomp_size, allow_eopm);
1180
1181	return LZMA_OK;
1182}
1183
1184
1185extern lzma_ret
1186lzma_lzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
1187		const lzma_filter_info *filters)
1188{
1189	// LZMA can only be the last filter in the chain. This is enforced
1190	// by the raw_decoder initialization.
1191	assert(filters[1].init == NULL);
1192
1193	return lzma_lz_decoder_init(next, allocator, filters,
1194			&lzma_decoder_init);
1195}
1196
1197
1198extern bool
1199lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)
1200{
1201	if (byte > (4 * 5 + 4) * 9 + 8)
1202		return true;
1203
1204	// See the file format specification to understand this.
1205	options->pb = byte / (9 * 5);
1206	byte -= options->pb * 9 * 5;
1207	options->lp = byte / 9;
1208	options->lc = byte - options->lp * 9;
1209
1210	return options->lc + options->lp > LZMA_LCLP_MAX;
1211}
1212
1213
1214extern uint64_t
1215lzma_lzma_decoder_memusage_nocheck(const void *options)
1216{
1217	const lzma_options_lzma *const opt = options;
1218	return sizeof(lzma_lzma1_decoder)
1219			+ lzma_lz_decoder_memusage(opt->dict_size);
1220}
1221
1222
1223extern uint64_t
1224lzma_lzma_decoder_memusage(const void *options)
1225{
1226	if (!is_lclppb_valid(options))
1227		return UINT64_MAX;
1228
1229	return lzma_lzma_decoder_memusage_nocheck(options);
1230}
1231
1232
1233extern lzma_ret
1234lzma_lzma_props_decode(void **options, const lzma_allocator *allocator,
1235		const uint8_t *props, size_t props_size)
1236{
1237	if (props_size != 5)
1238		return LZMA_OPTIONS_ERROR;
1239
1240	lzma_options_lzma *opt
1241			= lzma_alloc(sizeof(lzma_options_lzma), allocator);
1242	if (opt == NULL)
1243		return LZMA_MEM_ERROR;
1244
1245	if (lzma_lzma_lclppb_decode(opt, props[0]))
1246		goto error;
1247
1248	// All dictionary sizes are accepted, including zero. LZ decoder
1249	// will automatically use a dictionary at least a few KiB even if
1250	// a smaller dictionary is requested.
1251	opt->dict_size = read32le(props + 1);
1252
1253	opt->preset_dict = NULL;
1254	opt->preset_dict_size = 0;
1255
1256	*options = opt;
1257
1258	return LZMA_OK;
1259
1260error:
1261	lzma_free(opt, allocator);
1262	return LZMA_OPTIONS_ERROR;
1263}
1264