lz_decoder.h revision 292588
1///////////////////////////////////////////////////////////////////////////////
2//
3/// \file       lz_decoder.h
4/// \brief      LZ out window
5///
6//  Authors:    Igor Pavlov
7//              Lasse Collin
8//
9//  This file has been put into the public domain.
10//  You can do whatever you want with this file.
11//
12///////////////////////////////////////////////////////////////////////////////
13
14#ifndef LZMA_LZ_DECODER_H
15#define LZMA_LZ_DECODER_H
16
17#include "common.h"
18
19
20typedef struct {
21	/// Pointer to the dictionary buffer. It can be an allocated buffer
22	/// internal to liblzma, or it can a be a buffer given by the
23	/// application when in single-call mode (not implemented yet).
24	uint8_t *buf;
25
26	/// Write position in dictionary. The next byte will be written to
27	/// buf[pos].
28	size_t pos;
29
30	/// Indicates how full the dictionary is. This is used by
31	/// dict_is_distance_valid() to detect corrupt files that would
32	/// read beyond the beginning of the dictionary.
33	size_t full;
34
35	/// Write limit
36	size_t limit;
37
38	/// Size of the dictionary
39	size_t size;
40
41	/// True when dictionary should be reset before decoding more data.
42	bool need_reset;
43
44} lzma_dict;
45
46
47typedef struct {
48	size_t dict_size;
49	const uint8_t *preset_dict;
50	size_t preset_dict_size;
51} lzma_lz_options;
52
53
54typedef struct {
55	/// Data specific to the LZ-based decoder
56	lzma_coder *coder;
57
58	/// Function to decode from in[] to *dict
59	lzma_ret (*code)(lzma_coder *restrict coder,
60			lzma_dict *restrict dict, const uint8_t *restrict in,
61			size_t *restrict in_pos, size_t in_size);
62
63	void (*reset)(lzma_coder *coder, const void *options);
64
65	/// Set the uncompressed size
66	void (*set_uncompressed)(lzma_coder *coder,
67			lzma_vli uncompressed_size);
68
69	/// Free allocated resources
70	void (*end)(lzma_coder *coder, const lzma_allocator *allocator);
71
72} lzma_lz_decoder;
73
74
75#define LZMA_LZ_DECODER_INIT \
76	(lzma_lz_decoder){ \
77		.coder = NULL, \
78		.code = NULL, \
79		.reset = NULL, \
80		.set_uncompressed = NULL, \
81		.end = NULL, \
82	}
83
84
85extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
86		const lzma_allocator *allocator,
87		const lzma_filter_info *filters,
88		lzma_ret (*lz_init)(lzma_lz_decoder *lz,
89			const lzma_allocator *allocator, const void *options,
90			lzma_lz_options *lz_options));
91
92extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
93
94extern void lzma_lz_decoder_uncompressed(
95		lzma_coder *coder, lzma_vli uncompressed_size);
96
97
98//////////////////////
99// Inline functions //
100//////////////////////
101
102/// Get a byte from the history buffer.
103static inline uint8_t
104dict_get(const lzma_dict *const dict, const uint32_t distance)
105{
106	return dict->buf[dict->pos - distance - 1
107			+ (distance < dict->pos ? 0 : dict->size)];
108}
109
110
111/// Test if dictionary is empty.
112static inline bool
113dict_is_empty(const lzma_dict *const dict)
114{
115	return dict->full == 0;
116}
117
118
119/// Validate the match distance
120static inline bool
121dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
122{
123	return dict->full > distance;
124}
125
126
127/// Repeat *len bytes at distance.
128static inline bool
129dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
130{
131	// Don't write past the end of the dictionary.
132	const size_t dict_avail = dict->limit - dict->pos;
133	uint32_t left = my_min(dict_avail, *len);
134	*len -= left;
135
136	// Repeat a block of data from the history. Because memcpy() is faster
137	// than copying byte by byte in a loop, the copying process gets split
138	// into three cases.
139	if (distance < left) {
140		// Source and target areas overlap, thus we can't use
141		// memcpy() nor even memmove() safely.
142		do {
143			dict->buf[dict->pos] = dict_get(dict, distance);
144			++dict->pos;
145		} while (--left > 0);
146
147	} else if (distance < dict->pos) {
148		// The easiest and fastest case
149		memcpy(dict->buf + dict->pos,
150				dict->buf + dict->pos - distance - 1,
151				left);
152		dict->pos += left;
153
154	} else {
155		// The bigger the dictionary, the more rare this
156		// case occurs. We need to "wrap" the dict, thus
157		// we might need two memcpy() to copy all the data.
158		assert(dict->full == dict->size);
159		const uint32_t copy_pos
160				= dict->pos - distance - 1 + dict->size;
161		uint32_t copy_size = dict->size - copy_pos;
162
163		if (copy_size < left) {
164			memmove(dict->buf + dict->pos, dict->buf + copy_pos,
165					copy_size);
166			dict->pos += copy_size;
167			copy_size = left - copy_size;
168			memcpy(dict->buf + dict->pos, dict->buf, copy_size);
169			dict->pos += copy_size;
170		} else {
171			memmove(dict->buf + dict->pos, dict->buf + copy_pos,
172					left);
173			dict->pos += left;
174		}
175	}
176
177	// Update how full the dictionary is.
178	if (dict->full < dict->pos)
179		dict->full = dict->pos;
180
181	return unlikely(*len != 0);
182}
183
184
185/// Puts one byte into the dictionary. Returns true if the dictionary was
186/// already full and the byte couldn't be added.
187static inline bool
188dict_put(lzma_dict *dict, uint8_t byte)
189{
190	if (unlikely(dict->pos == dict->limit))
191		return true;
192
193	dict->buf[dict->pos++] = byte;
194
195	if (dict->pos > dict->full)
196		dict->full = dict->pos;
197
198	return false;
199}
200
201
202/// Copies arbitrary amount of data into the dictionary.
203static inline void
204dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,
205		size_t *restrict in_pos, size_t in_size,
206		size_t *restrict left)
207{
208	// NOTE: If we are being given more data than the size of the
209	// dictionary, it could be possible to optimize the LZ decoder
210	// so that not everything needs to go through the dictionary.
211	// This shouldn't be very common thing in practice though, and
212	// the slowdown of one extra memcpy() isn't bad compared to how
213	// much time it would have taken if the data were compressed.
214
215	if (in_size - *in_pos > *left)
216		in_size = *in_pos + *left;
217
218	*left -= lzma_bufcpy(in, in_pos, in_size,
219			dict->buf, &dict->pos, dict->limit);
220
221	if (dict->pos > dict->full)
222		dict->full = dict->pos;
223
224	return;
225}
226
227
228static inline void
229dict_reset(lzma_dict *dict)
230{
231	dict->need_reset = true;
232	return;
233}
234
235#endif
236