base.h revision 207753
1/**
2 * \file        lzma/base.h
3 * \brief       Data types and functions used in many places in liblzma API
4 */
5
6/*
7 * Author: Lasse Collin
8 *
9 * This file has been put into the public domain.
10 * You can do whatever you want with this file.
11 *
12 * See ../lzma.h for information about liblzma as a whole.
13 */
14
15#ifndef LZMA_H_INTERNAL
16#	error Never include this file directly. Use <lzma.h> instead.
17#endif
18
19
20/**
21 * \brief       Boolean type used in the liblzma API
22 *
23 * This is here because C89 doesn't have stdbool.h. To set a value for
24 * variables having type lzma_bool, you can use
25 *   - C99's `true' and `false' from stdbool.h;
26 *   - C++'s built-in `true' and `false'; or
27 *   - integers one (true) and zero (false).
28 */
29typedef unsigned char lzma_bool;
30
31
32/**
33 * \brief       Type of reserved enumeration variable in structures
34 *
35 * To avoid breaking the library ABI when new features are added, several
36 * structures contain extra variables that may be used in the future. Since
37 * sizeof(enum) can be different from sizeof(int), and sizeof(enum) may
38 * even vary depending on the range of enumeration constants, we specify
39 * a separate type to be used for reserved enumeration variables. All
40 * enumeration constants in the liblzma API will be non-negative and less
41 * than 128, which should guarantee that the ABI won't break even when
42 * new constants are added to existing enumerations.
43 */
44typedef enum {
45	LZMA_RESERVED_ENUM      = 0
46} lzma_reserved_enum;
47
48
49/**
50 * \brief       Return values used by several functions in liblzma
51 *
52 * Check the descriptions of specific functions to find out which return
53 * values they can return. With some functions the return values may have
54 * more specific meanings than described here; those differences are
55 * described on a per-function basis.
56 */
57typedef enum {
58	LZMA_OK                 = 0,
59		/**<
60		 * \brief       Operation completed successfully
61		 */
62
63	LZMA_STREAM_END         = 1,
64		/**<
65		 * \brief       End of stream was reached
66		 *
67		 * In the encoder, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or
68		 * LZMA_FINISH was finished. In the decoder, this indicates
69		 * that all the data was successfully decoded.
70		 *
71		 * In all cases, when LZMA_STREAM_END is returned, the last
72		 * output bytes should be picked from strm->next_out.
73		 */
74
75	LZMA_NO_CHECK           = 2,
76		/**<
77		 * \brief       Input stream has no integrity check
78		 *
79		 * This return value can be returned only if the
80		 * LZMA_TELL_NO_CHECK flag was used when initializing
81		 * the decoder. LZMA_NO_CHECK is just a warning, and
82		 * the decoding can be continued normally.
83		 *
84		 * It is possible to call lzma_get_check() immediately after
85		 * lzma_code() has returned LZMA_NO_CHECK. The result will
86		 * naturally be LZMA_CHECK_NONE, but the possibility to call
87		 * lzma_get_check() may be convenient in some applications.
88		 */
89
90	LZMA_UNSUPPORTED_CHECK  = 3,
91		/**<
92		 * \brief       Cannot calculate the integrity check
93		 *
94		 * The usage of this return value is different in encoders
95		 * and decoders.
96		 *
97		 * Encoders can return this value only from the initialization
98		 * function. If initialization fails with this value, the
99		 * encoding cannot be done, because there's no way to produce
100		 * output with the correct integrity check.
101		 *
102		 * Decoders can return this value only from lzma_code() and
103		 * only if the LZMA_TELL_UNSUPPORTED_CHECK flag was used when
104		 * initializing the decoder. The decoding can still be
105		 * continued normally even if the check type is unsupported,
106		 * but naturally the check will not be validated, and possible
107		 * errors may go undetected.
108		 *
109		 * With the decoder, it is possible to call lzma_get_check()
110		 * immediately after lzma_code() has returned
111		 * LZMA_UNSUPPORTED_CHECK. This way it is possible to find
112		 * out what the unsupported Check ID was.
113		 */
114
115	LZMA_GET_CHECK          = 4,
116		/**<
117		 * \brief       Integrity check type is now available
118		 *
119		 * This value can be returned only by the lzma_code() function
120		 * and only if the decoder was initialized with the
121		 * LZMA_TELL_ANY_CHECK flag. LZMA_GET_CHECK tells the
122		 * application that it may now call lzma_get_check() to find
123		 * out the Check ID. This can be used, for example, to
124		 * implement a decoder that accepts only files that have
125		 * a strong enough integrity check.
126		 */
127
128	LZMA_MEM_ERROR          = 5,
129		/**<
130		 * \brief       Cannot allocate memory
131		 *
132		 * Memory allocation failed, or the size of the allocation
133		 * would be greater than SIZE_MAX.
134		 *
135		 * Due to internal implementation reasons, the coding cannot
136		 * be continued even if more memory were made available after
137		 * LZMA_MEM_ERROR.
138		 */
139
140	LZMA_MEMLIMIT_ERROR     = 6,
141		/**<
142		 * \brief       Memory usage limit was reached
143		 *
144		 * Decoder would need more memory than allowed by the
145		 * specified memory usage limit. To continue decoding,
146		 * the memory usage limit has to be increased with
147		 * lzma_memlimit_set().
148		 */
149
150	LZMA_FORMAT_ERROR       = 7,
151		/**<
152		 * \brief       File format not recognized
153		 *
154		 * The decoder did not recognize the input as supported file
155		 * format. This error can occur, for example, when trying to
156		 * decode .lzma format file with lzma_stream_decoder,
157		 * because lzma_stream_decoder accepts only the .xz format.
158		 */
159
160	LZMA_OPTIONS_ERROR      = 8,
161		/**<
162		 * \brief       Invalid or unsupported options
163		 *
164		 * Invalid or unsupported options, for example
165		 *  - unsupported filter(s) or filter options; or
166		 *  - reserved bits set in headers (decoder only).
167		 *
168		 * Rebuilding liblzma with more features enabled, or
169		 * upgrading to a newer version of liblzma may help.
170		 */
171
172	LZMA_DATA_ERROR         = 9,
173		/**<
174		 * \brief       Data is corrupt
175		 *
176		 * The usage of this return value is different in encoders
177		 * and decoders. In both encoder and decoder, the coding
178		 * cannot continue after this error.
179		 *
180		 * Encoders return this if size limits of the target file
181		 * format would be exceeded. These limits are huge, thus
182		 * getting this error from an encoder is mostly theoretical.
183		 * For example, the maximum compressed and uncompressed
184		 * size of a .xz Stream is roughly 8 EiB (2^63 bytes).
185		 *
186		 * Decoders return this error if the input data is corrupt.
187		 * This can mean, for example, invalid CRC32 in headers
188		 * or invalid check of uncompressed data.
189		 */
190
191	LZMA_BUF_ERROR          = 10,
192		/**<
193		 * \brief       No progress is possible
194		 *
195		 * This error code is returned when the coder can neither
196		 * consume any new input nor produce any new output. The most
197		 * common reason for this error is that the input stream being
198		 * decoded is truncated or corrupt.
199		 *
200		 * This error is not fatal. Coding can be continued normally
201		 * by providing more input and/or more output space, if
202		 * possible.
203		 *
204		 * Typically the first call to lzma_code() that can make no
205		 * progress returns LZMA_OK instead of LZMA_BUF_ERROR. Only
206		 * the second consecutive call making no progress will return
207		 * LZMA_BUF_ERROR. This is intentional.
208		 *
209		 * With zlib, Z_BUF_ERROR may be returned even if the
210		 * application is doing nothing wrong, so apps will need
211		 * to handle Z_BUF_ERROR specially. The above hack
212		 * guarantees that liblzma never returns LZMA_BUF_ERROR
213		 * to properly written applications unless the input file
214		 * is truncated or corrupt. This should simplify the
215		 * applications a little.
216		 */
217
218	LZMA_PROG_ERROR         = 11,
219		/**<
220		 * \brief       Programming error
221		 *
222		 * This indicates that the arguments given to the function are
223		 * invalid or the internal state of the decoder is corrupt.
224		 *   - Function arguments are invalid or the structures
225		 *     pointed to by the argument pointers are invalid
226		 *     e.g. if strm->next_out has been set to NULL and
227		 *     strm->avail_out > 0 when calling lzma_code().
228		 *   - lzma_* functions have been called in wrong order
229		 *     e.g. lzma_code() was called right after lzma_end().
230		 *   - If errors occur randomly, the reason might be flaky
231		 *     hardware.
232		 *
233		 * If you think that your code is correct, this error code
234		 * can be a sign of a bug in liblzma. See the documentation
235		 * for how to report bugs.
236		 */
237} lzma_ret;
238
239
240/**
241 * \brief       The `action' argument for lzma_code()
242 *
243 * After the first use of LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or LZMA_FINISH,
244 * the same `action' must be used until lzma_code() returns LZMA_STREAM_END.
245 * Also, the amount of input (that is, strm->avail_in) must not be modified
246 * by the application until lzma_code() returns LZMA_STREAM_END. Changing the
247 * `action' or modifying the amount of input will make lzma_code() return
248 * LZMA_PROG_ERROR.
249 */
250typedef enum {
251	LZMA_RUN = 0,
252		/**<
253		 * \brief       Continue coding
254		 *
255		 * Encoder: Encode as much input as possible. Some internal
256		 * buffering will probably be done (depends on the filter
257		 * chain in use), which causes latency: the input used won't
258		 * usually be decodable from the output of the same
259		 * lzma_code() call.
260		 *
261		 * Decoder: Decode as much input as possible and produce as
262		 * much output as possible.
263		 */
264
265	LZMA_SYNC_FLUSH = 1,
266		/**<
267		 * \brief       Make all the input available at output
268		 *
269		 * Normally the encoder introduces some latency.
270		 * LZMA_SYNC_FLUSH forces all the buffered data to be
271		 * available at output without resetting the internal
272		 * state of the encoder. This way it is possible to use a
273		 * compressed stream, for example, for communication over a
274		 * network.
275		 *
276		 * Only some filters support LZMA_SYNC_FLUSH. Trying to use
277		 * LZMA_SYNC_FLUSH with filters that don't support it will
278		 * make lzma_code() return LZMA_OPTIONS_ERROR. For example,
279		 * LZMA1 doesn't support LZMA_SYNC_FLUSH but LZMA2 does.
280		 *
281		 * Using LZMA_SYNC_FLUSH very often can dramatically reduce
282		 * the compression ratio. With some filters (for example,
283		 * LZMA2), fine-tuning the compression options may help
284		 * mitigate this problem significantly.
285		 *
286		 * Decoders don't support LZMA_SYNC_FLUSH.
287		 */
288
289	LZMA_FULL_FLUSH = 2,
290		/**<
291		 * \brief       Finish encoding of the current Block
292		 *
293		 * Finish encoding of the current Block. All the input
294		 * data going to the current Block must have been given
295		 * to the encoder (the last bytes can still be pending in
296		 * next_in). Call lzma_code() with LZMA_FULL_FLUSH until
297		 * it returns LZMA_STREAM_END. Then continue normally with
298		 * LZMA_RUN or finish the Stream with LZMA_FINISH.
299		 *
300		 * This action is currently supported only by Stream encoder
301		 * and easy encoder (which uses Stream encoder). If there is
302		 * no unfinished Block, no empty Block is created.
303		 */
304
305	LZMA_FINISH = 3
306		/**<
307		 * \brief       Finish the coding operation
308		 *
309		 * Finishes the coding operation. All the input data must
310		 * have been given to the encoder (the last bytes can still
311		 * be pending in next_in). Call lzma_code() with LZMA_FINISH
312		 * until it returns LZMA_STREAM_END. Once LZMA_FINISH has
313		 * been used, the amount of input must no longer be changed
314		 * by the application.
315		 *
316		 * When decoding, using LZMA_FINISH is optional unless the
317		 * LZMA_CONCATENATED flag was used when the decoder was
318		 * initialized. When LZMA_CONCATENATED was not used, the only
319		 * effect of LZMA_FINISH is that the amount of input must not
320		 * be changed just like in the encoder.
321		 */
322} lzma_action;
323
324
325/**
326 * \brief       Custom functions for memory handling
327 *
328 * A pointer to lzma_allocator may be passed via lzma_stream structure
329 * to liblzma, and some advanced functions take a pointer to lzma_allocator
330 * as a separate function argument. The library will use the functions
331 * specified in lzma_allocator for memory handling instead of the default
332 * malloc() and free(). C++ users should note that the custom memory
333 * handling functions must not throw exceptions.
334 *
335 * liblzma doesn't make an internal copy of lzma_allocator. Thus, it is
336 * OK to change these function pointers in the middle of the coding
337 * process, but obviously it must be done carefully to make sure that the
338 * replacement `free' can deallocate memory allocated by the earlier
339 * `alloc' function(s).
340 */
341typedef struct {
342	/**
343	 * \brief       Pointer to a custom memory allocation function
344	 *
345	 * If you don't want a custom allocator, but still want
346	 * a custom free(), set this to NULL and liblzma will use
347	 * the standard malloc().
348	 *
349	 * \param       opaque  lzma_allocator.opaque (see below)
350	 * \param       nmemb   Number of elements like in calloc(). liblzma
351	 *                      will always set nmemb to 1, so it is safe to
352	 *                      ignore nmemb in a custom allocator if you like.
353	 *                      The nmemb argument exists only for
354	 *                      compatibility with zlib and libbzip2.
355	 * \param       size    Size of an element in bytes.
356	 *                      liblzma never sets this to zero.
357	 *
358	 * \return      Pointer to the beginning of a memory block of
359	 *              `size' bytes, or NULL if allocation fails
360	 *              for some reason. When allocation fails, functions
361	 *              of liblzma return LZMA_MEM_ERROR.
362	 *
363	 * The allocator should not waste time zeroing the allocated buffers.
364	 * This is not only about speed, but also memory usage, since the
365	 * operating system kernel doesn't necessarily allocate the requested
366	 * memory in physical memory until it is actually used. With small
367	 * input files, liblzma may actually need only a fraction of the
368	 * memory that it requested for allocation.
369	 *
370	 * \note        LZMA_MEM_ERROR is also used when the size of the
371	 *              allocation would be greater than SIZE_MAX. Thus,
372	 *              don't assume that the custom allocator must have
373	 *              returned NULL if some function from liblzma
374	 *              returns LZMA_MEM_ERROR.
375	 */
376	void *(LZMA_API_CALL *alloc)(void *opaque, size_t nmemb, size_t size);
377
378	/**
379	 * \brief       Pointer to a custom memory freeing function
380	 *
381	 * If you don't want a custom freeing function, but still
382	 * want a custom allocator, set this to NULL and liblzma
383	 * will use the standard free().
384	 *
385	 * \param       opaque  lzma_allocator.opaque (see below)
386	 * \param       ptr     Pointer returned by lzma_allocator.alloc(),
387	 *                      or when it is set to NULL, a pointer returned
388	 *                      by the standard malloc().
389	 */
390	void (LZMA_API_CALL *free)(void *opaque, void *ptr);
391
392	/**
393	 * \brief       Pointer passed to .alloc() and .free()
394	 *
395	 * opaque is passed as the first argument to lzma_allocator.alloc()
396	 * and lzma_allocator.free(). This is intended to ease implementing
397	 * custom memory allocation functions for use with liblzma.
398	 *
399	 * If you don't need this, you should set this to NULL.
400	 */
401	void *opaque;
402
403} lzma_allocator;
404
405
406/**
407 * \brief       Internal data structure
408 *
409 * The contents of this structure are not visible outside the library.
410 */
411typedef struct lzma_internal_s lzma_internal;
412
413
414/**
415 * \brief       Passing data to and from liblzma
416 *
417 * The lzma_stream structure is used for
418 *  - passing pointers to input and output buffers to liblzma;
419 *  - defining custom memory handler functions; and
420 *  - holding a pointer to coder-specific internal data structures.
421 *
422 * Typical usage:
423 *
424 *  - After allocating lzma_stream (on stack or with malloc()), it must be
425 *    initialized to LZMA_STREAM_INIT (see LZMA_STREAM_INIT for details).
426 *
427 *  - Initialize a coder to the lzma_stream, for example by using
428 *    lzma_easy_encoder() or lzma_auto_decoder(). Some notes:
429 *      - In contrast to zlib, strm->next_in and strm->next_out are
430 *        ignored by all initialization functions, thus it is safe
431 *        to not initialize them yet.
432 *      - The initialization functions always set strm->total_in and
433 *        strm->total_out to zero.
434 *      - If the initialization function fails, no memory is left allocated
435 *        that would require freeing with lzma_end() even if some memory was
436 *        associated with the lzma_stream structure when the initialization
437 *        function was called.
438 *
439 *  - Use lzma_code() to do the actual work.
440 *
441 *  - Once the coding has been finished, the existing lzma_stream can be
442 *    reused. It is OK to reuse lzma_stream with a different initialization
443 *    function without calling lzma_end() first. Old allocations are
444 *    automatically freed.
445 *
446 *  - Finally, use lzma_end() to free the allocated memory. lzma_end() never
447 *    frees the lzma_stream structure itself.
448 *
449 * The application may modify the values of total_in and total_out as it
450 * wants. They are updated by liblzma to match the amount of data read
451 * and written, but aren't used for anything else.
452 */
453typedef struct {
454	const uint8_t *next_in; /**< Pointer to the next input byte. */
455	size_t avail_in;    /**< Number of available input bytes in next_in. */
456	uint64_t total_in;  /**< Total number of bytes read by liblzma. */
457
458	uint8_t *next_out;  /**< Pointer to the next output position. */
459	size_t avail_out;   /**< Amount of free space in next_out. */
460	uint64_t total_out; /**< Total number of bytes written by liblzma. */
461
462	/**
463	 * \brief       Custom memory allocation functions
464	 *
465	 * In most cases this is NULL which makes liblzma use
466	 * the standard malloc() and free().
467	 */
468	lzma_allocator *allocator;
469
470	/** Internal state is not visible to applications. */
471	lzma_internal *internal;
472
473	/*
474	 * Reserved space to allow possible future extensions without
475	 * breaking the ABI. Excluding the initialization of this structure,
476	 * you should not touch these, because the names of these variables
477	 * may change.
478	 */
479	void *reserved_ptr1;
480	void *reserved_ptr2;
481	uint64_t reserved_int1;
482	uint64_t reserved_int2;
483	lzma_reserved_enum reserved_enum1;
484	lzma_reserved_enum reserved_enum2;
485
486} lzma_stream;
487
488
489/**
490 * \brief       Initialization for lzma_stream
491 *
492 * When you declare an instance of lzma_stream, you can immediately
493 * initialize it so that initialization functions know that no memory
494 * has been allocated yet:
495 *
496 *     lzma_stream strm = LZMA_STREAM_INIT;
497 *
498 * If you need to initialize a dynamically allocated lzma_stream, you can use
499 * memset(strm_pointer, 0, sizeof(lzma_stream)). Strictly speaking, this
500 * violates the C standard since NULL may have a different internal
501 * representation than all-bits-zero, but it should be portable enough in
502 * practice. Anyway, for maximum portability, you can use something like this:
503 *
504 *     lzma_stream tmp = LZMA_STREAM_INIT;
505 *     *strm = tmp;
506 */
507#define LZMA_STREAM_INIT \
508	{ NULL, 0, 0, NULL, 0, 0, NULL, NULL, \
509	NULL, NULL, 0, 0, LZMA_RESERVED_ENUM, LZMA_RESERVED_ENUM }
510
511
512/**
513 * \brief       Encode or decode data
514 *
515 * Once the lzma_stream has been successfully initialized (e.g. with
516 * lzma_stream_encoder()), the actual encoding or decoding is done
517 * using this function. The application has to update strm->next_in,
518 * strm->avail_in, strm->next_out, and strm->avail_out to pass input
519 * to and get output from liblzma.
520 *
521 * See the description of the coder-specific initialization function to find
522 * out which `action' values (see lzma_action) are supported by the coder.
523 */
524extern LZMA_API(lzma_ret) lzma_code(lzma_stream *strm, lzma_action action)
525		lzma_nothrow lzma_attr_warn_unused_result;
526
527
528/**
529 * \brief       Free memory allocated for the coder data structures
530 *
531 * \param       strm    Pointer to lzma_stream that is at least initialized
532 *                      with LZMA_STREAM_INIT.
533 *
534 * After lzma_end(strm), strm->internal is guaranteed to be NULL. No other
535 * members of the lzma_stream structure are touched.
536 *
537 * \note        zlib indicates an error if the application end()s an
538 *              unfinished stream structure. liblzma doesn't do this, and
539 *              assumes that the application knows what it is doing.
540 */
541extern LZMA_API(void) lzma_end(lzma_stream *strm) lzma_nothrow;
542
543
544/**
545 * \brief       Get the memory usage of the decoder filter chain
546 *
547 * This function is currently supported only when *strm has been initialized
548 * with a function that takes a memlimit argument. With other functions, you
549 * should use e.g. lzma_raw_encoder_memusage() or lzma_raw_decoder_memusage()
550 * to estimate the memory requirements.
551 *
552 * This function is useful e.g. after LZMA_MEMLIMIT_ERROR to find out how big
553 * the memory usage limit should have been to decode the input. Note that
554 * this may give misleading information when decoding .xz Streams that have
555 * multiple Blocks, because each Block can have different memory requirements.
556 *
557 * \return      Rough estimate of how much memory is currently allocated
558 *              for the filter decoders. If no filter chain is currently
559 *              allocated, some non-zero value is still returned, which is
560 *              less than or equal to what any filter chain would indicate
561 *              as its memory requirement.
562 *
563 *              If this function isn't supported by *strm or some other error
564 *              occurs, zero is returned.
565 */
566extern LZMA_API(uint64_t) lzma_memusage(const lzma_stream *strm)
567		lzma_nothrow lzma_attr_pure;
568
569
570/**
571 * \brief       Get the current memory usage limit
572 *
573 * This function is supported only when *strm has been initialized with
574 * a function that takes a memlimit argument; see also lzma_memlimit_set().
575 *
576 * \return      On success, the current memory usage limit is returned
577 *              (always non-zero). On error, zero is returned.
578 */
579extern LZMA_API(uint64_t) lzma_memlimit_get(const lzma_stream *strm)
580		lzma_nothrow lzma_attr_pure;
581
582
583/**
584 * \brief       Set the memory usage limit
585 *
586 * This function is supported only when *strm has been initialized with
587 * a function that takes a memlimit argument; see also lzma_memlimit_get().
588 *
589 * \return      - LZMA_OK: New memory usage limit successfully set.
590 *              - LZMA_MEMLIMIT_ERROR: The new limit is too small.
591 *                The limit was not changed.
592 *              - LZMA_PROG_ERROR: Invalid arguments, e.g. *strm doesn't
593 *                support a memory usage limit or memlimit was zero.
594 */
595extern LZMA_API(lzma_ret) lzma_memlimit_set(
596		lzma_stream *strm, uint64_t memlimit) lzma_nothrow;
597