/* index.h revision 207753 */
/**
 * \file        lzma/index.h
 * \brief       Handling of .xz Index and related information
 */

/*
 * Author: Lasse Collin
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 *
 * See ../lzma.h for information about liblzma as a whole.
 */

/*
 * Refuse direct inclusion: this header is only meant to be pulled in
 * through <lzma.h>, which is expected to define LZMA_H_INTERNAL first.
 */
#ifndef LZMA_H_INTERNAL
#	error Never include this file directly. Use <lzma.h> instead.
#endif


/**
 * \brief       Opaque data type to hold the Index(es) and other information
 *
 * lzma_index often holds just one .xz Index and possibly the Stream Flags
 * of the same Stream and size of the Stream Padding field. However,
 * multiple lzma_indexes can be concatenated with lzma_index_cat() and then
 * there may be information about multiple Streams in the same lzma_index.
 *
 * Notes about thread safety: Only one thread may modify lzma_index at
 * a time. All functions that take a non-const pointer to lzma_index
 * modify it. As long as no thread is modifying the lzma_index, getting
 * information from the same lzma_index can be done from multiple threads
 * at the same time with functions that take a const pointer to
 * lzma_index or use lzma_index_iter. The same iterator must be used
 * only by one thread at a time, of course, but there can be as many
 * iterators for the same lzma_index as needed.
 */
typedef struct lzma_index_s lzma_index;


40/**
41 * \brief       Iterator to get information about Blocks and Streams
42 */
43typedef struct {
44	struct {
45		/**
46		 * \brief       Pointer to Stream Flags
47		 *
48		 * This is NULL if Stream Flags have not been set for
49		 * this Stream with lzma_index_stream_flags().
50		 */
51		const lzma_stream_flags *flags;
52
53		const void *reserved_ptr1;
54		const void *reserved_ptr2;
55		const void *reserved_ptr3;
56
57		/**
58		 * \brief       Stream number in the lzma_index
59		 *
60		 * The first Stream is 1.
61		 */
62		lzma_vli number;
63
64		/**
65		 * \brief       Number of Blocks in the Stream
66		 *
67		 * If this is zero, the block structure below has
68		 * undefined values.
69		 */
70		lzma_vli block_count;
71
72		/**
73		 * \brief       Compressed start offset of this Stream
74		 *
75		 * The offset is relative to the beginning of the lzma_index
76		 * (i.e. usually the beginning of the .xz file).
77		 */
78		lzma_vli compressed_offset;
79
80		/**
81		 * \brief       Uncompressed start offset of this Stream
82		 *
83		 * The offset is relative to the beginning of the lzma_index
84		 * (i.e. usually the beginning of the .xz file).
85		 */
86		lzma_vli uncompressed_offset;
87
88		/**
89		 * \brief       Compressed size of this Stream
90		 *
91		 * This includes all headers except the possible
92		 * Stream Padding after this Stream.
93		 */
94		lzma_vli compressed_size;
95
96		/**
97		 * \brief       Uncompressed size of this Stream
98		 */
99		lzma_vli uncompressed_size;
100
101		/**
102		 * \brief       Size of Stream Padding after this Stream
103		 *
104		 * If it hasn't been set with lzma_index_stream_padding(),
105		 * this defaults to zero. Stream Padding is always
106		 * a multiple of four bytes.
107		 */
108		lzma_vli padding;
109
110		lzma_vli reserved_vli1;
111		lzma_vli reserved_vli2;
112		lzma_vli reserved_vli3;
113		lzma_vli reserved_vli4;
114	} stream;
115
116	struct {
117		/**
118		 * \brief       Block number in the file
119		 *
120		 * The first Block is 1.
121		 */
122		lzma_vli number_in_file;
123
124		/**
125		 * \brief       Compressed start offset of this Block
126		 *
127		 * This offset is relative to the beginning of the
128		 * lzma_index (i.e. usually the beginning of the .xz file).
129		 * Normally this is where you should seek in the .xz file
130		 * to start decompressing this Block.
131		 */
132		lzma_vli compressed_file_offset;
133
134		/**
135		 * \brief       Uncompressed start offset of this Block
136		 *
137		 * This offset is relative to the beginning of the lzma_index
138		 * (i.e. usually the beginning of the .xz file).
139		 */
140		lzma_vli uncompressed_file_offset;
141
142		/**
143		 * \brief       Block number in this Stream
144		 *
145		 * The first Block is 1.
146		 */
147		lzma_vli number_in_stream;
148
149		/**
150		 * \brief       Compressed start offset of this Block
151		 *
152		 * This offset is relative to the beginning of the Stream
153		 * containing this Block.
154		 */
155		lzma_vli compressed_stream_offset;
156
157		/**
158		 * \brief       Uncompressed start offset of this Block
159		 *
160		 * This offset is relative to the beginning of the Stream
161		 * containing this Block.
162		 */
163		lzma_vli uncompressed_stream_offset;
164
165		/**
166		 * \brief       Uncompressed size of this Block
167		 *
168		 * You should pass this to the Block decoder if you will
169		 * decode this Block.
170		 *
171		 * When doing random-access reading, it is possible that
172		 * the target offset is not exactly at Block boundary. One
173		 * will need to compare the target offset against
174		 * uncompressed_file_offset or uncompressed_stream_offset,
175		 * and possibly decode and throw away some amount of data
176		 * before reaching the target offset.
177		 */
178		lzma_vli uncompressed_size;
179
180		/**
181		 * \brief       Unpadded size of this Block
182		 *
183		 * You should pass this to the Block decoder if you will
184		 * decode this Block.
185		 */
186		lzma_vli unpadded_size;
187
188		/**
189		 * \brief       Total compressed size
190		 *
191		 * This includes all headers and padding in this Block.
192		 * This is useful if you need to know how many bytes
193		 * the Block decoder will actually read.
194		 */
195		lzma_vli total_size;
196
197		lzma_vli reserved_vli1;
198		lzma_vli reserved_vli2;
199		lzma_vli reserved_vli3;
200		lzma_vli reserved_vli4;
201
202		const void *reserved_ptr1;
203		const void *reserved_ptr2;
204		const void *reserved_ptr3;
205		const void *reserved_ptr4;
206	} block;
207
208	/*
209	 * Internal data which is used to store the state of the iterator.
210	 * The exact format may vary between liblzma versions, so don't
211	 * touch these in any way.
212	 */
213	union {
214		const void *p;
215		size_t s;
216		lzma_vli v;
217	} internal[6];
218} lzma_index_iter;
219
220
/**
 * \brief       Operation mode for lzma_index_iter_next()
 *
 * The mode selects what kind of position (Stream, Block, or either)
 * lzma_index_iter_next() looks for next.
 */
typedef enum {
	LZMA_INDEX_ITER_ANY             = 0,
		/**<
		 * \brief       Get the next Block or Stream
		 *
		 * Go to the next Block if the current Stream has at least
		 * one Block left. Otherwise go to the next Stream even if
		 * it has no Blocks. If the Stream has no Blocks
		 * (lzma_index_iter.stream.block_count == 0),
		 * lzma_index_iter.block will have undefined values.
		 */

	LZMA_INDEX_ITER_STREAM          = 1,
		/**<
		 * \brief       Get the next Stream
		 *
		 * Go to the next Stream even if the current Stream has
		 * unread Blocks left. If the next Stream has at least one
		 * Block, the iterator will point to the first Block.
		 * If there are no Blocks, lzma_index_iter.block will have
		 * undefined values.
		 */

	LZMA_INDEX_ITER_BLOCK           = 2,
		/**<
		 * \brief       Get the next Block
		 *
		 * Go to the next Block if the current Stream has at least
		 * one Block left. If the current Stream has no Blocks left,
		 * the next Stream with at least one Block is located and
		 * the iterator will be made to point to the first Block of
		 * that Stream.
		 */

	LZMA_INDEX_ITER_NONEMPTY_BLOCK  = 3
		/**<
		 * \brief       Get the next non-empty Block
		 *
		 * This is like LZMA_INDEX_ITER_BLOCK except that it will
		 * skip Blocks whose Uncompressed Size is zero.
		 */

} lzma_index_iter_mode;


269/**
270 * \brief       Calculate memory usage of lzma_index
271 *
272 * On disk, the size of the Index field depends on both the number of Records
273 * stored and how big values the Records store (due to variable-length integer
274 * encoding). When the Index is kept in lzma_index structure, the memory usage
275 * depends only on the number of Records/Blocks stored in the Index(es), and
276 * in case of concatenated lzma_indexes, the number of Streams. The size in
277 * RAM is almost always significantly bigger than in the encoded form on disk.
278 *
279 * This function calculates an approximate amount of memory needed hold
280 * the given number of Streams and Blocks in lzma_index structure. This
281 * value may vary between CPU architectures and also between liblzma versions
282 * if the internal implementation is modified.
283 */
284extern LZMA_API(uint64_t) lzma_index_memusage(
285		lzma_vli streams, lzma_vli blocks) lzma_nothrow;
286
287
288/**
289 * \brief       Calculate the memory usage of an existing lzma_index
290 *
291 * This is a shorthand for lzma_index_memusage(lzma_index_stream_count(i),
292 * lzma_index_block_count(i)).
293 */
294extern LZMA_API(uint64_t) lzma_index_memused(const lzma_index *i)
295		lzma_nothrow;
296
297
298/**
299 * \brief       Allocate and initialize a new lzma_index structure
300 *
301 * \return      On success, a pointer to an empty initialized lzma_index is
302 *              returned. If allocation fails, NULL is returned.
303 */
304extern LZMA_API(lzma_index *) lzma_index_init(lzma_allocator *allocator)
305		lzma_nothrow;
306
307
308/**
309 * \brief       Deallocate lzma_index
310 *
311 * If i is NULL, this does nothing.
312 */
313extern LZMA_API(void) lzma_index_end(lzma_index *i, lzma_allocator *allocator)
314		lzma_nothrow;
315
316
317/**
318 * \brief       Add a new Block to lzma_index
319 *
320 * \param       i                 Pointer to a lzma_index structure
321 * \param       allocator         Pointer to lzma_allocator, or NULL to
322 *                                use malloc()
323 * \param       unpadded_size     Unpadded Size of a Block. This can be
324 *                                calculated with lzma_block_unpadded_size()
325 *                                after encoding or decoding the Block.
326 * \param       uncompressed_size Uncompressed Size of a Block. This can be
327 *                                taken directly from lzma_block structure
328 *                                after encoding or decoding the Block.
329 *
330 * Appending a new Block does not invalidate iterators. For example,
331 * if an iterator was pointing to the end of the lzma_index, after
332 * lzma_index_append() it is possible to read the next Block with
333 * an existing iterator.
334 *
335 * \return      - LZMA_OK
336 *              - LZMA_MEM_ERROR
337 *              - LZMA_DATA_ERROR: Compressed or uncompressed size of the
338 *                Stream or size of the Index field would grow too big.
339 *              - LZMA_PROG_ERROR
340 */
341extern LZMA_API(lzma_ret) lzma_index_append(
342		lzma_index *i, lzma_allocator *allocator,
343		lzma_vli unpadded_size, lzma_vli uncompressed_size)
344		lzma_nothrow lzma_attr_warn_unused_result;
345
346
347/**
348 * \brief       Set the Stream Flags
349 *
350 * Set the Stream Flags of the last (and typically the only) Stream
351 * in lzma_index. This can be useful when reading information from the
352 * lzma_index, because to decode Blocks, knowing the integrity check type
353 * is needed.
354 *
355 * The given Stream Flags are copied into internal preallocated structure
356 * in the lzma_index, thus the caller doesn't need to keep the *stream_flags
357 * available after calling this function.
358 *
359 * \return      - LZMA_OK
360 *              - LZMA_OPTIONS_ERROR: Unsupported stream_flags->version.
361 *              - LZMA_PROG_ERROR
362 */
363extern LZMA_API(lzma_ret) lzma_index_stream_flags(
364		lzma_index *i, const lzma_stream_flags *stream_flags)
365		lzma_nothrow lzma_attr_warn_unused_result;
366
367
368/**
369 * \brief       Get the types of integrity Checks
370 *
371 * If lzma_index_stream_padding() is used to set the Stream Flags for
372 * every Stream, lzma_index_checks() can be used to get a bitmask to
373 * indicate which Check types have been used. It can be useful e.g. if
374 * showing the Check types to the user.
375 *
376 * The bitmask is 1 << check_id, e.g. CRC32 is 1 << 1 and SHA-256 is 1 << 10.
377 */
378extern LZMA_API(uint32_t) lzma_index_checks(const lzma_index *i)
379		lzma_nothrow lzma_attr_pure;
380
381
382/**
383 * \brief       Set the amount of Stream Padding
384 *
385 * Set the amount of Stream Padding of the last (and typically the only)
386 * Stream in the lzma_index. This is needed when planning to do random-access
387 * reading within multiple concatenated Streams.
388 *
389 * By default, the amount of Stream Padding is assumed to be zero bytes.
390 *
391 * \return      - LZMA_OK
392 *              - LZMA_DATA_ERROR: The file size would grow too big.
393 *              - LZMA_PROG_ERROR
394 */
395extern LZMA_API(lzma_ret) lzma_index_stream_padding(
396		lzma_index *i, lzma_vli stream_padding)
397		lzma_nothrow lzma_attr_warn_unused_result;
398
399
400/**
401 * \brief       Get the number of Streams
402 */
403extern LZMA_API(lzma_vli) lzma_index_stream_count(const lzma_index *i)
404		lzma_nothrow lzma_attr_pure;
405
406
407/**
408 * \brief       Get the number of Blocks
409 *
410 * This returns the total number of Blocks in lzma_index. To get number
411 * of Blocks in individual Streams, use lzma_index_iter.
412 */
413extern LZMA_API(lzma_vli) lzma_index_block_count(const lzma_index *i)
414		lzma_nothrow lzma_attr_pure;
415
416
417/**
418 * \brief       Get the size of the Index field as bytes
419 *
420 * This is needed to verify the Backward Size field in the Stream Footer.
421 */
422extern LZMA_API(lzma_vli) lzma_index_size(const lzma_index *i)
423		lzma_nothrow lzma_attr_pure;
424
425
426/**
427 * \brief       Get the total size of the Stream
428 *
429 * If multiple lzma_indexes have been combined, this works as if the Blocks
430 * were in a single Stream. This is useful if you are going to combine
431 * Blocks from multiple Streams into a single new Stream.
432 */
433extern LZMA_API(lzma_vli) lzma_index_stream_size(const lzma_index *i)
434		lzma_nothrow lzma_attr_pure;
435
436
437/**
438 * \brief       Get the total size of the Blocks
439 *
440 * This doesn't include the Stream Header, Stream Footer, Stream Padding,
441 * or Index fields.
442 */
443extern LZMA_API(lzma_vli) lzma_index_total_size(const lzma_index *i)
444		lzma_nothrow lzma_attr_pure;
445
446
447/**
448 * \brief       Get the total size of the file
449 *
450 * When no lzma_indexes have been combined with lzma_index_cat() and there is
451 * no Stream Padding, this function is identical to lzma_index_stream_size().
452 * If multiple lzma_indexes have been combined, this includes also the headers
453 * of each separate Stream and the possible Stream Padding fields.
454 */
455extern LZMA_API(lzma_vli) lzma_index_file_size(const lzma_index *i)
456		lzma_nothrow lzma_attr_pure;
457
458
459/**
460 * \brief       Get the uncompressed size of the file
461 */
462extern LZMA_API(lzma_vli) lzma_index_uncompressed_size(const lzma_index *i)
463		lzma_nothrow lzma_attr_pure;
464
465
466/**
467 * \brief       Initialize an iterator
468 *
469 * \param       iter    Pointer to a lzma_index_iter structure
470 * \param       i       lzma_index to which the iterator will be associated
471 *
472 * This function associates the iterator with the given lzma_index, and calls
473 * lzma_index_iter_rewind() on the iterator.
474 *
475 * This function doesn't allocate any memory, thus there is no
476 * lzma_index_iter_end(). The iterator is valid as long as the
477 * associated lzma_index is valid, that is, until lzma_index_end() or
478 * using it as source in lzma_index_cat(). Specifically, lzma_index doesn't
479 * become invalid if new Blocks are added to it with lzma_index_append() or
480 * if it is used as the destination in lzma_index_cat().
481 *
482 * It is safe to make copies of an initialized lzma_index_iter, for example,
483 * to easily restart reading at some particular position.
484 */
485extern LZMA_API(void) lzma_index_iter_init(
486		lzma_index_iter *iter, const lzma_index *i) lzma_nothrow;
487
488
489/**
490 * \brief       Rewind the iterator
491 *
492 * Rewind the iterator so that next call to lzma_index_iter_next() will
493 * return the first Block or Stream.
494 */
495extern LZMA_API(void) lzma_index_iter_rewind(lzma_index_iter *iter)
496		lzma_nothrow;
497
498
499/**
500 * \brief       Get the next Block or Stream
501 *
502 * \param       iter    Iterator initialized with lzma_index_iter_init()
503 * \param       mode    Specify what kind of information the caller wants
504 *                      to get. See lzma_index_iter_mode for details.
505 *
506 * \return      If next Block or Stream matching the mode was found, *iter
507 *              is updated and this function returns false. If no Block or
508 *              Stream matching the mode is found, *iter is not modified
509 *              and this function returns true. If mode is set to an unknown
510 *              value, *iter is not modified and this function returns true.
511 */
512extern LZMA_API(lzma_bool) lzma_index_iter_next(
513		lzma_index_iter *iter, lzma_index_iter_mode mode)
514		lzma_nothrow lzma_attr_warn_unused_result;
515
516
517/**
518 * \brief       Locate a Block
519 *
520 * If it is possible to seek in the .xz file, it is possible to parse
521 * the Index field(s) and use lzma_index_iter_locate() to do random-access
522 * reading with granularity of Block size.
523 *
524 * \param       iter    Iterator that was earlier initialized with
525 *                      lzma_index_iter_init().
526 * \param       target  Uncompressed target offset which the caller would
527 *                      like to locate from the Stream
528 *
529 * If the target is smaller than the uncompressed size of the Stream (can be
530 * checked with lzma_index_uncompressed_size()):
531 *  - Information about the Stream and Block containing the requested
532 *    uncompressed offset is stored into *iter.
533 *  - Internal state of the iterator is adjusted so that
534 *    lzma_index_iter_next() can be used to read subsequent Blocks or Streams.
535 *  - This function returns false.
536 *
537 * If target is greater than the uncompressed size of the Stream, *iter
538 * is not modified, and this function returns true.
539 */
540extern LZMA_API(lzma_bool) lzma_index_iter_locate(
541		lzma_index_iter *iter, lzma_vli target) lzma_nothrow;
542
543
544/**
545 * \brief       Concatenate lzma_indexes
546 *
547 * Concatenating lzma_indexes is useful when doing random-access reading in
548 * multi-Stream .xz file, or when combining multiple Streams into single
549 * Stream.
550 *
551 * \param       dest      lzma_index after which src is appended
552 * \param       src       lzma_index to be appended after dest. If this
553 *                        function succeeds, the memory allocated for src
554 *                        is freed or moved to be part of dest, and all
555 *                        iterators pointing to src will become invalid.
556 * \param       allocator Custom memory allocator; can be NULL to use
557 *                        malloc() and free().
558 *
559 * \return      - LZMA_OK: lzma_indexes were concatenated successfully.
560 *                src is now a dangling pointer.
561 *              - LZMA_DATA_ERROR: *dest would grow too big.
562 *              - LZMA_MEM_ERROR
563 *              - LZMA_PROG_ERROR
564 */
565extern LZMA_API(lzma_ret) lzma_index_cat(lzma_index *lzma_restrict dest,
566		lzma_index *lzma_restrict src,
567		lzma_allocator *allocator)
568		lzma_nothrow lzma_attr_warn_unused_result;
569
570
571/**
572 * \brief       Duplicate lzma_index
573 *
574 * \return      A copy of the lzma_index, or NULL if memory allocation failed.
575 */
576extern LZMA_API(lzma_index *) lzma_index_dup(
577		const lzma_index *i, lzma_allocator *allocator)
578		lzma_nothrow lzma_attr_warn_unused_result;
579
580
581/**
582 * \brief       Initialize .xz Index encoder
583 *
584 * \param       strm        Pointer to properly prepared lzma_stream
585 * \param       i           Pointer to lzma_index which should be encoded.
586 *
587 * The only valid action value for lzma_code() is LZMA_RUN.
588 *
589 * \return      - LZMA_OK: Initialization succeeded, continue with lzma_code().
590 *              - LZMA_MEM_ERROR
591 *              - LZMA_PROG_ERROR
592 */
593extern LZMA_API(lzma_ret) lzma_index_encoder(
594		lzma_stream *strm, const lzma_index *i)
595		lzma_nothrow lzma_attr_warn_unused_result;
596
597
598/**
599 * \brief       Initialize .xz Index decoder
600 *
601 * \param       strm        Pointer to properly prepared lzma_stream
602 * \param       i           The decoded Index will be made available via
603 *                          this pointer. Initially this function will
604 *                          set *i to NULL (the old value is ignored). If
605 *                          decoding succeeds (lzma_code() returns
606 *                          LZMA_STREAM_END), *i will be set to point
607 *                          to a new lzma_index, which the application
608 *                          has to later free with lzma_index_end().
609 * \param       memlimit    How much memory the resulting lzma_index is
610 *                          allowed to require.
611 *
612 * The only valid action value for lzma_code() is LZMA_RUN.
613 *
614 * \return      - LZMA_OK: Initialization succeeded, continue with lzma_code().
615 *              - LZMA_MEM_ERROR
616 *              - LZMA_MEMLIMIT_ERROR
617 *              - LZMA_PROG_ERROR
618 */
619extern LZMA_API(lzma_ret) lzma_index_decoder(
620		lzma_stream *strm, lzma_index **i, uint64_t memlimit)
621		lzma_nothrow lzma_attr_warn_unused_result;
622
623
624/**
625 * \brief       Single-call .xz Index encoder
626 *
627 * \param       i         lzma_index to be encoded
628 * \param       out       Beginning of the output buffer
629 * \param       out_pos   The next byte will be written to out[*out_pos].
630 *                        *out_pos is updated only if encoding succeeds.
631 * \param       out_size  Size of the out buffer; the first byte into
632 *                        which no data is written to is out[out_size].
633 *
634 * \return      - LZMA_OK: Encoding was successful.
635 *              - LZMA_BUF_ERROR: Output buffer is too small. Use
636 *                lzma_index_size() to find out how much output
637 *                space is needed.
638 *              - LZMA_PROG_ERROR
639 *
640 * \note        This function doesn't take allocator argument since all
641 *              the internal data is allocated on stack.
642 */
643extern LZMA_API(lzma_ret) lzma_index_buffer_encode(const lzma_index *i,
644		uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow;
645
646
647/**
648 * \brief       Single-call .xz Index decoder
649 *
650 * \param       i           If decoding succeeds, *i will point to a new
651 *                          lzma_index, which the application has to
652 *                          later free with lzma_index_end(). If an error
653 *                          occurs, *i will be NULL. The old value of *i
654 *                          is always ignored and thus doesn't need to be
655 *                          initialized by the caller.
656 * \param       memlimit    Pointer to how much memory the resulting
657 *                          lzma_index is allowed to require. The value
658 *                          pointed by this pointer is modified if and only
659 *                          if LZMA_MEMLIMIT_ERROR is returned.
660 * \param       allocator   Pointer to lzma_allocator, or NULL to use malloc()
661 * \param       in          Beginning of the input buffer
662 * \param       in_pos      The next byte will be read from in[*in_pos].
663 *                          *in_pos is updated only if decoding succeeds.
664 * \param       in_size     Size of the input buffer; the first byte that
665 *                          won't be read is in[in_size].
666 *
667 * \return      - LZMA_OK: Decoding was successful.
668 *              - LZMA_MEM_ERROR
669 *              - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached.
670 *                The minimum required memlimit value was stored to *memlimit.
671 *              - LZMA_DATA_ERROR
672 *              - LZMA_PROG_ERROR
673 */
674extern LZMA_API(lzma_ret) lzma_index_buffer_decode(lzma_index **i,
675		uint64_t *memlimit, lzma_allocator *allocator,
676		const uint8_t *in, size_t *in_pos, size_t in_size)
677		lzma_nothrow;
678