Deleted Added
full compact
archive_read_support_format_lha.c (313570) archive_read_support_format_lha.c (316337)
1/*-
2 * Copyright (c) 2008-2014 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
27
28#ifdef HAVE_ERRNO_H
29#include <errno.h>
30#endif
31#ifdef HAVE_LIMITS_H
32#include <limits.h>
33#endif
34#ifdef HAVE_STDLIB_H
35#include <stdlib.h>
36#endif
37#ifdef HAVE_STRING_H
38#include <string.h>
39#endif
40
41#include "archive.h"
42#include "archive_entry.h"
43#include "archive_entry_locale.h"
44#include "archive_private.h"
45#include "archive_read_private.h"
46#include "archive_endian.h"
47
48
49#define MAXMATCH 256 /* Maximum match length. */
50#define MINMATCH 3 /* Minimum match length. */
51/*
52 * Literal table format:
53 * +0 +256 +510
54 * +---------------+-------------------------+
55 * | literal code | match length |
56 * | 0 ... 255 | MINMATCH ... MAXMATCH |
57 * +---------------+-------------------------+
58 * <--- LT_BITLEN_SIZE --->
59 */
60/* Literal table size. */
61#define LT_BITLEN_SIZE (UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1)
62/* Position table size.
63 * Note: this used for both position table and pre literal table.*/
64#define PT_BITLEN_SIZE (3 + 16)
65
66struct lzh_dec {
67 /* Decoding status. */
68 int state;
69
70 /*
71 * Window to see last 8Ki(lh5),32Ki(lh6),64Ki(lh7) bytes of decoded
72 * data.
73 */
74 int w_size;
75 int w_mask;
76 /* Window buffer, which is a loop buffer. */
77 unsigned char *w_buff;
78 /* The insert position to the window. */
79 int w_pos;
80 /* The position where we can copy decoded code from the window. */
81 int copy_pos;
82 /* The length how many bytes we can copy decoded code from
83 * the window. */
84 int copy_len;
85
86 /*
87 * Bit stream reader.
88 */
89 struct lzh_br {
90#define CACHE_TYPE uint64_t
91#define CACHE_BITS (8 * sizeof(CACHE_TYPE))
92 /* Cache buffer. */
93 CACHE_TYPE cache_buffer;
94 /* Indicates how many bits avail in cache_buffer. */
95 int cache_avail;
96 } br;
97
98 /*
99 * Huffman coding.
100 */
101 struct huffman {
102 int len_size;
103 int len_avail;
104 int len_bits;
105 int freq[17];
106 unsigned char *bitlen;
107
108 /*
109 * Use a index table. It's faster than searching a huffman
110 * coding tree, which is a binary tree. But a use of a large
111 * index table causes L1 cache read miss many times.
112 */
113#define HTBL_BITS 10
114 int max_bits;
115 int shift_bits;
116 int tbl_bits;
117 int tree_used;
118 int tree_avail;
119 /* Direct access table. */
120 uint16_t *tbl;
121 /* Binary tree table for extra bits over the direct access. */
122 struct htree_t {
123 uint16_t left;
124 uint16_t right;
125 } *tree;
126 } lt, pt;
127
128 int blocks_avail;
129 int pos_pt_len_size;
130 int pos_pt_len_bits;
131 int literal_pt_len_size;
132 int literal_pt_len_bits;
133 int reading_position;
134 int loop;
135 int error;
136};
137
138struct lzh_stream {
139 const unsigned char *next_in;
140 int avail_in;
141 int64_t total_in;
142 const unsigned char *ref_ptr;
143 int avail_out;
144 int64_t total_out;
145 struct lzh_dec *ds;
146};
147
148struct lha {
149 /* entry_bytes_remaining is the number of bytes we expect. */
150 int64_t entry_offset;
151 int64_t entry_bytes_remaining;
152 int64_t entry_unconsumed;
153 uint16_t entry_crc_calculated;
154
155 size_t header_size; /* header size */
156 unsigned char level; /* header level */
157 char method[3]; /* compress type */
158 int64_t compsize; /* compressed data size */
159 int64_t origsize; /* original file size */
160 int setflag;
161#define BIRTHTIME_IS_SET 1
162#define ATIME_IS_SET 2
163#define UNIX_MODE_IS_SET 4
164#define CRC_IS_SET 8
165 time_t birthtime;
166 long birthtime_tv_nsec;
167 time_t mtime;
168 long mtime_tv_nsec;
169 time_t atime;
170 long atime_tv_nsec;
171 mode_t mode;
172 int64_t uid;
173 int64_t gid;
174 struct archive_string uname;
175 struct archive_string gname;
176 uint16_t header_crc;
177 uint16_t crc;
178 struct archive_string_conv *sconv;
179 struct archive_string_conv *opt_sconv;
180
181 struct archive_string dirname;
182 struct archive_string filename;
183 struct archive_wstring ws;
184
185 unsigned char dos_attr;
186
187 /* Flag to mark progress that an archive was read their first header.*/
188 char found_first_header;
189 /* Flag to mark that indicates an empty directory. */
190 char directory;
191
192 /* Flags to mark progress of decompression. */
193 char decompress_init;
194 char end_of_entry;
195 char end_of_entry_cleanup;
196 char entry_is_compressed;
197
198 char format_name[64];
199
200 struct lzh_stream strm;
201};
202
203/*
204 * LHA header common member offset.
205 */
206#define H_METHOD_OFFSET 2 /* Compress type. */
207#define H_ATTR_OFFSET 19 /* DOS attribute. */
208#define H_LEVEL_OFFSET 20 /* Header Level. */
209#define H_SIZE 22 /* Minimum header size. */
210
211static int archive_read_format_lha_bid(struct archive_read *, int);
212static int archive_read_format_lha_options(struct archive_read *,
213 const char *, const char *);
214static int archive_read_format_lha_read_header(struct archive_read *,
215 struct archive_entry *);
216static int archive_read_format_lha_read_data(struct archive_read *,
217 const void **, size_t *, int64_t *);
218static int archive_read_format_lha_read_data_skip(struct archive_read *);
219static int archive_read_format_lha_cleanup(struct archive_read *);
220
221static void lha_replace_path_separator(struct lha *,
222 struct archive_entry *);
223static int lha_read_file_header_0(struct archive_read *, struct lha *);
224static int lha_read_file_header_1(struct archive_read *, struct lha *);
225static int lha_read_file_header_2(struct archive_read *, struct lha *);
226static int lha_read_file_header_3(struct archive_read *, struct lha *);
227static int lha_read_file_extended_header(struct archive_read *,
228 struct lha *, uint16_t *, int, size_t, size_t *);
229static size_t lha_check_header_format(const void *);
230static int lha_skip_sfx(struct archive_read *);
231static time_t lha_dos_time(const unsigned char *);
232static time_t lha_win_time(uint64_t, long *);
233static unsigned char lha_calcsum(unsigned char, const void *,
234 int, size_t);
235static int lha_parse_linkname(struct archive_string *,
236 struct archive_string *);
237static int lha_read_data_none(struct archive_read *, const void **,
238 size_t *, int64_t *);
239static int lha_read_data_lzh(struct archive_read *, const void **,
240 size_t *, int64_t *);
241static void lha_crc16_init(void);
242static uint16_t lha_crc16(uint16_t, const void *, size_t);
243static int lzh_decode_init(struct lzh_stream *, const char *);
244static void lzh_decode_free(struct lzh_stream *);
245static int lzh_decode(struct lzh_stream *, int);
246static int lzh_br_fillup(struct lzh_stream *, struct lzh_br *);
247static int lzh_huffman_init(struct huffman *, size_t, int);
248static void lzh_huffman_free(struct huffman *);
249static int lzh_read_pt_bitlen(struct lzh_stream *, int start, int end);
250static int lzh_make_fake_table(struct huffman *, uint16_t);
251static int lzh_make_huffman_table(struct huffman *);
252static inline int lzh_decode_huffman(struct huffman *, unsigned);
253static int lzh_decode_huffman_tree(struct huffman *, unsigned, int);
254
255
256int
257archive_read_support_format_lha(struct archive *_a)
258{
259 struct archive_read *a = (struct archive_read *)_a;
260 struct lha *lha;
261 int r;
262
263 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
264 ARCHIVE_STATE_NEW, "archive_read_support_format_lha");
265
266 lha = (struct lha *)calloc(1, sizeof(*lha));
267 if (lha == NULL) {
268 archive_set_error(&a->archive, ENOMEM,
269 "Can't allocate lha data");
270 return (ARCHIVE_FATAL);
271 }
272 archive_string_init(&lha->ws);
273
274 r = __archive_read_register_format(a,
275 lha,
276 "lha",
277 archive_read_format_lha_bid,
278 archive_read_format_lha_options,
279 archive_read_format_lha_read_header,
280 archive_read_format_lha_read_data,
281 archive_read_format_lha_read_data_skip,
282 NULL,
283 archive_read_format_lha_cleanup,
284 NULL,
285 NULL);
286
287 if (r != ARCHIVE_OK)
288 free(lha);
289 return (ARCHIVE_OK);
290}
291
292static size_t
293lha_check_header_format(const void *h)
294{
295 const unsigned char *p = h;
296 size_t next_skip_bytes;
297
298 switch (p[H_METHOD_OFFSET+3]) {
299 /*
300 * "-lh0-" ... "-lh7-" "-lhd-"
301 * "-lzs-" "-lz5-"
302 */
303 case '0': case '1': case '2': case '3':
304 case '4': case '5': case '6': case '7':
305 case 'd':
306 case 's':
307 next_skip_bytes = 4;
308
309 /* b0 == 0 means the end of an LHa archive file. */
310 if (p[0] == 0)
311 break;
312 if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l'
313 || p[H_METHOD_OFFSET+4] != '-')
314 break;
315
316 if (p[H_METHOD_OFFSET+2] == 'h') {
317 /* "-lh?-" */
318 if (p[H_METHOD_OFFSET+3] == 's')
319 break;
320 if (p[H_LEVEL_OFFSET] == 0)
321 return (0);
322 if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20)
323 return (0);
324 }
325 if (p[H_METHOD_OFFSET+2] == 'z') {
326 /* LArc extensions: -lzs-,-lz4- and -lz5- */
327 if (p[H_LEVEL_OFFSET] != 0)
328 break;
329 if (p[H_METHOD_OFFSET+3] == 's'
330 || p[H_METHOD_OFFSET+3] == '4'
331 || p[H_METHOD_OFFSET+3] == '5')
332 return (0);
333 }
334 break;
335 case 'h': next_skip_bytes = 1; break;
336 case 'z': next_skip_bytes = 1; break;
337 case 'l': next_skip_bytes = 2; break;
338 case '-': next_skip_bytes = 3; break;
339 default : next_skip_bytes = 4; break;
340 }
341
342 return (next_skip_bytes);
343}
344
345static int
346archive_read_format_lha_bid(struct archive_read *a, int best_bid)
347{
348 const char *p;
349 const void *buff;
350 ssize_t bytes_avail, offset, window;
351 size_t next;
352
353 /* If there's already a better bid than we can ever
354 make, don't bother testing. */
355 if (best_bid > 30)
356 return (-1);
357
358 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL)
359 return (-1);
360
361 if (lha_check_header_format(p) == 0)
362 return (30);
363
364 if (p[0] == 'M' && p[1] == 'Z') {
365 /* PE file */
366 offset = 0;
367 window = 4096;
368 while (offset < (1024 * 20)) {
369 buff = __archive_read_ahead(a, offset + window,
370 &bytes_avail);
371 if (buff == NULL) {
372 /* Remaining bytes are less than window. */
373 window >>= 1;
374 if (window < (H_SIZE + 3))
375 return (0);
376 continue;
377 }
378 p = (const char *)buff + offset;
379 while (p + H_SIZE < (const char *)buff + bytes_avail) {
380 if ((next = lha_check_header_format(p)) == 0)
381 return (30);
382 p += next;
383 }
384 offset = p - (const char *)buff;
385 }
386 }
387 return (0);
388}
389
390static int
391archive_read_format_lha_options(struct archive_read *a,
392 const char *key, const char *val)
393{
394 struct lha *lha;
395 int ret = ARCHIVE_FAILED;
396
397 lha = (struct lha *)(a->format->data);
398 if (strcmp(key, "hdrcharset") == 0) {
399 if (val == NULL || val[0] == 0)
400 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
401 "lha: hdrcharset option needs a character-set name");
402 else {
403 lha->opt_sconv =
404 archive_string_conversion_from_charset(
405 &a->archive, val, 0);
406 if (lha->opt_sconv != NULL)
407 ret = ARCHIVE_OK;
408 else
409 ret = ARCHIVE_FATAL;
410 }
411 return (ret);
412 }
413
414 /* Note: The "warn" return is just to inform the options
415 * supervisor that we didn't handle it. It will generate
416 * a suitable error if no one used this option. */
417 return (ARCHIVE_WARN);
418}
419
420static int
421lha_skip_sfx(struct archive_read *a)
422{
423 const void *h;
424 const char *p, *q;
425 size_t next, skip;
426 ssize_t bytes, window;
427
428 window = 4096;
429 for (;;) {
430 h = __archive_read_ahead(a, window, &bytes);
431 if (h == NULL) {
432 /* Remaining bytes are less than window. */
433 window >>= 1;
434 if (window < (H_SIZE + 3))
435 goto fatal;
436 continue;
437 }
438 if (bytes < H_SIZE)
439 goto fatal;
440 p = h;
441 q = p + bytes;
442
443 /*
444 * Scan ahead until we find something that looks
445 * like the lha header.
446 */
447 while (p + H_SIZE < q) {
448 if ((next = lha_check_header_format(p)) == 0) {
449 skip = p - (const char *)h;
450 __archive_read_consume(a, skip);
451 return (ARCHIVE_OK);
452 }
453 p += next;
454 }
455 skip = p - (const char *)h;
456 __archive_read_consume(a, skip);
457 }
458fatal:
459 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
460 "Couldn't find out LHa header");
461 return (ARCHIVE_FATAL);
462}
463
464static int
465truncated_error(struct archive_read *a)
466{
467 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
468 "Truncated LHa header");
469 return (ARCHIVE_FATAL);
470}
471
472static int
473archive_read_format_lha_read_header(struct archive_read *a,
474 struct archive_entry *entry)
475{
476 struct archive_string linkname;
477 struct archive_string pathname;
478 struct lha *lha;
479 const unsigned char *p;
480 const char *signature;
481 int err;
482
483 lha_crc16_init();
484
485 a->archive.archive_format = ARCHIVE_FORMAT_LHA;
486 if (a->archive.archive_format_name == NULL)
487 a->archive.archive_format_name = "lha";
488
489 lha = (struct lha *)(a->format->data);
490 lha->decompress_init = 0;
491 lha->end_of_entry = 0;
492 lha->end_of_entry_cleanup = 0;
493 lha->entry_unconsumed = 0;
494
495 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) {
496 /*
497 * LHa archiver added 0 to the tail of its archive file as
498 * the mark of the end of the archive.
499 */
500 signature = __archive_read_ahead(a, sizeof(signature[0]), NULL);
501 if (signature == NULL || signature[0] == 0)
502 return (ARCHIVE_EOF);
503 return (truncated_error(a));
504 }
505
506 signature = (const char *)p;
507 if (lha->found_first_header == 0 &&
508 signature[0] == 'M' && signature[1] == 'Z') {
509 /* This is an executable? Must be self-extracting... */
510 err = lha_skip_sfx(a);
511 if (err < ARCHIVE_WARN)
512 return (err);
513
514 if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL)
515 return (truncated_error(a));
516 signature = (const char *)p;
517 }
518 /* signature[0] == 0 means the end of an LHa archive file. */
519 if (signature[0] == 0)
520 return (ARCHIVE_EOF);
521
522 /*
523 * Check the header format and method type.
524 */
525 if (lha_check_header_format(p) != 0) {
526 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
527 "Bad LHa file");
528 return (ARCHIVE_FATAL);
529 }
530
531 /* We've found the first header. */
532 lha->found_first_header = 1;
533 /* Set a default value and common data */
534 lha->header_size = 0;
535 lha->level = p[H_LEVEL_OFFSET];
536 lha->method[0] = p[H_METHOD_OFFSET+1];
537 lha->method[1] = p[H_METHOD_OFFSET+2];
538 lha->method[2] = p[H_METHOD_OFFSET+3];
539 if (memcmp(lha->method, "lhd", 3) == 0)
540 lha->directory = 1;
541 else
542 lha->directory = 0;
543 if (memcmp(lha->method, "lh0", 3) == 0 ||
544 memcmp(lha->method, "lz4", 3) == 0)
545 lha->entry_is_compressed = 0;
546 else
547 lha->entry_is_compressed = 1;
548
549 lha->compsize = 0;
550 lha->origsize = 0;
551 lha->setflag = 0;
552 lha->birthtime = 0;
553 lha->birthtime_tv_nsec = 0;
554 lha->mtime = 0;
555 lha->mtime_tv_nsec = 0;
556 lha->atime = 0;
557 lha->atime_tv_nsec = 0;
558 lha->mode = (lha->directory)? 0777 : 0666;
559 lha->uid = 0;
560 lha->gid = 0;
561 archive_string_empty(&lha->dirname);
562 archive_string_empty(&lha->filename);
563 lha->dos_attr = 0;
564 if (lha->opt_sconv != NULL)
565 lha->sconv = lha->opt_sconv;
566 else
567 lha->sconv = NULL;
568
569 switch (p[H_LEVEL_OFFSET]) {
570 case 0:
571 err = lha_read_file_header_0(a, lha);
572 break;
573 case 1:
574 err = lha_read_file_header_1(a, lha);
575 break;
576 case 2:
577 err = lha_read_file_header_2(a, lha);
578 break;
579 case 3:
580 err = lha_read_file_header_3(a, lha);
581 break;
582 default:
583 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
584 "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]);
585 err = ARCHIVE_FATAL;
586 break;
587 }
588 if (err < ARCHIVE_WARN)
589 return (err);
590
591
592 if (!lha->directory && archive_strlen(&lha->filename) == 0)
593 /* The filename has not been set */
594 return (truncated_error(a));
595
596 /*
597 * Make a pathname from a dirname and a filename.
598 */
599 archive_string_concat(&lha->dirname, &lha->filename);
600 archive_string_init(&pathname);
601 archive_string_init(&linkname);
602 archive_string_copy(&pathname, &lha->dirname);
603
604 if ((lha->mode & AE_IFMT) == AE_IFLNK) {
605 /*
606 * Extract the symlink-name if it's included in the pathname.
607 */
608 if (!lha_parse_linkname(&linkname, &pathname)) {
609 /* We couldn't get the symlink-name. */
610 archive_set_error(&a->archive,
611 ARCHIVE_ERRNO_FILE_FORMAT,
612 "Unknown symlink-name");
613 archive_string_free(&pathname);
614 archive_string_free(&linkname);
615 return (ARCHIVE_FAILED);
616 }
617 } else {
618 /*
619 * Make sure a file-type is set.
620 * The mode has been overridden if it is in the extended data.
621 */
622 lha->mode = (lha->mode & ~AE_IFMT) |
623 ((lha->directory)? AE_IFDIR: AE_IFREG);
624 }
625 if ((lha->setflag & UNIX_MODE_IS_SET) == 0 &&
626 (lha->dos_attr & 1) != 0)
627 lha->mode &= ~(0222);/* read only. */
628
629 /*
630 * Set basic file parameters.
631 */
632 if (archive_entry_copy_pathname_l(entry, pathname.s,
633 pathname.length, lha->sconv) != 0) {
634 if (errno == ENOMEM) {
635 archive_set_error(&a->archive, ENOMEM,
636 "Can't allocate memory for Pathname");
637 return (ARCHIVE_FATAL);
638 }
639 archive_set_error(&a->archive,
640 ARCHIVE_ERRNO_FILE_FORMAT,
641 "Pathname cannot be converted "
642 "from %s to current locale.",
643 archive_string_conversion_charset_name(lha->sconv));
644 err = ARCHIVE_WARN;
645 }
646 archive_string_free(&pathname);
647 if (archive_strlen(&linkname) > 0) {
648 if (archive_entry_copy_symlink_l(entry, linkname.s,
649 linkname.length, lha->sconv) != 0) {
650 if (errno == ENOMEM) {
651 archive_set_error(&a->archive, ENOMEM,
652 "Can't allocate memory for Linkname");
653 return (ARCHIVE_FATAL);
654 }
655 archive_set_error(&a->archive,
656 ARCHIVE_ERRNO_FILE_FORMAT,
657 "Linkname cannot be converted "
658 "from %s to current locale.",
659 archive_string_conversion_charset_name(lha->sconv));
660 err = ARCHIVE_WARN;
661 }
662 } else
663 archive_entry_set_symlink(entry, NULL);
664 archive_string_free(&linkname);
665 /*
666 * When a header level is 0, there is a possibility that
667 * a pathname and a symlink has '\' character, a directory
668 * separator in DOS/Windows. So we should convert it to '/'.
669 */
670 if (p[H_LEVEL_OFFSET] == 0)
671 lha_replace_path_separator(lha, entry);
672
673 archive_entry_set_mode(entry, lha->mode);
674 archive_entry_set_uid(entry, lha->uid);
675 archive_entry_set_gid(entry, lha->gid);
676 if (archive_strlen(&lha->uname) > 0)
677 archive_entry_set_uname(entry, lha->uname.s);
678 if (archive_strlen(&lha->gname) > 0)
679 archive_entry_set_gname(entry, lha->gname.s);
680 if (lha->setflag & BIRTHTIME_IS_SET) {
681 archive_entry_set_birthtime(entry, lha->birthtime,
682 lha->birthtime_tv_nsec);
683 archive_entry_set_ctime(entry, lha->birthtime,
684 lha->birthtime_tv_nsec);
685 } else {
686 archive_entry_unset_birthtime(entry);
687 archive_entry_unset_ctime(entry);
688 }
689 archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec);
690 if (lha->setflag & ATIME_IS_SET)
691 archive_entry_set_atime(entry, lha->atime,
692 lha->atime_tv_nsec);
693 else
694 archive_entry_unset_atime(entry);
695 if (lha->directory || archive_entry_symlink(entry) != NULL)
696 archive_entry_unset_size(entry);
697 else
698 archive_entry_set_size(entry, lha->origsize);
699
700 /*
701 * Prepare variables used to read a file content.
702 */
703 lha->entry_bytes_remaining = lha->compsize;
704 lha->entry_offset = 0;
705 lha->entry_crc_calculated = 0;
706
707 /*
708 * This file does not have a content.
709 */
710 if (lha->directory || lha->compsize == 0)
711 lha->end_of_entry = 1;
712
713 sprintf(lha->format_name, "lha -%c%c%c-",
714 lha->method[0], lha->method[1], lha->method[2]);
715 a->archive.archive_format_name = lha->format_name;
716
717 return (err);
718}
719
720/*
721 * Replace a DOS path separator '\' by a character '/'.
722 * Some multi-byte character set have a character '\' in its second byte.
723 */
724static void
725lha_replace_path_separator(struct lha *lha, struct archive_entry *entry)
726{
727 const wchar_t *wp;
728 size_t i;
729
730 if ((wp = archive_entry_pathname_w(entry)) != NULL) {
731 archive_wstrcpy(&(lha->ws), wp);
732 for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
733 if (lha->ws.s[i] == L'\\')
734 lha->ws.s[i] = L'/';
735 }
736 archive_entry_copy_pathname_w(entry, lha->ws.s);
737 }
738
739 if ((wp = archive_entry_symlink_w(entry)) != NULL) {
740 archive_wstrcpy(&(lha->ws), wp);
741 for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
742 if (lha->ws.s[i] == L'\\')
743 lha->ws.s[i] = L'/';
744 }
745 archive_entry_copy_symlink_w(entry, lha->ws.s);
746 }
747}
748
749/*
750 * Header 0 format
751 *
752 * +0 +1 +2 +7 +11
753 * +---------------+----------+----------------+-------------------+
754 * |header size(*1)|header sum|compression type|compressed size(*2)|
755 * +---------------+----------+----------------+-------------------+
756 * <---------------------(*1)----------*
757 *
758 * +11 +15 +17 +19 +20 +21
759 * +-----------------+---------+---------+--------------+----------------+
760 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)|
761 * +-----------------+---------+---------+--------------+----------------+
762 * *--------------------------------(*1)---------------------------------*
763 *
764 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+2+(*4)
765 * +---------------+---------+----------+----------------+------------------+
766 * |name length(*3)|file name|file CRC16|extra header(*4)| compressed data |
767 * +---------------+---------+----------+----------------+------------------+
768 * <--(*3)-> <------(*2)------>
769 * *----------------------(*1)-------------------------->
770 *
771 */
772#define H0_HEADER_SIZE_OFFSET 0
773#define H0_HEADER_SUM_OFFSET 1
774#define H0_COMP_SIZE_OFFSET 7
775#define H0_ORIG_SIZE_OFFSET 11
776#define H0_DOS_TIME_OFFSET 15
777#define H0_NAME_LEN_OFFSET 21
778#define H0_FILE_NAME_OFFSET 22
779#define H0_FIXED_SIZE 24
780static int
781lha_read_file_header_0(struct archive_read *a, struct lha *lha)
782{
783 const unsigned char *p;
784 int extdsize, namelen;
785 unsigned char headersum, sum_calculated;
786
787 if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL)
788 return (truncated_error(a));
789 lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2;
790 headersum = p[H0_HEADER_SUM_OFFSET];
791 lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET);
792 lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET);
793 lha->mtime = lha_dos_time(p + H0_DOS_TIME_OFFSET);
794 namelen = p[H0_NAME_LEN_OFFSET];
795 extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen;
796 if ((namelen > 221 || extdsize < 0) && extdsize != -2) {
797 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
798 "Invalid LHa header");
799 return (ARCHIVE_FATAL);
800 }
801 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
802 return (truncated_error(a));
803
804 archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen);
805 /* When extdsize == -2, A CRC16 value is not present in the header. */
806 if (extdsize >= 0) {
807 lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen);
808 lha->setflag |= CRC_IS_SET;
809 }
810 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
811
812 /* Read an extended header */
813 if (extdsize > 0) {
814 /* This extended data is set by 'LHa for UNIX' only.
815 * Maybe fixed size.
816 */
817 p += H0_FILE_NAME_OFFSET + namelen + 2;
818 if (p[0] == 'U' && extdsize == 12) {
819 /* p[1] is a minor version. */
820 lha->mtime = archive_le32dec(&p[2]);
821 lha->mode = archive_le16dec(&p[6]);
822 lha->uid = archive_le16dec(&p[8]);
823 lha->gid = archive_le16dec(&p[10]);
824 lha->setflag |= UNIX_MODE_IS_SET;
825 }
826 }
827 __archive_read_consume(a, lha->header_size);
828
829 if (sum_calculated != headersum) {
830 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
831 "LHa header sum error");
832 return (ARCHIVE_FATAL);
833 }
834
835 return (ARCHIVE_OK);
836}
837
838/*
839 * Header 1 format
840 *
841 * +0 +1 +2 +7 +11
842 * +---------------+----------+----------------+-------------+
843 * |header size(*1)|header sum|compression type|skip size(*2)|
844 * +---------------+----------+----------------+-------------+
845 * <---------------(*1)----------*
846 *
847 * +11 +15 +17 +19 +20 +21
848 * +-----------------+---------+---------+--------------+----------------+
849 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)|
850 * +-----------------+---------+---------+--------------+----------------+
851 * *-------------------------------(*1)----------------------------------*
852 *
853 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+3 +22+(*3)+3+(*4)
854 * +---------------+---------+----------+-----------+-----------+
855 * |name length(*3)|file name|file CRC16| creator |padding(*4)|
856 * +---------------+---------+----------+-----------+-----------+
857 * <--(*3)->
858 * *----------------------------(*1)----------------------------*
859 *
860 * +22+(*3)+3+(*4) +22+(*3)+3+(*4)+2 +22+(*3)+3+(*4)+2+(*5)
861 * +----------------+---------------------+------------------------+
862 * |next header size| extended header(*5) | compressed data |
863 * +----------------+---------------------+------------------------+
864 * *------(*1)-----> <--------------------(*2)-------------------->
865 */
866#define H1_HEADER_SIZE_OFFSET 0
867#define H1_HEADER_SUM_OFFSET 1
868#define H1_COMP_SIZE_OFFSET 7
869#define H1_ORIG_SIZE_OFFSET 11
870#define H1_DOS_TIME_OFFSET 15
871#define H1_NAME_LEN_OFFSET 21
872#define H1_FILE_NAME_OFFSET 22
873#define H1_FIXED_SIZE 27
874static int
875lha_read_file_header_1(struct archive_read *a, struct lha *lha)
876{
877 const unsigned char *p;
878 size_t extdsize;
879 int i, err, err2;
880 int namelen, padding;
881 unsigned char headersum, sum_calculated;
882
883 err = ARCHIVE_OK;
884
885 if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL)
886 return (truncated_error(a));
887
888 lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2;
889 headersum = p[H1_HEADER_SUM_OFFSET];
890 /* Note: An extended header size is included in a compsize. */
891 lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET);
892 lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET);
893 lha->mtime = lha_dos_time(p + H1_DOS_TIME_OFFSET);
894 namelen = p[H1_NAME_LEN_OFFSET];
895 /* Calculate a padding size. The result will be normally 0 only(?) */
896 padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen;
897
898 if (namelen > 230 || padding < 0)
899 goto invalid;
900
901 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
902 return (truncated_error(a));
903
904 for (i = 0; i < namelen; i++) {
905 if (p[i + H1_FILE_NAME_OFFSET] == 0xff)
906 goto invalid;/* Invalid filename. */
907 }
908 archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen);
909 lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen);
910 lha->setflag |= CRC_IS_SET;
911
912 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
913 /* Consume used bytes but not include `next header size' data
914 * since it will be consumed in lha_read_file_extended_header(). */
915 __archive_read_consume(a, lha->header_size - 2);
916
917 /* Read extended headers */
918 err2 = lha_read_file_extended_header(a, lha, NULL, 2,
919 (size_t)(lha->compsize + 2), &extdsize);
920 if (err2 < ARCHIVE_WARN)
921 return (err2);
922 if (err2 < err)
923 err = err2;
924 /* Get a real compressed file size. */
925 lha->compsize -= extdsize - 2;
926
927 if (lha->compsize < 0)
928 goto invalid; /* Invalid compressed file size */
929
930 if (sum_calculated != headersum) {
931 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
932 "LHa header sum error");
933 return (ARCHIVE_FATAL);
934 }
935 return (err);
936invalid:
937 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
938 "Invalid LHa header");
939 return (ARCHIVE_FATAL);
940}
941
942/*
943 * Header 2 format
944 *
945 * +0 +2 +7 +11 +15
946 * +---------------+----------------+-------------------+-----------------+
947 * |header size(*1)|compression type|compressed size(*2)|uncompressed size|
948 * +---------------+----------------+-------------------+-----------------+
949 * <--------------------------------(*1)---------------------------------*
950 *
951 * +15 +19 +20 +21 +23 +24
952 * +-----------------+------------+----------------+----------+-----------+
953 * |data/time(time_t)| 0x20 fixed |header level(=2)|file CRC16| creator |
954 * +-----------------+------------+----------------+----------+-----------+
955 * *---------------------------------(*1)---------------------------------*
956 *
957 * +24 +26 +26+(*3) +26+(*3)+(*4)
958 * +----------------+-------------------+-------------+-------------------+
959 * |next header size|extended header(*3)| padding(*4) | compressed data |
960 * +----------------+-------------------+-------------+-------------------+
961 * *--------------------------(*1)-------------------> <------(*2)------->
962 *
963 */
964#define H2_HEADER_SIZE_OFFSET 0
965#define H2_COMP_SIZE_OFFSET 7
966#define H2_ORIG_SIZE_OFFSET 11
967#define H2_TIME_OFFSET 15
968#define H2_CRC_OFFSET 21
969#define H2_FIXED_SIZE 24
970static int
971lha_read_file_header_2(struct archive_read *a, struct lha *lha)
972{
973 const unsigned char *p;
974 size_t extdsize;
975 int err, padding;
976 uint16_t header_crc;
977
978 if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL)
979 return (truncated_error(a));
980
981 lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET);
982 lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET);
983 lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET);
984 lha->mtime = archive_le32dec(p + H2_TIME_OFFSET);
985 lha->crc = archive_le16dec(p + H2_CRC_OFFSET);
986 lha->setflag |= CRC_IS_SET;
987
988 if (lha->header_size < H2_FIXED_SIZE) {
989 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
990 "Invalid LHa header size");
991 return (ARCHIVE_FATAL);
992 }
993
994 header_crc = lha_crc16(0, p, H2_FIXED_SIZE);
995 __archive_read_consume(a, H2_FIXED_SIZE);
996
997 /* Read extended headers */
998 err = lha_read_file_extended_header(a, lha, &header_crc, 2,
999 lha->header_size - H2_FIXED_SIZE, &extdsize);
1000 if (err < ARCHIVE_WARN)
1001 return (err);
1002
1003 /* Calculate a padding size. The result will be normally 0 or 1. */
1004 padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize);
1005 if (padding > 0) {
1006 if ((p = __archive_read_ahead(a, padding, NULL)) == NULL)
1007 return (truncated_error(a));
1008 header_crc = lha_crc16(header_crc, p, padding);
1009 __archive_read_consume(a, padding);
1010 }
1011
1012 if (header_crc != lha->header_crc) {
1013 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1014 "LHa header CRC error");
1015 return (ARCHIVE_FATAL);
1016 }
1017 return (err);
1018}
1019
1020/*
1021 * Header 3 format
1022 *
1023 * +0 +2 +7 +11 +15
1024 * +------------+----------------+-------------------+-----------------+
1025 * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size|
1026 * +------------+----------------+-------------------+-----------------+
1027 * <-------------------------------(*1)-------------------------------*
1028 *
1029 * +15 +19 +20 +21 +23 +24
1030 * +-----------------+------------+----------------+----------+-----------+
1031 * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16| creator |
1032 * +-----------------+------------+----------------+----------+-----------+
1033 * *--------------------------------(*1)----------------------------------*
1034 *
1035 * +24 +28 +32 +32+(*3)
1036 * +---------------+----------------+-------------------+-----------------+
1037 * |header size(*1)|next header size|extended header(*3)| compressed data |
1038 * +---------------+----------------+-------------------+-----------------+
1039 * *------------------------(*1)-----------------------> <------(*2)----->
1040 *
1041 */
1042#define H3_FIELD_LEN_OFFSET 0
1043#define H3_COMP_SIZE_OFFSET 7
1044#define H3_ORIG_SIZE_OFFSET 11
1045#define H3_TIME_OFFSET 15
1046#define H3_CRC_OFFSET 21
1047#define H3_HEADER_SIZE_OFFSET 24
1048#define H3_FIXED_SIZE 28
1049static int
1050lha_read_file_header_3(struct archive_read *a, struct lha *lha)
1051{
1052 const unsigned char *p;
1053 size_t extdsize;
1054 int err;
1055 uint16_t header_crc;
1056
1057 if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL)
1058 return (truncated_error(a));
1059
1060 if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4)
1061 goto invalid;
1062 lha->header_size =archive_le32dec(p + H3_HEADER_SIZE_OFFSET);
1063 lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET);
1064 lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET);
1065 lha->mtime = archive_le32dec(p + H3_TIME_OFFSET);
1066 lha->crc = archive_le16dec(p + H3_CRC_OFFSET);
1067 lha->setflag |= CRC_IS_SET;
1068
1069 if (lha->header_size < H3_FIXED_SIZE + 4)
1070 goto invalid;
1071 header_crc = lha_crc16(0, p, H3_FIXED_SIZE);
1072 __archive_read_consume(a, H3_FIXED_SIZE);
1073
1074 /* Read extended headers */
1075 err = lha_read_file_extended_header(a, lha, &header_crc, 4,
1076 lha->header_size - H3_FIXED_SIZE, &extdsize);
1077 if (err < ARCHIVE_WARN)
1078 return (err);
1079
1080 if (header_crc != lha->header_crc) {
1081 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1082 "LHa header CRC error");
1083 return (ARCHIVE_FATAL);
1084 }
1085 return (err);
1086invalid:
1087 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1088 "Invalid LHa header");
1089 return (ARCHIVE_FATAL);
1090}
1091
1092/*
1093 * Extended header format
1094 *
1095 * +0 +2 +3 -- used in header 1 and 2
1096 * +0 +4 +5 -- used in header 3
1097 * +--------------+---------+-------------------+--------------+--
1098 * |ex-header size|header id| data |ex-header size| .......
1099 * +--------------+---------+-------------------+--------------+--
1100 * <-------------( ex-header size)------------> <-- next extended header --*
1101 *
1102 * If the ex-header size is zero, it is the make of the end of extended
1103 * headers.
1104 *
1105 */
1106static int
1107lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
1108 uint16_t *crc, int sizefield_length, size_t limitsize, size_t *total_size)
1109{
1110 const void *h;
1111 const unsigned char *extdheader;
1112 size_t extdsize;
1113 size_t datasize;
1114 unsigned int i;
1115 unsigned char extdtype;
1116
1117#define EXT_HEADER_CRC 0x00 /* Header CRC and information*/
1118#define EXT_FILENAME 0x01 /* Filename */
1119#define EXT_DIRECTORY 0x02 /* Directory name */
1120#define EXT_DOS_ATTR 0x40 /* MS-DOS attribute */
1121#define EXT_TIMESTAMP 0x41 /* Windows time stamp */
1122#define EXT_FILESIZE 0x42 /* Large file size */
1123#define EXT_TIMEZONE 0x43 /* Time zone */
1124#define EXT_UTF16_FILENAME 0x44 /* UTF-16 filename */
1125#define EXT_UTF16_DIRECTORY 0x45 /* UTF-16 directory name */
1126#define EXT_CODEPAGE 0x46 /* Codepage */
1127#define EXT_UNIX_MODE 0x50 /* File permission */
1128#define EXT_UNIX_GID_UID 0x51 /* gid,uid */
1129#define EXT_UNIX_GNAME 0x52 /* Group name */
1130#define EXT_UNIX_UNAME 0x53 /* User name */
1131#define EXT_UNIX_MTIME 0x54 /* Modified time */
1132#define EXT_OS2_NEW_ATTR 0x7f /* new attribute(OS/2 only) */
1133#define EXT_NEW_ATTR 0xff /* new attribute */
1134
1135 *total_size = sizefield_length;
1136
1137 for (;;) {
1138 /* Read an extended header size. */
1139 if ((h =
1140 __archive_read_ahead(a, sizefield_length, NULL)) == NULL)
1141 return (truncated_error(a));
1142 /* Check if the size is the zero indicates the end of the
1143 * extended header. */
1144 if (sizefield_length == sizeof(uint16_t))
1145 extdsize = archive_le16dec(h);
1146 else
1147 extdsize = archive_le32dec(h);
1148 if (extdsize == 0) {
1149 /* End of extended header */
1150 if (crc != NULL)
1151 *crc = lha_crc16(*crc, h, sizefield_length);
1152 __archive_read_consume(a, sizefield_length);
1153 return (ARCHIVE_OK);
1154 }
1155
1156 /* Sanity check to the extended header size. */
1157 if (((uint64_t)*total_size + extdsize) >
1158 (uint64_t)limitsize ||
1159 extdsize <= (size_t)sizefield_length)
1160 goto invalid;
1161
1162 /* Read the extended header. */
1163 if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL)
1164 return (truncated_error(a));
1165 *total_size += extdsize;
1166
1167 extdheader = (const unsigned char *)h;
1168 /* Get the extended header type. */
1169 extdtype = extdheader[sizefield_length];
1170 /* Calculate an extended data size. */
1171 datasize = extdsize - (1 + sizefield_length);
1172 /* Skip an extended header size field and type field. */
1173 extdheader += sizefield_length + 1;
1174
1175 if (crc != NULL && extdtype != EXT_HEADER_CRC)
1176 *crc = lha_crc16(*crc, h, extdsize);
1177 switch (extdtype) {
1178 case EXT_HEADER_CRC:
1179 /* We only use a header CRC. Following data will not
1180 * be used. */
1181 if (datasize >= 2) {
1182 lha->header_crc = archive_le16dec(extdheader);
1183 if (crc != NULL) {
1184 static const char zeros[2] = {0, 0};
1185 *crc = lha_crc16(*crc, h,
1186 extdsize - datasize);
1187 /* CRC value itself as zero */
1188 *crc = lha_crc16(*crc, zeros, 2);
1189 *crc = lha_crc16(*crc,
1190 extdheader+2, datasize - 2);
1191 }
1192 }
1193 break;
1194 case EXT_FILENAME:
1195 if (datasize == 0) {
1196 /* maybe directory header */
1197 archive_string_empty(&lha->filename);
1198 break;
1199 }
1200 if (extdheader[0] == '\0')
1201 goto invalid;
1202 archive_strncpy(&lha->filename,
1203 (const char *)extdheader, datasize);
1204 break;
1205 case EXT_DIRECTORY:
1206 if (datasize == 0 || extdheader[0] == '\0')
1207 /* no directory name data. exit this case. */
1208 goto invalid;
1209
1210 archive_strncpy(&lha->dirname,
1211 (const char *)extdheader, datasize);
1212 /*
1213 * Convert directory delimiter from 0xFF
1214 * to '/' for local system.
1215 */
1216 for (i = 0; i < lha->dirname.length; i++) {
1217 if ((unsigned char)lha->dirname.s[i] == 0xFF)
1218 lha->dirname.s[i] = '/';
1219 }
1220 /* Is last character directory separator? */
1221 if (lha->dirname.s[lha->dirname.length-1] != '/')
1222 /* invalid directory data */
1223 goto invalid;
1224 break;
1225 case EXT_DOS_ATTR:
1226 if (datasize == 2)
1227 lha->dos_attr = (unsigned char)
1228 (archive_le16dec(extdheader) & 0xff);
1229 break;
1230 case EXT_TIMESTAMP:
1231 if (datasize == (sizeof(uint64_t) * 3)) {
1232 lha->birthtime = lha_win_time(
1233 archive_le64dec(extdheader),
1234 &lha->birthtime_tv_nsec);
1235 extdheader += sizeof(uint64_t);
1236 lha->mtime = lha_win_time(
1237 archive_le64dec(extdheader),
1238 &lha->mtime_tv_nsec);
1239 extdheader += sizeof(uint64_t);
1240 lha->atime = lha_win_time(
1241 archive_le64dec(extdheader),
1242 &lha->atime_tv_nsec);
1243 lha->setflag |= BIRTHTIME_IS_SET |
1244 ATIME_IS_SET;
1245 }
1246 break;
1247 case EXT_FILESIZE:
1248 if (datasize == sizeof(uint64_t) * 2) {
1249 lha->compsize = archive_le64dec(extdheader);
1250 extdheader += sizeof(uint64_t);
1251 lha->origsize = archive_le64dec(extdheader);
1252 }
1253 break;
1254 case EXT_CODEPAGE:
1255 /* Get an archived filename charset from codepage.
1256 * This overwrites the charset specified by
1257 * hdrcharset option. */
1258 if (datasize == sizeof(uint32_t)) {
1259 struct archive_string cp;
1260 const char *charset;
1261
1262 archive_string_init(&cp);
1263 switch (archive_le32dec(extdheader)) {
1264 case 65001: /* UTF-8 */
1265 charset = "UTF-8";
1266 break;
1267 default:
1268 archive_string_sprintf(&cp, "CP%d",
1269 (int)archive_le32dec(extdheader));
1270 charset = cp.s;
1271 break;
1272 }
1273 lha->sconv =
1274 archive_string_conversion_from_charset(
1275 &(a->archive), charset, 1);
1276 archive_string_free(&cp);
1277 if (lha->sconv == NULL)
1278 return (ARCHIVE_FATAL);
1279 }
1280 break;
1281 case EXT_UNIX_MODE:
1282 if (datasize == sizeof(uint16_t)) {
1283 lha->mode = archive_le16dec(extdheader);
1284 lha->setflag |= UNIX_MODE_IS_SET;
1285 }
1286 break;
1287 case EXT_UNIX_GID_UID:
1288 if (datasize == (sizeof(uint16_t) * 2)) {
1289 lha->gid = archive_le16dec(extdheader);
1290 lha->uid = archive_le16dec(extdheader+2);
1291 }
1292 break;
1293 case EXT_UNIX_GNAME:
1294 if (datasize > 0)
1295 archive_strncpy(&lha->gname,
1296 (const char *)extdheader, datasize);
1297 break;
1298 case EXT_UNIX_UNAME:
1299 if (datasize > 0)
1300 archive_strncpy(&lha->uname,
1301 (const char *)extdheader, datasize);
1302 break;
1303 case EXT_UNIX_MTIME:
1304 if (datasize == sizeof(uint32_t))
1305 lha->mtime = archive_le32dec(extdheader);
1306 break;
1307 case EXT_OS2_NEW_ATTR:
1308 /* This extended header is OS/2 depend. */
1309 if (datasize == 16) {
1310 lha->dos_attr = (unsigned char)
1311 (archive_le16dec(extdheader) & 0xff);
1312 lha->mode = archive_le16dec(extdheader+2);
1313 lha->gid = archive_le16dec(extdheader+4);
1314 lha->uid = archive_le16dec(extdheader+6);
1315 lha->birthtime = archive_le32dec(extdheader+8);
1316 lha->atime = archive_le32dec(extdheader+12);
1317 lha->setflag |= UNIX_MODE_IS_SET
1318 | BIRTHTIME_IS_SET | ATIME_IS_SET;
1319 }
1320 break;
1321 case EXT_NEW_ATTR:
1322 if (datasize == 20) {
1323 lha->mode = (mode_t)archive_le32dec(extdheader);
1324 lha->gid = archive_le32dec(extdheader+4);
1325 lha->uid = archive_le32dec(extdheader+8);
1326 lha->birthtime = archive_le32dec(extdheader+12);
1327 lha->atime = archive_le32dec(extdheader+16);
1328 lha->setflag |= UNIX_MODE_IS_SET
1329 | BIRTHTIME_IS_SET | ATIME_IS_SET;
1330 }
1331 break;
1332 case EXT_TIMEZONE: /* Not supported */
1333 case EXT_UTF16_FILENAME: /* Not supported */
1334 case EXT_UTF16_DIRECTORY: /* Not supported */
1335 default:
1336 break;
1337 }
1338
1339 __archive_read_consume(a, extdsize);
1340 }
1341invalid:
1342 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1343 "Invalid extended LHa header");
1344 return (ARCHIVE_FATAL);
1345}
1346
1347static int
1348lha_end_of_entry(struct archive_read *a)
1349{
1350 struct lha *lha = (struct lha *)(a->format->data);
1351 int r = ARCHIVE_EOF;
1352
1353 if (!lha->end_of_entry_cleanup) {
1354 if ((lha->setflag & CRC_IS_SET) &&
1355 lha->crc != lha->entry_crc_calculated) {
1356 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1357 "LHa data CRC error");
1358 r = ARCHIVE_WARN;
1359 }
1360
1361 /* End-of-entry cleanup done. */
1362 lha->end_of_entry_cleanup = 1;
1363 }
1364 return (r);
1365}
1366
1367static int
1368archive_read_format_lha_read_data(struct archive_read *a,
1369 const void **buff, size_t *size, int64_t *offset)
1370{
1371 struct lha *lha = (struct lha *)(a->format->data);
1372 int r;
1373
1374 if (lha->entry_unconsumed) {
1375 /* Consume as much as the decompressor actually used. */
1376 __archive_read_consume(a, lha->entry_unconsumed);
1377 lha->entry_unconsumed = 0;
1378 }
1379 if (lha->end_of_entry) {
1380 *offset = lha->entry_offset;
1381 *size = 0;
1382 *buff = NULL;
1383 return (lha_end_of_entry(a));
1384 }
1385
1386 if (lha->entry_is_compressed)
1387 r = lha_read_data_lzh(a, buff, size, offset);
1388 else
1389 /* No compression. */
1390 r = lha_read_data_none(a, buff, size, offset);
1391 return (r);
1392}
1393
1394/*
1395 * Read a file content in no compression.
1396 *
1397 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1398 * lha->end_of_entry if it consumes all of the data.
1399 */
1400static int
1401lha_read_data_none(struct archive_read *a, const void **buff,
1402 size_t *size, int64_t *offset)
1403{
1404 struct lha *lha = (struct lha *)(a->format->data);
1405 ssize_t bytes_avail;
1406
1407 if (lha->entry_bytes_remaining == 0) {
1408 *buff = NULL;
1409 *size = 0;
1410 *offset = lha->entry_offset;
1411 lha->end_of_entry = 1;
1412 return (ARCHIVE_OK);
1413 }
1414 /*
1415 * Note: '1' here is a performance optimization.
1416 * Recall that the decompression layer returns a count of
1417 * available bytes; asking for more than that forces the
1418 * decompressor to combine reads by copying data.
1419 */
1420 *buff = __archive_read_ahead(a, 1, &bytes_avail);
1421 if (bytes_avail <= 0) {
1422 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1423 "Truncated LHa file data");
1424 return (ARCHIVE_FATAL);
1425 }
1426 if (bytes_avail > lha->entry_bytes_remaining)
1427 bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1428 lha->entry_crc_calculated =
1429 lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail);
1430 *size = bytes_avail;
1431 *offset = lha->entry_offset;
1432 lha->entry_offset += bytes_avail;
1433 lha->entry_bytes_remaining -= bytes_avail;
1434 if (lha->entry_bytes_remaining == 0)
1435 lha->end_of_entry = 1;
1436 lha->entry_unconsumed = bytes_avail;
1437 return (ARCHIVE_OK);
1438}
1439
1440/*
1441 * Read a file content in LZHUFF encoding.
1442 *
1443 * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is
1444 * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes
1445 * all of the data.
1446 */
1447static int
1448lha_read_data_lzh(struct archive_read *a, const void **buff,
1449 size_t *size, int64_t *offset)
1450{
1451 struct lha *lha = (struct lha *)(a->format->data);
1452 ssize_t bytes_avail;
1453 int r;
1454
1455 /* If we haven't yet read any data, initialize the decompressor. */
1456 if (!lha->decompress_init) {
1457 r = lzh_decode_init(&(lha->strm), lha->method);
1458 switch (r) {
1459 case ARCHIVE_OK:
1460 break;
1461 case ARCHIVE_FAILED:
1462 /* Unsupported compression. */
1463 *buff = NULL;
1464 *size = 0;
1465 *offset = 0;
1466 archive_set_error(&a->archive,
1467 ARCHIVE_ERRNO_FILE_FORMAT,
1468 "Unsupported lzh compression method -%c%c%c-",
1469 lha->method[0], lha->method[1], lha->method[2]);
1470 /* We know compressed size; just skip it. */
1471 archive_read_format_lha_read_data_skip(a);
1472 return (ARCHIVE_WARN);
1473 default:
1474 archive_set_error(&a->archive, ENOMEM,
1475 "Couldn't allocate memory "
1476 "for lzh decompression");
1477 return (ARCHIVE_FATAL);
1478 }
1479 /* We've initialized decompression for this stream. */
1480 lha->decompress_init = 1;
1481 lha->strm.avail_out = 0;
1482 lha->strm.total_out = 0;
1483 }
1484
1485 /*
1486 * Note: '1' here is a performance optimization.
1487 * Recall that the decompression layer returns a count of
1488 * available bytes; asking for more than that forces the
1489 * decompressor to combine reads by copying data.
1490 */
1491 lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail);
1492 if (bytes_avail <= 0) {
1493 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1494 "Truncated LHa file body");
1495 return (ARCHIVE_FATAL);
1496 }
1497 if (bytes_avail > lha->entry_bytes_remaining)
1498 bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1499
1500 lha->strm.avail_in = (int)bytes_avail;
1501 lha->strm.total_in = 0;
1502 lha->strm.avail_out = 0;
1503
1504 r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining);
1505 switch (r) {
1506 case ARCHIVE_OK:
1507 break;
1508 case ARCHIVE_EOF:
1509 lha->end_of_entry = 1;
1510 break;
1511 default:
1512 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1513 "Bad lzh data");
1514 return (ARCHIVE_FAILED);
1515 }
1516 lha->entry_unconsumed = lha->strm.total_in;
1517 lha->entry_bytes_remaining -= lha->strm.total_in;
1518
1519 if (lha->strm.avail_out) {
1520 *offset = lha->entry_offset;
1521 *size = lha->strm.avail_out;
1522 *buff = lha->strm.ref_ptr;
1523 lha->entry_crc_calculated =
1524 lha_crc16(lha->entry_crc_calculated, *buff, *size);
1525 lha->entry_offset += *size;
1526 } else {
1527 *offset = lha->entry_offset;
1528 *size = 0;
1529 *buff = NULL;
1530 if (lha->end_of_entry)
1531 return (lha_end_of_entry(a));
1532 }
1533 return (ARCHIVE_OK);
1534}
1535
1536/*
1537 * Skip a file content.
1538 */
1539static int
1540archive_read_format_lha_read_data_skip(struct archive_read *a)
1541{
1542 struct lha *lha;
1543 int64_t bytes_skipped;
1544
1545 lha = (struct lha *)(a->format->data);
1546
1547 if (lha->entry_unconsumed) {
1548 /* Consume as much as the decompressor actually used. */
1549 __archive_read_consume(a, lha->entry_unconsumed);
1550 lha->entry_unconsumed = 0;
1551 }
1552
1553 /* if we've already read to end of data, we're done. */
1554 if (lha->end_of_entry_cleanup)
1555 return (ARCHIVE_OK);
1556
1557 /*
1558 * If the length is at the beginning, we can skip the
1559 * compressed data much more quickly.
1560 */
1561 bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining);
1562 if (bytes_skipped < 0)
1563 return (ARCHIVE_FATAL);
1564
1565 /* This entry is finished and done. */
1566 lha->end_of_entry_cleanup = lha->end_of_entry = 1;
1567 return (ARCHIVE_OK);
1568}
1569
1570static int
1571archive_read_format_lha_cleanup(struct archive_read *a)
1572{
1573 struct lha *lha = (struct lha *)(a->format->data);
1574
1575 lzh_decode_free(&(lha->strm));
1576 archive_string_free(&(lha->dirname));
1577 archive_string_free(&(lha->filename));
1578 archive_string_free(&(lha->uname));
1579 archive_string_free(&(lha->gname));
1580 archive_wstring_free(&(lha->ws));
1581 free(lha);
1582 (a->format->data) = NULL;
1583 return (ARCHIVE_OK);
1584}
1585
1586/*
1587 * 'LHa for UNIX' utility has archived a symbolic-link name after
1588 * a pathname with '|' character.
1589 * This function extracts the symbolic-link name from the pathname.
1590 *
1591 * example.
1592 * 1. a symbolic-name is 'aaa/bb/cc'
1593 * 2. a filename is 'xxx/bbb'
1594 * then a archived pathname is 'xxx/bbb|aaa/bb/cc'
1595 */
1596static int
1597lha_parse_linkname(struct archive_string *linkname,
1598 struct archive_string *pathname)
1599{
1600 char * linkptr;
1601 size_t symlen;
1602
1603 linkptr = strchr(pathname->s, '|');
1604 if (linkptr != NULL) {
1605 symlen = strlen(linkptr + 1);
1606 archive_strncpy(linkname, linkptr+1, symlen);
1607
1608 *linkptr = 0;
1609 pathname->length = strlen(pathname->s);
1610
1611 return (1);
1612 }
1613 return (0);
1614}
1615
1616/* Convert an MSDOS-style date/time into Unix-style time. */
1617static time_t
1618lha_dos_time(const unsigned char *p)
1619{
1620 int msTime, msDate;
1621 struct tm ts;
1622
1623 msTime = archive_le16dec(p);
1624 msDate = archive_le16dec(p+2);
1625
1626 memset(&ts, 0, sizeof(ts));
1627 ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */
1628 ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */
1629 ts.tm_mday = msDate & 0x1f; /* Day of month. */
1630 ts.tm_hour = (msTime >> 11) & 0x1f;
1631 ts.tm_min = (msTime >> 5) & 0x3f;
1632 ts.tm_sec = (msTime << 1) & 0x3e;
1633 ts.tm_isdst = -1;
1634 return (mktime(&ts));
1635}
1636
1637/* Convert an MS-Windows-style date/time into Unix-style time. */
1638static time_t
1639lha_win_time(uint64_t wintime, long *ns)
1640{
1641#define EPOC_TIME ARCHIVE_LITERAL_ULL(116444736000000000)
1642
1643 if (wintime >= EPOC_TIME) {
1644 wintime -= EPOC_TIME; /* 1970-01-01 00:00:00 (UTC) */
1645 if (ns != NULL)
1646 *ns = (long)(wintime % 10000000) * 100;
1647 return (wintime / 10000000);
1648 } else {
1649 if (ns != NULL)
1650 *ns = 0;
1651 return (0);
1652 }
1653}
1654
1655static unsigned char
1656lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size)
1657{
1658 unsigned char const *p = (unsigned char const *)pp;
1659
1660 p += offset;
1661 for (;size > 0; --size)
1662 sum += *p++;
1663 return (sum);
1664}
1665
1666static uint16_t crc16tbl[2][256];
1667static void
1668lha_crc16_init(void)
1669{
1670 unsigned int i;
1671 static int crc16init = 0;
1672
1673 if (crc16init)
1674 return;
1675 crc16init = 1;
1676
1677 for (i = 0; i < 256; i++) {
1678 unsigned int j;
1679 uint16_t crc = (uint16_t)i;
1680 for (j = 8; j; j--)
1681 crc = (crc >> 1) ^ ((crc & 1) * 0xA001);
1682 crc16tbl[0][i] = crc;
1683 }
1684
1685 for (i = 0; i < 256; i++) {
1686 crc16tbl[1][i] = (crc16tbl[0][i] >> 8)
1687 ^ crc16tbl[0][crc16tbl[0][i] & 0xff];
1688 }
1689}
1690
1691static uint16_t
1692lha_crc16(uint16_t crc, const void *pp, size_t len)
1693{
1694 const unsigned char *p = (const unsigned char *)pp;
1695 const uint16_t *buff;
1696 const union {
1697 uint32_t i;
1698 char c[4];
1699 } u = { 0x01020304 };
1700
1701 if (len == 0)
1702 return crc;
1703
1704 /* Process unaligned address. */
1705 if (((uintptr_t)p) & (uintptr_t)0x1) {
1706 crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1707 len--;
1708 }
1709 buff = (const uint16_t *)p;
1710 /*
1711 * Modern C compiler such as GCC does not unroll automatically yet
1712 * without unrolling pragma, and Clang is so. So we should
1713 * unroll this loop for its performance.
1714 */
1715 for (;len >= 8; len -= 8) {
1716 /* This if statement expects compiler optimization will
1717 * remove the statement which will not be executed. */
1718#undef bswap16
1719#if defined(_MSC_VER) && _MSC_VER >= 1400 /* Visual Studio */
1720# define bswap16(x) _byteswap_ushort(x)
1721#elif defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4)
1722/* GCC 4.8 and later has __builtin_bswap16() */
1723# define bswap16(x) __builtin_bswap16(x)
1724#elif defined(__clang__)
1725/* All clang versions have __builtin_bswap16() */
1726# define bswap16(x) __builtin_bswap16(x)
1727#else
1728# define bswap16(x) ((((x) >> 8) & 0xff) | ((x) << 8))
1729#endif
1730#define CRC16W do { \
1731 if(u.c[0] == 1) { /* Big endian */ \
1732 crc ^= bswap16(*buff); buff++; \
1733 } else \
1734 crc ^= *buff++; \
1735 crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];\
1736} while (0)
1737 CRC16W;
1738 CRC16W;
1739 CRC16W;
1740 CRC16W;
1741#undef CRC16W
1742#undef bswap16
1743 }
1744
1745 p = (const unsigned char *)buff;
1746 for (;len; len--) {
1747 crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1748 }
1749 return crc;
1750}
1751
1752/*
1753 * Initialize LZHUF decoder.
1754 *
1755 * Returns ARCHIVE_OK if initialization was successful.
1756 * Returns ARCHIVE_FAILED if method is unsupported.
1757 * Returns ARCHIVE_FATAL if initialization failed; memory allocation
1758 * error occurred.
1759 */
1760static int
1761lzh_decode_init(struct lzh_stream *strm, const char *method)
1762{
1763 struct lzh_dec *ds;
1764 int w_bits, w_size;
1765
1766 if (strm->ds == NULL) {
1767 strm->ds = calloc(1, sizeof(*strm->ds));
1768 if (strm->ds == NULL)
1769 return (ARCHIVE_FATAL);
1770 }
1771 ds = strm->ds;
1772 ds->error = ARCHIVE_FAILED;
1773 if (method == NULL || method[0] != 'l' || method[1] != 'h')
1774 return (ARCHIVE_FAILED);
1775 switch (method[2]) {
1776 case '5':
1777 w_bits = 13;/* 8KiB for window */
1778 break;
1779 case '6':
1780 w_bits = 15;/* 32KiB for window */
1781 break;
1782 case '7':
1783 w_bits = 16;/* 64KiB for window */
1784 break;
1785 default:
1786 return (ARCHIVE_FAILED);/* Not supported. */
1787 }
1788 ds->error = ARCHIVE_FATAL;
1789 /* Expand a window size up to 128 KiB for decompressing process
1790 * performance whatever its original window size is. */
1791 ds->w_size = 1U << 17;
1792 ds->w_mask = ds->w_size -1;
1793 if (ds->w_buff == NULL) {
1794 ds->w_buff = malloc(ds->w_size);
1795 if (ds->w_buff == NULL)
1796 return (ARCHIVE_FATAL);
1797 }
1798 w_size = 1U << w_bits;
1799 memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size);
1800 ds->w_pos = 0;
1801 ds->state = 0;
1802 ds->pos_pt_len_size = w_bits + 1;
1803 ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4;
1804 ds->literal_pt_len_size = PT_BITLEN_SIZE;
1805 ds->literal_pt_len_bits = 5;
1806 ds->br.cache_buffer = 0;
1807 ds->br.cache_avail = 0;
1808
1809 if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16)
1810 != ARCHIVE_OK)
1811 return (ARCHIVE_FATAL);
1812 ds->lt.len_bits = 9;
1813 if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16)
1814 != ARCHIVE_OK)
1815 return (ARCHIVE_FATAL);
1816 ds->error = 0;
1817
1818 return (ARCHIVE_OK);
1819}
1820
1821/*
1822 * Release LZHUF decoder.
1823 */
1824static void
1825lzh_decode_free(struct lzh_stream *strm)
1826{
1827
1828 if (strm->ds == NULL)
1829 return;
1830 free(strm->ds->w_buff);
1831 lzh_huffman_free(&(strm->ds->lt));
1832 lzh_huffman_free(&(strm->ds->pt));
1833 free(strm->ds);
1834 strm->ds = NULL;
1835}
1836
1837/*
1838 * Bit stream reader.
1839 */
1840/* Check that the cache buffer has enough bits. */
1841#define lzh_br_has(br, n) ((br)->cache_avail >= n)
1842/* Get compressed data by bit. */
1843#define lzh_br_bits(br, n) \
1844 (((uint16_t)((br)->cache_buffer >> \
1845 ((br)->cache_avail - (n)))) & cache_masks[n])
1846#define lzh_br_bits_forced(br, n) \
1847 (((uint16_t)((br)->cache_buffer << \
1848 ((n) - (br)->cache_avail))) & cache_masks[n])
1849/* Read ahead to make sure the cache buffer has enough compressed data we
1850 * will use.
1851 * True : completed, there is enough data in the cache buffer.
1852 * False : we met that strm->next_in is empty, we have to get following
1853 * bytes. */
1854#define lzh_br_read_ahead_0(strm, br, n) \
1855 (lzh_br_has(br, (n)) || lzh_br_fillup(strm, br))
1856/* True : the cache buffer has some bits as much as we need.
1857 * False : there are no enough bits in the cache buffer to be used,
1858 * we have to get following bytes if we could. */
1859#define lzh_br_read_ahead(strm, br, n) \
1860 (lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n)))
1861
1862/* Notify how many bits we consumed. */
1863#define lzh_br_consume(br, n) ((br)->cache_avail -= (n))
1864#define lzh_br_unconsume(br, n) ((br)->cache_avail += (n))
1865
1866static const uint16_t cache_masks[] = {
1867 0x0000, 0x0001, 0x0003, 0x0007,
1868 0x000F, 0x001F, 0x003F, 0x007F,
1869 0x00FF, 0x01FF, 0x03FF, 0x07FF,
1870 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF,
1871 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
1872};
1873
1874/*
1875 * Shift away used bits in the cache data and fill it up with following bits.
1876 * Call this when cache buffer does not have enough bits you need.
1877 *
1878 * Returns 1 if the cache buffer is full.
1879 * Returns 0 if the cache buffer is not full; input buffer is empty.
1880 */
1881static int
1882lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br)
1883{
1884 int n = CACHE_BITS - br->cache_avail;
1885
1886 for (;;) {
1887 const int x = n >> 3;
1888 if (strm->avail_in >= x) {
1889 switch (x) {
1890 case 8:
1891 br->cache_buffer =
1892 ((uint64_t)strm->next_in[0]) << 56 |
1893 ((uint64_t)strm->next_in[1]) << 48 |
1894 ((uint64_t)strm->next_in[2]) << 40 |
1895 ((uint64_t)strm->next_in[3]) << 32 |
1896 ((uint32_t)strm->next_in[4]) << 24 |
1897 ((uint32_t)strm->next_in[5]) << 16 |
1898 ((uint32_t)strm->next_in[6]) << 8 |
1899 (uint32_t)strm->next_in[7];
1900 strm->next_in += 8;
1901 strm->avail_in -= 8;
1902 br->cache_avail += 8 * 8;
1903 return (1);
1904 case 7:
1905 br->cache_buffer =
1906 (br->cache_buffer << 56) |
1907 ((uint64_t)strm->next_in[0]) << 48 |
1908 ((uint64_t)strm->next_in[1]) << 40 |
1909 ((uint64_t)strm->next_in[2]) << 32 |
1910 ((uint32_t)strm->next_in[3]) << 24 |
1911 ((uint32_t)strm->next_in[4]) << 16 |
1912 ((uint32_t)strm->next_in[5]) << 8 |
1913 (uint32_t)strm->next_in[6];
1914 strm->next_in += 7;
1915 strm->avail_in -= 7;
1916 br->cache_avail += 7 * 8;
1917 return (1);
1918 case 6:
1919 br->cache_buffer =
1920 (br->cache_buffer << 48) |
1921 ((uint64_t)strm->next_in[0]) << 40 |
1922 ((uint64_t)strm->next_in[1]) << 32 |
1923 ((uint32_t)strm->next_in[2]) << 24 |
1924 ((uint32_t)strm->next_in[3]) << 16 |
1925 ((uint32_t)strm->next_in[4]) << 8 |
1926 (uint32_t)strm->next_in[5];
1927 strm->next_in += 6;
1928 strm->avail_in -= 6;
1929 br->cache_avail += 6 * 8;
1930 return (1);
1931 case 0:
1932 /* We have enough compressed data in
1933 * the cache buffer.*/
1934 return (1);
1935 default:
1936 break;
1937 }
1938 }
1939 if (strm->avail_in == 0) {
1940 /* There is not enough compressed data to fill up the
1941 * cache buffer. */
1942 return (0);
1943 }
1944 br->cache_buffer =
1945 (br->cache_buffer << 8) | *strm->next_in++;
1946 strm->avail_in--;
1947 br->cache_avail += 8;
1948 n -= 8;
1949 }
1950}
1951
1952/*
1953 * Decode LZHUF.
1954 *
1955 * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty.
1956 * Please set available buffer and call this function again.
1957 * 2. Returns ARCHIVE_EOF if decompression has been completed.
1958 * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data
1959 * is broken or you do not set 'last' flag properly.
1960 * 4. 'last' flag is very important, you must set 1 to the flag if there
1961 * is no input data. The lha compressed data format does not provide how
1962 * to know the compressed data is really finished.
1963 * Note: lha command utility check if the total size of output bytes is
1964 * reached the uncompressed size recorded in its header. it does not mind
1965 * that the decoding process is properly finished.
1966 * GNU ZIP can decompress another compressed file made by SCO LZH compress.
1967 * it handles EOF as null to fill read buffer with zero until the decoding
1968 * process meet 2 bytes of zeros at reading a size of a next chunk, so the
1969 * zeros are treated as the mark of the end of the data although the zeros
1970 * is dummy, not the file data.
1971 */
1972static int lzh_read_blocks(struct lzh_stream *, int);
1973static int lzh_decode_blocks(struct lzh_stream *, int);
1974#define ST_RD_BLOCK 0
1975#define ST_RD_PT_1 1
1976#define ST_RD_PT_2 2
1977#define ST_RD_PT_3 3
1978#define ST_RD_PT_4 4
1979#define ST_RD_LITERAL_1 5
1980#define ST_RD_LITERAL_2 6
1981#define ST_RD_LITERAL_3 7
1982#define ST_RD_POS_DATA_1 8
1983#define ST_GET_LITERAL 9
1984#define ST_GET_POS_1 10
1985#define ST_GET_POS_2 11
1986#define ST_COPY_DATA 12
1987
1988static int
1989lzh_decode(struct lzh_stream *strm, int last)
1990{
1991 struct lzh_dec *ds = strm->ds;
1992 int avail_in;
1993 int r;
1994
1995 if (ds->error)
1996 return (ds->error);
1997
1998 avail_in = strm->avail_in;
1999 do {
2000 if (ds->state < ST_GET_LITERAL)
2001 r = lzh_read_blocks(strm, last);
2002 else
2003 r = lzh_decode_blocks(strm, last);
2004 } while (r == 100);
2005 strm->total_in += avail_in - strm->avail_in;
2006 return (r);
2007}
2008
2009static void
2010lzh_emit_window(struct lzh_stream *strm, size_t s)
2011{
2012 strm->ref_ptr = strm->ds->w_buff;
2013 strm->avail_out = (int)s;
2014 strm->total_out += s;
2015}
2016
2017static int
2018lzh_read_blocks(struct lzh_stream *strm, int last)
2019{
2020 struct lzh_dec *ds = strm->ds;
2021 struct lzh_br *br = &(ds->br);
2022 int c = 0, i;
2023 unsigned rbits;
2024
2025 for (;;) {
2026 switch (ds->state) {
2027 case ST_RD_BLOCK:
2028 /*
2029 * Read a block number indicates how many blocks
2030 * we will handle. The block is composed of a
2031 * literal and a match, sometimes a literal only
2032 * in particular, there are no reference data at
2033 * the beginning of the decompression.
2034 */
2035 if (!lzh_br_read_ahead_0(strm, br, 16)) {
2036 if (!last)
2037 /* We need following data. */
2038 return (ARCHIVE_OK);
2039 if (lzh_br_has(br, 8)) {
2040 /*
2041 * It seems there are extra bits.
2042 * 1. Compressed data is broken.
2043 * 2. `last' flag does not properly
2044 * set.
2045 */
2046 goto failed;
2047 }
2048 if (ds->w_pos > 0) {
2049 lzh_emit_window(strm, ds->w_pos);
2050 ds->w_pos = 0;
2051 return (ARCHIVE_OK);
2052 }
2053 /* End of compressed data; we have completely
2054 * handled all compressed data. */
2055 return (ARCHIVE_EOF);
2056 }
2057 ds->blocks_avail = lzh_br_bits(br, 16);
2058 if (ds->blocks_avail == 0)
2059 goto failed;
2060 lzh_br_consume(br, 16);
2061 /*
2062 * Read a literal table compressed in huffman
2063 * coding.
2064 */
2065 ds->pt.len_size = ds->literal_pt_len_size;
2066 ds->pt.len_bits = ds->literal_pt_len_bits;
2067 ds->reading_position = 0;
2068 /* FALL THROUGH */
2069 case ST_RD_PT_1:
2070 /* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are
2071 * used in reading both a literal table and a
2072 * position table. */
2073 if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) {
2074 if (last)
2075 goto failed;/* Truncated data. */
2076 ds->state = ST_RD_PT_1;
2077 return (ARCHIVE_OK);
2078 }
2079 ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits);
2080 lzh_br_consume(br, ds->pt.len_bits);
2081 /* FALL THROUGH */
2082 case ST_RD_PT_2:
2083 if (ds->pt.len_avail == 0) {
2084 /* There is no bitlen. */
2085 if (!lzh_br_read_ahead(strm, br,
2086 ds->pt.len_bits)) {
2087 if (last)
2088 goto failed;/* Truncated data.*/
2089 ds->state = ST_RD_PT_2;
2090 return (ARCHIVE_OK);
2091 }
2092 if (!lzh_make_fake_table(&(ds->pt),
2093 lzh_br_bits(br, ds->pt.len_bits)))
2094 goto failed;/* Invalid data. */
2095 lzh_br_consume(br, ds->pt.len_bits);
2096 if (ds->reading_position)
2097 ds->state = ST_GET_LITERAL;
2098 else
2099 ds->state = ST_RD_LITERAL_1;
2100 break;
2101 } else if (ds->pt.len_avail > ds->pt.len_size)
2102 goto failed;/* Invalid data. */
2103 ds->loop = 0;
2104 memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
2105 if (ds->pt.len_avail < 3 ||
2106 ds->pt.len_size == ds->pos_pt_len_size) {
2107 ds->state = ST_RD_PT_4;
2108 break;
2109 }
2110 /* FALL THROUGH */
2111 case ST_RD_PT_3:
2112 ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3);
2113 if (ds->loop < 3) {
2114 if (ds->loop < 0 || last)
2115 goto failed;/* Invalid data. */
2116 /* Not completed, get following data. */
2117 ds->state = ST_RD_PT_3;
2118 return (ARCHIVE_OK);
2119 }
2120 /* There are some null in bitlen of the literal. */
2121 if (!lzh_br_read_ahead(strm, br, 2)) {
2122 if (last)
2123 goto failed;/* Truncated data. */
2124 ds->state = ST_RD_PT_3;
2125 return (ARCHIVE_OK);
2126 }
2127 c = lzh_br_bits(br, 2);
2128 lzh_br_consume(br, 2);
2129 if (c > ds->pt.len_avail - 3)
2130 goto failed;/* Invalid data. */
2131 for (i = 3; c-- > 0 ;)
2132 ds->pt.bitlen[i++] = 0;
2133 ds->loop = i;
2134 /* FALL THROUGH */
2135 case ST_RD_PT_4:
2136 ds->loop = lzh_read_pt_bitlen(strm, ds->loop,
2137 ds->pt.len_avail);
2138 if (ds->loop < ds->pt.len_avail) {
2139 if (ds->loop < 0 || last)
2140 goto failed;/* Invalid data. */
2141 /* Not completed, get following data. */
2142 ds->state = ST_RD_PT_4;
2143 return (ARCHIVE_OK);
2144 }
2145 if (!lzh_make_huffman_table(&(ds->pt)))
2146 goto failed;/* Invalid data */
2147 if (ds->reading_position) {
2148 ds->state = ST_GET_LITERAL;
2149 break;
2150 }
2151 /* FALL THROUGH */
2152 case ST_RD_LITERAL_1:
2153 if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) {
2154 if (last)
2155 goto failed;/* Truncated data. */
2156 ds->state = ST_RD_LITERAL_1;
2157 return (ARCHIVE_OK);
2158 }
2159 ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits);
2160 lzh_br_consume(br, ds->lt.len_bits);
2161 /* FALL THROUGH */
2162 case ST_RD_LITERAL_2:
2163 if (ds->lt.len_avail == 0) {
2164 /* There is no bitlen. */
2165 if (!lzh_br_read_ahead(strm, br,
2166 ds->lt.len_bits)) {
2167 if (last)
2168 goto failed;/* Truncated data.*/
2169 ds->state = ST_RD_LITERAL_2;
2170 return (ARCHIVE_OK);
2171 }
2172 if (!lzh_make_fake_table(&(ds->lt),
2173 lzh_br_bits(br, ds->lt.len_bits)))
2174 goto failed;/* Invalid data */
2175 lzh_br_consume(br, ds->lt.len_bits);
2176 ds->state = ST_RD_POS_DATA_1;
2177 break;
2178 } else if (ds->lt.len_avail > ds->lt.len_size)
2179 goto failed;/* Invalid data */
2180 ds->loop = 0;
2181 memset(ds->lt.freq, 0, sizeof(ds->lt.freq));
2182 /* FALL THROUGH */
2183 case ST_RD_LITERAL_3:
2184 i = ds->loop;
2185 while (i < ds->lt.len_avail) {
2186 if (!lzh_br_read_ahead(strm, br,
2187 ds->pt.max_bits)) {
2188 if (last)
2189 goto failed;/* Truncated data.*/
2190 ds->loop = i;
2191 ds->state = ST_RD_LITERAL_3;
2192 return (ARCHIVE_OK);
2193 }
2194 rbits = lzh_br_bits(br, ds->pt.max_bits);
2195 c = lzh_decode_huffman(&(ds->pt), rbits);
2196 if (c > 2) {
2197 /* Note: 'c' will never be more than
2198 * eighteen since it's limited by
2199 * PT_BITLEN_SIZE, which is being set
2200 * to ds->pt.len_size through
2201 * ds->literal_pt_len_size. */
2202 lzh_br_consume(br, ds->pt.bitlen[c]);
2203 c -= 2;
2204 ds->lt.freq[c]++;
2205 ds->lt.bitlen[i++] = c;
2206 } else if (c == 0) {
2207 lzh_br_consume(br, ds->pt.bitlen[c]);
2208 ds->lt.bitlen[i++] = 0;
2209 } else {
2210 /* c == 1 or c == 2 */
2211 int n = (c == 1)?4:9;
2212 if (!lzh_br_read_ahead(strm, br,
2213 ds->pt.bitlen[c] + n)) {
2214 if (last) /* Truncated data. */
2215 goto failed;
2216 ds->loop = i;
2217 ds->state = ST_RD_LITERAL_3;
2218 return (ARCHIVE_OK);
2219 }
2220 lzh_br_consume(br, ds->pt.bitlen[c]);
2221 c = lzh_br_bits(br, n);
2222 lzh_br_consume(br, n);
2223 c += (n == 4)?3:20;
2224 if (i + c > ds->lt.len_avail)
2225 goto failed;/* Invalid data */
2226 memset(&(ds->lt.bitlen[i]), 0, c);
2227 i += c;
2228 }
2229 }
2230 if (i > ds->lt.len_avail ||
2231 !lzh_make_huffman_table(&(ds->lt)))
2232 goto failed;/* Invalid data */
2233 /* FALL THROUGH */
2234 case ST_RD_POS_DATA_1:
2235 /*
2236 * Read a position table compressed in huffman
2237 * coding.
2238 */
2239 ds->pt.len_size = ds->pos_pt_len_size;
2240 ds->pt.len_bits = ds->pos_pt_len_bits;
2241 ds->reading_position = 1;
2242 ds->state = ST_RD_PT_1;
2243 break;
2244 case ST_GET_LITERAL:
2245 return (100);
2246 }
2247 }
2248failed:
2249 return (ds->error = ARCHIVE_FAILED);
2250}
2251
2252static int
2253lzh_decode_blocks(struct lzh_stream *strm, int last)
2254{
2255 struct lzh_dec *ds = strm->ds;
2256 struct lzh_br bre = ds->br;
2257 struct huffman *lt = &(ds->lt);
2258 struct huffman *pt = &(ds->pt);
2259 unsigned char *w_buff = ds->w_buff;
2260 unsigned char *lt_bitlen = lt->bitlen;
2261 unsigned char *pt_bitlen = pt->bitlen;
2262 int blocks_avail = ds->blocks_avail, c = 0;
2263 int copy_len = ds->copy_len, copy_pos = ds->copy_pos;
2264 int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
2265 int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits;
2266 int state = ds->state;
2267
2268 for (;;) {
2269 switch (state) {
2270 case ST_GET_LITERAL:
2271 for (;;) {
2272 if (blocks_avail == 0) {
2273 /* We have decoded all blocks.
2274 * Let's handle next blocks. */
2275 ds->state = ST_RD_BLOCK;
2276 ds->br = bre;
2277 ds->blocks_avail = 0;
2278 ds->w_pos = w_pos;
2279 ds->copy_pos = 0;
2280 return (100);
2281 }
2282
2283 /* lzh_br_read_ahead() always try to fill the
2284 * cache buffer up. In specific situation we
2285 * are close to the end of the data, the cache
2286 * buffer will not be full and thus we have to
2287 * determine if the cache buffer has some bits
2288 * as much as we need after lzh_br_read_ahead()
2289 * failed. */
2290 if (!lzh_br_read_ahead(strm, &bre,
2291 lt_max_bits)) {
2292 if (!last)
2293 goto next_data;
2294 /* Remaining bits are less than
2295 * maximum bits(lt.max_bits) but maybe
2296 * it still remains as much as we need,
2297 * so we should try to use it with
2298 * dummy bits. */
2299 c = lzh_decode_huffman(lt,
2300 lzh_br_bits_forced(&bre,
2301 lt_max_bits));
2302 lzh_br_consume(&bre, lt_bitlen[c]);
2303 if (!lzh_br_has(&bre, 0))
2304 goto failed;/* Over read. */
2305 } else {
2306 c = lzh_decode_huffman(lt,
2307 lzh_br_bits(&bre, lt_max_bits));
2308 lzh_br_consume(&bre, lt_bitlen[c]);
2309 }
2310 blocks_avail--;
2311 if (c > UCHAR_MAX)
2312 /* Current block is a match data. */
2313 break;
2314 /*
2315 * 'c' is exactly a literal code.
2316 */
2317 /* Save a decoded code to reference it
2318 * afterward. */
2319 w_buff[w_pos] = c;
2320 if (++w_pos >= w_size) {
2321 w_pos = 0;
2322 lzh_emit_window(strm, w_size);
2323 goto next_data;
2324 }
2325 }
2326 /* 'c' is the length of a match pattern we have
2327 * already extracted, which has be stored in
2328 * window(ds->w_buff). */
2329 copy_len = c - (UCHAR_MAX + 1) + MINMATCH;
2330 /* FALL THROUGH */
2331 case ST_GET_POS_1:
2332 /*
2333 * Get a reference position.
2334 */
2335 if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) {
2336 if (!last) {
2337 state = ST_GET_POS_1;
2338 ds->copy_len = copy_len;
2339 goto next_data;
2340 }
2341 copy_pos = lzh_decode_huffman(pt,
2342 lzh_br_bits_forced(&bre, pt_max_bits));
2343 lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2344 if (!lzh_br_has(&bre, 0))
2345 goto failed;/* Over read. */
2346 } else {
2347 copy_pos = lzh_decode_huffman(pt,
2348 lzh_br_bits(&bre, pt_max_bits));
2349 lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2350 }
2351 /* FALL THROUGH */
2352 case ST_GET_POS_2:
2353 if (copy_pos > 1) {
2354 /* We need an additional adjustment number to
2355 * the position. */
2356 int p = copy_pos - 1;
2357 if (!lzh_br_read_ahead(strm, &bre, p)) {
2358 if (last)
2359 goto failed;/* Truncated data.*/
2360 state = ST_GET_POS_2;
2361 ds->copy_len = copy_len;
2362 ds->copy_pos = copy_pos;
2363 goto next_data;
2364 }
2365 copy_pos = (1 << p) + lzh_br_bits(&bre, p);
2366 lzh_br_consume(&bre, p);
2367 }
2368 /* The position is actually a distance from the last
2369 * code we had extracted and thus we have to convert
2370 * it to a position of the window. */
2371 copy_pos = (w_pos - copy_pos - 1) & w_mask;
2372 /* FALL THROUGH */
2373 case ST_COPY_DATA:
2374 /*
2375 * Copy `copy_len' bytes as extracted data from
2376 * the window into the output buffer.
2377 */
2378 for (;;) {
2379 int l;
2380
2381 l = copy_len;
2382 if (copy_pos > w_pos) {
2383 if (l > w_size - copy_pos)
2384 l = w_size - copy_pos;
2385 } else {
2386 if (l > w_size - w_pos)
2387 l = w_size - w_pos;
2388 }
2389 if ((copy_pos + l < w_pos)
2390 || (w_pos + l < copy_pos)) {
2391 /* No overlap. */
2392 memcpy(w_buff + w_pos,
2393 w_buff + copy_pos, l);
2394 } else {
2395 const unsigned char *s;
2396 unsigned char *d;
2397 int li;
2398
2399 d = w_buff + w_pos;
2400 s = w_buff + copy_pos;
2401 for (li = 0; li < l-1;) {
2402 d[li] = s[li];li++;
2403 d[li] = s[li];li++;
2404 }
2405 if (li < l)
2406 d[li] = s[li];
2407 }
2408 w_pos += l;
2409 if (w_pos == w_size) {
2410 w_pos = 0;
2411 lzh_emit_window(strm, w_size);
2412 if (copy_len <= l)
2413 state = ST_GET_LITERAL;
2414 else {
2415 state = ST_COPY_DATA;
2416 ds->copy_len = copy_len - l;
2417 ds->copy_pos =
2418 (copy_pos + l) & w_mask;
2419 }
2420 goto next_data;
2421 }
2422 if (copy_len <= l)
2423 /* A copy of current pattern ended. */
2424 break;
2425 copy_len -= l;
2426 copy_pos = (copy_pos + l) & w_mask;
2427 }
2428 state = ST_GET_LITERAL;
2429 break;
2430 }
2431 }
2432failed:
2433 return (ds->error = ARCHIVE_FAILED);
2434next_data:
2435 ds->br = bre;
2436 ds->blocks_avail = blocks_avail;
2437 ds->state = state;
2438 ds->w_pos = w_pos;
2439 return (ARCHIVE_OK);
2440}
2441
2442static int
2443lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
2444{
2445 int bits;
2446
2447 if (hf->bitlen == NULL) {
2448 hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0]));
2449 if (hf->bitlen == NULL)
2450 return (ARCHIVE_FATAL);
2451 }
2452 if (hf->tbl == NULL) {
2453 if (tbl_bits < HTBL_BITS)
2454 bits = tbl_bits;
2455 else
2456 bits = HTBL_BITS;
2457 hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
2458 if (hf->tbl == NULL)
2459 return (ARCHIVE_FATAL);
2460 }
2461 if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
2462 hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
2463 hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
2464 if (hf->tree == NULL)
2465 return (ARCHIVE_FATAL);
2466 }
2467 hf->len_size = (int)len_size;
2468 hf->tbl_bits = tbl_bits;
2469 return (ARCHIVE_OK);
2470}
2471
2472static void
2473lzh_huffman_free(struct huffman *hf)
2474{
2475 free(hf->bitlen);
2476 free(hf->tbl);
2477 free(hf->tree);
2478}
2479
1/*-
2 * Copyright (c) 2008-2014 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
27
28#ifdef HAVE_ERRNO_H
29#include <errno.h>
30#endif
31#ifdef HAVE_LIMITS_H
32#include <limits.h>
33#endif
34#ifdef HAVE_STDLIB_H
35#include <stdlib.h>
36#endif
37#ifdef HAVE_STRING_H
38#include <string.h>
39#endif
40
41#include "archive.h"
42#include "archive_entry.h"
43#include "archive_entry_locale.h"
44#include "archive_private.h"
45#include "archive_read_private.h"
46#include "archive_endian.h"
47
48
49#define MAXMATCH 256 /* Maximum match length. */
50#define MINMATCH 3 /* Minimum match length. */
51/*
52 * Literal table format:
53 * +0 +256 +510
54 * +---------------+-------------------------+
55 * | literal code | match length |
56 * | 0 ... 255 | MINMATCH ... MAXMATCH |
57 * +---------------+-------------------------+
58 * <--- LT_BITLEN_SIZE --->
59 */
60/* Literal table size. */
61#define LT_BITLEN_SIZE (UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1)
62/* Position table size.
63 * Note: this used for both position table and pre literal table.*/
64#define PT_BITLEN_SIZE (3 + 16)
65
66struct lzh_dec {
67 /* Decoding status. */
68 int state;
69
70 /*
71 * Window to see last 8Ki(lh5),32Ki(lh6),64Ki(lh7) bytes of decoded
72 * data.
73 */
74 int w_size;
75 int w_mask;
76 /* Window buffer, which is a loop buffer. */
77 unsigned char *w_buff;
78 /* The insert position to the window. */
79 int w_pos;
80 /* The position where we can copy decoded code from the window. */
81 int copy_pos;
82 /* The length how many bytes we can copy decoded code from
83 * the window. */
84 int copy_len;
85
86 /*
87 * Bit stream reader.
88 */
89 struct lzh_br {
90#define CACHE_TYPE uint64_t
91#define CACHE_BITS (8 * sizeof(CACHE_TYPE))
92 /* Cache buffer. */
93 CACHE_TYPE cache_buffer;
94 /* Indicates how many bits avail in cache_buffer. */
95 int cache_avail;
96 } br;
97
98 /*
99 * Huffman coding.
100 */
101 struct huffman {
102 int len_size;
103 int len_avail;
104 int len_bits;
105 int freq[17];
106 unsigned char *bitlen;
107
108 /*
109 * Use a index table. It's faster than searching a huffman
110 * coding tree, which is a binary tree. But a use of a large
111 * index table causes L1 cache read miss many times.
112 */
113#define HTBL_BITS 10
114 int max_bits;
115 int shift_bits;
116 int tbl_bits;
117 int tree_used;
118 int tree_avail;
119 /* Direct access table. */
120 uint16_t *tbl;
121 /* Binary tree table for extra bits over the direct access. */
122 struct htree_t {
123 uint16_t left;
124 uint16_t right;
125 } *tree;
126 } lt, pt;
127
128 int blocks_avail;
129 int pos_pt_len_size;
130 int pos_pt_len_bits;
131 int literal_pt_len_size;
132 int literal_pt_len_bits;
133 int reading_position;
134 int loop;
135 int error;
136};
137
138struct lzh_stream {
139 const unsigned char *next_in;
140 int avail_in;
141 int64_t total_in;
142 const unsigned char *ref_ptr;
143 int avail_out;
144 int64_t total_out;
145 struct lzh_dec *ds;
146};
147
148struct lha {
149 /* entry_bytes_remaining is the number of bytes we expect. */
150 int64_t entry_offset;
151 int64_t entry_bytes_remaining;
152 int64_t entry_unconsumed;
153 uint16_t entry_crc_calculated;
154
155 size_t header_size; /* header size */
156 unsigned char level; /* header level */
157 char method[3]; /* compress type */
158 int64_t compsize; /* compressed data size */
159 int64_t origsize; /* original file size */
160 int setflag;
161#define BIRTHTIME_IS_SET 1
162#define ATIME_IS_SET 2
163#define UNIX_MODE_IS_SET 4
164#define CRC_IS_SET 8
165 time_t birthtime;
166 long birthtime_tv_nsec;
167 time_t mtime;
168 long mtime_tv_nsec;
169 time_t atime;
170 long atime_tv_nsec;
171 mode_t mode;
172 int64_t uid;
173 int64_t gid;
174 struct archive_string uname;
175 struct archive_string gname;
176 uint16_t header_crc;
177 uint16_t crc;
178 struct archive_string_conv *sconv;
179 struct archive_string_conv *opt_sconv;
180
181 struct archive_string dirname;
182 struct archive_string filename;
183 struct archive_wstring ws;
184
185 unsigned char dos_attr;
186
187 /* Flag to mark progress that an archive was read their first header.*/
188 char found_first_header;
189 /* Flag to mark that indicates an empty directory. */
190 char directory;
191
192 /* Flags to mark progress of decompression. */
193 char decompress_init;
194 char end_of_entry;
195 char end_of_entry_cleanup;
196 char entry_is_compressed;
197
198 char format_name[64];
199
200 struct lzh_stream strm;
201};
202
203/*
204 * LHA header common member offset.
205 */
206#define H_METHOD_OFFSET 2 /* Compress type. */
207#define H_ATTR_OFFSET 19 /* DOS attribute. */
208#define H_LEVEL_OFFSET 20 /* Header Level. */
209#define H_SIZE 22 /* Minimum header size. */
210
211static int archive_read_format_lha_bid(struct archive_read *, int);
212static int archive_read_format_lha_options(struct archive_read *,
213 const char *, const char *);
214static int archive_read_format_lha_read_header(struct archive_read *,
215 struct archive_entry *);
216static int archive_read_format_lha_read_data(struct archive_read *,
217 const void **, size_t *, int64_t *);
218static int archive_read_format_lha_read_data_skip(struct archive_read *);
219static int archive_read_format_lha_cleanup(struct archive_read *);
220
221static void lha_replace_path_separator(struct lha *,
222 struct archive_entry *);
223static int lha_read_file_header_0(struct archive_read *, struct lha *);
224static int lha_read_file_header_1(struct archive_read *, struct lha *);
225static int lha_read_file_header_2(struct archive_read *, struct lha *);
226static int lha_read_file_header_3(struct archive_read *, struct lha *);
227static int lha_read_file_extended_header(struct archive_read *,
228 struct lha *, uint16_t *, int, size_t, size_t *);
229static size_t lha_check_header_format(const void *);
230static int lha_skip_sfx(struct archive_read *);
231static time_t lha_dos_time(const unsigned char *);
232static time_t lha_win_time(uint64_t, long *);
233static unsigned char lha_calcsum(unsigned char, const void *,
234 int, size_t);
235static int lha_parse_linkname(struct archive_string *,
236 struct archive_string *);
237static int lha_read_data_none(struct archive_read *, const void **,
238 size_t *, int64_t *);
239static int lha_read_data_lzh(struct archive_read *, const void **,
240 size_t *, int64_t *);
241static void lha_crc16_init(void);
242static uint16_t lha_crc16(uint16_t, const void *, size_t);
243static int lzh_decode_init(struct lzh_stream *, const char *);
244static void lzh_decode_free(struct lzh_stream *);
245static int lzh_decode(struct lzh_stream *, int);
246static int lzh_br_fillup(struct lzh_stream *, struct lzh_br *);
247static int lzh_huffman_init(struct huffman *, size_t, int);
248static void lzh_huffman_free(struct huffman *);
249static int lzh_read_pt_bitlen(struct lzh_stream *, int start, int end);
250static int lzh_make_fake_table(struct huffman *, uint16_t);
251static int lzh_make_huffman_table(struct huffman *);
252static inline int lzh_decode_huffman(struct huffman *, unsigned);
253static int lzh_decode_huffman_tree(struct huffman *, unsigned, int);
254
255
256int
257archive_read_support_format_lha(struct archive *_a)
258{
259 struct archive_read *a = (struct archive_read *)_a;
260 struct lha *lha;
261 int r;
262
263 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
264 ARCHIVE_STATE_NEW, "archive_read_support_format_lha");
265
266 lha = (struct lha *)calloc(1, sizeof(*lha));
267 if (lha == NULL) {
268 archive_set_error(&a->archive, ENOMEM,
269 "Can't allocate lha data");
270 return (ARCHIVE_FATAL);
271 }
272 archive_string_init(&lha->ws);
273
274 r = __archive_read_register_format(a,
275 lha,
276 "lha",
277 archive_read_format_lha_bid,
278 archive_read_format_lha_options,
279 archive_read_format_lha_read_header,
280 archive_read_format_lha_read_data,
281 archive_read_format_lha_read_data_skip,
282 NULL,
283 archive_read_format_lha_cleanup,
284 NULL,
285 NULL);
286
287 if (r != ARCHIVE_OK)
288 free(lha);
289 return (ARCHIVE_OK);
290}
291
292static size_t
293lha_check_header_format(const void *h)
294{
295 const unsigned char *p = h;
296 size_t next_skip_bytes;
297
298 switch (p[H_METHOD_OFFSET+3]) {
299 /*
300 * "-lh0-" ... "-lh7-" "-lhd-"
301 * "-lzs-" "-lz5-"
302 */
303 case '0': case '1': case '2': case '3':
304 case '4': case '5': case '6': case '7':
305 case 'd':
306 case 's':
307 next_skip_bytes = 4;
308
309 /* b0 == 0 means the end of an LHa archive file. */
310 if (p[0] == 0)
311 break;
312 if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l'
313 || p[H_METHOD_OFFSET+4] != '-')
314 break;
315
316 if (p[H_METHOD_OFFSET+2] == 'h') {
317 /* "-lh?-" */
318 if (p[H_METHOD_OFFSET+3] == 's')
319 break;
320 if (p[H_LEVEL_OFFSET] == 0)
321 return (0);
322 if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20)
323 return (0);
324 }
325 if (p[H_METHOD_OFFSET+2] == 'z') {
326 /* LArc extensions: -lzs-,-lz4- and -lz5- */
327 if (p[H_LEVEL_OFFSET] != 0)
328 break;
329 if (p[H_METHOD_OFFSET+3] == 's'
330 || p[H_METHOD_OFFSET+3] == '4'
331 || p[H_METHOD_OFFSET+3] == '5')
332 return (0);
333 }
334 break;
335 case 'h': next_skip_bytes = 1; break;
336 case 'z': next_skip_bytes = 1; break;
337 case 'l': next_skip_bytes = 2; break;
338 case '-': next_skip_bytes = 3; break;
339 default : next_skip_bytes = 4; break;
340 }
341
342 return (next_skip_bytes);
343}
344
345static int
346archive_read_format_lha_bid(struct archive_read *a, int best_bid)
347{
348 const char *p;
349 const void *buff;
350 ssize_t bytes_avail, offset, window;
351 size_t next;
352
353 /* If there's already a better bid than we can ever
354 make, don't bother testing. */
355 if (best_bid > 30)
356 return (-1);
357
358 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL)
359 return (-1);
360
361 if (lha_check_header_format(p) == 0)
362 return (30);
363
364 if (p[0] == 'M' && p[1] == 'Z') {
365 /* PE file */
366 offset = 0;
367 window = 4096;
368 while (offset < (1024 * 20)) {
369 buff = __archive_read_ahead(a, offset + window,
370 &bytes_avail);
371 if (buff == NULL) {
372 /* Remaining bytes are less than window. */
373 window >>= 1;
374 if (window < (H_SIZE + 3))
375 return (0);
376 continue;
377 }
378 p = (const char *)buff + offset;
379 while (p + H_SIZE < (const char *)buff + bytes_avail) {
380 if ((next = lha_check_header_format(p)) == 0)
381 return (30);
382 p += next;
383 }
384 offset = p - (const char *)buff;
385 }
386 }
387 return (0);
388}
389
390static int
391archive_read_format_lha_options(struct archive_read *a,
392 const char *key, const char *val)
393{
394 struct lha *lha;
395 int ret = ARCHIVE_FAILED;
396
397 lha = (struct lha *)(a->format->data);
398 if (strcmp(key, "hdrcharset") == 0) {
399 if (val == NULL || val[0] == 0)
400 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
401 "lha: hdrcharset option needs a character-set name");
402 else {
403 lha->opt_sconv =
404 archive_string_conversion_from_charset(
405 &a->archive, val, 0);
406 if (lha->opt_sconv != NULL)
407 ret = ARCHIVE_OK;
408 else
409 ret = ARCHIVE_FATAL;
410 }
411 return (ret);
412 }
413
414 /* Note: The "warn" return is just to inform the options
415 * supervisor that we didn't handle it. It will generate
416 * a suitable error if no one used this option. */
417 return (ARCHIVE_WARN);
418}
419
420static int
421lha_skip_sfx(struct archive_read *a)
422{
423 const void *h;
424 const char *p, *q;
425 size_t next, skip;
426 ssize_t bytes, window;
427
428 window = 4096;
429 for (;;) {
430 h = __archive_read_ahead(a, window, &bytes);
431 if (h == NULL) {
432 /* Remaining bytes are less than window. */
433 window >>= 1;
434 if (window < (H_SIZE + 3))
435 goto fatal;
436 continue;
437 }
438 if (bytes < H_SIZE)
439 goto fatal;
440 p = h;
441 q = p + bytes;
442
443 /*
444 * Scan ahead until we find something that looks
445 * like the lha header.
446 */
447 while (p + H_SIZE < q) {
448 if ((next = lha_check_header_format(p)) == 0) {
449 skip = p - (const char *)h;
450 __archive_read_consume(a, skip);
451 return (ARCHIVE_OK);
452 }
453 p += next;
454 }
455 skip = p - (const char *)h;
456 __archive_read_consume(a, skip);
457 }
458fatal:
459 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
460 "Couldn't find out LHa header");
461 return (ARCHIVE_FATAL);
462}
463
464static int
465truncated_error(struct archive_read *a)
466{
467 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
468 "Truncated LHa header");
469 return (ARCHIVE_FATAL);
470}
471
472static int
473archive_read_format_lha_read_header(struct archive_read *a,
474 struct archive_entry *entry)
475{
476 struct archive_string linkname;
477 struct archive_string pathname;
478 struct lha *lha;
479 const unsigned char *p;
480 const char *signature;
481 int err;
482
483 lha_crc16_init();
484
485 a->archive.archive_format = ARCHIVE_FORMAT_LHA;
486 if (a->archive.archive_format_name == NULL)
487 a->archive.archive_format_name = "lha";
488
489 lha = (struct lha *)(a->format->data);
490 lha->decompress_init = 0;
491 lha->end_of_entry = 0;
492 lha->end_of_entry_cleanup = 0;
493 lha->entry_unconsumed = 0;
494
495 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) {
496 /*
497 * LHa archiver added 0 to the tail of its archive file as
498 * the mark of the end of the archive.
499 */
500 signature = __archive_read_ahead(a, sizeof(signature[0]), NULL);
501 if (signature == NULL || signature[0] == 0)
502 return (ARCHIVE_EOF);
503 return (truncated_error(a));
504 }
505
506 signature = (const char *)p;
507 if (lha->found_first_header == 0 &&
508 signature[0] == 'M' && signature[1] == 'Z') {
509 /* This is an executable? Must be self-extracting... */
510 err = lha_skip_sfx(a);
511 if (err < ARCHIVE_WARN)
512 return (err);
513
514 if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL)
515 return (truncated_error(a));
516 signature = (const char *)p;
517 }
518 /* signature[0] == 0 means the end of an LHa archive file. */
519 if (signature[0] == 0)
520 return (ARCHIVE_EOF);
521
522 /*
523 * Check the header format and method type.
524 */
525 if (lha_check_header_format(p) != 0) {
526 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
527 "Bad LHa file");
528 return (ARCHIVE_FATAL);
529 }
530
531 /* We've found the first header. */
532 lha->found_first_header = 1;
533 /* Set a default value and common data */
534 lha->header_size = 0;
535 lha->level = p[H_LEVEL_OFFSET];
536 lha->method[0] = p[H_METHOD_OFFSET+1];
537 lha->method[1] = p[H_METHOD_OFFSET+2];
538 lha->method[2] = p[H_METHOD_OFFSET+3];
539 if (memcmp(lha->method, "lhd", 3) == 0)
540 lha->directory = 1;
541 else
542 lha->directory = 0;
543 if (memcmp(lha->method, "lh0", 3) == 0 ||
544 memcmp(lha->method, "lz4", 3) == 0)
545 lha->entry_is_compressed = 0;
546 else
547 lha->entry_is_compressed = 1;
548
549 lha->compsize = 0;
550 lha->origsize = 0;
551 lha->setflag = 0;
552 lha->birthtime = 0;
553 lha->birthtime_tv_nsec = 0;
554 lha->mtime = 0;
555 lha->mtime_tv_nsec = 0;
556 lha->atime = 0;
557 lha->atime_tv_nsec = 0;
558 lha->mode = (lha->directory)? 0777 : 0666;
559 lha->uid = 0;
560 lha->gid = 0;
561 archive_string_empty(&lha->dirname);
562 archive_string_empty(&lha->filename);
563 lha->dos_attr = 0;
564 if (lha->opt_sconv != NULL)
565 lha->sconv = lha->opt_sconv;
566 else
567 lha->sconv = NULL;
568
569 switch (p[H_LEVEL_OFFSET]) {
570 case 0:
571 err = lha_read_file_header_0(a, lha);
572 break;
573 case 1:
574 err = lha_read_file_header_1(a, lha);
575 break;
576 case 2:
577 err = lha_read_file_header_2(a, lha);
578 break;
579 case 3:
580 err = lha_read_file_header_3(a, lha);
581 break;
582 default:
583 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
584 "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]);
585 err = ARCHIVE_FATAL;
586 break;
587 }
588 if (err < ARCHIVE_WARN)
589 return (err);
590
591
592 if (!lha->directory && archive_strlen(&lha->filename) == 0)
593 /* The filename has not been set */
594 return (truncated_error(a));
595
596 /*
597 * Make a pathname from a dirname and a filename.
598 */
599 archive_string_concat(&lha->dirname, &lha->filename);
600 archive_string_init(&pathname);
601 archive_string_init(&linkname);
602 archive_string_copy(&pathname, &lha->dirname);
603
604 if ((lha->mode & AE_IFMT) == AE_IFLNK) {
605 /*
606 * Extract the symlink-name if it's included in the pathname.
607 */
608 if (!lha_parse_linkname(&linkname, &pathname)) {
609 /* We couldn't get the symlink-name. */
610 archive_set_error(&a->archive,
611 ARCHIVE_ERRNO_FILE_FORMAT,
612 "Unknown symlink-name");
613 archive_string_free(&pathname);
614 archive_string_free(&linkname);
615 return (ARCHIVE_FAILED);
616 }
617 } else {
618 /*
619 * Make sure a file-type is set.
620 * The mode has been overridden if it is in the extended data.
621 */
622 lha->mode = (lha->mode & ~AE_IFMT) |
623 ((lha->directory)? AE_IFDIR: AE_IFREG);
624 }
625 if ((lha->setflag & UNIX_MODE_IS_SET) == 0 &&
626 (lha->dos_attr & 1) != 0)
627 lha->mode &= ~(0222);/* read only. */
628
629 /*
630 * Set basic file parameters.
631 */
632 if (archive_entry_copy_pathname_l(entry, pathname.s,
633 pathname.length, lha->sconv) != 0) {
634 if (errno == ENOMEM) {
635 archive_set_error(&a->archive, ENOMEM,
636 "Can't allocate memory for Pathname");
637 return (ARCHIVE_FATAL);
638 }
639 archive_set_error(&a->archive,
640 ARCHIVE_ERRNO_FILE_FORMAT,
641 "Pathname cannot be converted "
642 "from %s to current locale.",
643 archive_string_conversion_charset_name(lha->sconv));
644 err = ARCHIVE_WARN;
645 }
646 archive_string_free(&pathname);
647 if (archive_strlen(&linkname) > 0) {
648 if (archive_entry_copy_symlink_l(entry, linkname.s,
649 linkname.length, lha->sconv) != 0) {
650 if (errno == ENOMEM) {
651 archive_set_error(&a->archive, ENOMEM,
652 "Can't allocate memory for Linkname");
653 return (ARCHIVE_FATAL);
654 }
655 archive_set_error(&a->archive,
656 ARCHIVE_ERRNO_FILE_FORMAT,
657 "Linkname cannot be converted "
658 "from %s to current locale.",
659 archive_string_conversion_charset_name(lha->sconv));
660 err = ARCHIVE_WARN;
661 }
662 } else
663 archive_entry_set_symlink(entry, NULL);
664 archive_string_free(&linkname);
665 /*
666 * When a header level is 0, there is a possibility that
667 * a pathname and a symlink has '\' character, a directory
668 * separator in DOS/Windows. So we should convert it to '/'.
669 */
670 if (p[H_LEVEL_OFFSET] == 0)
671 lha_replace_path_separator(lha, entry);
672
673 archive_entry_set_mode(entry, lha->mode);
674 archive_entry_set_uid(entry, lha->uid);
675 archive_entry_set_gid(entry, lha->gid);
676 if (archive_strlen(&lha->uname) > 0)
677 archive_entry_set_uname(entry, lha->uname.s);
678 if (archive_strlen(&lha->gname) > 0)
679 archive_entry_set_gname(entry, lha->gname.s);
680 if (lha->setflag & BIRTHTIME_IS_SET) {
681 archive_entry_set_birthtime(entry, lha->birthtime,
682 lha->birthtime_tv_nsec);
683 archive_entry_set_ctime(entry, lha->birthtime,
684 lha->birthtime_tv_nsec);
685 } else {
686 archive_entry_unset_birthtime(entry);
687 archive_entry_unset_ctime(entry);
688 }
689 archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec);
690 if (lha->setflag & ATIME_IS_SET)
691 archive_entry_set_atime(entry, lha->atime,
692 lha->atime_tv_nsec);
693 else
694 archive_entry_unset_atime(entry);
695 if (lha->directory || archive_entry_symlink(entry) != NULL)
696 archive_entry_unset_size(entry);
697 else
698 archive_entry_set_size(entry, lha->origsize);
699
700 /*
701 * Prepare variables used to read a file content.
702 */
703 lha->entry_bytes_remaining = lha->compsize;
704 lha->entry_offset = 0;
705 lha->entry_crc_calculated = 0;
706
707 /*
708 * This file does not have a content.
709 */
710 if (lha->directory || lha->compsize == 0)
711 lha->end_of_entry = 1;
712
713 sprintf(lha->format_name, "lha -%c%c%c-",
714 lha->method[0], lha->method[1], lha->method[2]);
715 a->archive.archive_format_name = lha->format_name;
716
717 return (err);
718}
719
720/*
721 * Replace a DOS path separator '\' by a character '/'.
722 * Some multi-byte character set have a character '\' in its second byte.
723 */
724static void
725lha_replace_path_separator(struct lha *lha, struct archive_entry *entry)
726{
727 const wchar_t *wp;
728 size_t i;
729
730 if ((wp = archive_entry_pathname_w(entry)) != NULL) {
731 archive_wstrcpy(&(lha->ws), wp);
732 for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
733 if (lha->ws.s[i] == L'\\')
734 lha->ws.s[i] = L'/';
735 }
736 archive_entry_copy_pathname_w(entry, lha->ws.s);
737 }
738
739 if ((wp = archive_entry_symlink_w(entry)) != NULL) {
740 archive_wstrcpy(&(lha->ws), wp);
741 for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
742 if (lha->ws.s[i] == L'\\')
743 lha->ws.s[i] = L'/';
744 }
745 archive_entry_copy_symlink_w(entry, lha->ws.s);
746 }
747}
748
749/*
750 * Header 0 format
751 *
752 * +0 +1 +2 +7 +11
753 * +---------------+----------+----------------+-------------------+
754 * |header size(*1)|header sum|compression type|compressed size(*2)|
755 * +---------------+----------+----------------+-------------------+
756 * <---------------------(*1)----------*
757 *
758 * +11 +15 +17 +19 +20 +21
759 * +-----------------+---------+---------+--------------+----------------+
760 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)|
761 * +-----------------+---------+---------+--------------+----------------+
762 * *--------------------------------(*1)---------------------------------*
763 *
764 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+2+(*4)
765 * +---------------+---------+----------+----------------+------------------+
766 * |name length(*3)|file name|file CRC16|extra header(*4)| compressed data |
767 * +---------------+---------+----------+----------------+------------------+
768 * <--(*3)-> <------(*2)------>
769 * *----------------------(*1)-------------------------->
770 *
771 */
772#define H0_HEADER_SIZE_OFFSET 0
773#define H0_HEADER_SUM_OFFSET 1
774#define H0_COMP_SIZE_OFFSET 7
775#define H0_ORIG_SIZE_OFFSET 11
776#define H0_DOS_TIME_OFFSET 15
777#define H0_NAME_LEN_OFFSET 21
778#define H0_FILE_NAME_OFFSET 22
779#define H0_FIXED_SIZE 24
780static int
781lha_read_file_header_0(struct archive_read *a, struct lha *lha)
782{
783 const unsigned char *p;
784 int extdsize, namelen;
785 unsigned char headersum, sum_calculated;
786
787 if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL)
788 return (truncated_error(a));
789 lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2;
790 headersum = p[H0_HEADER_SUM_OFFSET];
791 lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET);
792 lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET);
793 lha->mtime = lha_dos_time(p + H0_DOS_TIME_OFFSET);
794 namelen = p[H0_NAME_LEN_OFFSET];
795 extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen;
796 if ((namelen > 221 || extdsize < 0) && extdsize != -2) {
797 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
798 "Invalid LHa header");
799 return (ARCHIVE_FATAL);
800 }
801 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
802 return (truncated_error(a));
803
804 archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen);
805 /* When extdsize == -2, A CRC16 value is not present in the header. */
806 if (extdsize >= 0) {
807 lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen);
808 lha->setflag |= CRC_IS_SET;
809 }
810 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
811
812 /* Read an extended header */
813 if (extdsize > 0) {
814 /* This extended data is set by 'LHa for UNIX' only.
815 * Maybe fixed size.
816 */
817 p += H0_FILE_NAME_OFFSET + namelen + 2;
818 if (p[0] == 'U' && extdsize == 12) {
819 /* p[1] is a minor version. */
820 lha->mtime = archive_le32dec(&p[2]);
821 lha->mode = archive_le16dec(&p[6]);
822 lha->uid = archive_le16dec(&p[8]);
823 lha->gid = archive_le16dec(&p[10]);
824 lha->setflag |= UNIX_MODE_IS_SET;
825 }
826 }
827 __archive_read_consume(a, lha->header_size);
828
829 if (sum_calculated != headersum) {
830 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
831 "LHa header sum error");
832 return (ARCHIVE_FATAL);
833 }
834
835 return (ARCHIVE_OK);
836}
837
838/*
839 * Header 1 format
840 *
841 * +0 +1 +2 +7 +11
842 * +---------------+----------+----------------+-------------+
843 * |header size(*1)|header sum|compression type|skip size(*2)|
844 * +---------------+----------+----------------+-------------+
845 * <---------------(*1)----------*
846 *
847 * +11 +15 +17 +19 +20 +21
848 * +-----------------+---------+---------+--------------+----------------+
849 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)|
850 * +-----------------+---------+---------+--------------+----------------+
851 * *-------------------------------(*1)----------------------------------*
852 *
853 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+3 +22+(*3)+3+(*4)
854 * +---------------+---------+----------+-----------+-----------+
855 * |name length(*3)|file name|file CRC16| creator |padding(*4)|
856 * +---------------+---------+----------+-----------+-----------+
857 * <--(*3)->
858 * *----------------------------(*1)----------------------------*
859 *
860 * +22+(*3)+3+(*4) +22+(*3)+3+(*4)+2 +22+(*3)+3+(*4)+2+(*5)
861 * +----------------+---------------------+------------------------+
862 * |next header size| extended header(*5) | compressed data |
863 * +----------------+---------------------+------------------------+
864 * *------(*1)-----> <--------------------(*2)-------------------->
865 */
866#define H1_HEADER_SIZE_OFFSET 0
867#define H1_HEADER_SUM_OFFSET 1
868#define H1_COMP_SIZE_OFFSET 7
869#define H1_ORIG_SIZE_OFFSET 11
870#define H1_DOS_TIME_OFFSET 15
871#define H1_NAME_LEN_OFFSET 21
872#define H1_FILE_NAME_OFFSET 22
873#define H1_FIXED_SIZE 27
874static int
875lha_read_file_header_1(struct archive_read *a, struct lha *lha)
876{
877 const unsigned char *p;
878 size_t extdsize;
879 int i, err, err2;
880 int namelen, padding;
881 unsigned char headersum, sum_calculated;
882
883 err = ARCHIVE_OK;
884
885 if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL)
886 return (truncated_error(a));
887
888 lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2;
889 headersum = p[H1_HEADER_SUM_OFFSET];
890 /* Note: An extended header size is included in a compsize. */
891 lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET);
892 lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET);
893 lha->mtime = lha_dos_time(p + H1_DOS_TIME_OFFSET);
894 namelen = p[H1_NAME_LEN_OFFSET];
895 /* Calculate a padding size. The result will be normally 0 only(?) */
896 padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen;
897
898 if (namelen > 230 || padding < 0)
899 goto invalid;
900
901 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
902 return (truncated_error(a));
903
904 for (i = 0; i < namelen; i++) {
905 if (p[i + H1_FILE_NAME_OFFSET] == 0xff)
906 goto invalid;/* Invalid filename. */
907 }
908 archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen);
909 lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen);
910 lha->setflag |= CRC_IS_SET;
911
912 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
913 /* Consume used bytes but not include `next header size' data
914 * since it will be consumed in lha_read_file_extended_header(). */
915 __archive_read_consume(a, lha->header_size - 2);
916
917 /* Read extended headers */
918 err2 = lha_read_file_extended_header(a, lha, NULL, 2,
919 (size_t)(lha->compsize + 2), &extdsize);
920 if (err2 < ARCHIVE_WARN)
921 return (err2);
922 if (err2 < err)
923 err = err2;
924 /* Get a real compressed file size. */
925 lha->compsize -= extdsize - 2;
926
927 if (lha->compsize < 0)
928 goto invalid; /* Invalid compressed file size */
929
930 if (sum_calculated != headersum) {
931 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
932 "LHa header sum error");
933 return (ARCHIVE_FATAL);
934 }
935 return (err);
936invalid:
937 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
938 "Invalid LHa header");
939 return (ARCHIVE_FATAL);
940}
941
942/*
943 * Header 2 format
944 *
945 * +0 +2 +7 +11 +15
946 * +---------------+----------------+-------------------+-----------------+
947 * |header size(*1)|compression type|compressed size(*2)|uncompressed size|
948 * +---------------+----------------+-------------------+-----------------+
949 * <--------------------------------(*1)---------------------------------*
950 *
951 * +15 +19 +20 +21 +23 +24
952 * +-----------------+------------+----------------+----------+-----------+
953 * |data/time(time_t)| 0x20 fixed |header level(=2)|file CRC16| creator |
954 * +-----------------+------------+----------------+----------+-----------+
955 * *---------------------------------(*1)---------------------------------*
956 *
957 * +24 +26 +26+(*3) +26+(*3)+(*4)
958 * +----------------+-------------------+-------------+-------------------+
959 * |next header size|extended header(*3)| padding(*4) | compressed data |
960 * +----------------+-------------------+-------------+-------------------+
961 * *--------------------------(*1)-------------------> <------(*2)------->
962 *
963 */
964#define H2_HEADER_SIZE_OFFSET 0
965#define H2_COMP_SIZE_OFFSET 7
966#define H2_ORIG_SIZE_OFFSET 11
967#define H2_TIME_OFFSET 15
968#define H2_CRC_OFFSET 21
969#define H2_FIXED_SIZE 24
970static int
971lha_read_file_header_2(struct archive_read *a, struct lha *lha)
972{
973 const unsigned char *p;
974 size_t extdsize;
975 int err, padding;
976 uint16_t header_crc;
977
978 if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL)
979 return (truncated_error(a));
980
981 lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET);
982 lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET);
983 lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET);
984 lha->mtime = archive_le32dec(p + H2_TIME_OFFSET);
985 lha->crc = archive_le16dec(p + H2_CRC_OFFSET);
986 lha->setflag |= CRC_IS_SET;
987
988 if (lha->header_size < H2_FIXED_SIZE) {
989 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
990 "Invalid LHa header size");
991 return (ARCHIVE_FATAL);
992 }
993
994 header_crc = lha_crc16(0, p, H2_FIXED_SIZE);
995 __archive_read_consume(a, H2_FIXED_SIZE);
996
997 /* Read extended headers */
998 err = lha_read_file_extended_header(a, lha, &header_crc, 2,
999 lha->header_size - H2_FIXED_SIZE, &extdsize);
1000 if (err < ARCHIVE_WARN)
1001 return (err);
1002
1003 /* Calculate a padding size. The result will be normally 0 or 1. */
1004 padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize);
1005 if (padding > 0) {
1006 if ((p = __archive_read_ahead(a, padding, NULL)) == NULL)
1007 return (truncated_error(a));
1008 header_crc = lha_crc16(header_crc, p, padding);
1009 __archive_read_consume(a, padding);
1010 }
1011
1012 if (header_crc != lha->header_crc) {
1013 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1014 "LHa header CRC error");
1015 return (ARCHIVE_FATAL);
1016 }
1017 return (err);
1018}
1019
1020/*
1021 * Header 3 format
1022 *
1023 * +0 +2 +7 +11 +15
1024 * +------------+----------------+-------------------+-----------------+
1025 * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size|
1026 * +------------+----------------+-------------------+-----------------+
1027 * <-------------------------------(*1)-------------------------------*
1028 *
1029 * +15 +19 +20 +21 +23 +24
1030 * +-----------------+------------+----------------+----------+-----------+
1031 * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16| creator |
1032 * +-----------------+------------+----------------+----------+-----------+
1033 * *--------------------------------(*1)----------------------------------*
1034 *
1035 * +24 +28 +32 +32+(*3)
1036 * +---------------+----------------+-------------------+-----------------+
1037 * |header size(*1)|next header size|extended header(*3)| compressed data |
1038 * +---------------+----------------+-------------------+-----------------+
1039 * *------------------------(*1)-----------------------> <------(*2)----->
1040 *
1041 */
1042#define H3_FIELD_LEN_OFFSET 0
1043#define H3_COMP_SIZE_OFFSET 7
1044#define H3_ORIG_SIZE_OFFSET 11
1045#define H3_TIME_OFFSET 15
1046#define H3_CRC_OFFSET 21
1047#define H3_HEADER_SIZE_OFFSET 24
1048#define H3_FIXED_SIZE 28
1049static int
1050lha_read_file_header_3(struct archive_read *a, struct lha *lha)
1051{
1052 const unsigned char *p;
1053 size_t extdsize;
1054 int err;
1055 uint16_t header_crc;
1056
1057 if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL)
1058 return (truncated_error(a));
1059
1060 if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4)
1061 goto invalid;
1062 lha->header_size =archive_le32dec(p + H3_HEADER_SIZE_OFFSET);
1063 lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET);
1064 lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET);
1065 lha->mtime = archive_le32dec(p + H3_TIME_OFFSET);
1066 lha->crc = archive_le16dec(p + H3_CRC_OFFSET);
1067 lha->setflag |= CRC_IS_SET;
1068
1069 if (lha->header_size < H3_FIXED_SIZE + 4)
1070 goto invalid;
1071 header_crc = lha_crc16(0, p, H3_FIXED_SIZE);
1072 __archive_read_consume(a, H3_FIXED_SIZE);
1073
1074 /* Read extended headers */
1075 err = lha_read_file_extended_header(a, lha, &header_crc, 4,
1076 lha->header_size - H3_FIXED_SIZE, &extdsize);
1077 if (err < ARCHIVE_WARN)
1078 return (err);
1079
1080 if (header_crc != lha->header_crc) {
1081 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1082 "LHa header CRC error");
1083 return (ARCHIVE_FATAL);
1084 }
1085 return (err);
1086invalid:
1087 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1088 "Invalid LHa header");
1089 return (ARCHIVE_FATAL);
1090}
1091
1092/*
1093 * Extended header format
1094 *
1095 * +0 +2 +3 -- used in header 1 and 2
1096 * +0 +4 +5 -- used in header 3
1097 * +--------------+---------+-------------------+--------------+--
1098 * |ex-header size|header id| data |ex-header size| .......
1099 * +--------------+---------+-------------------+--------------+--
1100 * <-------------( ex-header size)------------> <-- next extended header --*
1101 *
1102 * If the ex-header size is zero, it is the make of the end of extended
1103 * headers.
1104 *
1105 */
1106static int
1107lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
1108 uint16_t *crc, int sizefield_length, size_t limitsize, size_t *total_size)
1109{
1110 const void *h;
1111 const unsigned char *extdheader;
1112 size_t extdsize;
1113 size_t datasize;
1114 unsigned int i;
1115 unsigned char extdtype;
1116
1117#define EXT_HEADER_CRC 0x00 /* Header CRC and information*/
1118#define EXT_FILENAME 0x01 /* Filename */
1119#define EXT_DIRECTORY 0x02 /* Directory name */
1120#define EXT_DOS_ATTR 0x40 /* MS-DOS attribute */
1121#define EXT_TIMESTAMP 0x41 /* Windows time stamp */
1122#define EXT_FILESIZE 0x42 /* Large file size */
1123#define EXT_TIMEZONE 0x43 /* Time zone */
1124#define EXT_UTF16_FILENAME 0x44 /* UTF-16 filename */
1125#define EXT_UTF16_DIRECTORY 0x45 /* UTF-16 directory name */
1126#define EXT_CODEPAGE 0x46 /* Codepage */
1127#define EXT_UNIX_MODE 0x50 /* File permission */
1128#define EXT_UNIX_GID_UID 0x51 /* gid,uid */
1129#define EXT_UNIX_GNAME 0x52 /* Group name */
1130#define EXT_UNIX_UNAME 0x53 /* User name */
1131#define EXT_UNIX_MTIME 0x54 /* Modified time */
1132#define EXT_OS2_NEW_ATTR 0x7f /* new attribute(OS/2 only) */
1133#define EXT_NEW_ATTR 0xff /* new attribute */
1134
1135 *total_size = sizefield_length;
1136
1137 for (;;) {
1138 /* Read an extended header size. */
1139 if ((h =
1140 __archive_read_ahead(a, sizefield_length, NULL)) == NULL)
1141 return (truncated_error(a));
1142 /* Check if the size is the zero indicates the end of the
1143 * extended header. */
1144 if (sizefield_length == sizeof(uint16_t))
1145 extdsize = archive_le16dec(h);
1146 else
1147 extdsize = archive_le32dec(h);
1148 if (extdsize == 0) {
1149 /* End of extended header */
1150 if (crc != NULL)
1151 *crc = lha_crc16(*crc, h, sizefield_length);
1152 __archive_read_consume(a, sizefield_length);
1153 return (ARCHIVE_OK);
1154 }
1155
1156 /* Sanity check to the extended header size. */
1157 if (((uint64_t)*total_size + extdsize) >
1158 (uint64_t)limitsize ||
1159 extdsize <= (size_t)sizefield_length)
1160 goto invalid;
1161
1162 /* Read the extended header. */
1163 if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL)
1164 return (truncated_error(a));
1165 *total_size += extdsize;
1166
1167 extdheader = (const unsigned char *)h;
1168 /* Get the extended header type. */
1169 extdtype = extdheader[sizefield_length];
1170 /* Calculate an extended data size. */
1171 datasize = extdsize - (1 + sizefield_length);
1172 /* Skip an extended header size field and type field. */
1173 extdheader += sizefield_length + 1;
1174
1175 if (crc != NULL && extdtype != EXT_HEADER_CRC)
1176 *crc = lha_crc16(*crc, h, extdsize);
1177 switch (extdtype) {
1178 case EXT_HEADER_CRC:
1179 /* We only use a header CRC. Following data will not
1180 * be used. */
1181 if (datasize >= 2) {
1182 lha->header_crc = archive_le16dec(extdheader);
1183 if (crc != NULL) {
1184 static const char zeros[2] = {0, 0};
1185 *crc = lha_crc16(*crc, h,
1186 extdsize - datasize);
1187 /* CRC value itself as zero */
1188 *crc = lha_crc16(*crc, zeros, 2);
1189 *crc = lha_crc16(*crc,
1190 extdheader+2, datasize - 2);
1191 }
1192 }
1193 break;
1194 case EXT_FILENAME:
1195 if (datasize == 0) {
1196 /* maybe directory header */
1197 archive_string_empty(&lha->filename);
1198 break;
1199 }
1200 if (extdheader[0] == '\0')
1201 goto invalid;
1202 archive_strncpy(&lha->filename,
1203 (const char *)extdheader, datasize);
1204 break;
1205 case EXT_DIRECTORY:
1206 if (datasize == 0 || extdheader[0] == '\0')
1207 /* no directory name data. exit this case. */
1208 goto invalid;
1209
1210 archive_strncpy(&lha->dirname,
1211 (const char *)extdheader, datasize);
1212 /*
1213 * Convert directory delimiter from 0xFF
1214 * to '/' for local system.
1215 */
1216 for (i = 0; i < lha->dirname.length; i++) {
1217 if ((unsigned char)lha->dirname.s[i] == 0xFF)
1218 lha->dirname.s[i] = '/';
1219 }
1220 /* Is last character directory separator? */
1221 if (lha->dirname.s[lha->dirname.length-1] != '/')
1222 /* invalid directory data */
1223 goto invalid;
1224 break;
1225 case EXT_DOS_ATTR:
1226 if (datasize == 2)
1227 lha->dos_attr = (unsigned char)
1228 (archive_le16dec(extdheader) & 0xff);
1229 break;
1230 case EXT_TIMESTAMP:
1231 if (datasize == (sizeof(uint64_t) * 3)) {
1232 lha->birthtime = lha_win_time(
1233 archive_le64dec(extdheader),
1234 &lha->birthtime_tv_nsec);
1235 extdheader += sizeof(uint64_t);
1236 lha->mtime = lha_win_time(
1237 archive_le64dec(extdheader),
1238 &lha->mtime_tv_nsec);
1239 extdheader += sizeof(uint64_t);
1240 lha->atime = lha_win_time(
1241 archive_le64dec(extdheader),
1242 &lha->atime_tv_nsec);
1243 lha->setflag |= BIRTHTIME_IS_SET |
1244 ATIME_IS_SET;
1245 }
1246 break;
1247 case EXT_FILESIZE:
1248 if (datasize == sizeof(uint64_t) * 2) {
1249 lha->compsize = archive_le64dec(extdheader);
1250 extdheader += sizeof(uint64_t);
1251 lha->origsize = archive_le64dec(extdheader);
1252 }
1253 break;
1254 case EXT_CODEPAGE:
1255 /* Get an archived filename charset from codepage.
1256 * This overwrites the charset specified by
1257 * hdrcharset option. */
1258 if (datasize == sizeof(uint32_t)) {
1259 struct archive_string cp;
1260 const char *charset;
1261
1262 archive_string_init(&cp);
1263 switch (archive_le32dec(extdheader)) {
1264 case 65001: /* UTF-8 */
1265 charset = "UTF-8";
1266 break;
1267 default:
1268 archive_string_sprintf(&cp, "CP%d",
1269 (int)archive_le32dec(extdheader));
1270 charset = cp.s;
1271 break;
1272 }
1273 lha->sconv =
1274 archive_string_conversion_from_charset(
1275 &(a->archive), charset, 1);
1276 archive_string_free(&cp);
1277 if (lha->sconv == NULL)
1278 return (ARCHIVE_FATAL);
1279 }
1280 break;
1281 case EXT_UNIX_MODE:
1282 if (datasize == sizeof(uint16_t)) {
1283 lha->mode = archive_le16dec(extdheader);
1284 lha->setflag |= UNIX_MODE_IS_SET;
1285 }
1286 break;
1287 case EXT_UNIX_GID_UID:
1288 if (datasize == (sizeof(uint16_t) * 2)) {
1289 lha->gid = archive_le16dec(extdheader);
1290 lha->uid = archive_le16dec(extdheader+2);
1291 }
1292 break;
1293 case EXT_UNIX_GNAME:
1294 if (datasize > 0)
1295 archive_strncpy(&lha->gname,
1296 (const char *)extdheader, datasize);
1297 break;
1298 case EXT_UNIX_UNAME:
1299 if (datasize > 0)
1300 archive_strncpy(&lha->uname,
1301 (const char *)extdheader, datasize);
1302 break;
1303 case EXT_UNIX_MTIME:
1304 if (datasize == sizeof(uint32_t))
1305 lha->mtime = archive_le32dec(extdheader);
1306 break;
1307 case EXT_OS2_NEW_ATTR:
1308 /* This extended header is OS/2 depend. */
1309 if (datasize == 16) {
1310 lha->dos_attr = (unsigned char)
1311 (archive_le16dec(extdheader) & 0xff);
1312 lha->mode = archive_le16dec(extdheader+2);
1313 lha->gid = archive_le16dec(extdheader+4);
1314 lha->uid = archive_le16dec(extdheader+6);
1315 lha->birthtime = archive_le32dec(extdheader+8);
1316 lha->atime = archive_le32dec(extdheader+12);
1317 lha->setflag |= UNIX_MODE_IS_SET
1318 | BIRTHTIME_IS_SET | ATIME_IS_SET;
1319 }
1320 break;
1321 case EXT_NEW_ATTR:
1322 if (datasize == 20) {
1323 lha->mode = (mode_t)archive_le32dec(extdheader);
1324 lha->gid = archive_le32dec(extdheader+4);
1325 lha->uid = archive_le32dec(extdheader+8);
1326 lha->birthtime = archive_le32dec(extdheader+12);
1327 lha->atime = archive_le32dec(extdheader+16);
1328 lha->setflag |= UNIX_MODE_IS_SET
1329 | BIRTHTIME_IS_SET | ATIME_IS_SET;
1330 }
1331 break;
1332 case EXT_TIMEZONE: /* Not supported */
1333 case EXT_UTF16_FILENAME: /* Not supported */
1334 case EXT_UTF16_DIRECTORY: /* Not supported */
1335 default:
1336 break;
1337 }
1338
1339 __archive_read_consume(a, extdsize);
1340 }
1341invalid:
1342 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1343 "Invalid extended LHa header");
1344 return (ARCHIVE_FATAL);
1345}
1346
1347static int
1348lha_end_of_entry(struct archive_read *a)
1349{
1350 struct lha *lha = (struct lha *)(a->format->data);
1351 int r = ARCHIVE_EOF;
1352
1353 if (!lha->end_of_entry_cleanup) {
1354 if ((lha->setflag & CRC_IS_SET) &&
1355 lha->crc != lha->entry_crc_calculated) {
1356 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1357 "LHa data CRC error");
1358 r = ARCHIVE_WARN;
1359 }
1360
1361 /* End-of-entry cleanup done. */
1362 lha->end_of_entry_cleanup = 1;
1363 }
1364 return (r);
1365}
1366
1367static int
1368archive_read_format_lha_read_data(struct archive_read *a,
1369 const void **buff, size_t *size, int64_t *offset)
1370{
1371 struct lha *lha = (struct lha *)(a->format->data);
1372 int r;
1373
1374 if (lha->entry_unconsumed) {
1375 /* Consume as much as the decompressor actually used. */
1376 __archive_read_consume(a, lha->entry_unconsumed);
1377 lha->entry_unconsumed = 0;
1378 }
1379 if (lha->end_of_entry) {
1380 *offset = lha->entry_offset;
1381 *size = 0;
1382 *buff = NULL;
1383 return (lha_end_of_entry(a));
1384 }
1385
1386 if (lha->entry_is_compressed)
1387 r = lha_read_data_lzh(a, buff, size, offset);
1388 else
1389 /* No compression. */
1390 r = lha_read_data_none(a, buff, size, offset);
1391 return (r);
1392}
1393
1394/*
1395 * Read a file content in no compression.
1396 *
1397 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1398 * lha->end_of_entry if it consumes all of the data.
1399 */
1400static int
1401lha_read_data_none(struct archive_read *a, const void **buff,
1402 size_t *size, int64_t *offset)
1403{
1404 struct lha *lha = (struct lha *)(a->format->data);
1405 ssize_t bytes_avail;
1406
1407 if (lha->entry_bytes_remaining == 0) {
1408 *buff = NULL;
1409 *size = 0;
1410 *offset = lha->entry_offset;
1411 lha->end_of_entry = 1;
1412 return (ARCHIVE_OK);
1413 }
1414 /*
1415 * Note: '1' here is a performance optimization.
1416 * Recall that the decompression layer returns a count of
1417 * available bytes; asking for more than that forces the
1418 * decompressor to combine reads by copying data.
1419 */
1420 *buff = __archive_read_ahead(a, 1, &bytes_avail);
1421 if (bytes_avail <= 0) {
1422 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1423 "Truncated LHa file data");
1424 return (ARCHIVE_FATAL);
1425 }
1426 if (bytes_avail > lha->entry_bytes_remaining)
1427 bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1428 lha->entry_crc_calculated =
1429 lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail);
1430 *size = bytes_avail;
1431 *offset = lha->entry_offset;
1432 lha->entry_offset += bytes_avail;
1433 lha->entry_bytes_remaining -= bytes_avail;
1434 if (lha->entry_bytes_remaining == 0)
1435 lha->end_of_entry = 1;
1436 lha->entry_unconsumed = bytes_avail;
1437 return (ARCHIVE_OK);
1438}
1439
1440/*
1441 * Read a file content in LZHUFF encoding.
1442 *
1443 * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is
1444 * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes
1445 * all of the data.
1446 */
1447static int
1448lha_read_data_lzh(struct archive_read *a, const void **buff,
1449 size_t *size, int64_t *offset)
1450{
1451 struct lha *lha = (struct lha *)(a->format->data);
1452 ssize_t bytes_avail;
1453 int r;
1454
1455 /* If we haven't yet read any data, initialize the decompressor. */
1456 if (!lha->decompress_init) {
1457 r = lzh_decode_init(&(lha->strm), lha->method);
1458 switch (r) {
1459 case ARCHIVE_OK:
1460 break;
1461 case ARCHIVE_FAILED:
1462 /* Unsupported compression. */
1463 *buff = NULL;
1464 *size = 0;
1465 *offset = 0;
1466 archive_set_error(&a->archive,
1467 ARCHIVE_ERRNO_FILE_FORMAT,
1468 "Unsupported lzh compression method -%c%c%c-",
1469 lha->method[0], lha->method[1], lha->method[2]);
1470 /* We know compressed size; just skip it. */
1471 archive_read_format_lha_read_data_skip(a);
1472 return (ARCHIVE_WARN);
1473 default:
1474 archive_set_error(&a->archive, ENOMEM,
1475 "Couldn't allocate memory "
1476 "for lzh decompression");
1477 return (ARCHIVE_FATAL);
1478 }
1479 /* We've initialized decompression for this stream. */
1480 lha->decompress_init = 1;
1481 lha->strm.avail_out = 0;
1482 lha->strm.total_out = 0;
1483 }
1484
1485 /*
1486 * Note: '1' here is a performance optimization.
1487 * Recall that the decompression layer returns a count of
1488 * available bytes; asking for more than that forces the
1489 * decompressor to combine reads by copying data.
1490 */
1491 lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail);
1492 if (bytes_avail <= 0) {
1493 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1494 "Truncated LHa file body");
1495 return (ARCHIVE_FATAL);
1496 }
1497 if (bytes_avail > lha->entry_bytes_remaining)
1498 bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1499
1500 lha->strm.avail_in = (int)bytes_avail;
1501 lha->strm.total_in = 0;
1502 lha->strm.avail_out = 0;
1503
1504 r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining);
1505 switch (r) {
1506 case ARCHIVE_OK:
1507 break;
1508 case ARCHIVE_EOF:
1509 lha->end_of_entry = 1;
1510 break;
1511 default:
1512 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1513 "Bad lzh data");
1514 return (ARCHIVE_FAILED);
1515 }
1516 lha->entry_unconsumed = lha->strm.total_in;
1517 lha->entry_bytes_remaining -= lha->strm.total_in;
1518
1519 if (lha->strm.avail_out) {
1520 *offset = lha->entry_offset;
1521 *size = lha->strm.avail_out;
1522 *buff = lha->strm.ref_ptr;
1523 lha->entry_crc_calculated =
1524 lha_crc16(lha->entry_crc_calculated, *buff, *size);
1525 lha->entry_offset += *size;
1526 } else {
1527 *offset = lha->entry_offset;
1528 *size = 0;
1529 *buff = NULL;
1530 if (lha->end_of_entry)
1531 return (lha_end_of_entry(a));
1532 }
1533 return (ARCHIVE_OK);
1534}
1535
1536/*
1537 * Skip a file content.
1538 */
1539static int
1540archive_read_format_lha_read_data_skip(struct archive_read *a)
1541{
1542 struct lha *lha;
1543 int64_t bytes_skipped;
1544
1545 lha = (struct lha *)(a->format->data);
1546
1547 if (lha->entry_unconsumed) {
1548 /* Consume as much as the decompressor actually used. */
1549 __archive_read_consume(a, lha->entry_unconsumed);
1550 lha->entry_unconsumed = 0;
1551 }
1552
1553 /* if we've already read to end of data, we're done. */
1554 if (lha->end_of_entry_cleanup)
1555 return (ARCHIVE_OK);
1556
1557 /*
1558 * If the length is at the beginning, we can skip the
1559 * compressed data much more quickly.
1560 */
1561 bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining);
1562 if (bytes_skipped < 0)
1563 return (ARCHIVE_FATAL);
1564
1565 /* This entry is finished and done. */
1566 lha->end_of_entry_cleanup = lha->end_of_entry = 1;
1567 return (ARCHIVE_OK);
1568}
1569
1570static int
1571archive_read_format_lha_cleanup(struct archive_read *a)
1572{
1573 struct lha *lha = (struct lha *)(a->format->data);
1574
1575 lzh_decode_free(&(lha->strm));
1576 archive_string_free(&(lha->dirname));
1577 archive_string_free(&(lha->filename));
1578 archive_string_free(&(lha->uname));
1579 archive_string_free(&(lha->gname));
1580 archive_wstring_free(&(lha->ws));
1581 free(lha);
1582 (a->format->data) = NULL;
1583 return (ARCHIVE_OK);
1584}
1585
1586/*
1587 * 'LHa for UNIX' utility has archived a symbolic-link name after
1588 * a pathname with '|' character.
1589 * This function extracts the symbolic-link name from the pathname.
1590 *
1591 * example.
1592 * 1. a symbolic-name is 'aaa/bb/cc'
1593 * 2. a filename is 'xxx/bbb'
1594 * then a archived pathname is 'xxx/bbb|aaa/bb/cc'
1595 */
1596static int
1597lha_parse_linkname(struct archive_string *linkname,
1598 struct archive_string *pathname)
1599{
1600 char * linkptr;
1601 size_t symlen;
1602
1603 linkptr = strchr(pathname->s, '|');
1604 if (linkptr != NULL) {
1605 symlen = strlen(linkptr + 1);
1606 archive_strncpy(linkname, linkptr+1, symlen);
1607
1608 *linkptr = 0;
1609 pathname->length = strlen(pathname->s);
1610
1611 return (1);
1612 }
1613 return (0);
1614}
1615
1616/* Convert an MSDOS-style date/time into Unix-style time. */
1617static time_t
1618lha_dos_time(const unsigned char *p)
1619{
1620 int msTime, msDate;
1621 struct tm ts;
1622
1623 msTime = archive_le16dec(p);
1624 msDate = archive_le16dec(p+2);
1625
1626 memset(&ts, 0, sizeof(ts));
1627 ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */
1628 ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */
1629 ts.tm_mday = msDate & 0x1f; /* Day of month. */
1630 ts.tm_hour = (msTime >> 11) & 0x1f;
1631 ts.tm_min = (msTime >> 5) & 0x3f;
1632 ts.tm_sec = (msTime << 1) & 0x3e;
1633 ts.tm_isdst = -1;
1634 return (mktime(&ts));
1635}
1636
1637/* Convert an MS-Windows-style date/time into Unix-style time. */
1638static time_t
1639lha_win_time(uint64_t wintime, long *ns)
1640{
1641#define EPOC_TIME ARCHIVE_LITERAL_ULL(116444736000000000)
1642
1643 if (wintime >= EPOC_TIME) {
1644 wintime -= EPOC_TIME; /* 1970-01-01 00:00:00 (UTC) */
1645 if (ns != NULL)
1646 *ns = (long)(wintime % 10000000) * 100;
1647 return (wintime / 10000000);
1648 } else {
1649 if (ns != NULL)
1650 *ns = 0;
1651 return (0);
1652 }
1653}
1654
1655static unsigned char
1656lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size)
1657{
1658 unsigned char const *p = (unsigned char const *)pp;
1659
1660 p += offset;
1661 for (;size > 0; --size)
1662 sum += *p++;
1663 return (sum);
1664}
1665
1666static uint16_t crc16tbl[2][256];
1667static void
1668lha_crc16_init(void)
1669{
1670 unsigned int i;
1671 static int crc16init = 0;
1672
1673 if (crc16init)
1674 return;
1675 crc16init = 1;
1676
1677 for (i = 0; i < 256; i++) {
1678 unsigned int j;
1679 uint16_t crc = (uint16_t)i;
1680 for (j = 8; j; j--)
1681 crc = (crc >> 1) ^ ((crc & 1) * 0xA001);
1682 crc16tbl[0][i] = crc;
1683 }
1684
1685 for (i = 0; i < 256; i++) {
1686 crc16tbl[1][i] = (crc16tbl[0][i] >> 8)
1687 ^ crc16tbl[0][crc16tbl[0][i] & 0xff];
1688 }
1689}
1690
1691static uint16_t
1692lha_crc16(uint16_t crc, const void *pp, size_t len)
1693{
1694 const unsigned char *p = (const unsigned char *)pp;
1695 const uint16_t *buff;
1696 const union {
1697 uint32_t i;
1698 char c[4];
1699 } u = { 0x01020304 };
1700
1701 if (len == 0)
1702 return crc;
1703
1704 /* Process unaligned address. */
1705 if (((uintptr_t)p) & (uintptr_t)0x1) {
1706 crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1707 len--;
1708 }
1709 buff = (const uint16_t *)p;
1710 /*
1711 * Modern C compiler such as GCC does not unroll automatically yet
1712 * without unrolling pragma, and Clang is so. So we should
1713 * unroll this loop for its performance.
1714 */
1715 for (;len >= 8; len -= 8) {
1716 /* This if statement expects compiler optimization will
1717 * remove the statement which will not be executed. */
1718#undef bswap16
1719#if defined(_MSC_VER) && _MSC_VER >= 1400 /* Visual Studio */
1720# define bswap16(x) _byteswap_ushort(x)
1721#elif defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4)
1722/* GCC 4.8 and later has __builtin_bswap16() */
1723# define bswap16(x) __builtin_bswap16(x)
1724#elif defined(__clang__)
1725/* All clang versions have __builtin_bswap16() */
1726# define bswap16(x) __builtin_bswap16(x)
1727#else
1728# define bswap16(x) ((((x) >> 8) & 0xff) | ((x) << 8))
1729#endif
1730#define CRC16W do { \
1731 if(u.c[0] == 1) { /* Big endian */ \
1732 crc ^= bswap16(*buff); buff++; \
1733 } else \
1734 crc ^= *buff++; \
1735 crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];\
1736} while (0)
1737 CRC16W;
1738 CRC16W;
1739 CRC16W;
1740 CRC16W;
1741#undef CRC16W
1742#undef bswap16
1743 }
1744
1745 p = (const unsigned char *)buff;
1746 for (;len; len--) {
1747 crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1748 }
1749 return crc;
1750}
1751
1752/*
1753 * Initialize LZHUF decoder.
1754 *
1755 * Returns ARCHIVE_OK if initialization was successful.
1756 * Returns ARCHIVE_FAILED if method is unsupported.
1757 * Returns ARCHIVE_FATAL if initialization failed; memory allocation
1758 * error occurred.
1759 */
1760static int
1761lzh_decode_init(struct lzh_stream *strm, const char *method)
1762{
1763 struct lzh_dec *ds;
1764 int w_bits, w_size;
1765
1766 if (strm->ds == NULL) {
1767 strm->ds = calloc(1, sizeof(*strm->ds));
1768 if (strm->ds == NULL)
1769 return (ARCHIVE_FATAL);
1770 }
1771 ds = strm->ds;
1772 ds->error = ARCHIVE_FAILED;
1773 if (method == NULL || method[0] != 'l' || method[1] != 'h')
1774 return (ARCHIVE_FAILED);
1775 switch (method[2]) {
1776 case '5':
1777 w_bits = 13;/* 8KiB for window */
1778 break;
1779 case '6':
1780 w_bits = 15;/* 32KiB for window */
1781 break;
1782 case '7':
1783 w_bits = 16;/* 64KiB for window */
1784 break;
1785 default:
1786 return (ARCHIVE_FAILED);/* Not supported. */
1787 }
1788 ds->error = ARCHIVE_FATAL;
1789 /* Expand a window size up to 128 KiB for decompressing process
1790 * performance whatever its original window size is. */
1791 ds->w_size = 1U << 17;
1792 ds->w_mask = ds->w_size -1;
1793 if (ds->w_buff == NULL) {
1794 ds->w_buff = malloc(ds->w_size);
1795 if (ds->w_buff == NULL)
1796 return (ARCHIVE_FATAL);
1797 }
1798 w_size = 1U << w_bits;
1799 memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size);
1800 ds->w_pos = 0;
1801 ds->state = 0;
1802 ds->pos_pt_len_size = w_bits + 1;
1803 ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4;
1804 ds->literal_pt_len_size = PT_BITLEN_SIZE;
1805 ds->literal_pt_len_bits = 5;
1806 ds->br.cache_buffer = 0;
1807 ds->br.cache_avail = 0;
1808
1809 if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16)
1810 != ARCHIVE_OK)
1811 return (ARCHIVE_FATAL);
1812 ds->lt.len_bits = 9;
1813 if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16)
1814 != ARCHIVE_OK)
1815 return (ARCHIVE_FATAL);
1816 ds->error = 0;
1817
1818 return (ARCHIVE_OK);
1819}
1820
1821/*
1822 * Release LZHUF decoder.
1823 */
1824static void
1825lzh_decode_free(struct lzh_stream *strm)
1826{
1827
1828 if (strm->ds == NULL)
1829 return;
1830 free(strm->ds->w_buff);
1831 lzh_huffman_free(&(strm->ds->lt));
1832 lzh_huffman_free(&(strm->ds->pt));
1833 free(strm->ds);
1834 strm->ds = NULL;
1835}
1836
1837/*
1838 * Bit stream reader.
1839 */
1840/* Check that the cache buffer has enough bits. */
1841#define lzh_br_has(br, n) ((br)->cache_avail >= n)
1842/* Get compressed data by bit. */
1843#define lzh_br_bits(br, n) \
1844 (((uint16_t)((br)->cache_buffer >> \
1845 ((br)->cache_avail - (n)))) & cache_masks[n])
1846#define lzh_br_bits_forced(br, n) \
1847 (((uint16_t)((br)->cache_buffer << \
1848 ((n) - (br)->cache_avail))) & cache_masks[n])
1849/* Read ahead to make sure the cache buffer has enough compressed data we
1850 * will use.
1851 * True : completed, there is enough data in the cache buffer.
1852 * False : we met that strm->next_in is empty, we have to get following
1853 * bytes. */
1854#define lzh_br_read_ahead_0(strm, br, n) \
1855 (lzh_br_has(br, (n)) || lzh_br_fillup(strm, br))
1856/* True : the cache buffer has some bits as much as we need.
1857 * False : there are no enough bits in the cache buffer to be used,
1858 * we have to get following bytes if we could. */
1859#define lzh_br_read_ahead(strm, br, n) \
1860 (lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n)))
1861
1862/* Notify how many bits we consumed. */
1863#define lzh_br_consume(br, n) ((br)->cache_avail -= (n))
1864#define lzh_br_unconsume(br, n) ((br)->cache_avail += (n))
1865
1866static const uint16_t cache_masks[] = {
1867 0x0000, 0x0001, 0x0003, 0x0007,
1868 0x000F, 0x001F, 0x003F, 0x007F,
1869 0x00FF, 0x01FF, 0x03FF, 0x07FF,
1870 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF,
1871 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
1872};
1873
1874/*
1875 * Shift away used bits in the cache data and fill it up with following bits.
1876 * Call this when cache buffer does not have enough bits you need.
1877 *
1878 * Returns 1 if the cache buffer is full.
1879 * Returns 0 if the cache buffer is not full; input buffer is empty.
1880 */
1881static int
1882lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br)
1883{
1884 int n = CACHE_BITS - br->cache_avail;
1885
1886 for (;;) {
1887 const int x = n >> 3;
1888 if (strm->avail_in >= x) {
1889 switch (x) {
1890 case 8:
1891 br->cache_buffer =
1892 ((uint64_t)strm->next_in[0]) << 56 |
1893 ((uint64_t)strm->next_in[1]) << 48 |
1894 ((uint64_t)strm->next_in[2]) << 40 |
1895 ((uint64_t)strm->next_in[3]) << 32 |
1896 ((uint32_t)strm->next_in[4]) << 24 |
1897 ((uint32_t)strm->next_in[5]) << 16 |
1898 ((uint32_t)strm->next_in[6]) << 8 |
1899 (uint32_t)strm->next_in[7];
1900 strm->next_in += 8;
1901 strm->avail_in -= 8;
1902 br->cache_avail += 8 * 8;
1903 return (1);
1904 case 7:
1905 br->cache_buffer =
1906 (br->cache_buffer << 56) |
1907 ((uint64_t)strm->next_in[0]) << 48 |
1908 ((uint64_t)strm->next_in[1]) << 40 |
1909 ((uint64_t)strm->next_in[2]) << 32 |
1910 ((uint32_t)strm->next_in[3]) << 24 |
1911 ((uint32_t)strm->next_in[4]) << 16 |
1912 ((uint32_t)strm->next_in[5]) << 8 |
1913 (uint32_t)strm->next_in[6];
1914 strm->next_in += 7;
1915 strm->avail_in -= 7;
1916 br->cache_avail += 7 * 8;
1917 return (1);
1918 case 6:
1919 br->cache_buffer =
1920 (br->cache_buffer << 48) |
1921 ((uint64_t)strm->next_in[0]) << 40 |
1922 ((uint64_t)strm->next_in[1]) << 32 |
1923 ((uint32_t)strm->next_in[2]) << 24 |
1924 ((uint32_t)strm->next_in[3]) << 16 |
1925 ((uint32_t)strm->next_in[4]) << 8 |
1926 (uint32_t)strm->next_in[5];
1927 strm->next_in += 6;
1928 strm->avail_in -= 6;
1929 br->cache_avail += 6 * 8;
1930 return (1);
1931 case 0:
1932 /* We have enough compressed data in
1933 * the cache buffer.*/
1934 return (1);
1935 default:
1936 break;
1937 }
1938 }
1939 if (strm->avail_in == 0) {
1940 /* There is not enough compressed data to fill up the
1941 * cache buffer. */
1942 return (0);
1943 }
1944 br->cache_buffer =
1945 (br->cache_buffer << 8) | *strm->next_in++;
1946 strm->avail_in--;
1947 br->cache_avail += 8;
1948 n -= 8;
1949 }
1950}
1951
1952/*
1953 * Decode LZHUF.
1954 *
1955 * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty.
1956 * Please set available buffer and call this function again.
1957 * 2. Returns ARCHIVE_EOF if decompression has been completed.
1958 * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data
1959 * is broken or you do not set 'last' flag properly.
1960 * 4. 'last' flag is very important, you must set 1 to the flag if there
1961 * is no input data. The lha compressed data format does not provide how
1962 * to know the compressed data is really finished.
1963 * Note: lha command utility check if the total size of output bytes is
1964 * reached the uncompressed size recorded in its header. it does not mind
1965 * that the decoding process is properly finished.
1966 * GNU ZIP can decompress another compressed file made by SCO LZH compress.
1967 * it handles EOF as null to fill read buffer with zero until the decoding
1968 * process meet 2 bytes of zeros at reading a size of a next chunk, so the
1969 * zeros are treated as the mark of the end of the data although the zeros
1970 * is dummy, not the file data.
1971 */
1972static int lzh_read_blocks(struct lzh_stream *, int);
1973static int lzh_decode_blocks(struct lzh_stream *, int);
1974#define ST_RD_BLOCK 0
1975#define ST_RD_PT_1 1
1976#define ST_RD_PT_2 2
1977#define ST_RD_PT_3 3
1978#define ST_RD_PT_4 4
1979#define ST_RD_LITERAL_1 5
1980#define ST_RD_LITERAL_2 6
1981#define ST_RD_LITERAL_3 7
1982#define ST_RD_POS_DATA_1 8
1983#define ST_GET_LITERAL 9
1984#define ST_GET_POS_1 10
1985#define ST_GET_POS_2 11
1986#define ST_COPY_DATA 12
1987
1988static int
1989lzh_decode(struct lzh_stream *strm, int last)
1990{
1991 struct lzh_dec *ds = strm->ds;
1992 int avail_in;
1993 int r;
1994
1995 if (ds->error)
1996 return (ds->error);
1997
1998 avail_in = strm->avail_in;
1999 do {
2000 if (ds->state < ST_GET_LITERAL)
2001 r = lzh_read_blocks(strm, last);
2002 else
2003 r = lzh_decode_blocks(strm, last);
2004 } while (r == 100);
2005 strm->total_in += avail_in - strm->avail_in;
2006 return (r);
2007}
2008
2009static void
2010lzh_emit_window(struct lzh_stream *strm, size_t s)
2011{
2012 strm->ref_ptr = strm->ds->w_buff;
2013 strm->avail_out = (int)s;
2014 strm->total_out += s;
2015}
2016
2017static int
2018lzh_read_blocks(struct lzh_stream *strm, int last)
2019{
2020 struct lzh_dec *ds = strm->ds;
2021 struct lzh_br *br = &(ds->br);
2022 int c = 0, i;
2023 unsigned rbits;
2024
2025 for (;;) {
2026 switch (ds->state) {
2027 case ST_RD_BLOCK:
2028 /*
2029 * Read a block number indicates how many blocks
2030 * we will handle. The block is composed of a
2031 * literal and a match, sometimes a literal only
2032 * in particular, there are no reference data at
2033 * the beginning of the decompression.
2034 */
2035 if (!lzh_br_read_ahead_0(strm, br, 16)) {
2036 if (!last)
2037 /* We need following data. */
2038 return (ARCHIVE_OK);
2039 if (lzh_br_has(br, 8)) {
2040 /*
2041 * It seems there are extra bits.
2042 * 1. Compressed data is broken.
2043 * 2. `last' flag does not properly
2044 * set.
2045 */
2046 goto failed;
2047 }
2048 if (ds->w_pos > 0) {
2049 lzh_emit_window(strm, ds->w_pos);
2050 ds->w_pos = 0;
2051 return (ARCHIVE_OK);
2052 }
2053 /* End of compressed data; we have completely
2054 * handled all compressed data. */
2055 return (ARCHIVE_EOF);
2056 }
2057 ds->blocks_avail = lzh_br_bits(br, 16);
2058 if (ds->blocks_avail == 0)
2059 goto failed;
2060 lzh_br_consume(br, 16);
2061 /*
2062 * Read a literal table compressed in huffman
2063 * coding.
2064 */
2065 ds->pt.len_size = ds->literal_pt_len_size;
2066 ds->pt.len_bits = ds->literal_pt_len_bits;
2067 ds->reading_position = 0;
2068 /* FALL THROUGH */
2069 case ST_RD_PT_1:
2070 /* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are
2071 * used in reading both a literal table and a
2072 * position table. */
2073 if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) {
2074 if (last)
2075 goto failed;/* Truncated data. */
2076 ds->state = ST_RD_PT_1;
2077 return (ARCHIVE_OK);
2078 }
2079 ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits);
2080 lzh_br_consume(br, ds->pt.len_bits);
2081 /* FALL THROUGH */
2082 case ST_RD_PT_2:
2083 if (ds->pt.len_avail == 0) {
2084 /* There is no bitlen. */
2085 if (!lzh_br_read_ahead(strm, br,
2086 ds->pt.len_bits)) {
2087 if (last)
2088 goto failed;/* Truncated data.*/
2089 ds->state = ST_RD_PT_2;
2090 return (ARCHIVE_OK);
2091 }
2092 if (!lzh_make_fake_table(&(ds->pt),
2093 lzh_br_bits(br, ds->pt.len_bits)))
2094 goto failed;/* Invalid data. */
2095 lzh_br_consume(br, ds->pt.len_bits);
2096 if (ds->reading_position)
2097 ds->state = ST_GET_LITERAL;
2098 else
2099 ds->state = ST_RD_LITERAL_1;
2100 break;
2101 } else if (ds->pt.len_avail > ds->pt.len_size)
2102 goto failed;/* Invalid data. */
2103 ds->loop = 0;
2104 memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
2105 if (ds->pt.len_avail < 3 ||
2106 ds->pt.len_size == ds->pos_pt_len_size) {
2107 ds->state = ST_RD_PT_4;
2108 break;
2109 }
2110 /* FALL THROUGH */
2111 case ST_RD_PT_3:
2112 ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3);
2113 if (ds->loop < 3) {
2114 if (ds->loop < 0 || last)
2115 goto failed;/* Invalid data. */
2116 /* Not completed, get following data. */
2117 ds->state = ST_RD_PT_3;
2118 return (ARCHIVE_OK);
2119 }
2120 /* There are some null in bitlen of the literal. */
2121 if (!lzh_br_read_ahead(strm, br, 2)) {
2122 if (last)
2123 goto failed;/* Truncated data. */
2124 ds->state = ST_RD_PT_3;
2125 return (ARCHIVE_OK);
2126 }
2127 c = lzh_br_bits(br, 2);
2128 lzh_br_consume(br, 2);
2129 if (c > ds->pt.len_avail - 3)
2130 goto failed;/* Invalid data. */
2131 for (i = 3; c-- > 0 ;)
2132 ds->pt.bitlen[i++] = 0;
2133 ds->loop = i;
2134 /* FALL THROUGH */
2135 case ST_RD_PT_4:
2136 ds->loop = lzh_read_pt_bitlen(strm, ds->loop,
2137 ds->pt.len_avail);
2138 if (ds->loop < ds->pt.len_avail) {
2139 if (ds->loop < 0 || last)
2140 goto failed;/* Invalid data. */
2141 /* Not completed, get following data. */
2142 ds->state = ST_RD_PT_4;
2143 return (ARCHIVE_OK);
2144 }
2145 if (!lzh_make_huffman_table(&(ds->pt)))
2146 goto failed;/* Invalid data */
2147 if (ds->reading_position) {
2148 ds->state = ST_GET_LITERAL;
2149 break;
2150 }
2151 /* FALL THROUGH */
2152 case ST_RD_LITERAL_1:
2153 if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) {
2154 if (last)
2155 goto failed;/* Truncated data. */
2156 ds->state = ST_RD_LITERAL_1;
2157 return (ARCHIVE_OK);
2158 }
2159 ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits);
2160 lzh_br_consume(br, ds->lt.len_bits);
2161 /* FALL THROUGH */
2162 case ST_RD_LITERAL_2:
2163 if (ds->lt.len_avail == 0) {
2164 /* There is no bitlen. */
2165 if (!lzh_br_read_ahead(strm, br,
2166 ds->lt.len_bits)) {
2167 if (last)
2168 goto failed;/* Truncated data.*/
2169 ds->state = ST_RD_LITERAL_2;
2170 return (ARCHIVE_OK);
2171 }
2172 if (!lzh_make_fake_table(&(ds->lt),
2173 lzh_br_bits(br, ds->lt.len_bits)))
2174 goto failed;/* Invalid data */
2175 lzh_br_consume(br, ds->lt.len_bits);
2176 ds->state = ST_RD_POS_DATA_1;
2177 break;
2178 } else if (ds->lt.len_avail > ds->lt.len_size)
2179 goto failed;/* Invalid data */
2180 ds->loop = 0;
2181 memset(ds->lt.freq, 0, sizeof(ds->lt.freq));
2182 /* FALL THROUGH */
2183 case ST_RD_LITERAL_3:
2184 i = ds->loop;
2185 while (i < ds->lt.len_avail) {
2186 if (!lzh_br_read_ahead(strm, br,
2187 ds->pt.max_bits)) {
2188 if (last)
2189 goto failed;/* Truncated data.*/
2190 ds->loop = i;
2191 ds->state = ST_RD_LITERAL_3;
2192 return (ARCHIVE_OK);
2193 }
2194 rbits = lzh_br_bits(br, ds->pt.max_bits);
2195 c = lzh_decode_huffman(&(ds->pt), rbits);
2196 if (c > 2) {
2197 /* Note: 'c' will never be more than
2198 * eighteen since it's limited by
2199 * PT_BITLEN_SIZE, which is being set
2200 * to ds->pt.len_size through
2201 * ds->literal_pt_len_size. */
2202 lzh_br_consume(br, ds->pt.bitlen[c]);
2203 c -= 2;
2204 ds->lt.freq[c]++;
2205 ds->lt.bitlen[i++] = c;
2206 } else if (c == 0) {
2207 lzh_br_consume(br, ds->pt.bitlen[c]);
2208 ds->lt.bitlen[i++] = 0;
2209 } else {
2210 /* c == 1 or c == 2 */
2211 int n = (c == 1)?4:9;
2212 if (!lzh_br_read_ahead(strm, br,
2213 ds->pt.bitlen[c] + n)) {
2214 if (last) /* Truncated data. */
2215 goto failed;
2216 ds->loop = i;
2217 ds->state = ST_RD_LITERAL_3;
2218 return (ARCHIVE_OK);
2219 }
2220 lzh_br_consume(br, ds->pt.bitlen[c]);
2221 c = lzh_br_bits(br, n);
2222 lzh_br_consume(br, n);
2223 c += (n == 4)?3:20;
2224 if (i + c > ds->lt.len_avail)
2225 goto failed;/* Invalid data */
2226 memset(&(ds->lt.bitlen[i]), 0, c);
2227 i += c;
2228 }
2229 }
2230 if (i > ds->lt.len_avail ||
2231 !lzh_make_huffman_table(&(ds->lt)))
2232 goto failed;/* Invalid data */
2233 /* FALL THROUGH */
2234 case ST_RD_POS_DATA_1:
2235 /*
2236 * Read a position table compressed in huffman
2237 * coding.
2238 */
2239 ds->pt.len_size = ds->pos_pt_len_size;
2240 ds->pt.len_bits = ds->pos_pt_len_bits;
2241 ds->reading_position = 1;
2242 ds->state = ST_RD_PT_1;
2243 break;
2244 case ST_GET_LITERAL:
2245 return (100);
2246 }
2247 }
2248failed:
2249 return (ds->error = ARCHIVE_FAILED);
2250}
2251
2252static int
2253lzh_decode_blocks(struct lzh_stream *strm, int last)
2254{
2255 struct lzh_dec *ds = strm->ds;
2256 struct lzh_br bre = ds->br;
2257 struct huffman *lt = &(ds->lt);
2258 struct huffman *pt = &(ds->pt);
2259 unsigned char *w_buff = ds->w_buff;
2260 unsigned char *lt_bitlen = lt->bitlen;
2261 unsigned char *pt_bitlen = pt->bitlen;
2262 int blocks_avail = ds->blocks_avail, c = 0;
2263 int copy_len = ds->copy_len, copy_pos = ds->copy_pos;
2264 int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
2265 int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits;
2266 int state = ds->state;
2267
2268 for (;;) {
2269 switch (state) {
2270 case ST_GET_LITERAL:
2271 for (;;) {
2272 if (blocks_avail == 0) {
2273 /* We have decoded all blocks.
2274 * Let's handle next blocks. */
2275 ds->state = ST_RD_BLOCK;
2276 ds->br = bre;
2277 ds->blocks_avail = 0;
2278 ds->w_pos = w_pos;
2279 ds->copy_pos = 0;
2280 return (100);
2281 }
2282
2283 /* lzh_br_read_ahead() always try to fill the
2284 * cache buffer up. In specific situation we
2285 * are close to the end of the data, the cache
2286 * buffer will not be full and thus we have to
2287 * determine if the cache buffer has some bits
2288 * as much as we need after lzh_br_read_ahead()
2289 * failed. */
2290 if (!lzh_br_read_ahead(strm, &bre,
2291 lt_max_bits)) {
2292 if (!last)
2293 goto next_data;
2294 /* Remaining bits are less than
2295 * maximum bits(lt.max_bits) but maybe
2296 * it still remains as much as we need,
2297 * so we should try to use it with
2298 * dummy bits. */
2299 c = lzh_decode_huffman(lt,
2300 lzh_br_bits_forced(&bre,
2301 lt_max_bits));
2302 lzh_br_consume(&bre, lt_bitlen[c]);
2303 if (!lzh_br_has(&bre, 0))
2304 goto failed;/* Over read. */
2305 } else {
2306 c = lzh_decode_huffman(lt,
2307 lzh_br_bits(&bre, lt_max_bits));
2308 lzh_br_consume(&bre, lt_bitlen[c]);
2309 }
2310 blocks_avail--;
2311 if (c > UCHAR_MAX)
2312 /* Current block is a match data. */
2313 break;
2314 /*
2315 * 'c' is exactly a literal code.
2316 */
2317 /* Save a decoded code to reference it
2318 * afterward. */
2319 w_buff[w_pos] = c;
2320 if (++w_pos >= w_size) {
2321 w_pos = 0;
2322 lzh_emit_window(strm, w_size);
2323 goto next_data;
2324 }
2325 }
2326 /* 'c' is the length of a match pattern we have
2327 * already extracted, which has be stored in
2328 * window(ds->w_buff). */
2329 copy_len = c - (UCHAR_MAX + 1) + MINMATCH;
2330 /* FALL THROUGH */
2331 case ST_GET_POS_1:
2332 /*
2333 * Get a reference position.
2334 */
2335 if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) {
2336 if (!last) {
2337 state = ST_GET_POS_1;
2338 ds->copy_len = copy_len;
2339 goto next_data;
2340 }
2341 copy_pos = lzh_decode_huffman(pt,
2342 lzh_br_bits_forced(&bre, pt_max_bits));
2343 lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2344 if (!lzh_br_has(&bre, 0))
2345 goto failed;/* Over read. */
2346 } else {
2347 copy_pos = lzh_decode_huffman(pt,
2348 lzh_br_bits(&bre, pt_max_bits));
2349 lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2350 }
2351 /* FALL THROUGH */
2352 case ST_GET_POS_2:
2353 if (copy_pos > 1) {
2354 /* We need an additional adjustment number to
2355 * the position. */
2356 int p = copy_pos - 1;
2357 if (!lzh_br_read_ahead(strm, &bre, p)) {
2358 if (last)
2359 goto failed;/* Truncated data.*/
2360 state = ST_GET_POS_2;
2361 ds->copy_len = copy_len;
2362 ds->copy_pos = copy_pos;
2363 goto next_data;
2364 }
2365 copy_pos = (1 << p) + lzh_br_bits(&bre, p);
2366 lzh_br_consume(&bre, p);
2367 }
2368 /* The position is actually a distance from the last
2369 * code we had extracted and thus we have to convert
2370 * it to a position of the window. */
2371 copy_pos = (w_pos - copy_pos - 1) & w_mask;
2372 /* FALL THROUGH */
2373 case ST_COPY_DATA:
2374 /*
2375 * Copy `copy_len' bytes as extracted data from
2376 * the window into the output buffer.
2377 */
2378 for (;;) {
2379 int l;
2380
2381 l = copy_len;
2382 if (copy_pos > w_pos) {
2383 if (l > w_size - copy_pos)
2384 l = w_size - copy_pos;
2385 } else {
2386 if (l > w_size - w_pos)
2387 l = w_size - w_pos;
2388 }
2389 if ((copy_pos + l < w_pos)
2390 || (w_pos + l < copy_pos)) {
2391 /* No overlap. */
2392 memcpy(w_buff + w_pos,
2393 w_buff + copy_pos, l);
2394 } else {
2395 const unsigned char *s;
2396 unsigned char *d;
2397 int li;
2398
2399 d = w_buff + w_pos;
2400 s = w_buff + copy_pos;
2401 for (li = 0; li < l-1;) {
2402 d[li] = s[li];li++;
2403 d[li] = s[li];li++;
2404 }
2405 if (li < l)
2406 d[li] = s[li];
2407 }
2408 w_pos += l;
2409 if (w_pos == w_size) {
2410 w_pos = 0;
2411 lzh_emit_window(strm, w_size);
2412 if (copy_len <= l)
2413 state = ST_GET_LITERAL;
2414 else {
2415 state = ST_COPY_DATA;
2416 ds->copy_len = copy_len - l;
2417 ds->copy_pos =
2418 (copy_pos + l) & w_mask;
2419 }
2420 goto next_data;
2421 }
2422 if (copy_len <= l)
2423 /* A copy of current pattern ended. */
2424 break;
2425 copy_len -= l;
2426 copy_pos = (copy_pos + l) & w_mask;
2427 }
2428 state = ST_GET_LITERAL;
2429 break;
2430 }
2431 }
2432failed:
2433 return (ds->error = ARCHIVE_FAILED);
2434next_data:
2435 ds->br = bre;
2436 ds->blocks_avail = blocks_avail;
2437 ds->state = state;
2438 ds->w_pos = w_pos;
2439 return (ARCHIVE_OK);
2440}
2441
2442static int
2443lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
2444{
2445 int bits;
2446
2447 if (hf->bitlen == NULL) {
2448 hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0]));
2449 if (hf->bitlen == NULL)
2450 return (ARCHIVE_FATAL);
2451 }
2452 if (hf->tbl == NULL) {
2453 if (tbl_bits < HTBL_BITS)
2454 bits = tbl_bits;
2455 else
2456 bits = HTBL_BITS;
2457 hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
2458 if (hf->tbl == NULL)
2459 return (ARCHIVE_FATAL);
2460 }
2461 if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
2462 hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
2463 hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
2464 if (hf->tree == NULL)
2465 return (ARCHIVE_FATAL);
2466 }
2467 hf->len_size = (int)len_size;
2468 hf->tbl_bits = tbl_bits;
2469 return (ARCHIVE_OK);
2470}
2471
2472static void
2473lzh_huffman_free(struct huffman *hf)
2474{
2475 free(hf->bitlen);
2476 free(hf->tbl);
2477 free(hf->tree);
2478}
2479
2480static char bitlen_tbl[0x400] = {
2480static const char bitlen_tbl[0x400] = {
2481 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2482 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2483 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2484 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2485 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2486 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2487 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2488 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2489 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2490 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2491 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2492 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2493 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2494 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2495 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2496 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2497 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2498 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2499 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2500 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2501 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2502 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2503 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2504 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2505 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2506 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2507 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2508 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2509 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2510 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2511 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2512 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2513 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2514 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2515 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2516 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2517 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2518 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2519 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2520 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2521 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2522 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2523 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2524 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2525 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2526 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2527 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2528 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2529 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2530 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2531 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2532 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2533 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2534 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2535 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2536 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2537 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2538 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2539 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2540 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2541 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2542 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2543 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
2544 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16, 0
2545};
2546static int
2547lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end)
2548{
2549 struct lzh_dec *ds = strm->ds;
2550 struct lzh_br *br = &(ds->br);
2551 int c, i;
2552
2553 for (i = start; i < end; ) {
2554 /*
2555 * bit pattern the number we need
2556 * 000 -> 0
2557 * 001 -> 1
2558 * 010 -> 2
2559 * ...
2560 * 110 -> 6
2561 * 1110 -> 7
2562 * 11110 -> 8
2563 * ...
2564 * 1111111111110 -> 16
2565 */
2566 if (!lzh_br_read_ahead(strm, br, 3))
2567 return (i);
2568 if ((c = lzh_br_bits(br, 3)) == 7) {
2569 if (!lzh_br_read_ahead(strm, br, 13))
2570 return (i);
2571 c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF];
2572 if (c)
2573 lzh_br_consume(br, c - 3);
2574 else
2575 return (-1);/* Invalid data. */
2576 } else
2577 lzh_br_consume(br, 3);
2578 ds->pt.bitlen[i++] = c;
2579 ds->pt.freq[c]++;
2580 }
2581 return (i);
2582}
2583
2584static int
2585lzh_make_fake_table(struct huffman *hf, uint16_t c)
2586{
2587 if (c >= hf->len_size)
2588 return (0);
2589 hf->tbl[0] = c;
2590 hf->max_bits = 0;
2591 hf->shift_bits = 0;
2592 hf->bitlen[hf->tbl[0]] = 0;
2593 return (1);
2594}
2595
2596/*
2597 * Make a huffman coding table.
2598 */
2599static int
2600lzh_make_huffman_table(struct huffman *hf)
2601{
2602 uint16_t *tbl;
2603 const unsigned char *bitlen;
2604 int bitptn[17], weight[17];
2605 int i, maxbits = 0, ptn, tbl_size, w;
2606 int diffbits, len_avail;
2607
2608 /*
2609 * Initialize bit patterns.
2610 */
2611 ptn = 0;
2612 for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
2613 bitptn[i] = ptn;
2614 weight[i] = w;
2615 if (hf->freq[i]) {
2616 ptn += hf->freq[i] * w;
2617 maxbits = i;
2618 }
2619 }
2620 if (ptn != 0x10000 || maxbits > hf->tbl_bits)
2621 return (0);/* Invalid */
2622
2623 hf->max_bits = maxbits;
2624
2625 /*
2626 * Cut out extra bits which we won't house in the table.
2627 * This preparation reduces the same calculation in the for-loop
2628 * making the table.
2629 */
2630 if (maxbits < 16) {
2631 int ebits = 16 - maxbits;
2632 for (i = 1; i <= maxbits; i++) {
2633 bitptn[i] >>= ebits;
2634 weight[i] >>= ebits;
2635 }
2636 }
2637 if (maxbits > HTBL_BITS) {
2638 unsigned htbl_max;
2639 uint16_t *p;
2640
2641 diffbits = maxbits - HTBL_BITS;
2642 for (i = 1; i <= HTBL_BITS; i++) {
2643 bitptn[i] >>= diffbits;
2644 weight[i] >>= diffbits;
2645 }
2646 htbl_max = bitptn[HTBL_BITS] +
2647 weight[HTBL_BITS] * hf->freq[HTBL_BITS];
2648 p = &(hf->tbl[htbl_max]);
2649 while (p < &hf->tbl[1U<<HTBL_BITS])
2650 *p++ = 0;
2651 } else
2652 diffbits = 0;
2653 hf->shift_bits = diffbits;
2654
2655 /*
2656 * Make the table.
2657 */
2658 tbl_size = 1 << HTBL_BITS;
2659 tbl = hf->tbl;
2660 bitlen = hf->bitlen;
2661 len_avail = hf->len_avail;
2662 hf->tree_used = 0;
2663 for (i = 0; i < len_avail; i++) {
2664 uint16_t *p;
2665 int len, cnt;
2666 uint16_t bit;
2667 int extlen;
2668 struct htree_t *ht;
2669
2670 if (bitlen[i] == 0)
2671 continue;
2672 /* Get a bit pattern */
2673 len = bitlen[i];
2674 ptn = bitptn[len];
2675 cnt = weight[len];
2676 if (len <= HTBL_BITS) {
2677 /* Calculate next bit pattern */
2678 if ((bitptn[len] = ptn + cnt) > tbl_size)
2679 return (0);/* Invalid */
2680 /* Update the table */
2681 p = &(tbl[ptn]);
2682 if (cnt > 7) {
2683 uint16_t *pc;
2684
2685 cnt -= 8;
2686 pc = &p[cnt];
2687 pc[0] = (uint16_t)i;
2688 pc[1] = (uint16_t)i;
2689 pc[2] = (uint16_t)i;
2690 pc[3] = (uint16_t)i;
2691 pc[4] = (uint16_t)i;
2692 pc[5] = (uint16_t)i;
2693 pc[6] = (uint16_t)i;
2694 pc[7] = (uint16_t)i;
2695 if (cnt > 7) {
2696 cnt -= 8;
2697 memcpy(&p[cnt], pc,
2698 8 * sizeof(uint16_t));
2699 pc = &p[cnt];
2700 while (cnt > 15) {
2701 cnt -= 16;
2702 memcpy(&p[cnt], pc,
2703 16 * sizeof(uint16_t));
2704 }
2705 }
2706 if (cnt)
2707 memcpy(p, pc, cnt * sizeof(uint16_t));
2708 } else {
2709 while (cnt > 1) {
2710 p[--cnt] = (uint16_t)i;
2711 p[--cnt] = (uint16_t)i;
2712 }
2713 if (cnt)
2714 p[--cnt] = (uint16_t)i;
2715 }
2716 continue;
2717 }
2718
2719 /*
2720 * A bit length is too big to be housed to a direct table,
2721 * so we use a tree model for its extra bits.
2722 */
2723 bitptn[len] = ptn + cnt;
2724 bit = 1U << (diffbits -1);
2725 extlen = len - HTBL_BITS;
2726
2727 p = &(tbl[ptn >> diffbits]);
2728 if (*p == 0) {
2729 *p = len_avail + hf->tree_used;
2730 ht = &(hf->tree[hf->tree_used++]);
2731 if (hf->tree_used > hf->tree_avail)
2732 return (0);/* Invalid */
2733 ht->left = 0;
2734 ht->right = 0;
2735 } else {
2736 if (*p < len_avail ||
2737 *p >= (len_avail + hf->tree_used))
2738 return (0);/* Invalid */
2739 ht = &(hf->tree[*p - len_avail]);
2740 }
2741 while (--extlen > 0) {
2742 if (ptn & bit) {
2743 if (ht->left < len_avail) {
2744 ht->left = len_avail + hf->tree_used;
2745 ht = &(hf->tree[hf->tree_used++]);
2746 if (hf->tree_used > hf->tree_avail)
2747 return (0);/* Invalid */
2748 ht->left = 0;
2749 ht->right = 0;
2750 } else {
2751 ht = &(hf->tree[ht->left - len_avail]);
2752 }
2753 } else {
2754 if (ht->right < len_avail) {
2755 ht->right = len_avail + hf->tree_used;
2756 ht = &(hf->tree[hf->tree_used++]);
2757 if (hf->tree_used > hf->tree_avail)
2758 return (0);/* Invalid */
2759 ht->left = 0;
2760 ht->right = 0;
2761 } else {
2762 ht = &(hf->tree[ht->right - len_avail]);
2763 }
2764 }
2765 bit >>= 1;
2766 }
2767 if (ptn & bit) {
2768 if (ht->left != 0)
2769 return (0);/* Invalid */
2770 ht->left = (uint16_t)i;
2771 } else {
2772 if (ht->right != 0)
2773 return (0);/* Invalid */
2774 ht->right = (uint16_t)i;
2775 }
2776 }
2777 return (1);
2778}
2779
2780static int
2781lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
2782{
2783 struct htree_t *ht;
2784 int extlen;
2785
2786 ht = hf->tree;
2787 extlen = hf->shift_bits;
2788 while (c >= hf->len_avail) {
2789 c -= hf->len_avail;
2790 if (extlen-- <= 0 || c >= hf->tree_used)
2791 return (0);
2792 if (rbits & (1U << extlen))
2793 c = ht[c].left;
2794 else
2795 c = ht[c].right;
2796 }
2797 return (c);
2798}
2799
2800static inline int
2801lzh_decode_huffman(struct huffman *hf, unsigned rbits)
2802{
2803 int c;
2804 /*
2805 * At first search an index table for a bit pattern.
2806 * If it fails, search a huffman tree for.
2807 */
2808 c = hf->tbl[rbits >> hf->shift_bits];
2809 if (c < hf->len_avail || hf->len_avail == 0)
2810 return (c);
2811 /* This bit pattern needs to be found out at a huffman tree. */
2812 return (lzh_decode_huffman_tree(hf, rbits, c));
2813}
2814
2481 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2482 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2483 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2484 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2485 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2486 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2487 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2488 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2489 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2490 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2491 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2492 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2493 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2494 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2495 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2496 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2497 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2498 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2499 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2500 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2501 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2502 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2503 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2504 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2505 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2506 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2507 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2508 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2509 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2510 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2511 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2512 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2513 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2514 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2515 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2516 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2517 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2518 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2519 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2520 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2521 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2522 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2523 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2524 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2525 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2526 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2527 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2528 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2529 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2530 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2531 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2532 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2533 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2534 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2535 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2536 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2537 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2538 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2539 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2540 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2541 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2542 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2543 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
2544 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16, 0
2545};
2546static int
2547lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end)
2548{
2549 struct lzh_dec *ds = strm->ds;
2550 struct lzh_br *br = &(ds->br);
2551 int c, i;
2552
2553 for (i = start; i < end; ) {
2554 /*
2555 * bit pattern the number we need
2556 * 000 -> 0
2557 * 001 -> 1
2558 * 010 -> 2
2559 * ...
2560 * 110 -> 6
2561 * 1110 -> 7
2562 * 11110 -> 8
2563 * ...
2564 * 1111111111110 -> 16
2565 */
2566 if (!lzh_br_read_ahead(strm, br, 3))
2567 return (i);
2568 if ((c = lzh_br_bits(br, 3)) == 7) {
2569 if (!lzh_br_read_ahead(strm, br, 13))
2570 return (i);
2571 c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF];
2572 if (c)
2573 lzh_br_consume(br, c - 3);
2574 else
2575 return (-1);/* Invalid data. */
2576 } else
2577 lzh_br_consume(br, 3);
2578 ds->pt.bitlen[i++] = c;
2579 ds->pt.freq[c]++;
2580 }
2581 return (i);
2582}
2583
2584static int
2585lzh_make_fake_table(struct huffman *hf, uint16_t c)
2586{
2587 if (c >= hf->len_size)
2588 return (0);
2589 hf->tbl[0] = c;
2590 hf->max_bits = 0;
2591 hf->shift_bits = 0;
2592 hf->bitlen[hf->tbl[0]] = 0;
2593 return (1);
2594}
2595
2596/*
2597 * Make a huffman coding table.
2598 */
2599static int
2600lzh_make_huffman_table(struct huffman *hf)
2601{
2602 uint16_t *tbl;
2603 const unsigned char *bitlen;
2604 int bitptn[17], weight[17];
2605 int i, maxbits = 0, ptn, tbl_size, w;
2606 int diffbits, len_avail;
2607
2608 /*
2609 * Initialize bit patterns.
2610 */
2611 ptn = 0;
2612 for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
2613 bitptn[i] = ptn;
2614 weight[i] = w;
2615 if (hf->freq[i]) {
2616 ptn += hf->freq[i] * w;
2617 maxbits = i;
2618 }
2619 }
2620 if (ptn != 0x10000 || maxbits > hf->tbl_bits)
2621 return (0);/* Invalid */
2622
2623 hf->max_bits = maxbits;
2624
2625 /*
2626 * Cut out extra bits which we won't house in the table.
2627 * This preparation reduces the same calculation in the for-loop
2628 * making the table.
2629 */
2630 if (maxbits < 16) {
2631 int ebits = 16 - maxbits;
2632 for (i = 1; i <= maxbits; i++) {
2633 bitptn[i] >>= ebits;
2634 weight[i] >>= ebits;
2635 }
2636 }
2637 if (maxbits > HTBL_BITS) {
2638 unsigned htbl_max;
2639 uint16_t *p;
2640
2641 diffbits = maxbits - HTBL_BITS;
2642 for (i = 1; i <= HTBL_BITS; i++) {
2643 bitptn[i] >>= diffbits;
2644 weight[i] >>= diffbits;
2645 }
2646 htbl_max = bitptn[HTBL_BITS] +
2647 weight[HTBL_BITS] * hf->freq[HTBL_BITS];
2648 p = &(hf->tbl[htbl_max]);
2649 while (p < &hf->tbl[1U<<HTBL_BITS])
2650 *p++ = 0;
2651 } else
2652 diffbits = 0;
2653 hf->shift_bits = diffbits;
2654
2655 /*
2656 * Make the table.
2657 */
2658 tbl_size = 1 << HTBL_BITS;
2659 tbl = hf->tbl;
2660 bitlen = hf->bitlen;
2661 len_avail = hf->len_avail;
2662 hf->tree_used = 0;
2663 for (i = 0; i < len_avail; i++) {
2664 uint16_t *p;
2665 int len, cnt;
2666 uint16_t bit;
2667 int extlen;
2668 struct htree_t *ht;
2669
2670 if (bitlen[i] == 0)
2671 continue;
2672 /* Get a bit pattern */
2673 len = bitlen[i];
2674 ptn = bitptn[len];
2675 cnt = weight[len];
2676 if (len <= HTBL_BITS) {
2677 /* Calculate next bit pattern */
2678 if ((bitptn[len] = ptn + cnt) > tbl_size)
2679 return (0);/* Invalid */
2680 /* Update the table */
2681 p = &(tbl[ptn]);
2682 if (cnt > 7) {
2683 uint16_t *pc;
2684
2685 cnt -= 8;
2686 pc = &p[cnt];
2687 pc[0] = (uint16_t)i;
2688 pc[1] = (uint16_t)i;
2689 pc[2] = (uint16_t)i;
2690 pc[3] = (uint16_t)i;
2691 pc[4] = (uint16_t)i;
2692 pc[5] = (uint16_t)i;
2693 pc[6] = (uint16_t)i;
2694 pc[7] = (uint16_t)i;
2695 if (cnt > 7) {
2696 cnt -= 8;
2697 memcpy(&p[cnt], pc,
2698 8 * sizeof(uint16_t));
2699 pc = &p[cnt];
2700 while (cnt > 15) {
2701 cnt -= 16;
2702 memcpy(&p[cnt], pc,
2703 16 * sizeof(uint16_t));
2704 }
2705 }
2706 if (cnt)
2707 memcpy(p, pc, cnt * sizeof(uint16_t));
2708 } else {
2709 while (cnt > 1) {
2710 p[--cnt] = (uint16_t)i;
2711 p[--cnt] = (uint16_t)i;
2712 }
2713 if (cnt)
2714 p[--cnt] = (uint16_t)i;
2715 }
2716 continue;
2717 }
2718
2719 /*
2720 * A bit length is too big to be housed to a direct table,
2721 * so we use a tree model for its extra bits.
2722 */
2723 bitptn[len] = ptn + cnt;
2724 bit = 1U << (diffbits -1);
2725 extlen = len - HTBL_BITS;
2726
2727 p = &(tbl[ptn >> diffbits]);
2728 if (*p == 0) {
2729 *p = len_avail + hf->tree_used;
2730 ht = &(hf->tree[hf->tree_used++]);
2731 if (hf->tree_used > hf->tree_avail)
2732 return (0);/* Invalid */
2733 ht->left = 0;
2734 ht->right = 0;
2735 } else {
2736 if (*p < len_avail ||
2737 *p >= (len_avail + hf->tree_used))
2738 return (0);/* Invalid */
2739 ht = &(hf->tree[*p - len_avail]);
2740 }
2741 while (--extlen > 0) {
2742 if (ptn & bit) {
2743 if (ht->left < len_avail) {
2744 ht->left = len_avail + hf->tree_used;
2745 ht = &(hf->tree[hf->tree_used++]);
2746 if (hf->tree_used > hf->tree_avail)
2747 return (0);/* Invalid */
2748 ht->left = 0;
2749 ht->right = 0;
2750 } else {
2751 ht = &(hf->tree[ht->left - len_avail]);
2752 }
2753 } else {
2754 if (ht->right < len_avail) {
2755 ht->right = len_avail + hf->tree_used;
2756 ht = &(hf->tree[hf->tree_used++]);
2757 if (hf->tree_used > hf->tree_avail)
2758 return (0);/* Invalid */
2759 ht->left = 0;
2760 ht->right = 0;
2761 } else {
2762 ht = &(hf->tree[ht->right - len_avail]);
2763 }
2764 }
2765 bit >>= 1;
2766 }
2767 if (ptn & bit) {
2768 if (ht->left != 0)
2769 return (0);/* Invalid */
2770 ht->left = (uint16_t)i;
2771 } else {
2772 if (ht->right != 0)
2773 return (0);/* Invalid */
2774 ht->right = (uint16_t)i;
2775 }
2776 }
2777 return (1);
2778}
2779
2780static int
2781lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
2782{
2783 struct htree_t *ht;
2784 int extlen;
2785
2786 ht = hf->tree;
2787 extlen = hf->shift_bits;
2788 while (c >= hf->len_avail) {
2789 c -= hf->len_avail;
2790 if (extlen-- <= 0 || c >= hf->tree_used)
2791 return (0);
2792 if (rbits & (1U << extlen))
2793 c = ht[c].left;
2794 else
2795 c = ht[c].right;
2796 }
2797 return (c);
2798}
2799
2800static inline int
2801lzh_decode_huffman(struct huffman *hf, unsigned rbits)
2802{
2803 int c;
2804 /*
2805 * At first search an index table for a bit pattern.
2806 * If it fails, search a huffman tree for.
2807 */
2808 c = hf->tbl[rbits >> hf->shift_bits];
2809 if (c < hf->len_avail || hf->len_avail == 0)
2810 return (c);
2811 /* This bit pattern needs to be found out at a huffman tree. */
2812 return (lzh_decode_huffman_tree(hf, rbits, c));
2813}
2814