archive_read_support_format_lha.c revision 316337
1/*-
2 * Copyright (c) 2008-2014 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
27
28#ifdef HAVE_ERRNO_H
29#include <errno.h>
30#endif
31#ifdef HAVE_LIMITS_H
32#include <limits.h>
33#endif
34#ifdef HAVE_STDLIB_H
35#include <stdlib.h>
36#endif
37#ifdef HAVE_STRING_H
38#include <string.h>
39#endif
40
41#include "archive.h"
42#include "archive_entry.h"
43#include "archive_entry_locale.h"
44#include "archive_private.h"
45#include "archive_read_private.h"
46#include "archive_endian.h"
47
48
49#define MAXMATCH		256	/* Maximum match length. */
50#define MINMATCH		3	/* Minimum match length. */
51/*
52 * Literal table format:
53 * +0              +256                      +510
54 * +---------------+-------------------------+
55 * | literal code  |       match length      |
56 * |   0 ... 255   |  MINMATCH ... MAXMATCH  |
57 * +---------------+-------------------------+
58 *  <---          LT_BITLEN_SIZE         --->
59 */
60/* Literal table size. */
61#define LT_BITLEN_SIZE		(UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1)
62/* Position table size.
63 * Note: this used for both position table and pre literal table.*/
64#define PT_BITLEN_SIZE		(3 + 16)
65
66struct lzh_dec {
67	/* Decoding status. */
68	int     		 state;
69
70	/*
71	 * Window to see last 8Ki(lh5),32Ki(lh6),64Ki(lh7) bytes of decoded
72	 * data.
73	 */
74	int			 w_size;
75	int			 w_mask;
76	/* Window buffer, which is a loop buffer. */
77	unsigned char		*w_buff;
78	/* The insert position to the window. */
79	int			 w_pos;
80	/* The position where we can copy decoded code from the window. */
81	int     		 copy_pos;
82	/* The length how many bytes we can copy decoded code from
83	 * the window. */
84	int     		 copy_len;
85
86	/*
87	 * Bit stream reader.
88	 */
89	struct lzh_br {
90#define CACHE_TYPE		uint64_t
91#define CACHE_BITS		(8 * sizeof(CACHE_TYPE))
92	 	/* Cache buffer. */
93		CACHE_TYPE	 cache_buffer;
94		/* Indicates how many bits avail in cache_buffer. */
95		int		 cache_avail;
96	} br;
97
98	/*
99	 * Huffman coding.
100	 */
101	struct huffman {
102		int		 len_size;
103		int		 len_avail;
104		int		 len_bits;
105		int		 freq[17];
106		unsigned char	*bitlen;
107
108		/*
109		 * Use a index table. It's faster than searching a huffman
110		 * coding tree, which is a binary tree. But a use of a large
111		 * index table causes L1 cache read miss many times.
112		 */
113#define HTBL_BITS	10
114		int		 max_bits;
115		int		 shift_bits;
116		int		 tbl_bits;
117		int		 tree_used;
118		int		 tree_avail;
119		/* Direct access table. */
120		uint16_t	*tbl;
121		/* Binary tree table for extra bits over the direct access. */
122		struct htree_t {
123			uint16_t left;
124			uint16_t right;
125		}		*tree;
126	}			 lt, pt;
127
128	int			 blocks_avail;
129	int			 pos_pt_len_size;
130	int			 pos_pt_len_bits;
131	int			 literal_pt_len_size;
132	int			 literal_pt_len_bits;
133	int			 reading_position;
134	int			 loop;
135	int			 error;
136};
137
138struct lzh_stream {
139	const unsigned char	*next_in;
140	int			 avail_in;
141	int64_t			 total_in;
142	const unsigned char	*ref_ptr;
143	int			 avail_out;
144	int64_t			 total_out;
145	struct lzh_dec		*ds;
146};
147
148struct lha {
149	/* entry_bytes_remaining is the number of bytes we expect.	    */
150	int64_t                  entry_offset;
151	int64_t                  entry_bytes_remaining;
152	int64_t			 entry_unconsumed;
153	uint16_t		 entry_crc_calculated;
154
155	size_t			 header_size;	/* header size		    */
156	unsigned char		 level;		/* header level		    */
157	char			 method[3];	/* compress type	    */
158	int64_t			 compsize;	/* compressed data size	    */
159	int64_t			 origsize;	/* original file size	    */
160	int			 setflag;
161#define BIRTHTIME_IS_SET	1
162#define ATIME_IS_SET		2
163#define UNIX_MODE_IS_SET	4
164#define CRC_IS_SET		8
165	time_t			 birthtime;
166	long			 birthtime_tv_nsec;
167	time_t			 mtime;
168	long			 mtime_tv_nsec;
169	time_t			 atime;
170	long			 atime_tv_nsec;
171	mode_t			 mode;
172	int64_t			 uid;
173	int64_t			 gid;
174	struct archive_string 	 uname;
175	struct archive_string 	 gname;
176	uint16_t		 header_crc;
177	uint16_t		 crc;
178	struct archive_string_conv *sconv;
179	struct archive_string_conv *opt_sconv;
180
181	struct archive_string 	 dirname;
182	struct archive_string 	 filename;
183	struct archive_wstring	 ws;
184
185	unsigned char		 dos_attr;
186
187	/* Flag to mark progress that an archive was read their first header.*/
188	char			 found_first_header;
189	/* Flag to mark that indicates an empty directory. */
190	char			 directory;
191
192	/* Flags to mark progress of decompression. */
193	char			 decompress_init;
194	char			 end_of_entry;
195	char			 end_of_entry_cleanup;
196	char			 entry_is_compressed;
197
198	char			 format_name[64];
199
200	struct lzh_stream	 strm;
201};
202
203/*
204 * LHA header common member offset.
205 */
206#define H_METHOD_OFFSET	2	/* Compress type. */
207#define H_ATTR_OFFSET	19	/* DOS attribute. */
208#define H_LEVEL_OFFSET	20	/* Header Level.  */
209#define H_SIZE		22	/* Minimum header size. */
210
211static int      archive_read_format_lha_bid(struct archive_read *, int);
212static int      archive_read_format_lha_options(struct archive_read *,
213		    const char *, const char *);
214static int	archive_read_format_lha_read_header(struct archive_read *,
215		    struct archive_entry *);
216static int	archive_read_format_lha_read_data(struct archive_read *,
217		    const void **, size_t *, int64_t *);
218static int	archive_read_format_lha_read_data_skip(struct archive_read *);
219static int	archive_read_format_lha_cleanup(struct archive_read *);
220
221static void	lha_replace_path_separator(struct lha *,
222		    struct archive_entry *);
223static int	lha_read_file_header_0(struct archive_read *, struct lha *);
224static int	lha_read_file_header_1(struct archive_read *, struct lha *);
225static int	lha_read_file_header_2(struct archive_read *, struct lha *);
226static int	lha_read_file_header_3(struct archive_read *, struct lha *);
227static int	lha_read_file_extended_header(struct archive_read *,
228		    struct lha *, uint16_t *, int, size_t, size_t *);
229static size_t	lha_check_header_format(const void *);
230static int	lha_skip_sfx(struct archive_read *);
231static time_t	lha_dos_time(const unsigned char *);
232static time_t	lha_win_time(uint64_t, long *);
233static unsigned char	lha_calcsum(unsigned char, const void *,
234		    int, size_t);
235static int	lha_parse_linkname(struct archive_string *,
236		    struct archive_string *);
237static int	lha_read_data_none(struct archive_read *, const void **,
238		    size_t *, int64_t *);
239static int	lha_read_data_lzh(struct archive_read *, const void **,
240		    size_t *, int64_t *);
241static void	lha_crc16_init(void);
242static uint16_t lha_crc16(uint16_t, const void *, size_t);
243static int	lzh_decode_init(struct lzh_stream *, const char *);
244static void	lzh_decode_free(struct lzh_stream *);
245static int	lzh_decode(struct lzh_stream *, int);
246static int	lzh_br_fillup(struct lzh_stream *, struct lzh_br *);
247static int	lzh_huffman_init(struct huffman *, size_t, int);
248static void	lzh_huffman_free(struct huffman *);
249static int	lzh_read_pt_bitlen(struct lzh_stream *, int start, int end);
250static int	lzh_make_fake_table(struct huffman *, uint16_t);
251static int	lzh_make_huffman_table(struct huffman *);
252static inline int lzh_decode_huffman(struct huffman *, unsigned);
253static int	lzh_decode_huffman_tree(struct huffman *, unsigned, int);
254
255
256int
257archive_read_support_format_lha(struct archive *_a)
258{
259	struct archive_read *a = (struct archive_read *)_a;
260	struct lha *lha;
261	int r;
262
263	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
264	    ARCHIVE_STATE_NEW, "archive_read_support_format_lha");
265
266	lha = (struct lha *)calloc(1, sizeof(*lha));
267	if (lha == NULL) {
268		archive_set_error(&a->archive, ENOMEM,
269		    "Can't allocate lha data");
270		return (ARCHIVE_FATAL);
271	}
272	archive_string_init(&lha->ws);
273
274	r = __archive_read_register_format(a,
275	    lha,
276	    "lha",
277	    archive_read_format_lha_bid,
278	    archive_read_format_lha_options,
279	    archive_read_format_lha_read_header,
280	    archive_read_format_lha_read_data,
281	    archive_read_format_lha_read_data_skip,
282	    NULL,
283	    archive_read_format_lha_cleanup,
284	    NULL,
285	    NULL);
286
287	if (r != ARCHIVE_OK)
288		free(lha);
289	return (ARCHIVE_OK);
290}
291
292static size_t
293lha_check_header_format(const void *h)
294{
295	const unsigned char *p = h;
296	size_t next_skip_bytes;
297
298	switch (p[H_METHOD_OFFSET+3]) {
299	/*
300	 * "-lh0-" ... "-lh7-" "-lhd-"
301	 * "-lzs-" "-lz5-"
302	 */
303	case '0': case '1': case '2': case '3':
304	case '4': case '5': case '6': case '7':
305	case 'd':
306	case 's':
307		next_skip_bytes = 4;
308
309		/* b0 == 0 means the end of an LHa archive file.	*/
310		if (p[0] == 0)
311			break;
312		if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l'
313		    ||  p[H_METHOD_OFFSET+4] != '-')
314			break;
315
316		if (p[H_METHOD_OFFSET+2] == 'h') {
317			/* "-lh?-" */
318			if (p[H_METHOD_OFFSET+3] == 's')
319				break;
320			if (p[H_LEVEL_OFFSET] == 0)
321				return (0);
322			if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20)
323				return (0);
324		}
325		if (p[H_METHOD_OFFSET+2] == 'z') {
326			/* LArc extensions: -lzs-,-lz4- and -lz5- */
327			if (p[H_LEVEL_OFFSET] != 0)
328				break;
329			if (p[H_METHOD_OFFSET+3] == 's'
330			    || p[H_METHOD_OFFSET+3] == '4'
331			    || p[H_METHOD_OFFSET+3] == '5')
332				return (0);
333		}
334		break;
335	case 'h': next_skip_bytes = 1; break;
336	case 'z': next_skip_bytes = 1; break;
337	case 'l': next_skip_bytes = 2; break;
338	case '-': next_skip_bytes = 3; break;
339	default : next_skip_bytes = 4; break;
340	}
341
342	return (next_skip_bytes);
343}
344
345static int
346archive_read_format_lha_bid(struct archive_read *a, int best_bid)
347{
348	const char *p;
349	const void *buff;
350	ssize_t bytes_avail, offset, window;
351	size_t next;
352
353	/* If there's already a better bid than we can ever
354	   make, don't bother testing. */
355	if (best_bid > 30)
356		return (-1);
357
358	if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL)
359		return (-1);
360
361	if (lha_check_header_format(p) == 0)
362		return (30);
363
364	if (p[0] == 'M' && p[1] == 'Z') {
365		/* PE file */
366		offset = 0;
367		window = 4096;
368		while (offset < (1024 * 20)) {
369			buff = __archive_read_ahead(a, offset + window,
370			    &bytes_avail);
371			if (buff == NULL) {
372				/* Remaining bytes are less than window. */
373				window >>= 1;
374				if (window < (H_SIZE + 3))
375					return (0);
376				continue;
377			}
378			p = (const char *)buff + offset;
379			while (p + H_SIZE < (const char *)buff + bytes_avail) {
380				if ((next = lha_check_header_format(p)) == 0)
381					return (30);
382				p += next;
383			}
384			offset = p - (const char *)buff;
385		}
386	}
387	return (0);
388}
389
390static int
391archive_read_format_lha_options(struct archive_read *a,
392    const char *key, const char *val)
393{
394	struct lha *lha;
395	int ret = ARCHIVE_FAILED;
396
397	lha = (struct lha *)(a->format->data);
398	if (strcmp(key, "hdrcharset")  == 0) {
399		if (val == NULL || val[0] == 0)
400			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
401			    "lha: hdrcharset option needs a character-set name");
402		else {
403			lha->opt_sconv =
404			    archive_string_conversion_from_charset(
405				&a->archive, val, 0);
406			if (lha->opt_sconv != NULL)
407				ret = ARCHIVE_OK;
408			else
409				ret = ARCHIVE_FATAL;
410		}
411		return (ret);
412	}
413
414	/* Note: The "warn" return is just to inform the options
415	 * supervisor that we didn't handle it.  It will generate
416	 * a suitable error if no one used this option. */
417	return (ARCHIVE_WARN);
418}
419
420static int
421lha_skip_sfx(struct archive_read *a)
422{
423	const void *h;
424	const char *p, *q;
425	size_t next, skip;
426	ssize_t bytes, window;
427
428	window = 4096;
429	for (;;) {
430		h = __archive_read_ahead(a, window, &bytes);
431		if (h == NULL) {
432			/* Remaining bytes are less than window. */
433			window >>= 1;
434			if (window < (H_SIZE + 3))
435				goto fatal;
436			continue;
437		}
438		if (bytes < H_SIZE)
439			goto fatal;
440		p = h;
441		q = p + bytes;
442
443		/*
444		 * Scan ahead until we find something that looks
445		 * like the lha header.
446		 */
447		while (p + H_SIZE < q) {
448			if ((next = lha_check_header_format(p)) == 0) {
449				skip = p - (const char *)h;
450				__archive_read_consume(a, skip);
451				return (ARCHIVE_OK);
452			}
453			p += next;
454		}
455		skip = p - (const char *)h;
456		__archive_read_consume(a, skip);
457	}
458fatal:
459	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
460	    "Couldn't find out LHa header");
461	return (ARCHIVE_FATAL);
462}
463
464static int
465truncated_error(struct archive_read *a)
466{
467	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
468	    "Truncated LHa header");
469	return (ARCHIVE_FATAL);
470}
471
472static int
473archive_read_format_lha_read_header(struct archive_read *a,
474    struct archive_entry *entry)
475{
476	struct archive_string linkname;
477	struct archive_string pathname;
478	struct lha *lha;
479	const unsigned char *p;
480	const char *signature;
481	int err;
482
483	lha_crc16_init();
484
485	a->archive.archive_format = ARCHIVE_FORMAT_LHA;
486	if (a->archive.archive_format_name == NULL)
487		a->archive.archive_format_name = "lha";
488
489	lha = (struct lha *)(a->format->data);
490	lha->decompress_init = 0;
491	lha->end_of_entry = 0;
492	lha->end_of_entry_cleanup = 0;
493	lha->entry_unconsumed = 0;
494
495	if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) {
496		/*
497		 * LHa archiver added 0 to the tail of its archive file as
498		 * the mark of the end of the archive.
499		 */
500		signature = __archive_read_ahead(a, sizeof(signature[0]), NULL);
501		if (signature == NULL || signature[0] == 0)
502			return (ARCHIVE_EOF);
503		return (truncated_error(a));
504	}
505
506	signature = (const char *)p;
507	if (lha->found_first_header == 0 &&
508	    signature[0] == 'M' && signature[1] == 'Z') {
509                /* This is an executable?  Must be self-extracting... 	*/
510		err = lha_skip_sfx(a);
511		if (err < ARCHIVE_WARN)
512			return (err);
513
514		if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL)
515			return (truncated_error(a));
516		signature = (const char *)p;
517	}
518	/* signature[0] == 0 means the end of an LHa archive file. */
519	if (signature[0] == 0)
520		return (ARCHIVE_EOF);
521
522	/*
523	 * Check the header format and method type.
524	 */
525	if (lha_check_header_format(p) != 0) {
526		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
527		    "Bad LHa file");
528		return (ARCHIVE_FATAL);
529	}
530
531	/* We've found the first header. */
532	lha->found_first_header = 1;
533	/* Set a default value and common data */
534	lha->header_size = 0;
535	lha->level = p[H_LEVEL_OFFSET];
536	lha->method[0] = p[H_METHOD_OFFSET+1];
537	lha->method[1] = p[H_METHOD_OFFSET+2];
538	lha->method[2] = p[H_METHOD_OFFSET+3];
539	if (memcmp(lha->method, "lhd", 3) == 0)
540		lha->directory = 1;
541	else
542		lha->directory = 0;
543	if (memcmp(lha->method, "lh0", 3) == 0 ||
544	    memcmp(lha->method, "lz4", 3) == 0)
545		lha->entry_is_compressed = 0;
546	else
547		lha->entry_is_compressed = 1;
548
549	lha->compsize = 0;
550	lha->origsize = 0;
551	lha->setflag = 0;
552	lha->birthtime = 0;
553	lha->birthtime_tv_nsec = 0;
554	lha->mtime = 0;
555	lha->mtime_tv_nsec = 0;
556	lha->atime = 0;
557	lha->atime_tv_nsec = 0;
558	lha->mode = (lha->directory)? 0777 : 0666;
559	lha->uid = 0;
560	lha->gid = 0;
561	archive_string_empty(&lha->dirname);
562	archive_string_empty(&lha->filename);
563	lha->dos_attr = 0;
564	if (lha->opt_sconv != NULL)
565		lha->sconv = lha->opt_sconv;
566	else
567		lha->sconv = NULL;
568
569	switch (p[H_LEVEL_OFFSET]) {
570	case 0:
571		err = lha_read_file_header_0(a, lha);
572		break;
573	case 1:
574		err = lha_read_file_header_1(a, lha);
575		break;
576	case 2:
577		err = lha_read_file_header_2(a, lha);
578		break;
579	case 3:
580		err = lha_read_file_header_3(a, lha);
581		break;
582	default:
583		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
584		    "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]);
585		err = ARCHIVE_FATAL;
586		break;
587	}
588	if (err < ARCHIVE_WARN)
589		return (err);
590
591
592	if (!lha->directory && archive_strlen(&lha->filename) == 0)
593		/* The filename has not been set */
594		return (truncated_error(a));
595
596	/*
597	 * Make a pathname from a dirname and a filename.
598	 */
599	archive_string_concat(&lha->dirname, &lha->filename);
600	archive_string_init(&pathname);
601	archive_string_init(&linkname);
602	archive_string_copy(&pathname, &lha->dirname);
603
604	if ((lha->mode & AE_IFMT) == AE_IFLNK) {
605		/*
606	 	 * Extract the symlink-name if it's included in the pathname.
607	 	 */
608		if (!lha_parse_linkname(&linkname, &pathname)) {
609			/* We couldn't get the symlink-name. */
610			archive_set_error(&a->archive,
611		    	    ARCHIVE_ERRNO_FILE_FORMAT,
612			    "Unknown symlink-name");
613			archive_string_free(&pathname);
614			archive_string_free(&linkname);
615			return (ARCHIVE_FAILED);
616		}
617	} else {
618		/*
619		 * Make sure a file-type is set.
620		 * The mode has been overridden if it is in the extended data.
621		 */
622		lha->mode = (lha->mode & ~AE_IFMT) |
623		    ((lha->directory)? AE_IFDIR: AE_IFREG);
624	}
625	if ((lha->setflag & UNIX_MODE_IS_SET) == 0 &&
626	    (lha->dos_attr & 1) != 0)
627		lha->mode &= ~(0222);/* read only. */
628
629	/*
630	 * Set basic file parameters.
631	 */
632	if (archive_entry_copy_pathname_l(entry, pathname.s,
633	    pathname.length, lha->sconv) != 0) {
634		if (errno == ENOMEM) {
635			archive_set_error(&a->archive, ENOMEM,
636			    "Can't allocate memory for Pathname");
637			return (ARCHIVE_FATAL);
638		}
639		archive_set_error(&a->archive,
640		    ARCHIVE_ERRNO_FILE_FORMAT,
641		    "Pathname cannot be converted "
642		    "from %s to current locale.",
643		    archive_string_conversion_charset_name(lha->sconv));
644		err = ARCHIVE_WARN;
645	}
646	archive_string_free(&pathname);
647	if (archive_strlen(&linkname) > 0) {
648		if (archive_entry_copy_symlink_l(entry, linkname.s,
649		    linkname.length, lha->sconv) != 0) {
650			if (errno == ENOMEM) {
651				archive_set_error(&a->archive, ENOMEM,
652				    "Can't allocate memory for Linkname");
653				return (ARCHIVE_FATAL);
654			}
655			archive_set_error(&a->archive,
656			    ARCHIVE_ERRNO_FILE_FORMAT,
657			    "Linkname cannot be converted "
658			    "from %s to current locale.",
659			    archive_string_conversion_charset_name(lha->sconv));
660			err = ARCHIVE_WARN;
661		}
662	} else
663		archive_entry_set_symlink(entry, NULL);
664	archive_string_free(&linkname);
665	/*
666	 * When a header level is 0, there is a possibility that
667	 * a pathname and a symlink has '\' character, a directory
668	 * separator in DOS/Windows. So we should convert it to '/'.
669	 */
670	if (p[H_LEVEL_OFFSET] == 0)
671		lha_replace_path_separator(lha, entry);
672
673	archive_entry_set_mode(entry, lha->mode);
674	archive_entry_set_uid(entry, lha->uid);
675	archive_entry_set_gid(entry, lha->gid);
676	if (archive_strlen(&lha->uname) > 0)
677		archive_entry_set_uname(entry, lha->uname.s);
678	if (archive_strlen(&lha->gname) > 0)
679		archive_entry_set_gname(entry, lha->gname.s);
680	if (lha->setflag & BIRTHTIME_IS_SET) {
681		archive_entry_set_birthtime(entry, lha->birthtime,
682		    lha->birthtime_tv_nsec);
683		archive_entry_set_ctime(entry, lha->birthtime,
684		    lha->birthtime_tv_nsec);
685	} else {
686		archive_entry_unset_birthtime(entry);
687		archive_entry_unset_ctime(entry);
688	}
689	archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec);
690	if (lha->setflag & ATIME_IS_SET)
691		archive_entry_set_atime(entry, lha->atime,
692		    lha->atime_tv_nsec);
693	else
694		archive_entry_unset_atime(entry);
695	if (lha->directory || archive_entry_symlink(entry) != NULL)
696		archive_entry_unset_size(entry);
697	else
698		archive_entry_set_size(entry, lha->origsize);
699
700	/*
701	 * Prepare variables used to read a file content.
702	 */
703	lha->entry_bytes_remaining = lha->compsize;
704	lha->entry_offset = 0;
705	lha->entry_crc_calculated = 0;
706
707	/*
708	 * This file does not have a content.
709	 */
710	if (lha->directory || lha->compsize == 0)
711		lha->end_of_entry = 1;
712
713	sprintf(lha->format_name, "lha -%c%c%c-",
714	    lha->method[0], lha->method[1], lha->method[2]);
715	a->archive.archive_format_name = lha->format_name;
716
717	return (err);
718}
719
720/*
721 * Replace a DOS path separator '\' by a character '/'.
722 * Some multi-byte character set have  a character '\' in its second byte.
723 */
724static void
725lha_replace_path_separator(struct lha *lha, struct archive_entry *entry)
726{
727	const wchar_t *wp;
728	size_t i;
729
730	if ((wp = archive_entry_pathname_w(entry)) != NULL) {
731		archive_wstrcpy(&(lha->ws), wp);
732		for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
733			if (lha->ws.s[i] == L'\\')
734				lha->ws.s[i] = L'/';
735		}
736		archive_entry_copy_pathname_w(entry, lha->ws.s);
737	}
738
739	if ((wp = archive_entry_symlink_w(entry)) != NULL) {
740		archive_wstrcpy(&(lha->ws), wp);
741		for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
742			if (lha->ws.s[i] == L'\\')
743				lha->ws.s[i] = L'/';
744		}
745		archive_entry_copy_symlink_w(entry, lha->ws.s);
746	}
747}
748
749/*
750 * Header 0 format
751 *
752 * +0              +1         +2               +7                  +11
753 * +---------------+----------+----------------+-------------------+
754 * |header size(*1)|header sum|compression type|compressed size(*2)|
755 * +---------------+----------+----------------+-------------------+
756 *                             <---------------------(*1)----------*
757 *
758 * +11               +15       +17       +19            +20              +21
759 * +-----------------+---------+---------+--------------+----------------+
760 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)|
761 * +-----------------+---------+---------+--------------+----------------+
762 * *--------------------------------(*1)---------------------------------*
763 *
764 * +21             +22       +22+(*3)   +22+(*3)+2       +22+(*3)+2+(*4)
765 * +---------------+---------+----------+----------------+------------------+
766 * |name length(*3)|file name|file CRC16|extra header(*4)|  compressed data |
767 * +---------------+---------+----------+----------------+------------------+
768 *                  <--(*3)->                             <------(*2)------>
769 * *----------------------(*1)-------------------------->
770 *
771 */
772#define H0_HEADER_SIZE_OFFSET	0
773#define H0_HEADER_SUM_OFFSET	1
774#define H0_COMP_SIZE_OFFSET	7
775#define H0_ORIG_SIZE_OFFSET	11
776#define H0_DOS_TIME_OFFSET	15
777#define H0_NAME_LEN_OFFSET	21
778#define H0_FILE_NAME_OFFSET	22
779#define H0_FIXED_SIZE		24
780static int
781lha_read_file_header_0(struct archive_read *a, struct lha *lha)
782{
783	const unsigned char *p;
784	int extdsize, namelen;
785	unsigned char headersum, sum_calculated;
786
787	if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL)
788		return (truncated_error(a));
789	lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2;
790	headersum = p[H0_HEADER_SUM_OFFSET];
791	lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET);
792	lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET);
793	lha->mtime = lha_dos_time(p + H0_DOS_TIME_OFFSET);
794	namelen = p[H0_NAME_LEN_OFFSET];
795	extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen;
796	if ((namelen > 221 || extdsize < 0) && extdsize != -2) {
797		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
798		    "Invalid LHa header");
799		return (ARCHIVE_FATAL);
800	}
801	if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
802		return (truncated_error(a));
803
804	archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen);
805	/* When extdsize == -2, A CRC16 value is not present in the header. */
806	if (extdsize >= 0) {
807		lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen);
808		lha->setflag |= CRC_IS_SET;
809	}
810	sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
811
812	/* Read an extended header */
813	if (extdsize > 0) {
814		/* This extended data is set by 'LHa for UNIX' only.
815		 * Maybe fixed size.
816		 */
817		p += H0_FILE_NAME_OFFSET + namelen + 2;
818		if (p[0] == 'U' && extdsize == 12) {
819			/* p[1] is a minor version. */
820			lha->mtime = archive_le32dec(&p[2]);
821			lha->mode = archive_le16dec(&p[6]);
822			lha->uid = archive_le16dec(&p[8]);
823			lha->gid = archive_le16dec(&p[10]);
824			lha->setflag |= UNIX_MODE_IS_SET;
825		}
826	}
827	__archive_read_consume(a, lha->header_size);
828
829	if (sum_calculated != headersum) {
830		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
831		    "LHa header sum error");
832		return (ARCHIVE_FATAL);
833	}
834
835	return (ARCHIVE_OK);
836}
837
838/*
839 * Header 1 format
840 *
841 * +0              +1         +2               +7            +11
842 * +---------------+----------+----------------+-------------+
843 * |header size(*1)|header sum|compression type|skip size(*2)|
844 * +---------------+----------+----------------+-------------+
845 *                             <---------------(*1)----------*
846 *
847 * +11               +15       +17       +19            +20              +21
848 * +-----------------+---------+---------+--------------+----------------+
849 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)|
850 * +-----------------+---------+---------+--------------+----------------+
851 * *-------------------------------(*1)----------------------------------*
852 *
853 * +21             +22       +22+(*3)   +22+(*3)+2  +22+(*3)+3  +22+(*3)+3+(*4)
854 * +---------------+---------+----------+-----------+-----------+
855 * |name length(*3)|file name|file CRC16|  creator  |padding(*4)|
856 * +---------------+---------+----------+-----------+-----------+
857 *                  <--(*3)->
858 * *----------------------------(*1)----------------------------*
859 *
860 * +22+(*3)+3+(*4)  +22+(*3)+3+(*4)+2     +22+(*3)+3+(*4)+2+(*5)
861 * +----------------+---------------------+------------------------+
862 * |next header size| extended header(*5) |     compressed data    |
863 * +----------------+---------------------+------------------------+
864 * *------(*1)-----> <--------------------(*2)-------------------->
865 */
866#define H1_HEADER_SIZE_OFFSET	0
867#define H1_HEADER_SUM_OFFSET	1
868#define H1_COMP_SIZE_OFFSET	7
869#define H1_ORIG_SIZE_OFFSET	11
870#define H1_DOS_TIME_OFFSET	15
871#define H1_NAME_LEN_OFFSET	21
872#define H1_FILE_NAME_OFFSET	22
873#define H1_FIXED_SIZE		27
874static int
875lha_read_file_header_1(struct archive_read *a, struct lha *lha)
876{
877	const unsigned char *p;
878	size_t extdsize;
879	int i, err, err2;
880	int namelen, padding;
881	unsigned char headersum, sum_calculated;
882
883	err = ARCHIVE_OK;
884
885	if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL)
886		return (truncated_error(a));
887
888	lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2;
889	headersum = p[H1_HEADER_SUM_OFFSET];
890	/* Note: An extended header size is included in a compsize. */
891	lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET);
892	lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET);
893	lha->mtime = lha_dos_time(p + H1_DOS_TIME_OFFSET);
894	namelen = p[H1_NAME_LEN_OFFSET];
895	/* Calculate a padding size. The result will be normally 0 only(?) */
896	padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen;
897
898	if (namelen > 230 || padding < 0)
899		goto invalid;
900
901	if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
902		return (truncated_error(a));
903
904	for (i = 0; i < namelen; i++) {
905		if (p[i + H1_FILE_NAME_OFFSET] == 0xff)
906			goto invalid;/* Invalid filename. */
907	}
908	archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen);
909	lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen);
910	lha->setflag |= CRC_IS_SET;
911
912	sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
913	/* Consume used bytes but not include `next header size' data
914	 * since it will be consumed in lha_read_file_extended_header(). */
915	__archive_read_consume(a, lha->header_size - 2);
916
917	/* Read extended headers */
918	err2 = lha_read_file_extended_header(a, lha, NULL, 2,
919	    (size_t)(lha->compsize + 2), &extdsize);
920	if (err2 < ARCHIVE_WARN)
921		return (err2);
922	if (err2 < err)
923		err = err2;
924	/* Get a real compressed file size. */
925	lha->compsize -= extdsize - 2;
926
927	if (lha->compsize < 0)
928		goto invalid;	/* Invalid compressed file size */
929
930	if (sum_calculated != headersum) {
931		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
932		    "LHa header sum error");
933		return (ARCHIVE_FATAL);
934	}
935	return (err);
936invalid:
937	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
938	    "Invalid LHa header");
939	return (ARCHIVE_FATAL);
940}
941
942/*
943 * Header 2 format
944 *
945 * +0              +2               +7                  +11               +15
946 * +---------------+----------------+-------------------+-----------------+
947 * |header size(*1)|compression type|compressed size(*2)|uncompressed size|
948 * +---------------+----------------+-------------------+-----------------+
949 *  <--------------------------------(*1)---------------------------------*
950 *
951 * +15               +19          +20              +21        +23         +24
952 * +-----------------+------------+----------------+----------+-----------+
953 * |data/time(time_t)| 0x20 fixed |header level(=2)|file CRC16|  creator  |
954 * +-----------------+------------+----------------+----------+-----------+
955 * *---------------------------------(*1)---------------------------------*
956 *
957 * +24              +26                 +26+(*3)      +26+(*3)+(*4)
958 * +----------------+-------------------+-------------+-------------------+
959 * |next header size|extended header(*3)| padding(*4) |  compressed data  |
960 * +----------------+-------------------+-------------+-------------------+
961 * *--------------------------(*1)-------------------> <------(*2)------->
962 *
963 */
964#define H2_HEADER_SIZE_OFFSET	0
965#define H2_COMP_SIZE_OFFSET	7
966#define H2_ORIG_SIZE_OFFSET	11
967#define H2_TIME_OFFSET		15
968#define H2_CRC_OFFSET		21
969#define H2_FIXED_SIZE		24
970static int
971lha_read_file_header_2(struct archive_read *a, struct lha *lha)
972{
973	const unsigned char *p;
974	size_t extdsize;
975	int err, padding;
976	uint16_t header_crc;
977
978	if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL)
979		return (truncated_error(a));
980
981	lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET);
982	lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET);
983	lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET);
984	lha->mtime = archive_le32dec(p + H2_TIME_OFFSET);
985	lha->crc = archive_le16dec(p + H2_CRC_OFFSET);
986	lha->setflag |= CRC_IS_SET;
987
988	if (lha->header_size < H2_FIXED_SIZE) {
989		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
990		    "Invalid LHa header size");
991		return (ARCHIVE_FATAL);
992	}
993
994	header_crc = lha_crc16(0, p, H2_FIXED_SIZE);
995	__archive_read_consume(a, H2_FIXED_SIZE);
996
997	/* Read extended headers */
998	err = lha_read_file_extended_header(a, lha, &header_crc, 2,
999		  lha->header_size - H2_FIXED_SIZE, &extdsize);
1000	if (err < ARCHIVE_WARN)
1001		return (err);
1002
1003	/* Calculate a padding size. The result will be normally 0 or 1. */
1004	padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize);
1005	if (padding > 0) {
1006		if ((p = __archive_read_ahead(a, padding, NULL)) == NULL)
1007			return (truncated_error(a));
1008		header_crc = lha_crc16(header_crc, p, padding);
1009		__archive_read_consume(a, padding);
1010	}
1011
1012	if (header_crc != lha->header_crc) {
1013		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1014		    "LHa header CRC error");
1015		return (ARCHIVE_FATAL);
1016	}
1017	return (err);
1018}
1019
1020/*
1021 * Header 3 format
1022 *
1023 * +0           +2               +7                  +11               +15
1024 * +------------+----------------+-------------------+-----------------+
1025 * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size|
1026 * +------------+----------------+-------------------+-----------------+
1027 *  <-------------------------------(*1)-------------------------------*
1028 *
1029 * +15               +19          +20              +21        +23         +24
1030 * +-----------------+------------+----------------+----------+-----------+
1031 * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16|  creator  |
1032 * +-----------------+------------+----------------+----------+-----------+
1033 * *--------------------------------(*1)----------------------------------*
1034 *
1035 * +24             +28              +32                 +32+(*3)
1036 * +---------------+----------------+-------------------+-----------------+
1037 * |header size(*1)|next header size|extended header(*3)| compressed data |
1038 * +---------------+----------------+-------------------+-----------------+
1039 * *------------------------(*1)-----------------------> <------(*2)----->
1040 *
1041 */
1042#define H3_FIELD_LEN_OFFSET	0
1043#define H3_COMP_SIZE_OFFSET	7
1044#define H3_ORIG_SIZE_OFFSET	11
1045#define H3_TIME_OFFSET		15
1046#define H3_CRC_OFFSET		21
1047#define H3_HEADER_SIZE_OFFSET	24
1048#define H3_FIXED_SIZE		28
1049static int
1050lha_read_file_header_3(struct archive_read *a, struct lha *lha)
1051{
1052	const unsigned char *p;
1053	size_t extdsize;
1054	int err;
1055	uint16_t header_crc;
1056
1057	if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL)
1058		return (truncated_error(a));
1059
1060	if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4)
1061		goto invalid;
1062	lha->header_size =archive_le32dec(p + H3_HEADER_SIZE_OFFSET);
1063	lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET);
1064	lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET);
1065	lha->mtime = archive_le32dec(p + H3_TIME_OFFSET);
1066	lha->crc = archive_le16dec(p + H3_CRC_OFFSET);
1067	lha->setflag |= CRC_IS_SET;
1068
1069	if (lha->header_size < H3_FIXED_SIZE + 4)
1070		goto invalid;
1071	header_crc = lha_crc16(0, p, H3_FIXED_SIZE);
1072	__archive_read_consume(a, H3_FIXED_SIZE);
1073
1074	/* Read extended headers */
1075	err = lha_read_file_extended_header(a, lha, &header_crc, 4,
1076		  lha->header_size - H3_FIXED_SIZE, &extdsize);
1077	if (err < ARCHIVE_WARN)
1078		return (err);
1079
1080	if (header_crc != lha->header_crc) {
1081		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1082		    "LHa header CRC error");
1083		return (ARCHIVE_FATAL);
1084	}
1085	return (err);
1086invalid:
1087	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1088	    "Invalid LHa header");
1089	return (ARCHIVE_FATAL);
1090}
1091
1092/*
1093 * Extended header format
1094 *
1095 * +0             +2        +3  -- used in header 1 and 2
1096 * +0             +4        +5  -- used in header 3
1097 * +--------------+---------+-------------------+--------------+--
1098 * |ex-header size|header id|        data       |ex-header size| .......
1099 * +--------------+---------+-------------------+--------------+--
1100 *  <-------------( ex-header size)------------> <-- next extended header --*
1101 *
1102 * If the ex-header size is zero, it is the make of the end of extended
1103 * headers.
1104 *
1105 */
1106static int
1107lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
1108    uint16_t *crc, int sizefield_length, size_t limitsize, size_t *total_size)
1109{
1110	const void *h;
1111	const unsigned char *extdheader;
1112	size_t	extdsize;
1113	size_t	datasize;
1114	unsigned int i;
1115	unsigned char extdtype;
1116
1117#define EXT_HEADER_CRC		0x00		/* Header CRC and information*/
1118#define EXT_FILENAME		0x01		/* Filename 		    */
1119#define EXT_DIRECTORY		0x02		/* Directory name	    */
1120#define EXT_DOS_ATTR		0x40		/* MS-DOS attribute	    */
1121#define EXT_TIMESTAMP		0x41		/* Windows time stamp	    */
1122#define EXT_FILESIZE		0x42		/* Large file size	    */
1123#define EXT_TIMEZONE		0x43		/* Time zone		    */
1124#define EXT_UTF16_FILENAME	0x44		/* UTF-16 filename 	    */
1125#define EXT_UTF16_DIRECTORY	0x45		/* UTF-16 directory name    */
1126#define EXT_CODEPAGE		0x46		/* Codepage		    */
1127#define EXT_UNIX_MODE		0x50		/* File permission	    */
1128#define EXT_UNIX_GID_UID	0x51		/* gid,uid		    */
1129#define EXT_UNIX_GNAME		0x52		/* Group name		    */
1130#define EXT_UNIX_UNAME		0x53		/* User name		    */
1131#define EXT_UNIX_MTIME		0x54		/* Modified time	    */
1132#define EXT_OS2_NEW_ATTR	0x7f		/* new attribute(OS/2 only) */
1133#define EXT_NEW_ATTR		0xff		/* new attribute	    */
1134
1135	*total_size = sizefield_length;
1136
1137	for (;;) {
1138		/* Read an extended header size. */
1139		if ((h =
1140		    __archive_read_ahead(a, sizefield_length, NULL)) == NULL)
1141			return (truncated_error(a));
1142		/* Check if the size is the zero indicates the end of the
1143		 * extended header. */
1144		if (sizefield_length == sizeof(uint16_t))
1145			extdsize = archive_le16dec(h);
1146		else
1147			extdsize = archive_le32dec(h);
1148		if (extdsize == 0) {
1149			/* End of extended header */
1150			if (crc != NULL)
1151				*crc = lha_crc16(*crc, h, sizefield_length);
1152			__archive_read_consume(a, sizefield_length);
1153			return (ARCHIVE_OK);
1154		}
1155
1156		/* Sanity check to the extended header size. */
1157		if (((uint64_t)*total_size + extdsize) >
1158				    (uint64_t)limitsize ||
1159		    extdsize <= (size_t)sizefield_length)
1160			goto invalid;
1161
1162		/* Read the extended header. */
1163		if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL)
1164			return (truncated_error(a));
1165		*total_size += extdsize;
1166
1167		extdheader = (const unsigned char *)h;
1168		/* Get the extended header type. */
1169		extdtype = extdheader[sizefield_length];
1170		/* Calculate an extended data size. */
1171		datasize = extdsize - (1 + sizefield_length);
1172		/* Skip an extended header size field and type field. */
1173		extdheader += sizefield_length + 1;
1174
1175		if (crc != NULL && extdtype != EXT_HEADER_CRC)
1176			*crc = lha_crc16(*crc, h, extdsize);
1177		switch (extdtype) {
1178		case EXT_HEADER_CRC:
1179			/* We only use a header CRC. Following data will not
1180			 * be used. */
1181			if (datasize >= 2) {
1182				lha->header_crc = archive_le16dec(extdheader);
1183				if (crc != NULL) {
1184					static const char zeros[2] = {0, 0};
1185					*crc = lha_crc16(*crc, h,
1186					    extdsize - datasize);
1187					/* CRC value itself as zero */
1188					*crc = lha_crc16(*crc, zeros, 2);
1189					*crc = lha_crc16(*crc,
1190					    extdheader+2, datasize - 2);
1191				}
1192			}
1193			break;
1194		case EXT_FILENAME:
1195			if (datasize == 0) {
1196				/* maybe directory header */
1197				archive_string_empty(&lha->filename);
1198				break;
1199			}
1200			if (extdheader[0] == '\0')
1201				goto invalid;
1202			archive_strncpy(&lha->filename,
1203			    (const char *)extdheader, datasize);
1204			break;
1205		case EXT_DIRECTORY:
1206			if (datasize == 0 || extdheader[0] == '\0')
1207				/* no directory name data. exit this case. */
1208				goto invalid;
1209
1210			archive_strncpy(&lha->dirname,
1211		  	    (const char *)extdheader, datasize);
1212			/*
1213			 * Convert directory delimiter from 0xFF
1214			 * to '/' for local system.
1215	 		 */
1216			for (i = 0; i < lha->dirname.length; i++) {
1217				if ((unsigned char)lha->dirname.s[i] == 0xFF)
1218					lha->dirname.s[i] = '/';
1219			}
1220			/* Is last character directory separator? */
1221			if (lha->dirname.s[lha->dirname.length-1] != '/')
1222				/* invalid directory data */
1223				goto invalid;
1224			break;
1225		case EXT_DOS_ATTR:
1226			if (datasize == 2)
1227				lha->dos_attr = (unsigned char)
1228				    (archive_le16dec(extdheader) & 0xff);
1229			break;
1230		case EXT_TIMESTAMP:
1231			if (datasize == (sizeof(uint64_t) * 3)) {
1232				lha->birthtime = lha_win_time(
1233				    archive_le64dec(extdheader),
1234				    &lha->birthtime_tv_nsec);
1235				extdheader += sizeof(uint64_t);
1236				lha->mtime = lha_win_time(
1237				    archive_le64dec(extdheader),
1238				    &lha->mtime_tv_nsec);
1239				extdheader += sizeof(uint64_t);
1240				lha->atime = lha_win_time(
1241				    archive_le64dec(extdheader),
1242				    &lha->atime_tv_nsec);
1243				lha->setflag |= BIRTHTIME_IS_SET |
1244				    ATIME_IS_SET;
1245			}
1246			break;
1247		case EXT_FILESIZE:
1248			if (datasize == sizeof(uint64_t) * 2) {
1249				lha->compsize = archive_le64dec(extdheader);
1250				extdheader += sizeof(uint64_t);
1251				lha->origsize = archive_le64dec(extdheader);
1252			}
1253			break;
1254		case EXT_CODEPAGE:
1255			/* Get an archived filename charset from codepage.
1256			 * This overwrites the charset specified by
1257			 * hdrcharset option. */
1258			if (datasize == sizeof(uint32_t)) {
1259				struct archive_string cp;
1260				const char *charset;
1261
1262				archive_string_init(&cp);
1263				switch (archive_le32dec(extdheader)) {
1264				case 65001: /* UTF-8 */
1265					charset = "UTF-8";
1266					break;
1267				default:
1268					archive_string_sprintf(&cp, "CP%d",
1269					    (int)archive_le32dec(extdheader));
1270					charset = cp.s;
1271					break;
1272				}
1273				lha->sconv =
1274				    archive_string_conversion_from_charset(
1275					&(a->archive), charset, 1);
1276				archive_string_free(&cp);
1277				if (lha->sconv == NULL)
1278					return (ARCHIVE_FATAL);
1279			}
1280			break;
1281		case EXT_UNIX_MODE:
1282			if (datasize == sizeof(uint16_t)) {
1283				lha->mode = archive_le16dec(extdheader);
1284				lha->setflag |= UNIX_MODE_IS_SET;
1285			}
1286			break;
1287		case EXT_UNIX_GID_UID:
1288			if (datasize == (sizeof(uint16_t) * 2)) {
1289				lha->gid = archive_le16dec(extdheader);
1290				lha->uid = archive_le16dec(extdheader+2);
1291			}
1292			break;
1293		case EXT_UNIX_GNAME:
1294			if (datasize > 0)
1295				archive_strncpy(&lha->gname,
1296				    (const char *)extdheader, datasize);
1297			break;
1298		case EXT_UNIX_UNAME:
1299			if (datasize > 0)
1300				archive_strncpy(&lha->uname,
1301				    (const char *)extdheader, datasize);
1302			break;
1303		case EXT_UNIX_MTIME:
1304			if (datasize == sizeof(uint32_t))
1305				lha->mtime = archive_le32dec(extdheader);
1306			break;
1307		case EXT_OS2_NEW_ATTR:
1308			/* This extended header is OS/2 depend. */
1309			if (datasize == 16) {
1310				lha->dos_attr = (unsigned char)
1311				    (archive_le16dec(extdheader) & 0xff);
1312				lha->mode = archive_le16dec(extdheader+2);
1313				lha->gid = archive_le16dec(extdheader+4);
1314				lha->uid = archive_le16dec(extdheader+6);
1315				lha->birthtime = archive_le32dec(extdheader+8);
1316				lha->atime = archive_le32dec(extdheader+12);
1317				lha->setflag |= UNIX_MODE_IS_SET
1318				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
1319			}
1320			break;
1321		case EXT_NEW_ATTR:
1322			if (datasize == 20) {
1323				lha->mode = (mode_t)archive_le32dec(extdheader);
1324				lha->gid = archive_le32dec(extdheader+4);
1325				lha->uid = archive_le32dec(extdheader+8);
1326				lha->birthtime = archive_le32dec(extdheader+12);
1327				lha->atime = archive_le32dec(extdheader+16);
1328				lha->setflag |= UNIX_MODE_IS_SET
1329				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
1330			}
1331			break;
1332		case EXT_TIMEZONE:		/* Not supported */
1333		case EXT_UTF16_FILENAME:	/* Not supported */
1334		case EXT_UTF16_DIRECTORY:	/* Not supported */
1335		default:
1336			break;
1337		}
1338
1339		__archive_read_consume(a, extdsize);
1340	}
1341invalid:
1342	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1343	    "Invalid extended LHa header");
1344	return (ARCHIVE_FATAL);
1345}
1346
1347static int
1348lha_end_of_entry(struct archive_read *a)
1349{
1350	struct lha *lha = (struct lha *)(a->format->data);
1351	int r = ARCHIVE_EOF;
1352
1353	if (!lha->end_of_entry_cleanup) {
1354		if ((lha->setflag & CRC_IS_SET) &&
1355		    lha->crc != lha->entry_crc_calculated) {
1356			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1357			    "LHa data CRC error");
1358			r = ARCHIVE_WARN;
1359		}
1360
1361		/* End-of-entry cleanup done. */
1362		lha->end_of_entry_cleanup = 1;
1363	}
1364	return (r);
1365}
1366
1367static int
1368archive_read_format_lha_read_data(struct archive_read *a,
1369    const void **buff, size_t *size, int64_t *offset)
1370{
1371	struct lha *lha = (struct lha *)(a->format->data);
1372	int r;
1373
1374	if (lha->entry_unconsumed) {
1375		/* Consume as much as the decompressor actually used. */
1376		__archive_read_consume(a, lha->entry_unconsumed);
1377		lha->entry_unconsumed = 0;
1378	}
1379	if (lha->end_of_entry) {
1380		*offset = lha->entry_offset;
1381		*size = 0;
1382		*buff = NULL;
1383		return (lha_end_of_entry(a));
1384	}
1385
1386	if (lha->entry_is_compressed)
1387		r =  lha_read_data_lzh(a, buff, size, offset);
1388	else
1389		/* No compression. */
1390		r =  lha_read_data_none(a, buff, size, offset);
1391	return (r);
1392}
1393
1394/*
1395 * Read a file content in no compression.
1396 *
1397 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1398 * lha->end_of_entry if it consumes all of the data.
1399 */
1400static int
1401lha_read_data_none(struct archive_read *a, const void **buff,
1402    size_t *size, int64_t *offset)
1403{
1404	struct lha *lha = (struct lha *)(a->format->data);
1405	ssize_t bytes_avail;
1406
1407	if (lha->entry_bytes_remaining == 0) {
1408		*buff = NULL;
1409		*size = 0;
1410		*offset = lha->entry_offset;
1411		lha->end_of_entry = 1;
1412		return (ARCHIVE_OK);
1413	}
1414	/*
1415	 * Note: '1' here is a performance optimization.
1416	 * Recall that the decompression layer returns a count of
1417	 * available bytes; asking for more than that forces the
1418	 * decompressor to combine reads by copying data.
1419	 */
1420	*buff = __archive_read_ahead(a, 1, &bytes_avail);
1421	if (bytes_avail <= 0) {
1422		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1423		    "Truncated LHa file data");
1424		return (ARCHIVE_FATAL);
1425	}
1426	if (bytes_avail > lha->entry_bytes_remaining)
1427		bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1428	lha->entry_crc_calculated =
1429	    lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail);
1430	*size = bytes_avail;
1431	*offset = lha->entry_offset;
1432	lha->entry_offset += bytes_avail;
1433	lha->entry_bytes_remaining -= bytes_avail;
1434	if (lha->entry_bytes_remaining == 0)
1435		lha->end_of_entry = 1;
1436	lha->entry_unconsumed = bytes_avail;
1437	return (ARCHIVE_OK);
1438}
1439
1440/*
1441 * Read a file content in LZHUFF encoding.
1442 *
1443 * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is
1444 * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes
1445 * all of the data.
1446 */
1447static int
1448lha_read_data_lzh(struct archive_read *a, const void **buff,
1449    size_t *size, int64_t *offset)
1450{
1451	struct lha *lha = (struct lha *)(a->format->data);
1452	ssize_t bytes_avail;
1453	int r;
1454
1455	/* If we haven't yet read any data, initialize the decompressor. */
1456	if (!lha->decompress_init) {
1457		r = lzh_decode_init(&(lha->strm), lha->method);
1458		switch (r) {
1459		case ARCHIVE_OK:
1460			break;
1461		case ARCHIVE_FAILED:
1462        		/* Unsupported compression. */
1463			*buff = NULL;
1464			*size = 0;
1465			*offset = 0;
1466			archive_set_error(&a->archive,
1467			    ARCHIVE_ERRNO_FILE_FORMAT,
1468			    "Unsupported lzh compression method -%c%c%c-",
1469			    lha->method[0], lha->method[1], lha->method[2]);
1470			/* We know compressed size; just skip it. */
1471			archive_read_format_lha_read_data_skip(a);
1472			return (ARCHIVE_WARN);
1473		default:
1474			archive_set_error(&a->archive, ENOMEM,
1475			    "Couldn't allocate memory "
1476			    "for lzh decompression");
1477			return (ARCHIVE_FATAL);
1478		}
1479		/* We've initialized decompression for this stream. */
1480		lha->decompress_init = 1;
1481		lha->strm.avail_out = 0;
1482		lha->strm.total_out = 0;
1483	}
1484
1485	/*
1486	 * Note: '1' here is a performance optimization.
1487	 * Recall that the decompression layer returns a count of
1488	 * available bytes; asking for more than that forces the
1489	 * decompressor to combine reads by copying data.
1490	 */
1491	lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail);
1492	if (bytes_avail <= 0) {
1493		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1494		    "Truncated LHa file body");
1495		return (ARCHIVE_FATAL);
1496	}
1497	if (bytes_avail > lha->entry_bytes_remaining)
1498		bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1499
1500	lha->strm.avail_in = (int)bytes_avail;
1501	lha->strm.total_in = 0;
1502	lha->strm.avail_out = 0;
1503
1504	r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining);
1505	switch (r) {
1506	case ARCHIVE_OK:
1507		break;
1508	case ARCHIVE_EOF:
1509		lha->end_of_entry = 1;
1510		break;
1511	default:
1512		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1513		    "Bad lzh data");
1514		return (ARCHIVE_FAILED);
1515	}
1516	lha->entry_unconsumed = lha->strm.total_in;
1517	lha->entry_bytes_remaining -= lha->strm.total_in;
1518
1519	if (lha->strm.avail_out) {
1520		*offset = lha->entry_offset;
1521		*size = lha->strm.avail_out;
1522		*buff = lha->strm.ref_ptr;
1523		lha->entry_crc_calculated =
1524		    lha_crc16(lha->entry_crc_calculated, *buff, *size);
1525		lha->entry_offset += *size;
1526	} else {
1527		*offset = lha->entry_offset;
1528		*size = 0;
1529		*buff = NULL;
1530		if (lha->end_of_entry)
1531			return (lha_end_of_entry(a));
1532	}
1533	return (ARCHIVE_OK);
1534}
1535
1536/*
1537 * Skip a file content.
1538 */
1539static int
1540archive_read_format_lha_read_data_skip(struct archive_read *a)
1541{
1542	struct lha *lha;
1543	int64_t bytes_skipped;
1544
1545	lha = (struct lha *)(a->format->data);
1546
1547	if (lha->entry_unconsumed) {
1548		/* Consume as much as the decompressor actually used. */
1549		__archive_read_consume(a, lha->entry_unconsumed);
1550		lha->entry_unconsumed = 0;
1551	}
1552
1553	/* if we've already read to end of data, we're done. */
1554	if (lha->end_of_entry_cleanup)
1555		return (ARCHIVE_OK);
1556
1557	/*
1558	 * If the length is at the beginning, we can skip the
1559	 * compressed data much more quickly.
1560	 */
1561	bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining);
1562	if (bytes_skipped < 0)
1563		return (ARCHIVE_FATAL);
1564
1565	/* This entry is finished and done. */
1566	lha->end_of_entry_cleanup = lha->end_of_entry = 1;
1567	return (ARCHIVE_OK);
1568}
1569
1570static int
1571archive_read_format_lha_cleanup(struct archive_read *a)
1572{
1573	struct lha *lha = (struct lha *)(a->format->data);
1574
1575	lzh_decode_free(&(lha->strm));
1576	archive_string_free(&(lha->dirname));
1577	archive_string_free(&(lha->filename));
1578	archive_string_free(&(lha->uname));
1579	archive_string_free(&(lha->gname));
1580	archive_wstring_free(&(lha->ws));
1581	free(lha);
1582	(a->format->data) = NULL;
1583	return (ARCHIVE_OK);
1584}
1585
1586/*
1587 * 'LHa for UNIX' utility has archived a symbolic-link name after
1588 * a pathname with '|' character.
1589 * This function extracts the symbolic-link name from the pathname.
1590 *
1591 * example.
1592 *   1. a symbolic-name is 'aaa/bb/cc'
1593 *   2. a filename is 'xxx/bbb'
1594 *  then a archived pathname is 'xxx/bbb|aaa/bb/cc'
1595 */
1596static int
1597lha_parse_linkname(struct archive_string *linkname,
1598    struct archive_string *pathname)
1599{
1600	char *	linkptr;
1601	size_t 	symlen;
1602
1603	linkptr = strchr(pathname->s, '|');
1604	if (linkptr != NULL) {
1605		symlen = strlen(linkptr + 1);
1606		archive_strncpy(linkname, linkptr+1, symlen);
1607
1608		*linkptr = 0;
1609		pathname->length = strlen(pathname->s);
1610
1611		return (1);
1612	}
1613	return (0);
1614}
1615
1616/* Convert an MSDOS-style date/time into Unix-style time. */
1617static time_t
1618lha_dos_time(const unsigned char *p)
1619{
1620	int msTime, msDate;
1621	struct tm ts;
1622
1623	msTime = archive_le16dec(p);
1624	msDate = archive_le16dec(p+2);
1625
1626	memset(&ts, 0, sizeof(ts));
1627	ts.tm_year = ((msDate >> 9) & 0x7f) + 80;   /* Years since 1900. */
1628	ts.tm_mon = ((msDate >> 5) & 0x0f) - 1;     /* Month number.     */
1629	ts.tm_mday = msDate & 0x1f;		    /* Day of month.     */
1630	ts.tm_hour = (msTime >> 11) & 0x1f;
1631	ts.tm_min = (msTime >> 5) & 0x3f;
1632	ts.tm_sec = (msTime << 1) & 0x3e;
1633	ts.tm_isdst = -1;
1634	return (mktime(&ts));
1635}
1636
1637/* Convert an MS-Windows-style date/time into Unix-style time. */
1638static time_t
1639lha_win_time(uint64_t wintime, long *ns)
1640{
1641#define EPOC_TIME ARCHIVE_LITERAL_ULL(116444736000000000)
1642
1643	if (wintime >= EPOC_TIME) {
1644		wintime -= EPOC_TIME;	/* 1970-01-01 00:00:00 (UTC) */
1645		if (ns != NULL)
1646			*ns = (long)(wintime % 10000000) * 100;
1647		return (wintime / 10000000);
1648	} else {
1649		if (ns != NULL)
1650			*ns = 0;
1651		return (0);
1652	}
1653}
1654
1655static unsigned char
1656lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size)
1657{
1658	unsigned char const *p = (unsigned char const *)pp;
1659
1660	p += offset;
1661	for (;size > 0; --size)
1662		sum += *p++;
1663	return (sum);
1664}
1665
1666static uint16_t crc16tbl[2][256];
1667static void
1668lha_crc16_init(void)
1669{
1670	unsigned int i;
1671	static int crc16init = 0;
1672
1673	if (crc16init)
1674		return;
1675	crc16init = 1;
1676
1677	for (i = 0; i < 256; i++) {
1678		unsigned int j;
1679		uint16_t crc = (uint16_t)i;
1680		for (j = 8; j; j--)
1681			crc = (crc >> 1) ^ ((crc & 1) * 0xA001);
1682		crc16tbl[0][i] = crc;
1683	}
1684
1685	for (i = 0; i < 256; i++) {
1686		crc16tbl[1][i] = (crc16tbl[0][i] >> 8)
1687			^ crc16tbl[0][crc16tbl[0][i] & 0xff];
1688	}
1689}
1690
1691static uint16_t
1692lha_crc16(uint16_t crc, const void *pp, size_t len)
1693{
1694	const unsigned char *p = (const unsigned char *)pp;
1695	const uint16_t *buff;
1696	const union {
1697		uint32_t i;
1698		char c[4];
1699	} u = { 0x01020304 };
1700
1701	if (len == 0)
1702		return crc;
1703
1704	/* Process unaligned address. */
1705	if (((uintptr_t)p) & (uintptr_t)0x1) {
1706		crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1707		len--;
1708	}
1709	buff = (const uint16_t *)p;
1710	/*
1711	 * Modern C compiler such as GCC does not unroll automatically yet
1712	 * without unrolling pragma, and Clang is so. So we should
1713	 * unroll this loop for its performance.
1714	 */
1715	for (;len >= 8; len -= 8) {
1716		/* This if statement expects compiler optimization will
1717		 * remove the statement which will not be executed. */
1718#undef bswap16
1719#if defined(_MSC_VER) && _MSC_VER >= 1400  /* Visual Studio */
1720#  define bswap16(x) _byteswap_ushort(x)
1721#elif defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4)
1722/* GCC 4.8 and later has __builtin_bswap16() */
1723#  define bswap16(x) __builtin_bswap16(x)
1724#elif defined(__clang__)
1725/* All clang versions have __builtin_bswap16() */
1726#  define bswap16(x) __builtin_bswap16(x)
1727#else
1728#  define bswap16(x) ((((x) >> 8) & 0xff) | ((x) << 8))
1729#endif
1730#define CRC16W	do { 	\
1731		if(u.c[0] == 1) { /* Big endian */		\
1732			crc ^= bswap16(*buff); buff++;		\
1733		} else						\
1734			crc ^= *buff++;				\
1735		crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];\
1736} while (0)
1737		CRC16W;
1738		CRC16W;
1739		CRC16W;
1740		CRC16W;
1741#undef CRC16W
1742#undef bswap16
1743	}
1744
1745	p = (const unsigned char *)buff;
1746	for (;len; len--) {
1747		crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1748	}
1749	return crc;
1750}
1751
1752/*
1753 * Initialize LZHUF decoder.
1754 *
1755 * Returns ARCHIVE_OK if initialization was successful.
1756 * Returns ARCHIVE_FAILED if method is unsupported.
1757 * Returns ARCHIVE_FATAL if initialization failed; memory allocation
1758 * error occurred.
1759 */
1760static int
1761lzh_decode_init(struct lzh_stream *strm, const char *method)
1762{
1763	struct lzh_dec *ds;
1764	int w_bits, w_size;
1765
1766	if (strm->ds == NULL) {
1767		strm->ds = calloc(1, sizeof(*strm->ds));
1768		if (strm->ds == NULL)
1769			return (ARCHIVE_FATAL);
1770	}
1771	ds = strm->ds;
1772	ds->error = ARCHIVE_FAILED;
1773	if (method == NULL || method[0] != 'l' || method[1] != 'h')
1774		return (ARCHIVE_FAILED);
1775	switch (method[2]) {
1776	case '5':
1777		w_bits = 13;/* 8KiB for window */
1778		break;
1779	case '6':
1780		w_bits = 15;/* 32KiB for window */
1781		break;
1782	case '7':
1783		w_bits = 16;/* 64KiB for window */
1784		break;
1785	default:
1786		return (ARCHIVE_FAILED);/* Not supported. */
1787	}
1788	ds->error = ARCHIVE_FATAL;
1789	/* Expand a window size up to 128 KiB for decompressing process
1790	 * performance whatever its original window size is. */
1791	ds->w_size = 1U << 17;
1792	ds->w_mask = ds->w_size -1;
1793	if (ds->w_buff == NULL) {
1794		ds->w_buff = malloc(ds->w_size);
1795		if (ds->w_buff == NULL)
1796			return (ARCHIVE_FATAL);
1797	}
1798	w_size = 1U << w_bits;
1799	memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size);
1800	ds->w_pos = 0;
1801	ds->state = 0;
1802	ds->pos_pt_len_size = w_bits + 1;
1803	ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4;
1804	ds->literal_pt_len_size = PT_BITLEN_SIZE;
1805	ds->literal_pt_len_bits = 5;
1806	ds->br.cache_buffer = 0;
1807	ds->br.cache_avail = 0;
1808
1809	if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16)
1810	    != ARCHIVE_OK)
1811		return (ARCHIVE_FATAL);
1812	ds->lt.len_bits = 9;
1813	if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16)
1814	    != ARCHIVE_OK)
1815		return (ARCHIVE_FATAL);
1816	ds->error = 0;
1817
1818	return (ARCHIVE_OK);
1819}
1820
1821/*
1822 * Release LZHUF decoder.
1823 */
1824static void
1825lzh_decode_free(struct lzh_stream *strm)
1826{
1827
1828	if (strm->ds == NULL)
1829		return;
1830	free(strm->ds->w_buff);
1831	lzh_huffman_free(&(strm->ds->lt));
1832	lzh_huffman_free(&(strm->ds->pt));
1833	free(strm->ds);
1834	strm->ds = NULL;
1835}
1836
1837/*
1838 * Bit stream reader.
1839 */
1840/* Check that the cache buffer has enough bits. */
1841#define lzh_br_has(br, n)	((br)->cache_avail >= n)
1842/* Get compressed data by bit. */
1843#define lzh_br_bits(br, n)				\
1844	(((uint16_t)((br)->cache_buffer >>		\
1845		((br)->cache_avail - (n)))) & cache_masks[n])
1846#define lzh_br_bits_forced(br, n)			\
1847	(((uint16_t)((br)->cache_buffer <<		\
1848		((n) - (br)->cache_avail))) & cache_masks[n])
1849/* Read ahead to make sure the cache buffer has enough compressed data we
1850 * will use.
1851 *  True  : completed, there is enough data in the cache buffer.
1852 *  False : we met that strm->next_in is empty, we have to get following
1853 *          bytes. */
1854#define lzh_br_read_ahead_0(strm, br, n)	\
1855	(lzh_br_has(br, (n)) || lzh_br_fillup(strm, br))
1856/*  True  : the cache buffer has some bits as much as we need.
1857 *  False : there are no enough bits in the cache buffer to be used,
1858 *          we have to get following bytes if we could. */
1859#define lzh_br_read_ahead(strm, br, n)	\
1860	(lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n)))
1861
1862/* Notify how many bits we consumed. */
1863#define lzh_br_consume(br, n)	((br)->cache_avail -= (n))
1864#define lzh_br_unconsume(br, n)	((br)->cache_avail += (n))
1865
1866static const uint16_t cache_masks[] = {
1867	0x0000, 0x0001, 0x0003, 0x0007,
1868	0x000F, 0x001F, 0x003F, 0x007F,
1869	0x00FF, 0x01FF, 0x03FF, 0x07FF,
1870	0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF,
1871	0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
1872};
1873
1874/*
1875 * Shift away used bits in the cache data and fill it up with following bits.
1876 * Call this when cache buffer does not have enough bits you need.
1877 *
1878 * Returns 1 if the cache buffer is full.
1879 * Returns 0 if the cache buffer is not full; input buffer is empty.
1880 */
1881static int
1882lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br)
1883{
1884	int n = CACHE_BITS - br->cache_avail;
1885
1886	for (;;) {
1887		const int x = n >> 3;
1888		if (strm->avail_in >= x) {
1889			switch (x) {
1890			case 8:
1891				br->cache_buffer =
1892				    ((uint64_t)strm->next_in[0]) << 56 |
1893				    ((uint64_t)strm->next_in[1]) << 48 |
1894				    ((uint64_t)strm->next_in[2]) << 40 |
1895				    ((uint64_t)strm->next_in[3]) << 32 |
1896				    ((uint32_t)strm->next_in[4]) << 24 |
1897				    ((uint32_t)strm->next_in[5]) << 16 |
1898				    ((uint32_t)strm->next_in[6]) << 8 |
1899				     (uint32_t)strm->next_in[7];
1900				strm->next_in += 8;
1901				strm->avail_in -= 8;
1902				br->cache_avail += 8 * 8;
1903				return (1);
1904			case 7:
1905				br->cache_buffer =
1906		 		   (br->cache_buffer << 56) |
1907				    ((uint64_t)strm->next_in[0]) << 48 |
1908				    ((uint64_t)strm->next_in[1]) << 40 |
1909				    ((uint64_t)strm->next_in[2]) << 32 |
1910				    ((uint32_t)strm->next_in[3]) << 24 |
1911				    ((uint32_t)strm->next_in[4]) << 16 |
1912				    ((uint32_t)strm->next_in[5]) << 8 |
1913				     (uint32_t)strm->next_in[6];
1914				strm->next_in += 7;
1915				strm->avail_in -= 7;
1916				br->cache_avail += 7 * 8;
1917				return (1);
1918			case 6:
1919				br->cache_buffer =
1920		 		   (br->cache_buffer << 48) |
1921				    ((uint64_t)strm->next_in[0]) << 40 |
1922				    ((uint64_t)strm->next_in[1]) << 32 |
1923				    ((uint32_t)strm->next_in[2]) << 24 |
1924				    ((uint32_t)strm->next_in[3]) << 16 |
1925				    ((uint32_t)strm->next_in[4]) << 8 |
1926				     (uint32_t)strm->next_in[5];
1927				strm->next_in += 6;
1928				strm->avail_in -= 6;
1929				br->cache_avail += 6 * 8;
1930				return (1);
1931			case 0:
1932				/* We have enough compressed data in
1933				 * the cache buffer.*/
1934				return (1);
1935			default:
1936				break;
1937			}
1938		}
1939		if (strm->avail_in == 0) {
1940			/* There is not enough compressed data to fill up the
1941			 * cache buffer. */
1942			return (0);
1943		}
1944		br->cache_buffer =
1945		   (br->cache_buffer << 8) | *strm->next_in++;
1946		strm->avail_in--;
1947		br->cache_avail += 8;
1948		n -= 8;
1949	}
1950}
1951
1952/*
1953 * Decode LZHUF.
1954 *
1955 * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty.
1956 *    Please set available buffer and call this function again.
1957 * 2. Returns ARCHIVE_EOF if decompression has been completed.
1958 * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data
1959 *    is broken or you do not set 'last' flag properly.
1960 * 4. 'last' flag is very important, you must set 1 to the flag if there
1961 *    is no input data. The lha compressed data format does not provide how
1962 *    to know the compressed data is really finished.
1963 *    Note: lha command utility check if the total size of output bytes is
1964 *    reached the uncompressed size recorded in its header. it does not mind
1965 *    that the decoding process is properly finished.
1966 *    GNU ZIP can decompress another compressed file made by SCO LZH compress.
1967 *    it handles EOF as null to fill read buffer with zero until the decoding
1968 *    process meet 2 bytes of zeros at reading a size of a next chunk, so the
1969 *    zeros are treated as the mark of the end of the data although the zeros
1970 *    is dummy, not the file data.
1971 */
1972static int	lzh_read_blocks(struct lzh_stream *, int);
1973static int	lzh_decode_blocks(struct lzh_stream *, int);
1974#define ST_RD_BLOCK		0
1975#define ST_RD_PT_1		1
1976#define ST_RD_PT_2		2
1977#define ST_RD_PT_3		3
1978#define ST_RD_PT_4		4
1979#define ST_RD_LITERAL_1		5
1980#define ST_RD_LITERAL_2		6
1981#define ST_RD_LITERAL_3		7
1982#define ST_RD_POS_DATA_1	8
1983#define ST_GET_LITERAL		9
1984#define ST_GET_POS_1		10
1985#define ST_GET_POS_2		11
1986#define ST_COPY_DATA		12
1987
1988static int
1989lzh_decode(struct lzh_stream *strm, int last)
1990{
1991	struct lzh_dec *ds = strm->ds;
1992	int avail_in;
1993	int r;
1994
1995	if (ds->error)
1996		return (ds->error);
1997
1998	avail_in = strm->avail_in;
1999	do {
2000		if (ds->state < ST_GET_LITERAL)
2001			r = lzh_read_blocks(strm, last);
2002		else
2003			r = lzh_decode_blocks(strm, last);
2004	} while (r == 100);
2005	strm->total_in += avail_in - strm->avail_in;
2006	return (r);
2007}
2008
2009static void
2010lzh_emit_window(struct lzh_stream *strm, size_t s)
2011{
2012	strm->ref_ptr = strm->ds->w_buff;
2013	strm->avail_out = (int)s;
2014	strm->total_out += s;
2015}
2016
2017static int
2018lzh_read_blocks(struct lzh_stream *strm, int last)
2019{
2020	struct lzh_dec *ds = strm->ds;
2021	struct lzh_br *br = &(ds->br);
2022	int c = 0, i;
2023	unsigned rbits;
2024
2025	for (;;) {
2026		switch (ds->state) {
2027		case ST_RD_BLOCK:
2028			/*
2029			 * Read a block number indicates how many blocks
2030			 * we will handle. The block is composed of a
2031			 * literal and a match, sometimes a literal only
2032			 * in particular, there are no reference data at
2033			 * the beginning of the decompression.
2034			 */
2035			if (!lzh_br_read_ahead_0(strm, br, 16)) {
2036				if (!last)
2037					/* We need following data. */
2038					return (ARCHIVE_OK);
2039				if (lzh_br_has(br, 8)) {
2040					/*
2041					 * It seems there are extra bits.
2042					 *  1. Compressed data is broken.
2043					 *  2. `last' flag does not properly
2044					 *     set.
2045					 */
2046					goto failed;
2047				}
2048				if (ds->w_pos > 0) {
2049					lzh_emit_window(strm, ds->w_pos);
2050					ds->w_pos = 0;
2051					return (ARCHIVE_OK);
2052				}
2053				/* End of compressed data; we have completely
2054				 * handled all compressed data. */
2055				return (ARCHIVE_EOF);
2056			}
2057			ds->blocks_avail = lzh_br_bits(br, 16);
2058			if (ds->blocks_avail == 0)
2059				goto failed;
2060			lzh_br_consume(br, 16);
2061			/*
2062			 * Read a literal table compressed in huffman
2063			 * coding.
2064			 */
2065			ds->pt.len_size = ds->literal_pt_len_size;
2066			ds->pt.len_bits = ds->literal_pt_len_bits;
2067			ds->reading_position = 0;
2068			/* FALL THROUGH */
2069		case ST_RD_PT_1:
2070			/* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are
2071			 * used in reading both a literal table and a
2072			 * position table. */
2073			if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) {
2074				if (last)
2075					goto failed;/* Truncated data. */
2076				ds->state = ST_RD_PT_1;
2077				return (ARCHIVE_OK);
2078			}
2079			ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits);
2080			lzh_br_consume(br, ds->pt.len_bits);
2081			/* FALL THROUGH */
2082		case ST_RD_PT_2:
2083			if (ds->pt.len_avail == 0) {
2084				/* There is no bitlen. */
2085				if (!lzh_br_read_ahead(strm, br,
2086				    ds->pt.len_bits)) {
2087					if (last)
2088						goto failed;/* Truncated data.*/
2089					ds->state = ST_RD_PT_2;
2090					return (ARCHIVE_OK);
2091				}
2092				if (!lzh_make_fake_table(&(ds->pt),
2093				    lzh_br_bits(br, ds->pt.len_bits)))
2094					goto failed;/* Invalid data. */
2095				lzh_br_consume(br, ds->pt.len_bits);
2096				if (ds->reading_position)
2097					ds->state = ST_GET_LITERAL;
2098				else
2099					ds->state = ST_RD_LITERAL_1;
2100				break;
2101			} else if (ds->pt.len_avail > ds->pt.len_size)
2102				goto failed;/* Invalid data. */
2103			ds->loop = 0;
2104			memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
2105			if (ds->pt.len_avail < 3 ||
2106			    ds->pt.len_size == ds->pos_pt_len_size) {
2107				ds->state = ST_RD_PT_4;
2108				break;
2109			}
2110			/* FALL THROUGH */
2111		case ST_RD_PT_3:
2112			ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3);
2113			if (ds->loop < 3) {
2114				if (ds->loop < 0 || last)
2115					goto failed;/* Invalid data. */
2116				/* Not completed, get following data. */
2117				ds->state = ST_RD_PT_3;
2118				return (ARCHIVE_OK);
2119			}
2120			/* There are some null in bitlen of the literal. */
2121			if (!lzh_br_read_ahead(strm, br, 2)) {
2122				if (last)
2123					goto failed;/* Truncated data. */
2124				ds->state = ST_RD_PT_3;
2125				return (ARCHIVE_OK);
2126			}
2127			c = lzh_br_bits(br, 2);
2128			lzh_br_consume(br, 2);
2129			if (c > ds->pt.len_avail - 3)
2130				goto failed;/* Invalid data. */
2131			for (i = 3; c-- > 0 ;)
2132				ds->pt.bitlen[i++] = 0;
2133			ds->loop = i;
2134			/* FALL THROUGH */
2135		case ST_RD_PT_4:
2136			ds->loop = lzh_read_pt_bitlen(strm, ds->loop,
2137			    ds->pt.len_avail);
2138			if (ds->loop < ds->pt.len_avail) {
2139				if (ds->loop < 0 || last)
2140					goto failed;/* Invalid data. */
2141				/* Not completed, get following data. */
2142				ds->state = ST_RD_PT_4;
2143				return (ARCHIVE_OK);
2144			}
2145			if (!lzh_make_huffman_table(&(ds->pt)))
2146				goto failed;/* Invalid data */
2147			if (ds->reading_position) {
2148				ds->state = ST_GET_LITERAL;
2149				break;
2150			}
2151			/* FALL THROUGH */
2152		case ST_RD_LITERAL_1:
2153			if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) {
2154				if (last)
2155					goto failed;/* Truncated data. */
2156				ds->state = ST_RD_LITERAL_1;
2157				return (ARCHIVE_OK);
2158			}
2159			ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits);
2160			lzh_br_consume(br, ds->lt.len_bits);
2161			/* FALL THROUGH */
2162		case ST_RD_LITERAL_2:
2163			if (ds->lt.len_avail == 0) {
2164				/* There is no bitlen. */
2165				if (!lzh_br_read_ahead(strm, br,
2166				    ds->lt.len_bits)) {
2167					if (last)
2168						goto failed;/* Truncated data.*/
2169					ds->state = ST_RD_LITERAL_2;
2170					return (ARCHIVE_OK);
2171				}
2172				if (!lzh_make_fake_table(&(ds->lt),
2173				    lzh_br_bits(br, ds->lt.len_bits)))
2174					goto failed;/* Invalid data */
2175				lzh_br_consume(br, ds->lt.len_bits);
2176				ds->state = ST_RD_POS_DATA_1;
2177				break;
2178			} else if (ds->lt.len_avail > ds->lt.len_size)
2179				goto failed;/* Invalid data */
2180			ds->loop = 0;
2181			memset(ds->lt.freq, 0, sizeof(ds->lt.freq));
2182			/* FALL THROUGH */
2183		case ST_RD_LITERAL_3:
2184			i = ds->loop;
2185			while (i < ds->lt.len_avail) {
2186				if (!lzh_br_read_ahead(strm, br,
2187				    ds->pt.max_bits)) {
2188					if (last)
2189						goto failed;/* Truncated data.*/
2190					ds->loop = i;
2191					ds->state = ST_RD_LITERAL_3;
2192					return (ARCHIVE_OK);
2193				}
2194				rbits = lzh_br_bits(br, ds->pt.max_bits);
2195				c = lzh_decode_huffman(&(ds->pt), rbits);
2196				if (c > 2) {
2197					/* Note: 'c' will never be more than
2198					 * eighteen since it's limited by
2199					 * PT_BITLEN_SIZE, which is being set
2200					 * to ds->pt.len_size through
2201					 * ds->literal_pt_len_size. */
2202					lzh_br_consume(br, ds->pt.bitlen[c]);
2203					c -= 2;
2204					ds->lt.freq[c]++;
2205					ds->lt.bitlen[i++] = c;
2206				} else if (c == 0) {
2207					lzh_br_consume(br, ds->pt.bitlen[c]);
2208					ds->lt.bitlen[i++] = 0;
2209				} else {
2210					/* c == 1 or c == 2 */
2211					int n = (c == 1)?4:9;
2212					if (!lzh_br_read_ahead(strm, br,
2213					     ds->pt.bitlen[c] + n)) {
2214						if (last) /* Truncated data. */
2215							goto failed;
2216						ds->loop = i;
2217						ds->state = ST_RD_LITERAL_3;
2218						return (ARCHIVE_OK);
2219					}
2220					lzh_br_consume(br, ds->pt.bitlen[c]);
2221					c = lzh_br_bits(br, n);
2222					lzh_br_consume(br, n);
2223					c += (n == 4)?3:20;
2224					if (i + c > ds->lt.len_avail)
2225						goto failed;/* Invalid data */
2226					memset(&(ds->lt.bitlen[i]), 0, c);
2227					i += c;
2228				}
2229			}
2230			if (i > ds->lt.len_avail ||
2231			    !lzh_make_huffman_table(&(ds->lt)))
2232				goto failed;/* Invalid data */
2233			/* FALL THROUGH */
2234		case ST_RD_POS_DATA_1:
2235			/*
2236			 * Read a position table compressed in huffman
2237			 * coding.
2238			 */
2239			ds->pt.len_size = ds->pos_pt_len_size;
2240			ds->pt.len_bits = ds->pos_pt_len_bits;
2241			ds->reading_position = 1;
2242			ds->state = ST_RD_PT_1;
2243			break;
2244		case ST_GET_LITERAL:
2245			return (100);
2246		}
2247	}
2248failed:
2249	return (ds->error = ARCHIVE_FAILED);
2250}
2251
2252static int
2253lzh_decode_blocks(struct lzh_stream *strm, int last)
2254{
2255	struct lzh_dec *ds = strm->ds;
2256	struct lzh_br bre = ds->br;
2257	struct huffman *lt = &(ds->lt);
2258	struct huffman *pt = &(ds->pt);
2259	unsigned char *w_buff = ds->w_buff;
2260	unsigned char *lt_bitlen = lt->bitlen;
2261	unsigned char *pt_bitlen = pt->bitlen;
2262	int blocks_avail = ds->blocks_avail, c = 0;
2263	int copy_len = ds->copy_len, copy_pos = ds->copy_pos;
2264	int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
2265	int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits;
2266	int state = ds->state;
2267
2268	for (;;) {
2269		switch (state) {
2270		case ST_GET_LITERAL:
2271			for (;;) {
2272				if (blocks_avail == 0) {
2273					/* We have decoded all blocks.
2274					 * Let's handle next blocks. */
2275					ds->state = ST_RD_BLOCK;
2276					ds->br = bre;
2277					ds->blocks_avail = 0;
2278					ds->w_pos = w_pos;
2279					ds->copy_pos = 0;
2280					return (100);
2281				}
2282
2283				/* lzh_br_read_ahead() always try to fill the
2284				 * cache buffer up. In specific situation we
2285				 * are close to the end of the data, the cache
2286				 * buffer will not be full and thus we have to
2287				 * determine if the cache buffer has some bits
2288				 * as much as we need after lzh_br_read_ahead()
2289				 * failed. */
2290				if (!lzh_br_read_ahead(strm, &bre,
2291				    lt_max_bits)) {
2292					if (!last)
2293						goto next_data;
2294					/* Remaining bits are less than
2295					 * maximum bits(lt.max_bits) but maybe
2296					 * it still remains as much as we need,
2297					 * so we should try to use it with
2298					 * dummy bits. */
2299					c = lzh_decode_huffman(lt,
2300					      lzh_br_bits_forced(&bre,
2301					        lt_max_bits));
2302					lzh_br_consume(&bre, lt_bitlen[c]);
2303					if (!lzh_br_has(&bre, 0))
2304						goto failed;/* Over read. */
2305				} else {
2306					c = lzh_decode_huffman(lt,
2307					      lzh_br_bits(&bre, lt_max_bits));
2308					lzh_br_consume(&bre, lt_bitlen[c]);
2309				}
2310				blocks_avail--;
2311				if (c > UCHAR_MAX)
2312					/* Current block is a match data. */
2313					break;
2314				/*
2315				 * 'c' is exactly a literal code.
2316				 */
2317				/* Save a decoded code to reference it
2318				 * afterward. */
2319				w_buff[w_pos] = c;
2320				if (++w_pos >= w_size) {
2321					w_pos = 0;
2322					lzh_emit_window(strm, w_size);
2323					goto next_data;
2324				}
2325			}
2326			/* 'c' is the length of a match pattern we have
2327			 * already extracted, which has be stored in
2328			 * window(ds->w_buff). */
2329			copy_len = c - (UCHAR_MAX + 1) + MINMATCH;
2330			/* FALL THROUGH */
2331		case ST_GET_POS_1:
2332			/*
2333			 * Get a reference position.
2334			 */
2335			if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) {
2336				if (!last) {
2337					state = ST_GET_POS_1;
2338					ds->copy_len = copy_len;
2339					goto next_data;
2340				}
2341				copy_pos = lzh_decode_huffman(pt,
2342				    lzh_br_bits_forced(&bre, pt_max_bits));
2343				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2344				if (!lzh_br_has(&bre, 0))
2345					goto failed;/* Over read. */
2346			} else {
2347				copy_pos = lzh_decode_huffman(pt,
2348				    lzh_br_bits(&bre, pt_max_bits));
2349				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2350			}
2351			/* FALL THROUGH */
2352		case ST_GET_POS_2:
2353			if (copy_pos > 1) {
2354				/* We need an additional adjustment number to
2355				 * the position. */
2356				int p = copy_pos - 1;
2357				if (!lzh_br_read_ahead(strm, &bre, p)) {
2358					if (last)
2359						goto failed;/* Truncated data.*/
2360					state = ST_GET_POS_2;
2361					ds->copy_len = copy_len;
2362					ds->copy_pos = copy_pos;
2363					goto next_data;
2364				}
2365				copy_pos = (1 << p) + lzh_br_bits(&bre, p);
2366				lzh_br_consume(&bre, p);
2367			}
2368			/* The position is actually a distance from the last
2369			 * code we had extracted and thus we have to convert
2370			 * it to a position of the window. */
2371			copy_pos = (w_pos - copy_pos - 1) & w_mask;
2372			/* FALL THROUGH */
2373		case ST_COPY_DATA:
2374			/*
2375			 * Copy `copy_len' bytes as extracted data from
2376			 * the window into the output buffer.
2377			 */
2378			for (;;) {
2379				int l;
2380
2381				l = copy_len;
2382				if (copy_pos > w_pos) {
2383					if (l > w_size - copy_pos)
2384						l = w_size - copy_pos;
2385				} else {
2386					if (l > w_size - w_pos)
2387						l = w_size - w_pos;
2388				}
2389				if ((copy_pos + l < w_pos)
2390				    || (w_pos + l < copy_pos)) {
2391					/* No overlap. */
2392					memcpy(w_buff + w_pos,
2393					    w_buff + copy_pos, l);
2394				} else {
2395					const unsigned char *s;
2396					unsigned char *d;
2397					int li;
2398
2399					d = w_buff + w_pos;
2400					s = w_buff + copy_pos;
2401					for (li = 0; li < l-1;) {
2402						d[li] = s[li];li++;
2403						d[li] = s[li];li++;
2404					}
2405					if (li < l)
2406						d[li] = s[li];
2407				}
2408				w_pos += l;
2409				if (w_pos == w_size) {
2410					w_pos = 0;
2411					lzh_emit_window(strm, w_size);
2412					if (copy_len <= l)
2413						state = ST_GET_LITERAL;
2414					else {
2415						state = ST_COPY_DATA;
2416						ds->copy_len = copy_len - l;
2417						ds->copy_pos =
2418						    (copy_pos + l) & w_mask;
2419					}
2420					goto next_data;
2421				}
2422				if (copy_len <= l)
2423					/* A copy of current pattern ended. */
2424					break;
2425				copy_len -= l;
2426				copy_pos = (copy_pos + l) & w_mask;
2427			}
2428			state = ST_GET_LITERAL;
2429			break;
2430		}
2431	}
2432failed:
2433	return (ds->error = ARCHIVE_FAILED);
2434next_data:
2435	ds->br = bre;
2436	ds->blocks_avail = blocks_avail;
2437	ds->state = state;
2438	ds->w_pos = w_pos;
2439	return (ARCHIVE_OK);
2440}
2441
2442static int
2443lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
2444{
2445	int bits;
2446
2447	if (hf->bitlen == NULL) {
2448		hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0]));
2449		if (hf->bitlen == NULL)
2450			return (ARCHIVE_FATAL);
2451	}
2452	if (hf->tbl == NULL) {
2453		if (tbl_bits < HTBL_BITS)
2454			bits = tbl_bits;
2455		else
2456			bits = HTBL_BITS;
2457		hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
2458		if (hf->tbl == NULL)
2459			return (ARCHIVE_FATAL);
2460	}
2461	if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
2462		hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
2463		hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
2464		if (hf->tree == NULL)
2465			return (ARCHIVE_FATAL);
2466	}
2467	hf->len_size = (int)len_size;
2468	hf->tbl_bits = tbl_bits;
2469	return (ARCHIVE_OK);
2470}
2471
2472static void
2473lzh_huffman_free(struct huffman *hf)
2474{
2475	free(hf->bitlen);
2476	free(hf->tbl);
2477	free(hf->tree);
2478}
2479
2480static const char bitlen_tbl[0x400] = {
2481	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2482	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2483	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2484	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2485	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2486	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2487	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2488	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2489	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2490	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2491	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2492	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2493	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2494	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2495	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2496	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2497	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2498	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2499	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2500	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2501	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2502	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2503	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2504	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2505	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2506	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2507	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2508	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2509	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2510	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2511	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2512	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2513	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2514	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2515	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2516	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2517	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2518	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2519	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2520	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2521	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2522	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2523	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2524	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2525	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2526	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2527	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2528	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2529	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2530	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2531	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2532	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2533	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2534	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2535	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2536	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2537	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2538	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2539	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2540	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2541	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2542	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2543	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
2544	13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16,  0
2545};
2546static int
2547lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end)
2548{
2549	struct lzh_dec *ds = strm->ds;
2550	struct lzh_br *br = &(ds->br);
2551	int c, i;
2552
2553	for (i = start; i < end; ) {
2554		/*
2555		 *  bit pattern     the number we need
2556		 *     000           ->  0
2557		 *     001           ->  1
2558		 *     010           ->  2
2559		 *     ...
2560		 *     110           ->  6
2561		 *     1110          ->  7
2562		 *     11110         ->  8
2563		 *     ...
2564		 *     1111111111110 ->  16
2565		 */
2566		if (!lzh_br_read_ahead(strm, br, 3))
2567			return (i);
2568		if ((c = lzh_br_bits(br, 3)) == 7) {
2569			if (!lzh_br_read_ahead(strm, br, 13))
2570				return (i);
2571			c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF];
2572			if (c)
2573				lzh_br_consume(br, c - 3);
2574			else
2575				return (-1);/* Invalid data. */
2576		} else
2577			lzh_br_consume(br, 3);
2578		ds->pt.bitlen[i++] = c;
2579		ds->pt.freq[c]++;
2580	}
2581	return (i);
2582}
2583
2584static int
2585lzh_make_fake_table(struct huffman *hf, uint16_t c)
2586{
2587	if (c >= hf->len_size)
2588		return (0);
2589	hf->tbl[0] = c;
2590	hf->max_bits = 0;
2591	hf->shift_bits = 0;
2592	hf->bitlen[hf->tbl[0]] = 0;
2593	return (1);
2594}
2595
2596/*
2597 * Make a huffman coding table.
2598 */
2599static int
2600lzh_make_huffman_table(struct huffman *hf)
2601{
2602	uint16_t *tbl;
2603	const unsigned char *bitlen;
2604	int bitptn[17], weight[17];
2605	int i, maxbits = 0, ptn, tbl_size, w;
2606	int diffbits, len_avail;
2607
2608	/*
2609	 * Initialize bit patterns.
2610	 */
2611	ptn = 0;
2612	for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
2613		bitptn[i] = ptn;
2614		weight[i] = w;
2615		if (hf->freq[i]) {
2616			ptn += hf->freq[i] * w;
2617			maxbits = i;
2618		}
2619	}
2620	if (ptn != 0x10000 || maxbits > hf->tbl_bits)
2621		return (0);/* Invalid */
2622
2623	hf->max_bits = maxbits;
2624
2625	/*
2626	 * Cut out extra bits which we won't house in the table.
2627	 * This preparation reduces the same calculation in the for-loop
2628	 * making the table.
2629	 */
2630	if (maxbits < 16) {
2631		int ebits = 16 - maxbits;
2632		for (i = 1; i <= maxbits; i++) {
2633			bitptn[i] >>= ebits;
2634			weight[i] >>= ebits;
2635		}
2636	}
2637	if (maxbits > HTBL_BITS) {
2638		unsigned htbl_max;
2639		uint16_t *p;
2640
2641		diffbits = maxbits - HTBL_BITS;
2642		for (i = 1; i <= HTBL_BITS; i++) {
2643			bitptn[i] >>= diffbits;
2644			weight[i] >>= diffbits;
2645		}
2646		htbl_max = bitptn[HTBL_BITS] +
2647		    weight[HTBL_BITS] * hf->freq[HTBL_BITS];
2648		p = &(hf->tbl[htbl_max]);
2649		while (p < &hf->tbl[1U<<HTBL_BITS])
2650			*p++ = 0;
2651	} else
2652		diffbits = 0;
2653	hf->shift_bits = diffbits;
2654
2655	/*
2656	 * Make the table.
2657	 */
2658	tbl_size = 1 << HTBL_BITS;
2659	tbl = hf->tbl;
2660	bitlen = hf->bitlen;
2661	len_avail = hf->len_avail;
2662	hf->tree_used = 0;
2663	for (i = 0; i < len_avail; i++) {
2664		uint16_t *p;
2665		int len, cnt;
2666		uint16_t bit;
2667		int extlen;
2668		struct htree_t *ht;
2669
2670		if (bitlen[i] == 0)
2671			continue;
2672		/* Get a bit pattern */
2673		len = bitlen[i];
2674		ptn = bitptn[len];
2675		cnt = weight[len];
2676		if (len <= HTBL_BITS) {
2677			/* Calculate next bit pattern */
2678			if ((bitptn[len] = ptn + cnt) > tbl_size)
2679				return (0);/* Invalid */
2680			/* Update the table */
2681			p = &(tbl[ptn]);
2682			if (cnt > 7) {
2683				uint16_t *pc;
2684
2685				cnt -= 8;
2686				pc = &p[cnt];
2687				pc[0] = (uint16_t)i;
2688				pc[1] = (uint16_t)i;
2689				pc[2] = (uint16_t)i;
2690				pc[3] = (uint16_t)i;
2691				pc[4] = (uint16_t)i;
2692				pc[5] = (uint16_t)i;
2693				pc[6] = (uint16_t)i;
2694				pc[7] = (uint16_t)i;
2695				if (cnt > 7) {
2696					cnt -= 8;
2697					memcpy(&p[cnt], pc,
2698						8 * sizeof(uint16_t));
2699					pc = &p[cnt];
2700					while (cnt > 15) {
2701						cnt -= 16;
2702						memcpy(&p[cnt], pc,
2703							16 * sizeof(uint16_t));
2704					}
2705				}
2706				if (cnt)
2707					memcpy(p, pc, cnt * sizeof(uint16_t));
2708			} else {
2709				while (cnt > 1) {
2710					p[--cnt] = (uint16_t)i;
2711					p[--cnt] = (uint16_t)i;
2712				}
2713				if (cnt)
2714					p[--cnt] = (uint16_t)i;
2715			}
2716			continue;
2717		}
2718
2719		/*
2720		 * A bit length is too big to be housed to a direct table,
2721		 * so we use a tree model for its extra bits.
2722		 */
2723		bitptn[len] = ptn + cnt;
2724		bit = 1U << (diffbits -1);
2725		extlen = len - HTBL_BITS;
2726
2727		p = &(tbl[ptn >> diffbits]);
2728		if (*p == 0) {
2729			*p = len_avail + hf->tree_used;
2730			ht = &(hf->tree[hf->tree_used++]);
2731			if (hf->tree_used > hf->tree_avail)
2732				return (0);/* Invalid */
2733			ht->left = 0;
2734			ht->right = 0;
2735		} else {
2736			if (*p < len_avail ||
2737			    *p >= (len_avail + hf->tree_used))
2738				return (0);/* Invalid */
2739			ht = &(hf->tree[*p - len_avail]);
2740		}
2741		while (--extlen > 0) {
2742			if (ptn & bit) {
2743				if (ht->left < len_avail) {
2744					ht->left = len_avail + hf->tree_used;
2745					ht = &(hf->tree[hf->tree_used++]);
2746					if (hf->tree_used > hf->tree_avail)
2747						return (0);/* Invalid */
2748					ht->left = 0;
2749					ht->right = 0;
2750				} else {
2751					ht = &(hf->tree[ht->left - len_avail]);
2752				}
2753			} else {
2754				if (ht->right < len_avail) {
2755					ht->right = len_avail + hf->tree_used;
2756					ht = &(hf->tree[hf->tree_used++]);
2757					if (hf->tree_used > hf->tree_avail)
2758						return (0);/* Invalid */
2759					ht->left = 0;
2760					ht->right = 0;
2761				} else {
2762					ht = &(hf->tree[ht->right - len_avail]);
2763				}
2764			}
2765			bit >>= 1;
2766		}
2767		if (ptn & bit) {
2768			if (ht->left != 0)
2769				return (0);/* Invalid */
2770			ht->left = (uint16_t)i;
2771		} else {
2772			if (ht->right != 0)
2773				return (0);/* Invalid */
2774			ht->right = (uint16_t)i;
2775		}
2776	}
2777	return (1);
2778}
2779
2780static int
2781lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
2782{
2783	struct htree_t *ht;
2784	int extlen;
2785
2786	ht = hf->tree;
2787	extlen = hf->shift_bits;
2788	while (c >= hf->len_avail) {
2789		c -= hf->len_avail;
2790		if (extlen-- <= 0 || c >= hf->tree_used)
2791			return (0);
2792		if (rbits & (1U << extlen))
2793			c = ht[c].left;
2794		else
2795			c = ht[c].right;
2796	}
2797	return (c);
2798}
2799
2800static inline int
2801lzh_decode_huffman(struct huffman *hf, unsigned rbits)
2802{
2803	int c;
2804	/*
2805	 * At first search an index table for a bit pattern.
2806	 * If it fails, search a huffman tree for.
2807	 */
2808	c = hf->tbl[rbits >> hf->shift_bits];
2809	if (c < hf->len_avail || hf->len_avail == 0)
2810		return (c);
2811	/* This bit pattern needs to be found out at a huffman tree. */
2812	return (lzh_decode_huffman_tree(hf, rbits, c));
2813}
2814
2815