1/*-
2 * Copyright (c) 2008-2014 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
27
28#ifdef HAVE_ERRNO_H
29#include <errno.h>
30#endif
31#ifdef HAVE_LIMITS_H
32#include <limits.h>
33#endif
34#ifdef HAVE_STDLIB_H
35#include <stdlib.h>
36#endif
37#ifdef HAVE_STRING_H
38#include <string.h>
39#endif
40
41#include "archive.h"
42#include "archive_entry.h"
43#include "archive_entry_locale.h"
44#include "archive_private.h"
45#include "archive_read_private.h"
46#include "archive_endian.h"
47
48
49#define MAXMATCH		256	/* Maximum match length. */
50#define MINMATCH		3	/* Minimum match length. */
51/*
52 * Literal table format:
53 * +0              +256                      +510
54 * +---------------+-------------------------+
55 * | literal code  |       match length      |
56 * |   0 ... 255   |  MINMATCH ... MAXMATCH  |
57 * +---------------+-------------------------+
58 *  <---          LT_BITLEN_SIZE         --->
59 */
60/* Literal table size. */
61#define LT_BITLEN_SIZE		(UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1)
62/* Position table size.
63 * Note: this used for both position table and pre literal table.*/
64#define PT_BITLEN_SIZE		(3 + 16)
65
66struct lzh_dec {
67	/* Decoding status. */
68	int     		 state;
69
70	/*
71	 * Window to see last 8Ki(lh5),32Ki(lh6),64Ki(lh7) bytes of decoded
72	 * data.
73	 */
74	int			 w_size;
75	int			 w_mask;
76	/* Window buffer, which is a loop buffer. */
77	unsigned char		*w_buff;
78	/* The insert position to the window. */
79	int			 w_pos;
80	/* The position where we can copy decoded code from the window. */
81	int     		 copy_pos;
82	/* The length how many bytes we can copy decoded code from
83	 * the window. */
84	int     		 copy_len;
85
86	/*
87	 * Bit stream reader.
88	 */
89	struct lzh_br {
90#define CACHE_TYPE		uint64_t
91#define CACHE_BITS		(8 * sizeof(CACHE_TYPE))
92	 	/* Cache buffer. */
93		CACHE_TYPE	 cache_buffer;
94		/* Indicates how many bits avail in cache_buffer. */
95		int		 cache_avail;
96	} br;
97
98	/*
99	 * Huffman coding.
100	 */
101	struct huffman {
102		int		 len_size;
103		int		 len_avail;
104		int		 len_bits;
105		int		 freq[17];
106		unsigned char	*bitlen;
107
108		/*
109		 * Use a index table. It's faster than searching a huffman
110		 * coding tree, which is a binary tree. But a use of a large
111		 * index table causes L1 cache read miss many times.
112		 */
113#define HTBL_BITS	10
114		int		 max_bits;
115		int		 shift_bits;
116		int		 tbl_bits;
117		int		 tree_used;
118		int		 tree_avail;
119		/* Direct access table. */
120		uint16_t	*tbl;
121		/* Binary tree table for extra bits over the direct access. */
122		struct htree_t {
123			uint16_t left;
124			uint16_t right;
125		}		*tree;
126	}			 lt, pt;
127
128	int			 blocks_avail;
129	int			 pos_pt_len_size;
130	int			 pos_pt_len_bits;
131	int			 literal_pt_len_size;
132	int			 literal_pt_len_bits;
133	int			 reading_position;
134	int			 loop;
135	int			 error;
136};
137
138struct lzh_stream {
139	const unsigned char	*next_in;
140	int			 avail_in;
141	int64_t			 total_in;
142	const unsigned char	*ref_ptr;
143	int			 avail_out;
144	int64_t			 total_out;
145	struct lzh_dec		*ds;
146};
147
148struct lha {
149	/* entry_bytes_remaining is the number of bytes we expect.	    */
150	int64_t                  entry_offset;
151	int64_t                  entry_bytes_remaining;
152	int64_t			 entry_unconsumed;
153	uint16_t		 entry_crc_calculated;
154
155	size_t			 header_size;	/* header size		    */
156	unsigned char		 level;		/* header level		    */
157	char			 method[3];	/* compress type	    */
158	int64_t			 compsize;	/* compressed data size	    */
159	int64_t			 origsize;	/* original file size	    */
160	int			 setflag;
161#define BIRTHTIME_IS_SET	1
162#define ATIME_IS_SET		2
163#define UNIX_MODE_IS_SET	4
164#define CRC_IS_SET		8
165	time_t			 birthtime;
166	long			 birthtime_tv_nsec;
167	time_t			 mtime;
168	long			 mtime_tv_nsec;
169	time_t			 atime;
170	long			 atime_tv_nsec;
171	mode_t			 mode;
172	int64_t			 uid;
173	int64_t			 gid;
174	struct archive_string 	 uname;
175	struct archive_string 	 gname;
176	uint16_t		 header_crc;
177	uint16_t		 crc;
178	/* dirname and filename could be in different codepages */
179	struct archive_string_conv *sconv_dir;
180	struct archive_string_conv *sconv_fname;
181	struct archive_string_conv *opt_sconv;
182
183	struct archive_string 	 dirname;
184	struct archive_string 	 filename;
185	struct archive_wstring	 ws;
186
187	unsigned char		 dos_attr;
188
189	/* Flag to mark progress that an archive was read their first header.*/
190	char			 found_first_header;
191	/* Flag to mark that indicates an empty directory. */
192	char			 directory;
193
194	/* Flags to mark progress of decompression. */
195	char			 decompress_init;
196	char			 end_of_entry;
197	char			 end_of_entry_cleanup;
198	char			 entry_is_compressed;
199
200	char			 format_name[64];
201
202	struct lzh_stream	 strm;
203};
204
205/*
206 * LHA header common member offset.
207 */
208#define H_METHOD_OFFSET	2	/* Compress type. */
209#define H_ATTR_OFFSET	19	/* DOS attribute. */
210#define H_LEVEL_OFFSET	20	/* Header Level.  */
211#define H_SIZE		22	/* Minimum header size. */
212
213static int      archive_read_format_lha_bid(struct archive_read *, int);
214static int      archive_read_format_lha_options(struct archive_read *,
215		    const char *, const char *);
216static int	archive_read_format_lha_read_header(struct archive_read *,
217		    struct archive_entry *);
218static int	archive_read_format_lha_read_data(struct archive_read *,
219		    const void **, size_t *, int64_t *);
220static int	archive_read_format_lha_read_data_skip(struct archive_read *);
221static int	archive_read_format_lha_cleanup(struct archive_read *);
222
223static void	lha_replace_path_separator(struct lha *,
224		    struct archive_entry *);
225static int	lha_read_file_header_0(struct archive_read *, struct lha *);
226static int	lha_read_file_header_1(struct archive_read *, struct lha *);
227static int	lha_read_file_header_2(struct archive_read *, struct lha *);
228static int	lha_read_file_header_3(struct archive_read *, struct lha *);
229static int	lha_read_file_extended_header(struct archive_read *,
230		    struct lha *, uint16_t *, int, size_t, size_t *);
231static size_t	lha_check_header_format(const void *);
232static int	lha_skip_sfx(struct archive_read *);
233static time_t	lha_dos_time(const unsigned char *);
234static time_t	lha_win_time(uint64_t, long *);
235static unsigned char	lha_calcsum(unsigned char, const void *,
236		    int, size_t);
237static int	lha_parse_linkname(struct archive_wstring *,
238		    struct archive_wstring *);
239static int	lha_read_data_none(struct archive_read *, const void **,
240		    size_t *, int64_t *);
241static int	lha_read_data_lzh(struct archive_read *, const void **,
242		    size_t *, int64_t *);
243static void	lha_crc16_init(void);
244static uint16_t lha_crc16(uint16_t, const void *, size_t);
245static int	lzh_decode_init(struct lzh_stream *, const char *);
246static void	lzh_decode_free(struct lzh_stream *);
247static int	lzh_decode(struct lzh_stream *, int);
248static int	lzh_br_fillup(struct lzh_stream *, struct lzh_br *);
249static int	lzh_huffman_init(struct huffman *, size_t, int);
250static void	lzh_huffman_free(struct huffman *);
251static int	lzh_read_pt_bitlen(struct lzh_stream *, int start, int end);
252static int	lzh_make_fake_table(struct huffman *, uint16_t);
253static int	lzh_make_huffman_table(struct huffman *);
254static inline int lzh_decode_huffman(struct huffman *, unsigned);
255static int	lzh_decode_huffman_tree(struct huffman *, unsigned, int);
256
257
258int
259archive_read_support_format_lha(struct archive *_a)
260{
261	struct archive_read *a = (struct archive_read *)_a;
262	struct lha *lha;
263	int r;
264
265	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
266	    ARCHIVE_STATE_NEW, "archive_read_support_format_lha");
267
268	lha = (struct lha *)calloc(1, sizeof(*lha));
269	if (lha == NULL) {
270		archive_set_error(&a->archive, ENOMEM,
271		    "Can't allocate lha data");
272		return (ARCHIVE_FATAL);
273	}
274	archive_string_init(&lha->ws);
275
276	r = __archive_read_register_format(a,
277	    lha,
278	    "lha",
279	    archive_read_format_lha_bid,
280	    archive_read_format_lha_options,
281	    archive_read_format_lha_read_header,
282	    archive_read_format_lha_read_data,
283	    archive_read_format_lha_read_data_skip,
284	    NULL,
285	    archive_read_format_lha_cleanup,
286	    NULL,
287	    NULL);
288
289	if (r != ARCHIVE_OK)
290		free(lha);
291	return (ARCHIVE_OK);
292}
293
294static size_t
295lha_check_header_format(const void *h)
296{
297	const unsigned char *p = h;
298	size_t next_skip_bytes;
299
300	switch (p[H_METHOD_OFFSET+3]) {
301	/*
302	 * "-lh0-" ... "-lh7-" "-lhd-"
303	 * "-lzs-" "-lz5-"
304	 */
305	case '0': case '1': case '2': case '3':
306	case '4': case '5': case '6': case '7':
307	case 'd':
308	case 's':
309		next_skip_bytes = 4;
310
311		/* b0 == 0 means the end of an LHa archive file.	*/
312		if (p[0] == 0)
313			break;
314		if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l'
315		    ||  p[H_METHOD_OFFSET+4] != '-')
316			break;
317
318		if (p[H_METHOD_OFFSET+2] == 'h') {
319			/* "-lh?-" */
320			if (p[H_METHOD_OFFSET+3] == 's')
321				break;
322			if (p[H_LEVEL_OFFSET] == 0)
323				return (0);
324			if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20)
325				return (0);
326		}
327		if (p[H_METHOD_OFFSET+2] == 'z') {
328			/* LArc extensions: -lzs-,-lz4- and -lz5- */
329			if (p[H_LEVEL_OFFSET] != 0)
330				break;
331			if (p[H_METHOD_OFFSET+3] == 's'
332			    || p[H_METHOD_OFFSET+3] == '4'
333			    || p[H_METHOD_OFFSET+3] == '5')
334				return (0);
335		}
336		break;
337	case 'h': next_skip_bytes = 1; break;
338	case 'z': next_skip_bytes = 1; break;
339	case 'l': next_skip_bytes = 2; break;
340	case '-': next_skip_bytes = 3; break;
341	default : next_skip_bytes = 4; break;
342	}
343
344	return (next_skip_bytes);
345}
346
347static int
348archive_read_format_lha_bid(struct archive_read *a, int best_bid)
349{
350	const char *p;
351	const void *buff;
352	ssize_t bytes_avail, offset, window;
353	size_t next;
354
355	/* If there's already a better bid than we can ever
356	   make, don't bother testing. */
357	if (best_bid > 30)
358		return (-1);
359
360	if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL)
361		return (-1);
362
363	if (lha_check_header_format(p) == 0)
364		return (30);
365
366	if (p[0] == 'M' && p[1] == 'Z') {
367		/* PE file */
368		offset = 0;
369		window = 4096;
370		while (offset < (1024 * 20)) {
371			buff = __archive_read_ahead(a, offset + window,
372			    &bytes_avail);
373			if (buff == NULL) {
374				/* Remaining bytes are less than window. */
375				window >>= 1;
376				if (window < (H_SIZE + 3))
377					return (0);
378				continue;
379			}
380			p = (const char *)buff + offset;
381			while (p + H_SIZE < (const char *)buff + bytes_avail) {
382				if ((next = lha_check_header_format(p)) == 0)
383					return (30);
384				p += next;
385			}
386			offset = p - (const char *)buff;
387		}
388	}
389	return (0);
390}
391
392static int
393archive_read_format_lha_options(struct archive_read *a,
394    const char *key, const char *val)
395{
396	struct lha *lha;
397	int ret = ARCHIVE_FAILED;
398
399	lha = (struct lha *)(a->format->data);
400	if (strcmp(key, "hdrcharset")  == 0) {
401		if (val == NULL || val[0] == 0)
402			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
403			    "lha: hdrcharset option needs a character-set name");
404		else {
405			lha->opt_sconv =
406			    archive_string_conversion_from_charset(
407				&a->archive, val, 0);
408			if (lha->opt_sconv != NULL)
409				ret = ARCHIVE_OK;
410			else
411				ret = ARCHIVE_FATAL;
412		}
413		return (ret);
414	}
415
416	/* Note: The "warn" return is just to inform the options
417	 * supervisor that we didn't handle it.  It will generate
418	 * a suitable error if no one used this option. */
419	return (ARCHIVE_WARN);
420}
421
422static int
423lha_skip_sfx(struct archive_read *a)
424{
425	const void *h;
426	const char *p, *q;
427	size_t next, skip;
428	ssize_t bytes, window;
429
430	window = 4096;
431	for (;;) {
432		h = __archive_read_ahead(a, window, &bytes);
433		if (h == NULL) {
434			/* Remaining bytes are less than window. */
435			window >>= 1;
436			if (window < (H_SIZE + 3))
437				goto fatal;
438			continue;
439		}
440		if (bytes < H_SIZE)
441			goto fatal;
442		p = h;
443		q = p + bytes;
444
445		/*
446		 * Scan ahead until we find something that looks
447		 * like the lha header.
448		 */
449		while (p + H_SIZE < q) {
450			if ((next = lha_check_header_format(p)) == 0) {
451				skip = p - (const char *)h;
452				__archive_read_consume(a, skip);
453				return (ARCHIVE_OK);
454			}
455			p += next;
456		}
457		skip = p - (const char *)h;
458		__archive_read_consume(a, skip);
459	}
460fatal:
461	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
462	    "Couldn't find out LHa header");
463	return (ARCHIVE_FATAL);
464}
465
466static int
467truncated_error(struct archive_read *a)
468{
469	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
470	    "Truncated LHa header");
471	return (ARCHIVE_FATAL);
472}
473
474static int
475archive_read_format_lha_read_header(struct archive_read *a,
476    struct archive_entry *entry)
477{
478	struct archive_wstring linkname;
479	struct archive_wstring pathname;
480	struct lha *lha;
481	const unsigned char *p;
482	const char *signature;
483	int err;
484	struct archive_mstring conv_buffer;
485	const wchar_t *conv_buffer_p;
486
487	lha_crc16_init();
488
489	a->archive.archive_format = ARCHIVE_FORMAT_LHA;
490	if (a->archive.archive_format_name == NULL)
491		a->archive.archive_format_name = "lha";
492
493	lha = (struct lha *)(a->format->data);
494	lha->decompress_init = 0;
495	lha->end_of_entry = 0;
496	lha->end_of_entry_cleanup = 0;
497	lha->entry_unconsumed = 0;
498
499	if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) {
500		/*
501		 * LHa archiver added 0 to the tail of its archive file as
502		 * the mark of the end of the archive.
503		 */
504		signature = __archive_read_ahead(a, sizeof(signature[0]), NULL);
505		if (signature == NULL || signature[0] == 0)
506			return (ARCHIVE_EOF);
507		return (truncated_error(a));
508	}
509
510	signature = (const char *)p;
511	if (lha->found_first_header == 0 &&
512	    signature[0] == 'M' && signature[1] == 'Z') {
513                /* This is an executable?  Must be self-extracting... 	*/
514		err = lha_skip_sfx(a);
515		if (err < ARCHIVE_WARN)
516			return (err);
517
518		if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL)
519			return (truncated_error(a));
520		signature = (const char *)p;
521	}
522	/* signature[0] == 0 means the end of an LHa archive file. */
523	if (signature[0] == 0)
524		return (ARCHIVE_EOF);
525
526	/*
527	 * Check the header format and method type.
528	 */
529	if (lha_check_header_format(p) != 0) {
530		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
531		    "Bad LHa file");
532		return (ARCHIVE_FATAL);
533	}
534
535	/* We've found the first header. */
536	lha->found_first_header = 1;
537	/* Set a default value and common data */
538	lha->header_size = 0;
539	lha->level = p[H_LEVEL_OFFSET];
540	lha->method[0] = p[H_METHOD_OFFSET+1];
541	lha->method[1] = p[H_METHOD_OFFSET+2];
542	lha->method[2] = p[H_METHOD_OFFSET+3];
543	if (memcmp(lha->method, "lhd", 3) == 0)
544		lha->directory = 1;
545	else
546		lha->directory = 0;
547	if (memcmp(lha->method, "lh0", 3) == 0 ||
548	    memcmp(lha->method, "lz4", 3) == 0)
549		lha->entry_is_compressed = 0;
550	else
551		lha->entry_is_compressed = 1;
552
553	lha->compsize = 0;
554	lha->origsize = 0;
555	lha->setflag = 0;
556	lha->birthtime = 0;
557	lha->birthtime_tv_nsec = 0;
558	lha->mtime = 0;
559	lha->mtime_tv_nsec = 0;
560	lha->atime = 0;
561	lha->atime_tv_nsec = 0;
562	lha->mode = (lha->directory)? 0777 : 0666;
563	lha->uid = 0;
564	lha->gid = 0;
565	archive_string_empty(&lha->dirname);
566	archive_string_empty(&lha->filename);
567	lha->dos_attr = 0;
568	if (lha->opt_sconv != NULL) {
569		lha->sconv_dir = lha->opt_sconv;
570		lha->sconv_fname = lha->opt_sconv;
571	} else {
572		lha->sconv_dir = NULL;
573		lha->sconv_fname = NULL;
574	}
575
576	switch (p[H_LEVEL_OFFSET]) {
577	case 0:
578		err = lha_read_file_header_0(a, lha);
579		break;
580	case 1:
581		err = lha_read_file_header_1(a, lha);
582		break;
583	case 2:
584		err = lha_read_file_header_2(a, lha);
585		break;
586	case 3:
587		err = lha_read_file_header_3(a, lha);
588		break;
589	default:
590		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
591		    "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]);
592		err = ARCHIVE_FATAL;
593		break;
594	}
595	if (err < ARCHIVE_WARN)
596		return (err);
597
598
599	if (!lha->directory && archive_strlen(&lha->filename) == 0)
600		/* The filename has not been set */
601		return (truncated_error(a));
602
603	/*
604	 * Make a pathname from a dirname and a filename, after converting to Unicode.
605	 * This is because codepages might differ between dirname and filename.
606	*/
607	archive_string_init(&pathname);
608	archive_string_init(&linkname);
609	archive_string_init(&conv_buffer.aes_mbs);
610	archive_string_init(&conv_buffer.aes_mbs_in_locale);
611	archive_string_init(&conv_buffer.aes_utf8);
612	archive_string_init(&conv_buffer.aes_wcs);
613	if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->dirname.s, lha->dirname.length, lha->sconv_dir)) {
614		archive_set_error(&a->archive,
615			ARCHIVE_ERRNO_FILE_FORMAT,
616			"Pathname cannot be converted "
617			"from %s to Unicode.",
618			archive_string_conversion_charset_name(lha->sconv_dir));
619		err = ARCHIVE_FATAL;
620	} else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
621		err = ARCHIVE_FATAL;
622	if (err == ARCHIVE_FATAL) {
623		archive_mstring_clean(&conv_buffer);
624		archive_wstring_free(&pathname);
625		archive_wstring_free(&linkname);
626		return (err);
627	}
628	archive_wstring_copy(&pathname, &conv_buffer.aes_wcs);
629
630	archive_string_empty(&conv_buffer.aes_mbs);
631	archive_string_empty(&conv_buffer.aes_mbs_in_locale);
632	archive_string_empty(&conv_buffer.aes_utf8);
633	archive_wstring_empty(&conv_buffer.aes_wcs);
634	if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->filename.s, lha->filename.length, lha->sconv_fname)) {
635		archive_set_error(&a->archive,
636			ARCHIVE_ERRNO_FILE_FORMAT,
637			"Pathname cannot be converted "
638			"from %s to Unicode.",
639			archive_string_conversion_charset_name(lha->sconv_fname));
640		err = ARCHIVE_FATAL;
641	}
642	else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
643		err = ARCHIVE_FATAL;
644	if (err == ARCHIVE_FATAL) {
645		archive_mstring_clean(&conv_buffer);
646		archive_wstring_free(&pathname);
647		archive_wstring_free(&linkname);
648		return (err);
649	}
650	archive_wstring_concat(&pathname, &conv_buffer.aes_wcs);
651	archive_mstring_clean(&conv_buffer);
652
653	if ((lha->mode & AE_IFMT) == AE_IFLNK) {
654		/*
655	 	 * Extract the symlink-name if it's included in the pathname.
656	 	 */
657		if (!lha_parse_linkname(&linkname, &pathname)) {
658			/* We couldn't get the symlink-name. */
659			archive_set_error(&a->archive,
660		    	    ARCHIVE_ERRNO_FILE_FORMAT,
661			    "Unknown symlink-name");
662			archive_wstring_free(&pathname);
663			archive_wstring_free(&linkname);
664			return (ARCHIVE_FAILED);
665		}
666	} else {
667		/*
668		 * Make sure a file-type is set.
669		 * The mode has been overridden if it is in the extended data.
670		 */
671		lha->mode = (lha->mode & ~AE_IFMT) |
672		    ((lha->directory)? AE_IFDIR: AE_IFREG);
673	}
674	if ((lha->setflag & UNIX_MODE_IS_SET) == 0 &&
675	    (lha->dos_attr & 1) != 0)
676		lha->mode &= ~(0222);/* read only. */
677
678	/*
679	 * Set basic file parameters.
680	 */
681	archive_entry_copy_pathname_w(entry, pathname.s);
682	archive_wstring_free(&pathname);
683	if (archive_strlen(&linkname) > 0) {
684		archive_entry_copy_symlink_w(entry, linkname.s);
685	} else
686		archive_entry_set_symlink(entry, NULL);
687	archive_wstring_free(&linkname);
688	/*
689	 * When a header level is 0, there is a possibility that
690	 * a pathname and a symlink has '\' character, a directory
691	 * separator in DOS/Windows. So we should convert it to '/'.
692	 */
693	if (p[H_LEVEL_OFFSET] == 0)
694		lha_replace_path_separator(lha, entry);
695
696	archive_entry_set_mode(entry, lha->mode);
697	archive_entry_set_uid(entry, lha->uid);
698	archive_entry_set_gid(entry, lha->gid);
699	if (archive_strlen(&lha->uname) > 0)
700		archive_entry_set_uname(entry, lha->uname.s);
701	if (archive_strlen(&lha->gname) > 0)
702		archive_entry_set_gname(entry, lha->gname.s);
703	if (lha->setflag & BIRTHTIME_IS_SET) {
704		archive_entry_set_birthtime(entry, lha->birthtime,
705		    lha->birthtime_tv_nsec);
706		archive_entry_set_ctime(entry, lha->birthtime,
707		    lha->birthtime_tv_nsec);
708	} else {
709		archive_entry_unset_birthtime(entry);
710		archive_entry_unset_ctime(entry);
711	}
712	archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec);
713	if (lha->setflag & ATIME_IS_SET)
714		archive_entry_set_atime(entry, lha->atime,
715		    lha->atime_tv_nsec);
716	else
717		archive_entry_unset_atime(entry);
718	if (lha->directory || archive_entry_symlink(entry) != NULL)
719		archive_entry_unset_size(entry);
720	else
721		archive_entry_set_size(entry, lha->origsize);
722
723	/*
724	 * Prepare variables used to read a file content.
725	 */
726	lha->entry_bytes_remaining = lha->compsize;
727	if (lha->entry_bytes_remaining < 0) {
728		archive_set_error(&a->archive,
729		    ARCHIVE_ERRNO_FILE_FORMAT,
730		    "Invalid LHa entry size");
731		return (ARCHIVE_FATAL);
732	}
733	lha->entry_offset = 0;
734	lha->entry_crc_calculated = 0;
735
736	/*
737	 * This file does not have a content.
738	 */
739	if (lha->directory || lha->compsize == 0)
740		lha->end_of_entry = 1;
741
742	snprintf(lha->format_name, sizeof(lha->format_name), "lha -%c%c%c-",
743	    lha->method[0], lha->method[1], lha->method[2]);
744	a->archive.archive_format_name = lha->format_name;
745
746	return (err);
747}
748
749/*
750 * Replace a DOS path separator '\' by a character '/'.
751 * Some multi-byte character set have  a character '\' in its second byte.
752 */
753static void
754lha_replace_path_separator(struct lha *lha, struct archive_entry *entry)
755{
756	const wchar_t *wp;
757	size_t i;
758
759	if ((wp = archive_entry_pathname_w(entry)) != NULL) {
760		archive_wstrcpy(&(lha->ws), wp);
761		for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
762			if (lha->ws.s[i] == L'\\')
763				lha->ws.s[i] = L'/';
764		}
765		archive_entry_copy_pathname_w(entry, lha->ws.s);
766	}
767
768	if ((wp = archive_entry_symlink_w(entry)) != NULL) {
769		archive_wstrcpy(&(lha->ws), wp);
770		for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
771			if (lha->ws.s[i] == L'\\')
772				lha->ws.s[i] = L'/';
773		}
774		archive_entry_copy_symlink_w(entry, lha->ws.s);
775	}
776}
777
778/*
779 * Header 0 format
780 *
781 * +0              +1         +2               +7                  +11
782 * +---------------+----------+----------------+-------------------+
783 * |header size(*1)|header sum|compression type|compressed size(*2)|
784 * +---------------+----------+----------------+-------------------+
785 *                             <---------------------(*1)----------*
786 *
787 * +11               +15       +17       +19            +20              +21
788 * +-----------------+---------+---------+--------------+----------------+
789 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)|
790 * +-----------------+---------+---------+--------------+----------------+
791 * *--------------------------------(*1)---------------------------------*
792 *
793 * +21             +22       +22+(*3)   +22+(*3)+2       +22+(*3)+2+(*4)
794 * +---------------+---------+----------+----------------+------------------+
795 * |name length(*3)|file name|file CRC16|extra header(*4)|  compressed data |
796 * +---------------+---------+----------+----------------+------------------+
797 *                  <--(*3)->                             <------(*2)------>
798 * *----------------------(*1)-------------------------->
799 *
800 */
801#define H0_HEADER_SIZE_OFFSET	0
802#define H0_HEADER_SUM_OFFSET	1
803#define H0_COMP_SIZE_OFFSET	7
804#define H0_ORIG_SIZE_OFFSET	11
805#define H0_DOS_TIME_OFFSET	15
806#define H0_NAME_LEN_OFFSET	21
807#define H0_FILE_NAME_OFFSET	22
808#define H0_FIXED_SIZE		24
809static int
810lha_read_file_header_0(struct archive_read *a, struct lha *lha)
811{
812	const unsigned char *p;
813	int extdsize, namelen;
814	unsigned char headersum, sum_calculated;
815
816	if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL)
817		return (truncated_error(a));
818	lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2;
819	headersum = p[H0_HEADER_SUM_OFFSET];
820	lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET);
821	lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET);
822	lha->mtime = lha_dos_time(p + H0_DOS_TIME_OFFSET);
823	namelen = p[H0_NAME_LEN_OFFSET];
824	extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen;
825	if ((namelen > 221 || extdsize < 0) && extdsize != -2) {
826		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
827		    "Invalid LHa header");
828		return (ARCHIVE_FATAL);
829	}
830	if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
831		return (truncated_error(a));
832
833	archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen);
834	/* When extdsize == -2, A CRC16 value is not present in the header. */
835	if (extdsize >= 0) {
836		lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen);
837		lha->setflag |= CRC_IS_SET;
838	}
839	sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
840
841	/* Read an extended header */
842	if (extdsize > 0) {
843		/* This extended data is set by 'LHa for UNIX' only.
844		 * Maybe fixed size.
845		 */
846		p += H0_FILE_NAME_OFFSET + namelen + 2;
847		if (p[0] == 'U' && extdsize == 12) {
848			/* p[1] is a minor version. */
849			lha->mtime = archive_le32dec(&p[2]);
850			lha->mode = archive_le16dec(&p[6]);
851			lha->uid = archive_le16dec(&p[8]);
852			lha->gid = archive_le16dec(&p[10]);
853			lha->setflag |= UNIX_MODE_IS_SET;
854		}
855	}
856	__archive_read_consume(a, lha->header_size);
857
858	if (sum_calculated != headersum) {
859		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
860		    "LHa header sum error");
861		return (ARCHIVE_FATAL);
862	}
863
864	return (ARCHIVE_OK);
865}
866
867/*
868 * Header 1 format
869 *
870 * +0              +1         +2               +7            +11
871 * +---------------+----------+----------------+-------------+
872 * |header size(*1)|header sum|compression type|skip size(*2)|
873 * +---------------+----------+----------------+-------------+
874 *                             <---------------(*1)----------*
875 *
876 * +11               +15       +17       +19            +20              +21
877 * +-----------------+---------+---------+--------------+----------------+
878 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)|
879 * +-----------------+---------+---------+--------------+----------------+
880 * *-------------------------------(*1)----------------------------------*
881 *
882 * +21             +22       +22+(*3)   +22+(*3)+2  +22+(*3)+3  +22+(*3)+3+(*4)
883 * +---------------+---------+----------+-----------+-----------+
884 * |name length(*3)|file name|file CRC16|  creator  |padding(*4)|
885 * +---------------+---------+----------+-----------+-----------+
886 *                  <--(*3)->
887 * *----------------------------(*1)----------------------------*
888 *
889 * +22+(*3)+3+(*4)  +22+(*3)+3+(*4)+2     +22+(*3)+3+(*4)+2+(*5)
890 * +----------------+---------------------+------------------------+
891 * |next header size| extended header(*5) |     compressed data    |
892 * +----------------+---------------------+------------------------+
893 * *------(*1)-----> <--------------------(*2)-------------------->
894 */
895#define H1_HEADER_SIZE_OFFSET	0
896#define H1_HEADER_SUM_OFFSET	1
897#define H1_COMP_SIZE_OFFSET	7
898#define H1_ORIG_SIZE_OFFSET	11
899#define H1_DOS_TIME_OFFSET	15
900#define H1_NAME_LEN_OFFSET	21
901#define H1_FILE_NAME_OFFSET	22
902#define H1_FIXED_SIZE		27
903static int
904lha_read_file_header_1(struct archive_read *a, struct lha *lha)
905{
906	const unsigned char *p;
907	size_t extdsize;
908	int i, err, err2;
909	int namelen, padding;
910	unsigned char headersum, sum_calculated;
911
912	err = ARCHIVE_OK;
913
914	if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL)
915		return (truncated_error(a));
916
917	lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2;
918	headersum = p[H1_HEADER_SUM_OFFSET];
919	/* Note: An extended header size is included in a compsize. */
920	lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET);
921	lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET);
922	lha->mtime = lha_dos_time(p + H1_DOS_TIME_OFFSET);
923	namelen = p[H1_NAME_LEN_OFFSET];
924	/* Calculate a padding size. The result will be normally 0 only(?) */
925	padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen;
926
927	if (namelen > 230 || padding < 0)
928		goto invalid;
929
930	if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
931		return (truncated_error(a));
932
933	for (i = 0; i < namelen; i++) {
934		if (p[i + H1_FILE_NAME_OFFSET] == 0xff)
935			goto invalid;/* Invalid filename. */
936	}
937	archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen);
938	lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen);
939	lha->setflag |= CRC_IS_SET;
940
941	sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
942	/* Consume used bytes but not include `next header size' data
943	 * since it will be consumed in lha_read_file_extended_header(). */
944	__archive_read_consume(a, lha->header_size - 2);
945
946	/* Read extended headers */
947	err2 = lha_read_file_extended_header(a, lha, NULL, 2,
948	    (size_t)(lha->compsize + 2), &extdsize);
949	if (err2 < ARCHIVE_WARN)
950		return (err2);
951	if (err2 < err)
952		err = err2;
953	/* Get a real compressed file size. */
954	lha->compsize -= extdsize - 2;
955
956	if (lha->compsize < 0)
957		goto invalid;	/* Invalid compressed file size */
958
959	if (sum_calculated != headersum) {
960		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
961		    "LHa header sum error");
962		return (ARCHIVE_FATAL);
963	}
964	return (err);
965invalid:
966	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
967	    "Invalid LHa header");
968	return (ARCHIVE_FATAL);
969}
970
971/*
972 * Header 2 format
973 *
974 * +0              +2               +7                  +11               +15
975 * +---------------+----------------+-------------------+-----------------+
976 * |header size(*1)|compression type|compressed size(*2)|uncompressed size|
977 * +---------------+----------------+-------------------+-----------------+
978 *  <--------------------------------(*1)---------------------------------*
979 *
980 * +15               +19          +20              +21        +23         +24
981 * +-----------------+------------+----------------+----------+-----------+
982 * |data/time(time_t)| 0x20 fixed |header level(=2)|file CRC16|  creator  |
983 * +-----------------+------------+----------------+----------+-----------+
984 * *---------------------------------(*1)---------------------------------*
985 *
986 * +24              +26                 +26+(*3)      +26+(*3)+(*4)
987 * +----------------+-------------------+-------------+-------------------+
988 * |next header size|extended header(*3)| padding(*4) |  compressed data  |
989 * +----------------+-------------------+-------------+-------------------+
990 * *--------------------------(*1)-------------------> <------(*2)------->
991 *
992 */
993#define H2_HEADER_SIZE_OFFSET	0
994#define H2_COMP_SIZE_OFFSET	7
995#define H2_ORIG_SIZE_OFFSET	11
996#define H2_TIME_OFFSET		15
997#define H2_CRC_OFFSET		21
998#define H2_FIXED_SIZE		24
999static int
1000lha_read_file_header_2(struct archive_read *a, struct lha *lha)
1001{
1002	const unsigned char *p;
1003	size_t extdsize;
1004	int err, padding;
1005	uint16_t header_crc;
1006
1007	if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL)
1008		return (truncated_error(a));
1009
1010	lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET);
1011	lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET);
1012	lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET);
1013	lha->mtime = archive_le32dec(p + H2_TIME_OFFSET);
1014	lha->crc = archive_le16dec(p + H2_CRC_OFFSET);
1015	lha->setflag |= CRC_IS_SET;
1016
1017	if (lha->header_size < H2_FIXED_SIZE) {
1018		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1019		    "Invalid LHa header size");
1020		return (ARCHIVE_FATAL);
1021	}
1022
1023	header_crc = lha_crc16(0, p, H2_FIXED_SIZE);
1024	__archive_read_consume(a, H2_FIXED_SIZE);
1025
1026	/* Read extended headers */
1027	err = lha_read_file_extended_header(a, lha, &header_crc, 2,
1028		  lha->header_size - H2_FIXED_SIZE, &extdsize);
1029	if (err < ARCHIVE_WARN)
1030		return (err);
1031
1032	/* Calculate a padding size. The result will be normally 0 or 1. */
1033	padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize);
1034	if (padding > 0) {
1035		if ((p = __archive_read_ahead(a, padding, NULL)) == NULL)
1036			return (truncated_error(a));
1037		header_crc = lha_crc16(header_crc, p, padding);
1038		__archive_read_consume(a, padding);
1039	}
1040
1041	if (header_crc != lha->header_crc) {
1042#ifndef DONT_FAIL_ON_CRC_ERROR
1043		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1044		    "LHa header CRC error");
1045		return (ARCHIVE_FATAL);
1046#endif
1047	}
1048	return (err);
1049}
1050
1051/*
1052 * Header 3 format
1053 *
1054 * +0           +2               +7                  +11               +15
1055 * +------------+----------------+-------------------+-----------------+
1056 * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size|
1057 * +------------+----------------+-------------------+-----------------+
1058 *  <-------------------------------(*1)-------------------------------*
1059 *
1060 * +15               +19          +20              +21        +23         +24
1061 * +-----------------+------------+----------------+----------+-----------+
1062 * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16|  creator  |
1063 * +-----------------+------------+----------------+----------+-----------+
1064 * *--------------------------------(*1)----------------------------------*
1065 *
1066 * +24             +28              +32                 +32+(*3)
1067 * +---------------+----------------+-------------------+-----------------+
1068 * |header size(*1)|next header size|extended header(*3)| compressed data |
1069 * +---------------+----------------+-------------------+-----------------+
1070 * *------------------------(*1)-----------------------> <------(*2)----->
1071 *
1072 */
1073#define H3_FIELD_LEN_OFFSET	0
1074#define H3_COMP_SIZE_OFFSET	7
1075#define H3_ORIG_SIZE_OFFSET	11
1076#define H3_TIME_OFFSET		15
1077#define H3_CRC_OFFSET		21
1078#define H3_HEADER_SIZE_OFFSET	24
1079#define H3_FIXED_SIZE		28
1080static int
1081lha_read_file_header_3(struct archive_read *a, struct lha *lha)
1082{
1083	const unsigned char *p;
1084	size_t extdsize;
1085	int err;
1086	uint16_t header_crc;
1087
1088	if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL)
1089		return (truncated_error(a));
1090
1091	if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4)
1092		goto invalid;
1093	lha->header_size =archive_le32dec(p + H3_HEADER_SIZE_OFFSET);
1094	lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET);
1095	lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET);
1096	lha->mtime = archive_le32dec(p + H3_TIME_OFFSET);
1097	lha->crc = archive_le16dec(p + H3_CRC_OFFSET);
1098	lha->setflag |= CRC_IS_SET;
1099
1100	if (lha->header_size < H3_FIXED_SIZE + 4)
1101		goto invalid;
1102	header_crc = lha_crc16(0, p, H3_FIXED_SIZE);
1103	__archive_read_consume(a, H3_FIXED_SIZE);
1104
1105	/* Read extended headers */
1106	err = lha_read_file_extended_header(a, lha, &header_crc, 4,
1107		  lha->header_size - H3_FIXED_SIZE, &extdsize);
1108	if (err < ARCHIVE_WARN)
1109		return (err);
1110
1111	if (header_crc != lha->header_crc) {
1112#ifndef DONT_FAIL_ON_CRC_ERROR
1113		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1114		    "LHa header CRC error");
1115		return (ARCHIVE_FATAL);
1116#endif
1117	}
1118	return (err);
1119invalid:
1120	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1121	    "Invalid LHa header");
1122	return (ARCHIVE_FATAL);
1123}
1124
1125/*
1126 * Extended header format
1127 *
1128 * +0             +2        +3  -- used in header 1 and 2
1129 * +0             +4        +5  -- used in header 3
1130 * +--------------+---------+-------------------+--------------+--
1131 * |ex-header size|header id|        data       |ex-header size| .......
1132 * +--------------+---------+-------------------+--------------+--
1133 *  <-------------( ex-header size)------------> <-- next extended header --*
1134 *
1135 * If the ex-header size is zero, it is the make of the end of extended
1136 * headers.
1137 *
1138 */
1139static int
1140lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
1141    uint16_t *crc, int sizefield_length, size_t limitsize, size_t *total_size)
1142{
1143	const void *h;
1144	const unsigned char *extdheader;
1145	size_t	extdsize;
1146	size_t	datasize;
1147	unsigned int i;
1148	unsigned char extdtype;
1149
1150#define EXT_HEADER_CRC		0x00		/* Header CRC and information*/
1151#define EXT_FILENAME		0x01		/* Filename 		    */
1152#define EXT_DIRECTORY		0x02		/* Directory name	    */
1153#define EXT_DOS_ATTR		0x40		/* MS-DOS attribute	    */
1154#define EXT_TIMESTAMP		0x41		/* Windows time stamp	    */
1155#define EXT_FILESIZE		0x42		/* Large file size	    */
1156#define EXT_TIMEZONE		0x43		/* Time zone		    */
1157#define EXT_UTF16_FILENAME	0x44		/* UTF-16 filename 	    */
1158#define EXT_UTF16_DIRECTORY	0x45		/* UTF-16 directory name    */
1159#define EXT_CODEPAGE		0x46		/* Codepage		    */
1160#define EXT_UNIX_MODE		0x50		/* File permission	    */
1161#define EXT_UNIX_GID_UID	0x51		/* gid,uid		    */
1162#define EXT_UNIX_GNAME		0x52		/* Group name		    */
1163#define EXT_UNIX_UNAME		0x53		/* User name		    */
1164#define EXT_UNIX_MTIME		0x54		/* Modified time	    */
1165#define EXT_OS2_NEW_ATTR	0x7f		/* new attribute(OS/2 only) */
1166#define EXT_NEW_ATTR		0xff		/* new attribute	    */
1167
1168	*total_size = sizefield_length;
1169
1170	for (;;) {
1171		/* Read an extended header size. */
1172		if ((h =
1173		    __archive_read_ahead(a, sizefield_length, NULL)) == NULL)
1174			return (truncated_error(a));
1175		/* Check if the size is the zero indicates the end of the
1176		 * extended header. */
1177		if (sizefield_length == sizeof(uint16_t))
1178			extdsize = archive_le16dec(h);
1179		else
1180			extdsize = archive_le32dec(h);
1181		if (extdsize == 0) {
1182			/* End of extended header */
1183			if (crc != NULL)
1184				*crc = lha_crc16(*crc, h, sizefield_length);
1185			__archive_read_consume(a, sizefield_length);
1186			return (ARCHIVE_OK);
1187		}
1188
1189		/* Sanity check to the extended header size. */
1190		if (((uint64_t)*total_size + extdsize) >
1191				    (uint64_t)limitsize ||
1192		    extdsize <= (size_t)sizefield_length)
1193			goto invalid;
1194
1195		/* Read the extended header. */
1196		if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL)
1197			return (truncated_error(a));
1198		*total_size += extdsize;
1199
1200		extdheader = (const unsigned char *)h;
1201		/* Get the extended header type. */
1202		extdtype = extdheader[sizefield_length];
1203		/* Calculate an extended data size. */
1204		datasize = extdsize - (1 + sizefield_length);
1205		/* Skip an extended header size field and type field. */
1206		extdheader += sizefield_length + 1;
1207
1208		if (crc != NULL && extdtype != EXT_HEADER_CRC)
1209			*crc = lha_crc16(*crc, h, extdsize);
1210		switch (extdtype) {
1211		case EXT_HEADER_CRC:
1212			/* We only use a header CRC. Following data will not
1213			 * be used. */
1214			if (datasize >= 2) {
1215				lha->header_crc = archive_le16dec(extdheader);
1216				if (crc != NULL) {
1217					static const char zeros[2] = {0, 0};
1218					*crc = lha_crc16(*crc, h,
1219					    extdsize - datasize);
1220					/* CRC value itself as zero */
1221					*crc = lha_crc16(*crc, zeros, 2);
1222					*crc = lha_crc16(*crc,
1223					    extdheader+2, datasize - 2);
1224				}
1225			}
1226			break;
1227		case EXT_FILENAME:
1228			if (datasize == 0) {
1229				/* maybe directory header */
1230				archive_string_empty(&lha->filename);
1231				break;
1232			}
1233			if (extdheader[0] == '\0')
1234				goto invalid;
1235			archive_strncpy(&lha->filename,
1236			    (const char *)extdheader, datasize);
1237			break;
1238		case EXT_UTF16_FILENAME:
1239			if (datasize == 0) {
1240				/* maybe directory header */
1241				archive_string_empty(&lha->filename);
1242				break;
1243			} else if (datasize & 1) {
1244				/* UTF-16 characters take always 2 or 4 bytes */
1245				goto invalid;
1246			}
1247			if (extdheader[0] == '\0')
1248				goto invalid;
1249			archive_string_empty(&lha->filename);
1250			archive_array_append(&lha->filename,
1251				(const char *)extdheader, datasize);
1252			/* Setup a string conversion for a filename. */
1253			lha->sconv_fname =
1254			    archive_string_conversion_from_charset(&a->archive,
1255			        "UTF-16LE", 1);
1256			if (lha->sconv_fname == NULL)
1257				return (ARCHIVE_FATAL);
1258			break;
1259		case EXT_DIRECTORY:
1260			if (datasize == 0 || extdheader[0] == '\0')
1261				/* no directory name data. exit this case. */
1262				goto invalid;
1263
1264			archive_strncpy(&lha->dirname,
1265		  	    (const char *)extdheader, datasize);
1266			/*
1267			 * Convert directory delimiter from 0xFF
1268			 * to '/' for local system.
1269	 		 */
1270			for (i = 0; i < lha->dirname.length; i++) {
1271				if ((unsigned char)lha->dirname.s[i] == 0xFF)
1272					lha->dirname.s[i] = '/';
1273			}
1274			/* Is last character directory separator? */
1275			if (lha->dirname.s[lha->dirname.length-1] != '/')
1276				/* invalid directory data */
1277				goto invalid;
1278			break;
1279		case EXT_UTF16_DIRECTORY:
1280			/* UTF-16 characters take always 2 or 4 bytes */
1281			if (datasize == 0 || (datasize & 1) ||
1282			    extdheader[0] == '\0') {
1283				/* no directory name data. exit this case. */
1284				goto invalid;
1285			}
1286
1287			archive_string_empty(&lha->dirname);
1288			archive_array_append(&lha->dirname,
1289				(const char *)extdheader, datasize);
1290			lha->sconv_dir =
1291			    archive_string_conversion_from_charset(&a->archive,
1292			        "UTF-16LE", 1);
1293			if (lha->sconv_dir == NULL)
1294				return (ARCHIVE_FATAL);
1295			else {
1296				/*
1297				 * Convert directory delimiter from 0xFFFF
1298				 * to '/' for local system.
1299				 */
1300				uint16_t dirSep;
1301				uint16_t d = 1;
1302				if (archive_be16dec(&d) == 1)
1303					dirSep = 0x2F00;
1304				else
1305					dirSep = 0x002F;
1306
1307				/* UTF-16LE character */
1308				uint16_t *utf16name =
1309				    (uint16_t *)lha->dirname.s;
1310				for (i = 0; i < lha->dirname.length / 2; i++) {
1311					if (utf16name[i] == 0xFFFF) {
1312						utf16name[i] = dirSep;
1313					}
1314				}
1315				/* Is last character directory separator? */
1316				if (utf16name[lha->dirname.length / 2 - 1] !=
1317				    dirSep) {
1318					/* invalid directory data */
1319					goto invalid;
1320				}
1321			}
1322			break;
1323		case EXT_DOS_ATTR:
1324			if (datasize == 2)
1325				lha->dos_attr = (unsigned char)
1326				    (archive_le16dec(extdheader) & 0xff);
1327			break;
1328		case EXT_TIMESTAMP:
1329			if (datasize == (sizeof(uint64_t) * 3)) {
1330				lha->birthtime = lha_win_time(
1331				    archive_le64dec(extdheader),
1332				    &lha->birthtime_tv_nsec);
1333				extdheader += sizeof(uint64_t);
1334				lha->mtime = lha_win_time(
1335				    archive_le64dec(extdheader),
1336				    &lha->mtime_tv_nsec);
1337				extdheader += sizeof(uint64_t);
1338				lha->atime = lha_win_time(
1339				    archive_le64dec(extdheader),
1340				    &lha->atime_tv_nsec);
1341				lha->setflag |= BIRTHTIME_IS_SET |
1342				    ATIME_IS_SET;
1343			}
1344			break;
1345		case EXT_FILESIZE:
1346			if (datasize == sizeof(uint64_t) * 2) {
1347				lha->compsize = archive_le64dec(extdheader);
1348				extdheader += sizeof(uint64_t);
1349				lha->origsize = archive_le64dec(extdheader);
1350			}
1351			break;
1352		case EXT_CODEPAGE:
1353			/* Get an archived filename charset from codepage.
1354			 * This overwrites the charset specified by
1355			 * hdrcharset option. */
1356			if (datasize == sizeof(uint32_t)) {
1357				struct archive_string cp;
1358				const char *charset;
1359
1360				archive_string_init(&cp);
1361				switch (archive_le32dec(extdheader)) {
1362				case 65001: /* UTF-8 */
1363					charset = "UTF-8";
1364					break;
1365				default:
1366					archive_string_sprintf(&cp, "CP%d",
1367					    (int)archive_le32dec(extdheader));
1368					charset = cp.s;
1369					break;
1370				}
1371				lha->sconv_dir =
1372				    archive_string_conversion_from_charset(
1373					&(a->archive), charset, 1);
1374				lha->sconv_fname =
1375				    archive_string_conversion_from_charset(
1376					&(a->archive), charset, 1);
1377				archive_string_free(&cp);
1378				if (lha->sconv_dir == NULL)
1379					return (ARCHIVE_FATAL);
1380				if (lha->sconv_fname == NULL)
1381					return (ARCHIVE_FATAL);
1382			}
1383			break;
1384		case EXT_UNIX_MODE:
1385			if (datasize == sizeof(uint16_t)) {
1386				lha->mode = archive_le16dec(extdheader);
1387				lha->setflag |= UNIX_MODE_IS_SET;
1388			}
1389			break;
1390		case EXT_UNIX_GID_UID:
1391			if (datasize == (sizeof(uint16_t) * 2)) {
1392				lha->gid = archive_le16dec(extdheader);
1393				lha->uid = archive_le16dec(extdheader+2);
1394			}
1395			break;
1396		case EXT_UNIX_GNAME:
1397			if (datasize > 0)
1398				archive_strncpy(&lha->gname,
1399				    (const char *)extdheader, datasize);
1400			break;
1401		case EXT_UNIX_UNAME:
1402			if (datasize > 0)
1403				archive_strncpy(&lha->uname,
1404				    (const char *)extdheader, datasize);
1405			break;
1406		case EXT_UNIX_MTIME:
1407			if (datasize == sizeof(uint32_t))
1408				lha->mtime = archive_le32dec(extdheader);
1409			break;
1410		case EXT_OS2_NEW_ATTR:
1411			/* This extended header is OS/2 depend. */
1412			if (datasize == 16) {
1413				lha->dos_attr = (unsigned char)
1414				    (archive_le16dec(extdheader) & 0xff);
1415				lha->mode = archive_le16dec(extdheader+2);
1416				lha->gid = archive_le16dec(extdheader+4);
1417				lha->uid = archive_le16dec(extdheader+6);
1418				lha->birthtime = archive_le32dec(extdheader+8);
1419				lha->atime = archive_le32dec(extdheader+12);
1420				lha->setflag |= UNIX_MODE_IS_SET
1421				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
1422			}
1423			break;
1424		case EXT_NEW_ATTR:
1425			if (datasize == 20) {
1426				lha->mode = (mode_t)archive_le32dec(extdheader);
1427				lha->gid = archive_le32dec(extdheader+4);
1428				lha->uid = archive_le32dec(extdheader+8);
1429				lha->birthtime = archive_le32dec(extdheader+12);
1430				lha->atime = archive_le32dec(extdheader+16);
1431				lha->setflag |= UNIX_MODE_IS_SET
1432				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
1433			}
1434			break;
1435		case EXT_TIMEZONE:		/* Not supported */
1436			break;
1437		default:
1438			break;
1439		}
1440
1441		__archive_read_consume(a, extdsize);
1442	}
1443invalid:
1444	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1445	    "Invalid extended LHa header");
1446	return (ARCHIVE_FATAL);
1447}
1448
1449static int
1450lha_end_of_entry(struct archive_read *a)
1451{
1452	struct lha *lha = (struct lha *)(a->format->data);
1453	int r = ARCHIVE_EOF;
1454
1455	if (!lha->end_of_entry_cleanup) {
1456		if ((lha->setflag & CRC_IS_SET) &&
1457		    lha->crc != lha->entry_crc_calculated) {
1458			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1459			    "LHa data CRC error");
1460			r = ARCHIVE_WARN;
1461		}
1462
1463		/* End-of-entry cleanup done. */
1464		lha->end_of_entry_cleanup = 1;
1465	}
1466	return (r);
1467}
1468
1469static int
1470archive_read_format_lha_read_data(struct archive_read *a,
1471    const void **buff, size_t *size, int64_t *offset)
1472{
1473	struct lha *lha = (struct lha *)(a->format->data);
1474	int r;
1475
1476	if (lha->entry_unconsumed) {
1477		/* Consume as much as the decompressor actually used. */
1478		__archive_read_consume(a, lha->entry_unconsumed);
1479		lha->entry_unconsumed = 0;
1480	}
1481	if (lha->end_of_entry) {
1482		*offset = lha->entry_offset;
1483		*size = 0;
1484		*buff = NULL;
1485		return (lha_end_of_entry(a));
1486	}
1487
1488	if (lha->entry_is_compressed)
1489		r =  lha_read_data_lzh(a, buff, size, offset);
1490	else
1491		/* No compression. */
1492		r =  lha_read_data_none(a, buff, size, offset);
1493	return (r);
1494}
1495
1496/*
1497 * Read a file content in no compression.
1498 *
1499 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1500 * lha->end_of_entry if it consumes all of the data.
1501 */
1502static int
1503lha_read_data_none(struct archive_read *a, const void **buff,
1504    size_t *size, int64_t *offset)
1505{
1506	struct lha *lha = (struct lha *)(a->format->data);
1507	ssize_t bytes_avail;
1508
1509	if (lha->entry_bytes_remaining == 0) {
1510		*buff = NULL;
1511		*size = 0;
1512		*offset = lha->entry_offset;
1513		lha->end_of_entry = 1;
1514		return (ARCHIVE_OK);
1515	}
1516	/*
1517	 * Note: '1' here is a performance optimization.
1518	 * Recall that the decompression layer returns a count of
1519	 * available bytes; asking for more than that forces the
1520	 * decompressor to combine reads by copying data.
1521	 */
1522	*buff = __archive_read_ahead(a, 1, &bytes_avail);
1523	if (bytes_avail <= 0) {
1524		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1525		    "Truncated LHa file data");
1526		return (ARCHIVE_FATAL);
1527	}
1528	if (bytes_avail > lha->entry_bytes_remaining)
1529		bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1530	lha->entry_crc_calculated =
1531	    lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail);
1532	*size = bytes_avail;
1533	*offset = lha->entry_offset;
1534	lha->entry_offset += bytes_avail;
1535	lha->entry_bytes_remaining -= bytes_avail;
1536	if (lha->entry_bytes_remaining == 0)
1537		lha->end_of_entry = 1;
1538	lha->entry_unconsumed = bytes_avail;
1539	return (ARCHIVE_OK);
1540}
1541
1542/*
1543 * Read a file content in LZHUFF encoding.
1544 *
1545 * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is
1546 * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes
1547 * all of the data.
1548 */
1549static int
1550lha_read_data_lzh(struct archive_read *a, const void **buff,
1551    size_t *size, int64_t *offset)
1552{
1553	struct lha *lha = (struct lha *)(a->format->data);
1554	ssize_t bytes_avail;
1555	int r;
1556
1557	/* If we haven't yet read any data, initialize the decompressor. */
1558	if (!lha->decompress_init) {
1559		r = lzh_decode_init(&(lha->strm), lha->method);
1560		switch (r) {
1561		case ARCHIVE_OK:
1562			break;
1563		case ARCHIVE_FAILED:
1564        		/* Unsupported compression. */
1565			*buff = NULL;
1566			*size = 0;
1567			*offset = 0;
1568			archive_set_error(&a->archive,
1569			    ARCHIVE_ERRNO_FILE_FORMAT,
1570			    "Unsupported lzh compression method -%c%c%c-",
1571			    lha->method[0], lha->method[1], lha->method[2]);
1572			/* We know compressed size; just skip it. */
1573			archive_read_format_lha_read_data_skip(a);
1574			return (ARCHIVE_WARN);
1575		default:
1576			archive_set_error(&a->archive, ENOMEM,
1577			    "Couldn't allocate memory "
1578			    "for lzh decompression");
1579			return (ARCHIVE_FATAL);
1580		}
1581		/* We've initialized decompression for this stream. */
1582		lha->decompress_init = 1;
1583		lha->strm.avail_out = 0;
1584		lha->strm.total_out = 0;
1585	}
1586
1587	/*
1588	 * Note: '1' here is a performance optimization.
1589	 * Recall that the decompression layer returns a count of
1590	 * available bytes; asking for more than that forces the
1591	 * decompressor to combine reads by copying data.
1592	 */
1593	lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail);
1594	if (bytes_avail <= 0) {
1595		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1596		    "Truncated LHa file body");
1597		return (ARCHIVE_FATAL);
1598	}
1599	if (bytes_avail > lha->entry_bytes_remaining)
1600		bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1601
1602	lha->strm.avail_in = (int)bytes_avail;
1603	lha->strm.total_in = 0;
1604	lha->strm.avail_out = 0;
1605
1606	r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining);
1607	switch (r) {
1608	case ARCHIVE_OK:
1609		break;
1610	case ARCHIVE_EOF:
1611		lha->end_of_entry = 1;
1612		break;
1613	default:
1614		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1615		    "Bad lzh data");
1616		return (ARCHIVE_FAILED);
1617	}
1618	lha->entry_unconsumed = lha->strm.total_in;
1619	lha->entry_bytes_remaining -= lha->strm.total_in;
1620
1621	if (lha->strm.avail_out) {
1622		*offset = lha->entry_offset;
1623		*size = lha->strm.avail_out;
1624		*buff = lha->strm.ref_ptr;
1625		lha->entry_crc_calculated =
1626		    lha_crc16(lha->entry_crc_calculated, *buff, *size);
1627		lha->entry_offset += *size;
1628	} else {
1629		*offset = lha->entry_offset;
1630		*size = 0;
1631		*buff = NULL;
1632		if (lha->end_of_entry)
1633			return (lha_end_of_entry(a));
1634	}
1635	return (ARCHIVE_OK);
1636}
1637
1638/*
1639 * Skip a file content.
1640 */
1641static int
1642archive_read_format_lha_read_data_skip(struct archive_read *a)
1643{
1644	struct lha *lha;
1645	int64_t bytes_skipped;
1646
1647	lha = (struct lha *)(a->format->data);
1648
1649	if (lha->entry_unconsumed) {
1650		/* Consume as much as the decompressor actually used. */
1651		__archive_read_consume(a, lha->entry_unconsumed);
1652		lha->entry_unconsumed = 0;
1653	}
1654
1655	/* if we've already read to end of data, we're done. */
1656	if (lha->end_of_entry_cleanup)
1657		return (ARCHIVE_OK);
1658
1659	/*
1660	 * If the length is at the beginning, we can skip the
1661	 * compressed data much more quickly.
1662	 */
1663	bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining);
1664	if (bytes_skipped < 0)
1665		return (ARCHIVE_FATAL);
1666
1667	/* This entry is finished and done. */
1668	lha->end_of_entry_cleanup = lha->end_of_entry = 1;
1669	return (ARCHIVE_OK);
1670}
1671
1672static int
1673archive_read_format_lha_cleanup(struct archive_read *a)
1674{
1675	struct lha *lha = (struct lha *)(a->format->data);
1676
1677	lzh_decode_free(&(lha->strm));
1678	archive_string_free(&(lha->dirname));
1679	archive_string_free(&(lha->filename));
1680	archive_string_free(&(lha->uname));
1681	archive_string_free(&(lha->gname));
1682	archive_wstring_free(&(lha->ws));
1683	free(lha);
1684	(a->format->data) = NULL;
1685	return (ARCHIVE_OK);
1686}
1687
1688/*
1689 * 'LHa for UNIX' utility has archived a symbolic-link name after
1690 * a pathname with '|' character.
1691 * This function extracts the symbolic-link name from the pathname.
1692 *
1693 * example.
1694 *   1. a symbolic-name is 'aaa/bb/cc'
1695 *   2. a filename is 'xxx/bbb'
1696 *  then an archived pathname is 'xxx/bbb|aaa/bb/cc'
1697 */
1698static int
1699lha_parse_linkname(struct archive_wstring *linkname,
1700    struct archive_wstring *pathname)
1701{
1702	wchar_t *	linkptr;
1703	size_t 	symlen;
1704
1705	linkptr = wcschr(pathname->s, L'|');
1706	if (linkptr != NULL) {
1707		symlen = wcslen(linkptr + 1);
1708		archive_wstrncpy(linkname, linkptr+1, symlen);
1709
1710		*linkptr = 0;
1711		pathname->length = wcslen(pathname->s);
1712
1713		return (1);
1714	}
1715	return (0);
1716}
1717
1718/* Convert an MSDOS-style date/time into Unix-style time. */
1719static time_t
1720lha_dos_time(const unsigned char *p)
1721{
1722	int msTime, msDate;
1723	struct tm ts;
1724
1725	msTime = archive_le16dec(p);
1726	msDate = archive_le16dec(p+2);
1727
1728	memset(&ts, 0, sizeof(ts));
1729	ts.tm_year = ((msDate >> 9) & 0x7f) + 80;   /* Years since 1900. */
1730	ts.tm_mon = ((msDate >> 5) & 0x0f) - 1;     /* Month number.     */
1731	ts.tm_mday = msDate & 0x1f;		    /* Day of month.     */
1732	ts.tm_hour = (msTime >> 11) & 0x1f;
1733	ts.tm_min = (msTime >> 5) & 0x3f;
1734	ts.tm_sec = (msTime << 1) & 0x3e;
1735	ts.tm_isdst = -1;
1736	return (mktime(&ts));
1737}
1738
1739/* Convert an MS-Windows-style date/time into Unix-style time. */
1740static time_t
1741lha_win_time(uint64_t wintime, long *ns)
1742{
1743#define EPOC_TIME ARCHIVE_LITERAL_ULL(116444736000000000)
1744
1745	if (wintime >= EPOC_TIME) {
1746		wintime -= EPOC_TIME;	/* 1970-01-01 00:00:00 (UTC) */
1747		if (ns != NULL)
1748			*ns = (long)(wintime % 10000000) * 100;
1749		return (wintime / 10000000);
1750	} else {
1751		if (ns != NULL)
1752			*ns = 0;
1753		return (0);
1754	}
1755}
1756
1757static unsigned char
1758lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size)
1759{
1760	unsigned char const *p = (unsigned char const *)pp;
1761
1762	p += offset;
1763	for (;size > 0; --size)
1764		sum += *p++;
1765	return (sum);
1766}
1767
1768static uint16_t crc16tbl[2][256];
1769static void
1770lha_crc16_init(void)
1771{
1772	unsigned int i;
1773	static int crc16init = 0;
1774
1775	if (crc16init)
1776		return;
1777	crc16init = 1;
1778
1779	for (i = 0; i < 256; i++) {
1780		unsigned int j;
1781		uint16_t crc = (uint16_t)i;
1782		for (j = 8; j; j--)
1783			crc = (crc >> 1) ^ ((crc & 1) * 0xA001);
1784		crc16tbl[0][i] = crc;
1785	}
1786
1787	for (i = 0; i < 256; i++) {
1788		crc16tbl[1][i] = (crc16tbl[0][i] >> 8)
1789			^ crc16tbl[0][crc16tbl[0][i] & 0xff];
1790	}
1791}
1792
1793static uint16_t
1794lha_crc16(uint16_t crc, const void *pp, size_t len)
1795{
1796	const unsigned char *p = (const unsigned char *)pp;
1797	const uint16_t *buff;
1798	const union {
1799		uint32_t i;
1800		char c[4];
1801	} u = { 0x01020304 };
1802
1803	if (len == 0)
1804		return crc;
1805
1806	/* Process unaligned address. */
1807	if (((uintptr_t)p) & (uintptr_t)0x1) {
1808		crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1809		len--;
1810	}
1811	buff = (const uint16_t *)p;
1812	/*
1813	 * Modern C compiler such as GCC does not unroll automatically yet
1814	 * without unrolling pragma, and Clang is so. So we should
1815	 * unroll this loop for its performance.
1816	 */
1817	for (;len >= 8; len -= 8) {
1818		/* This if statement expects compiler optimization will
1819		 * remove the statement which will not be executed. */
1820#undef bswap16
1821#ifndef __has_builtin
1822#define __has_builtin(x) 0
1823#endif
1824#if defined(_MSC_VER) && _MSC_VER >= 1400  /* Visual Studio */
1825#  define bswap16(x) _byteswap_ushort(x)
1826#elif defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4)
1827/* GCC 4.8 and later has __builtin_bswap16() */
1828#  define bswap16(x) __builtin_bswap16(x)
1829#elif defined(__clang__) && __has_builtin(__builtin_bswap16)
1830/* Newer clang versions have __builtin_bswap16() */
1831#  define bswap16(x) __builtin_bswap16(x)
1832#else
1833#  define bswap16(x) ((((x) >> 8) & 0xff) | ((x) << 8))
1834#endif
1835#define CRC16W	do { 	\
1836		if(u.c[0] == 1) { /* Big endian */		\
1837			crc ^= bswap16(*buff); buff++;		\
1838		} else						\
1839			crc ^= *buff++;				\
1840		crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];\
1841} while (0)
1842		CRC16W;
1843		CRC16W;
1844		CRC16W;
1845		CRC16W;
1846#undef CRC16W
1847#undef bswap16
1848	}
1849
1850	p = (const unsigned char *)buff;
1851	for (;len; len--) {
1852		crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1853	}
1854	return crc;
1855}
1856
1857/*
1858 * Initialize LZHUF decoder.
1859 *
1860 * Returns ARCHIVE_OK if initialization was successful.
1861 * Returns ARCHIVE_FAILED if method is unsupported.
1862 * Returns ARCHIVE_FATAL if initialization failed; memory allocation
1863 * error occurred.
1864 */
1865static int
1866lzh_decode_init(struct lzh_stream *strm, const char *method)
1867{
1868	struct lzh_dec *ds;
1869	int w_bits, w_size;
1870
1871	if (strm->ds == NULL) {
1872		strm->ds = calloc(1, sizeof(*strm->ds));
1873		if (strm->ds == NULL)
1874			return (ARCHIVE_FATAL);
1875	}
1876	ds = strm->ds;
1877	ds->error = ARCHIVE_FAILED;
1878	if (method == NULL || method[0] != 'l' || method[1] != 'h')
1879		return (ARCHIVE_FAILED);
1880	switch (method[2]) {
1881	case '5':
1882		w_bits = 13;/* 8KiB for window */
1883		break;
1884	case '6':
1885		w_bits = 15;/* 32KiB for window */
1886		break;
1887	case '7':
1888		w_bits = 16;/* 64KiB for window */
1889		break;
1890	default:
1891		return (ARCHIVE_FAILED);/* Not supported. */
1892	}
1893	ds->error = ARCHIVE_FATAL;
1894	/* Expand a window size up to 128 KiB for decompressing process
1895	 * performance whatever its original window size is. */
1896	ds->w_size = 1U << 17;
1897	ds->w_mask = ds->w_size -1;
1898	if (ds->w_buff == NULL) {
1899		ds->w_buff = malloc(ds->w_size);
1900		if (ds->w_buff == NULL)
1901			return (ARCHIVE_FATAL);
1902	}
1903	w_size = 1U << w_bits;
1904	memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size);
1905	ds->w_pos = 0;
1906	ds->state = 0;
1907	ds->pos_pt_len_size = w_bits + 1;
1908	ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4;
1909	ds->literal_pt_len_size = PT_BITLEN_SIZE;
1910	ds->literal_pt_len_bits = 5;
1911	ds->br.cache_buffer = 0;
1912	ds->br.cache_avail = 0;
1913
1914	if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16)
1915	    != ARCHIVE_OK)
1916		return (ARCHIVE_FATAL);
1917	ds->lt.len_bits = 9;
1918	if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16)
1919	    != ARCHIVE_OK)
1920		return (ARCHIVE_FATAL);
1921	ds->error = 0;
1922
1923	return (ARCHIVE_OK);
1924}
1925
1926/*
1927 * Release LZHUF decoder.
1928 */
1929static void
1930lzh_decode_free(struct lzh_stream *strm)
1931{
1932
1933	if (strm->ds == NULL)
1934		return;
1935	free(strm->ds->w_buff);
1936	lzh_huffman_free(&(strm->ds->lt));
1937	lzh_huffman_free(&(strm->ds->pt));
1938	free(strm->ds);
1939	strm->ds = NULL;
1940}
1941
1942/*
1943 * Bit stream reader.
1944 */
1945/* Check that the cache buffer has enough bits. */
1946#define lzh_br_has(br, n)	((br)->cache_avail >= n)
1947/* Get compressed data by bit. */
1948#define lzh_br_bits(br, n)				\
1949	(((uint16_t)((br)->cache_buffer >>		\
1950		((br)->cache_avail - (n)))) & cache_masks[n])
1951#define lzh_br_bits_forced(br, n)			\
1952	(((uint16_t)((br)->cache_buffer <<		\
1953		((n) - (br)->cache_avail))) & cache_masks[n])
1954/* Read ahead to make sure the cache buffer has enough compressed data we
1955 * will use.
1956 *  True  : completed, there is enough data in the cache buffer.
1957 *  False : we met that strm->next_in is empty, we have to get following
1958 *          bytes. */
1959#define lzh_br_read_ahead_0(strm, br, n)	\
1960	(lzh_br_has(br, (n)) || lzh_br_fillup(strm, br))
1961/*  True  : the cache buffer has some bits as much as we need.
1962 *  False : there are no enough bits in the cache buffer to be used,
1963 *          we have to get following bytes if we could. */
1964#define lzh_br_read_ahead(strm, br, n)	\
1965	(lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n)))
1966
1967/* Notify how many bits we consumed. */
1968#define lzh_br_consume(br, n)	((br)->cache_avail -= (n))
1969#define lzh_br_unconsume(br, n)	((br)->cache_avail += (n))
1970
1971static const uint16_t cache_masks[] = {
1972	0x0000, 0x0001, 0x0003, 0x0007,
1973	0x000F, 0x001F, 0x003F, 0x007F,
1974	0x00FF, 0x01FF, 0x03FF, 0x07FF,
1975	0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF,
1976	0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
1977};
1978
1979/*
1980 * Shift away used bits in the cache data and fill it up with following bits.
1981 * Call this when cache buffer does not have enough bits you need.
1982 *
1983 * Returns 1 if the cache buffer is full.
1984 * Returns 0 if the cache buffer is not full; input buffer is empty.
1985 */
1986static int
1987lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br)
1988{
1989	int n = CACHE_BITS - br->cache_avail;
1990
1991	for (;;) {
1992		const int x = n >> 3;
1993		if (strm->avail_in >= x) {
1994			switch (x) {
1995			case 8:
1996				br->cache_buffer =
1997				    ((uint64_t)strm->next_in[0]) << 56 |
1998				    ((uint64_t)strm->next_in[1]) << 48 |
1999				    ((uint64_t)strm->next_in[2]) << 40 |
2000				    ((uint64_t)strm->next_in[3]) << 32 |
2001				    ((uint32_t)strm->next_in[4]) << 24 |
2002				    ((uint32_t)strm->next_in[5]) << 16 |
2003				    ((uint32_t)strm->next_in[6]) << 8 |
2004				     (uint32_t)strm->next_in[7];
2005				strm->next_in += 8;
2006				strm->avail_in -= 8;
2007				br->cache_avail += 8 * 8;
2008				return (1);
2009			case 7:
2010				br->cache_buffer =
2011		 		   (br->cache_buffer << 56) |
2012				    ((uint64_t)strm->next_in[0]) << 48 |
2013				    ((uint64_t)strm->next_in[1]) << 40 |
2014				    ((uint64_t)strm->next_in[2]) << 32 |
2015				    ((uint64_t)strm->next_in[3]) << 24 |
2016				    ((uint64_t)strm->next_in[4]) << 16 |
2017				    ((uint64_t)strm->next_in[5]) << 8 |
2018				     (uint64_t)strm->next_in[6];
2019				strm->next_in += 7;
2020				strm->avail_in -= 7;
2021				br->cache_avail += 7 * 8;
2022				return (1);
2023			case 6:
2024				br->cache_buffer =
2025		 		   (br->cache_buffer << 48) |
2026				    ((uint64_t)strm->next_in[0]) << 40 |
2027				    ((uint64_t)strm->next_in[1]) << 32 |
2028				    ((uint64_t)strm->next_in[2]) << 24 |
2029				    ((uint64_t)strm->next_in[3]) << 16 |
2030				    ((uint64_t)strm->next_in[4]) << 8 |
2031				     (uint64_t)strm->next_in[5];
2032				strm->next_in += 6;
2033				strm->avail_in -= 6;
2034				br->cache_avail += 6 * 8;
2035				return (1);
2036			case 0:
2037				/* We have enough compressed data in
2038				 * the cache buffer.*/
2039				return (1);
2040			default:
2041				break;
2042			}
2043		}
2044		if (strm->avail_in == 0) {
2045			/* There is not enough compressed data to fill up the
2046			 * cache buffer. */
2047			return (0);
2048		}
2049		br->cache_buffer =
2050		   (br->cache_buffer << 8) | *strm->next_in++;
2051		strm->avail_in--;
2052		br->cache_avail += 8;
2053		n -= 8;
2054	}
2055}
2056
2057/*
2058 * Decode LZHUF.
2059 *
2060 * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty.
2061 *    Please set available buffer and call this function again.
2062 * 2. Returns ARCHIVE_EOF if decompression has been completed.
2063 * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data
2064 *    is broken or you do not set 'last' flag properly.
2065 * 4. 'last' flag is very important, you must set 1 to the flag if there
2066 *    is no input data. The lha compressed data format does not provide how
2067 *    to know the compressed data is really finished.
2068 *    Note: lha command utility check if the total size of output bytes is
2069 *    reached the uncompressed size recorded in its header. it does not mind
2070 *    that the decoding process is properly finished.
2071 *    GNU ZIP can decompress another compressed file made by SCO LZH compress.
2072 *    it handles EOF as null to fill read buffer with zero until the decoding
2073 *    process meet 2 bytes of zeros at reading a size of a next chunk, so the
2074 *    zeros are treated as the mark of the end of the data although the zeros
2075 *    is dummy, not the file data.
2076 */
2077static int	lzh_read_blocks(struct lzh_stream *, int);
2078static int	lzh_decode_blocks(struct lzh_stream *, int);
2079#define ST_RD_BLOCK		0
2080#define ST_RD_PT_1		1
2081#define ST_RD_PT_2		2
2082#define ST_RD_PT_3		3
2083#define ST_RD_PT_4		4
2084#define ST_RD_LITERAL_1		5
2085#define ST_RD_LITERAL_2		6
2086#define ST_RD_LITERAL_3		7
2087#define ST_RD_POS_DATA_1	8
2088#define ST_GET_LITERAL		9
2089#define ST_GET_POS_1		10
2090#define ST_GET_POS_2		11
2091#define ST_COPY_DATA		12
2092
2093static int
2094lzh_decode(struct lzh_stream *strm, int last)
2095{
2096	struct lzh_dec *ds = strm->ds;
2097	int avail_in;
2098	int r;
2099
2100	if (ds->error)
2101		return (ds->error);
2102
2103	avail_in = strm->avail_in;
2104	do {
2105		if (ds->state < ST_GET_LITERAL)
2106			r = lzh_read_blocks(strm, last);
2107		else
2108			r = lzh_decode_blocks(strm, last);
2109	} while (r == 100);
2110	strm->total_in += avail_in - strm->avail_in;
2111	return (r);
2112}
2113
2114static void
2115lzh_emit_window(struct lzh_stream *strm, size_t s)
2116{
2117	strm->ref_ptr = strm->ds->w_buff;
2118	strm->avail_out = (int)s;
2119	strm->total_out += s;
2120}
2121
2122static int
2123lzh_read_blocks(struct lzh_stream *strm, int last)
2124{
2125	struct lzh_dec *ds = strm->ds;
2126	struct lzh_br *br = &(ds->br);
2127	int c = 0, i;
2128	unsigned rbits;
2129
2130	for (;;) {
2131		switch (ds->state) {
2132		case ST_RD_BLOCK:
2133			/*
2134			 * Read a block number indicates how many blocks
2135			 * we will handle. The block is composed of a
2136			 * literal and a match, sometimes a literal only
2137			 * in particular, there are no reference data at
2138			 * the beginning of the decompression.
2139			 */
2140			if (!lzh_br_read_ahead_0(strm, br, 16)) {
2141				if (!last)
2142					/* We need following data. */
2143					return (ARCHIVE_OK);
2144				if (lzh_br_has(br, 8)) {
2145					/*
2146					 * It seems there are extra bits.
2147					 *  1. Compressed data is broken.
2148					 *  2. `last' flag does not properly
2149					 *     set.
2150					 */
2151					goto failed;
2152				}
2153				if (ds->w_pos > 0) {
2154					lzh_emit_window(strm, ds->w_pos);
2155					ds->w_pos = 0;
2156					return (ARCHIVE_OK);
2157				}
2158				/* End of compressed data; we have completely
2159				 * handled all compressed data. */
2160				return (ARCHIVE_EOF);
2161			}
2162			ds->blocks_avail = lzh_br_bits(br, 16);
2163			if (ds->blocks_avail == 0)
2164				goto failed;
2165			lzh_br_consume(br, 16);
2166			/*
2167			 * Read a literal table compressed in huffman
2168			 * coding.
2169			 */
2170			ds->pt.len_size = ds->literal_pt_len_size;
2171			ds->pt.len_bits = ds->literal_pt_len_bits;
2172			ds->reading_position = 0;
2173			/* FALL THROUGH */
2174		case ST_RD_PT_1:
2175			/* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are
2176			 * used in reading both a literal table and a
2177			 * position table. */
2178			if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) {
2179				if (last)
2180					goto failed;/* Truncated data. */
2181				ds->state = ST_RD_PT_1;
2182				return (ARCHIVE_OK);
2183			}
2184			ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits);
2185			lzh_br_consume(br, ds->pt.len_bits);
2186			/* FALL THROUGH */
2187		case ST_RD_PT_2:
2188			if (ds->pt.len_avail == 0) {
2189				/* There is no bitlen. */
2190				if (!lzh_br_read_ahead(strm, br,
2191				    ds->pt.len_bits)) {
2192					if (last)
2193						goto failed;/* Truncated data.*/
2194					ds->state = ST_RD_PT_2;
2195					return (ARCHIVE_OK);
2196				}
2197				if (!lzh_make_fake_table(&(ds->pt),
2198				    lzh_br_bits(br, ds->pt.len_bits)))
2199					goto failed;/* Invalid data. */
2200				lzh_br_consume(br, ds->pt.len_bits);
2201				if (ds->reading_position)
2202					ds->state = ST_GET_LITERAL;
2203				else
2204					ds->state = ST_RD_LITERAL_1;
2205				break;
2206			} else if (ds->pt.len_avail > ds->pt.len_size)
2207				goto failed;/* Invalid data. */
2208			ds->loop = 0;
2209			memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
2210			if (ds->pt.len_avail < 3 ||
2211			    ds->pt.len_size == ds->pos_pt_len_size) {
2212				ds->state = ST_RD_PT_4;
2213				break;
2214			}
2215			/* FALL THROUGH */
2216		case ST_RD_PT_3:
2217			ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3);
2218			if (ds->loop < 3) {
2219				if (ds->loop < 0 || last)
2220					goto failed;/* Invalid data. */
2221				/* Not completed, get following data. */
2222				ds->state = ST_RD_PT_3;
2223				return (ARCHIVE_OK);
2224			}
2225			/* There are some null in bitlen of the literal. */
2226			if (!lzh_br_read_ahead(strm, br, 2)) {
2227				if (last)
2228					goto failed;/* Truncated data. */
2229				ds->state = ST_RD_PT_3;
2230				return (ARCHIVE_OK);
2231			}
2232			c = lzh_br_bits(br, 2);
2233			lzh_br_consume(br, 2);
2234			if (c > ds->pt.len_avail - 3)
2235				goto failed;/* Invalid data. */
2236			for (i = 3; c-- > 0 ;)
2237				ds->pt.bitlen[i++] = 0;
2238			ds->loop = i;
2239			/* FALL THROUGH */
2240		case ST_RD_PT_4:
2241			ds->loop = lzh_read_pt_bitlen(strm, ds->loop,
2242			    ds->pt.len_avail);
2243			if (ds->loop < ds->pt.len_avail) {
2244				if (ds->loop < 0 || last)
2245					goto failed;/* Invalid data. */
2246				/* Not completed, get following data. */
2247				ds->state = ST_RD_PT_4;
2248				return (ARCHIVE_OK);
2249			}
2250			if (!lzh_make_huffman_table(&(ds->pt)))
2251				goto failed;/* Invalid data */
2252			if (ds->reading_position) {
2253				ds->state = ST_GET_LITERAL;
2254				break;
2255			}
2256			/* FALL THROUGH */
2257		case ST_RD_LITERAL_1:
2258			if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) {
2259				if (last)
2260					goto failed;/* Truncated data. */
2261				ds->state = ST_RD_LITERAL_1;
2262				return (ARCHIVE_OK);
2263			}
2264			ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits);
2265			lzh_br_consume(br, ds->lt.len_bits);
2266			/* FALL THROUGH */
2267		case ST_RD_LITERAL_2:
2268			if (ds->lt.len_avail == 0) {
2269				/* There is no bitlen. */
2270				if (!lzh_br_read_ahead(strm, br,
2271				    ds->lt.len_bits)) {
2272					if (last)
2273						goto failed;/* Truncated data.*/
2274					ds->state = ST_RD_LITERAL_2;
2275					return (ARCHIVE_OK);
2276				}
2277				if (!lzh_make_fake_table(&(ds->lt),
2278				    lzh_br_bits(br, ds->lt.len_bits)))
2279					goto failed;/* Invalid data */
2280				lzh_br_consume(br, ds->lt.len_bits);
2281				ds->state = ST_RD_POS_DATA_1;
2282				break;
2283			} else if (ds->lt.len_avail > ds->lt.len_size)
2284				goto failed;/* Invalid data */
2285			ds->loop = 0;
2286			memset(ds->lt.freq, 0, sizeof(ds->lt.freq));
2287			/* FALL THROUGH */
2288		case ST_RD_LITERAL_3:
2289			i = ds->loop;
2290			while (i < ds->lt.len_avail) {
2291				if (!lzh_br_read_ahead(strm, br,
2292				    ds->pt.max_bits)) {
2293					if (last)
2294						goto failed;/* Truncated data.*/
2295					ds->loop = i;
2296					ds->state = ST_RD_LITERAL_3;
2297					return (ARCHIVE_OK);
2298				}
2299				rbits = lzh_br_bits(br, ds->pt.max_bits);
2300				c = lzh_decode_huffman(&(ds->pt), rbits);
2301				if (c > 2) {
2302					/* Note: 'c' will never be more than
2303					 * eighteen since it's limited by
2304					 * PT_BITLEN_SIZE, which is being set
2305					 * to ds->pt.len_size through
2306					 * ds->literal_pt_len_size. */
2307					lzh_br_consume(br, ds->pt.bitlen[c]);
2308					c -= 2;
2309					ds->lt.freq[c]++;
2310					ds->lt.bitlen[i++] = c;
2311				} else if (c == 0) {
2312					lzh_br_consume(br, ds->pt.bitlen[c]);
2313					ds->lt.bitlen[i++] = 0;
2314				} else {
2315					/* c == 1 or c == 2 */
2316					int n = (c == 1)?4:9;
2317					if (!lzh_br_read_ahead(strm, br,
2318					     ds->pt.bitlen[c] + n)) {
2319						if (last) /* Truncated data. */
2320							goto failed;
2321						ds->loop = i;
2322						ds->state = ST_RD_LITERAL_3;
2323						return (ARCHIVE_OK);
2324					}
2325					lzh_br_consume(br, ds->pt.bitlen[c]);
2326					c = lzh_br_bits(br, n);
2327					lzh_br_consume(br, n);
2328					c += (n == 4)?3:20;
2329					if (i + c > ds->lt.len_avail)
2330						goto failed;/* Invalid data */
2331					memset(&(ds->lt.bitlen[i]), 0, c);
2332					i += c;
2333				}
2334			}
2335			if (i > ds->lt.len_avail ||
2336			    !lzh_make_huffman_table(&(ds->lt)))
2337				goto failed;/* Invalid data */
2338			/* FALL THROUGH */
2339		case ST_RD_POS_DATA_1:
2340			/*
2341			 * Read a position table compressed in huffman
2342			 * coding.
2343			 */
2344			ds->pt.len_size = ds->pos_pt_len_size;
2345			ds->pt.len_bits = ds->pos_pt_len_bits;
2346			ds->reading_position = 1;
2347			ds->state = ST_RD_PT_1;
2348			break;
2349		case ST_GET_LITERAL:
2350			return (100);
2351		}
2352	}
2353failed:
2354	return (ds->error = ARCHIVE_FAILED);
2355}
2356
2357static int
2358lzh_decode_blocks(struct lzh_stream *strm, int last)
2359{
2360	struct lzh_dec *ds = strm->ds;
2361	struct lzh_br bre = ds->br;
2362	struct huffman *lt = &(ds->lt);
2363	struct huffman *pt = &(ds->pt);
2364	unsigned char *w_buff = ds->w_buff;
2365	unsigned char *lt_bitlen = lt->bitlen;
2366	unsigned char *pt_bitlen = pt->bitlen;
2367	int blocks_avail = ds->blocks_avail, c = 0;
2368	int copy_len = ds->copy_len, copy_pos = ds->copy_pos;
2369	int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
2370	int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits;
2371	int state = ds->state;
2372
2373	for (;;) {
2374		switch (state) {
2375		case ST_GET_LITERAL:
2376			for (;;) {
2377				if (blocks_avail == 0) {
2378					/* We have decoded all blocks.
2379					 * Let's handle next blocks. */
2380					ds->state = ST_RD_BLOCK;
2381					ds->br = bre;
2382					ds->blocks_avail = 0;
2383					ds->w_pos = w_pos;
2384					ds->copy_pos = 0;
2385					return (100);
2386				}
2387
2388				/* lzh_br_read_ahead() always tries to fill the
2389				 * cache buffer up. In specific situation we
2390				 * are close to the end of the data, the cache
2391				 * buffer will not be full and thus we have to
2392				 * determine if the cache buffer has some bits
2393				 * as much as we need after lzh_br_read_ahead()
2394				 * failed. */
2395				if (!lzh_br_read_ahead(strm, &bre,
2396				    lt_max_bits)) {
2397					if (!last)
2398						goto next_data;
2399					/* Remaining bits are less than
2400					 * maximum bits(lt.max_bits) but maybe
2401					 * it still remains as much as we need,
2402					 * so we should try to use it with
2403					 * dummy bits. */
2404					c = lzh_decode_huffman(lt,
2405					      lzh_br_bits_forced(&bre,
2406					        lt_max_bits));
2407					lzh_br_consume(&bre, lt_bitlen[c]);
2408					if (!lzh_br_has(&bre, 0))
2409						goto failed;/* Over read. */
2410				} else {
2411					c = lzh_decode_huffman(lt,
2412					      lzh_br_bits(&bre, lt_max_bits));
2413					lzh_br_consume(&bre, lt_bitlen[c]);
2414				}
2415				blocks_avail--;
2416				if (c > UCHAR_MAX)
2417					/* Current block is a match data. */
2418					break;
2419				/*
2420				 * 'c' is exactly a literal code.
2421				 */
2422				/* Save a decoded code to reference it
2423				 * afterward. */
2424				w_buff[w_pos] = c;
2425				if (++w_pos >= w_size) {
2426					w_pos = 0;
2427					lzh_emit_window(strm, w_size);
2428					goto next_data;
2429				}
2430			}
2431			/* 'c' is the length of a match pattern we have
2432			 * already extracted, which has be stored in
2433			 * window(ds->w_buff). */
2434			copy_len = c - (UCHAR_MAX + 1) + MINMATCH;
2435			/* FALL THROUGH */
2436		case ST_GET_POS_1:
2437			/*
2438			 * Get a reference position.
2439			 */
2440			if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) {
2441				if (!last) {
2442					state = ST_GET_POS_1;
2443					ds->copy_len = copy_len;
2444					goto next_data;
2445				}
2446				copy_pos = lzh_decode_huffman(pt,
2447				    lzh_br_bits_forced(&bre, pt_max_bits));
2448				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2449				if (!lzh_br_has(&bre, 0))
2450					goto failed;/* Over read. */
2451			} else {
2452				copy_pos = lzh_decode_huffman(pt,
2453				    lzh_br_bits(&bre, pt_max_bits));
2454				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2455			}
2456			/* FALL THROUGH */
2457		case ST_GET_POS_2:
2458			if (copy_pos > 1) {
2459				/* We need an additional adjustment number to
2460				 * the position. */
2461				int p = copy_pos - 1;
2462				if (!lzh_br_read_ahead(strm, &bre, p)) {
2463					if (last)
2464						goto failed;/* Truncated data.*/
2465					state = ST_GET_POS_2;
2466					ds->copy_len = copy_len;
2467					ds->copy_pos = copy_pos;
2468					goto next_data;
2469				}
2470				copy_pos = (1 << p) + lzh_br_bits(&bre, p);
2471				lzh_br_consume(&bre, p);
2472			}
2473			/* The position is actually a distance from the last
2474			 * code we had extracted and thus we have to convert
2475			 * it to a position of the window. */
2476			copy_pos = (w_pos - copy_pos - 1) & w_mask;
2477			/* FALL THROUGH */
2478		case ST_COPY_DATA:
2479			/*
2480			 * Copy `copy_len' bytes as extracted data from
2481			 * the window into the output buffer.
2482			 */
2483			for (;;) {
2484				int l;
2485
2486				l = copy_len;
2487				if (copy_pos > w_pos) {
2488					if (l > w_size - copy_pos)
2489						l = w_size - copy_pos;
2490				} else {
2491					if (l > w_size - w_pos)
2492						l = w_size - w_pos;
2493				}
2494				if ((copy_pos + l < w_pos)
2495				    || (w_pos + l < copy_pos)) {
2496					/* No overlap. */
2497					memcpy(w_buff + w_pos,
2498					    w_buff + copy_pos, l);
2499				} else {
2500					const unsigned char *s;
2501					unsigned char *d;
2502					int li;
2503
2504					d = w_buff + w_pos;
2505					s = w_buff + copy_pos;
2506					for (li = 0; li < l-1;) {
2507						d[li] = s[li];li++;
2508						d[li] = s[li];li++;
2509					}
2510					if (li < l)
2511						d[li] = s[li];
2512				}
2513				w_pos += l;
2514				if (w_pos == w_size) {
2515					w_pos = 0;
2516					lzh_emit_window(strm, w_size);
2517					if (copy_len <= l)
2518						state = ST_GET_LITERAL;
2519					else {
2520						state = ST_COPY_DATA;
2521						ds->copy_len = copy_len - l;
2522						ds->copy_pos =
2523						    (copy_pos + l) & w_mask;
2524					}
2525					goto next_data;
2526				}
2527				if (copy_len <= l)
2528					/* A copy of current pattern ended. */
2529					break;
2530				copy_len -= l;
2531				copy_pos = (copy_pos + l) & w_mask;
2532			}
2533			state = ST_GET_LITERAL;
2534			break;
2535		}
2536	}
2537failed:
2538	return (ds->error = ARCHIVE_FAILED);
2539next_data:
2540	ds->br = bre;
2541	ds->blocks_avail = blocks_avail;
2542	ds->state = state;
2543	ds->w_pos = w_pos;
2544	return (ARCHIVE_OK);
2545}
2546
2547static int
2548lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
2549{
2550	int bits;
2551
2552	if (hf->bitlen == NULL) {
2553		hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0]));
2554		if (hf->bitlen == NULL)
2555			return (ARCHIVE_FATAL);
2556	}
2557	if (hf->tbl == NULL) {
2558		if (tbl_bits < HTBL_BITS)
2559			bits = tbl_bits;
2560		else
2561			bits = HTBL_BITS;
2562		hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
2563		if (hf->tbl == NULL)
2564			return (ARCHIVE_FATAL);
2565	}
2566	if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
2567		hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
2568		hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
2569		if (hf->tree == NULL)
2570			return (ARCHIVE_FATAL);
2571	}
2572	hf->len_size = (int)len_size;
2573	hf->tbl_bits = tbl_bits;
2574	return (ARCHIVE_OK);
2575}
2576
2577static void
2578lzh_huffman_free(struct huffman *hf)
2579{
2580	free(hf->bitlen);
2581	free(hf->tbl);
2582	free(hf->tree);
2583}
2584
2585static const char bitlen_tbl[0x400] = {
2586	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2587	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2588	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2589	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2590	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2591	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2592	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2593	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2594	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2595	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2596	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2597	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2598	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2599	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2600	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2601	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2602	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2603	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2604	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2605	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2606	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2607	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2608	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2609	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2610	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2611	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2612	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2613	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2614	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2615	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2616	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2617	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2618	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2619	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2620	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2621	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2622	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2623	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2624	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2625	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2626	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2627	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2628	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2629	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2630	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2631	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2632	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2633	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2634	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2635	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2636	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2637	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2638	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2639	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2640	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2641	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2642	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2643	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2644	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2645	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2646	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2647	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2648	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
2649	13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16,  0
2650};
2651static int
2652lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end)
2653{
2654	struct lzh_dec *ds = strm->ds;
2655	struct lzh_br *br = &(ds->br);
2656	int c, i;
2657
2658	for (i = start; i < end; ) {
2659		/*
2660		 *  bit pattern     the number we need
2661		 *     000           ->  0
2662		 *     001           ->  1
2663		 *     010           ->  2
2664		 *     ...
2665		 *     110           ->  6
2666		 *     1110          ->  7
2667		 *     11110         ->  8
2668		 *     ...
2669		 *     1111111111110 ->  16
2670		 */
2671		if (!lzh_br_read_ahead(strm, br, 3))
2672			return (i);
2673		if ((c = lzh_br_bits(br, 3)) == 7) {
2674			if (!lzh_br_read_ahead(strm, br, 13))
2675				return (i);
2676			c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF];
2677			if (c)
2678				lzh_br_consume(br, c - 3);
2679			else
2680				return (-1);/* Invalid data. */
2681		} else
2682			lzh_br_consume(br, 3);
2683		ds->pt.bitlen[i++] = c;
2684		ds->pt.freq[c]++;
2685	}
2686	return (i);
2687}
2688
2689static int
2690lzh_make_fake_table(struct huffman *hf, uint16_t c)
2691{
2692	if (c >= hf->len_size)
2693		return (0);
2694	hf->tbl[0] = c;
2695	hf->max_bits = 0;
2696	hf->shift_bits = 0;
2697	hf->bitlen[hf->tbl[0]] = 0;
2698	return (1);
2699}
2700
2701/*
2702 * Make a huffman coding table.
2703 */
2704static int
2705lzh_make_huffman_table(struct huffman *hf)
2706{
2707	uint16_t *tbl;
2708	const unsigned char *bitlen;
2709	int bitptn[17], weight[17];
2710	int i, maxbits = 0, ptn, tbl_size, w;
2711	int diffbits, len_avail;
2712
2713	/*
2714	 * Initialize bit patterns.
2715	 */
2716	ptn = 0;
2717	for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
2718		bitptn[i] = ptn;
2719		weight[i] = w;
2720		if (hf->freq[i]) {
2721			ptn += hf->freq[i] * w;
2722			maxbits = i;
2723		}
2724	}
2725	if (ptn != 0x10000 || maxbits > hf->tbl_bits)
2726		return (0);/* Invalid */
2727
2728	hf->max_bits = maxbits;
2729
2730	/*
2731	 * Cut out extra bits which we won't house in the table.
2732	 * This preparation reduces the same calculation in the for-loop
2733	 * making the table.
2734	 */
2735	if (maxbits < 16) {
2736		int ebits = 16 - maxbits;
2737		for (i = 1; i <= maxbits; i++) {
2738			bitptn[i] >>= ebits;
2739			weight[i] >>= ebits;
2740		}
2741	}
2742	if (maxbits > HTBL_BITS) {
2743		unsigned htbl_max;
2744		uint16_t *p;
2745
2746		diffbits = maxbits - HTBL_BITS;
2747		for (i = 1; i <= HTBL_BITS; i++) {
2748			bitptn[i] >>= diffbits;
2749			weight[i] >>= diffbits;
2750		}
2751		htbl_max = bitptn[HTBL_BITS] +
2752		    weight[HTBL_BITS] * hf->freq[HTBL_BITS];
2753		p = &(hf->tbl[htbl_max]);
2754		while (p < &hf->tbl[1U<<HTBL_BITS])
2755			*p++ = 0;
2756	} else
2757		diffbits = 0;
2758	hf->shift_bits = diffbits;
2759
2760	/*
2761	 * Make the table.
2762	 */
2763	tbl_size = 1 << HTBL_BITS;
2764	tbl = hf->tbl;
2765	bitlen = hf->bitlen;
2766	len_avail = hf->len_avail;
2767	hf->tree_used = 0;
2768	for (i = 0; i < len_avail; i++) {
2769		uint16_t *p;
2770		int len, cnt;
2771		uint16_t bit;
2772		int extlen;
2773		struct htree_t *ht;
2774
2775		if (bitlen[i] == 0)
2776			continue;
2777		/* Get a bit pattern */
2778		len = bitlen[i];
2779		ptn = bitptn[len];
2780		cnt = weight[len];
2781		if (len <= HTBL_BITS) {
2782			/* Calculate next bit pattern */
2783			if ((bitptn[len] = ptn + cnt) > tbl_size)
2784				return (0);/* Invalid */
2785			/* Update the table */
2786			p = &(tbl[ptn]);
2787			if (cnt > 7) {
2788				uint16_t *pc;
2789
2790				cnt -= 8;
2791				pc = &p[cnt];
2792				pc[0] = (uint16_t)i;
2793				pc[1] = (uint16_t)i;
2794				pc[2] = (uint16_t)i;
2795				pc[3] = (uint16_t)i;
2796				pc[4] = (uint16_t)i;
2797				pc[5] = (uint16_t)i;
2798				pc[6] = (uint16_t)i;
2799				pc[7] = (uint16_t)i;
2800				if (cnt > 7) {
2801					cnt -= 8;
2802					memcpy(&p[cnt], pc,
2803						8 * sizeof(uint16_t));
2804					pc = &p[cnt];
2805					while (cnt > 15) {
2806						cnt -= 16;
2807						memcpy(&p[cnt], pc,
2808							16 * sizeof(uint16_t));
2809					}
2810				}
2811				if (cnt)
2812					memcpy(p, pc, cnt * sizeof(uint16_t));
2813			} else {
2814				while (cnt > 1) {
2815					p[--cnt] = (uint16_t)i;
2816					p[--cnt] = (uint16_t)i;
2817				}
2818				if (cnt)
2819					p[--cnt] = (uint16_t)i;
2820			}
2821			continue;
2822		}
2823
2824		/*
2825		 * A bit length is too big to be housed to a direct table,
2826		 * so we use a tree model for its extra bits.
2827		 */
2828		bitptn[len] = ptn + cnt;
2829		bit = 1U << (diffbits -1);
2830		extlen = len - HTBL_BITS;
2831
2832		p = &(tbl[ptn >> diffbits]);
2833		if (*p == 0) {
2834			*p = len_avail + hf->tree_used;
2835			ht = &(hf->tree[hf->tree_used++]);
2836			if (hf->tree_used > hf->tree_avail)
2837				return (0);/* Invalid */
2838			ht->left = 0;
2839			ht->right = 0;
2840		} else {
2841			if (*p < len_avail ||
2842			    *p >= (len_avail + hf->tree_used))
2843				return (0);/* Invalid */
2844			ht = &(hf->tree[*p - len_avail]);
2845		}
2846		while (--extlen > 0) {
2847			if (ptn & bit) {
2848				if (ht->left < len_avail) {
2849					ht->left = len_avail + hf->tree_used;
2850					ht = &(hf->tree[hf->tree_used++]);
2851					if (hf->tree_used > hf->tree_avail)
2852						return (0);/* Invalid */
2853					ht->left = 0;
2854					ht->right = 0;
2855				} else {
2856					ht = &(hf->tree[ht->left - len_avail]);
2857				}
2858			} else {
2859				if (ht->right < len_avail) {
2860					ht->right = len_avail + hf->tree_used;
2861					ht = &(hf->tree[hf->tree_used++]);
2862					if (hf->tree_used > hf->tree_avail)
2863						return (0);/* Invalid */
2864					ht->left = 0;
2865					ht->right = 0;
2866				} else {
2867					ht = &(hf->tree[ht->right - len_avail]);
2868				}
2869			}
2870			bit >>= 1;
2871		}
2872		if (ptn & bit) {
2873			if (ht->left != 0)
2874				return (0);/* Invalid */
2875			ht->left = (uint16_t)i;
2876		} else {
2877			if (ht->right != 0)
2878				return (0);/* Invalid */
2879			ht->right = (uint16_t)i;
2880		}
2881	}
2882	return (1);
2883}
2884
2885static int
2886lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
2887{
2888	struct htree_t *ht;
2889	int extlen;
2890
2891	ht = hf->tree;
2892	extlen = hf->shift_bits;
2893	while (c >= hf->len_avail) {
2894		c -= hf->len_avail;
2895		if (extlen-- <= 0 || c >= hf->tree_used)
2896			return (0);
2897		if (rbits & (1U << extlen))
2898			c = ht[c].left;
2899		else
2900			c = ht[c].right;
2901	}
2902	return (c);
2903}
2904
2905static inline int
2906lzh_decode_huffman(struct huffman *hf, unsigned rbits)
2907{
2908	int c;
2909	/*
2910	 * At first search an index table for a bit pattern.
2911	 * If it fails, search a huffman tree for.
2912	 */
2913	c = hf->tbl[rbits >> hf->shift_bits];
2914	if (c < hf->len_avail || hf->len_avail == 0)
2915		return (c);
2916	/* This bit pattern needs to be found out at a huffman tree. */
2917	return (lzh_decode_huffman_tree(hf, rbits, c));
2918}
2919
2920