1342845Sdelphij/*	$NetBSD: unlz.c,v 1.6 2018/11/11 01:42:36 christos Exp $	*/
2342845Sdelphij
3342845Sdelphij/*-
4342845Sdelphij * Copyright (c) 2018 The NetBSD Foundation, Inc.
5342845Sdelphij * All rights reserved.
6342845Sdelphij *
7342845Sdelphij * This code is derived from software contributed to The NetBSD Foundation
8342845Sdelphij * by Christos Zoulas.
9342845Sdelphij *
10342845Sdelphij * Redistribution and use in source and binary forms, with or without
11342845Sdelphij * modification, are permitted provided that the following conditions
12342845Sdelphij * are met:
13342845Sdelphij * 1. Redistributions of source code must retain the above copyright
14342845Sdelphij *    notice, this list of conditions and the following disclaimer.
15342845Sdelphij * 2. Redistributions in binary form must reproduce the above copyright
16342845Sdelphij *    notice, this list of conditions and the following disclaimer in the
17342845Sdelphij *    documentation and/or other materials provided with the distribution.
18342845Sdelphij *
19342845Sdelphij * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20342845Sdelphij * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21342845Sdelphij * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22342845Sdelphij * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23342845Sdelphij * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24342845Sdelphij * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25342845Sdelphij * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26342845Sdelphij * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27342845Sdelphij * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28342845Sdelphij * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29342845Sdelphij * POSSIBILITY OF SUCH DAMAGE.
30342845Sdelphij *
31342845Sdelphij * $FreeBSD: stable/11/usr.bin/gzip/unlz.c 343251 2019-01-21 06:52:35Z delphij $
32342845Sdelphij */
33342845Sdelphij
34342845Sdelphij/*  Lzd - Educational decompressor for the lzip format
35342845Sdelphij    Copyright (C) 2013-2018 Antonio Diaz Diaz.
36342845Sdelphij
37342845Sdelphij    This program is free software. Redistribution and use in source and
38342845Sdelphij    binary forms, with or without modification, are permitted provided
39342845Sdelphij    that the following conditions are met:
40342845Sdelphij
41342845Sdelphij    1. Redistributions of source code must retain the above copyright
42342845Sdelphij    notice, this list of conditions and the following disclaimer.
43342845Sdelphij
44342845Sdelphij    2. Redistributions in binary form must reproduce the above copyright
45342845Sdelphij    notice, this list of conditions and the following disclaimer in the
46342845Sdelphij    documentation and/or other materials provided with the distribution.
47342845Sdelphij
48342845Sdelphij    This program is distributed in the hope that it will be useful,
49342845Sdelphij    but WITHOUT ANY WARRANTY; without even the implied warranty of
50342845Sdelphij    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
51342845Sdelphij*/
52342845Sdelphij
53342845Sdelphij#include <sys/param.h>
54342845Sdelphij#include <stdio.h>
55342845Sdelphij#include <string.h>
56342845Sdelphij#include <stdlib.h>
57342845Sdelphij#include <stdint.h>
58342845Sdelphij#include <stdbool.h>
59342845Sdelphij#include <errno.h>
60342845Sdelphij#include <unistd.h>
61342845Sdelphij
62342845Sdelphij#define LZ_STATES		12
63342845Sdelphij
64342845Sdelphij#define LITERAL_CONTEXT_BITS	3
65342845Sdelphij#define POS_STATE_BITS		2
66342845Sdelphij#define POS_STATES		(1 << POS_STATE_BITS)
67342845Sdelphij#define POS_STATE_MASK 		(POS_STATES - 1)
68342845Sdelphij
69342845Sdelphij#define STATES			4
70342845Sdelphij#define DIS_SLOT_BITS		6
71342845Sdelphij
72342845Sdelphij#define DIS_MODEL_START		4
73342845Sdelphij#define DIS_MODEL_END		14
74342845Sdelphij
75342845Sdelphij#define MODELED_DISTANCES	(1 << (DIS_MODEL_END / 2))
76342845Sdelphij#define DIS_ALIGN_BITS		4
77342845Sdelphij#define DIS_ALIGN_SIZE		(1 << DIS_ALIGN_BITS)
78342845Sdelphij
79342845Sdelphij#define LOW_BITS		3
80342845Sdelphij#define MID_BITS		3
81342845Sdelphij#define HIGH_BITS		8
82342845Sdelphij
83342845Sdelphij#define LOW_SYMBOLS		(1 << LOW_BITS)
84342845Sdelphij#define MID_SYMBOLS		(1 << MID_BITS)
85342845Sdelphij#define HIGH_SYMBOLS		(1 << HIGH_BITS)
86342845Sdelphij
87342845Sdelphij#define MAX_SYMBOLS 		(LOW_SYMBOLS + MID_SYMBOLS + HIGH_SYMBOLS)
88342845Sdelphij
89342845Sdelphij#define MIN_MATCH_LEN		2
90342845Sdelphij
91342845Sdelphij#define BIT_MODEL_MOVE_BITS	5
92342845Sdelphij#define BIT_MODEL_TOTAL_BITS 	11
93342845Sdelphij#define BIT_MODEL_TOTAL 	(1 << BIT_MODEL_TOTAL_BITS)
94342845Sdelphij#define BIT_MODEL_INIT		(BIT_MODEL_TOTAL / 2)
95342845Sdelphij
96342845Sdelphijstatic const int lz_st_next[] = {
97342845Sdelphij	0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5,
98342845Sdelphij};
99342845Sdelphij
100342845Sdelphijstatic bool
101342845Sdelphijlz_st_is_char(int st) {
102342845Sdelphij	return st < 7;
103342845Sdelphij}
104342845Sdelphij
105342845Sdelphijstatic int
106342845Sdelphijlz_st_get_char(int st) {
107342845Sdelphij	return lz_st_next[st];
108342845Sdelphij}
109342845Sdelphij
110342845Sdelphijstatic int
111342845Sdelphijlz_st_get_match(int st) {
112342845Sdelphij	return st < 7 ? 7 : 10;
113342845Sdelphij}
114342845Sdelphij
115342845Sdelphijstatic int
116342845Sdelphijlz_st_get_rep(int st) {
117342845Sdelphij	return st < 7 ? 8 : 11;
118342845Sdelphij}
119342845Sdelphij
120342845Sdelphijstatic int
121342845Sdelphijlz_st_get_short_rep(int st) {
122342845Sdelphij	return st < 7 ? 9 : 11;
123342845Sdelphij}
124342845Sdelphij
125342845Sdelphijstruct lz_len_model {
126342845Sdelphij	int choice1;
127342845Sdelphij	int choice2;
128342845Sdelphij	int bm_low[POS_STATES][LOW_SYMBOLS];
129342845Sdelphij	int bm_mid[POS_STATES][MID_SYMBOLS];
130342845Sdelphij	int bm_high[HIGH_SYMBOLS];
131342845Sdelphij};
132342845Sdelphij
133342845Sdelphijstatic uint32_t lz_crc[256];
134342845Sdelphij
135342845Sdelphijstatic void
136342845Sdelphijlz_crc_init(void)
137342845Sdelphij{
138342845Sdelphij	for (unsigned i = 0; i < nitems(lz_crc); i++) {
139342845Sdelphij		unsigned c = i;
140342845Sdelphij		for (unsigned j = 0; j < 8; j++) {
141342845Sdelphij			if (c & 1)
142342845Sdelphij				c = 0xEDB88320U ^ (c >> 1);
143342845Sdelphij			else
144342845Sdelphij				c >>= 1;
145342845Sdelphij		}
146342845Sdelphij		lz_crc[i] = c;
147342845Sdelphij      }
148342845Sdelphij}
149342845Sdelphij
150342845Sdelphijstatic void
151342845Sdelphijlz_crc_update(uint32_t *crc, const uint8_t *buf, size_t len)
152342845Sdelphij{
153342845Sdelphij	for (size_t i = 0; i < len; i++)
154342845Sdelphij		*crc = lz_crc[(*crc ^ buf[i]) & 0xFF] ^ (*crc >> 8);
155342845Sdelphij}
156342845Sdelphij
157342845Sdelphijstruct lz_range_decoder {
158342845Sdelphij	FILE *fp;
159342845Sdelphij	uint32_t code;
160342845Sdelphij	uint32_t range;
161342845Sdelphij};
162342845Sdelphij
163342845Sdelphijstatic int
164342845Sdelphijlz_rd_create(struct lz_range_decoder *rd, FILE *fp)
165342845Sdelphij{
166342845Sdelphij	rd->fp = fp;
167342845Sdelphij	rd->code = 0;
168342845Sdelphij	rd->range = ~0;
169342845Sdelphij	for (int i = 0; i < 5; i++)
170342845Sdelphij		rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
171342845Sdelphij	return ferror(rd->fp) ? -1 : 0;
172342845Sdelphij}
173342845Sdelphij
174342845Sdelphijstatic unsigned
175342845Sdelphijlz_rd_decode(struct lz_range_decoder *rd, int num_bits)
176342845Sdelphij{
177342845Sdelphij	unsigned symbol = 0;
178342845Sdelphij
179342845Sdelphij	for (int i = num_bits; i > 0; i--) {
180342845Sdelphij		rd->range >>= 1;
181342845Sdelphij		symbol <<= 1;
182342845Sdelphij		if (rd->code >= rd->range) {
183342845Sdelphij			rd->code -= rd->range;
184342845Sdelphij			symbol |= 1;
185342845Sdelphij		}
186342845Sdelphij		if (rd->range <= 0x00FFFFFFU) {
187342845Sdelphij			rd->range <<= 8;
188342845Sdelphij			rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
189342845Sdelphij		}
190342845Sdelphij	}
191342845Sdelphij
192342845Sdelphij	return symbol;
193342845Sdelphij}
194342845Sdelphij
195342845Sdelphijstatic unsigned
196342845Sdelphijlz_rd_decode_bit(struct lz_range_decoder *rd, int *bm)
197342845Sdelphij{
198342845Sdelphij	unsigned symbol;
199342845Sdelphij	const uint32_t bound = (rd->range >> BIT_MODEL_TOTAL_BITS) * *bm;
200342845Sdelphij
201342845Sdelphij	if(rd->code < bound) {
202342845Sdelphij		rd->range = bound;
203342845Sdelphij		*bm += (BIT_MODEL_TOTAL - *bm) >> BIT_MODEL_MOVE_BITS;
204342845Sdelphij		symbol = 0;
205342845Sdelphij	}
206342845Sdelphij	else {
207342845Sdelphij		rd->range -= bound;
208342845Sdelphij		rd->code -= bound;
209342845Sdelphij		*bm -= *bm >> BIT_MODEL_MOVE_BITS;
210342845Sdelphij		symbol = 1;
211342845Sdelphij	}
212342845Sdelphij
213342845Sdelphij	if (rd->range <= 0x00FFFFFFU) {
214342845Sdelphij		rd->range <<= 8;
215342845Sdelphij		rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
216342845Sdelphij	}
217342845Sdelphij	return symbol;
218342845Sdelphij}
219342845Sdelphij
220342845Sdelphijstatic unsigned
221342845Sdelphijlz_rd_decode_tree(struct lz_range_decoder *rd, int *bm, int num_bits)
222342845Sdelphij{
223342845Sdelphij	unsigned symbol = 1;
224342845Sdelphij
225342845Sdelphij	for (int i = 0; i < num_bits; i++)
226342845Sdelphij		symbol = (symbol << 1) | lz_rd_decode_bit(rd, &bm[symbol]);
227342845Sdelphij
228342845Sdelphij	return symbol - (1 << num_bits);
229342845Sdelphij}
230342845Sdelphij
231342845Sdelphijstatic unsigned
232342845Sdelphijlz_rd_decode_tree_reversed(struct lz_range_decoder *rd, int *bm, int num_bits)
233342845Sdelphij{
234342845Sdelphij	unsigned symbol = lz_rd_decode_tree(rd, bm, num_bits);
235342845Sdelphij	unsigned reversed_symbol = 0;
236342845Sdelphij
237342845Sdelphij	for (int i = 0; i < num_bits; i++) {
238342845Sdelphij		reversed_symbol = (reversed_symbol << 1) | (symbol & 1);
239342845Sdelphij		symbol >>= 1;
240342845Sdelphij	}
241342845Sdelphij
242342845Sdelphij	return reversed_symbol;
243342845Sdelphij}
244342845Sdelphij
245342845Sdelphijstatic unsigned
246342845Sdelphijlz_rd_decode_matched(struct lz_range_decoder *rd, int *bm, int match_byte)
247342845Sdelphij{
248342845Sdelphij	unsigned symbol = 1;
249342845Sdelphij
250342845Sdelphij	for (int i = 7; i >= 0; i--) {
251342845Sdelphij		const unsigned match_bit = (match_byte >> i) & 1;
252342845Sdelphij		const unsigned bit = lz_rd_decode_bit(rd,
253342845Sdelphij		    &bm[symbol + (match_bit << 8) + 0x100]);
254342845Sdelphij		symbol = (symbol << 1) | bit;
255342845Sdelphij		if (match_bit != bit) {
256342845Sdelphij			while (symbol < 0x100) {
257342845Sdelphij				symbol = (symbol << 1) |
258342845Sdelphij				    lz_rd_decode_bit(rd, &bm[symbol]);
259342845Sdelphij			}
260342845Sdelphij			break;
261342845Sdelphij		}
262342845Sdelphij	}
263342845Sdelphij	return symbol & 0xFF;
264342845Sdelphij}
265342845Sdelphij
266342845Sdelphijstatic unsigned
267342845Sdelphijlz_rd_decode_len(struct lz_range_decoder *rd, struct lz_len_model *lm,
268342845Sdelphij    int pos_state)
269342845Sdelphij{
270342845Sdelphij	if (lz_rd_decode_bit(rd, &lm->choice1) == 0)
271342845Sdelphij		return lz_rd_decode_tree(rd, lm->bm_low[pos_state], LOW_BITS);
272342845Sdelphij
273342845Sdelphij	if (lz_rd_decode_bit(rd, &lm->choice2) == 0) {
274342845Sdelphij		return LOW_SYMBOLS +
275342845Sdelphij		    lz_rd_decode_tree(rd, lm->bm_mid[pos_state], MID_BITS);
276342845Sdelphij	}
277342845Sdelphij
278342845Sdelphij	return LOW_SYMBOLS + MID_SYMBOLS +
279342845Sdelphij           lz_rd_decode_tree(rd, lm->bm_high, HIGH_BITS);
280342845Sdelphij}
281342845Sdelphij
282342845Sdelphijstruct lz_decoder {
283342845Sdelphij	FILE *fin, *fout;
284342845Sdelphij	off_t pos, ppos, spos, dict_size;
285342845Sdelphij	bool wrapped;
286342845Sdelphij	uint32_t crc;
287342845Sdelphij	uint8_t *obuf;
288342845Sdelphij	struct lz_range_decoder rdec;
289342845Sdelphij};
290342845Sdelphij
291342845Sdelphijstatic int
292342845Sdelphijlz_flush(struct lz_decoder *lz)
293342845Sdelphij{
294342845Sdelphij	off_t offs = lz->pos - lz->spos;
295342845Sdelphij	if (offs <= 0)
296342845Sdelphij		return -1;
297342845Sdelphij
298342845Sdelphij	size_t size = (size_t)offs;
299342845Sdelphij	lz_crc_update(&lz->crc, lz->obuf + lz->spos, size);
300342845Sdelphij	if (fwrite(lz->obuf + lz->spos, 1, size, lz->fout) != size)
301342845Sdelphij		return -1;
302342845Sdelphij
303342845Sdelphij	lz->wrapped = lz->pos >= lz->dict_size;
304342845Sdelphij	if (lz->wrapped) {
305342845Sdelphij		lz->ppos += lz->pos;
306342845Sdelphij		lz->pos = 0;
307342845Sdelphij	}
308342845Sdelphij	lz->spos = lz->pos;
309342845Sdelphij	return 0;
310342845Sdelphij}
311342845Sdelphij
312342845Sdelphijstatic void
313342845Sdelphijlz_destroy(struct lz_decoder *lz)
314342845Sdelphij{
315342845Sdelphij	if (lz->fin)
316342845Sdelphij		fclose(lz->fin);
317342845Sdelphij	if (lz->fout)
318342845Sdelphij		fclose(lz->fout);
319342845Sdelphij	free(lz->obuf);
320342845Sdelphij}
321342845Sdelphij
322342845Sdelphijstatic int
323342845Sdelphijlz_create(struct lz_decoder *lz, int fin, int fdout, int dict_size)
324342845Sdelphij{
325342845Sdelphij	memset(lz, 0, sizeof(*lz));
326342845Sdelphij
327342845Sdelphij	lz->fin = fdopen(dup(fin), "r");
328342845Sdelphij	if (lz->fin == NULL)
329342845Sdelphij		goto out;
330342845Sdelphij
331342845Sdelphij	lz->fout = fdopen(dup(fdout), "w");
332342845Sdelphij	if (lz->fout == NULL)
333342845Sdelphij		goto out;
334342845Sdelphij
335342845Sdelphij	lz->pos = lz->ppos = lz->spos = 0;
336342845Sdelphij	lz->crc = ~0;
337342845Sdelphij	lz->dict_size = dict_size;
338342845Sdelphij	lz->wrapped = false;
339342845Sdelphij
340342845Sdelphij	lz->obuf = malloc(dict_size);
341342845Sdelphij	if (lz->obuf == NULL)
342342845Sdelphij		goto out;
343342845Sdelphij
344342845Sdelphij	if (lz_rd_create(&lz->rdec, lz->fin) == -1)
345342845Sdelphij		goto out;
346342845Sdelphij	return 0;
347342845Sdelphijout:
348342845Sdelphij	lz_destroy(lz);
349342845Sdelphij	return -1;
350342845Sdelphij}
351342845Sdelphij
352342845Sdelphijstatic uint8_t
353342845Sdelphijlz_peek(const struct lz_decoder *lz, unsigned ahead)
354342845Sdelphij{
355342845Sdelphij	off_t diff = lz->pos - ahead - 1;
356342845Sdelphij
357342845Sdelphij	if (diff >= 0)
358342845Sdelphij		return lz->obuf[diff];
359342845Sdelphij
360342845Sdelphij	if (lz->wrapped)
361342845Sdelphij		return lz->obuf[lz->dict_size + diff];
362342845Sdelphij
363342845Sdelphij	return 0;
364342845Sdelphij}
365342845Sdelphij
366342845Sdelphijstatic void
367342845Sdelphijlz_put(struct lz_decoder *lz, uint8_t b)
368342845Sdelphij{
369342845Sdelphij	lz->obuf[lz->pos++] = b;
370342845Sdelphij	if (lz->dict_size == lz->pos)
371342845Sdelphij		lz_flush(lz);
372342845Sdelphij}
373342845Sdelphij
374342845Sdelphijstatic off_t
375342845Sdelphijlz_get_data_position(const struct lz_decoder *lz)
376342845Sdelphij{
377342845Sdelphij	return lz->ppos + lz->pos;
378342845Sdelphij}
379342845Sdelphij
380342845Sdelphijstatic unsigned
381342845Sdelphijlz_get_crc(const struct lz_decoder *lz)
382342845Sdelphij{
383342845Sdelphij	return lz->crc ^ 0xffffffffU;
384342845Sdelphij}
385342845Sdelphij
386342845Sdelphijstatic void
387342845Sdelphijlz_bm_init(int *a, size_t l)
388342845Sdelphij{
389342845Sdelphij	for (size_t i = 0; i < l; i++)
390342845Sdelphij		a[i] = BIT_MODEL_INIT;
391342845Sdelphij}
392342845Sdelphij
393342845Sdelphij#define LZ_BM_INIT(a)	lz_bm_init(a, nitems(a))
394342845Sdelphij#define LZ_BM_INIT2(a)	do { \
395342845Sdelphij	size_t l = nitems(a[0]); \
396342845Sdelphij	for (size_t i = 0; i < nitems(a); i++) \
397342845Sdelphij		lz_bm_init(a[i], l); \
398342845Sdelphij} while (/*CONSTCOND*/0)
399342845Sdelphij
400342845Sdelphij#define LZ_MODEL_INIT(a) do { \
401342845Sdelphij	a.choice1 = BIT_MODEL_INIT; \
402342845Sdelphij	a.choice2 = BIT_MODEL_INIT; \
403342845Sdelphij	LZ_BM_INIT2(a.bm_low); \
404342845Sdelphij	LZ_BM_INIT2(a.bm_mid); \
405342845Sdelphij	LZ_BM_INIT(a.bm_high); \
406342845Sdelphij} while (/*CONSTCOND*/0)
407342845Sdelphij
408342845Sdelphijstatic bool
409342845Sdelphijlz_decode_member(struct lz_decoder *lz)
410342845Sdelphij{
411342845Sdelphij	int bm_literal[1 << LITERAL_CONTEXT_BITS][0x300];
412342845Sdelphij	int bm_match[LZ_STATES][POS_STATES];
413342845Sdelphij	int bm_rep[4][LZ_STATES];
414342845Sdelphij	int bm_len[LZ_STATES][POS_STATES];
415342845Sdelphij	int bm_dis_slot[LZ_STATES][1 << DIS_SLOT_BITS];
416342845Sdelphij	int bm_dis[MODELED_DISTANCES - DIS_MODEL_END + 1];
417342845Sdelphij	int bm_align[DIS_ALIGN_SIZE];
418342845Sdelphij
419342845Sdelphij	LZ_BM_INIT2(bm_literal);
420342845Sdelphij	LZ_BM_INIT2(bm_match);
421342845Sdelphij	LZ_BM_INIT2(bm_rep);
422342845Sdelphij	LZ_BM_INIT2(bm_len);
423342845Sdelphij	LZ_BM_INIT2(bm_dis_slot);
424342845Sdelphij	LZ_BM_INIT(bm_dis);
425342845Sdelphij	LZ_BM_INIT(bm_align);
426342845Sdelphij
427342845Sdelphij	struct lz_len_model match_len_model;
428342845Sdelphij	struct lz_len_model rep_len_model;
429342845Sdelphij
430342845Sdelphij	LZ_MODEL_INIT(match_len_model);
431342845Sdelphij	LZ_MODEL_INIT(rep_len_model);
432342845Sdelphij
433342845Sdelphij	struct lz_range_decoder *rd = &lz->rdec;
434342845Sdelphij	unsigned rep[4] = { 0 };
435342845Sdelphij
436342845Sdelphij
437342845Sdelphij	int state = 0;
438342845Sdelphij
439342845Sdelphij	while (!feof(lz->fin) && !ferror(lz->fin)) {
440342845Sdelphij		const int pos_state = lz_get_data_position(lz) & POS_STATE_MASK;
441342845Sdelphij		// bit 1
442342845Sdelphij		if (lz_rd_decode_bit(rd, &bm_match[state][pos_state]) == 0) {
443342845Sdelphij			const uint8_t prev_byte = lz_peek(lz, 0);
444342845Sdelphij			const int literal_state =
445342845Sdelphij			    prev_byte >> (8 - LITERAL_CONTEXT_BITS);
446342845Sdelphij			int *bm = bm_literal[literal_state];
447342845Sdelphij			if (lz_st_is_char(state))
448342845Sdelphij				lz_put(lz, lz_rd_decode_tree(rd, bm, 8));
449342845Sdelphij			else {
450342845Sdelphij				int peek = lz_peek(lz, rep[0]);
451342845Sdelphij				lz_put(lz, lz_rd_decode_matched(rd, bm, peek));
452342845Sdelphij			}
453342845Sdelphij			state = lz_st_get_char(state);
454342845Sdelphij			continue;
455342845Sdelphij		}
456342845Sdelphij		int len;
457342845Sdelphij		// bit 2
458342845Sdelphij		if (lz_rd_decode_bit(rd, &bm_rep[0][state]) != 0) {
459342845Sdelphij			// bit 3
460342845Sdelphij			if (lz_rd_decode_bit(rd, &bm_rep[1][state]) == 0) {
461342845Sdelphij				// bit 4
462342845Sdelphij				if (lz_rd_decode_bit(rd,
463342845Sdelphij				    &bm_len[state][pos_state]) == 0)
464342845Sdelphij				{
465342845Sdelphij					state = lz_st_get_short_rep(state);
466342845Sdelphij					lz_put(lz, lz_peek(lz, rep[0]));
467342845Sdelphij					continue;
468342845Sdelphij				}
469342845Sdelphij			} else {
470342845Sdelphij				unsigned distance;
471342845Sdelphij				// bit 4
472342845Sdelphij				if (lz_rd_decode_bit(rd, &bm_rep[2][state])
473342845Sdelphij				    == 0)
474342845Sdelphij					distance = rep[1];
475342845Sdelphij				else {
476342845Sdelphij					// bit 5
477342845Sdelphij					if (lz_rd_decode_bit(rd,
478342845Sdelphij					    &bm_rep[3][state]) == 0)
479342845Sdelphij						distance = rep[2];
480342845Sdelphij					else {
481342845Sdelphij						distance = rep[3];
482342845Sdelphij						rep[3] = rep[2];
483342845Sdelphij					}
484342845Sdelphij					rep[2] = rep[1];
485342845Sdelphij				}
486342845Sdelphij				rep[1] = rep[0];
487342845Sdelphij				rep[0] = distance;
488342845Sdelphij			}
489342845Sdelphij			state = lz_st_get_rep(state);
490342845Sdelphij			len = MIN_MATCH_LEN +
491342845Sdelphij			    lz_rd_decode_len(rd, &rep_len_model, pos_state);
492342845Sdelphij		} else {
493342845Sdelphij			rep[3] = rep[2]; rep[2] = rep[1]; rep[1] = rep[0];
494342845Sdelphij			len = MIN_MATCH_LEN +
495342845Sdelphij			    lz_rd_decode_len(rd, &match_len_model, pos_state);
496342845Sdelphij			const int len_state =
497342845Sdelphij			    MIN(len - MIN_MATCH_LEN, STATES - 1);
498342845Sdelphij			rep[0] = lz_rd_decode_tree(rd, bm_dis_slot[len_state],
499342845Sdelphij			    DIS_SLOT_BITS);
500342845Sdelphij			if (rep[0] >= DIS_MODEL_START) {
501342845Sdelphij				const unsigned dis_slot = rep[0];
502342845Sdelphij				const int direct_bits = (dis_slot >> 1) - 1;
503342845Sdelphij			        rep[0] = (2 | (dis_slot & 1)) << direct_bits;
504342845Sdelphij				if (dis_slot < DIS_MODEL_END)
505342845Sdelphij					rep[0] += lz_rd_decode_tree_reversed(rd,
506342845Sdelphij					    &bm_dis[rep[0] - dis_slot],
507342845Sdelphij                                            direct_bits);
508342845Sdelphij				else {
509342845Sdelphij					rep[0] += lz_rd_decode(rd, direct_bits
510342845Sdelphij					    - DIS_ALIGN_BITS) << DIS_ALIGN_BITS;
511342845Sdelphij					rep[0] += lz_rd_decode_tree_reversed(rd,
512342845Sdelphij					    bm_align, DIS_ALIGN_BITS);
513342845Sdelphij					if (rep[0] == 0xFFFFFFFFU) {
514342845Sdelphij						lz_flush(lz);
515342845Sdelphij						return len == MIN_MATCH_LEN;
516342845Sdelphij					}
517342845Sdelphij				}
518342845Sdelphij			}
519342845Sdelphij			state = lz_st_get_match(state);
520342845Sdelphij			if (rep[0] >= lz->dict_size ||
521342845Sdelphij			    (rep[0] >= lz->pos && !lz->wrapped)) {
522342845Sdelphij				lz_flush(lz);
523342845Sdelphij				return false;
524342845Sdelphij			}
525342845Sdelphij		}
526342845Sdelphij		for (int i = 0; i < len; i++)
527342845Sdelphij			lz_put(lz, lz_peek(lz, rep[0]));
528342845Sdelphij    	}
529342845Sdelphij	lz_flush(lz);
530342845Sdelphij	return false;
531342845Sdelphij}
532342845Sdelphij
533342845Sdelphij/*
534342845Sdelphij * 0-3	CRC32 of the uncompressed data
535342845Sdelphij * 4-11 size of the uncompressed data
536342845Sdelphij * 12-19 member size including header and trailer
537342845Sdelphij */
538342845Sdelphij#define TRAILER_SIZE 20
539342845Sdelphij
540342845Sdelphij
541342845Sdelphijstatic off_t
542342845Sdelphijlz_decode(int fin, int fdout, unsigned dict_size, off_t *insize)
543342845Sdelphij{
544342845Sdelphij	struct lz_decoder lz;
545342845Sdelphij	off_t rv = -1;
546342845Sdelphij
547342845Sdelphij	if (lz_create(&lz, fin, fdout, dict_size) == -1)
548342845Sdelphij		return -1;
549342845Sdelphij
550342845Sdelphij	if (!lz_decode_member(&lz))
551342845Sdelphij		goto out;
552342845Sdelphij
553342845Sdelphij	uint8_t trailer[TRAILER_SIZE];
554342845Sdelphij
555342845Sdelphij	for(size_t i = 0; i < nitems(trailer); i++)
556342845Sdelphij		trailer[i] = (uint8_t)getc(lz.fin);
557342845Sdelphij
558342845Sdelphij	unsigned crc = 0;
559342845Sdelphij	for (int i = 3; i >= 0; --i) {
560342845Sdelphij		crc <<= 8;
561342845Sdelphij		crc += trailer[i];
562342845Sdelphij	}
563342845Sdelphij
564342845Sdelphij	int64_t data_size = 0;
565342845Sdelphij	for (int i = 11; i >= 4; --i) {
566342845Sdelphij		data_size <<= 8;
567342845Sdelphij		data_size += trailer[i];
568342845Sdelphij	}
569342845Sdelphij
570342845Sdelphij	if (crc != lz_get_crc(&lz) || data_size != lz_get_data_position(&lz))
571342845Sdelphij		goto out;
572342845Sdelphij
573342845Sdelphij	rv = 0;
574342845Sdelphij	for (int i = 19; i >= 12; --i) {
575342845Sdelphij		rv <<= 8;
576342845Sdelphij		rv += trailer[i];
577342845Sdelphij	}
578342845Sdelphij	if (insize)
579342845Sdelphij		*insize = rv;
580342845Sdelphij#if 0
581342845Sdelphij	/* Does not work with pipes */
582342845Sdelphij	rv = ftello(lz.fout);
583342845Sdelphij#else
584342845Sdelphij	rv = data_size;
585342845Sdelphij#endif
586342845Sdelphijout:
587342845Sdelphij	lz_destroy(&lz);
588342845Sdelphij	return rv;
589342845Sdelphij}
590342845Sdelphij
591342845Sdelphij
592342845Sdelphij/*
593342845Sdelphij * 0-3 magic
594342845Sdelphij * 4 version
595342845Sdelphij * 5 coded dict_size
596342845Sdelphij */
597342845Sdelphij#define HDR_SIZE 6
598342845Sdelphij#define MIN_DICTIONARY_SIZE (1 << 12)
599342845Sdelphij#define MAX_DICTIONARY_SIZE (1 << 29)
600342845Sdelphij
601342845Sdelphijstatic const char hdrmagic[] = { 'L', 'Z', 'I', 'P', 1 };
602342845Sdelphij
603342845Sdelphijstatic unsigned
604342845Sdelphijlz_get_dict_size(unsigned char c)
605342845Sdelphij{
606342845Sdelphij	unsigned dict_size = 1 << (c & 0x1f);
607342845Sdelphij	dict_size -= (dict_size >> 2) * ( (c >> 5) & 0x7);
608342845Sdelphij	if (dict_size < MIN_DICTIONARY_SIZE || dict_size > MAX_DICTIONARY_SIZE)
609342845Sdelphij		return 0;
610342845Sdelphij	return dict_size;
611342845Sdelphij}
612342845Sdelphij
613342845Sdelphijstatic off_t
614342845Sdelphijunlz(int fin, int fout, char *pre, size_t prelen, off_t *bytes_in)
615342845Sdelphij{
616342845Sdelphij	if (lz_crc[0] == 0)
617342845Sdelphij		lz_crc_init();
618342845Sdelphij
619342845Sdelphij	char header[HDR_SIZE];
620342845Sdelphij
621342845Sdelphij	if (prelen > sizeof(header))
622342845Sdelphij		return -1;
623342845Sdelphij	if (pre && prelen)
624342845Sdelphij		memcpy(header, pre, prelen);
625342845Sdelphij
626342845Sdelphij	ssize_t nr = read(fin, header + prelen, sizeof(header) - prelen);
627342845Sdelphij	switch (nr) {
628342845Sdelphij	case -1:
629342845Sdelphij		return -1;
630342845Sdelphij	case 0:
631342845Sdelphij		return prelen ? -1 : 0;
632342845Sdelphij	default:
633342845Sdelphij		if ((size_t)nr != sizeof(header) - prelen)
634342845Sdelphij			return -1;
635342845Sdelphij		break;
636342845Sdelphij	}
637342845Sdelphij
638342845Sdelphij	if (memcmp(header, hdrmagic, sizeof(hdrmagic)) != 0)
639342845Sdelphij		return -1;
640342845Sdelphij
641342845Sdelphij	unsigned dict_size = lz_get_dict_size(header[5]);
642342845Sdelphij	if (dict_size == 0)
643342845Sdelphij		return -1;
644342845Sdelphij
645342845Sdelphij	return lz_decode(fin, fout, dict_size, bytes_in);
646342845Sdelphij}
647