1342845Sdelphij/* $NetBSD: unlz.c,v 1.6 2018/11/11 01:42:36 christos Exp $ */ 2342845Sdelphij 3342845Sdelphij/*- 4342845Sdelphij * Copyright (c) 2018 The NetBSD Foundation, Inc. 5342845Sdelphij * All rights reserved. 6342845Sdelphij * 7342845Sdelphij * This code is derived from software contributed to The NetBSD Foundation 8342845Sdelphij * by Christos Zoulas. 9342845Sdelphij * 10342845Sdelphij * Redistribution and use in source and binary forms, with or without 11342845Sdelphij * modification, are permitted provided that the following conditions 12342845Sdelphij * are met: 13342845Sdelphij * 1. Redistributions of source code must retain the above copyright 14342845Sdelphij * notice, this list of conditions and the following disclaimer. 15342845Sdelphij * 2. Redistributions in binary form must reproduce the above copyright 16342845Sdelphij * notice, this list of conditions and the following disclaimer in the 17342845Sdelphij * documentation and/or other materials provided with the distribution. 18342845Sdelphij * 19342845Sdelphij * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20342845Sdelphij * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21342845Sdelphij * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22342845Sdelphij * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23342845Sdelphij * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24342845Sdelphij * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25342845Sdelphij * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26342845Sdelphij * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27342845Sdelphij * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28342845Sdelphij * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29342845Sdelphij * POSSIBILITY OF SUCH DAMAGE. 30342845Sdelphij * 31342845Sdelphij * $FreeBSD: stable/11/usr.bin/gzip/unlz.c 343251 2019-01-21 06:52:35Z delphij $ 32342845Sdelphij */ 33342845Sdelphij 34342845Sdelphij/* Lzd - Educational decompressor for the lzip format 35342845Sdelphij Copyright (C) 2013-2018 Antonio Diaz Diaz. 36342845Sdelphij 37342845Sdelphij This program is free software. Redistribution and use in source and 38342845Sdelphij binary forms, with or without modification, are permitted provided 39342845Sdelphij that the following conditions are met: 40342845Sdelphij 41342845Sdelphij 1. Redistributions of source code must retain the above copyright 42342845Sdelphij notice, this list of conditions and the following disclaimer. 43342845Sdelphij 44342845Sdelphij 2. Redistributions in binary form must reproduce the above copyright 45342845Sdelphij notice, this list of conditions and the following disclaimer in the 46342845Sdelphij documentation and/or other materials provided with the distribution. 47342845Sdelphij 48342845Sdelphij This program is distributed in the hope that it will be useful, 49342845Sdelphij but WITHOUT ANY WARRANTY; without even the implied warranty of 50342845Sdelphij MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 51342845Sdelphij*/ 52342845Sdelphij 53342845Sdelphij#include <sys/param.h> 54342845Sdelphij#include <stdio.h> 55342845Sdelphij#include <string.h> 56342845Sdelphij#include <stdlib.h> 57342845Sdelphij#include <stdint.h> 58342845Sdelphij#include <stdbool.h> 59342845Sdelphij#include <errno.h> 60342845Sdelphij#include <unistd.h> 61342845Sdelphij 62342845Sdelphij#define LZ_STATES 12 63342845Sdelphij 64342845Sdelphij#define LITERAL_CONTEXT_BITS 3 65342845Sdelphij#define POS_STATE_BITS 2 66342845Sdelphij#define POS_STATES (1 << POS_STATE_BITS) 67342845Sdelphij#define POS_STATE_MASK (POS_STATES - 1) 68342845Sdelphij 69342845Sdelphij#define STATES 4 70342845Sdelphij#define DIS_SLOT_BITS 6 71342845Sdelphij 72342845Sdelphij#define DIS_MODEL_START 4 73342845Sdelphij#define DIS_MODEL_END 14 74342845Sdelphij 75342845Sdelphij#define MODELED_DISTANCES (1 << (DIS_MODEL_END / 2)) 76342845Sdelphij#define DIS_ALIGN_BITS 4 77342845Sdelphij#define DIS_ALIGN_SIZE (1 << DIS_ALIGN_BITS) 78342845Sdelphij 79342845Sdelphij#define LOW_BITS 3 80342845Sdelphij#define MID_BITS 3 81342845Sdelphij#define HIGH_BITS 8 82342845Sdelphij 83342845Sdelphij#define LOW_SYMBOLS (1 << LOW_BITS) 84342845Sdelphij#define MID_SYMBOLS (1 << MID_BITS) 85342845Sdelphij#define HIGH_SYMBOLS (1 << HIGH_BITS) 86342845Sdelphij 87342845Sdelphij#define MAX_SYMBOLS (LOW_SYMBOLS + MID_SYMBOLS + HIGH_SYMBOLS) 88342845Sdelphij 89342845Sdelphij#define MIN_MATCH_LEN 2 90342845Sdelphij 91342845Sdelphij#define BIT_MODEL_MOVE_BITS 5 92342845Sdelphij#define BIT_MODEL_TOTAL_BITS 11 93342845Sdelphij#define BIT_MODEL_TOTAL (1 << BIT_MODEL_TOTAL_BITS) 94342845Sdelphij#define BIT_MODEL_INIT (BIT_MODEL_TOTAL / 2) 95342845Sdelphij 96342845Sdelphijstatic const int lz_st_next[] = { 97342845Sdelphij 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5, 98342845Sdelphij}; 99342845Sdelphij 100342845Sdelphijstatic bool 101342845Sdelphijlz_st_is_char(int st) { 102342845Sdelphij return st < 7; 103342845Sdelphij} 104342845Sdelphij 105342845Sdelphijstatic int 106342845Sdelphijlz_st_get_char(int st) { 107342845Sdelphij return lz_st_next[st]; 108342845Sdelphij} 109342845Sdelphij 110342845Sdelphijstatic int 111342845Sdelphijlz_st_get_match(int st) { 112342845Sdelphij return st < 7 ? 7 : 10; 113342845Sdelphij} 114342845Sdelphij 115342845Sdelphijstatic int 116342845Sdelphijlz_st_get_rep(int st) { 117342845Sdelphij return st < 7 ? 8 : 11; 118342845Sdelphij} 119342845Sdelphij 120342845Sdelphijstatic int 121342845Sdelphijlz_st_get_short_rep(int st) { 122342845Sdelphij return st < 7 ? 9 : 11; 123342845Sdelphij} 124342845Sdelphij 125342845Sdelphijstruct lz_len_model { 126342845Sdelphij int choice1; 127342845Sdelphij int choice2; 128342845Sdelphij int bm_low[POS_STATES][LOW_SYMBOLS]; 129342845Sdelphij int bm_mid[POS_STATES][MID_SYMBOLS]; 130342845Sdelphij int bm_high[HIGH_SYMBOLS]; 131342845Sdelphij}; 132342845Sdelphij 133342845Sdelphijstatic uint32_t lz_crc[256]; 134342845Sdelphij 135342845Sdelphijstatic void 136342845Sdelphijlz_crc_init(void) 137342845Sdelphij{ 138342845Sdelphij for (unsigned i = 0; i < nitems(lz_crc); i++) { 139342845Sdelphij unsigned c = i; 140342845Sdelphij for (unsigned j = 0; j < 8; j++) { 141342845Sdelphij if (c & 1) 142342845Sdelphij c = 0xEDB88320U ^ (c >> 1); 143342845Sdelphij else 144342845Sdelphij c >>= 1; 145342845Sdelphij } 146342845Sdelphij lz_crc[i] = c; 147342845Sdelphij } 148342845Sdelphij} 149342845Sdelphij 150342845Sdelphijstatic void 151342845Sdelphijlz_crc_update(uint32_t *crc, const uint8_t *buf, size_t len) 152342845Sdelphij{ 153342845Sdelphij for (size_t i = 0; i < len; i++) 154342845Sdelphij *crc = lz_crc[(*crc ^ buf[i]) & 0xFF] ^ (*crc >> 8); 155342845Sdelphij} 156342845Sdelphij 157342845Sdelphijstruct lz_range_decoder { 158342845Sdelphij FILE *fp; 159342845Sdelphij uint32_t code; 160342845Sdelphij uint32_t range; 161342845Sdelphij}; 162342845Sdelphij 163342845Sdelphijstatic int 164342845Sdelphijlz_rd_create(struct lz_range_decoder *rd, FILE *fp) 165342845Sdelphij{ 166342845Sdelphij rd->fp = fp; 167342845Sdelphij rd->code = 0; 168342845Sdelphij rd->range = ~0; 169342845Sdelphij for (int i = 0; i < 5; i++) 170342845Sdelphij rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp); 171342845Sdelphij return ferror(rd->fp) ? -1 : 0; 172342845Sdelphij} 173342845Sdelphij 174342845Sdelphijstatic unsigned 175342845Sdelphijlz_rd_decode(struct lz_range_decoder *rd, int num_bits) 176342845Sdelphij{ 177342845Sdelphij unsigned symbol = 0; 178342845Sdelphij 179342845Sdelphij for (int i = num_bits; i > 0; i--) { 180342845Sdelphij rd->range >>= 1; 181342845Sdelphij symbol <<= 1; 182342845Sdelphij if (rd->code >= rd->range) { 183342845Sdelphij rd->code -= rd->range; 184342845Sdelphij symbol |= 1; 185342845Sdelphij } 186342845Sdelphij if (rd->range <= 0x00FFFFFFU) { 187342845Sdelphij rd->range <<= 8; 188342845Sdelphij rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp); 189342845Sdelphij } 190342845Sdelphij } 191342845Sdelphij 192342845Sdelphij return symbol; 193342845Sdelphij} 194342845Sdelphij 195342845Sdelphijstatic unsigned 196342845Sdelphijlz_rd_decode_bit(struct lz_range_decoder *rd, int *bm) 197342845Sdelphij{ 198342845Sdelphij unsigned symbol; 199342845Sdelphij const uint32_t bound = (rd->range >> BIT_MODEL_TOTAL_BITS) * *bm; 200342845Sdelphij 201342845Sdelphij if(rd->code < bound) { 202342845Sdelphij rd->range = bound; 203342845Sdelphij *bm += (BIT_MODEL_TOTAL - *bm) >> BIT_MODEL_MOVE_BITS; 204342845Sdelphij symbol = 0; 205342845Sdelphij } 206342845Sdelphij else { 207342845Sdelphij rd->range -= bound; 208342845Sdelphij rd->code -= bound; 209342845Sdelphij *bm -= *bm >> BIT_MODEL_MOVE_BITS; 210342845Sdelphij symbol = 1; 211342845Sdelphij } 212342845Sdelphij 213342845Sdelphij if (rd->range <= 0x00FFFFFFU) { 214342845Sdelphij rd->range <<= 8; 215342845Sdelphij rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp); 216342845Sdelphij } 217342845Sdelphij return symbol; 218342845Sdelphij} 219342845Sdelphij 220342845Sdelphijstatic unsigned 221342845Sdelphijlz_rd_decode_tree(struct lz_range_decoder *rd, int *bm, int num_bits) 222342845Sdelphij{ 223342845Sdelphij unsigned symbol = 1; 224342845Sdelphij 225342845Sdelphij for (int i = 0; i < num_bits; i++) 226342845Sdelphij symbol = (symbol << 1) | lz_rd_decode_bit(rd, &bm[symbol]); 227342845Sdelphij 228342845Sdelphij return symbol - (1 << num_bits); 229342845Sdelphij} 230342845Sdelphij 231342845Sdelphijstatic unsigned 232342845Sdelphijlz_rd_decode_tree_reversed(struct lz_range_decoder *rd, int *bm, int num_bits) 233342845Sdelphij{ 234342845Sdelphij unsigned symbol = lz_rd_decode_tree(rd, bm, num_bits); 235342845Sdelphij unsigned reversed_symbol = 0; 236342845Sdelphij 237342845Sdelphij for (int i = 0; i < num_bits; i++) { 238342845Sdelphij reversed_symbol = (reversed_symbol << 1) | (symbol & 1); 239342845Sdelphij symbol >>= 1; 240342845Sdelphij } 241342845Sdelphij 242342845Sdelphij return reversed_symbol; 243342845Sdelphij} 244342845Sdelphij 245342845Sdelphijstatic unsigned 246342845Sdelphijlz_rd_decode_matched(struct lz_range_decoder *rd, int *bm, int match_byte) 247342845Sdelphij{ 248342845Sdelphij unsigned symbol = 1; 249342845Sdelphij 250342845Sdelphij for (int i = 7; i >= 0; i--) { 251342845Sdelphij const unsigned match_bit = (match_byte >> i) & 1; 252342845Sdelphij const unsigned bit = lz_rd_decode_bit(rd, 253342845Sdelphij &bm[symbol + (match_bit << 8) + 0x100]); 254342845Sdelphij symbol = (symbol << 1) | bit; 255342845Sdelphij if (match_bit != bit) { 256342845Sdelphij while (symbol < 0x100) { 257342845Sdelphij symbol = (symbol << 1) | 258342845Sdelphij lz_rd_decode_bit(rd, &bm[symbol]); 259342845Sdelphij } 260342845Sdelphij break; 261342845Sdelphij } 262342845Sdelphij } 263342845Sdelphij return symbol & 0xFF; 264342845Sdelphij} 265342845Sdelphij 266342845Sdelphijstatic unsigned 267342845Sdelphijlz_rd_decode_len(struct lz_range_decoder *rd, struct lz_len_model *lm, 268342845Sdelphij int pos_state) 269342845Sdelphij{ 270342845Sdelphij if (lz_rd_decode_bit(rd, &lm->choice1) == 0) 271342845Sdelphij return lz_rd_decode_tree(rd, lm->bm_low[pos_state], LOW_BITS); 272342845Sdelphij 273342845Sdelphij if (lz_rd_decode_bit(rd, &lm->choice2) == 0) { 274342845Sdelphij return LOW_SYMBOLS + 275342845Sdelphij lz_rd_decode_tree(rd, lm->bm_mid[pos_state], MID_BITS); 276342845Sdelphij } 277342845Sdelphij 278342845Sdelphij return LOW_SYMBOLS + MID_SYMBOLS + 279342845Sdelphij lz_rd_decode_tree(rd, lm->bm_high, HIGH_BITS); 280342845Sdelphij} 281342845Sdelphij 282342845Sdelphijstruct lz_decoder { 283342845Sdelphij FILE *fin, *fout; 284342845Sdelphij off_t pos, ppos, spos, dict_size; 285342845Sdelphij bool wrapped; 286342845Sdelphij uint32_t crc; 287342845Sdelphij uint8_t *obuf; 288342845Sdelphij struct lz_range_decoder rdec; 289342845Sdelphij}; 290342845Sdelphij 291342845Sdelphijstatic int 292342845Sdelphijlz_flush(struct lz_decoder *lz) 293342845Sdelphij{ 294342845Sdelphij off_t offs = lz->pos - lz->spos; 295342845Sdelphij if (offs <= 0) 296342845Sdelphij return -1; 297342845Sdelphij 298342845Sdelphij size_t size = (size_t)offs; 299342845Sdelphij lz_crc_update(&lz->crc, lz->obuf + lz->spos, size); 300342845Sdelphij if (fwrite(lz->obuf + lz->spos, 1, size, lz->fout) != size) 301342845Sdelphij return -1; 302342845Sdelphij 303342845Sdelphij lz->wrapped = lz->pos >= lz->dict_size; 304342845Sdelphij if (lz->wrapped) { 305342845Sdelphij lz->ppos += lz->pos; 306342845Sdelphij lz->pos = 0; 307342845Sdelphij } 308342845Sdelphij lz->spos = lz->pos; 309342845Sdelphij return 0; 310342845Sdelphij} 311342845Sdelphij 312342845Sdelphijstatic void 313342845Sdelphijlz_destroy(struct lz_decoder *lz) 314342845Sdelphij{ 315342845Sdelphij if (lz->fin) 316342845Sdelphij fclose(lz->fin); 317342845Sdelphij if (lz->fout) 318342845Sdelphij fclose(lz->fout); 319342845Sdelphij free(lz->obuf); 320342845Sdelphij} 321342845Sdelphij 322342845Sdelphijstatic int 323342845Sdelphijlz_create(struct lz_decoder *lz, int fin, int fdout, int dict_size) 324342845Sdelphij{ 325342845Sdelphij memset(lz, 0, sizeof(*lz)); 326342845Sdelphij 327342845Sdelphij lz->fin = fdopen(dup(fin), "r"); 328342845Sdelphij if (lz->fin == NULL) 329342845Sdelphij goto out; 330342845Sdelphij 331342845Sdelphij lz->fout = fdopen(dup(fdout), "w"); 332342845Sdelphij if (lz->fout == NULL) 333342845Sdelphij goto out; 334342845Sdelphij 335342845Sdelphij lz->pos = lz->ppos = lz->spos = 0; 336342845Sdelphij lz->crc = ~0; 337342845Sdelphij lz->dict_size = dict_size; 338342845Sdelphij lz->wrapped = false; 339342845Sdelphij 340342845Sdelphij lz->obuf = malloc(dict_size); 341342845Sdelphij if (lz->obuf == NULL) 342342845Sdelphij goto out; 343342845Sdelphij 344342845Sdelphij if (lz_rd_create(&lz->rdec, lz->fin) == -1) 345342845Sdelphij goto out; 346342845Sdelphij return 0; 347342845Sdelphijout: 348342845Sdelphij lz_destroy(lz); 349342845Sdelphij return -1; 350342845Sdelphij} 351342845Sdelphij 352342845Sdelphijstatic uint8_t 353342845Sdelphijlz_peek(const struct lz_decoder *lz, unsigned ahead) 354342845Sdelphij{ 355342845Sdelphij off_t diff = lz->pos - ahead - 1; 356342845Sdelphij 357342845Sdelphij if (diff >= 0) 358342845Sdelphij return lz->obuf[diff]; 359342845Sdelphij 360342845Sdelphij if (lz->wrapped) 361342845Sdelphij return lz->obuf[lz->dict_size + diff]; 362342845Sdelphij 363342845Sdelphij return 0; 364342845Sdelphij} 365342845Sdelphij 366342845Sdelphijstatic void 367342845Sdelphijlz_put(struct lz_decoder *lz, uint8_t b) 368342845Sdelphij{ 369342845Sdelphij lz->obuf[lz->pos++] = b; 370342845Sdelphij if (lz->dict_size == lz->pos) 371342845Sdelphij lz_flush(lz); 372342845Sdelphij} 373342845Sdelphij 374342845Sdelphijstatic off_t 375342845Sdelphijlz_get_data_position(const struct lz_decoder *lz) 376342845Sdelphij{ 377342845Sdelphij return lz->ppos + lz->pos; 378342845Sdelphij} 379342845Sdelphij 380342845Sdelphijstatic unsigned 381342845Sdelphijlz_get_crc(const struct lz_decoder *lz) 382342845Sdelphij{ 383342845Sdelphij return lz->crc ^ 0xffffffffU; 384342845Sdelphij} 385342845Sdelphij 386342845Sdelphijstatic void 387342845Sdelphijlz_bm_init(int *a, size_t l) 388342845Sdelphij{ 389342845Sdelphij for (size_t i = 0; i < l; i++) 390342845Sdelphij a[i] = BIT_MODEL_INIT; 391342845Sdelphij} 392342845Sdelphij 393342845Sdelphij#define LZ_BM_INIT(a) lz_bm_init(a, nitems(a)) 394342845Sdelphij#define LZ_BM_INIT2(a) do { \ 395342845Sdelphij size_t l = nitems(a[0]); \ 396342845Sdelphij for (size_t i = 0; i < nitems(a); i++) \ 397342845Sdelphij lz_bm_init(a[i], l); \ 398342845Sdelphij} while (/*CONSTCOND*/0) 399342845Sdelphij 400342845Sdelphij#define LZ_MODEL_INIT(a) do { \ 401342845Sdelphij a.choice1 = BIT_MODEL_INIT; \ 402342845Sdelphij a.choice2 = BIT_MODEL_INIT; \ 403342845Sdelphij LZ_BM_INIT2(a.bm_low); \ 404342845Sdelphij LZ_BM_INIT2(a.bm_mid); \ 405342845Sdelphij LZ_BM_INIT(a.bm_high); \ 406342845Sdelphij} while (/*CONSTCOND*/0) 407342845Sdelphij 408342845Sdelphijstatic bool 409342845Sdelphijlz_decode_member(struct lz_decoder *lz) 410342845Sdelphij{ 411342845Sdelphij int bm_literal[1 << LITERAL_CONTEXT_BITS][0x300]; 412342845Sdelphij int bm_match[LZ_STATES][POS_STATES]; 413342845Sdelphij int bm_rep[4][LZ_STATES]; 414342845Sdelphij int bm_len[LZ_STATES][POS_STATES]; 415342845Sdelphij int bm_dis_slot[LZ_STATES][1 << DIS_SLOT_BITS]; 416342845Sdelphij int bm_dis[MODELED_DISTANCES - DIS_MODEL_END + 1]; 417342845Sdelphij int bm_align[DIS_ALIGN_SIZE]; 418342845Sdelphij 419342845Sdelphij LZ_BM_INIT2(bm_literal); 420342845Sdelphij LZ_BM_INIT2(bm_match); 421342845Sdelphij LZ_BM_INIT2(bm_rep); 422342845Sdelphij LZ_BM_INIT2(bm_len); 423342845Sdelphij LZ_BM_INIT2(bm_dis_slot); 424342845Sdelphij LZ_BM_INIT(bm_dis); 425342845Sdelphij LZ_BM_INIT(bm_align); 426342845Sdelphij 427342845Sdelphij struct lz_len_model match_len_model; 428342845Sdelphij struct lz_len_model rep_len_model; 429342845Sdelphij 430342845Sdelphij LZ_MODEL_INIT(match_len_model); 431342845Sdelphij LZ_MODEL_INIT(rep_len_model); 432342845Sdelphij 433342845Sdelphij struct lz_range_decoder *rd = &lz->rdec; 434342845Sdelphij unsigned rep[4] = { 0 }; 435342845Sdelphij 436342845Sdelphij 437342845Sdelphij int state = 0; 438342845Sdelphij 439342845Sdelphij while (!feof(lz->fin) && !ferror(lz->fin)) { 440342845Sdelphij const int pos_state = lz_get_data_position(lz) & POS_STATE_MASK; 441342845Sdelphij // bit 1 442342845Sdelphij if (lz_rd_decode_bit(rd, &bm_match[state][pos_state]) == 0) { 443342845Sdelphij const uint8_t prev_byte = lz_peek(lz, 0); 444342845Sdelphij const int literal_state = 445342845Sdelphij prev_byte >> (8 - LITERAL_CONTEXT_BITS); 446342845Sdelphij int *bm = bm_literal[literal_state]; 447342845Sdelphij if (lz_st_is_char(state)) 448342845Sdelphij lz_put(lz, lz_rd_decode_tree(rd, bm, 8)); 449342845Sdelphij else { 450342845Sdelphij int peek = lz_peek(lz, rep[0]); 451342845Sdelphij lz_put(lz, lz_rd_decode_matched(rd, bm, peek)); 452342845Sdelphij } 453342845Sdelphij state = lz_st_get_char(state); 454342845Sdelphij continue; 455342845Sdelphij } 456342845Sdelphij int len; 457342845Sdelphij // bit 2 458342845Sdelphij if (lz_rd_decode_bit(rd, &bm_rep[0][state]) != 0) { 459342845Sdelphij // bit 3 460342845Sdelphij if (lz_rd_decode_bit(rd, &bm_rep[1][state]) == 0) { 461342845Sdelphij // bit 4 462342845Sdelphij if (lz_rd_decode_bit(rd, 463342845Sdelphij &bm_len[state][pos_state]) == 0) 464342845Sdelphij { 465342845Sdelphij state = lz_st_get_short_rep(state); 466342845Sdelphij lz_put(lz, lz_peek(lz, rep[0])); 467342845Sdelphij continue; 468342845Sdelphij } 469342845Sdelphij } else { 470342845Sdelphij unsigned distance; 471342845Sdelphij // bit 4 472342845Sdelphij if (lz_rd_decode_bit(rd, &bm_rep[2][state]) 473342845Sdelphij == 0) 474342845Sdelphij distance = rep[1]; 475342845Sdelphij else { 476342845Sdelphij // bit 5 477342845Sdelphij if (lz_rd_decode_bit(rd, 478342845Sdelphij &bm_rep[3][state]) == 0) 479342845Sdelphij distance = rep[2]; 480342845Sdelphij else { 481342845Sdelphij distance = rep[3]; 482342845Sdelphij rep[3] = rep[2]; 483342845Sdelphij } 484342845Sdelphij rep[2] = rep[1]; 485342845Sdelphij } 486342845Sdelphij rep[1] = rep[0]; 487342845Sdelphij rep[0] = distance; 488342845Sdelphij } 489342845Sdelphij state = lz_st_get_rep(state); 490342845Sdelphij len = MIN_MATCH_LEN + 491342845Sdelphij lz_rd_decode_len(rd, &rep_len_model, pos_state); 492342845Sdelphij } else { 493342845Sdelphij rep[3] = rep[2]; rep[2] = rep[1]; rep[1] = rep[0]; 494342845Sdelphij len = MIN_MATCH_LEN + 495342845Sdelphij lz_rd_decode_len(rd, &match_len_model, pos_state); 496342845Sdelphij const int len_state = 497342845Sdelphij MIN(len - MIN_MATCH_LEN, STATES - 1); 498342845Sdelphij rep[0] = lz_rd_decode_tree(rd, bm_dis_slot[len_state], 499342845Sdelphij DIS_SLOT_BITS); 500342845Sdelphij if (rep[0] >= DIS_MODEL_START) { 501342845Sdelphij const unsigned dis_slot = rep[0]; 502342845Sdelphij const int direct_bits = (dis_slot >> 1) - 1; 503342845Sdelphij rep[0] = (2 | (dis_slot & 1)) << direct_bits; 504342845Sdelphij if (dis_slot < DIS_MODEL_END) 505342845Sdelphij rep[0] += lz_rd_decode_tree_reversed(rd, 506342845Sdelphij &bm_dis[rep[0] - dis_slot], 507342845Sdelphij direct_bits); 508342845Sdelphij else { 509342845Sdelphij rep[0] += lz_rd_decode(rd, direct_bits 510342845Sdelphij - DIS_ALIGN_BITS) << DIS_ALIGN_BITS; 511342845Sdelphij rep[0] += lz_rd_decode_tree_reversed(rd, 512342845Sdelphij bm_align, DIS_ALIGN_BITS); 513342845Sdelphij if (rep[0] == 0xFFFFFFFFU) { 514342845Sdelphij lz_flush(lz); 515342845Sdelphij return len == MIN_MATCH_LEN; 516342845Sdelphij } 517342845Sdelphij } 518342845Sdelphij } 519342845Sdelphij state = lz_st_get_match(state); 520342845Sdelphij if (rep[0] >= lz->dict_size || 521342845Sdelphij (rep[0] >= lz->pos && !lz->wrapped)) { 522342845Sdelphij lz_flush(lz); 523342845Sdelphij return false; 524342845Sdelphij } 525342845Sdelphij } 526342845Sdelphij for (int i = 0; i < len; i++) 527342845Sdelphij lz_put(lz, lz_peek(lz, rep[0])); 528342845Sdelphij } 529342845Sdelphij lz_flush(lz); 530342845Sdelphij return false; 531342845Sdelphij} 532342845Sdelphij 533342845Sdelphij/* 534342845Sdelphij * 0-3 CRC32 of the uncompressed data 535342845Sdelphij * 4-11 size of the uncompressed data 536342845Sdelphij * 12-19 member size including header and trailer 537342845Sdelphij */ 538342845Sdelphij#define TRAILER_SIZE 20 539342845Sdelphij 540342845Sdelphij 541342845Sdelphijstatic off_t 542342845Sdelphijlz_decode(int fin, int fdout, unsigned dict_size, off_t *insize) 543342845Sdelphij{ 544342845Sdelphij struct lz_decoder lz; 545342845Sdelphij off_t rv = -1; 546342845Sdelphij 547342845Sdelphij if (lz_create(&lz, fin, fdout, dict_size) == -1) 548342845Sdelphij return -1; 549342845Sdelphij 550342845Sdelphij if (!lz_decode_member(&lz)) 551342845Sdelphij goto out; 552342845Sdelphij 553342845Sdelphij uint8_t trailer[TRAILER_SIZE]; 554342845Sdelphij 555342845Sdelphij for(size_t i = 0; i < nitems(trailer); i++) 556342845Sdelphij trailer[i] = (uint8_t)getc(lz.fin); 557342845Sdelphij 558342845Sdelphij unsigned crc = 0; 559342845Sdelphij for (int i = 3; i >= 0; --i) { 560342845Sdelphij crc <<= 8; 561342845Sdelphij crc += trailer[i]; 562342845Sdelphij } 563342845Sdelphij 564342845Sdelphij int64_t data_size = 0; 565342845Sdelphij for (int i = 11; i >= 4; --i) { 566342845Sdelphij data_size <<= 8; 567342845Sdelphij data_size += trailer[i]; 568342845Sdelphij } 569342845Sdelphij 570342845Sdelphij if (crc != lz_get_crc(&lz) || data_size != lz_get_data_position(&lz)) 571342845Sdelphij goto out; 572342845Sdelphij 573342845Sdelphij rv = 0; 574342845Sdelphij for (int i = 19; i >= 12; --i) { 575342845Sdelphij rv <<= 8; 576342845Sdelphij rv += trailer[i]; 577342845Sdelphij } 578342845Sdelphij if (insize) 579342845Sdelphij *insize = rv; 580342845Sdelphij#if 0 581342845Sdelphij /* Does not work with pipes */ 582342845Sdelphij rv = ftello(lz.fout); 583342845Sdelphij#else 584342845Sdelphij rv = data_size; 585342845Sdelphij#endif 586342845Sdelphijout: 587342845Sdelphij lz_destroy(&lz); 588342845Sdelphij return rv; 589342845Sdelphij} 590342845Sdelphij 591342845Sdelphij 592342845Sdelphij/* 593342845Sdelphij * 0-3 magic 594342845Sdelphij * 4 version 595342845Sdelphij * 5 coded dict_size 596342845Sdelphij */ 597342845Sdelphij#define HDR_SIZE 6 598342845Sdelphij#define MIN_DICTIONARY_SIZE (1 << 12) 599342845Sdelphij#define MAX_DICTIONARY_SIZE (1 << 29) 600342845Sdelphij 601342845Sdelphijstatic const char hdrmagic[] = { 'L', 'Z', 'I', 'P', 1 }; 602342845Sdelphij 603342845Sdelphijstatic unsigned 604342845Sdelphijlz_get_dict_size(unsigned char c) 605342845Sdelphij{ 606342845Sdelphij unsigned dict_size = 1 << (c & 0x1f); 607342845Sdelphij dict_size -= (dict_size >> 2) * ( (c >> 5) & 0x7); 608342845Sdelphij if (dict_size < MIN_DICTIONARY_SIZE || dict_size > MAX_DICTIONARY_SIZE) 609342845Sdelphij return 0; 610342845Sdelphij return dict_size; 611342845Sdelphij} 612342845Sdelphij 613342845Sdelphijstatic off_t 614342845Sdelphijunlz(int fin, int fout, char *pre, size_t prelen, off_t *bytes_in) 615342845Sdelphij{ 616342845Sdelphij if (lz_crc[0] == 0) 617342845Sdelphij lz_crc_init(); 618342845Sdelphij 619342845Sdelphij char header[HDR_SIZE]; 620342845Sdelphij 621342845Sdelphij if (prelen > sizeof(header)) 622342845Sdelphij return -1; 623342845Sdelphij if (pre && prelen) 624342845Sdelphij memcpy(header, pre, prelen); 625342845Sdelphij 626342845Sdelphij ssize_t nr = read(fin, header + prelen, sizeof(header) - prelen); 627342845Sdelphij switch (nr) { 628342845Sdelphij case -1: 629342845Sdelphij return -1; 630342845Sdelphij case 0: 631342845Sdelphij return prelen ? -1 : 0; 632342845Sdelphij default: 633342845Sdelphij if ((size_t)nr != sizeof(header) - prelen) 634342845Sdelphij return -1; 635342845Sdelphij break; 636342845Sdelphij } 637342845Sdelphij 638342845Sdelphij if (memcmp(header, hdrmagic, sizeof(hdrmagic)) != 0) 639342845Sdelphij return -1; 640342845Sdelphij 641342845Sdelphij unsigned dict_size = lz_get_dict_size(header[5]); 642342845Sdelphij if (dict_size == 0) 643342845Sdelphij return -1; 644342845Sdelphij 645342845Sdelphij return lz_decode(fin, fout, dict_size, bytes_in); 646342845Sdelphij} 647