lzjb.c revision 256132
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd 22168404Spjd/* 23219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24168404Spjd */ 25168404Spjd 26168404Spjd/* 27219089Spjd * We keep our own copy of this algorithm for 3 main reasons: 28219089Spjd * 1. If we didn't, anyone modifying common/os/compress.c would 29168404Spjd * directly break our on disk format 30219089Spjd * 2. Our version of lzjb does not have a number of checks that the 31168404Spjd * common/os version needs and uses 32219089Spjd * 3. We initialize the lempel to ensure deterministic results, 33219089Spjd * so that identical blocks can always be deduplicated. 34168404Spjd * In particular, we are adding the "feature" that compress() can 35219089Spjd * take a destination buffer size and returns the compressed length, or the 36219089Spjd * source length if compression would overflow the destination buffer. 37168404Spjd */ 38168404Spjd 39168404Spjd#include <sys/zfs_context.h> 40168404Spjd#include <sys/types.h> 41247187Smm#include <sys/param.h> 42168404Spjd 43168404Spjd#define MATCH_BITS 6 44168404Spjd#define MATCH_MIN 3 45168404Spjd#define MATCH_MAX ((1 << MATCH_BITS) + (MATCH_MIN - 1)) 46168404Spjd#define OFFSET_MASK ((1 << (16 - MATCH_BITS)) - 1) 47219089Spjd#define LEMPEL_SIZE 1024 48168404Spjd 49168404Spjd/*ARGSUSED*/ 50168404Spjdsize_t 51168404Spjdlzjb_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) 52168404Spjd{ 53168404Spjd uchar_t *src = s_start; 54168404Spjd uchar_t *dst = d_start; 55247187Smm uchar_t *cpy; 56247187Smm uchar_t *copymap = NULL; 57168404Spjd int copymask = 1 << (NBBY - 1); 58219089Spjd int mlen, offset, hash; 59168404Spjd uint16_t *hp; 60219089Spjd uint16_t lempel[LEMPEL_SIZE] = { 0 }; 61168404Spjd 62168404Spjd while (src < (uchar_t *)s_start + s_len) { 63168404Spjd if ((copymask <<= 1) == (1 << NBBY)) { 64219089Spjd if (dst >= (uchar_t *)d_start + d_len - 1 - 2 * NBBY) 65168404Spjd return (s_len); 66168404Spjd copymask = 1; 67168404Spjd copymap = dst; 68168404Spjd *dst++ = 0; 69168404Spjd } 70168404Spjd if (src > (uchar_t *)s_start + s_len - MATCH_MAX) { 71168404Spjd *dst++ = *src++; 72168404Spjd continue; 73168404Spjd } 74219089Spjd hash = (src[0] << 16) + (src[1] << 8) + src[2]; 75219089Spjd hash += hash >> 9; 76219089Spjd hash += hash >> 5; 77219089Spjd hp = &lempel[hash & (LEMPEL_SIZE - 1)]; 78168404Spjd offset = (intptr_t)(src - *hp) & OFFSET_MASK; 79168404Spjd *hp = (uint16_t)(uintptr_t)src; 80168404Spjd cpy = src - offset; 81168404Spjd if (cpy >= (uchar_t *)s_start && cpy != src && 82168404Spjd src[0] == cpy[0] && src[1] == cpy[1] && src[2] == cpy[2]) { 83168404Spjd *copymap |= copymask; 84168404Spjd for (mlen = MATCH_MIN; mlen < MATCH_MAX; mlen++) 85168404Spjd if (src[mlen] != cpy[mlen]) 86168404Spjd break; 87168404Spjd *dst++ = ((mlen - MATCH_MIN) << (NBBY - MATCH_BITS)) | 88168404Spjd (offset >> NBBY); 89168404Spjd *dst++ = (uchar_t)offset; 90168404Spjd src += mlen; 91168404Spjd } else { 92168404Spjd *dst++ = *src++; 93168404Spjd } 94168404Spjd } 95168404Spjd return (dst - (uchar_t *)d_start); 96168404Spjd} 97168404Spjd 98168404Spjd/*ARGSUSED*/ 99168404Spjdint 100168404Spjdlzjb_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) 101168404Spjd{ 102168404Spjd uchar_t *src = s_start; 103168404Spjd uchar_t *dst = d_start; 104168404Spjd uchar_t *d_end = (uchar_t *)d_start + d_len; 105247187Smm uchar_t *cpy; 106247187Smm uchar_t copymap = 0; 107168404Spjd int copymask = 1 << (NBBY - 1); 108168404Spjd 109168404Spjd while (dst < d_end) { 110168404Spjd if ((copymask <<= 1) == (1 << NBBY)) { 111168404Spjd copymask = 1; 112168404Spjd copymap = *src++; 113168404Spjd } 114168404Spjd if (copymap & copymask) { 115168404Spjd int mlen = (src[0] >> (NBBY - MATCH_BITS)) + MATCH_MIN; 116168404Spjd int offset = ((src[0] << NBBY) | src[1]) & OFFSET_MASK; 117168404Spjd src += 2; 118168404Spjd if ((cpy = dst - offset) < (uchar_t *)d_start) 119168404Spjd return (-1); 120256132Sdelphij if (mlen > (d_end - dst)) 121256132Sdelphij mlen = d_end - dst; 122256132Sdelphij while (--mlen >= 0) 123168404Spjd *dst++ = *cpy++; 124168404Spjd } else { 125168404Spjd *dst++ = *src++; 126168404Spjd } 127168404Spjd } 128168404Spjd return (0); 129168404Spjd} 130