lz4.c revision 246768
1/* 2 * LZ4 - Fast LZ compression algorithm 3 * Header File 4 * Copyright (C) 2011-2013, Yann Collet. 5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above 14 * copyright notice, this list of conditions and the following disclaimer 15 * in the documentation and/or other materials provided with the 16 * distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 * 30 * You can contact the author at : 31 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html 32 * - LZ4 source repository : http://code.google.com/p/lz4/ 33 */ 34 35#include <sys/zfs_context.h> 36 37static int real_LZ4_compress(const char *source, char *dest, int isize, 38 int osize); 39static int real_LZ4_uncompress(const char *source, char *dest, int osize); 40static int LZ4_compressBound(int isize); 41static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, 42 int isize, int maxOutputSize); 43static int LZ4_compressCtx(void *ctx, const char *source, char *dest, 44 int isize, int osize); 45static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest, 46 int isize, int osize); 47 48/*ARGSUSED*/ 49size_t 50lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) 51{ 52 uint32_t bufsiz; 53 char *dest = d_start; 54 55 ASSERT(d_len >= sizeof (bufsiz)); 56 57 bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len, 58 d_len - sizeof (bufsiz)); 59 60 /* Signal an error if the compression routine returned zero. */ 61 if (bufsiz == 0) 62 return (s_len); 63 64 /* 65 * Encode the compresed buffer size at the start. We'll need this in 66 * decompression to counter the effects of padding which might be 67 * added to the compressed buffer and which, if unhandled, would 68 * confuse the hell out of our decompression function. 69 */ 70 *(uint32_t *)dest = BE_32(bufsiz); 71 72 return (bufsiz + sizeof (bufsiz)); 73} 74 75/*ARGSUSED*/ 76int 77lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) 78{ 79 const char *src = s_start; 80 uint32_t bufsiz = BE_IN32(src); 81 82 /* invalid compressed buffer size encoded at start */ 83 if (bufsiz + sizeof (bufsiz) > s_len) 84 return (1); 85 86 /* 87 * Returns 0 on success (decompression function returned non-negative) 88 * and non-zero on failure (decompression function returned negative. 
89 */ 90 return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)], 91 d_start, bufsiz, d_len) < 0); 92} 93 94/* 95 * LZ4 API Description: 96 * 97 * Simple Functions: 98 * real_LZ4_compress() : 99 * isize : is the input size. Max supported value is ~1.9GB 100 * return : the number of bytes written in buffer dest 101 * or 0 if the compression fails (if LZ4_COMPRESSMIN is set). 102 * note : destination buffer must be already allocated. 103 * destination buffer must be sized to handle worst cases 104 * situations (input data not compressible) worst case size 105 * evaluation is provided by function LZ4_compressBound(). 106 * 107 * real_LZ4_uncompress() : 108 * osize : is the output size, therefore the original size 109 * return : the number of bytes read in the source buffer. 110 * If the source stream is malformed, the function will stop 111 * decoding and return a negative result, indicating the byte 112 * position of the faulty instruction. This function never 113 * writes beyond dest + osize, and is therefore protected 114 * against malicious data packets. 115 * note : destination buffer must be already allocated 116 * 117 * Advanced Functions 118 * 119 * LZ4_compressBound() : 120 * Provides the maximum size that LZ4 may output in a "worst case" 121 * scenario (input data not compressible) primarily useful for memory 122 * allocation of output buffer. 123 * 124 * isize : is the input size. Max supported value is ~1.9GB 125 * return : maximum output size in a "worst case" scenario 126 * note : this function is limited by "int" range (2^31-1) 127 * 128 * LZ4_uncompress_unknownOutputSize() : 129 * isize : is the input size, therefore the compressed size 130 * maxOutputSize : is the size of the destination buffer (which must be 131 * already allocated) 132 * return : the number of bytes decoded in the destination buffer 133 * (necessarily <= maxOutputSize). 
 *	    If the source stream is
 *	    malformed, the function will stop decoding and return a
 *	    negative result, indicating the byte position of the faulty
 *	    instruction. This function never writes beyond dest +
 *	    maxOutputSize, and is therefore protected against malicious
 *	    data packets.
 *	note : Destination buffer must be already allocated.
 *	    This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *	This function explicitly handles the CTX memory structure.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 */

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio
 *	 Lowering this value reduces memory usage. Reduced memory usage
 *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *	(examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *	algorithm skip faster data segments considered "incompressible".
 *	This may decrease compression ratio dramatically, but will be
 *	faster on incompressible data. Increasing this value will make
 *	the algorithm search more before declaring a segment "incompressible".
 *	This could improve compression a bit, but will be slower on
 *	incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big endian cpu, but the resulting compressed stream
 * will be incompatible with little-endian CPU. You can set this option
 * to 1 in situations where data will stay within closed environment.
 * This option is useless on Little_Endian CPU (such as x86).
 */
/* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
	defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
	defined(__LP64__) || defined(_LP64))
#define	LZ4_ARCH64 1
/*
 * Illumos: On amd64 we have 20k of stack and 24k on sun4u and sun4v, so we
 * can spend 16k on the algorithm
 */
/* FreeBSD: Use heap for all platforms for now */
#define	STACKLIMIT 0
#else
#define	LZ4_ARCH64 0
/*
 * Illumos: On i386 we only have 12k of stack, so in order to maintain the
 * same COMPRESSIONLEVEL we have to use heap allocation. Performance will
 * suck, but alas, it's ZFS on 32-bit we're talking about, so...
 */
#define	STACKLIMIT 0
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if BYTE_ORDER == BIG_ENDIAN
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian format
 * are unsupported.
 */
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPU,
 * such as x86. For others CPU, the compiler will be more cautious, and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * FreeBSD: can't use GCC's __builtin_ctz when using sparc64 because
 * gcc currently rely on libcompiler_rt.
 *
 * TODO: revisit this when situation changes.
 */
#if defined(__sparc64__)
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
#if __STDC_VERSION__ >= 199901L	/* C99 */
/* "restrict" is a known keyword */
#else
/* Disable restrict */
#define	restrict
#endif

/* Byte-swap a 16-bit value (used to emit little-endian offsets on BE CPUs) */
#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))

/* Branch-prediction hints; local likely/unlikely may shadow system ones */
#define	expect(expr, value)    (__builtin_expect((expr), (value)))

#if defined(likely)
#undef likely
#endif
#if defined(unlikely)
#undef unlikely
#endif

#define	likely(expr)	expect((expr) != 0, 1)
#define	unlikely(expr)	expect((expr) != 0, 0)

/* Basic types */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t

/*
 * Wrapper structs for (possibly unaligned) loads/stores.  With pack(1) the
 * compiler must assume 1-byte alignment and generate safe access code on
 * CPUs without hardware unaligned-access support.
 */
#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

/* Access a 64/32/16-bit value at an arbitrary byte address */
#define	A64(x) (((U64_S *)(x))->v)
#define	A32(x) (((U32_S *)(x))->v)
#define	A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

/* Number of failed match attempts tolerated before skipping ahead faster */
#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

/*
 * Defines if memory is allocated into the stack (local variable),
 * or into the heap (kmem_alloc()).
 */
#define	HEAPMODE (HASH_LOG > STACKLIMIT)
#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

/* Token layout: high 4 bits = literal run length, low 4 bits = match length */
#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY	LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

/* Offsets in the compressed stream are always little-endian 16-bit values */
#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
/* NOTE: WILDCOPY may overrun [d, e) by up to STEPSIZE-1 bytes by design */
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }


/* Private functions */
#if LZ4_ARCH64

/*
 * Given the XOR of two 8-byte words, return how many leading bytes
 * (in stream order) are equal.  Uses compiler bit-count builtins when
 * available, otherwise a software fallback (binary search on BE, a
 * De Bruijn multiply-and-lookup on LE).
 */
static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

/* 32-bit variant of the above: count equal leading bytes of a 4-byte XOR */
static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif

/* Public functions */

/*
 * Worst-case compressed size for an isize-byte input: one extra length
 * byte per 255 input bytes, plus header slack.
 */
static int
LZ4_compressBound(int isize)
{
	return (isize + (isize / 255) + 16);
}
/* Compression functions */

/*
 * Core LZ4 compressor for inputs of any size (hash table holds 32-bit
 * offsets from `base`).  Writes at most osize bytes to dest; returns the
 * compressed size, or 0 if the output did not fit in osize.
 * ctx must point to caller-allocated refTables (see ILLUMOS CHANGES above).
 */
/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
	HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;	/* start of the pending literal run */
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	/* matches may not start within the last MFLIMIT input bytes */
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/*
		 * Find a match; the step grows as attempts fail so that
		 * incompressible data is skipped over quickly.
		 */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if unlikely(forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up: extend the match backwards over equal bytes */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			/* run longer than 14: emit 255-bytes continuation */
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset (little-endian 16-bit back-distance) */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		/* compare STEPSIZE bytes at a time while room remains */
		while likely(ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position: immediate match means zero literals */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals (the final LASTLITERALS+ bytes are literal) */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}



/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))

/*
 * Variant of LZ4_compressCtx() for inputs < 64KB: offsets fit in 16 bits,
 * so the hash table stores U16 and can use one extra hash bit for the same
 * memory; no MAX_DISTANCE check is needed.  Same return convention.
 */
/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);
#else
	U16 HashTable[HASH64KTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match (accelerating skip, as above) */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

/*
 * Dispatch to the 64KB-specialized or general compressor, allocating the
 * hash-table context from kernel memory in HEAPMODE.  Returns 0 (meaning
 * "do not compress") when the allocation fails or the output won't fit.
 */
static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
#if HEAPMODE
	void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
	int result;

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	kmem_free(ctx, sizeof (struct refTables));
	return (result);
#else
	if (isize < (int)LZ4_64KLIMIT)
		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
#endif
}

/* Decompression functions */

/*
 * Note: The decoding functions real_LZ4_uncompress() and
 *	LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
 *	attack type.
 *	They will never write outside of the provided output buffers.
 *	LZ4_uncompress_unknownOutputSize() also insures that it will never
 *	read outside of the input buffer. A corrupted input will produce
 *	an error result, a negative int, indicating the position of the
 *	error within input stream.
 */

/*
 * Decode a compressed stream whose ORIGINAL (decompressed) size is known
 * to be exactly osize bytes.  Returns the number of input bytes consumed,
 * or a negative value (negated input position) on malformed input.
 * Never writes beyond dest + osize; does NOT bound reads of the source,
 * so the caller must trust the input length.
 */
static int
real_LZ4_uncompress(const char *source, char *dest, int osize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + osize;
	BYTE *cpy;

	unsigned token;

	size_t length;
	/* correction tables for overlapping matches (offset < STEPSIZE) */
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	for (;;) {
		/* get runlength (high nibble of token, 15 = continuation) */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			size_t len;
			for (; (len = *ip++) == 255; length += 255) {
			}
			length += len;
		}
		/* copy literals */
		cpy = op + length;
		if unlikely(cpy > oend - COPYLENGTH) {
			if (cpy != oend)
				/* Error: we must necessarily stand at EOF */
				goto _output_error;
			(void) memcpy(op, ip, length);
			ip += length;
			break;	/* EOF */
		}
		LZ4_WILDCOPY(ip, op, cpy);
		/* WILDCOPY advanced ip/op past cpy; step ip back in sync */
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if unlikely(ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside destination
			 * buffer
			 */
			goto _output_error;

		/* get matchlength (low nibble of token, 15 = continuation) */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			for (; *ip == 255; length += 255) {
				ip++;
			}
			length += *ip++;
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
			/*
			 * Overlapping match: copy byte-wise for the first 4
			 * bytes, then use the dec tables to realign ref so
			 * word copies reproduce the repeating pattern.
			 */
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write beyond destination
				 * buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since last
				 * 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)ip) - source);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}

/*
 * Decode an isize-byte compressed stream into dest without knowing the
 * decompressed size in advance.  Returns the number of bytes written to
 * dest (<= maxOutputSize), or a negative value (negated input position)
 * on malformed input.  Bounds BOTH reads of source and writes to dest,
 * so it is the safe choice for untrusted input (used by lz4_decompress()).
 */
static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	/* correction tables for overlapping matches (offset < STEPSIZE) */
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength (input-bounded continuation-byte loop) */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires to consume all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		/* WILDCOPY advanced ip/op past cpy; step ip back in sync */
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength (input-bounded continuation-byte loop) */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
			/*
			 * Overlapping match: byte-wise start plus dec-table
			 * realignment, as in real_LZ4_uncompress().
			 */
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}