/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 * 30 * You can contact the author at : 31 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html 32 * - LZ4 source repository : http://code.google.com/p/lz4/ 33 */ 34 35#include <sys/zfs_context.h> 36 37static int real_LZ4_compress(const char *source, char *dest, int isize, 38 int osize); 39static int LZ4_compressBound(int isize); 40static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, 41 int isize, int maxOutputSize); 42static int LZ4_compressCtx(void *ctx, const char *source, char *dest, 43 int isize, int osize); 44static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest, 45 int isize, int osize); 46 47static kmem_cache_t *lz4_ctx_cache; 48 49/*ARGSUSED*/ 50size_t 51lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) 52{ 53 uint32_t bufsiz; 54 char *dest = d_start; 55 56 ASSERT(d_len >= sizeof (bufsiz)); 57 58 bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len, 59 d_len - sizeof (bufsiz)); 60 61 /* Signal an error if the compression routine returned zero. */ 62 if (bufsiz == 0) 63 return (s_len); 64 65 /* 66 * Encode the compresed buffer size at the start. We'll need this in 67 * decompression to counter the effects of padding which might be 68 * added to the compressed buffer and which, if unhandled, would 69 * confuse the hell out of our decompression function. 70 */ 71 *(uint32_t *)dest = BE_32(bufsiz); 72 73 return (bufsiz + sizeof (bufsiz)); 74} 75 76/*ARGSUSED*/ 77int 78lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) 79{ 80 const char *src = s_start; 81 uint32_t bufsiz = BE_IN32(src); 82 83 /* invalid compressed buffer size encoded at start */ 84 if (bufsiz + sizeof (bufsiz) > s_len) 85 return (1); 86 87 /* 88 * Returns 0 on success (decompression function returned non-negative) 89 * and non-zero on failure (decompression function returned negative. 
90 */ 91 return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)], 92 d_start, bufsiz, d_len) < 0); 93} 94 95/* 96 * LZ4 API Description: 97 * 98 * Simple Functions: 99 * real_LZ4_compress() : 100 * isize : is the input size. Max supported value is ~1.9GB 101 * return : the number of bytes written in buffer dest 102 * or 0 if the compression fails (if LZ4_COMPRESSMIN is set). 103 * note : destination buffer must be already allocated. 104 * destination buffer must be sized to handle worst cases 105 * situations (input data not compressible) worst case size 106 * evaluation is provided by function LZ4_compressBound(). 107 * 108 * Advanced Functions 109 * 110 * LZ4_compressBound() : 111 * Provides the maximum size that LZ4 may output in a "worst case" 112 * scenario (input data not compressible) primarily useful for memory 113 * allocation of output buffer. 114 * 115 * isize : is the input size. Max supported value is ~1.9GB 116 * return : maximum output size in a "worst case" scenario 117 * note : this function is limited by "int" range (2^31-1) 118 * 119 * LZ4_uncompress_unknownOutputSize() : 120 * isize : is the input size, therefore the compressed size 121 * maxOutputSize : is the size of the destination buffer (which must be 122 * already allocated) 123 * return : the number of bytes decoded in the destination buffer 124 * (necessarily <= maxOutputSize). If the source stream is 125 * malformed, the function will stop decoding and return a 126 * negative result, indicating the byte position of the faulty 127 * instruction. This function never writes beyond dest + 128 * maxOutputSize, and is therefore protected against malicious 129 * data packets. 130 * note : Destination buffer must be already allocated. 131 * 132 * LZ4_compressCtx() : 133 * This function explicitly handles the CTX memory structure. 134 * 135 * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated 136 * by the caller (either on the stack or using kmem_zalloc). 
 * Passing NULL
 *	isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 */

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 * Lowering this value reduces memory usage. Reduced memory usage
 * typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 * (examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 * algorithm skip faster data segments considered "incompressible".
 * This may decrease compression ratio dramatically, but will be
 * faster on incompressible data. Increasing this value will make
 * the algorithm search more before declaring a segment "incompressible".
 * This could improve compression a bit, but will be slower on
 * incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big endian cpu, but the resulting compressed stream
 * will be incompatible with little-endian CPU. You can set this option
 * to 1 in situations where data will stay within closed environment.
 * This option is useless on Little_Endian CPU (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
	defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
	defined(__LP64__) || defined(_LP64))
#define	LZ4_ARCH64 1
/*
 * Illumos: On amd64 we have 20k of stack and 24k on sun4u and sun4v, so we
 * can spend 16k on the algorithm
 */
/* FreeBSD: Use heap for all platforms for now */
#define	STACKLIMIT 0
#else
#define	LZ4_ARCH64 0
/*
 * Illumos: On i386 we only have 12k of stack, so in order to maintain the
 * same COMPRESSIONLEVEL we have to use heap allocation. Performance will
 * suck, but alas, it's ZFS on 32-bit we're talking about, so...
 */
#define	STACKLIMIT 0
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianess.
 */
#if BYTE_ORDER == BIG_ENDIAN
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian format
 * are unsupported.
 */
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPU,
 * such as x86. For others CPU, the compiler will be more cautious, and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * FreeBSD: can't use GCC's __builtin_ctz when using sparc64 because
 * gcc currently rely on libcompiler_rt.
 *
 * TODO: revisit this when situation changes.
 */
#if defined(__sparc64__)
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
#if __STDC_VERSION__ >= 199901L	/* C99 */
/* "restrict" is a known keyword */
#else
/* Disable restrict */
#define	restrict
#endif

/* Byte-swap a 16-bit value (used for cross-endian offset encoding below). */
#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))

#define	expect(expr, value)    (__builtin_expect((expr), (value)))

/* Replace any pre-existing kernel definitions with LZ4's branch hints. */
#if defined(likely)
#undef likely
#endif
#if defined(unlikely)
#undef unlikely
#endif

#define	likely(expr)	expect((expr) != 0, 1)
#define	unlikely(expr)	expect((expr) != 0, 0)

/* Basic types */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t

/*
 * Single-member structs used to perform (possibly unaligned) loads and
 * stores via the A16/A32/A64 macros.  pack(1) drops the alignment
 * requirement when the CPU does not support unaligned access natively.
 */
#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x) (((U64_S *)(x))->v)
#define	A32(x) (((U32_S *)(x))->v)
#define	A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

/*
 * Defines if memory is allocated into the stack (local variable),
 * or into the heap (kmem_alloc()).
 */
#define	HEAPMODE (HASH_LOG > STACKLIMIT)
#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY	LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

/* Match offsets are stored little-endian in the stream regardless of host. */
#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
/* Copy in STEPSIZE chunks; may overrun e by up to COPYLENGTH-1 bytes. */
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; \
	LZ4_WILDCOPY(s, d, e); \
	d = e; }


/* Private functions */
#if LZ4_ARCH64

/*
 * Return the number of identical low-order bytes between two words, given
 * their XOR difference (used to extend a match byte-count at word speed).
 */
static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	/* De Bruijn multiply-and-lookup fallback for trailing-zero count. */
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

/* 32-bit variant of LZ4_NbCommonBytes(); same contract as above. */
static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	/* De Bruijn multiply-and-lookup fallback for trailing-zero count. */
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif

/* Public functions */

/* Worst-case compressed size for an isize-byte input (incompressible data). */
static int
LZ4_compressBound(int isize)
{
	return (isize + (isize / 255) + 16);
}

/* Compression functions */

/*
 * Compress isize bytes from source into dest (capacity osize), using ctx
 * as the hash-table work area when HEAPMODE.  Returns the compressed size,
 * or 0 if the output would not fit in osize bytes.
 */
/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
	HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/*
		 * Find a match; the step grows as attempts fail, so runs of
		 * incompressible data are skipped progressively faster.
		 */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if unlikely(forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up: extend the match backwards over equal bytes. */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			/* Long literal run: 255-byte continuation encoding. */
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while likely(ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}



/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))

/*
 * Same as LZ4_compressCtx(), specialized for inputs smaller than 64KB:
 * offsets fit in 16 bits, so the hash table stores U16 offsets from base
 * and no MAX_DISTANCE check is needed.  isize *must* be < LZ4_64KLIMIT.
 */
/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);
#else
	U16 HashTable[HASH64KTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			/* Long literal run: 255-byte continuation encoding. */
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

/*
 * Dispatch to the 64KB-specialized or general compressor.  In HEAPMODE the
 * hash-table work area comes from lz4_ctx_cache; returns 0 (treated by
 * lz4_compress() as "incompressible") if the allocation fails.
 */
static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
#if HEAPMODE
	void *ctx = kmem_cache_alloc(lz4_ctx_cache, KM_NOSLEEP);
	int result;

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	bzero(ctx, sizeof(struct refTables));
	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	kmem_cache_free(lz4_ctx_cache, ctx);
	return (result);
#else
	if (isize < (int)LZ4_64KLIMIT)
		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
#endif
}

/* Decompression functions */

/*
 * Note: The decoding function LZ4_uncompress_unknownOutputSize() is safe
 * against "buffer overflow" attack type. It will never write nor
 * read outside of the provided output buffers.
 * LZ4_uncompress_unknownOutputSize() also insures that it will never
 * read outside of the input buffer.
 * A corrupted input will produce
 * an error result, a negative int, indicating the position of the
 * error within input stream.
 */

/*
 * Decode isize compressed bytes from source into dest, writing at most
 * maxOutputSize bytes.  Returns the number of decoded bytes, or a negative
 * value (the negated input offset of the faulty instruction) on malformed
 * input.  Never reads past source + isize or writes past
 * dest + maxOutputSize.
 */
static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	/* Correction tables for overlapping (offset < STEPSIZE) copies. */
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			/* Extended length: add 255-byte continuations. */
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires to consume all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
			/*
			 * Overlapping match: copy byte-groups with table-
			 * driven pointer corrections instead of word copies.
			 */
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}

/* Create the HEAPMODE work-area cache; call once before any (de)compression. */
extern void
lz4_init(void)
{

#if HEAPMODE
	lz4_ctx_cache = kmem_cache_create("lz4_ctx", sizeof(struct refTables),
	    0, NULL, NULL, NULL, NULL, NULL, 0);
#endif
}

/* Destroy the cache created by lz4_init(). */
extern void
lz4_fini(void)
{

#if HEAPMODE
	kmem_cache_destroy(lz4_ctx_cache);
#endif
}