/* lz4.c revision 246586 */
/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 * 30 * You can contact the author at : 31 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html 32 * - LZ4 source repository : http://code.google.com/p/lz4/ 33 */ 34 35#include <sys/zfs_context.h> 36 37static int real_LZ4_compress(const char *source, char *dest, int isize, 38 int osize); 39static int real_LZ4_uncompress(const char *source, char *dest, int osize); 40static int LZ4_compressBound(int isize); 41static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, 42 int isize, int maxOutputSize); 43static int LZ4_compressCtx(void *ctx, const char *source, char *dest, 44 int isize, int osize); 45static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest, 46 int isize, int osize); 47 48/*ARGSUSED*/ 49size_t 50lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) 51{ 52 uint32_t bufsiz; 53 char *dest = d_start; 54 55 ASSERT(d_len >= sizeof (bufsiz)); 56 57 bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len, 58 d_len - sizeof (bufsiz)); 59 60 /* Signal an error if the compression routine returned zero. */ 61 if (bufsiz == 0) 62 return (s_len); 63 64 /* 65 * Encode the compresed buffer size at the start. We'll need this in 66 * decompression to counter the effects of padding which might be 67 * added to the compressed buffer and which, if unhandled, would 68 * confuse the hell out of our decompression function. 69 */ 70 *(uint32_t *)dest = BE_32(bufsiz); 71 72 return (bufsiz + sizeof (bufsiz)); 73} 74 75/*ARGSUSED*/ 76int 77lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) 78{ 79 const char *src = s_start; 80 uint32_t bufsiz = BE_IN32(src); 81 82 /* invalid compressed buffer size encoded at start */ 83 if (bufsiz + sizeof (bufsiz) > s_len) 84 return (1); 85 86 /* 87 * Returns 0 on success (decompression function returned non-negative) 88 * and non-zero on failure (decompression function returned negative. 
89 */ 90 return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)], 91 d_start, bufsiz, d_len) < 0); 92} 93 94/* 95 * LZ4 API Description: 96 * 97 * Simple Functions: 98 * real_LZ4_compress() : 99 * isize : is the input size. Max supported value is ~1.9GB 100 * return : the number of bytes written in buffer dest 101 * or 0 if the compression fails (if LZ4_COMPRESSMIN is set). 102 * note : destination buffer must be already allocated. 103 * destination buffer must be sized to handle worst cases 104 * situations (input data not compressible) worst case size 105 * evaluation is provided by function LZ4_compressBound(). 106 * 107 * real_LZ4_uncompress() : 108 * osize : is the output size, therefore the original size 109 * return : the number of bytes read in the source buffer. 110 * If the source stream is malformed, the function will stop 111 * decoding and return a negative result, indicating the byte 112 * position of the faulty instruction. This function never 113 * writes beyond dest + osize, and is therefore protected 114 * against malicious data packets. 115 * note : destination buffer must be already allocated 116 * 117 * Advanced Functions 118 * 119 * LZ4_compressBound() : 120 * Provides the maximum size that LZ4 may output in a "worst case" 121 * scenario (input data not compressible) primarily useful for memory 122 * allocation of output buffer. 123 * 124 * isize : is the input size. Max supported value is ~1.9GB 125 * return : maximum output size in a "worst case" scenario 126 * note : this function is limited by "int" range (2^31-1) 127 * 128 * LZ4_uncompress_unknownOutputSize() : 129 * isize : is the input size, therefore the compressed size 130 * maxOutputSize : is the size of the destination buffer (which must be 131 * already allocated) 132 * return : the number of bytes decoded in the destination buffer 133 * (necessarily <= maxOutputSize). 
If the source stream is 134 * malformed, the function will stop decoding and return a 135 * negative result, indicating the byte position of the faulty 136 * instruction. This function never writes beyond dest + 137 * maxOutputSize, and is therefore protected against malicious 138 * data packets. 139 * note : Destination buffer must be already allocated. 140 * This version is slightly slower than real_LZ4_uncompress() 141 * 142 * LZ4_compressCtx() : 143 * This function explicitly handles the CTX memory structure. 144 * 145 * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated 146 * by the caller (either on the stack or using kmem_zalloc). Passing NULL 147 * isn't valid. 148 * 149 * LZ4_compress64kCtx() : 150 * Same as LZ4_compressCtx(), but specific to small inputs (<64KB). 151 * isize *Must* be <64KB, otherwise the output will be corrupted. 152 * 153 * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated 154 * by the caller (either on the stack or using kmem_zalloc). Passing NULL 155 * isn't valid. 156 */ 157 158/* 159 * Tuning parameters 160 */ 161 162/* 163 * COMPRESSIONLEVEL: Increasing this value improves compression ratio 164 * Lowering this value reduces memory usage. Reduced memory usage 165 * typically improves speed, due to cache effect (ex: L1 32KB for Intel, 166 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes 167 * (examples : 12 -> 16KB ; 17 -> 512KB) 168 */ 169#define COMPRESSIONLEVEL 12 170 171/* 172 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the 173 * algorithm skip faster data segments considered "incompressible". 174 * This may decrease compression ratio dramatically, but will be 175 * faster on incompressible data. Increasing this value will make 176 * the algorithm search more before declaring a segment "incompressible". 177 * This could improve compression a bit, but will be slower on 178 * incompressible data. The default value (6) is recommended. 
179 */ 180#define NOTCOMPRESSIBLE_CONFIRMATION 6 181 182/* 183 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to 184 * performance for big endian cpu, but the resulting compressed stream 185 * will be incompatible with little-endian CPU. You can set this option 186 * to 1 in situations where data will stay within closed environment. 187 * This option is useless on Little_Endian CPU (such as x86). 188 */ 189/* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */ 190 191/* 192 * CPU Feature Detection 193 */ 194 195/* 32 or 64 bits ? */ 196#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \ 197 defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \ 198 defined(__LP64__) || defined(_LP64)) 199#define LZ4_ARCH64 1 200/* 201 * Illumos: On amd64 we have 20k of stack and 24k on sun4u and sun4v, so we 202 * can spend 16k on the algorithm 203 */ 204/* FreeBSD: Use heap for all platforms for now */ 205#define STACKLIMIT 0 206#else 207#define LZ4_ARCH64 0 208/* 209 * Illumos: On i386 we only have 12k of stack, so in order to maintain the 210 * same COMPRESSIONLEVEL we have to use heap allocation. Performance will 211 * suck, but alas, it's ZFS on 32-bit we're talking about, so... 212 */ 213#define STACKLIMIT 0 214#endif 215 216/* 217 * Little Endian or Big Endian? 218 * Note: overwrite the below #define if you know your architecture endianess. 219 */ 220#if BYTE_ORDER == BIG_ENDIAN 221#define LZ4_BIG_ENDIAN 1 222#else 223/* 224 * Little Endian assumed. PDP Endian and other very rare endian format 225 * are unsupported. 226 */ 227#endif 228 229/* 230 * Unaligned memory access is automatically enabled for "common" CPU, 231 * such as x86. For others CPU, the compiler will be more cautious, and 232 * insert extra code to ensure aligned access is respected. 
If you know 233 * your target CPU supports unaligned memory access, you may want to 234 * force this option manually to improve performance 235 */ 236#if defined(__ARM_FEATURE_UNALIGNED) 237#define LZ4_FORCE_UNALIGNED_ACCESS 1 238#endif 239 240/* 241 * Compiler Options 242 */ 243#if __STDC_VERSION__ >= 199901L /* C99 */ 244/* "restrict" is a known keyword */ 245#else 246/* Disable restrict */ 247#define restrict 248#endif 249 250#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \ 251 (((x) & 0xffu) << 8))) 252 253#define expect(expr, value) (__builtin_expect((expr), (value))) 254 255#if defined(likely) 256#undef likely 257#endif 258#if defined(unlikely) 259#undef unlikely 260#endif 261 262#define likely(expr) expect((expr) != 0, 1) 263#define unlikely(expr) expect((expr) != 0, 0) 264 265/* Basic types */ 266#define BYTE uint8_t 267#define U16 uint16_t 268#define U32 uint32_t 269#define S32 int32_t 270#define U64 uint64_t 271 272#ifndef LZ4_FORCE_UNALIGNED_ACCESS 273#pragma pack(1) 274#endif 275 276typedef struct _U16_S { 277 U16 v; 278} U16_S; 279typedef struct _U32_S { 280 U32 v; 281} U32_S; 282typedef struct _U64_S { 283 U64 v; 284} U64_S; 285 286#ifndef LZ4_FORCE_UNALIGNED_ACCESS 287#pragma pack() 288#endif 289 290#define A64(x) (((U64_S *)(x))->v) 291#define A32(x) (((U32_S *)(x))->v) 292#define A16(x) (((U16_S *)(x))->v) 293 294/* 295 * Constants 296 */ 297#define MINMATCH 4 298 299#define HASH_LOG COMPRESSIONLEVEL 300#define HASHTABLESIZE (1 << HASH_LOG) 301#define HASH_MASK (HASHTABLESIZE - 1) 302 303#define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \ 304 NOTCOMPRESSIBLE_CONFIRMATION : 2) 305 306/* 307 * Defines if memory is allocated into the stack (local variable), 308 * or into the heap (kmem_alloc()). 
309 */ 310#define HEAPMODE (HASH_LOG > STACKLIMIT) 311#define COPYLENGTH 8 312#define LASTLITERALS 5 313#define MFLIMIT (COPYLENGTH + MINMATCH) 314#define MINLENGTH (MFLIMIT + 1) 315 316#define MAXD_LOG 16 317#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) 318 319#define ML_BITS 4 320#define ML_MASK ((1U<<ML_BITS)-1) 321#define RUN_BITS (8-ML_BITS) 322#define RUN_MASK ((1U<<RUN_BITS)-1) 323 324 325/* 326 * Architecture-specific macros 327 */ 328#if LZ4_ARCH64 329#define STEPSIZE 8 330#define UARCH U64 331#define AARCH A64 332#define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8; 333#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d) 334#define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e) 335#define HTYPE U32 336#define INITBASE(base) const BYTE* const base = ip 337#else /* !LZ4_ARCH64 */ 338#define STEPSIZE 4 339#define UARCH U32 340#define AARCH A32 341#define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4; 342#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d); 343#define LZ4_SECURECOPY LZ4_WILDCOPY 344#define HTYPE const BYTE * 345#define INITBASE(base) const int base = 0 346#endif /* !LZ4_ARCH64 */ 347 348#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE)) 349#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ 350 { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; } 351#define LZ4_WRITE_LITTLEENDIAN_16(p, i) \ 352 { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; } 353#else 354#define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); } 355#define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; } 356#endif 357 358 359/* Local structures */ 360struct refTables { 361 HTYPE hashTable[HASHTABLESIZE]; 362}; 363 364 365/* Macros */ 366#define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \ 367 HASH_LOG)) 368#define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p)) 369#define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e); 370#define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; 
LZ4_WILDCOPY(s, d, e); \ 371 d = e; } 372 373 374/* Private functions */ 375#if LZ4_ARCH64 376 377static inline int 378LZ4_NbCommonBytes(register U64 val) 379{ 380#if defined(LZ4_BIG_ENDIAN) 381 return (__builtin_clzll(val) >> 3); 382#else 383 return (__builtin_ctzll(val) >> 3); 384#endif 385} 386 387#else 388 389static inline int 390LZ4_NbCommonBytes(register U32 val) 391{ 392#if defined(LZ4_BIG_ENDIAN) 393 return (__builtin_clz(val) >> 3); 394#else 395 return (__builtin_ctz(val) >> 3); 396#endif 397} 398 399#endif 400 401/* Public functions */ 402 403static int 404LZ4_compressBound(int isize) 405{ 406 return (isize + (isize / 255) + 16); 407} 408 409/* Compression functions */ 410 411/*ARGSUSED*/ 412static int 413LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize, 414 int osize) 415{ 416#if HEAPMODE 417 struct refTables *srt = (struct refTables *)ctx; 418 HTYPE *HashTable = (HTYPE *) (srt->hashTable); 419#else 420 HTYPE HashTable[HASHTABLESIZE] = { 0 }; 421#endif 422 423 const BYTE *ip = (BYTE *) source; 424 INITBASE(base); 425 const BYTE *anchor = ip; 426 const BYTE *const iend = ip + isize; 427 const BYTE *const oend = (BYTE *) dest + osize; 428 const BYTE *const mflimit = iend - MFLIMIT; 429#define matchlimit (iend - LASTLITERALS) 430 431 BYTE *op = (BYTE *) dest; 432 433 int len, length; 434 const int skipStrength = SKIPSTRENGTH; 435 U32 forwardH; 436 437 438 /* Init */ 439 if (isize < MINLENGTH) 440 goto _last_literals; 441 442 /* First Byte */ 443 HashTable[LZ4_HASH_VALUE(ip)] = ip - base; 444 ip++; 445 forwardH = LZ4_HASH_VALUE(ip); 446 447 /* Main Loop */ 448 for (;;) { 449 int findMatchAttempts = (1U << skipStrength) + 3; 450 const BYTE *forwardIp = ip; 451 const BYTE *ref; 452 BYTE *token; 453 454 /* Find a match */ 455 do { 456 U32 h = forwardH; 457 int step = findMatchAttempts++ >> skipStrength; 458 ip = forwardIp; 459 forwardIp = ip + step; 460 461 if unlikely(forwardIp > mflimit) { 462 goto _last_literals; 463 } 464 465 forwardH = 
LZ4_HASH_VALUE(forwardIp); 466 ref = base + HashTable[h]; 467 HashTable[h] = ip - base; 468 469 } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); 470 471 /* Catch up */ 472 while ((ip > anchor) && (ref > (BYTE *) source) && 473 unlikely(ip[-1] == ref[-1])) { 474 ip--; 475 ref--; 476 } 477 478 /* Encode Literal length */ 479 length = ip - anchor; 480 token = op++; 481 482 /* Check output limit */ 483 if unlikely(op + length + (2 + 1 + LASTLITERALS) + 484 (length >> 8) > oend) 485 return (0); 486 487 if (length >= (int)RUN_MASK) { 488 *token = (RUN_MASK << ML_BITS); 489 len = length - RUN_MASK; 490 for (; len > 254; len -= 255) 491 *op++ = 255; 492 *op++ = (BYTE)len; 493 } else 494 *token = (length << ML_BITS); 495 496 /* Copy Literals */ 497 LZ4_BLINDCOPY(anchor, op, length); 498 499 _next_match: 500 /* Encode Offset */ 501 LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref); 502 503 /* Start Counting */ 504 ip += MINMATCH; 505 ref += MINMATCH; /* MinMatch verified */ 506 anchor = ip; 507 while likely(ip < matchlimit - (STEPSIZE - 1)) { 508 UARCH diff = AARCH(ref) ^ AARCH(ip); 509 if (!diff) { 510 ip += STEPSIZE; 511 ref += STEPSIZE; 512 continue; 513 } 514 ip += LZ4_NbCommonBytes(diff); 515 goto _endCount; 516 } 517#if LZ4_ARCH64 518 if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) { 519 ip += 4; 520 ref += 4; 521 } 522#endif 523 if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) { 524 ip += 2; 525 ref += 2; 526 } 527 if ((ip < matchlimit) && (*ref == *ip)) 528 ip++; 529 _endCount: 530 531 /* Encode MatchLength */ 532 len = (ip - anchor); 533 /* Check output limit */ 534 if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend) 535 return (0); 536 if (len >= (int)ML_MASK) { 537 *token += ML_MASK; 538 len -= ML_MASK; 539 for (; len > 509; len -= 510) { 540 *op++ = 255; 541 *op++ = 255; 542 } 543 if (len > 254) { 544 len -= 255; 545 *op++ = 255; 546 } 547 *op++ = (BYTE)len; 548 } else 549 *token += len; 550 551 /* Test end of chunk */ 552 if (ip > mflimit) { 
553 anchor = ip; 554 break; 555 } 556 /* Fill table */ 557 HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base; 558 559 /* Test next position */ 560 ref = base + HashTable[LZ4_HASH_VALUE(ip)]; 561 HashTable[LZ4_HASH_VALUE(ip)] = ip - base; 562 if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { 563 token = op++; 564 *token = 0; 565 goto _next_match; 566 } 567 /* Prepare next loop */ 568 anchor = ip++; 569 forwardH = LZ4_HASH_VALUE(ip); 570 } 571 572 _last_literals: 573 /* Encode Last Literals */ 574 { 575 int lastRun = iend - anchor; 576 if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > 577 oend) 578 return (0); 579 if (lastRun >= (int)RUN_MASK) { 580 *op++ = (RUN_MASK << ML_BITS); 581 lastRun -= RUN_MASK; 582 for (; lastRun > 254; lastRun -= 255) { 583 *op++ = 255; 584 } 585 *op++ = (BYTE)lastRun; 586 } else 587 *op++ = (lastRun << ML_BITS); 588 (void) memcpy(op, anchor, iend - anchor); 589 op += iend - anchor; 590 } 591 592 /* End */ 593 return (int)(((char *)op) - dest); 594} 595 596 597 598/* Note : this function is valid only if isize < LZ4_64KLIMIT */ 599#define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1)) 600#define HASHLOG64K (HASH_LOG + 1) 601#define HASH64KTABLESIZE (1U << HASHLOG64K) 602#define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \ 603 HASHLOG64K)) 604#define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p)) 605 606/*ARGSUSED*/ 607static int 608LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize, 609 int osize) 610{ 611#if HEAPMODE 612 struct refTables *srt = (struct refTables *)ctx; 613 U16 *HashTable = (U16 *) (srt->hashTable); 614#else 615 U16 HashTable[HASH64KTABLESIZE] = { 0 }; 616#endif 617 618 const BYTE *ip = (BYTE *) source; 619 const BYTE *anchor = ip; 620 const BYTE *const base = ip; 621 const BYTE *const iend = ip + isize; 622 const BYTE *const oend = (BYTE *) dest + osize; 623 const BYTE *const mflimit = iend - MFLIMIT; 624#define matchlimit (iend - LASTLITERALS) 625 626 BYTE 
*op = (BYTE *) dest; 627 628 int len, length; 629 const int skipStrength = SKIPSTRENGTH; 630 U32 forwardH; 631 632 /* Init */ 633 if (isize < MINLENGTH) 634 goto _last_literals; 635 636 /* First Byte */ 637 ip++; 638 forwardH = LZ4_HASH64K_VALUE(ip); 639 640 /* Main Loop */ 641 for (;;) { 642 int findMatchAttempts = (1U << skipStrength) + 3; 643 const BYTE *forwardIp = ip; 644 const BYTE *ref; 645 BYTE *token; 646 647 /* Find a match */ 648 do { 649 U32 h = forwardH; 650 int step = findMatchAttempts++ >> skipStrength; 651 ip = forwardIp; 652 forwardIp = ip + step; 653 654 if (forwardIp > mflimit) { 655 goto _last_literals; 656 } 657 658 forwardH = LZ4_HASH64K_VALUE(forwardIp); 659 ref = base + HashTable[h]; 660 HashTable[h] = ip - base; 661 662 } while (A32(ref) != A32(ip)); 663 664 /* Catch up */ 665 while ((ip > anchor) && (ref > (BYTE *) source) && 666 (ip[-1] == ref[-1])) { 667 ip--; 668 ref--; 669 } 670 671 /* Encode Literal length */ 672 length = ip - anchor; 673 token = op++; 674 675 /* Check output limit */ 676 if unlikely(op + length + (2 + 1 + LASTLITERALS) + 677 (length >> 8) > oend) 678 return (0); 679 680 if (length >= (int)RUN_MASK) { 681 *token = (RUN_MASK << ML_BITS); 682 len = length - RUN_MASK; 683 for (; len > 254; len -= 255) 684 *op++ = 255; 685 *op++ = (BYTE)len; 686 } else 687 *token = (length << ML_BITS); 688 689 /* Copy Literals */ 690 LZ4_BLINDCOPY(anchor, op, length); 691 692 _next_match: 693 /* Encode Offset */ 694 LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref); 695 696 /* Start Counting */ 697 ip += MINMATCH; 698 ref += MINMATCH; /* MinMatch verified */ 699 anchor = ip; 700 while (ip < matchlimit - (STEPSIZE - 1)) { 701 UARCH diff = AARCH(ref) ^ AARCH(ip); 702 if (!diff) { 703 ip += STEPSIZE; 704 ref += STEPSIZE; 705 continue; 706 } 707 ip += LZ4_NbCommonBytes(diff); 708 goto _endCount; 709 } 710#if LZ4_ARCH64 711 if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) { 712 ip += 4; 713 ref += 4; 714 } 715#endif 716 if ((ip < (matchlimit - 1)) 
&& (A16(ref) == A16(ip))) { 717 ip += 2; 718 ref += 2; 719 } 720 if ((ip < matchlimit) && (*ref == *ip)) 721 ip++; 722 _endCount: 723 724 /* Encode MatchLength */ 725 len = (ip - anchor); 726 /* Check output limit */ 727 if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend) 728 return (0); 729 if (len >= (int)ML_MASK) { 730 *token += ML_MASK; 731 len -= ML_MASK; 732 for (; len > 509; len -= 510) { 733 *op++ = 255; 734 *op++ = 255; 735 } 736 if (len > 254) { 737 len -= 255; 738 *op++ = 255; 739 } 740 *op++ = (BYTE)len; 741 } else 742 *token += len; 743 744 /* Test end of chunk */ 745 if (ip > mflimit) { 746 anchor = ip; 747 break; 748 } 749 /* Fill table */ 750 HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base; 751 752 /* Test next position */ 753 ref = base + HashTable[LZ4_HASH64K_VALUE(ip)]; 754 HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base; 755 if (A32(ref) == A32(ip)) { 756 token = op++; 757 *token = 0; 758 goto _next_match; 759 } 760 /* Prepare next loop */ 761 anchor = ip++; 762 forwardH = LZ4_HASH64K_VALUE(ip); 763 } 764 765 _last_literals: 766 /* Encode Last Literals */ 767 { 768 int lastRun = iend - anchor; 769 if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > 770 oend) 771 return (0); 772 if (lastRun >= (int)RUN_MASK) { 773 *op++ = (RUN_MASK << ML_BITS); 774 lastRun -= RUN_MASK; 775 for (; lastRun > 254; lastRun -= 255) 776 *op++ = 255; 777 *op++ = (BYTE)lastRun; 778 } else 779 *op++ = (lastRun << ML_BITS); 780 (void) memcpy(op, anchor, iend - anchor); 781 op += iend - anchor; 782 } 783 784 /* End */ 785 return (int)(((char *)op) - dest); 786} 787 788static int 789real_LZ4_compress(const char *source, char *dest, int isize, int osize) 790{ 791#if HEAPMODE 792 void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP); 793 int result; 794 795 /* 796 * out of kernel memory, gently fall through - this will disable 797 * compression in zio_compress_data 798 */ 799 if (ctx == NULL) 800 return (0); 801 802 if (isize < LZ4_64KLIMIT) 803 result 
= LZ4_compress64kCtx(ctx, source, dest, isize, osize); 804 else 805 result = LZ4_compressCtx(ctx, source, dest, isize, osize); 806 807 kmem_free(ctx, sizeof (struct refTables)); 808 return (result); 809#else 810 if (isize < (int)LZ4_64KLIMIT) 811 return (LZ4_compress64kCtx(NULL, source, dest, isize, osize)); 812 return (LZ4_compressCtx(NULL, source, dest, isize, osize)); 813#endif 814} 815 816/* Decompression functions */ 817 818/* 819 * Note: The decoding functions real_LZ4_uncompress() and 820 * LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow" 821 * attack type. They will never write nor read outside of the provided 822 * output buffers. LZ4_uncompress_unknownOutputSize() also insures that 823 * it will never read outside of the input buffer. A corrupted input 824 * will produce an error result, a negative int, indicating the position 825 * of the error within input stream. 826 */ 827 828static int 829real_LZ4_uncompress(const char *source, char *dest, int osize) 830{ 831 /* Local Variables */ 832 const BYTE *restrict ip = (const BYTE *) source; 833 const BYTE *ref; 834 835 BYTE *op = (BYTE *) dest; 836 BYTE *const oend = op + osize; 837 BYTE *cpy; 838 839 unsigned token; 840 841 size_t length; 842 size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; 843#if LZ4_ARCH64 844 size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; 845#endif 846 847 /* Main Loop */ 848 for (;;) { 849 /* get runlength */ 850 token = *ip++; 851 if ((length = (token >> ML_BITS)) == RUN_MASK) { 852 size_t len; 853 for (; (len = *ip++) == 255; length += 255) { 854 } 855 length += len; 856 } 857 /* copy literals */ 858 cpy = op + length; 859 if unlikely(cpy > oend - COPYLENGTH) { 860 if (cpy != oend) 861 /* Error: we must necessarily stand at EOF */ 862 goto _output_error; 863 (void) memcpy(op, ip, length); 864 ip += length; 865 break; /* EOF */ 866 } 867 LZ4_WILDCOPY(ip, op, cpy); 868 ip -= (op - cpy); 869 op = cpy; 870 871 /* get offset */ 872 
LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); 873 ip += 2; 874 if unlikely(ref < (BYTE * const) dest) 875 /* 876 * Error: offset create reference outside destination 877 * buffer 878 */ 879 goto _output_error; 880 881 /* get matchlength */ 882 if ((length = (token & ML_MASK)) == ML_MASK) { 883 for (; *ip == 255; length += 255) { 884 ip++; 885 } 886 length += *ip++; 887 } 888 /* copy repeated sequence */ 889 if unlikely(op - ref < STEPSIZE) { 890#if LZ4_ARCH64 891 size_t dec64 = dec64table[op-ref]; 892#else 893 const int dec64 = 0; 894#endif 895 op[0] = ref[0]; 896 op[1] = ref[1]; 897 op[2] = ref[2]; 898 op[3] = ref[3]; 899 op += 4; 900 ref += 4; 901 ref -= dec32table[op-ref]; 902 A32(op) = A32(ref); 903 op += STEPSIZE - 4; 904 ref -= dec64; 905 } else { 906 LZ4_COPYSTEP(ref, op); 907 } 908 cpy = op + length - (STEPSIZE - 4); 909 if (cpy > oend - COPYLENGTH) { 910 if (cpy > oend) 911 /* 912 * Error: request to write beyond destination 913 * buffer 914 */ 915 goto _output_error; 916 LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); 917 while (op < cpy) 918 *op++ = *ref++; 919 op = cpy; 920 if (op == oend) 921 /* 922 * Check EOF (should never happen, since last 923 * 5 bytes are supposed to be literals) 924 */ 925 goto _output_error; 926 continue; 927 } 928 LZ4_SECURECOPY(ref, op, cpy); 929 op = cpy; /* correction */ 930 } 931 932 /* end of decoding */ 933 return (int)(((char *)ip) - source); 934 935 /* write overflow error detected */ 936 _output_error: 937 return (int)(-(((char *)ip) - source)); 938} 939 940static int 941LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize, 942 int maxOutputSize) 943{ 944 /* Local Variables */ 945 const BYTE *restrict ip = (const BYTE *) source; 946 const BYTE *const iend = ip + isize; 947 const BYTE *ref; 948 949 BYTE *op = (BYTE *) dest; 950 BYTE *const oend = op + maxOutputSize; 951 BYTE *cpy; 952 953 size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; 954#if LZ4_ARCH64 955 size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 
1, 2, 3}; 956#endif 957 958 /* Main Loop */ 959 while (ip < iend) { 960 unsigned token; 961 size_t length; 962 963 /* get runlength */ 964 token = *ip++; 965 if ((length = (token >> ML_BITS)) == RUN_MASK) { 966 int s = 255; 967 while ((ip < iend) && (s == 255)) { 968 s = *ip++; 969 length += s; 970 } 971 } 972 /* copy literals */ 973 cpy = op + length; 974 if ((cpy > oend - COPYLENGTH) || 975 (ip + length > iend - COPYLENGTH)) { 976 if (cpy > oend) 977 /* Error: writes beyond output buffer */ 978 goto _output_error; 979 if (ip + length != iend) 980 /* 981 * Error: LZ4 format requires to consume all 982 * input at this stage 983 */ 984 goto _output_error; 985 (void) memcpy(op, ip, length); 986 op += length; 987 /* Necessarily EOF, due to parsing restrictions */ 988 break; 989 } 990 LZ4_WILDCOPY(ip, op, cpy); 991 ip -= (op - cpy); 992 op = cpy; 993 994 /* get offset */ 995 LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); 996 ip += 2; 997 if (ref < (BYTE * const) dest) 998 /* 999 * Error: offset creates reference outside of 1000 * destination buffer 1001 */ 1002 goto _output_error; 1003 1004 /* get matchlength */ 1005 if ((length = (token & ML_MASK)) == ML_MASK) { 1006 while (ip < iend) { 1007 int s = *ip++; 1008 length += s; 1009 if (s == 255) 1010 continue; 1011 break; 1012 } 1013 } 1014 /* copy repeated sequence */ 1015 if unlikely(op - ref < STEPSIZE) { 1016#if LZ4_ARCH64 1017 size_t dec64 = dec64table[op-ref]; 1018#else 1019 const int dec64 = 0; 1020#endif 1021 op[0] = ref[0]; 1022 op[1] = ref[1]; 1023 op[2] = ref[2]; 1024 op[3] = ref[3]; 1025 op += 4; 1026 ref += 4; 1027 ref -= dec32table[op-ref]; 1028 A32(op) = A32(ref); 1029 op += STEPSIZE - 4; 1030 ref -= dec64; 1031 } else { 1032 LZ4_COPYSTEP(ref, op); 1033 } 1034 cpy = op + length - (STEPSIZE - 4); 1035 if (cpy > oend - COPYLENGTH) { 1036 if (cpy > oend) 1037 /* 1038 * Error: request to write outside of 1039 * destination buffer 1040 */ 1041 goto _output_error; 1042 LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); 1043 
while (op < cpy) 1044 *op++ = *ref++; 1045 op = cpy; 1046 if (op == oend) 1047 /* 1048 * Check EOF (should never happen, since 1049 * last 5 bytes are supposed to be literals) 1050 */ 1051 goto _output_error; 1052 continue; 1053 } 1054 LZ4_SECURECOPY(ref, op, cpy); 1055 op = cpy; /* correction */ 1056 } 1057 1058 /* end of decoding */ 1059 return (int)(((char *)op) - dest); 1060 1061 /* write overflow error detected */ 1062 _output_error: 1063 return (int)(-(((char *)ip) - source)); 1064} 1065