lz4.c revision 245512
/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

#include <sys/zfs_context.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int real_LZ4_uncompress(const char *source, char *dest, int osize);
static int LZ4_compressBound(int isize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

/*ARGSUSED*/
size_t
lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * Encode the compressed buffer size at the start. We'll need this in
	 * decompression to counter the effects of padding which might be
	 * added to the compressed buffer and which, if unhandled, would
	 * confuse the hell out of our decompression function.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}
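
/*
 * Illustrative layout sketch (an addition, not from the original source):
 * the destination buffer produced above carries a 4-byte big-endian length
 * header followed by the raw LZ4 payload, which lz4_decompress() below
 * unpacks again:
 *
 *	d_start:  +--------------------+----------------------------+
 *	          | BE_32(bufsiz), 4 B | compressed data, bufsiz B  |
 *	          +--------------------+----------------------------+
 *
 * The return value, bufsiz + 4, is the total number of bytes consumed in
 * the destination buffer.
 */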

/*ARGSUSED*/
int
lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : the number of bytes written in buffer dest
 *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *	note : destination buffer must be already allocated.
 *		destination buffer must be sized to handle worst case
 *		situations (input data not compressible); worst case size
 *		evaluation is provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 *	osize  : is the output size, therefore the original size
 *	return : the number of bytes read in the source buffer.
 *		If the source stream is malformed, the function will stop
 *		decoding and return a negative result, indicating the byte
 *		position of the faulty instruction. This function never
 *		writes beyond dest + osize, and is therefore protected
 *		against malicious data packets.
 *	note : destination buffer must be already allocated
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *	Provides the maximum size that LZ4 may output in a "worst case"
 *	scenario (input data not compressible); primarily useful for memory
 *	allocation of the output buffer.
 *
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : maximum output size in a "worst case" scenario
 *	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *	isize  : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *		already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *		(necessarily <= maxOutputSize). If the source stream is
 *		malformed, the function will stop decoding and return a
 *		negative result, indicating the byte position of the faulty
 *		instruction. This function never writes beyond dest +
 *		maxOutputSize, and is therefore protected against malicious
 *		data packets.
 *	note   : Destination buffer must be already allocated.
 *		This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *	This function explicitly handles the CTX memory structure.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 */
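
/*
 * A minimal round-trip sketch of the simple API described above, kept
 * under #if 0 because it is illustrative only: the helper name, the
 * KM_SLEEP allocations and the VERIFY()s are assumptions, not part of
 * this file.
 */
#if 0
static void
lz4_example_roundtrip(const char *src, int isize)
{
	int bound = LZ4_compressBound(isize);	/* worst-case output size */
	char *comp = kmem_alloc(bound, KM_SLEEP);
	char *back = kmem_alloc(isize, KM_SLEEP);
	int csize;

	csize = real_LZ4_compress(src, comp, isize, bound);
	if (csize == 0) {
		/* compression failed; the caller stores src uncompressed */
	} else {
		/* osize is the known original size; returns bytes read */
		VERIFY(real_LZ4_uncompress(comp, back, isize) == csize);
		VERIFY(bcmp(src, back, isize) == 0);
	}
	kmem_free(back, isize);
	kmem_free(comp, bound);
}
#endif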

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 *	Lowering this value reduces memory usage. Reduced memory usage
 *	typically improves speed, due to cache effects (ex: L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *	(examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *	algorithm skip data segments considered "incompressible" faster.
 *	This may decrease compression ratio dramatically, but will be
 *	faster on incompressible data. Increasing this value will make
 *	the algorithm search more before declaring a segment "incompressible".
 *	This could improve compression a bit, but will be slower on
 *	incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6
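
/*
 * Worked example (an addition, using the values from this file via
 * SKIPSTRENGTH below): the match finder computes
 * step = findMatchAttempts++ >> 6, starting from
 * findMatchAttempts = (1 << 6) + 3 = 67. The first ~61 probes therefore
 * advance 1 byte each, the next 64 advance 2 bytes, the next 64 advance
 * 3 bytes, and so on, so incompressible regions are skipped at an
 * accelerating rate.
 */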

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big-endian CPUs, but the resulting compressed stream
 * will be incompatible with little-endian CPUs. You can set this option
 * to 1 in situations where data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
    defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
    defined(__LP64__) || defined(_LP64))
#define	LZ4_ARCH64 1
/*
 * Illumos: On amd64 we have 20k of stack and 24k on sun4u and sun4v, so we
 * can spend 16k on the algorithm
 */
#define	STACKLIMIT 12
#else
#define	LZ4_ARCH64 0
/*
 * Illumos: On i386 we only have 12k of stack, so in order to maintain the
 * same COMPRESSIONLEVEL we have to use heap allocation. Performance will
 * suck, but alas, it's ZFS on 32-bit we're talking about, so...
 */
#define	STACKLIMIT 11
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture's
 * endianness.
 */
#if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
	defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
	defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
	defined(__powerpc) || defined(powerpc) || \
	((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos: we can't use GCC's __builtin_ctz family of builtins in the
 * kernel
 */
#define	LZ4_FORCE_SW_BITCOUNT

/*
 * Compiler Options
 */
#if __STDC_VERSION__ >= 199901L	/* C99 */
/* "restrict" is a known keyword */
#else
/* Disable restrict */
#define	restrict
#endif

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#ifdef _MSC_VER
/* Visual Studio */
/* Visual is not C99, but supports some kind of inline */
#define	inline __forceinline
#if LZ4_ARCH64
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward64)
#pragma intrinsic(_BitScanReverse64)
#else /* !LZ4_ARCH64 */
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward)
#pragma intrinsic(_BitScanReverse)
#endif /* !LZ4_ARCH64 */
#endif /* _MSC_VER */

#ifdef _MSC_VER
#define	lz4_bswap16(x) _byteswap_ushort(x)
#else /* !_MSC_VER */
#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))
#endif /* !_MSC_VER */

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define	expect(expr, value)    (__builtin_expect((expr), (value)))
#else
#define	expect(expr, value)    (expr)
#endif

#define	likely(expr)	expect((expr) != 0, 1)
#define	unlikely(expr)	expect((expr) != 0, 0)

/* Basic types */
#if defined(_MSC_VER)
/* Visual Studio does not support 'stdint' natively */
#define	BYTE	unsigned __int8
#define	U16	unsigned __int16
#define	U32	unsigned __int32
#define	S32	__int32
#define	U64	unsigned __int64
#else /* !defined(_MSC_VER) */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t
#endif /* !defined(_MSC_VER) */

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x) (((U64_S *)(x))->v)
#define	A32(x) (((U32_S *)(x))->v)
#define	A16(x) (((U16_S *)(x))->v)
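
/*
 * Illustrative note (an addition, not from the upstream source): A16(),
 * A32() and A64() perform possibly-unaligned loads and stores by going
 * through the packed one-member structs above, e.g.
 *
 *	U32 w = A32(ip);	(32-bit load at any alignment)
 *
 * Unless LZ4_FORCE_UNALIGNED_ACCESS is defined, the pack(1) pragma keeps
 * the compiler from assuming natural alignment for these accesses.
 */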

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

/*
 * Defines if memory is allocated into the stack (local variable),
 * or into the heap (kmem_alloc()).
 */
#define	HEAPMODE (HASH_LOG > STACKLIMIT)
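/*
 * Worked out with the values above (an added note): on 64-bit, HASH_LOG
 * = 12 and STACKLIMIT = 12, so HEAPMODE is 0 and the 16KB hash table
 * lives on the stack; on 32-bit, STACKLIMIT = 11, so HEAPMODE is 1 and
 * the table is allocated with kmem_zalloc() in real_LZ4_compress()
 * instead.
 */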
#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)
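
/*
 * Layout implied by the masks above (an added note): every encoded
 * sequence begins with a 1-byte token. The high 4 bits hold the literal
 * run length (RUN_MASK = 15 meaning "length continues in the following
 * bytes"), and the low 4 bits hold the match length beyond MINMATCH
 * (ML_MASK = 15, with the same byte-extension scheme).
 */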


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }
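
/*
 * Note (added for clarity): LZ4_HASH_FUNCTION() is a multiplicative hash
 * in the style of Knuth: the first four input bytes are multiplied by a
 * large odd constant and only the top HASH_LOG bits are kept. With
 * HASH_LOG = 12 the shift is (4 * 8) - 12 = 20 bits, yielding an index
 * into the 4096-entry hashTable above.
 */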


/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] = {
		0, 0, 0, 0, 0, 1, 1, 2,
		0, 3, 1, 3, 1, 4, 2, 7,
		0, 2, 3, 6, 1, 5, 3, 5,
		1, 3, 4, 4, 2, 5, 6, 7,
		7, 0, 1, 2, 3, 3, 4, 6,
		2, 6, 5, 5, 3, 4, 5, 6,
		7, 1, 2, 4, 6, 4, 4, 5,
		7, 2, 6, 5, 7, 6, 7, 7
	};
	return (DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >>
	    58]);
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return (DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >>
	    27]);
#endif
#endif
}

#endif
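
/*
 * Illustrative walk-through of the little-endian software fallback above
 * (an added note): val is the XOR of two words, so its lowest set bit
 * marks the first differing byte. (val & -val) isolates that bit, the
 * De Bruijn multiply-and-shift converts the bit position into a small
 * table index, and the table maps the index to a byte count. If the
 * words first differ in byte 2, the lowest set bit lies in bits 16..23
 * and the lookup returns 2.
 */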

/* Public functions */

static int
LZ4_compressBound(int isize)
{
	return (isize + (isize / 255) + 16);
}
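
/*
 * Worked example (an added note): for a 128KB input (isize = 131072) the
 * bound is 131072 + 131072/255 + 16 = 131602 bytes, i.e. worst-case
 * expansion is well under one percent.
 */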

/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
	HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if unlikely(forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while likely(ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}



/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
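
/*
 * Note (added for clarity): with inputs below LZ4_64KLIMIT every match
 * position fits in 16 bits, so this variant stores U16 offsets in a
 * table twice as deep (HASHLOG64K = HASH_LOG + 1 = 13, 8192 entries) for
 * the same 16KB footprint, and its match loop below can omit the
 * MAX_DISTANCE check.
 */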

/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);
#else
	U16 HashTable[HASH64KTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
#if HEAPMODE
	void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
	int result;

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	kmem_free(ctx, sizeof (struct refTables));
	return (result);
#else
	if (isize < (int)LZ4_64KLIMIT)
		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
#endif
}

/* Decompression functions */

/*
 * Note: The decoding functions real_LZ4_uncompress() and
 *	LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
 *	attacks. They will never write nor read outside of the provided
 *	output buffers. LZ4_uncompress_unknownOutputSize() also ensures that
 *	it will never read outside of the input buffer. A corrupted input
 *	will produce an error result, a negative int, indicating the position
 *	of the error within the input stream.
 */

static int
real_LZ4_uncompress(const char *source, char *dest, int osize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + osize;
	BYTE *cpy;

	unsigned token;

	size_t length;
	/* tables for copying from overlapping matches (offset < STEPSIZE) */
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	for (;;) {
		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			size_t len;
			for (; (len = *ip++) == 255; length += 255) {
			}
			length += len;
		}
		/* copy literals */
		cpy = op + length;
		if unlikely(cpy > oend - COPYLENGTH) {
			if (cpy != oend)
				/* Error: we must necessarily stand at EOF */
				goto _output_error;
			(void) memcpy(op, ip, length);
			ip += length;
			break;	/* EOF */
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if unlikely(ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside destination
			 * buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			for (; *ip == 255; length += 255) {
				ip++;
			}
			length += *ip++;
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write beyond destination
				 * buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since last
				 * 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)ip) - source);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	/* tables for copying from overlapping matches (offset < STEPSIZE) */
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires consuming all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}
1174