lz4.c revision 246768
/*
 * LZ4 - Fast LZ compression algorithm
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

#include <sys/zfs_context.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int real_LZ4_uncompress(const char *source, char *dest, int osize);
static int LZ4_compressBound(int isize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

/*ARGSUSED*/
size_t
lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * Encode the compressed buffer size at the start. We'll need this in
	 * decompression to counter the effects of padding which might be
	 * added to the compressed buffer and which, if unhandled, would
	 * confuse the hell out of our decompression function.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}

/*ARGSUSED*/
int
lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned
	 * non-negative) and non-zero on failure (decompression function
	 * returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 * 	isize  : is the input size. Max supported value is ~1.9GB
 * 	return : the number of bytes written in buffer dest
 *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 * 	note : destination buffer must be already allocated.
 * 		destination buffer must be sized to handle worst-case
 * 		situations (input data not compressible); worst-case size
 * 		evaluation is provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 * 	osize  : is the output size, therefore the original size
 * 	return : the number of bytes read in the source buffer.
 * 		If the source stream is malformed, the function will stop
 * 		decoding and return a negative result, indicating the byte
 * 		position of the faulty instruction. This function never
 * 		writes beyond dest + osize, and is therefore protected
 * 		against malicious data packets.
 * 	note : destination buffer must be already allocated
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 * 	Provides the maximum size that LZ4 may output in a "worst case"
 * 	scenario (input data not compressible), primarily useful for memory
 * 	allocation of the output buffer.
 *
 * 	isize  : is the input size. Max supported value is ~1.9GB
 * 	return : maximum output size in a "worst case" scenario
 * 	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 * 	isize  : is the input size, therefore the compressed size
 * 	maxOutputSize : is the size of the destination buffer (which must be
 * 		already allocated)
 * 	return : the number of bytes decoded in the destination buffer
 * 		(necessarily <= maxOutputSize). If the source stream is
 * 		malformed, the function will stop decoding and return a
 * 		negative result, indicating the byte position of the faulty
 * 		instruction. This function never writes beyond dest +
 * 		maxOutputSize, and is therefore protected against malicious
 * 		data packets.
 * 	note   : Destination buffer must be already allocated.
 *		This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 * 	This function explicitly handles the CTX memory structure.
 *
 * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 * 	isn't valid.
 *
 * LZ4_compress64kCtx() :
 * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 * 	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 * 	isn't valid.
 */
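
/*
 * Illustrative sketch (not part of the original source): how a caller might
 * drive the two ZFS-facing wrappers above. The function name, the use of
 * LZ4_compressBound() for sizing, and the KM_SLEEP allocations are
 * assumptions for the example only; ZFS itself calls these through its
 * compression vector table with pre-sized buffers.
 */
#if 0
static void
lz4_roundtrip_example(void *src, size_t s_len)
{
	/* Worst case: 4-byte size header plus the LZ4 bound. */
	size_t d_len = sizeof (uint32_t) + LZ4_compressBound(s_len);
	char *cbuf = kmem_alloc(d_len, KM_SLEEP);
	char *dbuf = kmem_alloc(s_len, KM_SLEEP);

	/* lz4_compress() signals failure by returning s_len. */
	size_t c_len = lz4_compress(src, cbuf, s_len, d_len, 0);
	if (c_len != s_len) {
		/* lz4_decompress() returns 0 on success, nonzero on error. */
		int err = lz4_decompress(cbuf, dbuf, c_len, s_len, 0);
		ASSERT(err == 0);
	}

	kmem_free(dbuf, s_len);
	kmem_free(cbuf, d_len);
}
#endif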

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 *	Lowering this value reduces memory usage. Reduced memory usage
 *	typically improves speed, due to cache effects (ex: L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *	(examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12
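
/*
 * The 2^(N+2) figure follows from the hash table layout below: 2^HASH_LOG
 * entries of 4 bytes each (HTYPE is U32 on 64-bit builds and a 4-byte
 * pointer on 32-bit ones), so N = 12 gives 4096 * 4 = 16KB. The 64KB
 * variant uses twice as many 2-byte (U16) entries, for the same total.
 */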

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *	algorithm skip more quickly over data segments considered
 *	"incompressible". This may decrease compression ratio dramatically,
 *	but will be faster on incompressible data. Increasing this value
 *	will make the algorithm search more before declaring a segment
 *	"incompressible". This could improve compression a bit, but will
 *	be slower on incompressible data. The default value (6) is
 *	recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big-endian CPUs, but the resulting compressed stream
 * will be incompatible with little-endian CPUs. You can set this option
 * to 1 in situations where data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
    defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
    defined(__LP64__) || defined(_LP64))
#define	LZ4_ARCH64 1
/*
 * Illumos: On amd64 we have 20k of stack and 24k on sun4u and sun4v, so we
 * can spend 16k on the algorithm
 */
/* FreeBSD: Use heap for all platforms for now */
#define	STACKLIMIT 0
#else
#define	LZ4_ARCH64 0
/*
 * Illumos: On i386 we only have 12k of stack, so in order to maintain the
 * same COMPRESSIONLEVEL we have to use heap allocation. Performance will
 * suck, but alas, it's ZFS on 32-bit we're talking about, so...
 */
#define	STACKLIMIT 0
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture
 * endianness.
 */
#if BYTE_ORDER == BIG_ENDIAN
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * FreeBSD: can't use GCC's __builtin_ctz when targeting sparc64, because
 * gcc currently relies on libcompiler_rt.
 *
 * TODO: revisit this when the situation changes.
 */
#if defined(__sparc64__)
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
#if __STDC_VERSION__ >= 199901L	/* C99 */
/* "restrict" is a known keyword */
#else
/* Disable restrict */
#define	restrict
#endif

#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))

#define	expect(expr, value)    (__builtin_expect((expr), (value)))

#if defined(likely)
#undef likely
#endif
#if defined(unlikely)
#undef unlikely
#endif

#define	likely(expr)	expect((expr) != 0, 1)
#define	unlikely(expr)	expect((expr) != 0, 0)

/* Basic types */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x) (((U64_S *)(x))->v)
#define	A32(x) (((U32_S *)(x))->v)
#define	A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

/*
 * Defines whether memory is allocated on the stack (local variable) or
 * in the heap (kmem_alloc()).
 */
#define	HEAPMODE (HASH_LOG > STACKLIMIT)
#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
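/*
 * LZ4_HASH_FUNCTION() is multiplicative (Fibonacci) hashing: the first
 * MINMATCH (4) input bytes are multiplied by 2654435761, a prime close to
 * 2^32 divided by the golden ratio, and the top HASH_LOG bits of the
 * 32-bit product select the hash bucket.
 */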
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }


/* Private functions */
#if LZ4_ARCH64

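/*
 * LZ4_NbCommonBytes() takes the XOR of two machine words and returns how
 * many of their low-addressed bytes are equal, i.e. how far a match
 * extends within the word. On little-endian CPUs those bytes are the
 * low-order bits, hence count-trailing-zeros; on big-endian CPUs they are
 * the high-order bits, hence count-leading-zeros. The little-endian
 * software fallback isolates the lowest set bit with (val & -val) and maps
 * it to a byte index with a De Bruijn multiply-and-shift lookup; the
 * big-endian fallback uses a shift-and-test cascade.
 */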
static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif

/* Public functions */

static int
LZ4_compressBound(int isize)
{
	return (isize + (isize / 255) + 16);
}
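
/*
 * Worked example: for a 128KB ZFS record, LZ4_compressBound(131072) =
 * 131072 + 514 + 16 = 131602 bytes, i.e. worst-case expansion is about
 * 0.4% plus a small constant.
 */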

/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
	HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

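		/*
		 * After every 2^skipStrength failed match attempts the
		 * forward step below grows by one byte, so the scan
		 * accelerates through regions that are not producing
		 * matches (see NOTCOMPRESSIBLE_CONFIRMATION above).
		 */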
		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if unlikely(forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while likely(ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}


/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))

/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);
#else
	U16 HashTable[HASH64KTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
#if HEAPMODE
	void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
	int result;

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	kmem_free(ctx, sizeof (struct refTables));
	return (result);
#else
	if (isize < (int)LZ4_64KLIMIT)
		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
#endif
}

/* Decompression functions */

/*
 * Note: The decoding functions real_LZ4_uncompress() and
 *	LZ4_uncompress_unknownOutputSize() are safe against buffer-overflow
 *	attacks. They will never write nor read outside of the provided
 *	output buffers. LZ4_uncompress_unknownOutputSize() also ensures that
 *	it will never read outside of the input buffer. A corrupted input
 *	will produce an error result, a negative int, indicating the position
 *	of the error within the input stream.
 */
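
/*
 * Illustrative sketch (not original code): interpreting the return value
 * of the safe decoder. "src", "dst", "c_len" and "d_len" are hypothetical.
 */
#if 0
	int ret = LZ4_uncompress_unknownOutputSize(src, dst, c_len, d_len);
	if (ret < 0) {
		/* Corrupt stream: -ret is the offending input position. */
	} else {
		/* Success: ret is the number of bytes decoded into dst. */
	}
#endif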

static int
real_LZ4_uncompress(const char *source, char *dest, int osize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + osize;
	BYTE *cpy;

	unsigned token;

	size_t length;
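	/*
	 * When the match offset (op - ref) is smaller than STEPSIZE, a
	 * plain word copy would read bytes that this iteration has not
	 * written yet. The overlap branch below therefore copies the first
	 * four bytes one at a time, then uses dec32table/dec64table to
	 * step "ref" back so that the following word copies keep
	 * replicating the short repeating pattern correctly.
	 */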
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	for (;;) {
		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			size_t len;
			for (; (len = *ip++) == 255; length += 255) {
			}
			length += len;
		}
		/* copy literals */
		cpy = op + length;
		if unlikely(cpy > oend - COPYLENGTH) {
			if (cpy != oend)
				/* Error: we must necessarily stand at EOF */
				goto _output_error;
			(void) memcpy(op, ip, length);
			ip += length;
			break;	/* EOF */
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if unlikely(ref < (BYTE * const) dest)
			/*
			 * Error: offset creates a reference outside the
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			for (; *ip == 255; length += 255) {
				ip++;
			}
			length += *ip++;
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op - ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op - ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write beyond destination
				 * buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since last
				 * 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)ip) - source);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

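	/*
	 * See the overlap-copy note in real_LZ4_uncompress() above; the
	 * same dec32table/dec64table trick handles match offsets smaller
	 * than STEPSIZE here.
	 */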
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: the LZ4 format requires all input
				 * to be consumed at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates a reference outside of the
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op - ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op - ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}
1128