/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

#include <sys/zfs_context.h>
#include <sys/zio_compress.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

static void *lz4_alloc(int flags);
static void lz4_free(void *ctx);
/*ARGSUSED*/
size_t
lz4_compress_zfs(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * The exact compressed size is needed by the decompression routine,
	 * so it is stored at the start of the buffer. Note that this may be
	 * less than the compressed block size, which is rounded up to a
	 * multiple of 1<<ashift.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}
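
/*
 * For reference, the buffer produced above is laid out as follows (a
 * sketch inferred from this file; the 4-byte header is the big-endian
 * uint32_t stored through BE_32(bufsiz)):
 *
 *	+--------------------+---------------------------------+
 *	| 4-byte BE bufsiz   | bufsiz bytes of LZ4 stream      |
 *	+--------------------+---------------------------------+
 *
 * The stored length counts only the LZ4 stream itself, so the value
 * returned to the caller is bufsiz + sizeof (uint32_t).
 */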

/*ARGSUSED*/
int
lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}
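
/*
 * Illustrative round-trip sketch for the two entry points above. This is
 * not compiled here, and the buffer names and error policy are assumptions
 * for illustration only. Assuming the caller passes d_len < s_len (as the
 * ZFS compression path does), a return value of s_len unambiguously means
 * "did not compress":
 *
 *	size_t c_len = lz4_compress_zfs(src, dst, s_len, d_len, 0);
 *	if (c_len == s_len) {
 *		// incompressible or dst too small: store src uncompressed
 *	} else if (lz4_decompress_zfs(dst, out, c_len, s_len, 0) != 0) {
 *		// corrupted stream: decompressor returned an error
 *	}
 */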

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 * 	isize  : is the input size. Max supported value is ~1.9GB
 * 	return : the number of bytes written in buffer dest
 *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 * 	note : destination buffer must be already allocated.
 * 		destination buffer must be sized to handle worst case
 * 		situations (input data not compressible). Worst case size
 * 		evaluation is provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 * 	osize  : is the output size, therefore the original size
 * 	return : the number of bytes read in the source buffer.
 * 		If the source stream is malformed, the function will stop
 * 		decoding and return a negative result, indicating the byte
 * 		position of the faulty instruction. This function never
 * 		writes beyond dest + osize, and is therefore protected
 * 		against malicious data packets.
 * 	note : destination buffer must be already allocated
 *	note : real_LZ4_uncompress() is not used in ZFS so its code
 *	       is not present here.
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 * 	Provides the maximum size that LZ4 may output in a "worst case"
 * 	scenario (input data not compressible). Primarily useful for memory
 * 	allocation of the output buffer.
 *
 * 	isize  : is the input size. Max supported value is ~1.9GB
 * 	return : maximum output size in a "worst case" scenario
 * 	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 * 	isize  : is the input size, therefore the compressed size
 * 	maxOutputSize : is the size of the destination buffer (which must be
 * 		already allocated)
 * 	return : the number of bytes decoded in the destination buffer
 * 		(necessarily <= maxOutputSize). If the source stream is
 * 		malformed, the function will stop decoding and return a
 * 		negative result, indicating the byte position of the faulty
 * 		instruction. This function never writes beyond dest +
 * 		maxOutputSize, and is therefore protected against malicious
 * 		data packets.
 * 	note   : Destination buffer must be already allocated.
 *		This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 * 	This function explicitly handles the CTX memory structure.
 *
 * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 * 	by the caller (either on the stack or using kmem_cache_alloc). Passing
 * 	NULL isn't valid.
 *
 * LZ4_compress64kCtx() :
 * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 * 	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 * 	by the caller (either on the stack or using kmem_cache_alloc). Passing
 * 	NULL isn't valid.
 */
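
/*
 * Illustrative sketch of the Ctx calling convention described above (not
 * compiled; real_LZ4_compress() further below performs this exact
 * sequence, with the table coming from lz4_alloc() instead of the stack):
 *
 *	struct refTables tables;
 *
 *	memset(&tables, 0, sizeof (tables));
 *	len = LZ4_compressCtx(&tables, src, dst, isize, osize);
 *	if (len == 0)
 *		// output did not fit in osize bytes: store src uncompressed
 */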

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 *	Lowering this value reduces memory usage. Reduced memory usage
 *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *	(examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *	algorithm skip data segments considered "incompressible" faster.
 *	This may decrease compression ratio dramatically, but will be
 *	faster on incompressible data. Increasing this value will make
 *	the algorithm search more before declaring a segment "incompressible".
 *	This could improve compression a bit, but will be slower on
 *	incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6
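
/*
 * How this feeds the match finder below: the step between probes is
 * computed as findMatchAttempts++ >> skipStrength, with findMatchAttempts
 * starting at (1 << skipStrength) + 3. With the default of 6, roughly the
 * first 64 probes advance one byte at a time, the next 64 advance two
 * bytes, and so on; the scan accelerates through data that keeps failing
 * to produce a match.
 */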

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big-endian CPUs, but the resulting compressed stream
 * will be incompatible with little-endian CPUs. You can set this option
 * to 1 in situations where data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if defined(_LP64)
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 * kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 * kernel
 */
#undef	LZ4_FORCE_SW_BITCOUNT
#if defined(__sparc)
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
/* Disable restrict */
#define	restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define	expect(expr, value)    (__builtin_expect((expr), (value)))
#else
#define	expect(expr, value)    (expr)
#endif

#ifndef likely
#define	likely(expr)	expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define	unlikely(expr)	expect((expr) != 0, 0)
#endif

#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))

/* Basic types */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x) (((U64_S *)(x))->v)
#define	A32(x) (((U32_S *)(x))->v)
#define	A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)
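
/*
 * Worked example of the sequence layout these constants describe: each
 * LZ4 sequence begins with a token byte holding the literal count in its
 * high RUN_BITS (4) bits and the match length (minus MINMATCH) in its low
 * ML_BITS (4) bits. A field value of RUN_MASK/ML_MASK (15) means the
 * length continues in the following bytes, each adding up to 255, until a
 * byte below 255 terminates it. For example, 18 literals encode as a high
 * nibble of 15 plus one extra byte of 3 (15 + 3 = 18).
 */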


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};
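
/*
 * Size check (follows from the definitions above): with HASH_LOG = 12 the
 * table has 4096 entries, and HTYPE is 4 bytes wide both on 64-bit builds
 * (U32) and on typical ILP32 builds (const BYTE *), so sizeof (struct
 * refTables) comes to 16 KB, matching the 2^(N+2) formula quoted for
 * COMPRESSIONLEVEL and the FreeBSD static asserts near the end of this
 * file.
 */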


/* Macros */
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }
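
/*
 * The multiplier 2654435761U is a prime close to 2^32 divided by the
 * golden ratio, i.e. Knuth's multiplicative hashing: multiplying the four
 * input bytes by it mixes them into the high bits, and the right shift
 * keeps the top HASH_LOG bits as the table index. With HASH_LOG = 12 the
 * shift is (MINMATCH * 8) - HASH_LOG = 32 - 12 = 20.
 */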


/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif
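
/*
 * The two DeBruijnBytePos fallbacks above use the classic De Bruijn
 * trick: (val & -val) isolates the lowest set bit, multiplying by the De
 * Bruijn constant places a unique pattern in the top bits, and those top
 * 5 (or 6) bits index the lookup table. The tables store the byte
 * position (bit index / 8) directly, since the match loop only needs to
 * know how many whole bytes are common.
 */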

/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (unlikely(forwardIp > mflimit)) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}



/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
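
/*
 * Why a separate 64 KB variant: when isize < LZ4_64KLIMIT every input
 * offset fits in 16 bits, so the hash table can hold U16 offsets from the
 * start of the block instead of U32 values or pointers. The same
 * refTables allocation therefore fits twice as many entries (hence
 * HASHLOG64K = HASH_LOG + 1), and the MAX_DISTANCE test can be dropped
 * from the match loop because the whole block lies within the 64 KB
 * window.
 */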

/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
	void *ctx;
	int result;

	ctx = lz4_alloc(KM_SLEEP);

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	memset(ctx, 0, sizeof (struct refTables));

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	lz4_free(ctx);
	return (result);
}

/* Decompression functions */

/*
 * Note: The decoding functions real_LZ4_uncompress() and
 *	LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
 *	attacks. They will never write nor read outside of the provided
 *	output buffers. LZ4_uncompress_unknownOutputSize() also ensures that
 *	it will never read outside of the input buffer. A corrupted input
 *	will produce an error result, a negative int, indicating the position
 *	of the error within the input stream.
 *
 * Note[2]: real_LZ4_uncompress(), referred to above, is not used in ZFS so
 *	its code is not present here.
 */

static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
#endif
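
/*
 * These tables handle matches that overlap their own output (offset
 * smaller than STEPSIZE). With offset 1, for instance, the match is a run
 * of a single repeated byte: the four explicit byte copies in the
 * decompressor replicate it, and the table entries then pull ref back so
 * the following word-sized copies read bytes that have already been
 * written instead of racing ahead of op.
 */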

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				if (unlikely(length > (size_t)(length + s)))
					goto _output_error;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		/* CORNER-CASE: cpy might overflow. */
		if (cpy < op)
			goto _output_error;	/* cpy was overflowed, bail! */
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires to consume all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				if (unlikely(length > (size_t)(length + s)))
					goto _output_error;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if (unlikely(op - ref < STEPSIZE)) {
#if LZ4_ARCH64
			int dec64 = dec64table[op - ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op - ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
#if LZ4_ARCH64
			if ((ref + COPYLENGTH) > oend)
#else
			if ((ref + COPYLENGTH) > oend ||
			    (op + COPYLENGTH) > oend)
#endif
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (-1);
}

#ifdef __FreeBSD__
/*
 * FreeBSD has 4, 8 and 16 KB malloc zones which can be used here.
 * Should struct refTables get resized this may need to be revisited, hence
 * compile-time asserts.
 */
_Static_assert(sizeof(struct refTables) <= 16384,
    "refTables too big for malloc");
_Static_assert((sizeof(struct refTables) % 4096) == 0,
    "refTables not a multiple of page size");
#else
#define	ZFS_LZ4_USE_CACHE
#endif

#ifdef ZFS_LZ4_USE_CACHE
static kmem_cache_t *lz4_cache;

void
lz4_init(void)
{
	lz4_cache = kmem_cache_create("lz4_cache",
	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

void
lz4_fini(void)
{
	if (lz4_cache) {
		kmem_cache_destroy(lz4_cache);
		lz4_cache = NULL;
	}
}

static void *
lz4_alloc(int flags)
{
	ASSERT(lz4_cache != NULL);
	return (kmem_cache_alloc(lz4_cache, flags));
}

static void
lz4_free(void *ctx)
{
	kmem_cache_free(lz4_cache, ctx);
}
#else
void
lz4_init(void)
{
}

void
lz4_fini(void)
{
}

static void *
lz4_alloc(int flags)
{
	return (kmem_alloc(sizeof (struct refTables), flags));
}

static void
lz4_free(void *ctx)
{
	kmem_free(ctx, sizeof (struct refTables));
}
#endif