/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

/*
 * N.B. - This file seems to be based on LZ4 r85, dated Dec 10, 2012
 */

#include <sys/zfs_context.h>
#include <sys/zio_compress.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

/* See lz4.c */
int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);

static void *lz4_alloc(int flags);
static void lz4_free(void *ctx);

size_t
lz4_compress_zfs(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	(void) n;
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * The exact compressed size is needed by the decompression routine,
	 * so it is stored at the start of the buffer. Note that this may be
	 * less than the compressed block size, which is rounded up to a
	 * multiple of 1<<ashift.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}

int
lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	(void) n;
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}
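
#if 0
/*
 * Illustrative sketch only (not compiled): a hypothetical round trip
 * through the two entry points above.  The buffers, sizes, and the
 * caller itself are invented for illustration.  In ZFS proper,
 * zio_compress_data() passes a d_len smaller than s_len, so that
 * compression must achieve a minimum saving to be worthwhile.
 */
static void
lz4_roundtrip_example(void)
{
	char src[4096] = { 0 };		/* hypothetical source block */
	char packed[4096];		/* 4-byte size header + payload */
	char unpacked[4096];
	size_t c_len;

	/* A return value of s_len means the data was not compressed. */
	c_len = lz4_compress_zfs(src, packed, sizeof (src),
	    sizeof (packed), 0);
	if (c_len >= sizeof (src))
		return;			/* store the block uncompressed */

	/* Returns 0 on success, nonzero on a malformed stream. */
	if (lz4_decompress_zfs(packed, unpacked, c_len,
	    sizeof (unpacked), 0) != 0)
		return;			/* corrupt or truncated input */
}
#endif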

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 * 	isize  : is the input size. Max supported value is ~1.9GB
 * 	return : the number of bytes written in buffer dest
 *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 * 	note : destination buffer must be already allocated.
 * 		It must be sized to handle worst-case situations (input
 * 		data not compressible); worst-case size evaluation is
 * 		provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 * 	osize  : is the output size, therefore the original size
 * 	return : the number of bytes read in the source buffer.
 * 		If the source stream is malformed, the function will stop
 * 		decoding and return a negative result, indicating the byte
 * 		position of the faulty instruction. This function never
 * 		writes beyond dest + osize, and is therefore protected
 * 		against malicious data packets.
 * 	note : destination buffer must be already allocated
 *	note : real_LZ4_uncompress() is not used in ZFS so its code
 *	       is not present here.
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 * 	Provides the maximum size that LZ4 may output in a "worst case"
 * 	scenario (input data not compressible); primarily useful for memory
 * 	allocation of the output buffer.
 *
 * 	isize  : is the input size. Max supported value is ~1.9GB
 * 	return : maximum output size in a "worst case" scenario
 * 	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 * 	isize  : is the input size, therefore the compressed size
 * 	maxOutputSize : is the size of the destination buffer (which must be
 * 		already allocated)
 * 	return : the number of bytes decoded in the destination buffer
 * 		(necessarily <= maxOutputSize). If the source stream is
 * 		malformed, the function will stop decoding and return a
 * 		negative result, indicating the byte position of the faulty
 * 		instruction. This function never writes beyond dest +
 * 		maxOutputSize, and is therefore protected against malicious
 * 		data packets.
 * 	note   : Destination buffer must be already allocated.
 *		This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 * 	This function explicitly handles the CTX memory structure.
 *
 * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 * 	by the caller (either on the stack or using kmem_cache_alloc). Passing
 * 	NULL isn't valid.
 *
 * LZ4_compress64kCtx() :
 * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 * 	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 * 	by the caller (either on the stack or using kmem_cache_alloc). Passing
 * 	NULL isn't valid.
 */
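
#if 0
/*
 * Illustrative sketch only (not compiled): LZ4_compressBound() itself
 * is not part of this file.  Upstream LZ4 of this vintage computes the
 * worst-case output size roughly as below: the input size, plus one
 * extra length byte per 255 input bytes, plus a small constant.
 */
#define	LZ4_COMPRESSBOUND(isize)	((isize) + ((isize) / 255) + 16)
#endif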

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves the compression ratio;
 *	lowering it reduces memory usage. Reduced memory usage typically
 *	improves speed, due to cache effects (ex: L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula : N -> 2^(N+2) bytes
 *	(examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *	algorithm skip through data segments considered "incompressible"
 *	more quickly. This may decrease the compression ratio dramatically,
 *	but will be faster on incompressible data. Increasing this value
 *	will make the algorithm search longer before declaring a segment
 *	"incompressible". This can improve compression a bit, but will be
 *	slower on incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big-endian CPUs, but the resulting compressed stream
 * will be incompatible with little-endian CPUs. You can set this option
 * to 1 in situations where data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if defined(_LP64)
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious and
 * insert extra code to ensure aligned accesses are used. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 * kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 * kernel
 */
#undef	LZ4_FORCE_SW_BITCOUNT
#if defined(__sparc)
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
/* Disable restrict */
#define	restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define	expect(expr, value)    (__builtin_expect((expr), (value)))
#else
#define	expect(expr, value)    (expr)
#endif

#ifndef likely
#define	likely(expr)	expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define	unlikely(expr)	expect((expr) != 0, 0)
#endif

#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))

/* Basic types */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x) (((U64_S *)(x))->v)
#define	A32(x) (((U32_S *)(x))->v)
#define	A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)
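
/*
 * Sequence token layout (derived from the constants above and the
 * encoders below): each sequence begins with a one-byte token whose
 * high RUN_BITS hold the literal-run length and whose low ML_BITS hold
 * the match length minus MINMATCH.  A field value of RUN_MASK/ML_MASK
 * (15) means "long": 255-valued continuation bytes follow, ending with
 * a byte holding the remainder.  Example: a literal run of 20 bytes is
 * encoded as a token high nibble of 15 followed by one extra byte
 * holding 5.
 */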


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }
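
/*
 * Note on LZ4_HASH_FUNCTION above: 2654435761U is a prime close to
 * 2^32 divided by the golden ratio (Knuth-style multiplicative
 * hashing), so the top HASH_LOG bits of the 32-bit product spread
 * 4-byte inputs fairly evenly across the hash table.
 */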


/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif
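
/*
 * Example (64-bit little-endian): if ref and ip agree in their first
 * three bytes, then diff = AARCH(ref) ^ AARCH(ip) has its lowest set
 * bit in byte 3, __builtin_ctzll(diff) falls in [24, 31], and
 * LZ4_NbCommonBytes() returns 3 -- the number of matching leading
 * bytes.  The De Bruijn table lookups above compute the same byte
 * index on compilers without the bit-scan builtins.
 */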

/* Compression functions */

static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (unlikely(forwardIp > mflimit)) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}



/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
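
/*
 * Unlike LZ4_compressCtx(), the 64 KB variant below stores 16-bit
 * offsets (ip - base) in its hash table, which is why isize must stay
 * under LZ4_64KLIMIT: every stored offset must fit in a U16.  The
 * smaller entries also double the number of table slots (HASHLOG64K ==
 * HASH_LOG + 1) within the same struct refTables footprint.
 */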

static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
	void *ctx;
	int result;

	ctx = lz4_alloc(KM_SLEEP);

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	memset(ctx, 0, sizeof (struct refTables));

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	lz4_free(ctx);
	return (result);
}

#ifdef __FreeBSD__
/*
 * FreeBSD has 4, 8 and 16 KB malloc zones which can be used here.
 * Should struct refTables get resized this may need to be revisited, hence
 * the compile-time asserts.
 */
_Static_assert(sizeof(struct refTables) <= 16384,
    "refTables too big for malloc");
_Static_assert((sizeof(struct refTables) % 4096) == 0,
    "refTables not a multiple of page size");
#else
#define ZFS_LZ4_USE_CACHE
#endif

#ifdef ZFS_LZ4_USE_CACHE
static kmem_cache_t *lz4_cache;
#endif

#ifdef ZFS_LZ4_USE_CACHE
void
lz4_init(void)
{
	lz4_cache = kmem_cache_create("lz4_cache",
	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

void
lz4_fini(void)
{
	if (lz4_cache) {
		kmem_cache_destroy(lz4_cache);
		lz4_cache = NULL;
	}
}

static void *
lz4_alloc(int flags)
{
	ASSERT(lz4_cache != NULL);
	return (kmem_cache_alloc(lz4_cache, flags));
}

static void
lz4_free(void *ctx)
{
	kmem_cache_free(lz4_cache, ctx);
}
#else
void
lz4_init(void)
{
}

void
lz4_fini(void)
{
}

static void *
lz4_alloc(int flags)
{
	return (kmem_alloc(sizeof (struct refTables), flags));
}

static void
lz4_free(void *ctx)
{
	kmem_free(ctx, sizeof (struct refTables));
}
#endif