/*
 * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2016-2018, Klara Inc.
 * Copyright (c) 2016-2018, Allan Jude
 * Copyright (c) 2018-2020, Sebastian Gottschall
 * Copyright (c) 2019-2020, Michael Niewöhner
 * Copyright (c) 2020, The FreeBSD Foundation [1]
 *
 * [1] Portions of this software were developed by Allan Jude
 *     under sponsorship from the FreeBSD Foundation.
 */

#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/zfs_context.h>
#include <sys/zio_compress.h>
#include <sys/spa.h>
#include <sys/zstd/zstd.h>

#define	ZSTD_STATIC_LINKING_ONLY
#include "lib/zstd.h"
#include "lib/zstd_errors.h"

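/* Kstat handle used to export the ZSTD statistics defined below */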
kstat_t *zstd_ksp = NULL;

typedef struct zstd_stats {
	kstat_named_t	zstd_stat_alloc_fail;
	kstat_named_t	zstd_stat_alloc_fallback;
	kstat_named_t	zstd_stat_com_alloc_fail;
	kstat_named_t	zstd_stat_dec_alloc_fail;
	kstat_named_t	zstd_stat_com_inval;
	kstat_named_t	zstd_stat_dec_inval;
	kstat_named_t	zstd_stat_dec_header_inval;
	kstat_named_t	zstd_stat_com_fail;
	kstat_named_t	zstd_stat_dec_fail;
	kstat_named_t	zstd_stat_buffers;
	kstat_named_t	zstd_stat_size;
} zstd_stats_t;

static zstd_stats_t zstd_stats = {
	{ "alloc_fail",			KSTAT_DATA_UINT64 },
	{ "alloc_fallback",		KSTAT_DATA_UINT64 },
	{ "compress_alloc_fail",	KSTAT_DATA_UINT64 },
	{ "decompress_alloc_fail",	KSTAT_DATA_UINT64 },
	{ "compress_level_invalid",	KSTAT_DATA_UINT64 },
	{ "decompress_level_invalid",	KSTAT_DATA_UINT64 },
	{ "decompress_header_invalid",	KSTAT_DATA_UINT64 },
	{ "compress_failed",		KSTAT_DATA_UINT64 },
	{ "decompress_failed",		KSTAT_DATA_UINT64 },
	{ "buffers",			KSTAT_DATA_UINT64 },
	{ "size",			KSTAT_DATA_UINT64 },
};
/* Enum describing the allocator type specified by kmem_type in zstd_kmem */
enum zstd_kmem_type {
	ZSTD_KMEM_UNKNOWN = 0,
	/* Allocation type using vmem_alloc */
	ZSTD_KMEM_DEFAULT,
	/* Pool based allocation using zstd_mempool_alloc */
	ZSTD_KMEM_POOL,
	/* Reserved fallback memory for decompression only */
	ZSTD_KMEM_DCTX,
	ZSTD_KMEM_COUNT,
};

/* Structure for pooled memory objects */
struct zstd_pool {
	void *mem;
	size_t size;
	kmutex_t barrier;
	hrtime_t timeout;
};

/*
 * Bookkeeping header prepended to each memory allocation handed out to zstd,
 * so zstd_free() knows how the memory was obtained and how to release it.
 */
struct zstd_kmem {
	enum zstd_kmem_type kmem_type;
	size_t kmem_size;
	struct zstd_pool *pool;
};

/* Fallback memory structure used for decompression only if memory runs out */
struct zstd_fallback_mem {
	size_t mem_size;
	void *mem;
	kmutex_t barrier;
};

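/* Maps a ZFS internal compression level enum to a raw zstd level */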
struct zstd_levelmap {
	int16_t zstd_level;
	enum zio_zstd_levels level;
};

/*
 * ZSTD memory handlers
 *
 * For decompression we use a different handler which also provides fallback
 * memory allocation in case memory runs out.
 *
 * The handlers are kept separate to keep the implementation as simple as
 * possible.
 */
static void *zstd_alloc(void *opaque, size_t size);
static void *zstd_dctx_alloc(void *opaque, size_t size);
static void zstd_free(void *opaque, void *ptr);

/* Compression memory handler */
static const ZSTD_customMem zstd_malloc = {
	zstd_alloc,
	zstd_free,
	NULL,
};

/* Decompression memory handler */
static const ZSTD_customMem zstd_dctx_malloc = {
	zstd_dctx_alloc,
	zstd_free,
	NULL,
};

/* Level map for converting ZFS internal levels to ZSTD levels and vice versa */
static struct zstd_levelmap zstd_levels[] = {
	{ZIO_ZSTD_LEVEL_1, ZIO_ZSTD_LEVEL_1},
	{ZIO_ZSTD_LEVEL_2, ZIO_ZSTD_LEVEL_2},
	{ZIO_ZSTD_LEVEL_3, ZIO_ZSTD_LEVEL_3},
	{ZIO_ZSTD_LEVEL_4, ZIO_ZSTD_LEVEL_4},
	{ZIO_ZSTD_LEVEL_5, ZIO_ZSTD_LEVEL_5},
	{ZIO_ZSTD_LEVEL_6, ZIO_ZSTD_LEVEL_6},
	{ZIO_ZSTD_LEVEL_7, ZIO_ZSTD_LEVEL_7},
	{ZIO_ZSTD_LEVEL_8, ZIO_ZSTD_LEVEL_8},
	{ZIO_ZSTD_LEVEL_9, ZIO_ZSTD_LEVEL_9},
	{ZIO_ZSTD_LEVEL_10, ZIO_ZSTD_LEVEL_10},
	{ZIO_ZSTD_LEVEL_11, ZIO_ZSTD_LEVEL_11},
	{ZIO_ZSTD_LEVEL_12, ZIO_ZSTD_LEVEL_12},
	{ZIO_ZSTD_LEVEL_13, ZIO_ZSTD_LEVEL_13},
	{ZIO_ZSTD_LEVEL_14, ZIO_ZSTD_LEVEL_14},
	{ZIO_ZSTD_LEVEL_15, ZIO_ZSTD_LEVEL_15},
	{ZIO_ZSTD_LEVEL_16, ZIO_ZSTD_LEVEL_16},
	{ZIO_ZSTD_LEVEL_17, ZIO_ZSTD_LEVEL_17},
	{ZIO_ZSTD_LEVEL_18, ZIO_ZSTD_LEVEL_18},
	{ZIO_ZSTD_LEVEL_19, ZIO_ZSTD_LEVEL_19},
	{-1, ZIO_ZSTD_LEVEL_FAST_1},
	{-2, ZIO_ZSTD_LEVEL_FAST_2},
	{-3, ZIO_ZSTD_LEVEL_FAST_3},
	{-4, ZIO_ZSTD_LEVEL_FAST_4},
	{-5, ZIO_ZSTD_LEVEL_FAST_5},
	{-6, ZIO_ZSTD_LEVEL_FAST_6},
	{-7, ZIO_ZSTD_LEVEL_FAST_7},
	{-8, ZIO_ZSTD_LEVEL_FAST_8},
	{-9, ZIO_ZSTD_LEVEL_FAST_9},
	{-10, ZIO_ZSTD_LEVEL_FAST_10},
	{-20, ZIO_ZSTD_LEVEL_FAST_20},
	{-30, ZIO_ZSTD_LEVEL_FAST_30},
	{-40, ZIO_ZSTD_LEVEL_FAST_40},
	{-50, ZIO_ZSTD_LEVEL_FAST_50},
	{-60, ZIO_ZSTD_LEVEL_FAST_60},
	{-70, ZIO_ZSTD_LEVEL_FAST_70},
	{-80, ZIO_ZSTD_LEVEL_FAST_80},
	{-90, ZIO_ZSTD_LEVEL_FAST_90},
	{-100, ZIO_ZSTD_LEVEL_FAST_100},
	{-500, ZIO_ZSTD_LEVEL_FAST_500},
	{-1000, ZIO_ZSTD_LEVEL_FAST_1000},
};

/*
 * This variable represents the maximum count of the pool based on the number
 * of CPUs plus some buffer. We default to cpu count * 4; see zstd_init.
 */
static int pool_count = 16;

#define	ZSTD_POOL_MAX		pool_count
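/* Timeout in seconds after which unused pool objects are released */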
#define	ZSTD_POOL_TIMEOUT	60 * 2

static struct zstd_fallback_mem zstd_dctx_fallback;
static struct zstd_pool *zstd_mempool_cctx;
static struct zstd_pool *zstd_mempool_dctx;

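/* Release pool objects that have been unused longer than their timeout */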
static void
zstd_mempool_reap(struct zstd_pool *zstd_mempool)
{
	struct zstd_pool *pool;

	if (!zstd_mempool || !ZSTDSTAT(zstd_stat_buffers)) {
		return;
	}

	/* Free obsolete slots */
	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
		pool = &zstd_mempool[i];
		if (pool->mem && mutex_tryenter(&pool->barrier)) {
			/* Free memory if unused object older than 2 minutes */
			if (pool->mem && gethrestime_sec() > pool->timeout) {
				vmem_free(pool->mem, pool->size);
				ZSTDSTAT_SUB(zstd_stat_buffers, 1);
				ZSTDSTAT_SUB(zstd_stat_size, pool->size);
				pool->mem = NULL;
				pool->size = 0;
				pool->timeout = 0;
			}
			mutex_exit(&pool->barrier);
		}
	}
}

/*
 * Try to get a cached, allocated buffer from the memory pool or allocate a new
 * one if necessary. If an object is older than 2 minutes and does not fit the
 * requested size, it will be released and a new cached entry will be allocated.
 * If other pooled objects are detected without being used for 2 minutes, they
 * will be released, too.
 *
 * The concept is that high frequency memory allocations of bigger objects are
 * expensive. So if a lot of work is going on, allocations will be kept for a
 * while and can be reused in that time frame.
 *
 * The scheduled release will be updated every time an object is reused.
 */

static void *
zstd_mempool_alloc(struct zstd_pool *zstd_mempool, size_t size)
{
	struct zstd_pool *pool;
	struct zstd_kmem *mem = NULL;

	if (!zstd_mempool) {
		return (NULL);
	}

	/* Seek for preallocated memory slot and free obsolete slots */
	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
		pool = &zstd_mempool[i];
		/*
		 * This lock is simply a marker for a pool object being in use.
		 * If it is already held, the slot will be skipped.
		 *
		 * We need to take the lock before checking the slot to avoid
		 * race conditions caused by running in a threaded context.
		 *
		 * The lock is later released by zstd_mempool_free.
		 */
		if (mutex_tryenter(&pool->barrier)) {
			/*
			 * Check if the object fits the requested size; if so,
			 * take it and update the timestamp.
			 */
			if (pool->mem && size <= pool->size) {
				pool->timeout = gethrestime_sec() +
				    ZSTD_POOL_TIMEOUT;
				mem = pool->mem;
				return (mem);
			}
			mutex_exit(&pool->barrier);
		}
	}

	/*
	 * If no preallocated slot was found, try to fill in a new one.
	 *
	 * We run a similar algorithm twice here to avoid pool fragmentation.
	 * The first loop may generate holes in the list if objects get
	 * released. We always make sure that these holes get filled instead
	 * of adding new allocations constantly at the end.
	 */
	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
		pool = &zstd_mempool[i];
		if (mutex_tryenter(&pool->barrier)) {
			/* Object is free, try to allocate new one */
			if (!pool->mem) {
				mem = vmem_alloc(size, KM_SLEEP);
				if (mem) {
					ZSTDSTAT_ADD(zstd_stat_buffers, 1);
					ZSTDSTAT_ADD(zstd_stat_size, size);
					pool->mem = mem;
					pool->size = size;
					/* Keep track for later release */
					mem->pool = pool;
					mem->kmem_type = ZSTD_KMEM_POOL;
					mem->kmem_size = size;
				}
			}

			if (size <= pool->size) {
				/* Update timestamp */
				pool->timeout = gethrestime_sec() +
				    ZSTD_POOL_TIMEOUT;

				return (pool->mem);
			}

			mutex_exit(&pool->barrier);
		}
	}

	/*
	 * If the pool is full or the allocation failed, fall back to a
	 * non-pooled allocation instead.
	 */
	if (!mem) {
		mem = vmem_alloc(size, KM_NOSLEEP);
		if (mem) {
			mem->pool = NULL;
			mem->kmem_type = ZSTD_KMEM_DEFAULT;
			mem->kmem_size = size;
		}
	}

	return (mem);
}

/* Mark object as released by releasing the barrier mutex */
static void
zstd_mempool_free(struct zstd_kmem *z)
{
	mutex_exit(&z->pool->barrier);
}

/* Convert ZFS internal enum to ZSTD level */
static int
zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level)
{
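	/*
	 * Positive levels 1..19 occupy the first entries of zstd_levels[];
	 * the "fast" levels follow starting at index ZIO_ZSTD_LEVEL_19.
	 */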
	if (level > 0 && level <= ZIO_ZSTD_LEVEL_19) {
		*zstd_level = zstd_levels[level - 1].zstd_level;
		return (0);
	}
	if (level >= ZIO_ZSTD_LEVEL_FAST_1 &&
	    level <= ZIO_ZSTD_LEVEL_FAST_1000) {
		*zstd_level = zstd_levels[level - ZIO_ZSTD_LEVEL_FAST_1
		    + ZIO_ZSTD_LEVEL_19].zstd_level;
		return (0);
	}

	/* Invalid/unknown zfs compression enum - this should never happen. */
	return (1);
}

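/*
 * Layout written by the compression routine below (see zfs_zstdhdr_t):
 * a 32-bit big-endian compressed length, followed by a 32-bit big-endian
 * word whose upper 24 bits hold the ZSTD version and whose remaining 8 bits
 * hold the compression level, followed by the magicless ZSTD frame itself.
 */
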
/* Compress block using zstd */
size_t
zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int level)
{
	size_t c_len;
	int16_t zstd_level;
	zfs_zstdhdr_t *hdr;
	ZSTD_CCtx *cctx;

	hdr = (zfs_zstdhdr_t *)d_start;

	/* Skip compression if the specified level is invalid */
	if (zstd_enum_to_level(level, &zstd_level)) {
		ZSTDSTAT_BUMP(zstd_stat_com_inval);
		return (s_len);
	}

	ASSERT3U(d_len, >=, sizeof (*hdr));
	ASSERT3U(d_len, <=, s_len);
	ASSERT3U(zstd_level, !=, 0);

	cctx = ZSTD_createCCtx_advanced(zstd_malloc);

	/*
	 * Out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (!cctx) {
		ZSTDSTAT_BUMP(zstd_stat_com_alloc_fail);
		return (s_len);
	}

	/* Set the compression level */
	ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, zstd_level);

	/* Use the "magicless" zstd header which saves us 4 header bytes */
	ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless);

	/*
	 * Disable redundant checksum calculation and content size storage
	 * since this is already done by ZFS itself.
	 */
	ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0);
	ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0);

	c_len = ZSTD_compress2(cctx,
	    hdr->data,
	    d_len - sizeof (*hdr),
	    s_start, s_len);

	ZSTD_freeCCtx(cctx);

	/* Error in the compression routine, disable compression. */
	if (ZSTD_isError(c_len)) {
		/*
		 * If we are aborting the compression because the savings are
		 * too small, that is not a failure. Everything else is a
		 * failure, so increment the compression failure counter.
		 */
		if (ZSTD_getErrorCode(c_len) != ZSTD_error_dstSize_tooSmall) {
			ZSTDSTAT_BUMP(zstd_stat_com_fail);
		}
		return (s_len);
	}

	/*
	 * Encode the compressed buffer size at the start. We'll need this in
	 * decompression to counter the effects of padding which might be added
	 * to the compressed buffer and which, if unhandled, would confuse the
	 * hell out of our decompression function.
	 */
	hdr->c_len = BE_32(c_len);

	/*
	 * Check version for overflow.
	 * The limit of 24 bits must not be exceeded. This allows a maximum
	 * version 1677.72.15 which we don't expect to be ever reached.
	 */
	ASSERT3U(ZSTD_VERSION_NUMBER, <=, 0xFFFFFF);

	/*
	 * Encode the compression level as well. We may need to know the
	 * original compression level if compressed_arc is disabled, to match
	 * the compression settings to write this block to the L2ARC.
	 *
	 * Encode the actual level, so if the enum changes in the future, we
	 * will be compatible.
	 *
	 * The upper 24 bits store the ZSTD version to be able to provide
	 * future compatibility, since new versions might enhance the
	 * compression algorithm in a way where the compressed data will
	 * change.
	 *
	 * As soon as such incompatibility occurs, handling code needs to be
	 * added, differentiating between the versions.
	 */
	hdr->version = ZSTD_VERSION_NUMBER;
	hdr->level = level;
	hdr->raw_version_level = BE_32(hdr->raw_version_level);

	return (c_len + sizeof (*hdr));
}
467
468/* Decompress block using zstd and return its stored level */
469int
470zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
471    size_t d_len, uint8_t *level)
472{
473	ZSTD_DCtx *dctx;
474	size_t result;
475	int16_t zstd_level;
476	uint32_t c_len;
477	const zfs_zstdhdr_t *hdr;
478	zfs_zstdhdr_t hdr_copy;
479
480	hdr = (const zfs_zstdhdr_t *)s_start;
481	c_len = BE_32(hdr->c_len);
482
483	/*
484	 * Make a copy instead of directly converting the header, since we must
485	 * not modify the original data that may be used again later.
486	 */
487	hdr_copy.raw_version_level = BE_32(hdr->raw_version_level);
488
489	/*
490	 * NOTE: We ignore the ZSTD version for now. As soon as any
491	 * incompatibility occurs, it has to be handled accordingly.
492	 * The version can be accessed via `hdr_copy.version`.
493	 */
494
	/*
	 * Convert and check the level.
	 * An invalid level is a strong indicator of data corruption! In that
	 * case, return an error so the upper layers can try to fix it.
	 */
	if (zstd_enum_to_level(hdr_copy.level, &zstd_level)) {
		ZSTDSTAT_BUMP(zstd_stat_dec_inval);
		return (1);
	}

	ASSERT3U(d_len, >=, s_len);
	ASSERT3U(hdr_copy.level, !=, ZIO_COMPLEVEL_INHERIT);

	/* Invalid compressed buffer size encoded at start */
	if (c_len + sizeof (*hdr) > s_len) {
		ZSTDSTAT_BUMP(zstd_stat_dec_header_inval);
		return (1);
	}

	dctx = ZSTD_createDCtx_advanced(zstd_dctx_malloc);
	if (!dctx) {
		ZSTDSTAT_BUMP(zstd_stat_dec_alloc_fail);
		return (1);
	}

	/* Set header type to "magicless" */
	ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless);

	/* Decompress the data and release the context */
	result = ZSTD_decompressDCtx(dctx, d_start, d_len, hdr->data, c_len);
	ZSTD_freeDCtx(dctx);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	if (ZSTD_isError(result)) {
		ZSTDSTAT_BUMP(zstd_stat_dec_fail);
		return (1);
	}

	if (level) {
		*level = hdr_copy.level;
	}

	return (0);
}

/* Decompress datablock using zstd */
int
zfs_zstd_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int level __maybe_unused)
{
	return (zfs_zstd_decompress_level(s_start, d_start, s_len, d_len,
	    NULL));
}

/* Allocator for zstd compression context using mempool_allocator */
static void *
zstd_alloc(void *opaque __maybe_unused, size_t size)
{
	size_t nbytes = sizeof (struct zstd_kmem) + size;
	struct zstd_kmem *z = NULL;

	z = (struct zstd_kmem *)zstd_mempool_alloc(zstd_mempool_cctx, nbytes);

	if (!z) {
		ZSTDSTAT_BUMP(zstd_stat_alloc_fail);
		return (NULL);
	}

	return ((void*)z + (sizeof (struct zstd_kmem)));
}

/*
 * Allocator for zstd decompression context using mempool_allocator with
 * fallback to reserved memory if allocation fails
 */
static void *
zstd_dctx_alloc(void *opaque __maybe_unused, size_t size)
{
	size_t nbytes = sizeof (struct zstd_kmem) + size;
	struct zstd_kmem *z = NULL;
	enum zstd_kmem_type type = ZSTD_KMEM_DEFAULT;

	z = (struct zstd_kmem *)zstd_mempool_alloc(zstd_mempool_dctx, nbytes);
	if (!z) {
		/* Try harder, decompression shall not fail */
		z = vmem_alloc(nbytes, KM_SLEEP);
		if (z) {
			z->pool = NULL;
		}
		ZSTDSTAT_BUMP(zstd_stat_alloc_fail);
	} else {
		return ((void*)z + (sizeof (struct zstd_kmem)));
	}

	/* Fallback if everything fails */
	if (!z) {
		/*
		 * Barrier since we can only handle it in a single thread. All
		 * other following threads need to wait here until
		 * decompression is completed. zstd_free will release this
		 * barrier later.
		 */
		mutex_enter(&zstd_dctx_fallback.barrier);

		z = zstd_dctx_fallback.mem;
		type = ZSTD_KMEM_DCTX;
		ZSTDSTAT_BUMP(zstd_stat_alloc_fallback);
	}

	/* Allocation should always be successful */
	if (!z) {
		return (NULL);
	}

	z->kmem_type = type;
	z->kmem_size = nbytes;

	return ((void*)z + (sizeof (struct zstd_kmem)));
}

/* Free allocated memory by its specific type */
static void
zstd_free(void *opaque __maybe_unused, void *ptr)
{
	struct zstd_kmem *z = (ptr - sizeof (struct zstd_kmem));
	enum zstd_kmem_type type;

	ASSERT3U(z->kmem_type, <, ZSTD_KMEM_COUNT);
	ASSERT3U(z->kmem_type, >, ZSTD_KMEM_UNKNOWN);

	type = z->kmem_type;
	switch (type) {
	case ZSTD_KMEM_DEFAULT:
		vmem_free(z, z->kmem_size);
		break;
	case ZSTD_KMEM_POOL:
		zstd_mempool_free(z);
		break;
	case ZSTD_KMEM_DCTX:
		mutex_exit(&zstd_dctx_fallback.barrier);
		break;
	default:
		break;
	}
}

/* Allocate fallback memory to ensure safe decompression */
static void __init
create_fallback_mem(struct zstd_fallback_mem *mem, size_t size)
{
	mem->mem_size = size;
	mem->mem = vmem_zalloc(mem->mem_size, KM_SLEEP);
	mutex_init(&mem->barrier, NULL, MUTEX_DEFAULT, NULL);
}

/* Initialize memory pool barrier mutexes */
static void __init
zstd_mempool_init(void)
{
	zstd_mempool_cctx = (struct zstd_pool *)
	    kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP);
	zstd_mempool_dctx = (struct zstd_pool *)
	    kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP);

	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
		mutex_init(&zstd_mempool_cctx[i].barrier, NULL,
		    MUTEX_DEFAULT, NULL);
		mutex_init(&zstd_mempool_dctx[i].barrier, NULL,
		    MUTEX_DEFAULT, NULL);
	}
}

/* Initialize zstd-related memory handling */
static int __init
zstd_meminit(void)
{
	zstd_mempool_init();

	/*
	 * Estimate the size of the fallback decompression context.
	 * The expected size on x64 with current ZSTD should be about 160 KB.
	 */
	create_fallback_mem(&zstd_dctx_fallback,
	    P2ROUNDUP(ZSTD_estimateDCtxSize() + sizeof (struct zstd_kmem),
	    PAGESIZE));

	return (0);
}

/* Release object from pool and free memory */
static void __exit
release_pool(struct zstd_pool *pool)
{
	mutex_destroy(&pool->barrier);
	vmem_free(pool->mem, pool->size);
	pool->mem = NULL;
	pool->size = 0;
}

/* Release memory pool objects */
static void __exit
zstd_mempool_deinit(void)
{
	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
		release_pool(&zstd_mempool_cctx[i]);
		release_pool(&zstd_mempool_dctx[i]);
	}

	kmem_free(zstd_mempool_dctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool));
	kmem_free(zstd_mempool_cctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool));
	zstd_mempool_dctx = NULL;
	zstd_mempool_cctx = NULL;
}
/* Release unused memory from the pools */
void
zfs_zstd_cache_reap_now(void)
{
	/*
	 * Short-circuit if there are no buffers to begin with.
	 */
	if (ZSTDSTAT(zstd_stat_buffers) == 0)
		return;

	/*
	 * Reap both pools, releasing objects that have gone unused longer
	 * than their timeout.
	 */
	zstd_mempool_reap(zstd_mempool_cctx);
	zstd_mempool_reap(zstd_mempool_dctx);
}

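/* Initialize the zstd module: memory pools, fallback memory and kstats */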
extern int __init
zstd_init(void)
{
	/* Set pool size by using maximum sane thread count * 4 */
	pool_count = (boot_ncpus * 4);
	zstd_meminit();

	/* Initialize kstat */
	zstd_ksp = kstat_create("zfs", 0, "zstd", "misc",
	    KSTAT_TYPE_NAMED, sizeof (zstd_stats) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);
	if (zstd_ksp != NULL) {
		zstd_ksp->ks_data = &zstd_stats;
		kstat_install(zstd_ksp);
	}

	return (0);
}

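/* Tear down the zstd module: remove kstats, free fallback memory and pools */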
extern void __exit
zstd_fini(void)
{
	/* Deinitialize kstat */
	if (zstd_ksp != NULL) {
		kstat_delete(zstd_ksp);
		zstd_ksp = NULL;
	}

	/* Release fallback memory */
	vmem_free(zstd_dctx_fallback.mem, zstd_dctx_fallback.mem_size);
	mutex_destroy(&zstd_dctx_fallback.barrier);

	/* Deinit memory pool */
	zstd_mempool_deinit();
}

#if defined(_KERNEL)
module_init(zstd_init);
module_exit(zstd_fini);

ZFS_MODULE_DESCRIPTION("ZSTD Compression for ZFS");
ZFS_MODULE_LICENSE("Dual BSD/GPL");
ZFS_MODULE_VERSION(ZSTD_VERSION_STRING);

EXPORT_SYMBOL(zfs_zstd_compress);
EXPORT_SYMBOL(zfs_zstd_decompress_level);
EXPORT_SYMBOL(zfs_zstd_decompress);
EXPORT_SYMBOL(zfs_zstd_cache_reap_now);
#endif