1/*-
2 * Copyright (c) 2017 Sean Purcell
3 * Copyright (c) 2023-2024 Klara, Inc.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "archive_platform.h"
28
29#ifdef HAVE_ERRNO_H
30#include <errno.h>
31#endif
32#ifdef HAVE_LIMITS_H
33#include <limits.h>
34#endif
35#ifdef HAVE_STDINT_H
36#include <stdint.h>
37#endif
38#ifdef HAVE_STDLIB_H
39#include <stdlib.h>
40#endif
41#ifdef HAVE_STRING_H
42#include <string.h>
43#endif
44#ifdef HAVE_UNISTD_H
45#include <unistd.h>
46#endif
47#ifdef HAVE_ZSTD_H
48#include <zstd.h>
49#endif
50
51#include "archive.h"
52#include "archive_private.h"
53#include "archive_string.h"
54#include "archive_write_private.h"
55
/*
 * When zstd.h and ZSTD_compressStream are not available, this filter
 * falls back to piping the data through an external "zstd" program.
 */
57
/*
 * Per-filter state, allocated by archive_write_add_filter_zstd() and
 * released by archive_compressor_zstd_free().
 */
struct private_data {
	/* Values of the "compression-level", "threads" and "long" options. */
	int		 compression_level;
	int		 threads;
	int		 long_distance;
#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
	/*
	 * State machine stepped by drive_compressor():
	 *   running   - input is fed to ZSTD_compressStream()
	 *   finishing - the current frame is being ended with ZSTD_endStream()
	 *   resetting - the frame has ended; the context is reset and the
	 *               per-frame counters are cleared before running again
	 */
	enum {
		running,
		finishing,
		resetting,
	} state;
	/* Non-zero once the "frame-per-file" option has been given. */
	int		 frame_per_file;
	/*
	 * Frame size limits.  A flush only ends a frame when both minimums
	 * have been exceeded; drive_compressor() ends a frame as soon as
	 * either maximum is reached.
	 */
	size_t		 min_frame_in;
	size_t		 max_frame_in;
	size_t		 min_frame_out;
	size_t		 max_frame_out;
	/* Number of frames completed so far (bumped on each reset). */
	size_t		 cur_frame;
	/* Input consumed / output produced since the current frame began. */
	size_t		 cur_frame_in;
	size_t		 cur_frame_out;
	/* Input consumed over the lifetime of the filter. */
	size_t		 total_in;
	/* Compression context created with ZSTD_createCStream(). */
	ZSTD_CStream	*cstream;
	/* Output staging buffer; out.dst is malloc'ed in the open callback. */
	ZSTD_outBuffer	 out;
#else
	/* Handle for the external "zstd" program used as a fallback. */
	struct archive_write_program_data *pdata;
#endif
};
83
/* If we don't have the library use default range values (zstdcli.c v1.4.0) */
#define CLEVEL_MIN -99
#define CLEVEL_STD_MIN 0 /* prior to 1.3.4 and more recent without using --fast */
#define CLEVEL_DEFAULT 3
#define CLEVEL_STD_MAX 19 /* without using --ultra */
#define CLEVEL_MAX 22

/* NOTE(review): not referenced anywhere in the visible part of this file. */
#define LONG_STD 27

/*
 * Minimum zstd versions for individual features, compared against
 * ZSTD_VERSION_NUMBER / ZSTD_versionNumber().
 * Presumably encoded as MAJOR*10000 + MINOR*100 + PATCH (10304 == 1.3.4)
 * -- confirm against zstd.h.
 */
#define MINVER_NEGCLEVEL 10304	/* below this the minimum level is CLEVEL_STD_MIN */
#define MINVER_MINCLEVEL 10306	/* ZSTD_minCLevel() is used from here on */
#define MINVER_LONG 10302	/* ZSTD_c_windowLog ("long" option) is used from here on */
96
/*
 * Filter callbacks.  options/open/flush/close/free are installed by
 * archive_write_add_filter_zstd(); write is installed by the open callback.
 */
static int archive_compressor_zstd_options(struct archive_write_filter *,
		    const char *, const char *);
static int archive_compressor_zstd_open(struct archive_write_filter *);
static int archive_compressor_zstd_write(struct archive_write_filter *,
		    const void *, size_t);
static int archive_compressor_zstd_flush(struct archive_write_filter *);
static int archive_compressor_zstd_close(struct archive_write_filter *);
static int archive_compressor_zstd_free(struct archive_write_filter *);
#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
/* Shared engine behind the write, flush and close callbacks. */
static int drive_compressor(struct archive_write_filter *,
		    struct private_data *, int, const void *, size_t);
#endif
109
110
111/*
112 * Add a zstd compression filter to this write handle.
113 */
114int
115archive_write_add_filter_zstd(struct archive *_a)
116{
117	struct archive_write *a = (struct archive_write *)_a;
118	struct archive_write_filter *f = __archive_write_allocate_filter(_a);
119	struct private_data *data;
120	archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC,
121	    ARCHIVE_STATE_NEW, "archive_write_add_filter_zstd");
122
123	data = calloc(1, sizeof(*data));
124	if (data == NULL) {
125		archive_set_error(&a->archive, ENOMEM, "Out of memory");
126		return (ARCHIVE_FATAL);
127	}
128	f->data = data;
129	f->open = &archive_compressor_zstd_open;
130	f->options = &archive_compressor_zstd_options;
131	f->flush = &archive_compressor_zstd_flush;
132	f->close = &archive_compressor_zstd_close;
133	f->free = &archive_compressor_zstd_free;
134	f->code = ARCHIVE_FILTER_ZSTD;
135	f->name = "zstd";
136	data->compression_level = CLEVEL_DEFAULT;
137	data->threads = 0;
138	data->long_distance = 0;
139#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
140	data->frame_per_file = 0;
141	data->min_frame_in = 0;
142	data->max_frame_in = SIZE_MAX;
143	data->min_frame_out = 0;
144	data->max_frame_out = SIZE_MAX;
145	data->cur_frame_in = 0;
146	data->cur_frame_out = 0;
147	data->cstream = ZSTD_createCStream();
148	if (data->cstream == NULL) {
149		free(data);
150		archive_set_error(&a->archive, ENOMEM,
151		    "Failed to allocate zstd compressor object");
152		return (ARCHIVE_FATAL);
153	}
154
155	return (ARCHIVE_OK);
156#else
157	data->pdata = __archive_write_program_allocate("zstd");
158	if (data->pdata == NULL) {
159		free(data);
160		archive_set_error(&a->archive, ENOMEM, "Out of memory");
161		return (ARCHIVE_FATAL);
162	}
163	archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
164	    "Using external zstd program");
165	return (ARCHIVE_WARN);
166#endif
167}
168
169static int
170archive_compressor_zstd_free(struct archive_write_filter *f)
171{
172	struct private_data *data = (struct private_data *)f->data;
173#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
174	ZSTD_freeCStream(data->cstream);
175	free(data->out.dst);
176#else
177	__archive_write_program_free(data->pdata);
178#endif
179	free(data);
180	f->data = NULL;
181	return (ARCHIVE_OK);
182}
183
184static int
185string_to_number(const char *string, intmax_t *numberp)
186{
187	char *end;
188
189	if (string == NULL || *string == '\0')
190		return (ARCHIVE_WARN);
191	*numberp = strtoimax(string, &end, 10);
192	if (end == string || *end != '\0' || errno == EOVERFLOW) {
193		*numberp = 0;
194		return (ARCHIVE_WARN);
195	}
196	return (ARCHIVE_OK);
197}
198
199#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
200static int
201string_to_size(const char *string, size_t *numberp)
202{
203	uintmax_t number;
204	char *end;
205	unsigned int shift = 0;
206
207	if (string == NULL || *string == '\0' || *string == '-')
208		return (ARCHIVE_WARN);
209	number = strtoumax(string, &end, 10);
210	if (end > string) {
211		if (*end == 'K' || *end == 'k') {
212			shift = 10;
213			end++;
214		} else if (*end == 'M' || *end == 'm') {
215			shift = 20;
216			end++;
217		} else if (*end == 'G' || *end == 'g') {
218			shift = 30;
219			end++;
220		}
221		if (*end == 'B' || *end == 'b') {
222			end++;
223		}
224	}
225	if (end == string || *end != '\0' || errno == EOVERFLOW) {
226		return (ARCHIVE_WARN);
227	}
228	if (number > (uintmax_t)SIZE_MAX >> shift) {
229		return (ARCHIVE_WARN);
230	}
231	*numberp = (size_t)(number << shift);
232	return (ARCHIVE_OK);
233}
234#endif
235
236/*
237 * Set write options.
238 */
239static int
240archive_compressor_zstd_options(struct archive_write_filter *f, const char *key,
241    const char *value)
242{
243	struct private_data *data = (struct private_data *)f->data;
244
245	if (strcmp(key, "compression-level") == 0) {
246		intmax_t level;
247		if (string_to_number(value, &level) != ARCHIVE_OK) {
248			return (ARCHIVE_WARN);
249		}
250		/* If we don't have the library, hard-code the max level */
251		int minimum = CLEVEL_MIN;
252		int maximum = CLEVEL_MAX;
253#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
254		maximum = ZSTD_maxCLevel();
255#if ZSTD_VERSION_NUMBER >= MINVER_MINCLEVEL
256		if (ZSTD_versionNumber() >= MINVER_MINCLEVEL) {
257			minimum = ZSTD_minCLevel();
258		}
259		else
260#endif
261		if (ZSTD_versionNumber() < MINVER_NEGCLEVEL) {
262			minimum = CLEVEL_STD_MIN;
263		}
264#endif
265		if (level < minimum || level > maximum) {
266			return (ARCHIVE_WARN);
267		}
268		data->compression_level = (int)level;
269		return (ARCHIVE_OK);
270	} else if (strcmp(key, "threads") == 0) {
271		intmax_t threads;
272		if (string_to_number(value, &threads) != ARCHIVE_OK) {
273			return (ARCHIVE_WARN);
274		}
275
276#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
277		if (threads == 0) {
278			threads = sysconf(_SC_NPROCESSORS_ONLN);
279		}
280#elif !defined(__CYGWIN__) && defined(_WIN32_WINNT) && \
281    _WIN32_WINNT >= 0x0601 /* _WIN32_WINNT_WIN7 */
282		if (threads == 0) {
283			DWORD winCores = GetActiveProcessorCount(
284			    ALL_PROCESSOR_GROUPS);
285			threads = (intmax_t)winCores;
286		}
287#endif
288		if (threads < 0 || threads > INT_MAX) {
289			return (ARCHIVE_WARN);
290		}
291		data->threads = (int)threads;
292		return (ARCHIVE_OK);
293#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
294	} else if (strcmp(key, "frame-per-file") == 0) {
295		data->frame_per_file = 1;
296		return (ARCHIVE_OK);
297	} else if (strcmp(key, "min-frame-in") == 0) {
298		if (string_to_size(value, &data->min_frame_in) != ARCHIVE_OK) {
299			return (ARCHIVE_WARN);
300		}
301		return (ARCHIVE_OK);
302	} else if (strcmp(key, "min-frame-out") == 0 ||
303	    strcmp(key, "min-frame-size") == 0) {
304		if (string_to_size(value, &data->min_frame_out) != ARCHIVE_OK) {
305			return (ARCHIVE_WARN);
306		}
307		return (ARCHIVE_OK);
308	} else if (strcmp(key, "max-frame-in") == 0 ||
309	    strcmp(key, "max-frame-size") == 0) {
310		if (string_to_size(value, &data->max_frame_in) != ARCHIVE_OK ||
311		    data->max_frame_in < 1024) {
312			return (ARCHIVE_WARN);
313		}
314		return (ARCHIVE_OK);
315	} else if (strcmp(key, "max-frame-out") == 0) {
316		if (string_to_size(value, &data->max_frame_out) != ARCHIVE_OK ||
317		    data->max_frame_out < 1024) {
318			return (ARCHIVE_WARN);
319		}
320		return (ARCHIVE_OK);
321#endif
322	}
323	else if (strcmp(key, "long") == 0) {
324		intmax_t long_distance;
325		if (string_to_number(value, &long_distance) != ARCHIVE_OK) {
326			return (ARCHIVE_WARN);
327		}
328#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream && ZSTD_VERSION_NUMBER >= MINVER_LONG
329		ZSTD_bounds bounds = ZSTD_cParam_getBounds(ZSTD_c_windowLog);
330		if (ZSTD_isError(bounds.error)) {
331			int max_distance = ((int)(sizeof(size_t) == 4 ? 30 : 31));
332			if (((int)long_distance) < 10 || (int)long_distance > max_distance)
333				return (ARCHIVE_WARN);
334		} else {
335			if ((int)long_distance < bounds.lowerBound || (int)long_distance > bounds.upperBound)
336				return (ARCHIVE_WARN);
337		}
338#else
339		int max_distance = ((int)(sizeof(size_t) == 4 ? 30 : 31));
340		if (((int)long_distance) < 10 || (int)long_distance > max_distance)
341		    return (ARCHIVE_WARN);
342#endif
343		data->long_distance = (int)long_distance;
344		return (ARCHIVE_OK);
345	}
346
347	/* Note: The "warn" return is just to inform the options
348	 * supervisor that we didn't handle it.  It will generate
349	 * a suitable error if no one used this option. */
350	return (ARCHIVE_WARN);
351}
352
353#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream
354/*
355 * Setup callback.
356 */
357static int
358archive_compressor_zstd_open(struct archive_write_filter *f)
359{
360	struct private_data *data = (struct private_data *)f->data;
361
362	if (data->out.dst == NULL) {
363		size_t bs = ZSTD_CStreamOutSize(), bpb;
364		if (f->archive->magic == ARCHIVE_WRITE_MAGIC) {
365			/* Buffer size should be a multiple number of
366			 * the of bytes per block for performance. */
367			bpb = archive_write_get_bytes_per_block(f->archive);
368			if (bpb > bs)
369				bs = bpb;
370			else if (bpb != 0)
371				bs -= bs % bpb;
372		}
373		data->out.size = bs;
374		data->out.pos = 0;
375		data->out.dst
376		    = (unsigned char *)malloc(data->out.size);
377		if (data->out.dst == NULL) {
378			archive_set_error(f->archive, ENOMEM,
379			    "Can't allocate data for compression buffer");
380			return (ARCHIVE_FATAL);
381		}
382	}
383
384	f->write = archive_compressor_zstd_write;
385
386	if (ZSTD_isError(ZSTD_initCStream(data->cstream,
387	    data->compression_level))) {
388		archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
389		    "Internal error initializing zstd compressor object");
390		return (ARCHIVE_FATAL);
391	}
392
393	ZSTD_CCtx_setParameter(data->cstream, ZSTD_c_nbWorkers, data->threads);
394
395#if ZSTD_VERSION_NUMBER >= MINVER_LONG
396	ZSTD_CCtx_setParameter(data->cstream, ZSTD_c_windowLog, data->long_distance);
397#endif
398
399	return (ARCHIVE_OK);
400}
401
402/*
403 * Write data to the compressed stream.
404 */
405static int
406archive_compressor_zstd_write(struct archive_write_filter *f, const void *buff,
407    size_t length)
408{
409	struct private_data *data = (struct private_data *)f->data;
410
411	return (drive_compressor(f, data, 0, buff, length));
412}
413
414/*
415 * Flush the compressed stream.
416 */
417static int
418archive_compressor_zstd_flush(struct archive_write_filter *f)
419{
420	struct private_data *data = (struct private_data *)f->data;
421
422	if (data->frame_per_file && data->state == running) {
423		if (data->cur_frame_in > data->min_frame_in &&
424		    data->cur_frame_out > data->min_frame_out) {
425			data->state = finishing;
426		}
427	}
428	return (drive_compressor(f, data, 1, NULL, 0));
429}
430
431/*
432 * Finish the compression...
433 */
434static int
435archive_compressor_zstd_close(struct archive_write_filter *f)
436{
437	struct private_data *data = (struct private_data *)f->data;
438
439	if (data->state == running)
440		data->state = finishing;
441	return (drive_compressor(f, data, 1, NULL, 0));
442}
443
444/*
445 * Utility function to push input data through compressor,
446 * writing full output blocks as necessary.
447 */
448static int
449drive_compressor(struct archive_write_filter *f,
450    struct private_data *data, int flush, const void *src, size_t length)
451{
452	ZSTD_inBuffer in = { .src = src, .size = length, .pos = 0 };
453	size_t ipos, opos, zstdret = 0;
454	int ret;
455
456	for (;;) {
457		ipos = in.pos;
458		opos = data->out.pos;
459		switch (data->state) {
460		case running:
461			if (in.pos == in.size)
462				return (ARCHIVE_OK);
463			zstdret = ZSTD_compressStream(data->cstream,
464			    &data->out, &in);
465			if (ZSTD_isError(zstdret))
466				goto zstd_fatal;
467			break;
468		case finishing:
469			zstdret = ZSTD_endStream(data->cstream, &data->out);
470			if (ZSTD_isError(zstdret))
471				goto zstd_fatal;
472			if (zstdret == 0)
473				data->state = resetting;
474			break;
475		case resetting:
476			ZSTD_CCtx_reset(data->cstream, ZSTD_reset_session_only);
477			data->cur_frame++;
478			data->cur_frame_in = 0;
479			data->cur_frame_out = 0;
480			data->state = running;
481			break;
482		}
483		data->total_in += in.pos - ipos;
484		data->cur_frame_in += in.pos - ipos;
485		data->cur_frame_out += data->out.pos - opos;
486		if (data->state == running) {
487			if (data->cur_frame_in >= data->max_frame_in ||
488			    data->cur_frame_out >= data->max_frame_out) {
489				data->state = finishing;
490			}
491		}
492		if (data->out.pos == data->out.size ||
493		    (flush && data->out.pos > 0)) {
494			ret = __archive_write_filter(f->next_filter,
495			    data->out.dst, data->out.pos);
496			if (ret != ARCHIVE_OK)
497				goto fatal;
498			data->out.pos = 0;
499		}
500	}
501zstd_fatal:
502	archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
503	    "Zstd compression failed: %s",
504	    ZSTD_getErrorName(zstdret));
505fatal:
506	return (ARCHIVE_FATAL);
507}
508
509#else /* HAVE_ZSTD_H && HAVE_ZSTD_compressStream */
510
511static int
512archive_compressor_zstd_open(struct archive_write_filter *f)
513{
514	struct private_data *data = (struct private_data *)f->data;
515	struct archive_string as;
516	int r;
517
518	archive_string_init(&as);
519	/* --no-check matches library default */
520	archive_strcpy(&as, "zstd --no-check");
521
522	if (data->compression_level < CLEVEL_STD_MIN) {
523		archive_string_sprintf(&as, " --fast=%d", -data->compression_level);
524	} else {
525		archive_string_sprintf(&as, " -%d", data->compression_level);
526	}
527
528	if (data->compression_level > CLEVEL_STD_MAX) {
529		archive_strcat(&as, " --ultra");
530	}
531
532	if (data->threads != 0) {
533		archive_string_sprintf(&as, " --threads=%d", data->threads);
534	}
535
536	if (data->long_distance != 0) {
537		archive_string_sprintf(&as, " --long=%d", data->long_distance);
538	}
539
540	f->write = archive_compressor_zstd_write;
541	r = __archive_write_program_open(f, data->pdata, as.s);
542	archive_string_free(&as);
543	return (r);
544}
545
546static int
547archive_compressor_zstd_write(struct archive_write_filter *f, const void *buff,
548    size_t length)
549{
550	struct private_data *data = (struct private_data *)f->data;
551
552	return __archive_write_program_write(f, data->pdata, buff, length);
553}
554
555static int
556archive_compressor_zstd_flush(struct archive_write_filter *f)
557{
558	(void)f; /* UNUSED */
559
560	return (ARCHIVE_OK);
561}
562
563static int
564archive_compressor_zstd_close(struct archive_write_filter *f)
565{
566	struct private_data *data = (struct private_data *)f->data;
567
568	return __archive_write_program_close(f, data->pdata);
569}
570
571#endif /* HAVE_ZSTD_H && HAVE_ZSTD_compressStream */
572