1/*- 2 * Copyright (c) 2017 Sean Purcell 3 * Copyright (c) 2023-2024 Klara, Inc. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27#include "archive_platform.h" 28 29#ifdef HAVE_ERRNO_H 30#include <errno.h> 31#endif 32#ifdef HAVE_LIMITS_H 33#include <limits.h> 34#endif 35#ifdef HAVE_STDINT_H 36#include <stdint.h> 37#endif 38#ifdef HAVE_STDLIB_H 39#include <stdlib.h> 40#endif 41#ifdef HAVE_STRING_H 42#include <string.h> 43#endif 44#ifdef HAVE_UNISTD_H 45#include <unistd.h> 46#endif 47#ifdef HAVE_ZSTD_H 48#include <zstd.h> 49#endif 50 51#include "archive.h" 52#include "archive_private.h" 53#include "archive_string.h" 54#include "archive_write_private.h" 55 56/* Don't compile this if we don't have zstd.h */ 57 58struct private_data { 59 int compression_level; 60 int threads; 61 int long_distance; 62#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream 63 enum { 64 running, 65 finishing, 66 resetting, 67 } state; 68 int frame_per_file; 69 size_t min_frame_in; 70 size_t max_frame_in; 71 size_t min_frame_out; 72 size_t max_frame_out; 73 size_t cur_frame; 74 size_t cur_frame_in; 75 size_t cur_frame_out; 76 size_t total_in; 77 ZSTD_CStream *cstream; 78 ZSTD_outBuffer out; 79#else 80 struct archive_write_program_data *pdata; 81#endif 82}; 83 84/* If we don't have the library use default range values (zstdcli.c v1.4.0) */ 85#define CLEVEL_MIN -99 86#define CLEVEL_STD_MIN 0 /* prior to 1.3.4 and more recent without using --fast */ 87#define CLEVEL_DEFAULT 3 88#define CLEVEL_STD_MAX 19 /* without using --ultra */ 89#define CLEVEL_MAX 22 90 91#define LONG_STD 27 92 93#define MINVER_NEGCLEVEL 10304 94#define MINVER_MINCLEVEL 10306 95#define MINVER_LONG 10302 96 97static int archive_compressor_zstd_options(struct archive_write_filter *, 98 const char *, const char *); 99static int archive_compressor_zstd_open(struct archive_write_filter *); 100static int archive_compressor_zstd_write(struct archive_write_filter *, 101 const void *, size_t); 102static int archive_compressor_zstd_flush(struct archive_write_filter *); 103static int archive_compressor_zstd_close(struct archive_write_filter *); 104static int archive_compressor_zstd_free(struct archive_write_filter *); 105#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream 106static int drive_compressor(struct archive_write_filter *, 107 struct private_data *, int, const void *, size_t); 108#endif 109 110 111/* 112 * Add a zstd compression filter to this write handle. 113 */ 114int 115archive_write_add_filter_zstd(struct archive *_a) 116{ 117 struct archive_write *a = (struct archive_write *)_a; 118 struct archive_write_filter *f = __archive_write_allocate_filter(_a); 119 struct private_data *data; 120 archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC, 121 ARCHIVE_STATE_NEW, "archive_write_add_filter_zstd"); 122 123 data = calloc(1, sizeof(*data)); 124 if (data == NULL) { 125 archive_set_error(&a->archive, ENOMEM, "Out of memory"); 126 return (ARCHIVE_FATAL); 127 } 128 f->data = data; 129 f->open = &archive_compressor_zstd_open; 130 f->options = &archive_compressor_zstd_options; 131 f->flush = &archive_compressor_zstd_flush; 132 f->close = &archive_compressor_zstd_close; 133 f->free = &archive_compressor_zstd_free; 134 f->code = ARCHIVE_FILTER_ZSTD; 135 f->name = "zstd"; 136 data->compression_level = CLEVEL_DEFAULT; 137 data->threads = 0; 138 data->long_distance = 0; 139#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream 140 data->frame_per_file = 0; 141 data->min_frame_in = 0; 142 data->max_frame_in = SIZE_MAX; 143 data->min_frame_out = 0; 144 data->max_frame_out = SIZE_MAX; 145 data->cur_frame_in = 0; 146 data->cur_frame_out = 0; 147 data->cstream = ZSTD_createCStream(); 148 if (data->cstream == NULL) { 149 free(data); 150 archive_set_error(&a->archive, ENOMEM, 151 "Failed to allocate zstd compressor object"); 152 return (ARCHIVE_FATAL); 153 } 154 155 return (ARCHIVE_OK); 156#else 157 data->pdata = __archive_write_program_allocate("zstd"); 158 if (data->pdata == NULL) { 159 free(data); 160 archive_set_error(&a->archive, ENOMEM, "Out of memory"); 161 return (ARCHIVE_FATAL); 162 } 163 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 164 "Using external zstd program"); 165 return (ARCHIVE_WARN); 166#endif 167} 168 169static int 170archive_compressor_zstd_free(struct archive_write_filter *f) 171{ 172 struct private_data *data = (struct private_data *)f->data; 173#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream 174 ZSTD_freeCStream(data->cstream); 175 free(data->out.dst); 176#else 177 __archive_write_program_free(data->pdata); 178#endif 179 free(data); 180 f->data = NULL; 181 return (ARCHIVE_OK); 182} 183 184static int 185string_to_number(const char *string, intmax_t *numberp) 186{ 187 char *end; 188 189 if (string == NULL || *string == '\0') 190 return (ARCHIVE_WARN); 191 *numberp = strtoimax(string, &end, 10); 192 if (end == string || *end != '\0' || errno == EOVERFLOW) { 193 *numberp = 0; 194 return (ARCHIVE_WARN); 195 } 196 return (ARCHIVE_OK); 197} 198 199#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream 200static int 201string_to_size(const char *string, size_t *numberp) 202{ 203 uintmax_t number; 204 char *end; 205 unsigned int shift = 0; 206 207 if (string == NULL || *string == '\0' || *string == '-') 208 return (ARCHIVE_WARN); 209 number = strtoumax(string, &end, 10); 210 if (end > string) { 211 if (*end == 'K' || *end == 'k') { 212 shift = 10; 213 end++; 214 } else if (*end == 'M' || *end == 'm') { 215 shift = 20; 216 end++; 217 } else if (*end == 'G' || *end == 'g') { 218 shift = 30; 219 end++; 220 } 221 if (*end == 'B' || *end == 'b') { 222 end++; 223 } 224 } 225 if (end == string || *end != '\0' || errno == EOVERFLOW) { 226 return (ARCHIVE_WARN); 227 } 228 if (number > (uintmax_t)SIZE_MAX >> shift) { 229 return (ARCHIVE_WARN); 230 } 231 *numberp = (size_t)(number << shift); 232 return (ARCHIVE_OK); 233} 234#endif 235 236/* 237 * Set write options. 238 */ 239static int 240archive_compressor_zstd_options(struct archive_write_filter *f, const char *key, 241 const char *value) 242{ 243 struct private_data *data = (struct private_data *)f->data; 244 245 if (strcmp(key, "compression-level") == 0) { 246 intmax_t level; 247 if (string_to_number(value, &level) != ARCHIVE_OK) { 248 return (ARCHIVE_WARN); 249 } 250 /* If we don't have the library, hard-code the max level */ 251 int minimum = CLEVEL_MIN; 252 int maximum = CLEVEL_MAX; 253#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream 254 maximum = ZSTD_maxCLevel(); 255#if ZSTD_VERSION_NUMBER >= MINVER_MINCLEVEL 256 if (ZSTD_versionNumber() >= MINVER_MINCLEVEL) { 257 minimum = ZSTD_minCLevel(); 258 } 259 else 260#endif 261 if (ZSTD_versionNumber() < MINVER_NEGCLEVEL) { 262 minimum = CLEVEL_STD_MIN; 263 } 264#endif 265 if (level < minimum || level > maximum) { 266 return (ARCHIVE_WARN); 267 } 268 data->compression_level = (int)level; 269 return (ARCHIVE_OK); 270 } else if (strcmp(key, "threads") == 0) { 271 intmax_t threads; 272 if (string_to_number(value, &threads) != ARCHIVE_OK) { 273 return (ARCHIVE_WARN); 274 } 275 276#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN) 277 if (threads == 0) { 278 threads = sysconf(_SC_NPROCESSORS_ONLN); 279 } 280#elif !defined(__CYGWIN__) && defined(_WIN32_WINNT) && \ 281 _WIN32_WINNT >= 0x0601 /* _WIN32_WINNT_WIN7 */ 282 if (threads == 0) { 283 DWORD winCores = GetActiveProcessorCount( 284 ALL_PROCESSOR_GROUPS); 285 threads = (intmax_t)winCores; 286 } 287#endif 288 if (threads < 0 || threads > INT_MAX) { 289 return (ARCHIVE_WARN); 290 } 291 data->threads = (int)threads; 292 return (ARCHIVE_OK); 293#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream 294 } else if (strcmp(key, "frame-per-file") == 0) { 295 data->frame_per_file = 1; 296 return (ARCHIVE_OK); 297 } else if (strcmp(key, "min-frame-in") == 0) { 298 if (string_to_size(value, &data->min_frame_in) != ARCHIVE_OK) { 299 return (ARCHIVE_WARN); 300 } 301 return (ARCHIVE_OK); 302 } else if (strcmp(key, "min-frame-out") == 0 || 303 strcmp(key, "min-frame-size") == 0) { 304 if (string_to_size(value, &data->min_frame_out) != ARCHIVE_OK) { 305 return (ARCHIVE_WARN); 306 } 307 return (ARCHIVE_OK); 308 } else if (strcmp(key, "max-frame-in") == 0 || 309 strcmp(key, "max-frame-size") == 0) { 310 if (string_to_size(value, &data->max_frame_in) != ARCHIVE_OK || 311 data->max_frame_in < 1024) { 312 return (ARCHIVE_WARN); 313 } 314 return (ARCHIVE_OK); 315 } else if (strcmp(key, "max-frame-out") == 0) { 316 if (string_to_size(value, &data->max_frame_out) != ARCHIVE_OK || 317 data->max_frame_out < 1024) { 318 return (ARCHIVE_WARN); 319 } 320 return (ARCHIVE_OK); 321#endif 322 } 323 else if (strcmp(key, "long") == 0) { 324 intmax_t long_distance; 325 if (string_to_number(value, &long_distance) != ARCHIVE_OK) { 326 return (ARCHIVE_WARN); 327 } 328#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream && ZSTD_VERSION_NUMBER >= MINVER_LONG 329 ZSTD_bounds bounds = ZSTD_cParam_getBounds(ZSTD_c_windowLog); 330 if (ZSTD_isError(bounds.error)) { 331 int max_distance = ((int)(sizeof(size_t) == 4 ? 30 : 31)); 332 if (((int)long_distance) < 10 || (int)long_distance > max_distance) 333 return (ARCHIVE_WARN); 334 } else { 335 if ((int)long_distance < bounds.lowerBound || (int)long_distance > bounds.upperBound) 336 return (ARCHIVE_WARN); 337 } 338#else 339 int max_distance = ((int)(sizeof(size_t) == 4 ? 30 : 31)); 340 if (((int)long_distance) < 10 || (int)long_distance > max_distance) 341 return (ARCHIVE_WARN); 342#endif 343 data->long_distance = (int)long_distance; 344 return (ARCHIVE_OK); 345 } 346 347 /* Note: The "warn" return is just to inform the options 348 * supervisor that we didn't handle it. It will generate 349 * a suitable error if no one used this option. */ 350 return (ARCHIVE_WARN); 351} 352 353#if HAVE_ZSTD_H && HAVE_ZSTD_compressStream 354/* 355 * Setup callback. 356 */ 357static int 358archive_compressor_zstd_open(struct archive_write_filter *f) 359{ 360 struct private_data *data = (struct private_data *)f->data; 361 362 if (data->out.dst == NULL) { 363 size_t bs = ZSTD_CStreamOutSize(), bpb; 364 if (f->archive->magic == ARCHIVE_WRITE_MAGIC) { 365 /* Buffer size should be a multiple number of 366 * the of bytes per block for performance. */ 367 bpb = archive_write_get_bytes_per_block(f->archive); 368 if (bpb > bs) 369 bs = bpb; 370 else if (bpb != 0) 371 bs -= bs % bpb; 372 } 373 data->out.size = bs; 374 data->out.pos = 0; 375 data->out.dst 376 = (unsigned char *)malloc(data->out.size); 377 if (data->out.dst == NULL) { 378 archive_set_error(f->archive, ENOMEM, 379 "Can't allocate data for compression buffer"); 380 return (ARCHIVE_FATAL); 381 } 382 } 383 384 f->write = archive_compressor_zstd_write; 385 386 if (ZSTD_isError(ZSTD_initCStream(data->cstream, 387 data->compression_level))) { 388 archive_set_error(f->archive, ARCHIVE_ERRNO_MISC, 389 "Internal error initializing zstd compressor object"); 390 return (ARCHIVE_FATAL); 391 } 392 393 ZSTD_CCtx_setParameter(data->cstream, ZSTD_c_nbWorkers, data->threads); 394 395#if ZSTD_VERSION_NUMBER >= MINVER_LONG 396 ZSTD_CCtx_setParameter(data->cstream, ZSTD_c_windowLog, data->long_distance); 397#endif 398 399 return (ARCHIVE_OK); 400} 401 402/* 403 * Write data to the compressed stream. 404 */ 405static int 406archive_compressor_zstd_write(struct archive_write_filter *f, const void *buff, 407 size_t length) 408{ 409 struct private_data *data = (struct private_data *)f->data; 410 411 return (drive_compressor(f, data, 0, buff, length)); 412} 413 414/* 415 * Flush the compressed stream. 416 */ 417static int 418archive_compressor_zstd_flush(struct archive_write_filter *f) 419{ 420 struct private_data *data = (struct private_data *)f->data; 421 422 if (data->frame_per_file && data->state == running) { 423 if (data->cur_frame_in > data->min_frame_in && 424 data->cur_frame_out > data->min_frame_out) { 425 data->state = finishing; 426 } 427 } 428 return (drive_compressor(f, data, 1, NULL, 0)); 429} 430 431/* 432 * Finish the compression... 433 */ 434static int 435archive_compressor_zstd_close(struct archive_write_filter *f) 436{ 437 struct private_data *data = (struct private_data *)f->data; 438 439 if (data->state == running) 440 data->state = finishing; 441 return (drive_compressor(f, data, 1, NULL, 0)); 442} 443 444/* 445 * Utility function to push input data through compressor, 446 * writing full output blocks as necessary. 447 */ 448static int 449drive_compressor(struct archive_write_filter *f, 450 struct private_data *data, int flush, const void *src, size_t length) 451{ 452 ZSTD_inBuffer in = { .src = src, .size = length, .pos = 0 }; 453 size_t ipos, opos, zstdret = 0; 454 int ret; 455 456 for (;;) { 457 ipos = in.pos; 458 opos = data->out.pos; 459 switch (data->state) { 460 case running: 461 if (in.pos == in.size) 462 return (ARCHIVE_OK); 463 zstdret = ZSTD_compressStream(data->cstream, 464 &data->out, &in); 465 if (ZSTD_isError(zstdret)) 466 goto zstd_fatal; 467 break; 468 case finishing: 469 zstdret = ZSTD_endStream(data->cstream, &data->out); 470 if (ZSTD_isError(zstdret)) 471 goto zstd_fatal; 472 if (zstdret == 0) 473 data->state = resetting; 474 break; 475 case resetting: 476 ZSTD_CCtx_reset(data->cstream, ZSTD_reset_session_only); 477 data->cur_frame++; 478 data->cur_frame_in = 0; 479 data->cur_frame_out = 0; 480 data->state = running; 481 break; 482 } 483 data->total_in += in.pos - ipos; 484 data->cur_frame_in += in.pos - ipos; 485 data->cur_frame_out += data->out.pos - opos; 486 if (data->state == running) { 487 if (data->cur_frame_in >= data->max_frame_in || 488 data->cur_frame_out >= data->max_frame_out) { 489 data->state = finishing; 490 } 491 } 492 if (data->out.pos == data->out.size || 493 (flush && data->out.pos > 0)) { 494 ret = __archive_write_filter(f->next_filter, 495 data->out.dst, data->out.pos); 496 if (ret != ARCHIVE_OK) 497 goto fatal; 498 data->out.pos = 0; 499 } 500 } 501zstd_fatal: 502 archive_set_error(f->archive, ARCHIVE_ERRNO_MISC, 503 "Zstd compression failed: %s", 504 ZSTD_getErrorName(zstdret)); 505fatal: 506 return (ARCHIVE_FATAL); 507} 508 509#else /* HAVE_ZSTD_H && HAVE_ZSTD_compressStream */ 510 511static int 512archive_compressor_zstd_open(struct archive_write_filter *f) 513{ 514 struct private_data *data = (struct private_data *)f->data; 515 struct archive_string as; 516 int r; 517 518 archive_string_init(&as); 519 /* --no-check matches library default */ 520 archive_strcpy(&as, "zstd --no-check"); 521 522 if (data->compression_level < CLEVEL_STD_MIN) { 523 archive_string_sprintf(&as, " --fast=%d", -data->compression_level); 524 } else { 525 archive_string_sprintf(&as, " -%d", data->compression_level); 526 } 527 528 if (data->compression_level > CLEVEL_STD_MAX) { 529 archive_strcat(&as, " --ultra"); 530 } 531 532 if (data->threads != 0) { 533 archive_string_sprintf(&as, " --threads=%d", data->threads); 534 } 535 536 if (data->long_distance != 0) { 537 archive_string_sprintf(&as, " --long=%d", data->long_distance); 538 } 539 540 f->write = archive_compressor_zstd_write; 541 r = __archive_write_program_open(f, data->pdata, as.s); 542 archive_string_free(&as); 543 return (r); 544} 545 546static int 547archive_compressor_zstd_write(struct archive_write_filter *f, const void *buff, 548 size_t length) 549{ 550 struct private_data *data = (struct private_data *)f->data; 551 552 return __archive_write_program_write(f, data->pdata, buff, length); 553} 554 555static int 556archive_compressor_zstd_flush(struct archive_write_filter *f) 557{ 558 (void)f; /* UNUSED */ 559 560 return (ARCHIVE_OK); 561} 562 563static int 564archive_compressor_zstd_close(struct archive_write_filter *f) 565{ 566 struct private_data *data = (struct private_data *)f->data; 567 568 return __archive_write_program_close(f, data->pdata); 569} 570 571#endif /* HAVE_ZSTD_H && HAVE_ZSTD_compressStream */ 572