1135045Ssobomax/* 2303095Ssobomax * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org> 3303095Ssobomax * All rights reserved. 4135045Ssobomax * 5303095Ssobomax * Redistribution and use in source and binary forms, with or without 6303095Ssobomax * modification, are permitted provided that the following conditions 7303095Ssobomax * are met: 8303095Ssobomax * 1. Redistributions of source code must retain the above copyright 9303095Ssobomax * notice, this list of conditions and the following disclaimer. 10303095Ssobomax * 2. Redistributions in binary form must reproduce the above copyright 11303095Ssobomax * notice, this list of conditions and the following disclaimer in the 12303095Ssobomax * documentation and/or other materials provided with the distribution. 13135045Ssobomax * 14303095Ssobomax * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15303095Ssobomax * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16303095Ssobomax * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17303095Ssobomax * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18303095Ssobomax * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19303095Ssobomax * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20303095Ssobomax * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21303095Ssobomax * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22303095Ssobomax * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23303095Ssobomax * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24303095Ssobomax * SUCH DAMAGE. 25135045Ssobomax */ 26135045Ssobomax 27303095Ssobomax#include <sys/cdefs.h> 28303095Ssobomax__FBSDID("$FreeBSD: stable/10/usr.bin/mkuzip/mkuzip.c 319267 2017-05-30 22:48:17Z asomers $"); 29303095Ssobomax 30135045Ssobomax#include <sys/types.h> 31167272Sfjoe#include <sys/disk.h> 32135045Ssobomax#include <sys/endian.h> 33135045Ssobomax#include <sys/param.h> 34303095Ssobomax#include <sys/sysctl.h> 35135045Ssobomax#include <sys/stat.h> 36135045Ssobomax#include <sys/uio.h> 37135045Ssobomax#include <netinet/in.h> 38303095Ssobomax#include <assert.h> 39303095Ssobomax#include <ctype.h> 40135045Ssobomax#include <err.h> 41135045Ssobomax#include <fcntl.h> 42303095Ssobomax#include <pthread.h> 43135045Ssobomax#include <signal.h> 44303095Ssobomax#include <stdint.h> 45135045Ssobomax#include <stdio.h> 46135045Ssobomax#include <stdlib.h> 47135045Ssobomax#include <string.h> 48135045Ssobomax#include <unistd.h> 49135045Ssobomax 50303095Ssobomax#include "mkuzip.h" 51303095Ssobomax#include "mkuz_cloop.h" 52303095Ssobomax#include "mkuz_blockcache.h" 53303095Ssobomax#include "mkuz_zlib.h" 54303095Ssobomax#include "mkuz_lzma.h" 55303095Ssobomax#include "mkuz_blk.h" 56303095Ssobomax#include "mkuz_cfg.h" 57303095Ssobomax#include "mkuz_conveyor.h" 58303095Ssobomax#include "mkuz_format.h" 59303095Ssobomax#include "mkuz_fqueue.h" 60303095Ssobomax#include "mkuz_time.h" 61135045Ssobomax 62303095Ssobomax#define DEFAULT_CLSTSIZE 16384 63135045Ssobomax 64303095Ssobomaxstatic struct mkuz_format uzip_fmt = { 65303095Ssobomax .magic = CLOOP_MAGIC_ZLIB, 66303095Ssobomax .default_sufx = DEFAULT_SUFX_ZLIB, 67303095Ssobomax .f_init = &mkuz_zlib_init, 68303095Ssobomax .f_compress = &mkuz_zlib_compress 69303095Ssobomax}; 70303095Ssobomax 71303095Ssobomaxstatic struct mkuz_format ulzma_fmt = { 72303095Ssobomax .magic = CLOOP_MAGIC_LZMA, 73303095Ssobomax .default_sufx = DEFAULT_SUFX_LZMA, 74303095Ssobomax .f_init = &mkuz_lzma_init, 75303095Ssobomax .f_compress = &mkuz_lzma_compress 76303095Ssobomax}; 77303095Ssobomax 78303095Ssobomaxstatic struct mkuz_blk *readblock(int, u_int32_t); 79135045Ssobomaxstatic void usage(void); 80135045Ssobomaxstatic void cleanup(void); 81135045Ssobomax 82135045Ssobomaxstatic char *cleanfile = NULL; 83135045Ssobomax 84303095Ssobomaxstatic int 85303095Ssobomaxcmp_blkno(const struct mkuz_blk *bp, void *p) 86303095Ssobomax{ 87303095Ssobomax uint32_t *ap; 88303095Ssobomax 89303095Ssobomax ap = (uint32_t *)p; 90303095Ssobomax 91303095Ssobomax return (bp->info.blkno == *ap); 92303095Ssobomax} 93303095Ssobomax 94135045Ssobomaxint main(int argc, char **argv) 95135045Ssobomax{ 96303095Ssobomax struct mkuz_cfg cfs; 97303095Ssobomax char *iname, *oname; 98135045Ssobomax uint64_t *toc; 99303095Ssobomax int i, io, opt, tmp; 100303095Ssobomax struct { 101303095Ssobomax int en; 102303095Ssobomax FILE *f; 103303095Ssobomax } summary; 104135045Ssobomax struct iovec iov[2]; 105135045Ssobomax struct stat sb; 106303095Ssobomax uint64_t offset, last_offset; 107303095Ssobomax struct cloop_header hdr; 108303095Ssobomax struct mkuz_conveyor *cvp; 109303095Ssobomax void *c_ctx; 110303095Ssobomax struct mkuz_blk_info *chit; 111319267Sasomers size_t ncpusz, ncpu, magiclen; 112303095Ssobomax double st, et; 113135045Ssobomax 114303095Ssobomax st = getdtime(); 115303095Ssobomax 116303095Ssobomax ncpusz = sizeof(size_t); 117303095Ssobomax if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) { 118303095Ssobomax ncpu = 1; 119303095Ssobomax } else if (ncpu > MAX_WORKERS_AUTO) { 120303095Ssobomax ncpu = MAX_WORKERS_AUTO; 121303095Ssobomax } 122303095Ssobomax 123135045Ssobomax memset(&hdr, 0, sizeof(hdr)); 124303095Ssobomax cfs.blksz = DEFAULT_CLSTSIZE; 125135045Ssobomax oname = NULL; 126303095Ssobomax cfs.verbose = 0; 127303095Ssobomax cfs.no_zcomp = 0; 128303095Ssobomax cfs.en_dedup = 0; 129303095Ssobomax summary.en = 0; 130303095Ssobomax summary.f = stderr; 131303095Ssobomax cfs.handler = &uzip_fmt; 132303095Ssobomax cfs.nworkers = ncpu; 133303095Ssobomax struct mkuz_blk *iblk, *oblk; 134135045Ssobomax 135303095Ssobomax while((opt = getopt(argc, argv, "o:s:vZdLSj:")) != -1) { 136135045Ssobomax switch(opt) { 137135045Ssobomax case 'o': 138135045Ssobomax oname = optarg; 139135045Ssobomax break; 140135045Ssobomax 141135045Ssobomax case 's': 142135045Ssobomax tmp = atoi(optarg); 143135045Ssobomax if (tmp <= 0) { 144135045Ssobomax errx(1, "invalid cluster size specified: %s", 145135045Ssobomax optarg); 146135045Ssobomax /* Not reached */ 147135045Ssobomax } 148303095Ssobomax cfs.blksz = tmp; 149135045Ssobomax break; 150135045Ssobomax 151135045Ssobomax case 'v': 152303095Ssobomax cfs.verbose = 1; 153135045Ssobomax break; 154135045Ssobomax 155303095Ssobomax case 'Z': 156303095Ssobomax cfs.no_zcomp = 1; 157303095Ssobomax break; 158303095Ssobomax 159303095Ssobomax case 'd': 160303095Ssobomax cfs.en_dedup = 1; 161303095Ssobomax break; 162303095Ssobomax 163303095Ssobomax case 'L': 164303095Ssobomax cfs.handler = &ulzma_fmt; 165303095Ssobomax break; 166303095Ssobomax 167303095Ssobomax case 'S': 168303095Ssobomax summary.en = 1; 169303095Ssobomax summary.f = stdout; 170303095Ssobomax break; 171303095Ssobomax 172303095Ssobomax case 'j': 173303095Ssobomax tmp = atoi(optarg); 174303095Ssobomax if (tmp <= 0) { 175303095Ssobomax errx(1, "invalid number of compression threads" 176303095Ssobomax " specified: %s", optarg); 177303095Ssobomax /* Not reached */ 178303095Ssobomax } 179303095Ssobomax cfs.nworkers = tmp; 180303095Ssobomax break; 181303095Ssobomax 182135045Ssobomax default: 183135045Ssobomax usage(); 184135045Ssobomax /* Not reached */ 185135045Ssobomax } 186135045Ssobomax } 187135045Ssobomax argc -= optind; 188135045Ssobomax argv += optind; 189135045Ssobomax 190135045Ssobomax if (argc != 1) { 191135045Ssobomax usage(); 192135045Ssobomax /* Not reached */ 193135045Ssobomax } 194135045Ssobomax 195319267Sasomers magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic)); 196319267Sasomers assert(magiclen < sizeof(hdr.magic)); 197303095Ssobomax 198303095Ssobomax if (cfs.en_dedup != 0) { 199303095Ssobomax hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3; 200303095Ssobomax hdr.magic[CLOOP_OFS_COMPR] = 201303095Ssobomax tolower(hdr.magic[CLOOP_OFS_COMPR]); 202303095Ssobomax } 203303095Ssobomax 204303095Ssobomax c_ctx = cfs.handler->f_init(cfs.blksz); 205303095Ssobomax 206135045Ssobomax iname = argv[0]; 207135045Ssobomax if (oname == NULL) { 208303095Ssobomax asprintf(&oname, "%s%s", iname, cfs.handler->default_sufx); 209135045Ssobomax if (oname == NULL) { 210135045Ssobomax err(1, "can't allocate memory"); 211135045Ssobomax /* Not reached */ 212135045Ssobomax } 213135045Ssobomax } 214135045Ssobomax 215135045Ssobomax signal(SIGHUP, exit); 216135045Ssobomax signal(SIGINT, exit); 217135045Ssobomax signal(SIGTERM, exit); 218135045Ssobomax signal(SIGXCPU, exit); 219135045Ssobomax signal(SIGXFSZ, exit); 220135045Ssobomax atexit(cleanup); 221135045Ssobomax 222303095Ssobomax cfs.fdr = open(iname, O_RDONLY); 223303095Ssobomax if (cfs.fdr < 0) { 224167272Sfjoe err(1, "open(%s)", iname); 225135045Ssobomax /* Not reached */ 226135045Ssobomax } 227303095Ssobomax if (fstat(cfs.fdr, &sb) != 0) { 228167272Sfjoe err(1, "fstat(%s)", iname); 229167272Sfjoe /* Not reached */ 230167272Sfjoe } 231167272Sfjoe if (S_ISCHR(sb.st_mode)) { 232167272Sfjoe off_t ms; 233167272Sfjoe 234303095Ssobomax if (ioctl(cfs.fdr, DIOCGMEDIASIZE, &ms) < 0) { 235167272Sfjoe err(1, "ioctl(DIOCGMEDIASIZE)"); 236167272Sfjoe /* Not reached */ 237167272Sfjoe } 238167272Sfjoe sb.st_size = ms; 239167272Sfjoe } else if (!S_ISREG(sb.st_mode)) { 240167272Sfjoe fprintf(stderr, "%s: not a character device or regular file\n", 241167272Sfjoe iname); 242167272Sfjoe exit(1); 243167272Sfjoe } 244303095Ssobomax hdr.nblocks = sb.st_size / cfs.blksz; 245303095Ssobomax if ((sb.st_size % cfs.blksz) != 0) { 246303095Ssobomax if (cfs.verbose != 0) 247135058Ssobomax fprintf(stderr, "file size is not multiple " 248303095Ssobomax "of %d, padding data\n", cfs.blksz); 249135058Ssobomax hdr.nblocks++; 250135045Ssobomax } 251303095Ssobomax toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc)); 252135045Ssobomax 253303095Ssobomax cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT, 254146107Sfjoe S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); 255303095Ssobomax if (cfs.fdw < 0) { 256155074Spjd err(1, "open(%s)", oname); 257135045Ssobomax /* Not reached */ 258135045Ssobomax } 259135045Ssobomax cleanfile = oname; 260135045Ssobomax 261135045Ssobomax /* Prepare header that we will write later when we have index ready. */ 262135045Ssobomax iov[0].iov_base = (char *)&hdr; 263135045Ssobomax iov[0].iov_len = sizeof(hdr); 264135045Ssobomax iov[1].iov_base = (char *)toc; 265135045Ssobomax iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc); 266135045Ssobomax offset = iov[0].iov_len + iov[1].iov_len; 267135045Ssobomax 268135045Ssobomax /* Reserve space for header */ 269303095Ssobomax lseek(cfs.fdw, offset, SEEK_SET); 270135045Ssobomax 271303095Ssobomax if (cfs.verbose != 0) { 272145808Ssobomax fprintf(stderr, "data size %ju bytes, number of clusters " 273146107Sfjoe "%u, index length %zu bytes\n", sb.st_size, 274135058Ssobomax hdr.nblocks, iov[1].iov_len); 275303095Ssobomax } 276135045Ssobomax 277303095Ssobomax cvp = mkuz_conveyor_ctor(&cfs); 278303095Ssobomax 279303095Ssobomax last_offset = 0; 280303095Ssobomax iblk = oblk = NULL; 281303095Ssobomax for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) { 282303095Ssobomax iblk = readblock(cfs.fdr, cfs.blksz); 283303095Ssobomax mkuz_fqueue_enq(cvp->wrk_queue, iblk); 284303095Ssobomax if (iblk != MKUZ_BLK_EOF && 285303095Ssobomax (i < (cfs.nworkers * ITEMS_PER_WORKER))) { 286303095Ssobomax continue; 287303095Ssobomax } 288303095Ssobomaxdrain: 289303095Ssobomax oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io); 290303095Ssobomax assert(oblk->info.blkno == (unsigned)io); 291303095Ssobomax oblk->info.offset = offset; 292303095Ssobomax chit = NULL; 293303095Ssobomax if (cfs.en_dedup != 0 && oblk->info.len > 0) { 294303095Ssobomax chit = mkuz_blkcache_regblock(cfs.fdw, oblk); 295303095Ssobomax /* 296303095Ssobomax * There should be at least one non-empty block 297303095Ssobomax * between us and the backref'ed offset, otherwise 298303095Ssobomax * we won't be able to parse that sequence correctly 299303095Ssobomax * as it would be indistinguishible from another 300303095Ssobomax * empty block. 301303095Ssobomax */ 302303095Ssobomax if (chit != NULL && chit->offset == last_offset) { 303303095Ssobomax chit = NULL; 304303095Ssobomax } 305303095Ssobomax } 306303095Ssobomax if (chit != NULL) { 307303095Ssobomax toc[io] = htobe64(chit->offset); 308303095Ssobomax oblk->info.len = 0; 309303095Ssobomax } else { 310303095Ssobomax if (oblk->info.len > 0 && write(cfs.fdw, oblk->data, 311303095Ssobomax oblk->info.len) < 0) { 312303095Ssobomax err(1, "write(%s)", oname); 313135045Ssobomax /* Not reached */ 314135045Ssobomax } 315303095Ssobomax toc[io] = htobe64(offset); 316303095Ssobomax last_offset = offset; 317303095Ssobomax offset += oblk->info.len; 318135045Ssobomax } 319303095Ssobomax if (cfs.verbose != 0) { 320303095Ssobomax fprintf(stderr, "cluster #%d, in %u bytes, " 321303095Ssobomax "out len=%lu offset=%lu", io, cfs.blksz, 322303095Ssobomax (u_long)oblk->info.len, (u_long)be64toh(toc[io])); 323303095Ssobomax if (chit != NULL) { 324303095Ssobomax fprintf(stderr, " (backref'ed to #%d)", 325303095Ssobomax chit->blkno); 326303095Ssobomax } 327303095Ssobomax fprintf(stderr, "\n"); 328135045Ssobomax } 329303095Ssobomax free(oblk); 330303095Ssobomax io += 1; 331303095Ssobomax if (iblk == MKUZ_BLK_EOF) { 332303095Ssobomax if (io < i) 333303095Ssobomax goto drain; 334303095Ssobomax /* Last block, see if we need to add some padding */ 335303095Ssobomax if ((offset % DEV_BSIZE) == 0) 336303095Ssobomax continue; 337303095Ssobomax oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE)); 338303095Ssobomax oblk->info.blkno = io; 339303095Ssobomax oblk->info.len = oblk->alen; 340303095Ssobomax if (cfs.verbose != 0) { 341303095Ssobomax fprintf(stderr, "padding data with %lu bytes " 342303095Ssobomax "so that file size is multiple of %d\n", 343303095Ssobomax (u_long)oblk->alen, DEV_BSIZE); 344303095Ssobomax } 345303095Ssobomax mkuz_fqueue_enq(cvp->results, oblk); 346303095Ssobomax goto drain; 347303095Ssobomax } 348135045Ssobomax } 349135045Ssobomax 350303095Ssobomax close(cfs.fdr); 351135045Ssobomax 352303095Ssobomax if (cfs.verbose != 0 || summary.en != 0) { 353303095Ssobomax et = getdtime(); 354303095Ssobomax fprintf(summary.f, "compressed data to %ju bytes, saved %lld " 355303095Ssobomax "bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset, 356303095Ssobomax (long long)(sb.st_size - offset), 357303095Ssobomax 100.0 * (long long)(sb.st_size - offset) / 358303095Ssobomax (float)sb.st_size, (float)sb.st_size / (et - st)); 359303095Ssobomax } 360303095Ssobomax 361135045Ssobomax /* Convert to big endian */ 362303095Ssobomax hdr.blksz = htonl(cfs.blksz); 363135045Ssobomax hdr.nblocks = htonl(hdr.nblocks); 364135045Ssobomax /* Write headers into pre-allocated space */ 365303095Ssobomax lseek(cfs.fdw, 0, SEEK_SET); 366303095Ssobomax if (writev(cfs.fdw, iov, 2) < 0) { 367155074Spjd err(1, "writev(%s)", oname); 368135045Ssobomax /* Not reached */ 369135045Ssobomax } 370135045Ssobomax cleanfile = NULL; 371303095Ssobomax close(cfs.fdw); 372135045Ssobomax 373135045Ssobomax exit(0); 374135045Ssobomax} 375135045Ssobomax 376303095Ssobomaxstatic struct mkuz_blk * 377303095Ssobomaxreadblock(int fd, u_int32_t clstsize) 378135058Ssobomax{ 379135045Ssobomax int numread; 380303095Ssobomax struct mkuz_blk *rval; 381303095Ssobomax static int blockcnt; 382303095Ssobomax off_t cpos; 383135045Ssobomax 384303095Ssobomax rval = mkuz_blk_ctor(clstsize); 385303095Ssobomax 386303095Ssobomax rval->info.blkno = blockcnt; 387303095Ssobomax blockcnt += 1; 388303095Ssobomax cpos = lseek(fd, 0, SEEK_CUR); 389303095Ssobomax if (cpos < 0) { 390303095Ssobomax err(1, "readblock: lseek() failed"); 391303095Ssobomax /* Not reached */ 392303095Ssobomax } 393303095Ssobomax rval->info.offset = cpos; 394303095Ssobomax 395303095Ssobomax numread = read(fd, rval->data, clstsize); 396135045Ssobomax if (numread < 0) { 397303095Ssobomax err(1, "readblock: read() failed"); 398135045Ssobomax /* Not reached */ 399135045Ssobomax } 400135045Ssobomax if (numread == 0) { 401303095Ssobomax free(rval); 402303095Ssobomax return MKUZ_BLK_EOF; 403135045Ssobomax } 404303095Ssobomax rval->info.len = numread; 405303095Ssobomax return rval; 406135045Ssobomax} 407135045Ssobomax 408135045Ssobomaxstatic void 409135058Ssobomaxusage(void) 410135058Ssobomax{ 411135045Ssobomax 412303095Ssobomax fprintf(stderr, "usage: mkuzip [-vZdLS] [-o outfile] [-s cluster_size] " 413303095Ssobomax "[-j ncompr] infile\n"); 414135045Ssobomax exit(1); 415135045Ssobomax} 416135045Ssobomax 417303095Ssobomaxvoid * 418303095Ssobomaxmkuz_safe_malloc(size_t size) 419135058Ssobomax{ 420135045Ssobomax void *retval; 421135045Ssobomax 422135045Ssobomax retval = malloc(size); 423135045Ssobomax if (retval == NULL) { 424135045Ssobomax err(1, "can't allocate memory"); 425135045Ssobomax /* Not reached */ 426135045Ssobomax } 427135045Ssobomax return retval; 428135045Ssobomax} 429135045Ssobomax 430303095Ssobomaxvoid * 431303095Ssobomaxmkuz_safe_zmalloc(size_t size) 432303095Ssobomax{ 433303095Ssobomax void *retval; 434303095Ssobomax 435303095Ssobomax retval = mkuz_safe_malloc(size); 436303095Ssobomax bzero(retval, size); 437303095Ssobomax return retval; 438303095Ssobomax} 439303095Ssobomax 440135045Ssobomaxstatic void 441135058Ssobomaxcleanup(void) 442135058Ssobomax{ 443135045Ssobomax 444135045Ssobomax if (cleanfile != NULL) 445135045Ssobomax unlink(cleanfile); 446135045Ssobomax} 447303095Ssobomax 448303095Ssobomaxint 449303095Ssobomaxmkuz_memvcmp(const void *memory, unsigned char val, size_t size) 450303095Ssobomax{ 451303095Ssobomax const u_char *mm; 452303095Ssobomax 453303095Ssobomax mm = (const u_char *)memory; 454303095Ssobomax return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0; 455303095Ssobomax} 456