/*
 * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
26135045Ssobomax
27303095Ssobomax#include <sys/cdefs.h>
28303095Ssobomax__FBSDID("$FreeBSD: stable/10/usr.bin/mkuzip/mkuzip.c 319267 2017-05-30 22:48:17Z asomers $");
29303095Ssobomax
30135045Ssobomax#include <sys/types.h>
31167272Sfjoe#include <sys/disk.h>
32135045Ssobomax#include <sys/endian.h>
33135045Ssobomax#include <sys/param.h>
34303095Ssobomax#include <sys/sysctl.h>
35135045Ssobomax#include <sys/stat.h>
36135045Ssobomax#include <sys/uio.h>
37135045Ssobomax#include <netinet/in.h>
38303095Ssobomax#include <assert.h>
39303095Ssobomax#include <ctype.h>
40135045Ssobomax#include <err.h>
41135045Ssobomax#include <fcntl.h>
42303095Ssobomax#include <pthread.h>
43135045Ssobomax#include <signal.h>
44303095Ssobomax#include <stdint.h>
45135045Ssobomax#include <stdio.h>
46135045Ssobomax#include <stdlib.h>
47135045Ssobomax#include <string.h>
48135045Ssobomax#include <unistd.h>
49135045Ssobomax
50303095Ssobomax#include "mkuzip.h"
51303095Ssobomax#include "mkuz_cloop.h"
52303095Ssobomax#include "mkuz_blockcache.h"
53303095Ssobomax#include "mkuz_zlib.h"
54303095Ssobomax#include "mkuz_lzma.h"
55303095Ssobomax#include "mkuz_blk.h"
56303095Ssobomax#include "mkuz_cfg.h"
57303095Ssobomax#include "mkuz_conveyor.h"
58303095Ssobomax#include "mkuz_format.h"
59303095Ssobomax#include "mkuz_fqueue.h"
60303095Ssobomax#include "mkuz_time.h"
61135045Ssobomax
62303095Ssobomax#define DEFAULT_CLSTSIZE	16384
63135045Ssobomax
64303095Ssobomaxstatic struct mkuz_format uzip_fmt = {
65303095Ssobomax	.magic = CLOOP_MAGIC_ZLIB,
66303095Ssobomax	.default_sufx = DEFAULT_SUFX_ZLIB,
67303095Ssobomax	.f_init = &mkuz_zlib_init,
68303095Ssobomax	.f_compress = &mkuz_zlib_compress
69303095Ssobomax};
70303095Ssobomax
71303095Ssobomaxstatic struct mkuz_format ulzma_fmt = {
72303095Ssobomax        .magic = CLOOP_MAGIC_LZMA,
73303095Ssobomax        .default_sufx = DEFAULT_SUFX_LZMA,
74303095Ssobomax        .f_init = &mkuz_lzma_init,
75303095Ssobomax        .f_compress = &mkuz_lzma_compress
76303095Ssobomax};
77303095Ssobomax
/* File-local helpers defined further down. */
static struct mkuz_blk *readblock(int fd, u_int32_t clstsize);
static void usage(void);
static void cleanup(void);

/*
 * Path that cleanup() unlinks when the process exits; NULL (the initial
 * value of a static pointer) means there is nothing to remove.
 */
static char *cleanfile;
83135045Ssobomax
84303095Ssobomaxstatic int
85303095Ssobomaxcmp_blkno(const struct mkuz_blk *bp, void *p)
86303095Ssobomax{
87303095Ssobomax	uint32_t *ap;
88303095Ssobomax
89303095Ssobomax	ap = (uint32_t *)p;
90303095Ssobomax
91303095Ssobomax	return (bp->info.blkno == *ap);
92303095Ssobomax}
93303095Ssobomax
94135045Ssobomaxint main(int argc, char **argv)
95135045Ssobomax{
96303095Ssobomax	struct mkuz_cfg cfs;
97303095Ssobomax	char *iname, *oname;
98135045Ssobomax	uint64_t *toc;
99303095Ssobomax	int i, io, opt, tmp;
100303095Ssobomax	struct {
101303095Ssobomax		int en;
102303095Ssobomax		FILE *f;
103303095Ssobomax	} summary;
104135045Ssobomax	struct iovec iov[2];
105135045Ssobomax	struct stat sb;
106303095Ssobomax	uint64_t offset, last_offset;
107303095Ssobomax	struct cloop_header hdr;
108303095Ssobomax	struct mkuz_conveyor *cvp;
109303095Ssobomax        void *c_ctx;
110303095Ssobomax	struct mkuz_blk_info *chit;
111319267Sasomers	size_t ncpusz, ncpu, magiclen;
112303095Ssobomax	double st, et;
113135045Ssobomax
114303095Ssobomax	st = getdtime();
115303095Ssobomax
116303095Ssobomax	ncpusz = sizeof(size_t);
117303095Ssobomax	if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) {
118303095Ssobomax		ncpu = 1;
119303095Ssobomax	} else if (ncpu > MAX_WORKERS_AUTO) {
120303095Ssobomax		ncpu = MAX_WORKERS_AUTO;
121303095Ssobomax	}
122303095Ssobomax
123135045Ssobomax	memset(&hdr, 0, sizeof(hdr));
124303095Ssobomax	cfs.blksz = DEFAULT_CLSTSIZE;
125135045Ssobomax	oname = NULL;
126303095Ssobomax	cfs.verbose = 0;
127303095Ssobomax	cfs.no_zcomp = 0;
128303095Ssobomax	cfs.en_dedup = 0;
129303095Ssobomax	summary.en = 0;
130303095Ssobomax	summary.f = stderr;
131303095Ssobomax	cfs.handler = &uzip_fmt;
132303095Ssobomax	cfs.nworkers = ncpu;
133303095Ssobomax	struct mkuz_blk *iblk, *oblk;
134135045Ssobomax
135303095Ssobomax	while((opt = getopt(argc, argv, "o:s:vZdLSj:")) != -1) {
136135045Ssobomax		switch(opt) {
137135045Ssobomax		case 'o':
138135045Ssobomax			oname = optarg;
139135045Ssobomax			break;
140135045Ssobomax
141135045Ssobomax		case 's':
142135045Ssobomax			tmp = atoi(optarg);
143135045Ssobomax			if (tmp <= 0) {
144135045Ssobomax				errx(1, "invalid cluster size specified: %s",
145135045Ssobomax				    optarg);
146135045Ssobomax				/* Not reached */
147135045Ssobomax			}
148303095Ssobomax			cfs.blksz = tmp;
149135045Ssobomax			break;
150135045Ssobomax
151135045Ssobomax		case 'v':
152303095Ssobomax			cfs.verbose = 1;
153135045Ssobomax			break;
154135045Ssobomax
155303095Ssobomax		case 'Z':
156303095Ssobomax			cfs.no_zcomp = 1;
157303095Ssobomax			break;
158303095Ssobomax
159303095Ssobomax		case 'd':
160303095Ssobomax			cfs.en_dedup = 1;
161303095Ssobomax			break;
162303095Ssobomax
163303095Ssobomax		case 'L':
164303095Ssobomax			cfs.handler = &ulzma_fmt;
165303095Ssobomax			break;
166303095Ssobomax
167303095Ssobomax		case 'S':
168303095Ssobomax			summary.en = 1;
169303095Ssobomax			summary.f = stdout;
170303095Ssobomax			break;
171303095Ssobomax
172303095Ssobomax		case 'j':
173303095Ssobomax			tmp = atoi(optarg);
174303095Ssobomax			if (tmp <= 0) {
175303095Ssobomax				errx(1, "invalid number of compression threads"
176303095Ssobomax                                    " specified: %s", optarg);
177303095Ssobomax				/* Not reached */
178303095Ssobomax			}
179303095Ssobomax			cfs.nworkers = tmp;
180303095Ssobomax			break;
181303095Ssobomax
182135045Ssobomax		default:
183135045Ssobomax			usage();
184135045Ssobomax			/* Not reached */
185135045Ssobomax		}
186135045Ssobomax	}
187135045Ssobomax	argc -= optind;
188135045Ssobomax	argv += optind;
189135045Ssobomax
190135045Ssobomax	if (argc != 1) {
191135045Ssobomax		usage();
192135045Ssobomax		/* Not reached */
193135045Ssobomax	}
194135045Ssobomax
195319267Sasomers	magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic));
196319267Sasomers	assert(magiclen < sizeof(hdr.magic));
197303095Ssobomax
198303095Ssobomax	if (cfs.en_dedup != 0) {
199303095Ssobomax		hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
200303095Ssobomax		hdr.magic[CLOOP_OFS_COMPR] =
201303095Ssobomax		    tolower(hdr.magic[CLOOP_OFS_COMPR]);
202303095Ssobomax	}
203303095Ssobomax
204303095Ssobomax	c_ctx = cfs.handler->f_init(cfs.blksz);
205303095Ssobomax
206135045Ssobomax	iname = argv[0];
207135045Ssobomax	if (oname == NULL) {
208303095Ssobomax		asprintf(&oname, "%s%s", iname, cfs.handler->default_sufx);
209135045Ssobomax		if (oname == NULL) {
210135045Ssobomax			err(1, "can't allocate memory");
211135045Ssobomax			/* Not reached */
212135045Ssobomax		}
213135045Ssobomax	}
214135045Ssobomax
215135045Ssobomax	signal(SIGHUP, exit);
216135045Ssobomax	signal(SIGINT, exit);
217135045Ssobomax	signal(SIGTERM, exit);
218135045Ssobomax	signal(SIGXCPU, exit);
219135045Ssobomax	signal(SIGXFSZ, exit);
220135045Ssobomax	atexit(cleanup);
221135045Ssobomax
222303095Ssobomax	cfs.fdr = open(iname, O_RDONLY);
223303095Ssobomax	if (cfs.fdr < 0) {
224167272Sfjoe		err(1, "open(%s)", iname);
225135045Ssobomax		/* Not reached */
226135045Ssobomax	}
227303095Ssobomax	if (fstat(cfs.fdr, &sb) != 0) {
228167272Sfjoe		err(1, "fstat(%s)", iname);
229167272Sfjoe		/* Not reached */
230167272Sfjoe	}
231167272Sfjoe	if (S_ISCHR(sb.st_mode)) {
232167272Sfjoe		off_t ms;
233167272Sfjoe
234303095Ssobomax		if (ioctl(cfs.fdr, DIOCGMEDIASIZE, &ms) < 0) {
235167272Sfjoe			err(1, "ioctl(DIOCGMEDIASIZE)");
236167272Sfjoe			/* Not reached */
237167272Sfjoe		}
238167272Sfjoe		sb.st_size = ms;
239167272Sfjoe	} else if (!S_ISREG(sb.st_mode)) {
240167272Sfjoe		fprintf(stderr, "%s: not a character device or regular file\n",
241167272Sfjoe			iname);
242167272Sfjoe		exit(1);
243167272Sfjoe	}
244303095Ssobomax	hdr.nblocks = sb.st_size / cfs.blksz;
245303095Ssobomax	if ((sb.st_size % cfs.blksz) != 0) {
246303095Ssobomax		if (cfs.verbose != 0)
247135058Ssobomax			fprintf(stderr, "file size is not multiple "
248303095Ssobomax			"of %d, padding data\n", cfs.blksz);
249135058Ssobomax		hdr.nblocks++;
250135045Ssobomax	}
251303095Ssobomax	toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc));
252135045Ssobomax
253303095Ssobomax	cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT,
254146107Sfjoe		   S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
255303095Ssobomax	if (cfs.fdw < 0) {
256155074Spjd		err(1, "open(%s)", oname);
257135045Ssobomax		/* Not reached */
258135045Ssobomax	}
259135045Ssobomax	cleanfile = oname;
260135045Ssobomax
261135045Ssobomax	/* Prepare header that we will write later when we have index ready. */
262135045Ssobomax	iov[0].iov_base = (char *)&hdr;
263135045Ssobomax	iov[0].iov_len = sizeof(hdr);
264135045Ssobomax	iov[1].iov_base = (char *)toc;
265135045Ssobomax	iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc);
266135045Ssobomax	offset = iov[0].iov_len + iov[1].iov_len;
267135045Ssobomax
268135045Ssobomax	/* Reserve space for header */
269303095Ssobomax	lseek(cfs.fdw, offset, SEEK_SET);
270135045Ssobomax
271303095Ssobomax	if (cfs.verbose != 0) {
272145808Ssobomax		fprintf(stderr, "data size %ju bytes, number of clusters "
273146107Sfjoe		    "%u, index length %zu bytes\n", sb.st_size,
274135058Ssobomax		    hdr.nblocks, iov[1].iov_len);
275303095Ssobomax	}
276135045Ssobomax
277303095Ssobomax	cvp = mkuz_conveyor_ctor(&cfs);
278303095Ssobomax
279303095Ssobomax	last_offset = 0;
280303095Ssobomax        iblk = oblk = NULL;
281303095Ssobomax	for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) {
282303095Ssobomax		iblk = readblock(cfs.fdr, cfs.blksz);
283303095Ssobomax		mkuz_fqueue_enq(cvp->wrk_queue, iblk);
284303095Ssobomax		if (iblk != MKUZ_BLK_EOF &&
285303095Ssobomax		    (i < (cfs.nworkers * ITEMS_PER_WORKER))) {
286303095Ssobomax			continue;
287303095Ssobomax		}
288303095Ssobomaxdrain:
289303095Ssobomax		oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io);
290303095Ssobomax		assert(oblk->info.blkno == (unsigned)io);
291303095Ssobomax		oblk->info.offset = offset;
292303095Ssobomax		chit = NULL;
293303095Ssobomax		if (cfs.en_dedup != 0 && oblk->info.len > 0) {
294303095Ssobomax			chit = mkuz_blkcache_regblock(cfs.fdw, oblk);
295303095Ssobomax			/*
296303095Ssobomax			 * There should be at least one non-empty block
297303095Ssobomax			 * between us and the backref'ed offset, otherwise
298303095Ssobomax			 * we won't be able to parse that sequence correctly
299303095Ssobomax			 * as it would be indistinguishible from another
300303095Ssobomax			 * empty block.
301303095Ssobomax			 */
302303095Ssobomax			if (chit != NULL && chit->offset == last_offset) {
303303095Ssobomax				chit = NULL;
304303095Ssobomax			}
305303095Ssobomax		}
306303095Ssobomax		if (chit != NULL) {
307303095Ssobomax			toc[io] = htobe64(chit->offset);
308303095Ssobomax			oblk->info.len = 0;
309303095Ssobomax		} else {
310303095Ssobomax			if (oblk->info.len > 0 && write(cfs.fdw, oblk->data,
311303095Ssobomax			    oblk->info.len) < 0) {
312303095Ssobomax				err(1, "write(%s)", oname);
313135045Ssobomax				/* Not reached */
314135045Ssobomax			}
315303095Ssobomax			toc[io] = htobe64(offset);
316303095Ssobomax			last_offset = offset;
317303095Ssobomax			offset += oblk->info.len;
318135045Ssobomax		}
319303095Ssobomax		if (cfs.verbose != 0) {
320303095Ssobomax			fprintf(stderr, "cluster #%d, in %u bytes, "
321303095Ssobomax			    "out len=%lu offset=%lu", io, cfs.blksz,
322303095Ssobomax			    (u_long)oblk->info.len, (u_long)be64toh(toc[io]));
323303095Ssobomax			if (chit != NULL) {
324303095Ssobomax				fprintf(stderr, " (backref'ed to #%d)",
325303095Ssobomax				    chit->blkno);
326303095Ssobomax			}
327303095Ssobomax			fprintf(stderr, "\n");
328135045Ssobomax		}
329303095Ssobomax		free(oblk);
330303095Ssobomax		io += 1;
331303095Ssobomax		if (iblk == MKUZ_BLK_EOF) {
332303095Ssobomax			if (io < i)
333303095Ssobomax				goto drain;
334303095Ssobomax			/* Last block, see if we need to add some padding */
335303095Ssobomax			if ((offset % DEV_BSIZE) == 0)
336303095Ssobomax				continue;
337303095Ssobomax			oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE));
338303095Ssobomax			oblk->info.blkno = io;
339303095Ssobomax			oblk->info.len = oblk->alen;
340303095Ssobomax			if (cfs.verbose != 0) {
341303095Ssobomax				fprintf(stderr, "padding data with %lu bytes "
342303095Ssobomax				    "so that file size is multiple of %d\n",
343303095Ssobomax				    (u_long)oblk->alen, DEV_BSIZE);
344303095Ssobomax			}
345303095Ssobomax			mkuz_fqueue_enq(cvp->results, oblk);
346303095Ssobomax			goto drain;
347303095Ssobomax		}
348135045Ssobomax	}
349135045Ssobomax
350303095Ssobomax	close(cfs.fdr);
351135045Ssobomax
352303095Ssobomax	if (cfs.verbose != 0 || summary.en != 0) {
353303095Ssobomax		et = getdtime();
354303095Ssobomax		fprintf(summary.f, "compressed data to %ju bytes, saved %lld "
355303095Ssobomax		    "bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset,
356303095Ssobomax		    (long long)(sb.st_size - offset),
357303095Ssobomax		    100.0 * (long long)(sb.st_size - offset) /
358303095Ssobomax		    (float)sb.st_size, (float)sb.st_size / (et - st));
359303095Ssobomax	}
360303095Ssobomax
361135045Ssobomax	/* Convert to big endian */
362303095Ssobomax	hdr.blksz = htonl(cfs.blksz);
363135045Ssobomax	hdr.nblocks = htonl(hdr.nblocks);
364135045Ssobomax	/* Write headers into pre-allocated space */
365303095Ssobomax	lseek(cfs.fdw, 0, SEEK_SET);
366303095Ssobomax	if (writev(cfs.fdw, iov, 2) < 0) {
367155074Spjd		err(1, "writev(%s)", oname);
368135045Ssobomax		/* Not reached */
369135045Ssobomax	}
370135045Ssobomax	cleanfile = NULL;
371303095Ssobomax	close(cfs.fdw);
372135045Ssobomax
373135045Ssobomax	exit(0);
374135045Ssobomax}
375135045Ssobomax
376303095Ssobomaxstatic struct mkuz_blk *
377303095Ssobomaxreadblock(int fd, u_int32_t clstsize)
378135058Ssobomax{
379135045Ssobomax	int numread;
380303095Ssobomax	struct mkuz_blk *rval;
381303095Ssobomax	static int blockcnt;
382303095Ssobomax	off_t cpos;
383135045Ssobomax
384303095Ssobomax	rval = mkuz_blk_ctor(clstsize);
385303095Ssobomax
386303095Ssobomax	rval->info.blkno = blockcnt;
387303095Ssobomax	blockcnt += 1;
388303095Ssobomax	cpos = lseek(fd, 0, SEEK_CUR);
389303095Ssobomax	if (cpos < 0) {
390303095Ssobomax		err(1, "readblock: lseek() failed");
391303095Ssobomax		/* Not reached */
392303095Ssobomax	}
393303095Ssobomax	rval->info.offset = cpos;
394303095Ssobomax
395303095Ssobomax	numread = read(fd, rval->data, clstsize);
396135045Ssobomax	if (numread < 0) {
397303095Ssobomax		err(1, "readblock: read() failed");
398135045Ssobomax		/* Not reached */
399135045Ssobomax	}
400135045Ssobomax	if (numread == 0) {
401303095Ssobomax		free(rval);
402303095Ssobomax		return MKUZ_BLK_EOF;
403135045Ssobomax	}
404303095Ssobomax	rval->info.len = numread;
405303095Ssobomax	return rval;
406135045Ssobomax}
407135045Ssobomax
/* Print the command synopsis on stderr and exit with status 1. */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: mkuzip [-vZdLS] [-o outfile] [-s cluster_size] "
	    "[-j ncompr] infile\n";

	fputs(synopsis, stderr);
	exit(1);
}
416135045Ssobomax
/*
 * malloc() wrapper that never hands back NULL: when the allocation fails
 * the program is terminated through err() with exit status 1.
 */
void *
mkuz_safe_malloc(size_t size)
{
	void *mem = malloc(size);

	if (mem == NULL)
		err(1, "can't allocate memory");
	return (mem);
}
429135045Ssobomax
/*
 * Like mkuz_safe_malloc(), but the returned memory is cleared to zero
 * bytes before it is handed to the caller.
 */
void *
mkuz_safe_zmalloc(size_t size)
{
	void *mem;

	mem = mkuz_safe_malloc(size);
	/* memset() returns its first argument, i.e. the zeroed buffer. */
	return (memset(mem, 0, size));
}
439303095Ssobomax
440135045Ssobomaxstatic void
441135058Ssobomaxcleanup(void)
442135058Ssobomax{
443135045Ssobomax
444135045Ssobomax	if (cleanfile != NULL)
445135045Ssobomax		unlink(cleanfile);
446135045Ssobomax}
447303095Ssobomax
/*
 * Report whether all size bytes starting at memory are equal to val.
 *
 * Returns 1 when every byte matches and 0 otherwise.  A zero-length region
 * contains no mismatching byte and therefore yields 1; the memory pointer
 * is not dereferenced in that case.
 */
int
mkuz_memvcmp(const void *memory, unsigned char val, size_t size)
{
	const unsigned char *mm;

	/*
	 * Without this guard the first byte would be read out of bounds and
	 * size - 1 would wrap around to SIZE_MAX in the memcmp() below.
	 */
	if (size == 0)
		return (1);

	mm = memory;
	/*
	 * The first byte equals val and every byte equals its successor,
	 * hence all of them equal val.
	 */
	return (*mm == val && memcmp(mm, mm + 1, size - 1) == 0);
}
456