vndcompress.c revision 1.8
1/*	$NetBSD: vndcompress.c,v 1.8 2013/05/03 23:28:15 riastradh Exp $	*/
2
3/*-
4 * Copyright (c) 2013 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Taylor R. Campbell.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33__RCSID("$NetBSD: vndcompress.c,v 1.8 2013/05/03 23:28:15 riastradh Exp $");
34
35#include <sys/endian.h>
36
37#include <assert.h>
38#include <err.h>
39#include <errno.h>
40#include <fcntl.h>
41#include <inttypes.h>
42#include <limits.h>
43#include <signal.h>
44#include <stdbool.h>
45#include <stdint.h>
46#include <stdio.h>
47#include <stdlib.h>
48#include <string.h>
49#include <unistd.h>
50#include <zlib.h>
51
52/* XXX Seems to be missing from <stdio.h>...  */
53int	snprintf_ss(char *restrict, size_t, const char *restrict, ...)
54	    __printflike(3, 4);
55int	vsnprintf_ss(char *restrict, size_t, const char *restrict, va_list);
56
57#include "common.h"
58
59/*
60 * XXX Switch to control bug-for-bug byte-for-byte compatibility with
61 * NetBSD's vndcompress.
62 */
63#define	VNDCOMPRESS_COMPAT	0
64
65__CTASSERT(sizeof(struct cloop2_header) == CLOOP2_OFFSET_TABLE_OFFSET);
66
/*
 * State of one compression run.  The signal handlers reach it through
 * global_state and refuse to look at it until `initialized' is set.
 */
struct compress_state {
	/* Position and geometry of the transfer.  */
	uint64_t	size;		/* bytes of uncompressed input */
	uint64_t	offset;		/* current output byte offset */
	uint32_t	blocksize;	/* bytes per uncompressed block */
	uint32_t	blkno;		/* next input block to transfer */
	uint32_t	n_full_blocks;	/* floor(size/blocksize) */
	uint32_t	n_blocks;	/* ceiling(size/blocksize) */
	uint32_t	n_offsets;	/* n_blocks + 1 */

	/* Optional limits taken from the command-line options.  */
	uint32_t	end_block;	/* stop at this block; 0 = no limit */
	uint32_t	checkpoint_blocks;	/* checkpoint period; 0 = none */

	/* Files and the offset table (entries stored big-endian).  */
	int		image_fd;	/* input image */
	int		cloop2_fd;	/* output cloop2 file */
	uint64_t	*offset_table;	/* n_offsets entries */
	uint32_t	n_checkpointed_blocks;	/* blkno at last checkpoint */

	/* Nonzero once everything above has been initialized.  */
	volatile sig_atomic_t initialized;
};
84
85/* Global compression state for SIGINFO handler.  */
86static struct compress_state	global_state;
87
88struct sigdesc {
89	int sd_signo;
90	const char *sd_name;
91};
92
93static const struct sigdesc info_signals[] = {
94	{ SIGINFO, "SIGINFO" },
95	{ SIGUSR1, "SIGUSR1" },
96};
97
98static const struct sigdesc checkpoint_signals[] = {
99	{ SIGUSR2, "SIGUSR2" },
100};
101
102static void	init_signals(void);
103static void	init_signal_handler(int, const struct sigdesc *, size_t,
104		    void (*)(int));
105static void	info_signal_handler(int);
106static void	checkpoint_signal_handler(int);
107static void	block_signals(sigset_t *);
108static void	restore_sigmask(const sigset_t *);
109static void	compress_progress(struct compress_state *);
110static void	compress_init(int, char **, const struct options *,
111		    struct compress_state *);
112static bool	compress_restart(struct compress_state *);
113static uint32_t	compress_block(int, int, uint32_t, uint32_t, uint32_t, void *,
114		    void *);
115static void	compress_maybe_checkpoint(struct compress_state *);
116static void	compress_checkpoint(struct compress_state *);
117static void	compress_exit(struct compress_state *);
118static ssize_t	read_block(int, void *, size_t);
119static void	err_ss(int, const char *);
120static void	errx_ss(int, const char *, ...) __printflike(2, 3);
121static void	warn_ss(const char *);
122static void	warnx_ss(const char *, ...) __printflike(1, 2);
123static void	vwarnx_ss(const char *, va_list);
124
125/*
126 * Compression entry point.
127 */
128int
129vndcompress(int argc, char **argv, const struct options *O)
130{
131	struct compress_state *const S = &global_state;
132
133	/* Paranoia.  The other fields either have no sentinel or use zero.  */
134	S->image_fd = -1;
135	S->cloop2_fd = -1;
136
137	/* Set up signal handlers so we can handle SIGINFO ASAP.  */
138	init_signals();
139
140	/*
141	 * Parse the arguments to initialize our state.
142	 */
143	compress_init(argc, argv, O, S);
144	assert(MIN_BLOCKSIZE <= S->blocksize);
145	assert(S->blocksize <= MAX_BLOCKSIZE);
146	assert(S->offset_table != NULL);
147	assert(S->n_offsets > 0);
148	assert(S->offset_table[0] == htobe64(sizeof(struct cloop2_header) +
149		(S->n_offsets * sizeof(uint64_t))));
150
151	/*
152	 * Allocate compression buffers.
153	 *
154	 * Compression may actually expand.  From an overabundance of
155	 * caution, assume it can expand by at most double.
156	 *
157	 * XXX Check and consider tightening this assumption.
158	 */
159	__CTASSERT(MAX_BLOCKSIZE <= SIZE_MAX);
160	void *const uncompbuf = malloc(S->blocksize);
161	if (uncompbuf == NULL)
162		err(1, "malloc uncompressed buffer");
163
164	/* XXX compression ratio bound */
165	__CTASSERT(MAX_BLOCKSIZE <= (SIZE_MAX / 2));
166	void *const compbuf = malloc(2 * (size_t)S->blocksize);
167	if (compbuf == NULL)
168		err(1, "malloc compressed buffer");
169
170	/*
171	 * Compress the blocks.  S->blkno specifies the input block
172	 * we're about to transfer.  S->offset is the current output
173	 * offset.
174	 */
175	while (S->blkno < S->n_blocks) {
176		/* Report any progress.  */
177		compress_progress(S);
178
179		/* Stop if we've done the requested partial transfer.  */
180		if ((0 < S->end_block) && (S->end_block <= S->blkno))
181			goto out;
182
183		/* Checkpoint if appropriate.  */
184		compress_maybe_checkpoint(S);
185
186		/* Choose read size: partial if last block, full if not.  */
187		const uint32_t readsize = (S->blkno == S->n_full_blocks?
188		    (S->size % S->blocksize) : S->blocksize);
189		assert(readsize > 0);
190		assert(readsize <= S->blocksize);
191
192		/* Fail noisily if we might be about to overflow.  */
193		/* XXX compression ratio bound */
194		__CTASSERT(MAX_BLOCKSIZE <= (UINTMAX_MAX / 2));
195		assert(S->offset <= MIN(UINT64_MAX, OFF_MAX));
196		if ((2 * (uintmax_t)readsize) >
197		    (MIN(UINT64_MAX, OFF_MAX) - S->offset))
198			errx(1, "blkno %"PRIu32" may overflow: %ju + 2*%ju",
199			    S->blkno, (uintmax_t)S->offset,
200			    (uintmax_t)readsize);
201
202		/* Process the block.  */
203		const uint32_t complen =
204		    compress_block(S->image_fd, S->cloop2_fd, S->blkno,
205			S->blocksize, readsize, uncompbuf, compbuf);
206
207		/*
208		 * Signal-atomically update the state to reflect
209		 * (a) what block number we are now at,
210		 * (b) how far we are now in the output file, and
211		 * (c) where the last block ended.
212		 */
213		assert(S->blkno <= (UINT32_MAX - 1));
214		assert(complen <= (MIN(UINT64_MAX, OFF_MAX) - S->offset));
215		assert((S->blkno + 1) < S->n_offsets);
216	    {
217		sigset_t old_sigmask;
218		block_signals(&old_sigmask);
219		S->blkno += 1;					/* (a) */
220		S->offset += complen;				/* (b) */
221		S->offset_table[S->blkno] = htobe64(S->offset);	/* (c) */
222		restore_sigmask(&old_sigmask);
223	    }
224	}
225
226	/* Make sure we're all done. */
227	assert(S->blkno == S->n_blocks);
228	assert((S->blkno + 1) == S->n_offsets);
229
230	/* Pad to the disk block size.  */
231	const uint32_t n_extra = (S->offset % DEV_BSIZE);
232	if (n_extra != 0) {
233		const uint32_t n_padding = (DEV_BSIZE - n_extra);
234		/* Reuse compbuf -- guaranteed to be large enough.  */
235		(void)memset(compbuf, 0, n_padding);
236		const ssize_t n_written = write(S->cloop2_fd, compbuf,
237		    n_padding);
238		if (n_written == -1)
239			err(1, "write final padding failed");
240		assert(n_written >= 0);
241		if ((size_t)n_written != n_padding)
242			errx(1, "partial write of final padding bytes"
243			    ": %zd <= %"PRIu32,
244			    n_written, n_padding);
245
246		/* Account for the extra bytes in the output file.  */
247		assert(n_padding <= (MIN(UINT64_MAX, OFF_MAX) - S->offset));
248	    {
249		sigset_t old_sigmask;
250		block_signals(&old_sigmask);
251		S->offset += n_padding;
252		restore_sigmask(&old_sigmask);
253	    }
254	}
255
256out:
257	/* Commit the offset table.  */
258	assert(S->offset <= OFF_MAX);
259	assert((off_t)S->offset == lseek(S->cloop2_fd, 0, SEEK_CUR));
260	compress_checkpoint(S);
261
262	/*
263	 * Free the compression buffers and finalize the compression.
264	 */
265	free(compbuf);
266	free(uncompbuf);
267	compress_exit(S);
268
269	return 0;
270}
271
272/*
273 * Signal cruft.
274 */
275
276static void
277init_signals(void)
278{
279
280	init_signal_handler(SA_RESTART, info_signals,
281	    __arraycount(info_signals), &info_signal_handler);
282	init_signal_handler(SA_RESTART, checkpoint_signals,
283	    __arraycount(checkpoint_signals), &checkpoint_signal_handler);
284}
285
286static void
287init_signal_handler(int flags, const struct sigdesc *signals, size_t n,
288    void (*handler)(int))
289{
290	static const struct sigaction zero_sa;
291	struct sigaction sa = zero_sa;
292	size_t i;
293
294	(void)sigemptyset(&sa.sa_mask);
295	for (i = 0; i < n; i++)
296		(void)sigaddset(&sa.sa_mask, signals[i].sd_signo);
297	sa.sa_flags = flags;
298	sa.sa_handler = handler;
299	for (i = 0; i < n; i++)
300		if (sigaction(signals[i].sd_signo, &sa, NULL) == -1)
301			err(1, "sigaction(%s)", signals[i].sd_name);
302}
303
304static void
305info_signal_handler(int signo __unused)
306{
307	/* Save errno.  */
308	const int error = errno;
309	struct compress_state *const S = &global_state;
310	char buf[128];
311
312	/* Bail if the state is not yet initialized.  */
313	if (!S->initialized) {
314		warnx_ss("initializing");
315		goto out;
316	}
317
318	/* Carefully calculate our I/O position.  */
319	assert(S->blocksize > 0);
320	__CTASSERT(MAX_N_BLOCKS <= (UINT64_MAX / MAX_BLOCKSIZE));
321	const uint64_t nread = ((uint64_t)S->blkno * (uint64_t)S->blocksize);
322
323	assert(S->n_blocks > 0);
324	__CTASSERT(MAX_N_BLOCKS <= ((UINT64_MAX / sizeof(uint64_t)) -
325		CLOOP2_OFFSET_TABLE_OFFSET));
326	const uint64_t nwritten = (S->offset <= (CLOOP2_OFFSET_TABLE_OFFSET +
327		(S->n_blocks * sizeof(uint64_t)))?
328	    0 : S->offset);
329
330	/* snprintf_ss can't do floating-point, so do fixed-point instead.  */
331	const uint64_t ratio_percent =
332	    (nread > 0?
333		((nwritten >= (UINT64_MAX / 100)) ?
334		    ((nwritten / nread) * 100) : ((nwritten * 100) / nread))
335		: 0);
336
337	/* Format the status.  */
338	assert(S->n_checkpointed_blocks <= (UINT64_MAX / S->blocksize));
339	const int n = snprintf_ss(buf, sizeof(buf),
340	    "vndcompress: read %"PRIu64" bytes, wrote %"PRIu64" bytes, "
341	    "compression ratio %"PRIu64"%% (checkpointed %"PRIu64" bytes)\n",
342	    nread, nwritten, ratio_percent,
343	    ((uint64_t)S->n_checkpointed_blocks * (uint64_t)S->blocksize));
344	if (n < 0) {
345		const char msg[] = "vndcompress: can't format info\n";
346		(void)write(STDERR_FILENO, msg, __arraycount(msg));
347	} else {
348		__CTASSERT(INT_MAX <= SIZE_MAX);
349		(void)write(STDERR_FILENO, buf, (size_t)n);
350	}
351
352out:
353	/* Restore errno.  */
354	errno = error;
355}
356
357static void
358checkpoint_signal_handler(int signo __unused)
359{
360	/* Save errno.  */
361	const int error = errno;
362	struct compress_state *const S = &global_state;
363
364	/* Bail if the state is not yet initialized.  */
365	if (!S->initialized) {
366		warnx_ss("nothing to checkpoint yet");
367		goto out;
368	}
369
370	assert(S->image_fd >= 0);
371	assert(S->cloop2_fd >= 0);
372
373	/* Take a checkpoint.  */
374	assert(S->blocksize > 0);
375	assert(S->blkno <= (UINT64_MAX / S->blocksize));
376	warnx_ss("checkpointing %"PRIu64" bytes",
377	    ((uint64_t)S->blkno * (uint64_t)S->blocksize));
378	compress_checkpoint(S);
379
380out:
381	/* Restore errno.  */
382	errno = error;
383}
384
385static void
386block_signals(sigset_t *old_sigmask)
387{
388	sigset_t block;
389
390	(void)sigfillset(&block);
391	(void)sigprocmask(SIG_BLOCK, &block, old_sigmask);
392}
393
394static void
395restore_sigmask(const sigset_t *sigmask)
396{
397
398	(void)sigprocmask(SIG_SETMASK, sigmask, NULL);
399}
400
401/*
402 * Report progress.
403 *
404 * XXX Should do a progress bar here.
405 */
406static void
407compress_progress(struct compress_state *S __unused)
408{
409}
410
411/*
412 * Parse arguments, open the files, and initialize the state.
413 */
414static void
415compress_init(int argc, char **argv, const struct options *O,
416    struct compress_state *S)
417{
418	uint32_t i;
419
420	if (!((argc == 2) || (argc == 3)))
421		usage();
422
423	const char *const image_pathname = argv[0];
424	const char *const cloop2_pathname = argv[1];
425
426	/* Grab the block size either from `-s' or from the last argument.  */
427	__CTASSERT(0 < DEV_BSIZE);
428	__CTASSERT((MIN_BLOCKSIZE % DEV_BSIZE) == 0);
429	__CTASSERT(MIN_BLOCKSIZE <= DEF_BLOCKSIZE);
430	__CTASSERT((DEF_BLOCKSIZE % DEV_BSIZE) == 0);
431	__CTASSERT(DEF_BLOCKSIZE <= MAX_BLOCKSIZE);
432	__CTASSERT((MAX_BLOCKSIZE % DEV_BSIZE) == 0);
433	if (ISSET(O->flags, FLAG_s)) {
434		if (argc == 3) {
435			warnx("use -s or the extra argument, not both");
436			usage();
437		}
438		S->blocksize = O->blocksize;
439	} else {
440		S->blocksize = (argc == 2? DEF_BLOCKSIZE :
441		    strsuftoll("block size", argv[2], MIN_BLOCKSIZE,
442			MAX_BLOCKSIZE));
443	}
444
445	/* Sanity-check the blocksize.  (strsuftoll guarantees bounds.)  */
446	__CTASSERT(DEV_BSIZE <= UINT32_MAX);
447	if ((S->blocksize % DEV_BSIZE) != 0)
448		errx(1, "bad blocksize: %"PRIu32
449		    " (not a multiple of %"PRIu32")",
450		    S->blocksize, (uint32_t)DEV_BSIZE);
451	assert(MIN_BLOCKSIZE <= S->blocksize);
452	assert((S->blocksize % DEV_BSIZE) == 0);
453	assert(S->blocksize <= MAX_BLOCKSIZE);
454
455	/* Grab the end block number if we have one.  */
456	S->end_block = (ISSET(O->flags, FLAG_p)? O->end_block : 0);
457
458	/* Grab the checkpoint block count, if we have one.  */
459	S->checkpoint_blocks =
460	    (ISSET(O->flags, FLAG_k)? O->checkpoint_blocks : 0);
461
462	/* Open the input image file and the output cloop2 file.  */
463	S->image_fd = open(image_pathname, O_RDONLY);
464	if (S->image_fd == -1)
465		err(1, "open(%s)", image_pathname);
466
467	int oflags;
468	if (!ISSET(O->flags, FLAG_r))
469		oflags = (O_WRONLY | O_TRUNC | O_CREAT); /* XXX O_EXCL?  */
470	else if (!ISSET(O->flags, FLAG_R))
471		oflags = (O_RDWR | O_CREAT);
472	else
473		oflags = O_RDWR;
474	S->cloop2_fd = open(cloop2_pathname, oflags, 0777);
475	if (S->cloop2_fd == -1)
476		err(1, "open(%s)", cloop2_pathname);
477
478	/* Find the size of the input image.  */
479	if (ISSET(O->flags, FLAG_l)) {
480		S->size = O->length;
481	} else {
482		static const struct stat zero_st;
483		struct stat st = zero_st;
484		if (fstat(S->image_fd, &st) == -1)
485			err(1, "stat(%s)", image_pathname);
486		if (st.st_size <= 0)
487			errx(1, "unknown image size");
488		assert(st.st_size >= 0);
489		__CTASSERT(OFF_MAX <= UINT64_MAX);
490		assert(__type_fit(uint64_t, st.st_size));
491		S->size = st.st_size;
492	}
493	assert(S->size <= OFF_MAX);
494
495	/* Find number of full blocks and whether there's a partial block.  */
496	S->n_full_blocks = (S->size / S->blocksize);
497	assert(S->n_full_blocks <=
498	    (UINT32_MAX - ((S->size % S->blocksize) > 0)));
499	S->n_blocks = (S->n_full_blocks + ((S->size % S->blocksize) > 0));
500	assert(S->n_full_blocks <= S->n_blocks);
501
502	if (S->n_blocks > MAX_N_BLOCKS)
503		errx(1, "image too large for block size %"PRIu32": %"PRIu64,
504		    S->blocksize, S->size);
505	assert(S->n_blocks <= MAX_N_BLOCKS);
506
507	/* Allocate an offset table for the blocks; one extra for the end.  */
508	__CTASSERT(MAX_N_BLOCKS <= (UINT32_MAX - 1));
509	S->n_offsets = (S->n_blocks + 1);
510	__CTASSERT(MAX_N_OFFSETS == (MAX_N_BLOCKS + 1));
511	__CTASSERT(MAX_N_OFFSETS <= (SIZE_MAX / sizeof(uint64_t)));
512	S->offset_table = malloc(S->n_offsets * sizeof(uint64_t));
513	if (S->offset_table == NULL)
514		err(1, "malloc offset table");
515
516	/* Attempt to restart a partial transfer if requested.  */
517	if (ISSET(O->flags, FLAG_r)) {
518		if (compress_restart(S)) {
519			/*
520			 * Restart succeeded.  Truncate the output
521			 * here, in case any garbage got appended.  We
522			 * are committed to making progress at this
523			 * point.  If the ftruncate fails, we don't
524			 * lose anything valuable -- this is the last
525			 * point at which we can restart anyway.
526			 */
527			if (ftruncate(S->cloop2_fd, S->offset) == -1)
528				err(1, "ftruncate failed");
529
530			/* All set!  No more initialization to do.  */
531			return;
532		} else {
533			/* Restart failed.  Barf now if requested.  */
534			if (ISSET(O->flags, FLAG_R))
535				errx(1, "restart failed, aborting");
536
537			/* Otherwise, truncate and start at the top.  */
538			if (ftruncate(S->cloop2_fd, 0) == -1)
539				err(1, "truncate failed");
540			if (lseek(S->cloop2_fd, 0, SEEK_SET) == -1)
541				err(1, "lseek to cloop2 beginning failed");
542			if (lseek(S->image_fd, 0, SEEK_SET) == -1)
543				err(1, "lseek to image beginning failed");
544		}
545	}
546
547	/*
548	 * Initialize the offset table to all ones (except for the
549	 * fixed first offset) so that we can easily detect where we
550	 * were interrupted if we want to restart.
551	 */
552	__CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
553	assert(S->n_offsets > 0);
554	S->offset_table[0] = htobe64(sizeof(struct cloop2_header) +
555	    (S->n_offsets * sizeof(uint64_t)));
556	for (i = 1; i < S->n_offsets; i++)
557		S->offset_table[i] = ~(uint64_t)0;
558
559	/* Write a bogus (zero) header for now, until we checkpoint.  */
560	static const struct cloop2_header zero_header;
561	const ssize_t h_written = write(S->cloop2_fd, &zero_header,
562	    sizeof(zero_header));
563	if (h_written == -1)
564		err(1, "write header");
565	assert(h_written >= 0);
566	if ((size_t)h_written != sizeof(zero_header))
567		errx(1, "partial write of header: %zd <= %zu", h_written,
568		    sizeof(zero_header));
569
570	/* Write the initial (empty) offset table.  */
571	const ssize_t ot_written = write(S->cloop2_fd, S->offset_table,
572	    (S->n_offsets * sizeof(uint64_t)));
573	if (ot_written == -1)
574		err(1, "write initial offset table");
575	assert(ot_written >= 0);
576	if ((size_t)ot_written != (S->n_offsets * sizeof(uint64_t)))
577		errx(1, "partial write of initial offset bytes: %zd <= %zu",
578		    ot_written, (size_t)(S->n_offsets * sizeof(uint64_t)));
579
580	/* Start at the beginning of the image.  */
581	S->blkno = 0;
582	S->offset = (sizeof(struct cloop2_header) +
583	    (S->n_offsets * sizeof(uint64_t)));
584	S->n_checkpointed_blocks = 0;
585
586	/* Good to go and ready for interruption by a signal.  */
587	S->initialized = 1;
588}
589
590/*
591 * Try to recover state from an existing output file.
592 *
593 * On success, fill S->offset_table with what's in the file, set
594 * S->blkno and S->offset to reflect our position, and seek to the
595 * respective positions in the input and output files.
596 *
597 * On failure, return false.  May clobber S->offset_table, S->blkno,
598 * S->offset, and the file pointers.
599 */
600static bool
601compress_restart(struct compress_state *S)
602{
603
604	/* Read in the header.  */
605	static const struct cloop2_header zero_header;
606	struct cloop2_header header = zero_header;
607
608	const ssize_t h_read = read_block(S->cloop2_fd, &header,
609	    sizeof(header));
610	if (h_read == -1) {
611		warn("failed to read header");
612		return false;
613	}
614	assert(h_read >= 0);
615	if ((size_t)h_read != sizeof(header)) {
616		warnx("partial read of header");
617		return false;
618	}
619
620	/* Check that the header looks like a header.  */
621	__CTASSERT(sizeof(cloop2_magic) <= sizeof(header.cl2h_magic));
622	if (memcmp(header.cl2h_magic, cloop2_magic, sizeof(cloop2_magic))
623	    != 0) {
624		warnx("bad cloop2 shell script magic");
625		return false;
626	}
627
628	/* Check the header parameters.  */
629	if (be32toh(header.cl2h_blocksize) != S->blocksize) {
630		warnx("mismatched block size: %"PRIu32
631		    " (expected %"PRIu32")",
632		    be32toh(header.cl2h_blocksize), S->blocksize);
633		return false;
634	}
635	if (be32toh(header.cl2h_n_blocks) != S->n_blocks) {
636		warnx("mismatched number of blocks: %"PRIu32
637		    " (expected %"PRIu32")",
638		    be32toh(header.cl2h_n_blocks), S->n_blocks);
639		return false;
640	}
641
642	/* Read in the partial offset table.  */
643	const ssize_t ot_read = read_block(S->cloop2_fd, S->offset_table,
644	    (S->n_offsets * sizeof(uint64_t)));
645	if (ot_read == -1) {
646		warn("failed to read offset table");
647		return false;
648	}
649	assert(ot_read >= 0);
650	if ((size_t)ot_read != (S->n_offsets * sizeof(uint64_t))) {
651		warnx("partial read of offset table");
652		return false;
653	}
654
655	if (be64toh(S->offset_table[0]) != (sizeof(struct cloop2_header) +
656		(S->n_offsets * sizeof(uint64_t)))) {
657		warnx("first offset is not %"PRIu64": %"PRIu64,
658		    ((uint64_t)S->n_offsets * sizeof(uint64_t)),
659		    be64toh(S->offset_table[0]));
660		return false;
661	}
662
663	/* Find where we left off.  */
664	__CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
665	uint32_t blkno = 0;
666	for (blkno = 0; blkno < S->n_blocks; blkno++) {
667		if (S->offset_table[blkno] == ~(uint64_t)0)
668			break;
669		if (0 < blkno) {
670			const uint64_t start =
671			    be64toh(S->offset_table[blkno - 1]);
672			const uint64_t end = be64toh(S->offset_table[blkno]);
673			if (end <= start) {
674				warnx("bad offset table: 0x%"PRIx64
675				    ", 0x%"PRIx64, start, end);
676				return false;
677			}
678			/* XXX compression ratio bound */
679			__CTASSERT(MAX_BLOCKSIZE <= (SIZE_MAX / 2));
680			if ((2 * (size_t)S->blocksize) <= (end - start)) {
681				warnx("block %"PRIu32" too large:"
682				    " %"PRIu64" bytes",
683				    blkno, (end - start));
684				return false;
685			}
686		}
687	}
688
689	if (blkno == 0) {
690		warnx("no blocks were written; nothing to restart");
691		return false;
692	}
693
694	/* Make sure the rest of the offset table is all ones.  */
695	if (blkno < S->n_blocks) {
696		uint32_t nblkno;
697
698		for (nblkno = blkno; nblkno < S->n_blocks; nblkno++) {
699			if (S->offset_table[nblkno] != ~(uint64_t)0) {
700				warnx("bad partial offset table entry"
701				    " at %"PRIu32": %"PRIu64,
702				    nblkno,
703				    be64toh(S->offset_table[nblkno]));
704				return false;
705			}
706		}
707	}
708
709	/*
710	 * XXX Consider decompressing some number of blocks to make
711	 * sure they match.
712	 */
713
714	/* Back up by one.  */
715	assert(1 <= blkno);
716	blkno -= 1;
717
718	/* Seek to the input position.  */
719	assert(S->size <= OFF_MAX);
720	assert(blkno <= (S->size / S->blocksize));
721	const off_t restart_position = ((off_t)blkno * (off_t)S->blocksize);
722	assert(0 <= restart_position);
723	assert(restart_position <= (off_t)S->size);
724	if (lseek(S->image_fd, restart_position, SEEK_SET) == -1) {
725		if (errno != ESPIPE) {
726			warn("lseek input image failed");
727			return false;
728		}
729
730		/* Try read instead of lseek for a pipe/socket/fifo.  */
731		void *const buffer = malloc(0x10000);
732		if (buffer == NULL)
733			err(1, "malloc temporary buffer");
734		off_t left = restart_position;
735		while (left > 0) {
736			const size_t size = MIN(0x10000, left);
737			const ssize_t n_read = read_block(S->image_fd, buffer,
738			    size);
739			if (n_read == -1) {
740				free(buffer);
741				warn("read of input image failed");
742				return false;
743			}
744			assert(n_read >= 0);
745			if ((size_t)n_read != size) {
746				free(buffer);
747				warnx("partial read of input image");
748				return false;
749			}
750			assert((off_t)size <= left);
751			left -= size;
752		}
753		free(buffer);
754	}
755
756	/* Seek to the output position.  */
757	const uint64_t offset = be64toh(S->offset_table[blkno]);
758	assert(offset <= OFF_MAX);
759	if (lseek(S->cloop2_fd, offset, SEEK_SET) == -1) {
760		warn("lseek output cloop2 to %"PRIx64" failed",
761		    S->offset);
762		return false;
763	}
764
765	/* Start where we left off.  */
766	S->blkno = blkno;
767	S->offset = offset;
768	S->n_checkpointed_blocks = blkno;
769
770	/* Good to go and ready for interruption by a signal.  */
771	S->initialized = 1;
772
773	/* Success!  */
774	return true;
775}
776
777/*
778 * Read a single block, compress it, and write the compressed block.
779 * Return the size of the compressed block.
780 */
781static uint32_t
782compress_block(int in_fd, int out_fd, uint32_t blkno, uint32_t blocksize,
783    uint32_t readsize, void *uncompbuf, void *compbuf)
784{
785
786	assert(readsize <= blocksize);
787	assert(blocksize <= MAX_BLOCKSIZE);
788
789	/* Read the uncompressed block.  */
790	const ssize_t n_read = read_block(in_fd, uncompbuf, readsize);
791	if (n_read == -1)
792		err(1, "read block %"PRIu32, blkno);
793	assert(n_read >= 0);
794	assert((uintmax_t)n_read <= (uintmax_t)readsize);
795	if (n_read < readsize)
796		errx(1, "partial read of block %"PRIu32": %zd <= %"PRIu32,
797		    blkno, n_read, readsize);
798
799	/* Compress the block.  */
800	/* XXX compression ratio bound */
801	__CTASSERT(MAX_BLOCKSIZE <= (ULONG_MAX / 2));
802	const unsigned long uncomplen =
803	    (VNDCOMPRESS_COMPAT? blocksize : readsize); /* XXX */
804	unsigned long complen = (uncomplen * 2);
805	const int zerror = compress2(compbuf, &complen, uncompbuf, uncomplen,
806	    Z_BEST_COMPRESSION);
807	if (zerror != Z_OK)
808		errx(1, "compressed failed at block %"PRIu32" (%d): %s", blkno,
809		    zerror, zError(zerror));
810	assert(complen <= (uncomplen * 2));
811
812	/* Write the compressed block.  */
813	const ssize_t n_written = write(out_fd, compbuf, complen);
814	if (n_written == -1)
815		err(1, "write block %"PRIu32, blkno);
816	assert(n_written >= 0);
817	if ((uint32_t)n_written != complen)
818		errx(1, "partial write of block %"PRIu32": %zd <= %zu", blkno,
819		    n_written, complen);
820
821	return n_written;
822}
823
824/*
825 * Checkpoint if appropriate.
826 */
827static void
828compress_maybe_checkpoint(struct compress_state *S)
829{
830
831	if ((0 < S->checkpoint_blocks) && (0 < S->blkno) &&
832	    ((S->blkno % S->checkpoint_blocks) == 0)) {
833		assert(S->offset <= OFF_MAX);
834		assert((off_t)S->offset == lseek(S->cloop2_fd, 0, SEEK_CUR));
835		compress_checkpoint(S);
836	}
837}
838
839/*
840 * Write the prefix of the offset table that we have filled so far.
841 *
842 * We fsync the data blocks we have written, and then write the offset
843 * table, and then fsync the offset table and file metadata.  This
844 * should help to avoid offset tables that point at garbage data.
845 *
846 * This may be called from a signal handler, so it must not use stdio,
847 * malloc, &c. -- it may only (a) handle signal-safe state in S, and
848 * (b) do file descriptor I/O / fsync.
849 *
850 * XXX This requires further thought and heavy testing to be sure.
851 *
852 * XXX Should have an option to suppress fsync.
853 *
854 * XXX Should have an option to fail on fsync failures.
855 *
856 * XXX Would be nice if we could just do a barrier rather than an
857 * fsync.
858 *
859 * XXX How might we automatically test the fsyncs?
860 */
861static void
862compress_checkpoint(struct compress_state *S)
863{
864
865	assert(S->blkno < S->n_offsets);
866	const uint32_t n_offsets = (S->blkno + 1);
867	assert(n_offsets <= S->n_offsets);
868
869	assert(S->offset <= OFF_MAX);
870	assert((off_t)S->offset <= lseek(S->cloop2_fd, 0, SEEK_CUR));
871
872	/* Make sure the data hits the disk before we say it's ready.  */
873	if (fsync_range(S->cloop2_fd, (FFILESYNC | FDISKSYNC), 0, S->offset)
874	    == -1)
875		warn_ss("fsync of output failed");
876
877	/* Say the data blocks are ready.  */
878	const ssize_t n_written = pwrite(S->cloop2_fd, S->offset_table,
879	    (n_offsets * sizeof(uint64_t)), CLOOP2_OFFSET_TABLE_OFFSET);
880	if (n_written == -1)
881		err_ss(1, "write partial offset table");
882	assert(n_written >= 0);
883	if ((size_t)n_written != (n_offsets * sizeof(uint64_t)))
884		errx_ss(1, "partial write of partial offset table: %zd <= %zu",
885		    n_written, (size_t)(n_offsets * sizeof(uint64_t)));
886
887	/*
888	 * If this is the first checkpoint, initialize the header.
889	 * Signal handler can race with main code here, but it is
890	 * harmless -- just an extra fsync and write of the header,
891	 * which are both idempotent.
892	 */
893	if (S->n_checkpointed_blocks == 0) {
894		static const struct cloop2_header zero_header;
895		struct cloop2_header header = zero_header;
896
897		/* Force the offset table to disk before we set the header.  */
898		if (fsync_range(S->cloop2_fd, (FFILESYNC | FDISKSYNC),
899			0,
900			(CLOOP2_OFFSET_TABLE_OFFSET
901			    + (n_offsets * (sizeof(uint64_t)))))
902		    == -1)
903			warn_ss("fsync of offset table failed");
904
905		/* Subsequent writes will preserve a valid state.  */
906
907		/* Format the header.  */
908		__CTASSERT(sizeof(cloop2_magic) <= sizeof(header.cl2h_magic));
909		(void)memcpy(header.cl2h_magic, cloop2_magic,
910		    sizeof(cloop2_magic));
911		header.cl2h_blocksize = htobe32(S->blocksize);
912		header.cl2h_n_blocks = htobe32(S->n_blocks);
913
914		/* Write the header.  */
915		const ssize_t h_written = pwrite(S->cloop2_fd, &header,
916		    sizeof(header), 0);
917		if (h_written == -1)
918			err_ss(1, "write header");
919		assert(h_written >= 0);
920		if ((size_t)h_written != sizeof(header))
921			errx_ss(1, "partial write of header: %zd <= %zu",
922			    h_written, sizeof(header));
923	}
924
925	/* Record how many blocks we've checkpointed.  */
926    {
927	sigset_t old_sigmask;
928	block_signals(&old_sigmask);
929	S->n_checkpointed_blocks = S->blkno;
930	restore_sigmask(&old_sigmask);
931    }
932}
933
934/*
935 * Release everything we allocated in compress_init.
936 */
937static void
938compress_exit(struct compress_state *S)
939{
940
941	/* Done with the offset table.  Free it.  */
942	free(S->offset_table);
943
944	/* Done with the files.  Close them.  */
945	if (close(S->cloop2_fd) == -1)
946		warn("close(cloop2 fd)");
947	if (close(S->image_fd) == -1)
948		warn("close(image fd)");
949}
950
/*
 * Read up to n bytes from fd into buffer, calling read repeatedly
 * until the request is satisfied or end of file is reached.  Returns
 * the number of bytes read, which is less than n only at end of file,
 * or -1 if read failed (bytes read before the failure are not
 * reported).
 */
static ssize_t
read_block(int fd, void *buffer, size_t n)
{
	char *cursor = buffer;
	char *const limit = (cursor + n);
	size_t remaining = n;
	size_t total = 0;

	(void)limit;		/* referenced only by the assertions */

	while (remaining > 0) {
		const ssize_t got = read(fd, cursor, remaining);

		if (got == -1)
			return -1;
		assert(got >= 0);

		/* End of file: hand back what we have.  */
		if (got == 0)
			break;

		assert((size_t)got <= remaining);
		remaining -= (size_t)got;

		assert(cursor <= limit);
		assert(got <= (limit - cursor));
		cursor += (size_t)got;

		assert((size_t)got <= (SIZE_MAX - total));
		total += (size_t)got;
	}

	return total;
}
981
982/*
983 * Signal-safe err/warn utilities.  The errno varieties are limited to
984 * having no format arguments for reasons of laziness.
985 */
986
/*
 * Like err(3) without format arguments: print msg followed by the
 * description of errno via warn_ss, then terminate immediately with
 * _Exit(exit_value).
 */
static void
err_ss(int exit_value, const char *msg)
{

	warn_ss(msg);
	_Exit(exit_value);
}
993
/*
 * Like errx(3): print the formatted message via vwarnx_ss, then
 * terminate immediately with _Exit(exit_value).
 */
static void
errx_ss(int exit_value, const char *format, ...)
{
	va_list args;

	va_start(args, format);
	vwarnx_ss(format, args);
	va_end(args);

	_Exit(exit_value);
}
1004
/*
 * Like warn(3) without format arguments: print msg, a colon, and the
 * description of the current errno.  errno is the same on return as
 * on entry.
 */
static void
warn_ss(const char *msg)
{
	const int saved_errno = errno;

	warnx_ss("%s: %s", msg, strerror(saved_errno));
	errno = saved_errno;
}
1014
/*
 * Like warnx(3): collect the variable arguments and let vwarnx_ss
 * format and print the message.
 */
static void
warnx_ss(const char *format, ...)
{
	va_list args;

	va_start(args, format);
	vwarnx_ss(format, args);
	va_end(args);
}
1024
/*
 * Format "<progname>: <message>\n" into a fixed-size buffer with
 * vsnprintf_ss and write it to stderr with a single write.  A message
 * too long for the buffer is cut short but still ends in a newline.
 * If formatting fails or produces nothing, a fixed fallback message
 * is written instead.
 */
static void
vwarnx_ss(const char *format, va_list va)
{
	char buf[128];

	/*
	 * Build the prefix and the message in all but the last byte
	 * of the buffer, so that there is always room for the
	 * newline.
	 */
	(void)strlcpy(buf, getprogname(), (sizeof(buf) - 1));
	(void)strlcat(buf, ": ", (sizeof(buf) - 1));
	const size_t prefix_len = strlen(buf);

	const int n = vsnprintf_ss(&buf[prefix_len],
	    ((sizeof(buf) - 1) - prefix_len), format, va);
	if (n <= 0) {
		/* Write the text only, not its NUL terminator.  */
		static const char fallback[] =
		    "vndcompress: Help!  I'm trapped in a signal handler!\n";
		(void)write(STDERR_FILENO, fallback, (sizeof(fallback) - 1));
	} else {
		(void)strlcat(buf, "\n", sizeof(buf));
		(void)write(STDERR_FILENO, buf, strlen(buf));
	}
}
1044