1/*-
2 * Copyright (c) 2014 Juniper Networks, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD$");
29
30#include <sys/mman.h>
31#include <sys/stat.h>
32#include <assert.h>
33#include <err.h>
34#include <errno.h>
35#include <limits.h>
36#include <paths.h>
37#include <stdint.h>
38#include <stdio.h>
39#include <stdlib.h>
40#include <string.h>
41#include <unistd.h>
42
43#include "image.h"
44#include "mkimg.h"
45
/*
 * MAP_NOCORE and MAP_NOSYNC are OR'ed into the mmap(2) flags used by
 * image_file_map().  Where the host does not define them, fall back to 0
 * so that they contribute nothing to the flag word.
 */
#ifndef MAP_NOCORE
#define	MAP_NOCORE	0
#endif
#ifndef MAP_NOSYNC
#define	MAP_NOSYNC	0
#endif

/*
 * Fallbacks for hosts without SEEK_DATA/SEEK_HOLE.  image_copyin_mapped()
 * maps a failing lseek(2) for either of them to "end of file" and then
 * treats the whole file as data.
 * NOTE(review): presumably lseek(2) rejects -1 as whence -- confirm.
 */
#ifndef SEEK_DATA
#define	SEEK_DATA	-1
#endif
#ifndef SEEK_HOLE
#define	SEEK_HOLE	-1
#endif
59
/*
 * A contiguous run of image blocks.  ch_type selects how the contents
 * are obtained and thereby which member of ch_u (if any) is meaningful:
 * nothing for CH_TYPE_ZEROES, ch_u.file for CH_TYPE_FILE and ch_u.mem
 * for CH_TYPE_MEMORY.
 */
struct chunk {
	TAILQ_ENTRY(chunk) ch_list;	/* Linkage on image_chunks. */
	size_t	ch_size;		/* Size of chunk in bytes. */
	lba_t	ch_block;		/* Block address in image. */
	union {
		struct {
			off_t	ofs;	/* Offset in backing file. */
			int	fd;	/* FD of backing file. */
		} file;
		struct {
			void	*ptr;	/* Pointer to data in memory */
		} mem;
	} ch_u;
	u_int	ch_type;		/* One of the CH_TYPE_* values below. */
#define	CH_TYPE_ZEROES		0	/* Chunk is a gap (no data). */
#define	CH_TYPE_FILE		1	/* File-backed chunk. */
#define	CH_TYPE_MEMORY		2	/* Memory-backed chunk */
};
78
/* List of chunks making up the image; initialized by image_init(). */
static TAILQ_HEAD(chunk_head, chunk) image_chunks;
/* Incremented each time a chunk is inserted into image_chunks. */
static u_int image_nchunks;

/* Path of the temporary swap file (mkstemp(3) template filled in). */
static char image_swap_file[PATH_MAX];
/* Descriptor of the swap file; -1 while no swap file is open. */
static int image_swap_fd = -1;
/* Page size as reported by getpagesize() in image_init(). */
static u_int image_swap_pgsz;
/* Number of bytes handed out so far by image_swap_alloc(). */
static off_t image_swap_size;

/* Image size in blocks, as recorded by image_set_size(). */
static lba_t image_size;
88
89static int
90is_empty_sector(void *buf)
91{
92	uint64_t *p = buf;
93	size_t n, max;
94
95	assert(((uintptr_t)p & 3) == 0);
96
97	max = secsz / sizeof(uint64_t);
98	for (n = 0; n < max; n++) {
99		if (p[n] != 0UL)
100			return (0);
101	}
102	return (1);
103}
104
/*
 * Swap file handling.
 */
108
109static off_t
110image_swap_alloc(size_t size)
111{
112	off_t ofs;
113	size_t unit;
114
115	unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
116	assert((unit & (unit - 1)) == 0);
117
118	size = (size + unit - 1) & ~(unit - 1);
119
120	ofs = image_swap_size;
121	image_swap_size += size;
122	if (ftruncate(image_swap_fd, image_swap_size) == -1) {
123		image_swap_size = ofs;
124		ofs = -1LL;
125	}
126	return (ofs);
127}
128
129/*
130 * Image chunk handling.
131 */
132
133static struct chunk *
134image_chunk_find(lba_t blk)
135{
136	static struct chunk *last = NULL;
137	struct chunk *ch;
138
139	ch = (last != NULL && last->ch_block <= blk)
140	    ? last : TAILQ_FIRST(&image_chunks);
141	while (ch != NULL) {
142		if (ch->ch_block <= blk &&
143		    (lba_t)(ch->ch_block + (ch->ch_size / secsz)) > blk) {
144			last = ch;
145			break;
146		}
147		ch = TAILQ_NEXT(ch, ch_list);
148	}
149	return (ch);
150}
151
152static size_t
153image_chunk_grow(struct chunk *ch, size_t sz)
154{
155	size_t dsz, newsz;
156
157	newsz = ch->ch_size + sz;
158	if (newsz > ch->ch_size) {
159		ch->ch_size = newsz;
160		return (0);
161	}
162	/* We would overflow -- create new chunk for remainder. */
163	dsz = SIZE_MAX - ch->ch_size;
164	assert(dsz < sz);
165	ch->ch_size = SIZE_MAX;
166	return (sz - dsz);
167}
168
169static struct chunk *
170image_chunk_memory(struct chunk *ch, lba_t blk)
171{
172	struct chunk *new;
173	void *ptr;
174
175	ptr = calloc(1, secsz);
176	if (ptr == NULL)
177		return (NULL);
178
179	if (ch->ch_block < blk) {
180		new = malloc(sizeof(*new));
181		if (new == NULL) {
182			free(ptr);
183			return (NULL);
184		}
185		memcpy(new, ch, sizeof(*new));
186		ch->ch_size = (blk - ch->ch_block) * secsz;
187		new->ch_block = blk;
188		new->ch_size -= ch->ch_size;
189		TAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list);
190		image_nchunks++;
191		ch = new;
192	}
193
194	if (ch->ch_size > secsz) {
195		new = malloc(sizeof(*new));
196		if (new == NULL) {
197			free(ptr);
198			return (NULL);
199		}
200		memcpy(new, ch, sizeof(*new));
201		ch->ch_size = secsz;
202		new->ch_block++;
203		new->ch_size -= secsz;
204		TAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list);
205		image_nchunks++;
206	}
207
208	ch->ch_type = CH_TYPE_MEMORY;
209	ch->ch_u.mem.ptr = ptr;
210	return (ch);
211}
212
213static int
214image_chunk_skipto(lba_t to)
215{
216	struct chunk *ch;
217	lba_t from;
218	size_t sz;
219
220	ch = TAILQ_LAST(&image_chunks, chunk_head);
221	from = (ch != NULL) ? ch->ch_block + (ch->ch_size / secsz) : 0LL;
222
223	assert(from <= to);
224
225	/* Nothing to do? */
226	if (from == to)
227		return (0);
228	/* Avoid bugs due to overflows. */
229	if ((uintmax_t)(to - from) > (uintmax_t)(SIZE_MAX / secsz))
230		return (EFBIG);
231	sz = (to - from) * secsz;
232	if (ch != NULL && ch->ch_type == CH_TYPE_ZEROES) {
233		sz = image_chunk_grow(ch, sz);
234		if (sz == 0)
235			return (0);
236		from = ch->ch_block + (ch->ch_size / secsz);
237	}
238	ch = malloc(sizeof(*ch));
239	if (ch == NULL)
240		return (ENOMEM);
241	memset(ch, 0, sizeof(*ch));
242	ch->ch_block = from;
243	ch->ch_size = sz;
244	ch->ch_type = CH_TYPE_ZEROES;
245	TAILQ_INSERT_TAIL(&image_chunks, ch, ch_list);
246	image_nchunks++;
247	return (0);
248}
249
250static int
251image_chunk_append(lba_t blk, size_t sz, off_t ofs, int fd)
252{
253	struct chunk *ch;
254
255	ch = TAILQ_LAST(&image_chunks, chunk_head);
256	if (ch != NULL && ch->ch_type == CH_TYPE_FILE) {
257		if (fd == ch->ch_u.file.fd &&
258		    blk == (lba_t)(ch->ch_block + (ch->ch_size / secsz)) &&
259		    ofs == (off_t)(ch->ch_u.file.ofs + ch->ch_size)) {
260			sz = image_chunk_grow(ch, sz);
261			if (sz == 0)
262				return (0);
263			blk = ch->ch_block + (ch->ch_size / secsz);
264			ofs = ch->ch_u.file.ofs + ch->ch_size;
265		}
266	}
267	ch = malloc(sizeof(*ch));
268	if (ch == NULL)
269		return (ENOMEM);
270	memset(ch, 0, sizeof(*ch));
271	ch->ch_block = blk;
272	ch->ch_size = sz;
273	ch->ch_type = CH_TYPE_FILE;
274	ch->ch_u.file.ofs = ofs;
275	ch->ch_u.file.fd = fd;
276	TAILQ_INSERT_TAIL(&image_chunks, ch, ch_list);
277	image_nchunks++;
278	return (0);
279}
280
281static int
282image_chunk_copyin(lba_t blk, void *buf, size_t sz, off_t ofs, int fd)
283{
284	uint8_t *p = buf;
285	int error;
286
287	error = 0;
288	sz = (sz + secsz - 1) & ~(secsz - 1);
289	while (!error && sz > 0) {
290		if (is_empty_sector(p))
291			error = image_chunk_skipto(blk + 1);
292		else
293			error = image_chunk_append(blk, secsz, ofs, fd);
294		blk++;
295		p += secsz;
296		sz -= secsz;
297		ofs += secsz;
298	}
299	return (error);
300}
301
302/*
303 * File mapping support.
304 */
305
306static void *
307image_file_map(int fd, off_t ofs, size_t sz, off_t *iofp)
308{
309	void *ptr;
310	size_t unit;
311	int flags, prot;
312	off_t x;
313
314	/* On Linux anyway ofs must also be page aligned */
315	if ((x = (ofs % image_swap_pgsz)) != 0) {
316	    ofs -= x;
317	    sz += x;
318	    *iofp = x;
319	} else
320	    *iofp = 0;
321	unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
322	assert((unit & (unit - 1)) == 0);
323
324	flags = MAP_NOCORE | MAP_NOSYNC | MAP_SHARED;
325	/* Allow writing to our swap file only. */
326	prot = PROT_READ | ((fd == image_swap_fd) ? PROT_WRITE : 0);
327	sz = (sz + unit - 1) & ~(unit - 1);
328	ptr = mmap(NULL, sz, prot, flags, fd, ofs);
329	return ((ptr == MAP_FAILED) ? NULL : ptr);
330}
331
332static int
333image_file_unmap(void *buffer, size_t sz)
334{
335	size_t unit;
336
337	unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
338	sz = (sz + unit - 1) & ~(unit - 1);
339	if (madvise(buffer, sz, MADV_DONTNEED) != 0)
340		warn("madvise");
341	munmap(buffer, sz);
342	return (0);
343}
344
345/*
346 * Input/source file handling.
347 */
348
349static int
350image_copyin_stream(lba_t blk, int fd, uint64_t *sizep)
351{
352	char *buffer;
353	uint64_t bytesize;
354	off_t swofs;
355	size_t iosz;
356	ssize_t rdsz;
357	int error;
358	off_t iof;
359
360	/*
361	 * This makes sure we're doing I/O in multiples of the page
362	 * size as well as of the sector size. 2MB is the minimum
363	 * by virtue of secsz at least 512 bytes and the page size
364	 * at least 4K bytes.
365	 */
366	iosz = secsz * image_swap_pgsz;
367
368	bytesize = 0;
369	do {
370		swofs = image_swap_alloc(iosz);
371		if (swofs == -1LL)
372			return (errno);
373		buffer = image_file_map(image_swap_fd, swofs, iosz, &iof);
374		if (buffer == NULL)
375			return (errno);
376		rdsz = read(fd, &buffer[iof], iosz);
377		if (rdsz > 0)
378			error = image_chunk_copyin(blk, &buffer[iof], rdsz, swofs,
379			    image_swap_fd);
380		else if (rdsz < 0)
381			error = errno;
382		else
383			error = 0;
384		image_file_unmap(buffer, iosz);
385		/* XXX should we relinguish unused swap space? */
386		if (error)
387			return (error);
388
389		bytesize += rdsz;
390		blk += (rdsz + secsz - 1) / secsz;
391	} while (rdsz > 0);
392
393	if (sizep != NULL)
394		*sizep = bytesize;
395	return (0);
396}
397
398static int
399image_copyin_mapped(lba_t blk, int fd, uint64_t *sizep)
400{
401	off_t cur, data, end, hole, pos, iof;
402	void *mp;
403	char *buf;
404	uint64_t bytesize;
405	size_t iosz, sz;
406	int error;
407
408	/*
409	 * We'd like to know the size of the file and we must
410	 * be able to seek in order to mmap(2). If this isn't
411	 * possible, then treat the file as a stream/pipe.
412	 */
413	end = lseek(fd, 0L, SEEK_END);
414	if (end == -1L)
415		return (image_copyin_stream(blk, fd, sizep));
416
417	/*
418	 * We need the file opened for the duration and our
419	 * caller is going to close the file. Make a dup(2)
420	 * so that control the faith of the descriptor.
421	 */
422	fd = dup(fd);
423	if (fd == -1)
424		return (errno);
425
426	iosz = secsz * image_swap_pgsz;
427
428	bytesize = 0;
429	cur = pos = 0;
430	error = 0;
431	while (!error && cur < end) {
432		hole = lseek(fd, cur, SEEK_HOLE);
433		if (hole == -1)
434			hole = end;
435		data = lseek(fd, cur, SEEK_DATA);
436		if (data == -1)
437			data = end;
438
439		/*
440		 * Treat the entire file as data if sparse files
441		 * are not supported by the underlying file system.
442		 */
443		if (hole == end && data == end)
444			data = cur;
445
446		if (cur == hole && data > hole) {
447			hole = pos;
448			pos = data & ~((uint64_t)secsz - 1);
449
450			blk += (pos - hole) / secsz;
451			error = image_chunk_skipto(blk);
452
453			bytesize += pos - hole;
454			cur = data;
455		} else if (cur == data && hole > data) {
456			data = pos;
457			pos = (hole + secsz - 1) & ~((uint64_t)secsz - 1);
458
459			while (data < pos) {
460				sz = (pos - data > (off_t)iosz)
461				    ? iosz : (size_t)(pos - data);
462
463				buf = mp = image_file_map(fd, data, sz, &iof);
464				if (mp != NULL) {
465					buf += iof;
466					error = image_chunk_copyin(blk, buf,
467					    sz, data, fd);
468					image_file_unmap(mp, sz);
469				} else
470					error = errno;
471
472				blk += sz / secsz;
473				bytesize += sz;
474				data += sz;
475			}
476			cur = hole;
477		} else {
478			/*
479			 * I don't know what this means or whether it
480			 * can happen at all...
481			 */
482			assert(0);
483		}
484	}
485	if (error)
486		close(fd);
487	if (!error && sizep != NULL)
488		*sizep = bytesize;
489	return (error);
490}
491
492int
493image_copyin(lba_t blk, int fd, uint64_t *sizep)
494{
495	struct stat sb;
496	int error;
497
498	error = image_chunk_skipto(blk);
499	if (!error) {
500		if (fstat(fd, &sb) == -1 || !S_ISREG(sb.st_mode))
501			error = image_copyin_stream(blk, fd, sizep);
502		else
503			error = image_copyin_mapped(blk, fd, sizep);
504	}
505	return (error);
506}
507
508/*
509 * Output/sink file handling.
510 */
511
512int
513image_copyout(int fd)
514{
515	int error;
516
517	error = image_copyout_region(fd, 0, image_size);
518	if (!error)
519		error = image_copyout_done(fd);
520	return (error);
521}
522
/*
 * Finish writing to fd by setting the length of the file to the
 * current file position.
 *
 * Nothing is done, and 0 is returned, when the position cannot be
 * obtained with lseek(2).  Otherwise 0 is returned on success and
 * errno if ftruncate(2) failed.
 */
int
image_copyout_done(int fd)
{
	off_t here;

	here = lseek(fd, 0L, SEEK_CUR);
	if (here == -1)
		return (0);
	if (ftruncate(fd, here) == -1)
		return (errno);
	return (0);
}
535
/*
 * Write the size bytes at ptr to fd.
 *
 * write(2) is allowed to transfer fewer bytes than requested, which
 * is common for pipes and sockets.  Keep writing until everything has
 * been transferred, and retry writes interrupted by a signal.
 *
 * Returns 0 once all bytes are written, the errno of a failing
 * write(2), or EIO if write(2) makes no progress.
 */
static int
image_copyout_memory(int fd, size_t size, void *ptr)
{
	const char *p = ptr;
	ssize_t wrsz;

	while (size > 0) {
		wrsz = write(fd, p, size);
		if (wrsz == -1) {
			if (errno == EINTR)
				continue;
			return (errno);
		}
		/* Guard against looping forever without progress. */
		if (wrsz == 0)
			return (EIO);
		p += wrsz;
		size -= (size_t)wrsz;
	}
	return (0);
}
544
545int
546image_copyout_zeroes(int fd, size_t count)
547{
548	static uint8_t *zeroes = NULL;
549	size_t sz;
550	int error;
551
552	if (lseek(fd, (off_t)count, SEEK_CUR) != -1)
553		return (0);
554
555	/*
556	 * If we can't seek, we must write.
557	 */
558
559	if (zeroes == NULL) {
560		zeroes = calloc(1, secsz);
561		if (zeroes == NULL)
562			return (ENOMEM);
563	}
564
565	while (count > 0) {
566		sz = (count > secsz) ? secsz : count;
567		error = image_copyout_memory(fd, sz, zeroes);
568		if (error)
569			return (error);
570		count -= sz;
571	}
572	return (0);
573}
574
575static int
576image_copyout_file(int fd, size_t size, int ifd, off_t iofs)
577{
578	void *mp;
579	char *buf;
580	size_t iosz, sz;
581	int error;
582	off_t iof;
583
584	iosz = secsz * image_swap_pgsz;
585
586	while (size > 0) {
587		sz = (size > iosz) ? iosz : size;
588		buf = mp = image_file_map(ifd, iofs, sz, &iof);
589		if (buf == NULL)
590			return (errno);
591		buf += iof;
592		error = image_copyout_memory(fd, sz, buf);
593		image_file_unmap(mp, sz);
594		if (error)
595			return (error);
596		size -= sz;
597		iofs += sz;
598	}
599	return (0);
600}
601
602int
603image_copyout_region(int fd, lba_t blk, lba_t size)
604{
605	struct chunk *ch;
606	size_t ofs, sz;
607	int error;
608
609	size *= secsz;
610
611	error = 0;
612	while (!error && size > 0) {
613		ch = image_chunk_find(blk);
614		if (ch == NULL) {
615			error = EINVAL;
616			break;
617		}
618		ofs = (blk - ch->ch_block) * secsz;
619		sz = ch->ch_size - ofs;
620		sz = ((lba_t)sz < size) ? sz : (size_t)size;
621		switch (ch->ch_type) {
622		case CH_TYPE_ZEROES:
623			error = image_copyout_zeroes(fd, sz);
624			break;
625		case CH_TYPE_FILE:
626			error = image_copyout_file(fd, sz, ch->ch_u.file.fd,
627			    ch->ch_u.file.ofs + ofs);
628			break;
629		case CH_TYPE_MEMORY:
630			error = image_copyout_memory(fd, sz, ch->ch_u.mem.ptr);
631			break;
632		default:
633			assert(0);
634		}
635		size -= sz;
636		blk += sz / secsz;
637	}
638	return (error);
639}
640
641int
642image_data(lba_t blk, lba_t size)
643{
644	struct chunk *ch;
645	lba_t lim;
646
647	while (1) {
648		ch = image_chunk_find(blk);
649		if (ch == NULL)
650			return (0);
651		if (ch->ch_type != CH_TYPE_ZEROES)
652			return (1);
653		lim = ch->ch_block + (ch->ch_size / secsz);
654		if (lim >= blk + size)
655			return (0);
656		size -= lim - blk;
657		blk = lim;
658	}
659	/*NOTREACHED*/
660}
661
662lba_t
663image_get_size(void)
664{
665
666	return (image_size);
667}
668
669int
670image_set_size(lba_t blk)
671{
672	int error;
673
674	error = image_chunk_skipto(blk);
675	if (!error)
676		image_size = blk;
677	return (error);
678}
679
680int
681image_write(lba_t blk, void *buf, ssize_t len)
682{
683	struct chunk *ch;
684
685	while (len > 0) {
686		if (!is_empty_sector(buf)) {
687			ch = image_chunk_find(blk);
688			if (ch == NULL)
689				return (ENXIO);
690			/* We may not be able to write to files. */
691			if (ch->ch_type == CH_TYPE_FILE)
692				return (EINVAL);
693			if (ch->ch_type == CH_TYPE_ZEROES) {
694				ch = image_chunk_memory(ch, blk);
695				if (ch == NULL)
696					return (ENOMEM);
697			}
698			assert(ch->ch_type == CH_TYPE_MEMORY);
699			memcpy(ch->ch_u.mem.ptr, buf, secsz);
700		}
701		blk++;
702		buf = (char *)buf + secsz;
703		len--;
704	}
705	return (0);
706}
707
708static void
709image_cleanup(void)
710{
711	struct chunk *ch;
712
713	while ((ch = TAILQ_FIRST(&image_chunks)) != NULL) {
714		switch (ch->ch_type) {
715		case CH_TYPE_FILE:
716			/* We may be closing the same file multiple times. */
717			if (ch->ch_u.file.fd != -1)
718				close(ch->ch_u.file.fd);
719			break;
720		case CH_TYPE_MEMORY:
721			free(ch->ch_u.mem.ptr);
722			break;
723		default:
724			break;
725		}
726		TAILQ_REMOVE(&image_chunks, ch, ch_list);
727		free(ch);
728	}
729	if (image_swap_fd != -1)
730		close(image_swap_fd);
731	unlink(image_swap_file);
732}
733
734int
735image_init(void)
736{
737	const char *tmpdir;
738
739	TAILQ_INIT(&image_chunks);
740	image_nchunks = 0;
741
742	image_swap_size = 0;
743	image_swap_pgsz = getpagesize();
744
745	if (atexit(image_cleanup) == -1)
746		return (errno);
747	if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
748		tmpdir = _PATH_TMP;
749	snprintf(image_swap_file, sizeof(image_swap_file), "%s/mkimg-XXXXXX",
750	    tmpdir);
751	image_swap_fd = mkstemp(image_swap_file);
752	if (image_swap_fd == -1)
753		return (errno);
754	return (0);
755}
756