1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25#include "test.h"
26
27#include <errno.h>
28#include <stdlib.h>
29#include <string.h>
30
31/*
32 * This is a somewhat tricky test that verifies the ability to
33 * write and read very large entries to tar archives.  It
34 * writes entries from 2GB up to 1TB to an archive in memory.
35 * The memory storage here carefully avoids actually storing
36 * any part of the file bodies, so it runs very quickly and requires
37 * very little memory.  If you're willing to wait a few minutes,
38 * you should be able to exercise petabyte entries with this code.
39 */
40
/*
 * Template block of file data.  Every entry body is produced by
 * writing this one buffer repeatedly, and memory_write() recognizes
 * file data by checking whether a pointer falls inside it.
 */
static void *filedata;		/* Start of the template block. */
static size_t filedatasize;	/* Length of the template block, in bytes. */
46
/*
 * One node of the in-memory archive: a chunk of bytes emitted by
 * libarchive, plus a count of the file-data bytes written after it.
 * Only the count is kept for file data; the bytes are never stored.
 */
struct memblock {
	struct memblock	*next;		/* Following node, NULL at the tail. */
	size_t		 size;		/* Number of bytes held in buff. */
	void		*buff;		/* Private copy of the emitted bytes. */
	int64_t		 filebytes;	/* File-data bytes following buff. */
};
57
/*
 * The whole in-memory archive: a singly linked list of memblocks
 * together with the bookkeeping used while reading it back.
 */
struct memdata {
	int64_t		 filebytes;	/* File-data bytes still to hand out. */
	void		*buff;		/* Buffer last returned by memory_read(). */
	struct memblock	*first;		/* Head of the list; next block to read. */
	struct memblock	*last;		/* Tail of the list; writes append here. */
};
68
/*
 * Binary size units.  All are typed int64_t, so expressions such as
 * 4 * GB are evaluated in 64 bits.  Each unit is 1024 of the previous.
 */
#define KB ((int64_t)1024)
#define MB (KB * KB)
#define GB (MB * KB)
#define TB (GB * KB)
74
/*
 * Callbacks handed to archive_write_open() and archive_read_open2(),
 * declared in the order in which they are defined.
 */
static ssize_t	memory_write(struct archive *a, void *_private,
		    const void *buff, size_t size);
static ssize_t	memory_read(struct archive *a, void *_private,
		    const void **buff);
static int64_t	memory_read_skip(struct archive *a, void *_private,
		    int64_t skip);
78
79
80static ssize_t
81memory_write(struct archive *a, void *_private, const void *buff, size_t size)
82{
83	struct memdata *private = _private;
84	struct memblock *block;
85
86	(void)a;
87
88	/*
89	 * Since libarchive tries to behave in a zero-copy manner, if
90	 * you give a pointer to filedata to the library, a pointer
91	 * into that data will (usually) pop out here.  This way, we
92	 * can tell the difference between filedata and library header
93	 * and metadata.
94	 */
95	if ((const char *)filedata <= (const char *)buff
96	    && (const char *)buff < (const char *)filedata + filedatasize) {
97		/* We don't need to store a block of file data. */
98		private->last->filebytes += (int64_t)size;
99	} else {
100		/* Yes, we're assuming the very first write is metadata. */
101		/* It's header or metadata, copy and save it. */
102		block = (struct memblock *)malloc(sizeof(*block));
103		memset(block, 0, sizeof(*block));
104		block->size = size;
105		block->buff = malloc(size);
106		memcpy(block->buff, buff, size);
107		if (private->last == NULL) {
108			private->first = private->last = block;
109		} else {
110			private->last->next = block;
111			private->last = block;
112		}
113		block->next = NULL;
114	}
115	return ((long)size);
116}
117
118static ssize_t
119memory_read(struct archive *a, void *_private, const void **buff)
120{
121	struct memdata *private = _private;
122	struct memblock *block;
123	ssize_t size;
124
125	(void)a;
126
127	free(private->buff);
128	private->buff = NULL;
129	if (private->first == NULL) {
130		private->last = NULL;
131		return (ARCHIVE_EOF);
132	}
133	if (private->filebytes > 0) {
134		/*
135		 * We're returning file bytes, simulate it by
136		 * passing blocks from the template data.
137		 */
138		if (private->filebytes > (int64_t)filedatasize)
139			size = (ssize_t)filedatasize;
140		else
141			size = (ssize_t)private->filebytes;
142		private->filebytes -= size;
143		*buff = filedata;
144	} else {
145		/*
146		 * We need to get some real data to return.
147		 */
148		block = private->first;
149		private->first = block->next;
150		size = (ssize_t)block->size;
151		if (block->buff != NULL) {
152			private->buff = block->buff;
153			*buff = block->buff;
154		} else {
155			private->buff = NULL;
156			*buff = filedata;
157		}
158		private->filebytes = block->filebytes;
159		free(block);
160	}
161	return (size);
162}
163
164
165static int64_t
166memory_read_skip(struct archive *a, void *_private, int64_t skip)
167{
168	struct memdata *private = _private;
169
170	(void)a;
171
172	if (private->first == NULL) {
173		private->last = NULL;
174		return (0);
175	}
176	if (private->filebytes > 0) {
177		if (private->filebytes < skip)
178			skip = (off_t)private->filebytes;
179		private->filebytes -= skip;
180	} else {
181		skip = 0;
182	}
183	return (skip);
184}
185
186DEFINE_TEST(test_tar_large)
187{
188	/* The sizes of the entries we're going to generate. */
189	static int64_t tests[] = {
190		/* Test for 32-bit signed overflow. */
191		2 * GB - 1, 2 * GB, 2 * GB + 1,
192		/* Test for 32-bit unsigned overflow. */
193		4 * GB - 1, 4 * GB, 4 * GB + 1,
194		/* 8GB is the "official" max for ustar. */
195		8 * GB - 1, 8 * GB, 8 * GB + 1,
196		/* Bend ustar a tad and you can get 64GB (12 octal digits). */
197		64 * GB - 1, 64 * GB,
198		/* And larger entries that require non-ustar extensions. */
199		256 * GB, 1 * TB, 0 };
200	int i;
201	char namebuff[64];
202	struct memdata memdata;
203	struct archive_entry *ae;
204	struct archive *a;
205	int64_t  filesize;
206	size_t writesize;
207
208	filedatasize = (size_t)(1 * MB);
209	filedata = malloc(filedatasize);
210	memset(filedata, 0xAA, filedatasize);
211	memset(&memdata, 0, sizeof(memdata));
212
213	/*
214	 * Open an archive for writing.
215	 */
216	a = archive_write_new();
217	archive_write_set_format_pax_restricted(a);
218	archive_write_set_bytes_per_block(a, 0); /* No buffering. */
219	archive_write_open(a, &memdata, NULL, memory_write, NULL);
220
221	/*
222	 * Write a series of large files to it.
223	 */
224	for (i = 0; tests[i] != 0; i++) {
225		assert((ae = archive_entry_new()) != NULL);
226		snprintf(namebuff, sizeof(namebuff), "file_%d", i);
227		archive_entry_copy_pathname(ae, namebuff);
228		archive_entry_set_mode(ae, S_IFREG | 0755);
229		filesize = tests[i];
230
231		archive_entry_set_size(ae, filesize);
232
233		assertA(0 == archive_write_header(a, ae));
234		archive_entry_free(ae);
235
236		/*
237		 * Write the actual data to the archive.
238		 */
239		while (filesize > 0) {
240			writesize = filedatasize;
241			if ((int64_t)writesize > filesize)
242				writesize = (size_t)filesize;
243			assertA((int)writesize
244			    == archive_write_data(a, filedata, writesize));
245			filesize -= writesize;
246		}
247	}
248
249	assert((ae = archive_entry_new()) != NULL);
250	archive_entry_copy_pathname(ae, "lastfile");
251	archive_entry_set_mode(ae, S_IFREG | 0755);
252	assertA(0 == archive_write_header(a, ae));
253	archive_entry_free(ae);
254
255
256	/* Close out the archive. */
257	assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a));
258	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
259
260	/*
261	 * Open the same archive for reading.
262	 */
263	a = archive_read_new();
264	archive_read_support_format_tar(a);
265	archive_read_open2(a, &memdata, NULL,
266	    memory_read, memory_read_skip, NULL);
267
268	/*
269	 * Read entries back.
270	 */
271	for (i = 0; tests[i] > 0; i++) {
272		assertEqualIntA(a, 0, archive_read_next_header(a, &ae));
273		snprintf(namebuff, sizeof(namebuff), "file_%d", i);
274		assertEqualString(namebuff, archive_entry_pathname(ae));
275		assert(tests[i] == archive_entry_size(ae));
276	}
277	assertEqualIntA(a, 0, archive_read_next_header(a, &ae));
278	assertEqualString("lastfile", archive_entry_pathname(ae));
279
280	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
281
282	/* Close out the archive. */
283	assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a));
284	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
285
286	free(memdata.buff);
287	free(filedata);
288}
289