1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25#include "test.h"
26__FBSDID("$FreeBSD: head/lib/libarchive/test/test_tar_large.c 201247 2009-12-30 05:59:21Z kientzle $");
27
28#include <errno.h>
29#include <stdlib.h>
30#include <string.h>
31
32/*
33 * This is a somewhat tricky test that verifies the ability to
34 * write and read very large entries to tar archives.  It
35 * writes entries from 2GB up to 1TB to an archive in memory.
36 * The memory storage here carefully avoids actually storing
37 * any part of the file bodies, so it runs very quickly and requires
38 * very little memory.  If you're willing to wait a few minutes,
39 * you should be able to exercise petabyte entries with this code.
40 */
41
/*
 * Template block of file data.  The test allocates it once and hands
 * the same buffer to libarchive repeatedly to build every file body.
 */
static void *filedata;		/* The template buffer itself. */
static size_t filedatasize;	/* Size of the template buffer, in bytes. */
47
/*
 * One node of the in-memory archive: a copy of a chunk of bytes
 * produced by libarchive, plus a count of the file-data bytes that
 * were written immediately after that chunk (those bytes themselves
 * are never stored).
 */
struct memblock {
	/* Next block in write order, or NULL for the tail. */
	struct memblock *next;
	/* Number of bytes held in buff. */
	size_t	size;
	/* Heap copy of the bytes libarchive wrote. */
	void *buff;
	/* Count of file-data bytes that follow this block. */
	int64_t filebytes;
};
58
/*
 * The whole in-memory archive: a singly linked list of memblocks
 * together with the bookkeeping the read side needs.
 */
struct memdata {
	/* File-data bytes still to be handed out before the next block. */
	int64_t filebytes;
	/* Buffer returned by the most recent read; freed on the next read. */
	void *buff;
	/* Head of the block list (next block to be read). */
	struct memblock *first;
	/* Tail of the block list (where writes append). */
	struct memblock *last;
};
69
/*
 * Binary size units.  Each one is typed int64_t so that expressions
 * such as 2 * GB are evaluated in 64 bits.
 */
#define KB ((int64_t)1024)
#define MB (KB * (int64_t)1024)
#define GB (MB * (int64_t)1024)
#define TB (GB * (int64_t)1024)
75
/*
 * Client callbacks passed to libarchive for the in-memory store.
 * The skip callback's signature depends on the libarchive version.
 */
static ssize_t	memory_write(struct archive *a, void *_private,
		    const void *buff, size_t size);
static ssize_t	memory_read(struct archive *a, void *_private,
		    const void **buff);
#if ARCHIVE_VERSION_NUMBER < 2000000
static ssize_t	memory_read_skip(struct archive *a, void *_private,
		    size_t request);
#else
static off_t	memory_read_skip(struct archive *a, void *_private,
		    off_t request);
#endif
83
84
85static ssize_t
86memory_write(struct archive *a, void *_private, const void *buff, size_t size)
87{
88	struct memdata *private = _private;
89	struct memblock *block;
90
91	(void)a;
92
93	/*
94	 * Since libarchive tries to behave in a zero-copy manner, if
95	 * you give a pointer to filedata to the library, a pointer
96	 * into that data will (usually) pop out here.  This way, we
97	 * can tell the difference between filedata and library header
98	 * and metadata.
99	 */
100	if ((const char *)filedata <= (const char *)buff
101	    && (const char *)buff < (const char *)filedata + filedatasize) {
102		/* We don't need to store a block of file data. */
103		private->last->filebytes += (int64_t)size;
104	} else {
105		/* Yes, we're assuming the very first write is metadata. */
106		/* It's header or metadata, copy and save it. */
107		block = (struct memblock *)malloc(sizeof(*block));
108		memset(block, 0, sizeof(*block));
109		block->size = size;
110		block->buff = malloc(size);
111		memcpy(block->buff, buff, size);
112		if (private->last == NULL) {
113			private->first = private->last = block;
114		} else {
115			private->last->next = block;
116			private->last = block;
117		}
118		block->next = NULL;
119	}
120	return ((long)size);
121}
122
123static ssize_t
124memory_read(struct archive *a, void *_private, const void **buff)
125{
126	struct memdata *private = _private;
127	struct memblock *block;
128	ssize_t size;
129
130	(void)a;
131
132	free(private->buff);
133	private->buff = NULL;
134	if (private->first == NULL) {
135		private->last = NULL;
136		return (ARCHIVE_EOF);
137	}
138	if (private->filebytes > 0) {
139		/*
140		 * We're returning file bytes, simulate it by
141		 * passing blocks from the template data.
142		 */
143		if (private->filebytes > (int64_t)filedatasize)
144			size = (ssize_t)filedatasize;
145		else
146			size = (ssize_t)private->filebytes;
147		private->filebytes -= size;
148		*buff = filedata;
149	} else {
150		/*
151		 * We need to get some real data to return.
152		 */
153		block = private->first;
154		private->first = block->next;
155		size = (ssize_t)block->size;
156		if (block->buff != NULL) {
157			private->buff = block->buff;
158			*buff = block->buff;
159		} else {
160			private->buff = NULL;
161			*buff = filedata;
162		}
163		private->filebytes = block->filebytes;
164		free(block);
165	}
166	return (size);
167}
168
169
#if ARCHIVE_VERSION_NUMBER < 2000000
/*
 * Skip callback for the older callback signature: it never skips
 * anything and always reports 0 bytes skipped.
 */
static ssize_t
memory_read_skip(struct archive *a, void *private, size_t skip)
{
	/* None of the arguments are used. */
	(void)a;
	(void)private;
	(void)skip;

	return (0);
}
#else
/*
 * Skip callback: advance over up to 'skip' bytes of simulated file
 * data without returning them.
 *
 * Only pending file bytes can be skipped; stored blocks are never
 * skipped.  Returns the number of bytes actually skipped, which may
 * be less than requested and is 0 when no file bytes are pending or
 * the block list is empty.
 */
static off_t
memory_read_skip(struct archive *a, void *_private, off_t skip)
{
	struct memdata *store = _private;
	off_t skipped;

	(void)a; /* UNUSED */

	/* Nothing left in the store at all. */
	if (store->first == NULL) {
		store->last = NULL;
		return (0);
	}

	/* Positioned on a stored block rather than on file bytes. */
	if (store->filebytes <= 0)
		return (0);

	/* Clamp the request to the file bytes that remain. */
	skipped = skip;
	if (store->filebytes < skipped)
		skipped = (off_t)store->filebytes;
	store->filebytes -= skipped;
	return (skipped);
}
#endif
201
202DEFINE_TEST(test_tar_large)
203{
204	/* The sizes of the entries we're going to generate. */
205	static int64_t tests[] = {
206		/* Test for 32-bit signed overflow. */
207		2 * GB - 1, 2 * GB, 2 * GB + 1,
208		/* Test for 32-bit unsigned overflow. */
209		4 * GB - 1, 4 * GB, 4 * GB + 1,
210		/* 8GB is the "official" max for ustar. */
211		8 * GB - 1, 8 * GB, 8 * GB + 1,
212		/* Bend ustar a tad and you can get 64GB (12 octal digits). */
213		64 * GB - 1, 64 * GB,
214		/* And larger entries that require non-ustar extensions. */
215		256 * GB, 1 * TB, 0 };
216	int i;
217	char namebuff[64];
218	struct memdata memdata;
219	struct archive_entry *ae;
220	struct archive *a;
221	int64_t  filesize;
222	size_t writesize;
223
224	filedatasize = (size_t)(1 * MB);
225	filedata = malloc(filedatasize);
226	memset(filedata, 0xAA, filedatasize);
227	memset(&memdata, 0, sizeof(memdata));
228
229	/*
230	 * Open an archive for writing.
231	 */
232	a = archive_write_new();
233	archive_write_set_format_pax_restricted(a);
234	archive_write_set_bytes_per_block(a, 0); /* No buffering. */
235	archive_write_open(a, &memdata, NULL, memory_write, NULL);
236
237	/*
238	 * Write a series of large files to it.
239	 */
240	for (i = 0; tests[i] != 0; i++) {
241		assert((ae = archive_entry_new()) != NULL);
242		sprintf(namebuff, "file_%d", i);
243		archive_entry_copy_pathname(ae, namebuff);
244		archive_entry_set_mode(ae, S_IFREG | 0755);
245		filesize = tests[i];
246
247		archive_entry_set_size(ae, filesize);
248
249		assertA(0 == archive_write_header(a, ae));
250		archive_entry_free(ae);
251
252		/*
253		 * Write the actual data to the archive.
254		 */
255		while (filesize > 0) {
256			writesize = filedatasize;
257			if ((int64_t)writesize > filesize)
258				writesize = (size_t)filesize;
259			assertA((int)writesize
260			    == archive_write_data(a, filedata, writesize));
261			filesize -= writesize;
262		}
263	}
264
265	assert((ae = archive_entry_new()) != NULL);
266	archive_entry_copy_pathname(ae, "lastfile");
267	archive_entry_set_mode(ae, S_IFREG | 0755);
268	assertA(0 == archive_write_header(a, ae));
269	archive_entry_free(ae);
270
271
272	/* Close out the archive. */
273	assertA(0 == archive_write_close(a));
274#if ARCHIVE_VERSION_NUMBER < 2000000
275	archive_write_finish(a);
276#else
277	assertA(0 == archive_write_finish(a));
278#endif
279
280	/*
281	 * Open the same archive for reading.
282	 */
283	a = archive_read_new();
284	archive_read_support_format_tar(a);
285	archive_read_open2(a, &memdata, NULL,
286	    memory_read, memory_read_skip, NULL);
287
288	/*
289	 * Read entries back.
290	 */
291	for (i = 0; tests[i] > 0; i++) {
292		assertEqualIntA(a, 0, archive_read_next_header(a, &ae));
293		sprintf(namebuff, "file_%d", i);
294		assertEqualString(namebuff, archive_entry_pathname(ae));
295		assert(tests[i] == archive_entry_size(ae));
296	}
297	assertEqualIntA(a, 0, archive_read_next_header(a, &ae));
298	assertEqualString("lastfile", archive_entry_pathname(ae));
299
300	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
301
302	/* Close out the archive. */
303	assertA(0 == archive_read_close(a));
304#if ARCHIVE_VERSION_NUMBER < 2000000
305	archive_read_finish(a);
306#else
307	assertA(0 == archive_read_finish(a));
308#endif
309
310	free(memdata.buff);
311	free(filedata);
312}
313