/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2019 The FreeBSD Foundation
 *
 * This software was developed by BFF Storage Systems, LLC under sponsorship
 * from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

extern "C" {
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/sysctl.h>

#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
}

#include "mockfs.hh"
#include "utils.hh"

/*
 * For testing I/O like fsx does, but deterministically and without a real
 * underlying file system
 */

using namespace testing;

const char FULLPATH[] = "mountpoint/some_file.txt";
const char RELPATH[] = "some_file.txt";
const uint64_t ino = 42;

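/*
 * Compare a buffer read from the file under test against the same region of
 * the control file, failing the test at the first mismatching byte.
 */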
static void compare(const void *tbuf, const void *controlbuf, off_t baseofs,
	ssize_t size)
{
	int i;

	for (i = 0; i < size; i++) {
		if (((const char*)tbuf)[i] != ((const char*)controlbuf)[i]) {
			off_t ofs = baseofs + i;
			FAIL() << "miscompare at offset "
			       << std::hex
			       << std::showbase
			       << ofs
			       << ".  expected = "
			       << std::setw(2)
			       << (unsigned)((const uint8_t*)controlbuf)[i]
			       << " got = "
			       << (unsigned)((const uint8_t*)tbuf)[i];
		}
	}
}

typedef tuple<bool, uint32_t, cache_mode> IoParam;

class Io: public FuseTest, public WithParamInterface<IoParam> {
public:
int m_backing_fd, m_control_fd, m_test_fd;
off_t m_filesize;
bool m_direct_io;

Io(): m_backing_fd(-1), m_control_fd(-1), m_test_fd(-1), m_filesize(0),
	m_direct_io(false) {};

void SetUp()
{
	m_backing_fd = open("backing_file", O_RDWR | O_CREAT | O_TRUNC, 0644);
	if (m_backing_fd < 0)
		FAIL() << strerror(errno);
	m_control_fd = open("control", O_RDWR | O_CREAT | O_TRUNC, 0644);
	if (m_control_fd < 0)
		FAIL() << strerror(errno);
	srandom(22'9'1982);	// Seed with my birthday

	if (get<0>(GetParam()))
		m_init_flags |= FUSE_ASYNC_READ;
	m_maxwrite = get<1>(GetParam());
	switch (get<2>(GetParam())) {
		case Uncached:
			m_direct_io = true;
			break;
		case WritebackAsync:
			m_async = true;
			/* FALLTHROUGH */
		case Writeback:
			m_init_flags |= FUSE_WRITEBACK_CACHE;
			/* FALLTHROUGH */
		case Writethrough:
			break;
		default:
			FAIL() << "Unknown cache mode";
	}

	FuseTest::SetUp();
	if (IsSkipped())
		return;

	if (verbosity > 0) {
		printf("Test Parameters: init_flags=%#x maxwrite=%#x "
		    "%sasync cache=%s\n",
		    m_init_flags, m_maxwrite, m_async ? "" : "no",
		    cache_mode_to_s(get<2>(GetParam())));
	}

	expect_lookup(RELPATH, ino, S_IFREG | 0644, 0, 1);
	expect_open(ino, m_direct_io ? FOPEN_DIRECT_IO : 0, 1);
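	/*
	 * Mirror every FUSE_WRITE to the backing file, which serves as the
	 * source of truth for later reads.
	 */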
	EXPECT_CALL(*m_mock, process(
		ResultOf([=](auto in) {
			return (in.header.opcode == FUSE_WRITE &&
				in.header.nodeid == ino);
		}, Eq(true)),
		_)
	).WillRepeatedly(Invoke(ReturnImmediate([=](auto in, auto& out) {
		const char *buf = (const char*)in.body.bytes +
			sizeof(struct fuse_write_in);
		ssize_t isize = in.body.write.size;
		off_t iofs = in.body.write.offset;

		ASSERT_EQ(isize, pwrite(m_backing_fd, buf, isize, iofs))
			<< strerror(errno);
		SET_OUT_HEADER_LEN(out, write);
		out.body.write.size = isize;
	})));
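	/* Satisfy FUSE_READ requests from the backing file. */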
	EXPECT_CALL(*m_mock, process(
		ResultOf([=](auto in) {
			return (in.header.opcode == FUSE_READ &&
				in.header.nodeid == ino);
		}, Eq(true)),
		_)
	).WillRepeatedly(Invoke(ReturnImmediate([=](auto in, auto& out) {
		ssize_t isize = in.body.read.size;
		off_t iofs = in.body.read.offset;
		void *buf = out.body.bytes;
		ssize_t osize;

		osize = pread(m_backing_fd, buf, isize, iofs);
		ASSERT_LE(0, osize) << strerror(errno);
		out.header.len = sizeof(struct fuse_out_header) + osize;
	})));
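	/* Apply size-changing FUSE_SETATTR (truncate) to the backing file. */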
	EXPECT_CALL(*m_mock, process(
		ResultOf([=](auto in) {
			return (in.header.opcode == FUSE_SETATTR &&
				in.header.nodeid == ino &&
				(in.body.setattr.valid & FATTR_SIZE));
		}, Eq(true)),
		_)
	).WillRepeatedly(Invoke(ReturnImmediate([=](auto in, auto& out) {
		ASSERT_EQ(0, ftruncate(m_backing_fd, in.body.setattr.size))
			<< strerror(errno);
		SET_OUT_HEADER_LEN(out, attr);
		out.body.attr.attr.ino = ino;
		out.body.attr.attr.mode = S_IFREG | 0755;
		out.body.attr.attr.size = in.body.setattr.size;
		out.body.attr.attr_valid = UINT64_MAX;
	})));
	/* Any test that close()s will send FUSE_FLUSH and FUSE_RELEASE */
	EXPECT_CALL(*m_mock, process(
		ResultOf([=](auto in) {
			return (in.header.opcode == FUSE_FLUSH &&
				in.header.nodeid == ino);
		}, Eq(true)),
		_)
	).WillRepeatedly(Invoke(ReturnErrno(0)));
	EXPECT_CALL(*m_mock, process(
		ResultOf([=](auto in) {
			return (in.header.opcode == FUSE_RELEASE &&
				in.header.nodeid == ino);
		}, Eq(true)),
		_)
	).WillRepeatedly(Invoke(ReturnErrno(0)));

	m_test_fd = open(FULLPATH, O_RDWR);
	EXPECT_LE(0, m_test_fd) << strerror(errno);
}

void TearDown()
{
	if (m_test_fd >= 0)
		close(m_test_fd);
	if (m_backing_fd >= 0)
		close(m_backing_fd);
	if (m_control_fd >= 0)
		close(m_control_fd);
	FuseTest::TearDown();
	leak(m_test_fd);
}

void do_closeopen()
{
	ASSERT_EQ(0, close(m_test_fd)) << strerror(errno);
	m_test_fd = open("backing_file", O_RDWR);
	ASSERT_LE(0, m_test_fd) << strerror(errno);

	ASSERT_EQ(0, close(m_control_fd)) << strerror(errno);
	m_control_fd = open("control", O_RDWR);
	ASSERT_LE(0, m_control_fd) << strerror(errno);
}

void do_ftruncate(off_t offs)
{
	ASSERT_EQ(0, ftruncate(m_test_fd, offs)) << strerror(errno);
	ASSERT_EQ(0, ftruncate(m_control_fd, offs)) << strerror(errno);
	m_filesize = offs;
}

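/*
 * Read size bytes at offset offs from the test file through a page-aligned
 * shared mapping and compare them against the control file.
 */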
void do_mapread(ssize_t size, off_t offs)
{
	void *control_buf, *p;
	off_t pg_offset, page_mask;
	size_t map_size;

	page_mask = getpagesize() - 1;
	pg_offset = offs & page_mask;
	map_size = pg_offset + size;

	p = mmap(NULL, map_size, PROT_READ, MAP_FILE | MAP_SHARED, m_test_fd,
	    offs - pg_offset);
	ASSERT_NE(p, MAP_FAILED) << strerror(errno);

	control_buf = malloc(size);
	ASSERT_NE(nullptr, control_buf) << strerror(errno);

	ASSERT_EQ(size, pread(m_control_fd, control_buf, size, offs))
		<< strerror(errno);

	compare((void*)((char*)p + pg_offset), control_buf, offs, size);

	ASSERT_EQ(0, munmap(p, map_size)) << strerror(errno);
	free(control_buf);
}

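/*
 * pread() size bytes at offset offs from both the test file and the control
 * file and compare the results.
 */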
void do_read(ssize_t size, off_t offs)
{
	void *test_buf, *control_buf;
	ssize_t r;

	test_buf = malloc(size);
	ASSERT_NE(nullptr, test_buf) << strerror(errno);
	control_buf = malloc(size);
	ASSERT_NE(nullptr, control_buf) << strerror(errno);

	errno = 0;
	r = pread(m_test_fd, test_buf, size, offs);
	ASSERT_NE(-1, r) << strerror(errno);
	ASSERT_EQ(size, r) << "unexpected short read";
	r = pread(m_control_fd, control_buf, size, offs);
	ASSERT_NE(-1, r) << strerror(errno);
	ASSERT_EQ(size, r) << "unexpected short read";

	compare(test_buf, control_buf, offs, size);

	free(control_buf);
	free(test_buf);
}

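/*
 * Write size bytes of random data at offset offs to the test file through a
 * shared mapping, and write the same data to the control file with pwrite().
 * Extends the file first if the write would grow it.
 */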
void do_mapwrite(ssize_t size, off_t offs)
{
	char *buf;
	void *p;
	off_t pg_offset, page_mask;
	size_t map_size;
	long i;

	page_mask = getpagesize() - 1;
	pg_offset = offs & page_mask;
	map_size = pg_offset + size;

	buf = (char*)malloc(size);
	ASSERT_NE(nullptr, buf) << strerror(errno);
	for (i = 0; i < size; i++)
		buf[i] = random();

	if (offs + size > m_filesize) {
		/*
		 * Must manually extend.  vm_mmap_vnode will not implicitly
		 * extend a vnode
		 */
		do_ftruncate(offs + size);
	}

	p = mmap(NULL, map_size, PROT_READ | PROT_WRITE,
	    MAP_FILE | MAP_SHARED, m_test_fd, offs - pg_offset);
	ASSERT_NE(p, MAP_FAILED) << strerror(errno);

	bcopy(buf, (char*)p + pg_offset, size);
	ASSERT_EQ(size, pwrite(m_control_fd, buf, size, offs))
		<< strerror(errno);

	free(buf);
	ASSERT_EQ(0, munmap(p, map_size)) << strerror(errno);
}

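/*
 * pwrite() size bytes of random data at offset offs to both the test file and
 * the control file.
 */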
void do_write(ssize_t size, off_t offs)
{
	char *buf;
	long i;

	buf = (char*)malloc(size);
	ASSERT_NE(nullptr, buf) << strerror(errno);
	for (i = 0; i < size; i++)
		buf[i] = random();

	ASSERT_EQ(size, pwrite(m_test_fd, buf, size, offs))
		<< strerror(errno);
	ASSERT_EQ(size, pwrite(m_control_fd, buf, size, offs))
		<< strerror(errno);
	m_filesize = std::max(m_filesize, offs + size);

	free(buf);
}

};

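/*
 * Tests that access the file through mmap.  Instantiated only for the cached
 * modes (see INSTANTIATE_TEST_CASE_P below); Uncached is omitted.
 */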
class IoCacheable: public Io {
public:
virtual void SetUp() {
	Io::SetUp();
}
};

/*
 * Extend a file with dirty data in the last page of the last block.
 *
 * fsx -WR -P /tmp -S8 -N3 fsx.bin
 */
TEST_P(Io, extend_from_dirty_page)
{
	off_t wofs = 0x21a0;
	ssize_t wsize = 0xf0a8;
	off_t rofs = 0xb284;
	ssize_t rsize = 0x9b22;
	off_t truncsize = 0x28702;

	do_write(wsize, wofs);
	do_ftruncate(truncsize);
	do_read(rsize, rofs);
}

/*
 * mapwrite into a newly extended part of a file.
 *
 * fsx -c 100 -i 100 -l 524288 -o 131072 -N5 -P /tmp -S19 fsx.bin
 */
TEST_P(IoCacheable, extend_by_mapwrite)
{
	do_mapwrite(0x849e, 0x29a3a);	/* [0x29a3a, 0x31ed7] */
	do_mapwrite(0x3994, 0x3c7d8);	/* [0x3c7d8, 0x4016b] */
	do_read(0xf556, 0x30c16);	/* [0x30c16, 0x4016b] */
}

/*
 * When writing the last page of a file, it must be written synchronously.
 * Otherwise the cached page can be invalidated by a subsequent extend
 * operation.
 *
 * fsx -WR -P /tmp -S642 -N3 fsx.bin
 */
TEST_P(Io, last_page)
{
	do_write(0xcc77, 0x1134f);	/* [0x1134f, 0x1dfc5] */
	do_write(0xdfa7, 0x2096a);	/* [0x2096a, 0x2e910] */
	do_read(0xb5b7, 0x1a3aa);	/* [0x1a3aa, 0x25960] */
}

/*
 * Read a hole using mmap
 *
 * fsx -c 100 -i 100 -l 524288 -o 131072 -N11 -P /tmp -S14 fsx.bin
 */
TEST_P(IoCacheable, mapread_hole)
{
	do_write(0x123b7, 0xf205);	/* [0xf205, 0x215bb] */
	do_mapread(0xeeea, 0x2f4c);	/* [0x2f4c, 0x11e35] */
}

/*
 * Read a hole from a block that contains some cached data.
 *
 * fsx -WR -P /tmp -S55 fsx.bin
 */
TEST_P(Io, read_hole_from_cached_block)
{
	off_t wofs = 0x160c5;
	ssize_t wsize = 0xa996;
	off_t rofs = 0x472e;
	ssize_t rsize = 0xd8d5;

	do_write(wsize, wofs);
	do_read(rsize, rofs);
}

/*
 * Truncating a file into a dirty buffer should not cause anything untoward
 * to happen when that buffer is eventually flushed.
 *
 * fsx -WR -P /tmp -S839 -d -N6 fsx.bin
 */
TEST_P(Io, truncate_into_dirty_buffer)
{
	off_t wofs0 = 0x3bad7;
	ssize_t wsize0 = 0x4529;
	off_t wofs1 = 0xc30d;
	ssize_t wsize1 = 0x5f77;
	off_t truncsize0 = 0x10916;
	off_t rofs = 0xdf17;
	ssize_t rsize = 0x29ff;
	off_t truncsize1 = 0x152b4;

	do_write(wsize0, wofs0);
	do_write(wsize1, wofs1);
	do_ftruncate(truncsize0);
	do_read(rsize, rofs);
	do_ftruncate(truncsize1);
	close(m_test_fd);
}

/*
 * Truncating a file into a dirty buffer should not cause anything untoward
 * to happen when that buffer is eventually flushed, even when the buffer's
 * dirty_off is > 0.
 *
 * Based on this command with a few steps removed:
 * fsx -WR -P /tmp -S677 -d -N8 fsx.bin
 */
TEST_P(Io, truncate_into_dirty_buffer2)
{
	off_t truncsize0 = 0x344f3;
	off_t wofs = 0x2790c;
	ssize_t wsize = 0xd86a;
	off_t truncsize1 = 0x2de38;
	off_t rofs2 = 0x1fd7a;
	ssize_t rsize2 = 0xc594;
	off_t truncsize2 = 0x31e71;

	/* Sets the file size to something larger than the next write */
	do_ftruncate(truncsize0);
	/*
	 * Creates a dirty buffer.  The part in lbn 2 doesn't flush
	 * synchronously.
	 */
	do_write(wsize, wofs);
	/* Truncates part of the dirty buffer created in step 2 */
	do_ftruncate(truncsize1);
	/* XXX I don't know why this read is necessary */
	do_read(rsize2, rofs2);
	/* Truncates the dirty buffer */
	do_ftruncate(truncsize2);
	close(m_test_fd);
}

/*
 * Regression test for a bug introduced in r348931
 *
 * Sequence of operations:
 * 1) The first write reads lbn so it can modify it
 * 2) The first write flushes lbn 3 immediately because it's the end of file
 * 3) The first write then flushes lbn 4 because it's the end of the file
 * 4) The second write modifies the cached versions of lbn 3 and 4
 * 5) The third write's getblkx invalidates lbn 4's B_CACHE because it's
 *    extending the buffer.  Then it flushes lbn 4 because B_DELWRI was set but
 *    B_CACHE was clear.
 * 6) fuse_write_biobackend erroneously called vfs_bio_clrbuf, putting the
 *    buffer into a weird write-only state.  All read operations would return
 *    0.  Writes were apparently still processed, because the buffer's contents
 *    were correct when examined in a core dump.
 * 7) The third write reads lbn 4 because cache is clear
 * 8) uiomove dutifully copies new data into the buffer
 * 9) The buffer's dirty data is flushed to lbn 4
 * 10) The read returns all zeros because of step 6.
 *
 * Based on:
 * fsx -WR -l 524388 -o 131072 -P /tmp -S6456 -q fsx.bin
 */
TEST_P(Io, resize_a_valid_buffer_while_extending)
{
	do_write(0x14530, 0x36ee6);	/* [0x36ee6, 0x4b415] */
	do_write(0x1507c, 0x33256);	/* [0x33256, 0x482d1] */
	do_write(0x175c, 0x4c03d);	/* [0x4c03d, 0x4d798] */
	do_read(0xe277, 0x3599c);	/* [0x3599c, 0x43c12] */
	close(m_test_fd);
}

INSTANTIATE_TEST_CASE_P(Io, Io,
	Combine(Bool(),					/* async read */
		Values(0x1000, 0x10000, 0x20000),	/* m_maxwrite */
		Values(Uncached, Writethrough, Writeback, WritebackAsync)
	)
);

INSTANTIATE_TEST_CASE_P(Io, IoCacheable,
	Combine(Bool(),					/* async read */
		Values(0x1000, 0x10000, 0x20000),	/* m_maxwrite */
		Values(Writethrough, Writeback, WritebackAsync)
	)
);