1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2019 The FreeBSD Foundation
5 *
6 * This software was developed by BFF Storage Systems, LLC under sponsorship
7 * from the FreeBSD Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31extern "C" {
32#include <sys/param.h>
33#include <sys/mman.h>
34#include <sys/resource.h>
35#include <sys/stat.h>
36#include <sys/time.h>
37#include <sys/uio.h>
38
39#include <aio.h>
40#include <fcntl.h>
41#include <signal.h>
42#include <unistd.h>
43}
44
45#include "mockfs.hh"
46#include "utils.hh"
47
48using namespace testing;
49
50class Write: public FuseTest {
51
52public:
53void SetUp() {
54	FuseTest::SetUp();
55}
56
57void TearDown() {
58	struct sigaction sa;
59
60	bzero(&sa, sizeof(sa));
61	sa.sa_handler = SIG_DFL;
62	sigaction(SIGXFSZ, &sa, NULL);
63
64	FuseTest::TearDown();
65}
66
67void expect_lookup(const char *relpath, uint64_t ino, uint64_t size)
68{
69	FuseTest::expect_lookup(relpath, ino, S_IFREG | 0644, size, 1);
70}
71
72void expect_release(uint64_t ino, ProcessMockerT r)
73{
74	EXPECT_CALL(*m_mock, process(
75		ResultOf([=](auto in) {
76			return (in.header.opcode == FUSE_RELEASE &&
77				in.header.nodeid == ino);
78		}, Eq(true)),
79		_)
80	).WillRepeatedly(Invoke(r));
81}
82
83void expect_write(uint64_t ino, uint64_t offset, uint64_t isize,
84	uint64_t osize, const void *contents)
85{
86	FuseTest::expect_write(ino, offset, isize, osize, 0, 0, contents);
87}
88
89/* Expect a write that may or may not come, depending on the cache mode */
90void maybe_expect_write(uint64_t ino, uint64_t offset, uint64_t size,
91	const void *contents)
92{
93	EXPECT_CALL(*m_mock, process(
94		ResultOf([=](auto in) {
95			const char *buf = (const char*)in.body.bytes +
96				sizeof(struct fuse_write_in);
97
98			assert(size <= sizeof(in.body.bytes) -
99				sizeof(struct fuse_write_in));
100			return (in.header.opcode == FUSE_WRITE &&
101				in.header.nodeid == ino &&
102				in.body.write.offset == offset  &&
103				in.body.write.size == size &&
104				0 == bcmp(buf, contents, size));
105		}, Eq(true)),
106		_)
107	).Times(AtMost(1))
108	.WillRepeatedly(Invoke(
109		ReturnImmediate([=](auto in __unused, auto& out) {
110			SET_OUT_HEADER_LEN(out, write);
111			out.body.write.size = size;
112		})
113	));
114}
115
116};
117
118class Write_7_8: public FuseTest {
119
120public:
121virtual void SetUp() {
122	m_kernel_minor_version = 8;
123	FuseTest::SetUp();
124}
125
126void expect_lookup(const char *relpath, uint64_t ino, uint64_t size)
127{
128	FuseTest::expect_lookup_7_8(relpath, ino, S_IFREG | 0644, size, 1);
129}
130
131};
132
133class AioWrite: public Write {
134virtual void SetUp() {
135	if (!is_unsafe_aio_enabled())
136		GTEST_SKIP() <<
137			"vfs.aio.enable_unsafe must be set for this test";
138	FuseTest::SetUp();
139}
140};
141
142/* Tests for the writeback cache mode */
143class WriteBack: public Write {
144public:
145virtual void SetUp() {
146	m_init_flags |= FUSE_WRITEBACK_CACHE;
147	FuseTest::SetUp();
148	if (IsSkipped())
149		return;
150}
151
152void expect_write(uint64_t ino, uint64_t offset, uint64_t isize,
153	uint64_t osize, const void *contents)
154{
155	FuseTest::expect_write(ino, offset, isize, osize, FUSE_WRITE_CACHE, 0,
156		contents);
157}
158};
159
160class WriteBackAsync: public WriteBack {
161public:
162virtual void SetUp() {
163	m_async = true;
164	m_maxwrite = 65536;
165	WriteBack::SetUp();
166}
167};
168
169class TimeGran: public WriteBackAsync, public WithParamInterface<unsigned> {
170public:
171virtual void SetUp() {
172	m_time_gran = 1 << GetParam();
173	WriteBackAsync::SetUp();
174}
175};
176
/* Tests for clustered writes with WriteBack caching */
178class WriteCluster: public WriteBack {
179public:
180virtual void SetUp() {
181	m_async = true;
182	m_maxwrite = UINT32_MAX; // Anything larger than MAXPHYS will suffice
183	WriteBack::SetUp();
184	if (m_maxphys < 2 * DFLTPHYS)
185		GTEST_SKIP() << "MAXPHYS must be at least twice DFLTPHYS"
186			<< " for this test";
187	if (m_maxphys < 2 * (unsigned long )m_maxbcachebuf)
188		GTEST_SKIP() << "MAXPHYS must be at least twice maxbcachebuf"
189			<< " for this test";
190}
191};
192
193/* Tests relating to the server's max_write property */
194class WriteMaxWrite: public Write {
195public:
196virtual void SetUp() {
197	/*
198	 * For this test, m_maxwrite must be less than either m_maxbcachebuf or
199	 * maxphys.
200	 */
201	m_maxwrite = 32768;
202	Write::SetUp();
203}
204};
205
206class WriteEofDuringVnopStrategy: public Write, public WithParamInterface<int>
207{};
208
209class WriteRlimitFsize: public Write, public WithParamInterface<int> {
210public:
211static sig_atomic_t s_sigxfsz;
212struct rlimit	m_initial_limit;
213
214void SetUp() {
215	s_sigxfsz = 0;
216	getrlimit(RLIMIT_FSIZE, &m_initial_limit);
217	FuseTest::SetUp();
218}
219
220void TearDown() {
221	setrlimit(RLIMIT_FSIZE, &m_initial_limit);
222
223	FuseTest::TearDown();
224}
225};
226
227sig_atomic_t WriteRlimitFsize::s_sigxfsz = 0;
228
229void sigxfsz_handler(int __unused sig) {
230	WriteRlimitFsize::s_sigxfsz = 1;
231}
232
233/* AIO writes need to set the header's pid field correctly */
234/* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=236379 */
235TEST_F(AioWrite, DISABLED_aio_write)
236{
237	const char FULLPATH[] = "mountpoint/some_file.txt";
238	const char RELPATH[] = "some_file.txt";
239	const char *CONTENTS = "abcdefgh";
240	uint64_t ino = 42;
241	uint64_t offset = 4096;
242	int fd;
243	ssize_t bufsize = strlen(CONTENTS);
244	struct aiocb iocb, *piocb;
245
246	expect_lookup(RELPATH, ino, 0);
247	expect_open(ino, 0, 1);
248	expect_write(ino, offset, bufsize, bufsize, CONTENTS);
249
250	fd = open(FULLPATH, O_WRONLY);
251	ASSERT_LE(0, fd) << strerror(errno);
252
253	iocb.aio_nbytes = bufsize;
254	iocb.aio_fildes = fd;
255	iocb.aio_buf = __DECONST(void *, CONTENTS);
256	iocb.aio_offset = offset;
257	iocb.aio_sigevent.sigev_notify = SIGEV_NONE;
258	ASSERT_EQ(0, aio_write(&iocb)) << strerror(errno);
259	ASSERT_EQ(bufsize, aio_waitcomplete(&piocb, NULL)) << strerror(errno);
260	leak(fd);
261}
262
263/*
264 * When a file is opened with O_APPEND, we should forward that flag to
265 * FUSE_OPEN (tested by Open.o_append) but still attempt to calculate the
266 * offset internally.  That way we'll work both with filesystems that
267 * understand O_APPEND (and ignore the offset) and filesystems that don't (and
268 * simply use the offset).
269 *
270 * Note that verifying the O_APPEND flag in FUSE_OPEN is done in the
271 * Open.o_append test.
272 */
273TEST_F(Write, append)
274{
275	const ssize_t BUFSIZE = 9;
276	const char FULLPATH[] = "mountpoint/some_file.txt";
277	const char RELPATH[] = "some_file.txt";
278	const char CONTENTS[BUFSIZE] = "abcdefgh";
279	uint64_t ino = 42;
280	/*
281	 * Set offset to a maxbcachebuf boundary so we don't need to RMW when
282	 * using writeback caching
283	 */
284	uint64_t initial_offset = m_maxbcachebuf;
285	int fd;
286
287	expect_lookup(RELPATH, ino, initial_offset);
288	expect_open(ino, 0, 1);
289	expect_write(ino, initial_offset, BUFSIZE, BUFSIZE, CONTENTS);
290
291	/* Must open O_RDWR or fuse(4) implicitly sets direct_io */
292	fd = open(FULLPATH, O_RDWR | O_APPEND);
293	ASSERT_LE(0, fd) << strerror(errno);
294
295	ASSERT_EQ(BUFSIZE, write(fd, CONTENTS, BUFSIZE)) << strerror(errno);
296	leak(fd);
297}
298
299/* If a file is cached, then appending to the end should not cause a read */
300TEST_F(Write, append_to_cached)
301{
302	const ssize_t BUFSIZE = 9;
303	const char FULLPATH[] = "mountpoint/some_file.txt";
304	const char RELPATH[] = "some_file.txt";
305	char *oldcontents, *oldbuf;
306	const char CONTENTS[BUFSIZE] = "abcdefgh";
307	uint64_t ino = 42;
308	/*
309	 * Set offset in between maxbcachebuf boundary to test buffer handling
310	 */
311	uint64_t oldsize = m_maxbcachebuf / 2;
312	int fd;
313
314	oldcontents = new char[oldsize]();
315	oldbuf = new char[oldsize];
316
317	expect_lookup(RELPATH, ino, oldsize);
318	expect_open(ino, 0, 1);
319	expect_read(ino, 0, oldsize, oldsize, oldcontents);
320	maybe_expect_write(ino, oldsize, BUFSIZE, CONTENTS);
321
322	/* Must open O_RDWR or fuse(4) implicitly sets direct_io */
323	fd = open(FULLPATH, O_RDWR | O_APPEND);
324	ASSERT_LE(0, fd) << strerror(errno);
325
326	/* Read the old data into the cache */
327	ASSERT_EQ((ssize_t)oldsize, read(fd, oldbuf, oldsize))
328		<< strerror(errno);
329
330	/* Write the new data.  There should be no more read operations */
331	ASSERT_EQ(BUFSIZE, write(fd, CONTENTS, BUFSIZE)) << strerror(errno);
332	leak(fd);
333	delete[] oldbuf;
334	delete[] oldcontents;
335}
336
337TEST_F(Write, append_direct_io)
338{
339	const ssize_t BUFSIZE = 9;
340	const char FULLPATH[] = "mountpoint/some_file.txt";
341	const char RELPATH[] = "some_file.txt";
342	const char CONTENTS[BUFSIZE] = "abcdefgh";
343	uint64_t ino = 42;
344	uint64_t initial_offset = 4096;
345	int fd;
346
347	expect_lookup(RELPATH, ino, initial_offset);
348	expect_open(ino, FOPEN_DIRECT_IO, 1);
349	expect_write(ino, initial_offset, BUFSIZE, BUFSIZE, CONTENTS);
350
351	fd = open(FULLPATH, O_WRONLY | O_APPEND);
352	ASSERT_LE(0, fd) << strerror(errno);
353
354	ASSERT_EQ(BUFSIZE, write(fd, CONTENTS, BUFSIZE)) << strerror(errno);
355	leak(fd);
356}
357
358/* A direct write should evict any overlapping cached data */
359TEST_F(Write, direct_io_evicts_cache)
360{
361	const char FULLPATH[] = "mountpoint/some_file.txt";
362	const char RELPATH[] = "some_file.txt";
363	const char CONTENTS0[] = "abcdefgh";
364	const char CONTENTS1[] = "ijklmnop";
365	uint64_t ino = 42;
366	int fd;
367	ssize_t bufsize = strlen(CONTENTS0) + 1;
368	char readbuf[bufsize];
369
370	expect_lookup(RELPATH, ino, bufsize);
371	expect_open(ino, 0, 1);
372	expect_read(ino, 0, bufsize, bufsize, CONTENTS0);
373	expect_write(ino, 0, bufsize, bufsize, CONTENTS1);
374
375	fd = open(FULLPATH, O_RDWR);
376	ASSERT_LE(0, fd) << strerror(errno);
377
378	// Prime cache
379	ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno);
380
381	// Write directly, evicting cache
382	ASSERT_EQ(0, fcntl(fd, F_SETFL, O_DIRECT)) << strerror(errno);
383	ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
384	ASSERT_EQ(bufsize, write(fd, CONTENTS1, bufsize)) << strerror(errno);
385
386	// Read again.  Cache should be bypassed
387	expect_read(ino, 0, bufsize, bufsize, CONTENTS1);
388	ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno);
389	ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
390	ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno);
391	ASSERT_STREQ(readbuf, CONTENTS1);
392
393	leak(fd);
394}
395
396/*
397 * If the server doesn't return FOPEN_DIRECT_IO during FUSE_OPEN, then it's not
398 * allowed to return a short write for that file handle.  However, if it does
399 * then we should still do our darndest to handle it by resending the unwritten
400 * portion.
401 */
402TEST_F(Write, indirect_io_short_write)
403{
404	const char FULLPATH[] = "mountpoint/some_file.txt";
405	const char RELPATH[] = "some_file.txt";
406	const char *CONTENTS = "abcdefghijklmnop";
407	uint64_t ino = 42;
408	int fd;
409	ssize_t bufsize = strlen(CONTENTS);
410	ssize_t bufsize0 = 11;
411	ssize_t bufsize1 = strlen(CONTENTS) - bufsize0;
412	const char *contents1 = CONTENTS + bufsize0;
413
414	expect_lookup(RELPATH, ino, 0);
415	expect_open(ino, 0, 1);
416	expect_write(ino, 0, bufsize, bufsize0, CONTENTS);
417	expect_write(ino, bufsize0, bufsize1, bufsize1, contents1);
418
419	fd = open(FULLPATH, O_WRONLY);
420	ASSERT_LE(0, fd) << strerror(errno);
421
422	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
423	leak(fd);
424}
425
426/* It is an error if the daemon claims to have written more data than we sent */
427TEST_F(Write, indirect_io_long_write)
428{
429	const char FULLPATH[] = "mountpoint/some_file.txt";
430	const char RELPATH[] = "some_file.txt";
431	const char *CONTENTS = "abcdefghijklmnop";
432	uint64_t ino = 42;
433	int fd;
434	ssize_t bufsize = strlen(CONTENTS);
435	ssize_t bufsize_out = 100;
436	off_t some_other_size = 25;
437	struct stat sb;
438
439	expect_lookup(RELPATH, ino, 0);
440	expect_open(ino, 0, 1);
441	expect_write(ino, 0, bufsize, bufsize_out, CONTENTS);
442	expect_getattr(ino, some_other_size);
443
444	fd = open(FULLPATH, O_WRONLY);
445	ASSERT_LE(0, fd) << strerror(errno);
446
447	ASSERT_EQ(-1, write(fd, CONTENTS, bufsize)) << strerror(errno);
448	ASSERT_EQ(EINVAL, errno);
449
450	/*
451	 * Following such an error, we should requery the server for the file's
452	 * size.
453	 */
454	fstat(fd, &sb);
455	ASSERT_EQ(sb.st_size, some_other_size);
456
457	leak(fd);
458}
459
460/*
461 * Don't crash if the server returns a write that can't be represented as a
462 * signed 32 bit number.  Regression test for
463 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=263263
464 */
465TEST_F(Write, indirect_io_very_long_write)
466{
467	const char FULLPATH[] = "mountpoint/some_file.txt";
468	const char RELPATH[] = "some_file.txt";
469	const char *CONTENTS = "abcdefghijklmnop";
470	uint64_t ino = 42;
471	int fd;
472	ssize_t bufsize = strlen(CONTENTS);
473	ssize_t bufsize_out = 3 << 30;
474
475	expect_lookup(RELPATH, ino, 0);
476	expect_open(ino, 0, 1);
477	expect_write(ino, 0, bufsize, bufsize_out, CONTENTS);
478
479	fd = open(FULLPATH, O_WRONLY);
480	ASSERT_LE(0, fd) << strerror(errno);
481
482	ASSERT_EQ(-1, write(fd, CONTENTS, bufsize)) << strerror(errno);
483	ASSERT_EQ(EINVAL, errno);
484	leak(fd);
485}
486
487/*
488 * When the direct_io option is used, filesystems are allowed to write less
489 * data than requested.  We should return the short write to userland.
490 */
491TEST_F(Write, direct_io_short_write)
492{
493	const char FULLPATH[] = "mountpoint/some_file.txt";
494	const char RELPATH[] = "some_file.txt";
495	const char *CONTENTS = "abcdefghijklmnop";
496	uint64_t ino = 42;
497	int fd;
498	ssize_t bufsize = strlen(CONTENTS);
499	ssize_t halfbufsize = bufsize / 2;
500
501	expect_lookup(RELPATH, ino, 0);
502	expect_open(ino, FOPEN_DIRECT_IO, 1);
503	expect_write(ino, 0, bufsize, halfbufsize, CONTENTS);
504
505	fd = open(FULLPATH, O_WRONLY);
506	ASSERT_LE(0, fd) << strerror(errno);
507
508	ASSERT_EQ(halfbufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
509	leak(fd);
510}
511
512/*
513 * An insidious edge case: the filesystem returns a short write, and the
514 * difference between what we requested and what it actually wrote crosses an
515 * iov element boundary
516 */
517TEST_F(Write, direct_io_short_write_iov)
518{
519	const char FULLPATH[] = "mountpoint/some_file.txt";
520	const char RELPATH[] = "some_file.txt";
521	const char *CONTENTS0 = "abcdefgh";
522	const char *CONTENTS1 = "ijklmnop";
523	const char *EXPECTED0 = "abcdefghijklmnop";
524	uint64_t ino = 42;
525	int fd;
526	ssize_t size0 = strlen(CONTENTS0) - 1;
527	ssize_t size1 = strlen(CONTENTS1) + 1;
528	ssize_t totalsize = size0 + size1;
529	struct iovec iov[2];
530
531	expect_lookup(RELPATH, ino, 0);
532	expect_open(ino, FOPEN_DIRECT_IO, 1);
533	expect_write(ino, 0, totalsize, size0, EXPECTED0);
534
535	fd = open(FULLPATH, O_WRONLY);
536	ASSERT_LE(0, fd) << strerror(errno);
537
538	iov[0].iov_base = __DECONST(void*, CONTENTS0);
539	iov[0].iov_len = strlen(CONTENTS0);
540	iov[1].iov_base = __DECONST(void*, CONTENTS1);
541	iov[1].iov_len = strlen(CONTENTS1);
542	ASSERT_EQ(size0, writev(fd, iov, 2)) << strerror(errno);
543	leak(fd);
544}
545
546/* fusefs should respect RLIMIT_FSIZE */
547TEST_P(WriteRlimitFsize, rlimit_fsize)
548{
549	const char FULLPATH[] = "mountpoint/some_file.txt";
550	const char RELPATH[] = "some_file.txt";
551	const char *CONTENTS = "abcdefgh";
552	struct rlimit rl;
553	ssize_t bufsize = strlen(CONTENTS);
554	off_t offset = 1'000'000'000;
555	uint64_t ino = 42;
556	int fd, oflag;
557
558	oflag = GetParam();
559
560	expect_lookup(RELPATH, ino, 0);
561	expect_open(ino, 0, 1);
562
563	rl.rlim_cur = offset;
564	rl.rlim_max = m_initial_limit.rlim_max;
565	ASSERT_EQ(0, setrlimit(RLIMIT_FSIZE, &rl)) << strerror(errno);
566	ASSERT_NE(SIG_ERR, signal(SIGXFSZ, sigxfsz_handler)) << strerror(errno);
567
568	fd = open(FULLPATH, O_WRONLY | oflag);
569
570	ASSERT_LE(0, fd) << strerror(errno);
571
572	ASSERT_EQ(-1, pwrite(fd, CONTENTS, bufsize, offset));
573	EXPECT_EQ(EFBIG, errno);
574	EXPECT_EQ(1, s_sigxfsz);
575	leak(fd);
576}
577
578/*
579 * When crossing the RLIMIT_FSIZE boundary, writes should be truncated, not
580 * aborted.
581 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=164793
582 */
583TEST_P(WriteRlimitFsize, rlimit_fsize_truncate)
584{
585	const char FULLPATH[] = "mountpoint/some_file.txt";
586	const char RELPATH[] = "some_file.txt";
587	const char *CONTENTS = "abcdefghijklmnopqrstuvwxyz";
588	struct rlimit rl;
589	ssize_t bufsize = strlen(CONTENTS);
590	uint64_t ino = 42;
591	off_t offset = 1 << 30;
592	off_t limit = offset + strlen(CONTENTS) / 2;
593	int fd, oflag;
594
595	oflag = GetParam();
596
597	expect_lookup(RELPATH, ino, 0);
598	expect_open(ino, 0, 1);
599	expect_write(ino, offset, bufsize / 2, bufsize / 2, CONTENTS);
600
601	rl.rlim_cur = limit;
602	rl.rlim_max = m_initial_limit.rlim_max;
603	ASSERT_EQ(0, setrlimit(RLIMIT_FSIZE, &rl)) << strerror(errno);
604	ASSERT_NE(SIG_ERR, signal(SIGXFSZ, sigxfsz_handler)) << strerror(errno);
605
606	fd = open(FULLPATH, O_WRONLY | oflag);
607
608	ASSERT_LE(0, fd) << strerror(errno);
609
610	ASSERT_EQ(bufsize / 2, pwrite(fd, CONTENTS, bufsize, offset))
611		<< strerror(errno);
612	leak(fd);
613}
614
615INSTANTIATE_TEST_SUITE_P(W, WriteRlimitFsize,
616	Values(0, O_DIRECT)
617);
618
619/*
620 * A short read indicates EOF.  Test that nothing bad happens if we get EOF
621 * during the R of a RMW operation.
622 */
623TEST_F(Write, eof_during_rmw)
624{
625	const char FULLPATH[] = "mountpoint/some_file.txt";
626	const char RELPATH[] = "some_file.txt";
627	const char *CONTENTS = "abcdefgh";
628	const char *INITIAL   = "XXXXXXXXXX";
629	uint64_t ino = 42;
630	uint64_t offset = 1;
631	ssize_t bufsize = strlen(CONTENTS) + 1;
632	off_t orig_fsize = 10;
633	off_t truncated_fsize = 5;
634	int fd;
635
636	FuseTest::expect_lookup(RELPATH, ino, S_IFREG | 0644, orig_fsize, 1);
637	expect_open(ino, 0, 1);
638	expect_read(ino, 0, orig_fsize, truncated_fsize, INITIAL, O_RDWR);
639	maybe_expect_write(ino, offset, bufsize, CONTENTS);
640
641	fd = open(FULLPATH, O_RDWR);
642	ASSERT_LE(0, fd) << strerror(errno);
643
644	ASSERT_EQ(bufsize, pwrite(fd, CONTENTS, bufsize, offset))
645		<< strerror(errno);
646	leak(fd);
647}
648
649/*
650 * VOP_STRATEGY should not query the server for the file's size, even if its
651 * cached attributes have expired.
652 * Regression test for https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=256937
653 */
654TEST_P(WriteEofDuringVnopStrategy, eof_during_vop_strategy)
655{
656	const char FULLPATH[] = "mountpoint/some_file.txt";
657	const char RELPATH[] = "some_file.txt";
658	Sequence seq;
659	const off_t filesize = 2 * m_maxbcachebuf;
660	char *contents;
661	uint64_t ino = 42;
662	uint64_t attr_valid = 0;
663	uint64_t attr_valid_nsec = 0;
664	mode_t mode = S_IFREG | 0644;
665	int fd;
666	int ngetattrs;
667
668	ngetattrs = GetParam();
669	contents = new char[filesize]();
670
671	EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH)
672	.WillRepeatedly(Invoke(
673		ReturnImmediate([=](auto in __unused, auto& out) {
674		SET_OUT_HEADER_LEN(out, entry);
675		out.body.entry.attr.mode = mode;
676		out.body.entry.nodeid = ino;
677		out.body.entry.attr.nlink = 1;
678		out.body.entry.attr.size = filesize;
679		out.body.entry.attr_valid = attr_valid;
680		out.body.entry.attr_valid_nsec = attr_valid_nsec;
681	})));
682	expect_open(ino, 0, 1);
683	EXPECT_CALL(*m_mock, process(
684		ResultOf([=](auto in) {
685			return (in.header.opcode == FUSE_GETATTR &&
686				in.header.nodeid == ino);
687		}, Eq(true)),
688		_)
689	).Times(Between(ngetattrs - 1, ngetattrs))
690	.InSequence(seq)
691	.WillRepeatedly(Invoke(ReturnImmediate([=](auto i __unused, auto& out) {
692		SET_OUT_HEADER_LEN(out, attr);
693		out.body.attr.attr.ino = ino;
694		out.body.attr.attr.mode = mode;
695		out.body.attr.attr_valid = attr_valid;
696		out.body.attr.attr_valid_nsec = attr_valid_nsec;
697		out.body.attr.attr.size = filesize;
698	})));
699	EXPECT_CALL(*m_mock, process(
700		ResultOf([=](auto in) {
701			return (in.header.opcode == FUSE_GETATTR &&
702				in.header.nodeid == ino);
703		}, Eq(true)),
704		_)
705	).InSequence(seq)
706	.WillRepeatedly(Invoke(ReturnImmediate([=](auto i __unused, auto& out) {
707		SET_OUT_HEADER_LEN(out, attr);
708		out.body.attr.attr.ino = ino;
709		out.body.attr.attr.mode = mode;
710		out.body.attr.attr_valid = attr_valid;
711		out.body.attr.attr_valid_nsec = attr_valid_nsec;
712		out.body.attr.attr.size = filesize / 2;
713	})));
714	expect_write(ino, 0, filesize / 2, filesize / 2, contents);
715
716	fd = open(FULLPATH, O_RDWR);
717	ASSERT_LE(0, fd) << strerror(errno);
718	ASSERT_EQ(filesize / 2, write(fd, contents, filesize / 2))
719		<< strerror(errno);
720
721}
722
723INSTANTIATE_TEST_SUITE_P(W, WriteEofDuringVnopStrategy,
724	Values(1, 2, 3)
725);
726
727/*
728 * If the kernel cannot be sure which uid, gid, or pid was responsible for a
729 * write, then it must set the FUSE_WRITE_CACHE bit
730 */
731/* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=236378 */
732TEST_F(Write, mmap)
733{
734	const char FULLPATH[] = "mountpoint/some_file.txt";
735	const char RELPATH[] = "some_file.txt";
736	const char *CONTENTS = "abcdefgh";
737	uint64_t ino = 42;
738	int fd;
739	ssize_t bufsize = strlen(CONTENTS);
740	void *p;
741	uint64_t offset = 10;
742	size_t len;
743	char *zeros, *expected;
744
745	len = getpagesize();
746
747	zeros = new char[len]();
748	expected = new char[len]();
749	memmove((uint8_t*)expected + offset, CONTENTS, bufsize);
750
751	expect_lookup(RELPATH, ino, len);
752	expect_open(ino, 0, 1);
753	expect_read(ino, 0, len, len, zeros);
754	/*
755	 * Writes from the pager may or may not be associated with the correct
756	 * pid, so they must set FUSE_WRITE_CACHE.
757	 */
758	FuseTest::expect_write(ino, 0, len, len, FUSE_WRITE_CACHE, 0, expected);
759	expect_flush(ino, 1, ReturnErrno(0));
760	expect_release(ino, ReturnErrno(0));
761
762	fd = open(FULLPATH, O_RDWR);
763	ASSERT_LE(0, fd) << strerror(errno);
764
765	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
766	ASSERT_NE(MAP_FAILED, p) << strerror(errno);
767
768	memmove((uint8_t*)p + offset, CONTENTS, bufsize);
769
770	ASSERT_EQ(0, munmap(p, len)) << strerror(errno);
771	close(fd);	// Write mmap'd data on close
772
773	delete[] expected;
774	delete[] zeros;
775
776	leak(fd);
777}
778
779TEST_F(Write, pwrite)
780{
781	const char FULLPATH[] = "mountpoint/some_file.txt";
782	const char RELPATH[] = "some_file.txt";
783	const char *CONTENTS = "abcdefgh";
784	uint64_t ino = 42;
785	uint64_t offset = m_maxbcachebuf;
786	int fd;
787	ssize_t bufsize = strlen(CONTENTS);
788
789	expect_lookup(RELPATH, ino, 0);
790	expect_open(ino, 0, 1);
791	expect_write(ino, offset, bufsize, bufsize, CONTENTS);
792
793	fd = open(FULLPATH, O_WRONLY);
794	ASSERT_LE(0, fd) << strerror(errno);
795
796	ASSERT_EQ(bufsize, pwrite(fd, CONTENTS, bufsize, offset))
797		<< strerror(errno);
798	leak(fd);
799}
800
801/* Writing a file should update its cached mtime and ctime */
802TEST_F(Write, timestamps)
803{
804	const char FULLPATH[] = "mountpoint/some_file.txt";
805	const char RELPATH[] = "some_file.txt";
806	const char *CONTENTS = "abcdefgh";
807	ssize_t bufsize = strlen(CONTENTS);
808	uint64_t ino = 42;
809	struct stat sb0, sb1;
810	int fd;
811
812	expect_lookup(RELPATH, ino, 0);
813	expect_open(ino, 0, 1);
814	maybe_expect_write(ino, 0, bufsize, CONTENTS);
815
816	fd = open(FULLPATH, O_RDWR);
817	ASSERT_LE(0, fd) << strerror(errno);
818	ASSERT_EQ(0, fstat(fd, &sb0)) << strerror(errno);
819	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
820
821	nap();
822
823	ASSERT_EQ(0, fstat(fd, &sb1)) << strerror(errno);
824
825	EXPECT_EQ(sb0.st_atime, sb1.st_atime);
826	EXPECT_NE(sb0.st_mtime, sb1.st_mtime);
827	EXPECT_NE(sb0.st_ctime, sb1.st_ctime);
828
829	leak(fd);
830}
831
832TEST_F(Write, write)
833{
834	const char FULLPATH[] = "mountpoint/some_file.txt";
835	const char RELPATH[] = "some_file.txt";
836	const char *CONTENTS = "abcdefgh";
837	uint64_t ino = 42;
838	int fd;
839	ssize_t bufsize = strlen(CONTENTS);
840
841	expect_lookup(RELPATH, ino, 0);
842	expect_open(ino, 0, 1);
843	expect_write(ino, 0, bufsize, bufsize, CONTENTS);
844
845	fd = open(FULLPATH, O_WRONLY);
846	ASSERT_LE(0, fd) << strerror(errno);
847
848	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
849	leak(fd);
850}
851
852/* fuse(4) should not issue writes of greater size than the daemon requests */
853TEST_F(WriteMaxWrite, write)
854{
855	const char FULLPATH[] = "mountpoint/some_file.txt";
856	const char RELPATH[] = "some_file.txt";
857	int *contents;
858	uint64_t ino = 42;
859	int fd;
860	ssize_t halfbufsize, bufsize;
861
862	halfbufsize = m_mock->m_maxwrite;
863	if (halfbufsize >= m_maxbcachebuf ||
864	    (unsigned long )halfbufsize >= m_maxphys)
865		GTEST_SKIP() << "Must lower m_maxwrite for this test";
866	bufsize = halfbufsize * 2;
867	contents = new int[bufsize / sizeof(int)];
868	for (int i = 0; i < (int)bufsize / (int)sizeof(i); i++) {
869		contents[i] = i;
870	}
871
872	expect_lookup(RELPATH, ino, 0);
873	expect_open(ino, 0, 1);
874	maybe_expect_write(ino, 0, halfbufsize, contents);
875	maybe_expect_write(ino, halfbufsize, halfbufsize,
876		&contents[halfbufsize / sizeof(int)]);
877
878	fd = open(FULLPATH, O_WRONLY);
879	ASSERT_LE(0, fd) << strerror(errno);
880
881	ASSERT_EQ(bufsize, write(fd, contents, bufsize)) << strerror(errno);
882	leak(fd);
883
884	delete[] contents;
885}
886
887TEST_F(Write, write_nothing)
888{
889	const char FULLPATH[] = "mountpoint/some_file.txt";
890	const char RELPATH[] = "some_file.txt";
891	const char *CONTENTS = "";
892	uint64_t ino = 42;
893	int fd;
894	ssize_t bufsize = 0;
895
896	expect_lookup(RELPATH, ino, 0);
897	expect_open(ino, 0, 1);
898
899	fd = open(FULLPATH, O_WRONLY);
900	ASSERT_LE(0, fd) << strerror(errno);
901
902	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
903	leak(fd);
904}
905
906TEST_F(Write_7_8, write)
907{
908	const char FULLPATH[] = "mountpoint/some_file.txt";
909	const char RELPATH[] = "some_file.txt";
910	const char *CONTENTS = "abcdefgh";
911	uint64_t ino = 42;
912	int fd;
913	ssize_t bufsize = strlen(CONTENTS);
914
915	expect_lookup(RELPATH, ino, 0);
916	expect_open(ino, 0, 1);
917	expect_write_7_8(ino, 0, bufsize, bufsize, CONTENTS);
918
919	fd = open(FULLPATH, O_WRONLY);
920	ASSERT_LE(0, fd) << strerror(errno);
921
922	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
923	leak(fd);
924}
925
926/* In writeback mode, dirty data should be written on close */
927TEST_F(WriteBackAsync, close)
928{
929	const char FULLPATH[] = "mountpoint/some_file.txt";
930	const char RELPATH[] = "some_file.txt";
931	const char *CONTENTS = "abcdefgh";
932	uint64_t ino = 42;
933	int fd;
934	ssize_t bufsize = strlen(CONTENTS);
935
936	expect_lookup(RELPATH, ino, 0);
937	expect_open(ino, 0, 1);
938	expect_write(ino, 0, bufsize, bufsize, CONTENTS);
939	EXPECT_CALL(*m_mock, process(
940		ResultOf([=](auto in) {
941			return (in.header.opcode == FUSE_SETATTR);
942		}, Eq(true)),
943		_)
944	).WillRepeatedly(Invoke(ReturnImmediate([=](auto i __unused, auto& out) {
945		SET_OUT_HEADER_LEN(out, attr);
946		out.body.attr.attr.ino = ino;	// Must match nodeid
947	})));
948	expect_flush(ino, 1, ReturnErrno(0));
949	expect_release(ino, ReturnErrno(0));
950
951	fd = open(FULLPATH, O_RDWR);
952	ASSERT_LE(0, fd) << strerror(errno);
953
954	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
955	close(fd);
956}
957
958/* In writeback mode, adjacent writes will be clustered together */
959TEST_F(WriteCluster, clustering)
960{
961	const char FULLPATH[] = "mountpoint/some_file.txt";
962	const char RELPATH[] = "some_file.txt";
963	uint64_t ino = 42;
964	int i, fd;
965	char *wbuf, *wbuf2x;
966	ssize_t bufsize = m_maxbcachebuf;
967	off_t filesize = 5 * bufsize;
968
969	wbuf = new char[bufsize];
970	memset(wbuf, 'X', bufsize);
971	wbuf2x = new char[2 * bufsize];
972	memset(wbuf2x, 'X', 2 * bufsize);
973
974	expect_lookup(RELPATH, ino, filesize);
975	expect_open(ino, 0, 1);
976	/*
977	 * Writes of bufsize-bytes each should be clustered into greater sizes.
978	 * The amount of clustering is adaptive, so the first write actually
979	 * issued will be 2x bufsize and subsequent writes may be larger
980	 */
981	expect_write(ino, 0, 2 * bufsize, 2 * bufsize, wbuf2x);
982	expect_write(ino, 2 * bufsize, 2 * bufsize, 2 * bufsize, wbuf2x);
983	expect_flush(ino, 1, ReturnErrno(0));
984	expect_release(ino, ReturnErrno(0));
985
986	fd = open(FULLPATH, O_RDWR);
987	ASSERT_LE(0, fd) << strerror(errno);
988
989	for (i = 0; i < 4; i++) {
990		ASSERT_EQ(bufsize, write(fd, wbuf, bufsize))
991			<< strerror(errno);
992	}
993	close(fd);
994	delete[] wbuf2x;
995	delete[] wbuf;
996}
997
998/*
999 * When clustering writes, an I/O error to any of the cluster's children should
1000 * not panic the system on unmount
1001 */
1002/*
1003 * Regression test for bug 238585
1004 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=238565
1005 */
1006TEST_F(WriteCluster, cluster_write_err)
1007{
1008	const char FULLPATH[] = "mountpoint/some_file.txt";
1009	const char RELPATH[] = "some_file.txt";
1010	uint64_t ino = 42;
1011	int i, fd;
1012	char *wbuf;
1013	ssize_t bufsize = m_maxbcachebuf;
1014	off_t filesize = 4 * bufsize;
1015
1016	wbuf = new char[bufsize];
1017	memset(wbuf, 'X', bufsize);
1018
1019	expect_lookup(RELPATH, ino, filesize);
1020	expect_open(ino, 0, 1);
1021	EXPECT_CALL(*m_mock, process(
1022		ResultOf([=](auto in) {
1023			return (in.header.opcode == FUSE_WRITE);
1024		}, Eq(true)),
1025		_)
1026	).WillRepeatedly(Invoke(ReturnErrno(EIO)));
1027	expect_flush(ino, 1, ReturnErrno(0));
1028	expect_release(ino, ReturnErrno(0));
1029
1030	fd = open(FULLPATH, O_RDWR);
1031	ASSERT_LE(0, fd) << strerror(errno);
1032
1033	for (i = 0; i < 3; i++) {
1034		ASSERT_EQ(bufsize, write(fd, wbuf, bufsize))
1035			<< strerror(errno);
1036	}
1037	close(fd);
1038	delete[] wbuf;
1039}
1040
1041/*
1042 * In writeback mode, writes to an O_WRONLY file could trigger reads from the
1043 * server.  The FUSE protocol explicitly allows that.
1044 */
1045TEST_F(WriteBack, rmw)
1046{
1047	const char FULLPATH[] = "mountpoint/some_file.txt";
1048	const char RELPATH[] = "some_file.txt";
1049	const char *CONTENTS = "abcdefgh";
1050	const char *INITIAL   = "XXXXXXXXXX";
1051	uint64_t ino = 42;
1052	uint64_t offset = 1;
1053	off_t fsize = 10;
1054	int fd;
1055	ssize_t bufsize = strlen(CONTENTS);
1056
1057	FuseTest::expect_lookup(RELPATH, ino, S_IFREG | 0644, fsize, 1);
1058	expect_open(ino, 0, 1);
1059	expect_read(ino, 0, fsize, fsize, INITIAL, O_WRONLY);
1060	maybe_expect_write(ino, offset, bufsize, CONTENTS);
1061
1062	fd = open(FULLPATH, O_WRONLY);
1063	ASSERT_LE(0, fd) << strerror(errno);
1064
1065	ASSERT_EQ(bufsize, pwrite(fd, CONTENTS, bufsize, offset))
1066		<< strerror(errno);
1067	leak(fd);
1068}
1069
1070/*
1071 * Without direct_io, writes should be committed to cache
1072 */
1073TEST_F(WriteBack, cache)
1074{
1075	const char FULLPATH[] = "mountpoint/some_file.txt";
1076	const char RELPATH[] = "some_file.txt";
1077	const char *CONTENTS = "abcdefgh";
1078	uint64_t ino = 42;
1079	int fd;
1080	ssize_t bufsize = strlen(CONTENTS);
1081	uint8_t readbuf[bufsize];
1082
1083	expect_lookup(RELPATH, ino, 0);
1084	expect_open(ino, 0, 1);
1085	expect_write(ino, 0, bufsize, bufsize, CONTENTS);
1086
1087	fd = open(FULLPATH, O_RDWR);
1088	ASSERT_LE(0, fd) << strerror(errno);
1089
1090	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
1091	/*
1092	 * A subsequent read should be serviced by cache, without querying the
1093	 * filesystem daemon
1094	 */
1095	ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
1096	ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno);
1097	leak(fd);
1098}
1099
1100/*
 * With O_DIRECT, writes should not be committed to cache.  Admittedly this is
1102 * an odd test, because it would be unusual to use O_DIRECT for writes but not
1103 * reads.
1104 */
1105TEST_F(WriteBack, o_direct)
1106{
1107	const char FULLPATH[] = "mountpoint/some_file.txt";
1108	const char RELPATH[] = "some_file.txt";
1109	const char *CONTENTS = "abcdefgh";
1110	uint64_t ino = 42;
1111	int fd;
1112	ssize_t bufsize = strlen(CONTENTS);
1113	uint8_t readbuf[bufsize];
1114
1115	expect_lookup(RELPATH, ino, 0);
1116	expect_open(ino, 0, 1);
1117	FuseTest::expect_write(ino, 0, bufsize, bufsize, 0, FUSE_WRITE_CACHE,
1118		CONTENTS);
1119	expect_read(ino, 0, bufsize, bufsize, CONTENTS);
1120
1121	fd = open(FULLPATH, O_RDWR | O_DIRECT);
1122	ASSERT_LE(0, fd) << strerror(errno);
1123
1124	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
1125	/* A subsequent read must query the daemon because cache is empty */
1126	ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
1127	ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno);
1128	ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno);
1129	leak(fd);
1130}
1131
1132TEST_F(WriteBack, direct_io)
1133{
1134	const char FULLPATH[] = "mountpoint/some_file.txt";
1135	const char RELPATH[] = "some_file.txt";
1136	const char *CONTENTS = "abcdefgh";
1137	uint64_t ino = 42;
1138	int fd;
1139	ssize_t bufsize = strlen(CONTENTS);
1140	uint8_t readbuf[bufsize];
1141
1142	expect_lookup(RELPATH, ino, 0);
1143	expect_open(ino, FOPEN_DIRECT_IO, 1);
1144	FuseTest::expect_write(ino, 0, bufsize, bufsize, 0, FUSE_WRITE_CACHE,
1145		CONTENTS);
1146	expect_read(ino, 0, bufsize, bufsize, CONTENTS);
1147
1148	fd = open(FULLPATH, O_RDWR);
1149	ASSERT_LE(0, fd) << strerror(errno);
1150
1151	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
1152	/* A subsequent read must query the daemon because cache is empty */
1153	ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
1154	ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno);
1155	ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno);
1156	leak(fd);
1157}
1158
1159/*
1160 * mmap should still be possible even if the server used direct_io.  Mmap will
1161 * still use the cache, though.
1162 *
1163 * Regression test for bug 247276
1164 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=247276
1165 */
1166TEST_F(WriteBack, mmap_direct_io)
1167{
1168	const char FULLPATH[] = "mountpoint/some_file.txt";
1169	const char RELPATH[] = "some_file.txt";
1170	const char *CONTENTS = "abcdefgh";
1171	uint64_t ino = 42;
1172	int fd;
1173	size_t len;
1174	ssize_t bufsize = strlen(CONTENTS);
1175	char *zeros;
1176	void *p;
1177
1178	len = getpagesize();
1179	zeros = new char[len]();
1180
1181	expect_lookup(RELPATH, ino, len);
1182	expect_open(ino, FOPEN_DIRECT_IO, 1);
1183	expect_read(ino, 0, len, len, zeros);
1184	expect_flush(ino, 1, ReturnErrno(0));
1185	FuseTest::expect_write(ino, 0, len, len, FUSE_WRITE_CACHE, 0, zeros);
1186	expect_release(ino, ReturnErrno(0));
1187
1188	fd = open(FULLPATH, O_RDWR);
1189	ASSERT_LE(0, fd) << strerror(errno);
1190
1191	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
1192	ASSERT_NE(MAP_FAILED, p) << strerror(errno);
1193
1194	memmove((uint8_t*)p, CONTENTS, bufsize);
1195
1196	ASSERT_EQ(0, munmap(p, len)) << strerror(errno);
1197	close(fd);	// Write mmap'd data on close
1198
1199	delete[] zeros;
1200}
1201
1202/*
1203 * When mounted with -o async, the writeback cache mode should delay writes
1204 */
1205TEST_F(WriteBackAsync, delay)
1206{
1207	const char FULLPATH[] = "mountpoint/some_file.txt";
1208	const char RELPATH[] = "some_file.txt";
1209	const char *CONTENTS = "abcdefgh";
1210	uint64_t ino = 42;
1211	int fd;
1212	ssize_t bufsize = strlen(CONTENTS);
1213
1214	expect_lookup(RELPATH, ino, 0);
1215	expect_open(ino, 0, 1);
1216	/* Write should be cached, but FUSE_WRITE shouldn't be sent */
1217	EXPECT_CALL(*m_mock, process(
1218		ResultOf([=](auto in) {
1219			return (in.header.opcode == FUSE_WRITE);
1220		}, Eq(true)),
1221		_)
1222	).Times(0);
1223
1224	fd = open(FULLPATH, O_RDWR);
1225	ASSERT_LE(0, fd) << strerror(errno);
1226
1227	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
1228
1229	/* Don't close the file because that would flush the cache */
1230	leak(fd);
1231}
1232
1233/*
1234 * A direct write should not evict dirty cached data from outside of its own
1235 * byte range.
1236 */
1237TEST_F(WriteBackAsync, direct_io_ignores_unrelated_cached)
1238{
1239	const char FULLPATH[] = "mountpoint/some_file.txt";
1240	const char RELPATH[] = "some_file.txt";
1241	const char CONTENTS0[] = "abcdefgh";
1242	const char CONTENTS1[] = "ijklmnop";
1243	uint64_t ino = 42;
1244	int fd;
1245	ssize_t bufsize = strlen(CONTENTS0) + 1;
1246	ssize_t fsize = 2 * m_maxbcachebuf;
1247	char readbuf[bufsize];
1248	char *zeros;
1249
1250	zeros = new char[m_maxbcachebuf]();
1251
1252	expect_lookup(RELPATH, ino, fsize);
1253	expect_open(ino, 0, 1);
1254	expect_read(ino, 0, m_maxbcachebuf, m_maxbcachebuf, zeros);
1255	FuseTest::expect_write(ino, m_maxbcachebuf, bufsize, bufsize, 0, 0,
1256		CONTENTS1);
1257
1258	fd = open(FULLPATH, O_RDWR);
1259	ASSERT_LE(0, fd) << strerror(errno);
1260
1261	// Cache first block with dirty data.  This will entail first reading
1262	// the existing data.
1263	ASSERT_EQ(bufsize, pwrite(fd, CONTENTS0, bufsize, 0))
1264		<< strerror(errno);
1265
1266	// Write directly to second block
1267	ASSERT_EQ(0, fcntl(fd, F_SETFL, O_DIRECT)) << strerror(errno);
1268	ASSERT_EQ(bufsize, pwrite(fd, CONTENTS1, bufsize, m_maxbcachebuf))
1269		<< strerror(errno);
1270
1271	// Read from the first block again.  Should be serviced by cache.
1272	ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno);
1273	ASSERT_EQ(bufsize, pread(fd, readbuf, bufsize, 0)) << strerror(errno);
1274	ASSERT_STREQ(readbuf, CONTENTS0);
1275
1276	leak(fd);
1277	delete[] zeros;
1278}
1279
1280/*
1281 * If a direct io write partially overlaps one or two blocks of dirty cached
 * data, no dirty data should be lost.  Admittedly this is a weird test,
1283 * because it would be unusual to use O_DIRECT and the writeback cache.
1284 */
TEST_F(WriteBackAsync, direct_io_partially_overlaps_cached_block)
{
	const char FULLPATH[] = "mountpoint/some_file.txt";
	const char RELPATH[] = "some_file.txt";
	uint64_t ino = 42;
	int fd;
	/* Block size used throughout; the file is three such blocks long */
	off_t bs = m_maxbcachebuf;
	ssize_t fsize = 3 * bs;
	char *readbuf, *zeros, *ones, *zeroones, *onezeros;

	/*
	 * Fixture buffers:
	 *   zeros    - three blocks of 0x00, the initial cached write
	 *   ones     - two blocks of 0x01, the direct write
	 *   zeroones - one block: first half 0x00, second half 0x01
	 *   onezeros - one block: first half 0x01, second half 0x00
	 */
	readbuf = new char[bs];
	zeros = new char[3 * bs]();
	ones = new char[2 * bs];
	memset(ones, 1, 2 * bs);
	zeroones = new char[bs]();
	memset((uint8_t*)zeroones + bs / 2, 1, bs / 2);
	onezeros = new char[bs]();
	memset(onezeros, 1, bs / 2);

	expect_lookup(RELPATH, ino, fsize);
	expect_open(ino, 0, 1);

	fd = open(FULLPATH, O_RDWR);
	ASSERT_LE(0, fd) << strerror(errno);

	/*
	 * Write zeros to all three blocks through the cache.  The first and
	 * third blocks are the ones only partially covered by the direct write
	 * below.
	 */
	ASSERT_EQ(3 * bs, pwrite(fd, zeros, 3 * bs, 0)) << strerror(errno);

	/*
	 * Write directly to all three blocks.  The partially written blocks
	 * will be flushed because they're dirty.
	 */
	FuseTest::expect_write(ino, 0, bs, bs, 0, 0, zeros);
	FuseTest::expect_write(ino, 2 * bs, bs, bs, 0, 0, zeros);
	/* The direct write is split in two because of the m_maxwrite value */
	FuseTest::expect_write(ino,     bs / 2, bs, bs, 0, 0, ones);
	FuseTest::expect_write(ino, 3 * bs / 2, bs, bs, 0, 0, ones);
	ASSERT_EQ(0, fcntl(fd, F_SETFL, O_DIRECT)) << strerror(errno);
	/* The direct write covers the byte range [bs/2, 5*bs/2) */
	ASSERT_EQ(2 * bs, pwrite(fd, ones, 2 * bs, bs / 2)) << strerror(errno);

	/*
	 * Read from both the valid and invalid portions of the first and third
	 * blocks again.  This will entail FUSE_READ operations because these
	 * blocks were invalidated by the direct write.
	 */
	expect_read(ino, 0, bs, bs, zeroones);
	expect_read(ino, 2 * bs, bs, bs, onezeros);
	ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno);
	/* First half of block 0: untouched by the direct write, still zeros */
	ASSERT_EQ(bs / 2, pread(fd, readbuf, bs / 2, 0)) << strerror(errno);
	EXPECT_EQ(0, memcmp(zeros, readbuf, bs / 2));
	/* Second half of block 2: untouched by the direct write, still zeros */
	ASSERT_EQ(bs / 2, pread(fd, readbuf, bs / 2, 5 * bs / 2))
		<< strerror(errno);
	EXPECT_EQ(0, memcmp(zeros, readbuf, bs / 2));
	/* Second half of block 0: overwritten with ones by the direct write */
	ASSERT_EQ(bs / 2, pread(fd, readbuf, bs / 2, bs / 2))
		<< strerror(errno);
	EXPECT_EQ(0, memcmp(ones, readbuf, bs / 2));
	/* First half of block 2: overwritten with ones by the direct write */
	ASSERT_EQ(bs / 2, pread(fd, readbuf, bs / 2, 2 * bs))
		<< strerror(errno);
	EXPECT_EQ(0, memcmp(ones, readbuf, bs / 2));

	leak(fd);
	delete[] zeroones;
	delete[] onezeros;
	delete[] ones;
	delete[] zeros;
	delete[] readbuf;
}
1352
1353/*
1354 * In WriteBack mode, writes may be cached beyond what the server thinks is the
1355 * EOF.  In this case, a short read at EOF should _not_ cause fusefs to update
1356 * the file's size.
1357 */
1358TEST_F(WriteBackAsync, eof)
1359{
1360	const char FULLPATH[] = "mountpoint/some_file.txt";
1361	const char RELPATH[] = "some_file.txt";
1362	const char *CONTENTS0 = "abcdefgh";
1363	const char *CONTENTS1 = "ijklmnop";
1364	uint64_t ino = 42;
1365	int fd;
1366	off_t offset = m_maxbcachebuf;
1367	ssize_t wbufsize = strlen(CONTENTS1);
1368	off_t old_filesize = (off_t)strlen(CONTENTS0);
1369	ssize_t rbufsize = 2 * old_filesize;
1370	char readbuf[rbufsize];
1371	size_t holesize = rbufsize - old_filesize;
1372	char hole[holesize];
1373	struct stat sb;
1374	ssize_t r;
1375
1376	expect_lookup(RELPATH, ino, 0);
1377	expect_open(ino, 0, 1);
1378	expect_read(ino, 0, m_maxbcachebuf, old_filesize, CONTENTS0);
1379
1380	fd = open(FULLPATH, O_RDWR);
1381	ASSERT_LE(0, fd) << strerror(errno);
1382
1383	/* Write and cache data beyond EOF */
1384	ASSERT_EQ(wbufsize, pwrite(fd, CONTENTS1, wbufsize, offset))
1385		<< strerror(errno);
1386
1387	/* Read from the old EOF */
1388	r = pread(fd, readbuf, rbufsize, 0);
1389	ASSERT_LE(0, r) << strerror(errno);
1390	EXPECT_EQ(rbufsize, r) << "read should've synthesized a hole";
1391	EXPECT_EQ(0, memcmp(CONTENTS0, readbuf, old_filesize));
1392	bzero(hole, holesize);
1393	EXPECT_EQ(0, memcmp(hole, readbuf + old_filesize, holesize));
1394
1395	/* The file's size should still be what was established by pwrite */
1396	ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno);
1397	EXPECT_EQ(offset + wbufsize, sb.st_size);
1398	leak(fd);
1399}
1400
1401/*
1402 * When a file has dirty writes that haven't been flushed, the server's notion
1403 * of its mtime and ctime will be wrong.  The kernel should ignore those if it
1404 * gets them from a FUSE_GETATTR before flushing.
1405 */
TEST_F(WriteBackAsync, timestamps)
{
	const char FULLPATH[] = "mountpoint/some_file.txt";
	const char RELPATH[] = "some_file.txt";
	const char *CONTENTS = "abcdefgh";
	ssize_t bufsize = strlen(CONTENTS);
	uint64_t ino = 42;
	/*
	 * Zero validity for both entry and attr replies.
	 * NOTE(review): presumably this keeps the kernel from caching
	 * attributes, so the fstat below issues a FUSE_GETATTR -- confirm.
	 */
	uint64_t attr_valid = 0;
	uint64_t attr_valid_nsec = 0;
	/* Timestamp value the mock server reports for atime, mtime and ctime */
	uint64_t server_time = 12345;
	mode_t mode = S_IFREG | 0644;
	int fd;

	struct stat sb;

	/* Answer every lookup of RELPATH with a regular file, inode ino */
	EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH)
	.WillRepeatedly(Invoke(
		ReturnImmediate([=](auto in __unused, auto& out) {
		SET_OUT_HEADER_LEN(out, entry);
		out.body.entry.attr.mode = mode;
		out.body.entry.nodeid = ino;
		out.body.entry.attr.nlink = 1;
		out.body.entry.attr_valid = attr_valid;
		out.body.entry.attr_valid_nsec = attr_valid_nsec;
	})));
	expect_open(ino, 0, 1);
	/*
	 * Answer every FUSE_GETATTR for this inode with server_time in all
	 * three timestamp fields.
	 */
	EXPECT_CALL(*m_mock, process(
		ResultOf([=](auto in) {
			return (in.header.opcode == FUSE_GETATTR &&
				in.header.nodeid == ino);
		}, Eq(true)),
		_)
	).WillRepeatedly(Invoke(
	ReturnImmediate([=](auto i __unused, auto& out) {
		SET_OUT_HEADER_LEN(out, attr);
		out.body.attr.attr.ino = ino;
		out.body.attr.attr.mode = mode;
		out.body.attr.attr_valid = attr_valid;
		out.body.attr.attr_valid_nsec = attr_valid_nsec;
		out.body.attr.attr.atime = server_time;
		out.body.attr.attr.mtime = server_time;
		out.body.attr.attr.ctime = server_time;
	})));

	fd = open(FULLPATH, O_RDWR);
	ASSERT_LE(0, fd) << strerror(errno);
	/* Leave the file with written data that has not been flushed */
	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);

	/*
	 * atime is taken from the server, but mtime and ctime must differ
	 * from the server's value because of the write above.
	 */
	ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno);
	EXPECT_EQ((time_t)server_time, sb.st_atime);
	EXPECT_NE((time_t)server_time, sb.st_mtime);
	EXPECT_NE((time_t)server_time, sb.st_ctime);

	leak(fd);
}
1461
1462/* Any dirty timestamp fields should be flushed during a SETATTR */
1463TEST_F(WriteBackAsync, timestamps_during_setattr)
1464{
1465	const char FULLPATH[] = "mountpoint/some_file.txt";
1466	const char RELPATH[] = "some_file.txt";
1467	const char *CONTENTS = "abcdefgh";
1468	ssize_t bufsize = strlen(CONTENTS);
1469	uint64_t ino = 42;
1470	const mode_t newmode = 0755;
1471	int fd;
1472
1473	expect_lookup(RELPATH, ino, 0);
1474	expect_open(ino, 0, 1);
1475	EXPECT_CALL(*m_mock, process(
1476		ResultOf([=](auto in) {
1477			uint32_t valid = FATTR_MODE | FATTR_MTIME | FATTR_CTIME;
1478			return (in.header.opcode == FUSE_SETATTR &&
1479				in.header.nodeid == ino &&
1480				in.body.setattr.valid == valid);
1481		}, Eq(true)),
1482		_)
1483	).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) {
1484		SET_OUT_HEADER_LEN(out, attr);
1485		out.body.attr.attr.ino = ino;
1486		out.body.attr.attr.mode = S_IFREG | newmode;
1487	})));
1488
1489	fd = open(FULLPATH, O_RDWR);
1490	ASSERT_LE(0, fd) << strerror(errno);
1491	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
1492	ASSERT_EQ(0, fchmod(fd, newmode)) << strerror(errno);
1493
1494	leak(fd);
1495}
1496
1497/* fuse_init_out.time_gran controls the granularity of timestamps */
TEST_P(TimeGran, timestamps_during_setattr)
{
	const char FULLPATH[] = "mountpoint/some_file.txt";
	const char RELPATH[] = "some_file.txt";
	const char *CONTENTS = "abcdefgh";
	ssize_t bufsize = strlen(CONTENTS);
	uint64_t ino = 42;
	const mode_t newmode = 0755;
	int fd;

	expect_lookup(RELPATH, ino, 0);
	expect_open(ino, 0, 1);
	/*
	 * Expect one FUSE_SETATTR carrying mode, mtime and ctime, whose
	 * nanosecond fields are both whole multiples of m_time_gran.
	 * NOTE(review): m_time_gran is set by the TimeGran fixture, which is
	 * not visible here; it must be nonzero for the modulo below -- confirm.
	 */
	EXPECT_CALL(*m_mock, process(
		ResultOf([=](auto in) {
			uint32_t valid = FATTR_MODE | FATTR_MTIME | FATTR_CTIME;
			return (in.header.opcode == FUSE_SETATTR &&
				in.header.nodeid == ino &&
				in.body.setattr.valid == valid &&
				in.body.setattr.mtimensec % m_time_gran == 0 &&
				in.body.setattr.ctimensec % m_time_gran == 0);
		}, Eq(true)),
		_)
	).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) {
		SET_OUT_HEADER_LEN(out, attr);
		out.body.attr.attr.ino = ino;
		out.body.attr.attr.mode = S_IFREG | newmode;
	})));

	fd = open(FULLPATH, O_RDWR);
	ASSERT_LE(0, fd) << strerror(errno);
	/* Write before the chmod so the SETATTR has timestamps to carry */
	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
	ASSERT_EQ(0, fchmod(fd, newmode)) << strerror(errno);

	leak(fd);
}
1533
/*
 * Run the TimeGran tests once for each parameter value generated by
 * Range(0u, 10u).
 * NOTE(review): how a parameter value is turned into m_time_gran is decided
 * by the TimeGran fixture, which is defined outside this part of the file --
 * confirm there.
 */
INSTANTIATE_TEST_SUITE_P(RA, TimeGran, Range(0u, 10u));
1535
1536/*
1537 * Without direct_io, writes should be committed to cache
1538 */
1539TEST_F(Write, writethrough)
1540{
1541	const char FULLPATH[] = "mountpoint/some_file.txt";
1542	const char RELPATH[] = "some_file.txt";
1543	const char *CONTENTS = "abcdefgh";
1544	uint64_t ino = 42;
1545	int fd;
1546	ssize_t bufsize = strlen(CONTENTS);
1547	uint8_t readbuf[bufsize];
1548
1549	expect_lookup(RELPATH, ino, 0);
1550	expect_open(ino, 0, 1);
1551	expect_write(ino, 0, bufsize, bufsize, CONTENTS);
1552
1553	fd = open(FULLPATH, O_RDWR);
1554	ASSERT_LE(0, fd) << strerror(errno);
1555
1556	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
1557	/*
1558	 * A subsequent read should be serviced by cache, without querying the
1559	 * filesystem daemon
1560	 */
1561	ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
1562	ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno);
1563	leak(fd);
1564}
1565
1566/* Writes that extend a file should update the cached file size */
1567TEST_F(Write, update_file_size)
1568{
1569	const char FULLPATH[] = "mountpoint/some_file.txt";
1570	const char RELPATH[] = "some_file.txt";
1571	const char *CONTENTS = "abcdefgh";
1572	struct stat sb;
1573	uint64_t ino = 42;
1574	int fd;
1575	ssize_t bufsize = strlen(CONTENTS);
1576
1577	expect_lookup(RELPATH, ino, 0);
1578	expect_open(ino, 0, 1);
1579	expect_write(ino, 0, bufsize, bufsize, CONTENTS);
1580
1581	fd = open(FULLPATH, O_RDWR);
1582	ASSERT_LE(0, fd) << strerror(errno);
1583
1584	ASSERT_EQ(bufsize, write(fd, CONTENTS, bufsize)) << strerror(errno);
1585	/* Get cached attributes */
1586	ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno);
1587	ASSERT_EQ(bufsize, sb.st_size);
1588	leak(fd);
1589}
1590