1/*
2 * Copyright (c) 2020 iXsystems, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD$");
30
31#include <sys/types.h>
32#include <sys/param.h>
33#include <sys/dmu.h>
34#include <sys/dmu_impl.h>
35#include <sys/dmu_tx.h>
36#include <sys/dbuf.h>
37#include <sys/dnode.h>
38#include <sys/zfs_context.h>
39#include <sys/dmu_objset.h>
40#include <sys/dmu_traverse.h>
41#include <sys/dsl_dataset.h>
42#include <sys/dsl_dir.h>
43#include <sys/dsl_pool.h>
44#include <sys/dsl_synctask.h>
45#include <sys/dsl_prop.h>
46#include <sys/dmu_zfetch.h>
47#include <sys/zfs_ioctl.h>
48#include <sys/zap.h>
49#include <sys/zio_checksum.h>
50#include <sys/zio_compress.h>
51#include <sys/sa.h>
52#include <sys/zfeature.h>
53#include <sys/abd.h>
54#include <sys/zfs_rlock.h>
55#include <sys/racct.h>
56#include <sys/vm.h>
57#include <sys/zfs_znode.h>
58#include <sys/zfs_vnops.h>
59
60#include <sys/ccompat.h>
61
/* Convert a page index into a byte offset, unless a header already did. */
#ifndef IDX_TO_OFF
#define	IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT)
#endif

/*
 * Busy-state allocation flags OR-ed into the vm_page_grab_unlocked() calls
 * made for read-behind and read-ahead pages; the value depends on the
 * FreeBSD version.  The multi-flag expansion is parenthesized so that it
 * always behaves as a single operand, whatever operators surround it.
 */
#if __FreeBSD_version < 1300051
#define	VM_ALLOC_BUSY_FLAGS	VM_ALLOC_NOBUSY
#else
#define	VM_ALLOC_BUSY_FLAGS	(VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)
#endif

/*
 * Per-page lock wrappers: they map to vm_page_lock()/vm_page_unlock() when
 * __FreeBSD_version is below 1300072 and expand to nothing otherwise.
 */
#if __FreeBSD_version < 1300072
#define	dmu_page_lock(m)	vm_page_lock(m)
#define	dmu_page_unlock(m)	vm_page_unlock(m)
#else
#define	dmu_page_lock(m)
#define	dmu_page_unlock(m)
#endif
80
81static int
82dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
83    uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp)
84{
85	dnode_t *dn;
86	int err;
87
88	err = dnode_hold(os, object, FTAG, &dn);
89	if (err)
90		return (err);
91
92	err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
93	    numbufsp, dbpp, DMU_READ_PREFETCH);
94
95	dnode_rele(dn, FTAG);
96
97	return (err);
98}
99
100int
101dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
102    vm_page_t *ma, dmu_tx_t *tx)
103{
104	dmu_buf_t **dbp;
105	struct sf_buf *sf;
106	int numbufs, i;
107	int err;
108
109	if (size == 0)
110		return (0);
111
112	err = dmu_buf_hold_array(os, object, offset, size,
113	    FALSE, FTAG, &numbufs, &dbp);
114	if (err)
115		return (err);
116
117	for (i = 0; i < numbufs; i++) {
118		int tocpy, copied, thiscpy;
119		int bufoff;
120		dmu_buf_t *db = dbp[i];
121		caddr_t va;
122
123		ASSERT(size > 0);
124		ASSERT3U(db->db_size, >=, PAGESIZE);
125
126		bufoff = offset - db->db_offset;
127		tocpy = (int)MIN(db->db_size - bufoff, size);
128
129		ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
130
131		if (tocpy == db->db_size)
132			dmu_buf_will_fill(db, tx);
133		else
134			dmu_buf_will_dirty(db, tx);
135
136		for (copied = 0; copied < tocpy; copied += PAGESIZE) {
137			ASSERT3U(ptoa((*ma)->pindex), ==,
138			    db->db_offset + bufoff);
139			thiscpy = MIN(PAGESIZE, tocpy - copied);
140			va = zfs_map_page(*ma, &sf);
141			bcopy(va, (char *)db->db_data + bufoff, thiscpy);
142			zfs_unmap_page(sf);
143			ma += 1;
144			bufoff += PAGESIZE;
145		}
146
147		if (tocpy == db->db_size)
148			dmu_buf_fill_done(db, tx);
149
150		offset += tocpy;
151		size -= tocpy;
152	}
153	dmu_buf_rele_array(dbp, numbufs, FTAG);
154	return (err);
155}
156
157int
158dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count,
159    int *rbehind, int *rahead, int last_size)
160{
161	struct sf_buf *sf;
162	vm_object_t vmobj;
163	vm_page_t m;
164	dmu_buf_t **dbp;
165	dmu_buf_t *db;
166	caddr_t va;
167	int numbufs, i;
168	int bufoff, pgoff, tocpy;
169	int mi, di;
170	int err;
171
172	ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex);
173	ASSERT(last_size <= PAGE_SIZE);
174
175	err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex),
176	    IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp);
177	if (err != 0)
178		return (err);
179
180#ifdef ZFS_DEBUG
181	IMPLY(last_size < PAGE_SIZE, *rahead == 0);
182	if (dbp[0]->db_offset != 0 || numbufs > 1) {
183		for (i = 0; i < numbufs; i++) {
184			ASSERT(ISP2(dbp[i]->db_size));
185			ASSERT((dbp[i]->db_offset % dbp[i]->db_size) == 0);
186			ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size);
187		}
188	}
189#endif
190
191	vmobj = ma[0]->object;
192	zfs_vmobject_wlock_12(vmobj);
193
194	db = dbp[0];
195	for (i = 0; i < *rbehind; i++) {
196		m = vm_page_grab_unlocked(vmobj, ma[0]->pindex - 1 - i,
197		    VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
198		if (m == NULL)
199			break;
200		if (!vm_page_none_valid(m)) {
201			ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
202			vm_page_do_sunbusy(m);
203			break;
204		}
205		ASSERT(m->dirty == 0);
206		ASSERT(!pmap_page_is_write_mapped(m));
207
208		ASSERT(db->db_size > PAGE_SIZE);
209		bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
210		va = zfs_map_page(m, &sf);
211		bcopy((char *)db->db_data + bufoff, va, PAGESIZE);
212		zfs_unmap_page(sf);
213		vm_page_valid(m);
214		dmu_page_lock(m);
215		if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
216			vm_page_activate(m);
217		else
218			vm_page_deactivate(m);
219		dmu_page_unlock(m);
220		vm_page_do_sunbusy(m);
221	}
222	*rbehind = i;
223
224	bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size;
225	pgoff = 0;
226	for (mi = 0, di = 0; mi < count && di < numbufs; ) {
227		if (pgoff == 0) {
228			m = ma[mi];
229			if (m != bogus_page) {
230				vm_page_assert_xbusied(m);
231				ASSERT(vm_page_none_valid(m));
232				ASSERT(m->dirty == 0);
233				ASSERT(!pmap_page_is_write_mapped(m));
234				va = zfs_map_page(m, &sf);
235			}
236		}
237		if (bufoff == 0)
238			db = dbp[di];
239
240		if (m != bogus_page) {
241			ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==,
242			    db->db_offset + bufoff);
243		}
244
245		/*
246		 * We do not need to clamp the copy size by the file
247		 * size as the last block is zero-filled beyond the
248		 * end of file anyway.
249		 */
250		tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff);
251		if (m != bogus_page)
252			bcopy((char *)db->db_data + bufoff, va + pgoff, tocpy);
253
254		pgoff += tocpy;
255		ASSERT(pgoff <= PAGESIZE);
256		if (pgoff == PAGESIZE) {
257			if (m != bogus_page) {
258				zfs_unmap_page(sf);
259				vm_page_valid(m);
260			}
261			ASSERT(mi < count);
262			mi++;
263			pgoff = 0;
264		}
265
266		bufoff += tocpy;
267		ASSERT(bufoff <= db->db_size);
268		if (bufoff == db->db_size) {
269			ASSERT(di < numbufs);
270			di++;
271			bufoff = 0;
272		}
273	}
274
275#ifdef ZFS_DEBUG
276	/*
277	 * Three possibilities:
278	 * - last requested page ends at a buffer boundary and , thus,
279	 *   all pages and buffers have been iterated;
280	 * - all requested pages are filled, but the last buffer
281	 *   has not been exhausted;
282	 *   the read-ahead is possible only in this case;
283	 * - all buffers have been read, but the last page has not been
284	 *   fully filled;
285	 *   this is only possible if the file has only a single buffer
286	 *   with a size that is not a multiple of the page size.
287	 */
288	if (mi == count) {
289		ASSERT(di >= numbufs - 1);
290		IMPLY(*rahead != 0, di == numbufs - 1);
291		IMPLY(*rahead != 0, bufoff != 0);
292		ASSERT(pgoff == 0);
293	}
294	if (di == numbufs) {
295		ASSERT(mi >= count - 1);
296		ASSERT(*rahead == 0);
297		IMPLY(pgoff == 0, mi == count);
298		if (pgoff != 0) {
299			ASSERT(mi == count - 1);
300			ASSERT((dbp[0]->db_size & PAGE_MASK) != 0);
301		}
302	}
303#endif
304	if (pgoff != 0) {
305		ASSERT(m != bogus_page);
306		bzero(va + pgoff, PAGESIZE - pgoff);
307		zfs_unmap_page(sf);
308		vm_page_valid(m);
309	}
310
311	for (i = 0; i < *rahead; i++) {
312		m = vm_page_grab_unlocked(vmobj, ma[count - 1]->pindex + 1 + i,
313		    VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
314		if (m == NULL)
315			break;
316		if (!vm_page_none_valid(m)) {
317			ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
318			vm_page_do_sunbusy(m);
319			break;
320		}
321		ASSERT(m->dirty == 0);
322		ASSERT(!pmap_page_is_write_mapped(m));
323
324		ASSERT(db->db_size > PAGE_SIZE);
325		bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
326		tocpy = MIN(db->db_size - bufoff, PAGESIZE);
327		va = zfs_map_page(m, &sf);
328		bcopy((char *)db->db_data + bufoff, va, tocpy);
329		if (tocpy < PAGESIZE) {
330			ASSERT(i == *rahead - 1);
331			ASSERT((db->db_size & PAGE_MASK) != 0);
332			bzero(va + tocpy, PAGESIZE - tocpy);
333		}
334		zfs_unmap_page(sf);
335		vm_page_valid(m);
336		dmu_page_lock(m);
337		if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
338			vm_page_activate(m);
339		else
340			vm_page_deactivate(m);
341		dmu_page_unlock(m);
342		vm_page_do_sunbusy(m);
343	}
344	*rahead = i;
345	zfs_vmobject_wunlock_12(vmobj);
346
347	dmu_buf_rele_array(dbp, numbufs, FTAG);
348	return (0);
349}
350