1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27/*	  All Rights Reserved	*/
28
29/*
30 * University Copyright- Copyright (c) 1982, 1986, 1988
31 * The Regents of the University of California
32 * All Rights Reserved
33 *
34 * University Acknowledgment- Portions of this document are derived from
35 * software developed by the University of California, Berkeley, and its
36 * contributors.
37 */
38/*
39 * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
40 */
41
42#ifdef _KERNEL
43
44#include <sys/types.h>
45#include <sys/uio_impl.h>
46#include <sys/sysmacros.h>
47#include <sys/string.h>
48#include <linux/kmap_compat.h>
49#include <linux/uaccess.h>
50
51/*
52 * Move "n" bytes at byte address "p"; "rw" indicates the direction
53 * of the move, and the I/O parameters are provided in "uio", which is
54 * update to reflect the data which was moved.  Returns 0 on success or
55 * a non-zero errno on failure.
56 */
static int
zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	const struct iovec *iov = uio->uio_iov;
	size_t skip = uio->uio_skip;	/* bytes already consumed in iov[0] */
	ulong_t cnt;

	while (n && uio->uio_resid) {
		/* Copy at most up to the end of the current iovec. */
		cnt = MIN(iov->iov_len - skip, n);
		switch (uio->uio_segflg) {
		case UIO_USERSPACE:
			/*
			 * p = kernel data pointer
			 * iov->iov_base = user data pointer
			 */
			if (rw == UIO_READ) {
				if (copy_to_user(iov->iov_base+skip, p, cnt))
					return (EFAULT);
			} else {
				/* UIO_WRITE: copy user data into 'p'. */
				unsigned long b_left = 0;
				if (uio->uio_fault_disable) {
					/*
					 * The caller cannot tolerate a
					 * sleeping page fault here, so verify
					 * the range up front and perform the
					 * copy with page faults disabled.
					 */
					if (!zfs_access_ok(VERIFY_READ,
					    (iov->iov_base + skip), cnt)) {
						return (EFAULT);
					}
					pagefault_disable();
					b_left =
					    __copy_from_user_inatomic(p,
					    (iov->iov_base + skip), cnt);
					pagefault_enable();
				} else {
					b_left =
					    copy_from_user(p,
					    (iov->iov_base + skip), cnt);
				}
				if (b_left > 0) {
					/*
					 * Partial copy: account for the bytes
					 * that did transfer before failing,
					 * so the uio accurately reflects how
					 * far we got.
					 */
					unsigned long c_bytes =
					    cnt - b_left;
					uio->uio_skip += c_bytes;
					ASSERT3U(uio->uio_skip, <,
					    iov->iov_len);
					uio->uio_resid -= c_bytes;
					uio->uio_loffset += c_bytes;
					return (EFAULT);
				}
			}
			break;
		case UIO_SYSSPACE:
			/* Both pointers are kernel addresses; plain memcpy. */
			if (rw == UIO_READ)
				memcpy(iov->iov_base + skip, p, cnt);
			else
				memcpy(p, iov->iov_base + skip, cnt);
			break;
		default:
			ASSERT(0);
		}
		/* Advance; step to the next iovec once this one is drained. */
		skip += cnt;
		if (skip == iov->iov_len) {
			skip = 0;
			uio->uio_iov = (++iov);
			uio->uio_iovcnt--;
		}
		uio->uio_skip = skip;
		uio->uio_resid -= cnt;
		uio->uio_loffset += cnt;
		p = (caddr_t)p + cnt;
		n -= cnt;
	}
	return (0);
}
127
/*
 * Move "n" bytes between the kernel buffer "p" and the bio_vec segments
 * attached to "uio", advancing the uio accounting as data is copied.
 * Each segment's page is mapped with zfs_kmap_atomic() only for the
 * duration of its memcpy().  Always returns 0.
 */
static int
zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	const struct bio_vec *bv = uio->uio_bvec;
	size_t skip = uio->uio_skip;	/* bytes already consumed in bv[0] */
	ulong_t cnt;

	while (n && uio->uio_resid) {
		void *paddr;
		/* Copy at most up to the end of the current segment. */
		cnt = MIN(bv->bv_len - skip, n);

		paddr = zfs_kmap_atomic(bv->bv_page);
		if (rw == UIO_READ) {
			/* Copy from buffer 'p' to the bvec data */
			memcpy(paddr + bv->bv_offset + skip, p, cnt);
		} else {
			/* Copy from bvec data to buffer 'p' */
			memcpy(p, paddr + bv->bv_offset + skip, cnt);
		}
		zfs_kunmap_atomic(paddr);

		/* Advance; step to the next segment once this one drains. */
		skip += cnt;
		if (skip == bv->bv_len) {
			skip = 0;
			uio->uio_bvec = (++bv);
			uio->uio_iovcnt--;
		}
		uio->uio_skip = skip;
		uio->uio_resid -= cnt;
		uio->uio_loffset += cnt;
		p = (caddr_t)p + cnt;
		n -= cnt;
	}
	return (0);
}
163
164#ifdef HAVE_BLK_MQ
165static void
166zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw,
167    struct bio_vec *bv)
168{
169	void *paddr;
170
171	paddr = zfs_kmap_atomic(bv->bv_page);
172	if (rw == UIO_READ) {
173		/* Copy from buffer 'p' to the bvec data */
174		memcpy(paddr + bv->bv_offset + skip, p, cnt);
175	} else {
176		/* Copy from bvec data to buffer 'p' */
177		memcpy(p, paddr + bv->bv_offset + skip, cnt);
178	}
179	zfs_kunmap_atomic(paddr);
180}
181
182/*
183 * Copy 'n' bytes of data between the buffer p[] and the data represented
184 * by the request in the uio.
185 */
static int
zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
	struct request *rq = uio->rq;
	struct bio_vec bv;
	struct req_iterator iter;
	size_t this_seg_start;	/* logical offset */
	size_t this_seg_end;		/* logical offset */
	size_t skip_in_seg;
	size_t copy_from_seg;
	size_t orig_loffset;
	int copied = 0;		/* set once any segment contributes data */

	/*
	 * Get the original logical offset of this entire request (because
	 * uio->uio_loffset will be modified over time).
	 */
	orig_loffset = io_offset(NULL, rq);
	this_seg_start = orig_loffset;

	rq_for_each_segment(bv, rq, iter) {
		/*
		 * Lookup what the logical offset of the last byte of this
		 * segment is.
		 */
		this_seg_end = this_seg_start + bv.bv_len - 1;

		/*
		 * We only need to operate on segments that have data we're
		 * copying.
		 */
		if (uio->uio_loffset >= this_seg_start &&
		    uio->uio_loffset <= this_seg_end) {
			/*
			 * Some, or all, of the data in this segment needs to be
			 * copied.
			 */

			/*
			 * We may not be copying from the first byte in the
			 * segment.  Figure out how many bytes to skip copying
			 * from the beginning of this segment.
			 */
			skip_in_seg = uio->uio_loffset - this_seg_start;

			/*
			 * Calculate the total number of bytes from this
			 * segment that we will be copying.
			 */
			copy_from_seg = MIN(bv.bv_len - skip_in_seg, n);

			/* Copy the bytes */
			zfs_copy_bvec(p, skip_in_seg, copy_from_seg, rw, &bv);
			p = ((char *)p) + copy_from_seg;

			n -= copy_from_seg;
			uio->uio_resid -= copy_from_seg;
			uio->uio_loffset += copy_from_seg;
			copied = 1;	/* We copied some data */
		}

		/* The next segment begins right after this one ends. */
		this_seg_start = this_seg_end + 1;
	}

	if (!copied) {
		/*
		 * Didn't copy anything; clear the residual so the caller
		 * does not treat this as a short transfer.
		 */
		uio->uio_resid = 0;
	}
	return (0);
}
256#endif
257
258static int
259zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
260{
261#ifdef HAVE_BLK_MQ
262	if (uio->rq != NULL)
263		return (zfs_uiomove_bvec_rq(p, n, rw, uio));
264#else
265	ASSERT3P(uio->rq, ==, NULL);
266#endif
267	return (zfs_uiomove_bvec_impl(p, n, rw, uio));
268}
269
270#if defined(HAVE_VFS_IOV_ITER)
/*
 * Move up to "n" bytes between the kernel buffer "p" and the iov_iter
 * attached to "uio".  When "revert" is set the iterator position is
 * restored after the copy so the iter is not consumed (zfs_uiocopy()).
 */
static int
zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio,
    boolean_t revert)
{
	size_t cnt = MIN(n, uio->uio_resid);

	/* Consume any leading skip before touching the iterator data. */
	if (uio->uio_skip)
		iov_iter_advance(uio->uio_iter, uio->uio_skip);

	/* Both helpers return the number of bytes actually copied. */
	if (rw == UIO_READ)
		cnt = copy_to_iter(p, cnt, uio->uio_iter);
	else
		cnt = copy_from_iter(p, cnt, uio->uio_iter);

	/*
	 * When operating on a full pipe no bytes are processed.
	 * In which case return EFAULT which is converted to EAGAIN
	 * by the kernel's generic_file_splice_read() function.
	 */
	if (cnt == 0)
		return (EFAULT);

	/*
	 * Revert advancing the uio_iter.  This is set by zfs_uiocopy()
	 * to avoid consuming the uio and its iov_iter structure.
	 */
	if (revert)
		iov_iter_revert(uio->uio_iter, cnt);

	/* Account only for the bytes that actually moved. */
	uio->uio_resid -= cnt;
	uio->uio_loffset += cnt;

	return (0);
}
305#endif
306
307int
308zfs_uiomove(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
309{
310	if (uio->uio_segflg == UIO_BVEC)
311		return (zfs_uiomove_bvec(p, n, rw, uio));
312#if defined(HAVE_VFS_IOV_ITER)
313	else if (uio->uio_segflg == UIO_ITER)
314		return (zfs_uiomove_iter(p, n, rw, uio, B_FALSE));
315#endif
316	else
317		return (zfs_uiomove_iov(p, n, rw, uio));
318}
319EXPORT_SYMBOL(zfs_uiomove);
320
321/*
322 * Fault in the pages of the first n bytes specified by the uio structure.
323 * 1 byte in each page is touched and the uio struct is unmodified. Any
324 * error will terminate the process as this is only a best attempt to get
325 * the pages resident.
326 */
int
zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio)
{
	if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC) {
		/* There's never a need to fault in kernel pages */
		return (0);
#if defined(HAVE_VFS_IOV_ITER)
	} else if (uio->uio_segflg == UIO_ITER) {
		/*
		 * On at least Linux 4.9 and newer kernels,
		 * iov_iter_fault_in_readable() can be relied on to fault
		 * in user pages when referenced.
		 */
		if (iov_iter_fault_in_readable(uio->uio_iter, n))
			return (EFAULT);
#endif
	} else {
		/* Fault in all user pages */
		ASSERT3S(uio->uio_segflg, ==, UIO_USERSPACE);
		const struct iovec *iov = uio->uio_iov;
		int iovcnt = uio->uio_iovcnt;
		size_t skip = uio->uio_skip;	/* only applies to iov[0] */
		uint8_t tmp;
		caddr_t p;

		for (; n > 0 && iovcnt > 0; iov++, iovcnt--, skip = 0) {
			ulong_t cnt = MIN(iov->iov_len - skip, n);
			/* empty iov */
			if (cnt == 0)
				continue;
			n -= cnt;
			/* touch each page in this segment. */
			p = iov->iov_base + skip;
			while (cnt) {
				/* Reading one byte faults in the page. */
				if (copy_from_user(&tmp, p, 1))
					return (EFAULT);
				ulong_t incr = MIN(cnt, PAGESIZE);
				p += incr;
				cnt -= incr;
			}
			/* touch the last byte in case it straddles a page. */
			p--;
			if (copy_from_user(&tmp, p, 1))
				return (EFAULT);
		}
	}

	return (0);
}
375EXPORT_SYMBOL(zfs_uio_prefaultpages);
376
377/*
378 * The same as zfs_uiomove() but doesn't modify uio structure.
379 * return in cbytes how many bytes were copied.
380 */
381int
382zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes)
383{
384	zfs_uio_t uio_copy;
385	int ret;
386
387	memcpy(&uio_copy, uio, sizeof (zfs_uio_t));
388
389	if (uio->uio_segflg == UIO_BVEC)
390		ret = zfs_uiomove_bvec(p, n, rw, &uio_copy);
391#if defined(HAVE_VFS_IOV_ITER)
392	else if (uio->uio_segflg == UIO_ITER)
393		ret = zfs_uiomove_iter(p, n, rw, &uio_copy, B_TRUE);
394#endif
395	else
396		ret = zfs_uiomove_iov(p, n, rw, &uio_copy);
397
398	*cbytes = uio->uio_resid - uio_copy.uio_resid;
399
400	return (ret);
401}
402EXPORT_SYMBOL(zfs_uiocopy);
403
404/*
405 * Drop the next n chars out of *uio.
406 */
void
zfs_uioskip(zfs_uio_t *uio, size_t n)
{
	/* Skipping more than remains is silently ignored. */
	if (n > uio->uio_resid)
		return;
	/*
	 * When using a uio with a struct request, we simply
	 * use uio_loffset as a pointer to the next logical byte to
	 * copy in the request.  We don't have to do any fancy
	 * accounting with uio_bvec/uio_iovcnt since we don't use
	 * them.
	 */
	if (uio->uio_segflg == UIO_BVEC && uio->rq == NULL) {
		/* Walk uio_skip forward across whole bvec segments. */
		uio->uio_skip += n;
		while (uio->uio_iovcnt &&
		    uio->uio_skip >= uio->uio_bvec->bv_len) {
			uio->uio_skip -= uio->uio_bvec->bv_len;
			uio->uio_bvec++;
			uio->uio_iovcnt--;
		}
#if defined(HAVE_VFS_IOV_ITER)
	} else if (uio->uio_segflg == UIO_ITER) {
		/* The iov_iter tracks its own position. */
		iov_iter_advance(uio->uio_iter, n);
#endif
	} else {
		/* Same walk as above, but over the iovec array. */
		uio->uio_skip += n;
		while (uio->uio_iovcnt &&
		    uio->uio_skip >= uio->uio_iov->iov_len) {
			uio->uio_skip -= uio->uio_iov->iov_len;
			uio->uio_iov++;
			uio->uio_iovcnt--;
		}
	}
	/* Offset and residual always advance by the full skip amount. */
	uio->uio_loffset += n;
	uio->uio_resid -= n;
}
443EXPORT_SYMBOL(zfs_uioskip);
444
445#endif /* _KERNEL */
446