/*-
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/kern/kern_physio.c 366939 2020-10-22 16:29:21Z brooks $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/racct.h>
#include <sys/uio.h>
#include <geom/geom.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>

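/*
 * physio() performs "raw" I/O between the caller's buffer and a character
 * device, bypassing the buffer cache.  The request is issued to the driver
 * in chunks bounded by the device's si_iosize_max and MAXPHYS, with the
 * backing user pages held (and, when required, mapped into a pbuf) for the
 * duration of each transfer.
 */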
int
physio(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct cdevsw *csw;
	struct buf *pbuf;
	struct bio *bp;
	struct vm_page **pages;
	char *base, *sa;
	u_int iolen, poff;
	int error, i, npages, maxpages;
	vm_prot_t prot;

	csw = dev->si_devsw;
	/* check if character device is being destroyed */
	if (csw == NULL)
		return (ENXIO);

	/* XXX: sanity check */
	if (dev->si_iosize_max < PAGE_SIZE) {
		printf("WARNING: %s si_iosize_max=%d, using DFLTPHYS.\n",
		    devtoname(dev), dev->si_iosize_max);
		dev->si_iosize_max = DFLTPHYS;
	}

	/*
	 * If the driver does not want I/O to be split, we must reject any
	 * request that will not fit into one buffer.
	 */
	if (dev->si_flags & SI_NOSPLIT &&
	    (uio->uio_resid > dev->si_iosize_max || uio->uio_resid > MAXPHYS ||
	    uio->uio_iovcnt > 1)) {
		/*
		 * Tell the user why his I/O was rejected.
		 */
		if (uio->uio_resid > dev->si_iosize_max)
			uprintf("%s: request size=%zd > si_iosize_max=%d; "
			    "cannot split request\n", devtoname(dev),
			    uio->uio_resid, dev->si_iosize_max);
		if (uio->uio_resid > MAXPHYS)
			uprintf("%s: request size=%zd > MAXPHYS=%d; "
			    "cannot split request\n", devtoname(dev),
			    uio->uio_resid, MAXPHYS);
		if (uio->uio_iovcnt > 1)
			uprintf("%s: request vectors=%d > 1; "
			    "cannot split request\n", devtoname(dev),
			    uio->uio_iovcnt);
		return (EFBIG);
	}

	/*
	 * Keep the process UPAGES from being swapped out.  A process that
	 * is swapped out while holding pbufs, which the swapper also uses,
	 * can deadlock.
	 */
	PHOLD(curproc);

	bp = g_alloc_bio();
	if (uio->uio_segflg != UIO_USERSPACE) {
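		/*
		 * Kernel-space I/O: no page hold or pbuf mapping is set
		 * up; the driver is handed the buffer address directly.
		 */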
		pbuf = NULL;
		pages = NULL;
	} else if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) {
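		/*
		 * The driver accepts unmapped I/O: hold the user pages
		 * and hand them to the driver as a page array, with no
		 * KVA mapping.
		 */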
		pbuf = NULL;
		maxpages = btoc(MIN(uio->uio_resid, MAXPHYS)) + 1;
		pages = malloc(sizeof(*pages) * maxpages, M_DEVBUF, M_WAITOK);
	} else {
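		/*
		 * Otherwise borrow a pbuf and map the held user pages
		 * into its KVA window for each transfer.
		 */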
		pbuf = getpbuf(NULL);
		sa = pbuf->b_data;
		maxpages = btoc(MAXPHYS);
		pages = pbuf->b_pages;
	}
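	/*
	 * A device read stores into the user buffer, so its pages must be
	 * writable; a device write only needs them to be readable.
	 */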
	prot = VM_PROT_READ;
	if (uio->uio_rw == UIO_READ)
		prot |= VM_PROT_WRITE;	/* Less backwards than it looks */
	error = 0;
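	/*
	 * Walk each iovec, carving it into chunks that fit within
	 * si_iosize_max, MAXPHYS and, when a pbuf is used, its KVA window.
	 */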
	for (i = 0; i < uio->uio_iovcnt; i++) {
#ifdef RACCT
		if (racct_enable) {
			PROC_LOCK(curproc);
			if (uio->uio_rw == UIO_READ) {
				racct_add_force(curproc, RACCT_READBPS,
				    uio->uio_iov[i].iov_len);
				racct_add_force(curproc, RACCT_READIOPS, 1);
			} else {
				racct_add_force(curproc, RACCT_WRITEBPS,
				    uio->uio_iov[i].iov_len);
				racct_add_force(curproc, RACCT_WRITEIOPS, 1);
			}
			PROC_UNLOCK(curproc);
		}
#endif /* RACCT */

		while (uio->uio_iov[i].iov_len) {
			g_reset_bio(bp);
			if (uio->uio_rw == UIO_READ) {
				bp->bio_cmd = BIO_READ;
				curthread->td_ru.ru_inblock++;
			} else {
				bp->bio_cmd = BIO_WRITE;
				curthread->td_ru.ru_oublock++;
			}
			bp->bio_offset = uio->uio_offset;
			base = uio->uio_iov[i].iov_base;
			bp->bio_length = uio->uio_iov[i].iov_len;
			if (bp->bio_length > dev->si_iosize_max)
				bp->bio_length = dev->si_iosize_max;
			if (bp->bio_length > MAXPHYS)
				bp->bio_length = MAXPHYS;

			/*
			 * Make sure the pbuf can map the request.
			 * The pbuf has kvasize = MAXPHYS, so a request
			 * larger than MAXPHYS - PAGE_SIZE must be
			 * page aligned or it will be fragmented.
			 */
			poff = (vm_offset_t)base & PAGE_MASK;
			if (pbuf && bp->bio_length + poff > pbuf->b_kvasize) {
				if (dev->si_flags & SI_NOSPLIT) {
					uprintf("%s: request ptr %p is not "
					    "on a page boundary; cannot split "
					    "request\n", devtoname(dev),
					    base);
					error = EFBIG;
					goto doerror;
				}
				bp->bio_length = pbuf->b_kvasize;
				if (poff != 0)
					bp->bio_length -= PAGE_SIZE;
			}

			bp->bio_bcount = bp->bio_length;
			bp->bio_dev = dev;

			if (pages) {
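				/*
				 * Fault in and hold the user pages backing
				 * this chunk so they stay resident for the
				 * duration of the transfer.
				 */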
				if ((npages = vm_fault_quick_hold_pages(
				    &curproc->p_vmspace->vm_map,
				    (vm_offset_t)base, bp->bio_length,
				    prot, pages, maxpages)) < 0) {
					error = EFAULT;
					goto doerror;
				}
				if (pbuf) {
					pmap_qenter((vm_offset_t)sa,
					    pages, npages);
					bp->bio_data = sa + poff;
				} else {
					bp->bio_ma = pages;
					bp->bio_ma_n = npages;
					bp->bio_ma_offset = poff;
					bp->bio_data = unmapped_buf;
					bp->bio_flags |= BIO_UNMAPPED;
				}
			} else
				bp->bio_data = base;

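			/*
			 * Hand the bio to the driver and wait for the
			 * transfer to complete.
			 */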
			csw->d_strategy(bp);
			if (uio->uio_rw == UIO_READ)
				biowait(bp, "physrd");
			else
				biowait(bp, "physwr");

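			/*
			 * Undo the temporary KVA mapping, if any, and
			 * release the held pages.
			 */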
			if (pages) {
				if (pbuf)
					pmap_qremove((vm_offset_t)sa, npages);
				vm_page_unhold_pages(pages, npages);
			}

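			/*
			 * Advance the uio by the amount actually
			 * transferred; a zero-length transfer with no
			 * error set means EOF.
			 */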
			iolen = bp->bio_length - bp->bio_resid;
			if (iolen == 0 && !(bp->bio_flags & BIO_ERROR))
				goto doerror;	/* EOF */
			uio->uio_iov[i].iov_len -= iolen;
			uio->uio_iov[i].iov_base =
			    (char *)uio->uio_iov[i].iov_base + iolen;
			uio->uio_resid -= iolen;
			uio->uio_offset += iolen;
			if (bp->bio_flags & BIO_ERROR) {
				error = bp->bio_error;
				goto doerror;
			}
		}
	}
doerror:
	if (pbuf)
		relpbuf(pbuf, NULL);
	else if (pages)
		free(pages, M_DEVBUF);
	g_destroy_bio(bp);
	PRELE(curproc);
	return (error);
}