1139804Simp/*-
21549Srgrimes * Copyright (c) 1994 John S. Dyson
31549Srgrimes * All rights reserved.
41541Srgrimes *
51541Srgrimes * Redistribution and use in source and binary forms, with or without
61541Srgrimes * modification, are permitted provided that the following conditions
71541Srgrimes * are met:
81541Srgrimes * 1. Redistributions of source code must retain the above copyright
91549Srgrimes *    notice immediately at the beginning of the file, without modification,
101549Srgrimes *    this list of conditions, and the following disclaimer.
111541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
121541Srgrimes *    notice, this list of conditions and the following disclaimer in the
131541Srgrimes *    documentation and/or other materials provided with the distribution.
141549Srgrimes * 3. Absolutely no warranty of function or purpose is made by the author
151549Srgrimes *    John S. Dyson.
161549Srgrimes * 4. Modifications may be freely made to this file if the above conditions
171549Srgrimes *    are met.
181541Srgrimes */
191541Srgrimes
20116182Sobrien#include <sys/cdefs.h>
21116182Sobrien__FBSDID("$FreeBSD: stable/11/sys/kern/kern_physio.c 366939 2020-10-22 16:29:21Z brooks $");
22116182Sobrien
231541Srgrimes#include <sys/param.h>
241541Srgrimes#include <sys/systm.h>
2560041Sphk#include <sys/bio.h>
261541Srgrimes#include <sys/buf.h>
271541Srgrimes#include <sys/conf.h>
28281825Smav#include <sys/malloc.h>
291541Srgrimes#include <sys/proc.h>
30297633Strasz#include <sys/racct.h>
3134924Sbde#include <sys/uio.h>
32281825Smav#include <geom/geom.h>
3334924Sbde
341549Srgrimes#include <vm/vm.h>
35281825Smav#include <vm/vm_page.h>
3612662Sdg#include <vm/vm_extern.h>
37281825Smav#include <vm/vm_map.h>
381541Srgrimes
3946625Sphkint
40130585Sphkphysio(struct cdev *dev, struct uio *uio, int ioflag)
4146625Sphk{
42290140Shselasky	struct cdevsw *csw;
43281825Smav	struct buf *pbuf;
44281825Smav	struct bio *bp;
45281825Smav	struct vm_page **pages;
46366939Sbrooks	char *base, *sa;
47281825Smav	u_int iolen, poff;
48281825Smav	int error, i, npages, maxpages;
49281825Smav	vm_prot_t prot;
501541Srgrimes
51290140Shselasky	csw = dev->si_devsw;
52290140Shselasky	/* check if character device is being destroyed */
53290140Shselasky	if (csw == NULL)
54290140Shselasky		return (ENXIO);
55290140Shselasky
5652066Sphk	/* XXX: sanity check */
5752066Sphk	if(dev->si_iosize_max < PAGE_SIZE) {
5852066Sphk		printf("WARNING: %s si_iosize_max=%d, using DFLTPHYS.\n",
5952066Sphk		    devtoname(dev), dev->si_iosize_max);
6052066Sphk		dev->si_iosize_max = DFLTPHYS;
6152066Sphk	}
6252066Sphk
63254760Sken	/*
64254760Sken	 * If the driver does not want I/O to be split, that means that we
65254760Sken	 * need to reject any requests that will not fit into one buffer.
66254760Sken	 */
67255032Sken	if (dev->si_flags & SI_NOSPLIT &&
68255032Sken	    (uio->uio_resid > dev->si_iosize_max || uio->uio_resid > MAXPHYS ||
69255032Sken	    uio->uio_iovcnt > 1)) {
70254760Sken		/*
71254760Sken		 * Tell the user why his I/O was rejected.
72254760Sken		 */
73254760Sken		if (uio->uio_resid > dev->si_iosize_max)
74255032Sken			uprintf("%s: request size=%zd > si_iosize_max=%d; "
75254760Sken			    "cannot split request\n", devtoname(dev),
76254760Sken			    uio->uio_resid, dev->si_iosize_max);
77254760Sken		if (uio->uio_resid > MAXPHYS)
78255032Sken			uprintf("%s: request size=%zd > MAXPHYS=%d; "
79254760Sken			    "cannot split request\n", devtoname(dev),
80254760Sken			    uio->uio_resid, MAXPHYS);
81254760Sken		if (uio->uio_iovcnt > 1)
82255032Sken			uprintf("%s: request vectors=%d > 1; "
83254760Sken			    "cannot split request\n", devtoname(dev),
84254760Sken			    uio->uio_iovcnt);
85281825Smav		return (EFBIG);
86281825Smav	}
87254760Sken
88281825Smav	/*
89281825Smav	 * Keep the process UPAGES from being swapped.  Processes swapped
90281825Smav	 * out while holding pbufs, used by swapper, may lead to deadlock.
91281825Smav	 */
92281825Smav	PHOLD(curproc);
93281825Smav
94281825Smav	bp = g_alloc_bio();
95281825Smav	if (uio->uio_segflg != UIO_USERSPACE) {
96281825Smav		pbuf = NULL;
97281825Smav		pages = NULL;
98281825Smav	} else if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) {
99281825Smav		pbuf = NULL;
100281825Smav		maxpages = btoc(MIN(uio->uio_resid, MAXPHYS)) + 1;
101281825Smav		pages = malloc(sizeof(*pages) * maxpages, M_DEVBUF, M_WAITOK);
102281825Smav	} else {
103281825Smav		pbuf = getpbuf(NULL);
104281825Smav		sa = pbuf->b_data;
105281825Smav		maxpages = btoc(MAXPHYS);
106281825Smav		pages = pbuf->b_pages;
107254760Sken	}
108281825Smav	prot = VM_PROT_READ;
109281825Smav	if (uio->uio_rw == UIO_READ)
110281825Smav		prot |= VM_PROT_WRITE;	/* Less backwards than it looks */
111281825Smav	error = 0;
11248225Smckusick	for (i = 0; i < uio->uio_iovcnt; i++) {
113297633Strasz#ifdef RACCT
114297633Strasz		if (racct_enable) {
115297633Strasz			PROC_LOCK(curproc);
116297633Strasz			if (uio->uio_rw == UIO_READ) {
117297633Strasz				racct_add_force(curproc, RACCT_READBPS,
118297633Strasz				    uio->uio_iov[i].iov_len);
119297633Strasz				racct_add_force(curproc, RACCT_READIOPS, 1);
120297633Strasz			} else {
121297633Strasz				racct_add_force(curproc, RACCT_WRITEBPS,
122297633Strasz				    uio->uio_iov[i].iov_len);
123297633Strasz				racct_add_force(curproc, RACCT_WRITEIOPS, 1);
124297633Strasz			}
125297633Strasz			PROC_UNLOCK(curproc);
126297633Strasz		}
127297633Strasz#endif /* RACCT */
128297633Strasz
12948225Smckusick		while (uio->uio_iov[i].iov_len) {
130295707Simp			g_reset_bio(bp);
131215838Skib			if (uio->uio_rw == UIO_READ) {
132281825Smav				bp->bio_cmd = BIO_READ;
133215838Skib				curthread->td_ru.ru_inblock++;
134215838Skib			} else {
135281825Smav				bp->bio_cmd = BIO_WRITE;
136215838Skib				curthread->td_ru.ru_oublock++;
137215838Skib			}
138281825Smav			bp->bio_offset = uio->uio_offset;
139366939Sbrooks			base = uio->uio_iov[i].iov_base;
140281825Smav			bp->bio_length = uio->uio_iov[i].iov_len;
141281825Smav			if (bp->bio_length > dev->si_iosize_max)
142281825Smav				bp->bio_length = dev->si_iosize_max;
143281825Smav			if (bp->bio_length > MAXPHYS)
144281825Smav				bp->bio_length = MAXPHYS;
14552066Sphk
146281825Smav			/*
147281825Smav			 * Make sure the pbuf can map the request.
148281825Smav			 * The pbuf has kvasize = MAXPHYS, so a request
149281825Smav			 * larger than MAXPHYS - PAGE_SIZE must be
150281825Smav			 * page aligned or it will be fragmented.
15152066Sphk			 */
152366939Sbrooks			poff = (vm_offset_t)base & PAGE_MASK;
153281825Smav			if (pbuf && bp->bio_length + poff > pbuf->b_kvasize) {
154254760Sken				if (dev->si_flags & SI_NOSPLIT) {
155255032Sken					uprintf("%s: request ptr %p is not "
156255032Sken					    "on a page boundary; cannot split "
157254760Sken					    "request\n", devtoname(dev),
158366939Sbrooks					    base);
159254760Sken					error = EFBIG;
160254760Sken					goto doerror;
161254760Sken				}
162281825Smav				bp->bio_length = pbuf->b_kvasize;
163281825Smav				if (poff != 0)
164281825Smav					bp->bio_length -= PAGE_SIZE;
16552066Sphk			}
16652066Sphk
167281825Smav			bp->bio_bcount = bp->bio_length;
168281825Smav			bp->bio_dev = dev;
1691549Srgrimes
170281825Smav			if (pages) {
171281825Smav				if ((npages = vm_fault_quick_hold_pages(
172281825Smav				    &curproc->p_vmspace->vm_map,
173366939Sbrooks				    (vm_offset_t)base, bp->bio_length,
174281825Smav				    prot, pages, maxpages)) < 0) {
175109572Sdillon					error = EFAULT;
176109572Sdillon					goto doerror;
177109572Sdillon				}
178281825Smav				if (pbuf) {
179281825Smav					pmap_qenter((vm_offset_t)sa,
180281825Smav					    pages, npages);
181281825Smav					bp->bio_data = sa + poff;
182281825Smav				} else {
183281825Smav					bp->bio_ma = pages;
184281825Smav					bp->bio_ma_n = npages;
185281825Smav					bp->bio_ma_offset = poff;
186281825Smav					bp->bio_data = unmapped_buf;
187281825Smav					bp->bio_flags |= BIO_UNMAPPED;
188281825Smav				}
189366939Sbrooks			} else
190366939Sbrooks				bp->bio_data = base;
1911549Srgrimes
192290140Shselasky			csw->d_strategy(bp);
193112183Sjeff			if (uio->uio_rw == UIO_READ)
194281825Smav				biowait(bp, "physrd");
195112183Sjeff			else
196281825Smav				biowait(bp, "physwr");
1971549Srgrimes
198281825Smav			if (pages) {
199281825Smav				if (pbuf)
200281825Smav					pmap_qremove((vm_offset_t)sa, npages);
201281825Smav				vm_page_unhold_pages(pages, npages);
202281825Smav			}
203281825Smav
204281825Smav			iolen = bp->bio_length - bp->bio_resid;
205281825Smav			if (iolen == 0 && !(bp->bio_flags & BIO_ERROR))
20652066Sphk				goto doerror;	/* EOF */
20752066Sphk			uio->uio_iov[i].iov_len -= iolen;
208104908Smike			uio->uio_iov[i].iov_base =
209104908Smike			    (char *)uio->uio_iov[i].iov_base + iolen;
21052066Sphk			uio->uio_resid -= iolen;
21152066Sphk			uio->uio_offset += iolen;
212281825Smav			if (bp->bio_flags & BIO_ERROR) {
213281825Smav				error = bp->bio_error;
2141549Srgrimes				goto doerror;
2151549Srgrimes			}
2161549Srgrimes		}
2171549Srgrimes	}
2181549Srgrimesdoerror:
219281825Smav	if (pbuf)
220281825Smav		relpbuf(pbuf, NULL);
221281825Smav	else if (pages)
222281825Smav		free(pages, M_DEVBUF);
223281825Smav	g_destroy_bio(bp);
22445358Speter	PRELE(curproc);
2251549Srgrimes	return (error);
2261541Srgrimes}
227