Deleted Added
full compact
ffs_rawread.c (116192) ffs_rawread.c (118986)
1/*-
2 * Copyright (c) 2000-2003 Tor Egge
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 2000-2003 Tor Egge
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_rawread.c 116192 2003-06-11 06:34:30Z obrien $");
28__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_rawread.c 118986 2003-08-16 06:15:17Z alc $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/fcntl.h>
33#include <sys/file.h>
34#include <sys/stat.h>
35#include <sys/proc.h>
36#include <sys/limits.h>
37#include <sys/mount.h>
38#include <sys/namei.h>
39#include <sys/vnode.h>
40#include <sys/conf.h>
41#include <sys/filio.h>
42#include <sys/ttycom.h>
43#include <sys/bio.h>
44#include <sys/buf.h>
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/fcntl.h>
33#include <sys/file.h>
34#include <sys/stat.h>
35#include <sys/proc.h>
36#include <sys/limits.h>
37#include <sys/mount.h>
38#include <sys/namei.h>
39#include <sys/vnode.h>
40#include <sys/conf.h>
41#include <sys/filio.h>
42#include <sys/ttycom.h>
43#include <sys/bio.h>
44#include <sys/buf.h>
45#include <ufs/ufs/extattr.h>
45#include <ufs/ufs/quota.h>
46#include <ufs/ufs/inode.h>
46#include <ufs/ufs/quota.h>
47#include <ufs/ufs/inode.h>
48#include <ufs/ufs/ufsmount.h>
47#include <ufs/ffs/fs.h>
48
49#include <vm/vm.h>
50#include <vm/vm_extern.h>
51#include <vm/vm_object.h>
52#include <sys/kernel.h>
53#include <sys/sysctl.h>
54
55static int ffs_rawread_readahead(struct vnode *vp,
56 caddr_t udata,
57 off_t offset,
58 size_t len,
59 struct thread *td,
60 struct buf *bp,
61 caddr_t sa);
62static int ffs_rawread_main(struct vnode *vp,
63 struct uio *uio);
64
65static int ffs_rawread_sync(struct vnode *vp, struct thread *td);
66
67int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
68
69void ffs_rawread_setup(void);
70
71static void ffs_rawreadwakeup(struct buf *bp);
72
73
74SYSCTL_DECL(_vfs_ffs);
75
76static int ffsrawbufcnt = 4;
77SYSCTL_INT(_vfs_ffs, OID_AUTO, ffsrawbufcnt, CTLFLAG_RD, &ffsrawbufcnt, 0,
78 "Buffers available for raw reads");
79
80static int allowrawread = 1;
81SYSCTL_INT(_vfs_ffs, OID_AUTO, allowrawread, CTLFLAG_RW, &allowrawread, 0,
82 "Flag to enable raw reads");
83
84static int rawreadahead = 1;
85SYSCTL_INT(_vfs_ffs, OID_AUTO, rawreadahead, CTLFLAG_RW, &rawreadahead, 0,
86 "Flag to enable readahead for long raw reads");
87
88
89void
90ffs_rawread_setup(void)
91{
92 ffsrawbufcnt = (nswbuf > 100 ) ? (nswbuf - (nswbuf >> 4)) : nswbuf - 8;
93}
94
95
/*
 * Bring the vnode to a state where a raw (uncached) read is coherent:
 * msync dirty mmap()ed pages, wait for in-flight writes, and flush any
 * dirty buffers.  Returns 0 on success or an errno from msleep()/VOP_FSYNC().
 * May transiently upgrade the vnode lock to exclusive; the original shared
 * lock is restored (LK_DOWNGRADE) on every exit path that upgraded.
 */
static int
ffs_rawread_sync(struct vnode *vp, struct thread *td)
{
	int spl;
	int error;
	int upgraded;

	GIANT_REQUIRED;
	/* Check for dirty mmap, pending writes and dirty buffers */
	spl = splbio();
	VI_LOCK(vp);
	if (vp->v_numoutput > 0 ||
	    !TAILQ_EMPTY(&vp->v_dirtyblkhd) ||
	    (vp->v_iflag & VI_OBJDIRTY) != 0) {
		splx(spl);
		VI_UNLOCK(vp);

		if (VOP_ISLOCKED(vp, td) != LK_EXCLUSIVE) {
			upgraded = 1;
			/* Upgrade to exclusive lock, this might block */
			VOP_LOCK(vp, LK_UPGRADE | LK_NOPAUSE, td);
		} else
			upgraded = 0;


		/* Attempt to msync mmap() regions to clean dirty mmap */
		VI_LOCK(vp);
		if ((vp->v_iflag & VI_OBJDIRTY) != 0) {
			struct vm_object *obj;
			VI_UNLOCK(vp);
			if (VOP_GETVOBJECT(vp, &obj) == 0) {
				VM_OBJECT_LOCK(obj);
				vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
				VM_OBJECT_UNLOCK(obj);
			}
			VI_LOCK(vp);
		}

		/* Wait for pending writes to complete */
		spl = splbio();
		while (vp->v_numoutput) {
			/* VI_BWAIT asks the write-completion path to wake us */
			vp->v_iflag |= VI_BWAIT;
			error = msleep((caddr_t)&vp->v_numoutput,
				       VI_MTX(vp),
				       PRIBIO + 1,
				       "rawrdfls", 0);
			if (error != 0) {
				splx(spl);
				VI_UNLOCK(vp);
				if (upgraded != 0)
					VOP_LOCK(vp, LK_DOWNGRADE, td);
				return (error);
			}
		}
		/* Flush dirty buffers */
		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
			splx(spl);
			VI_UNLOCK(vp);
			if ((error = VOP_FSYNC(vp, NOCRED, MNT_WAIT, td)) != 0) {
				if (upgraded != 0)
					VOP_LOCK(vp, LK_DOWNGRADE, td);
				return (error);
			}
			VI_LOCK(vp);
			spl = splbio();
			/* After a MNT_WAIT fsync nothing may remain dirty */
			if (vp->v_numoutput > 0 ||
			    !TAILQ_EMPTY(&vp->v_dirtyblkhd))
				panic("ffs_rawread_sync: dirty bufs");
		}
		splx(spl);
		VI_UNLOCK(vp);
		if (upgraded != 0)
			VOP_LOCK(vp, LK_DOWNGRADE, td);
	} else {
		splx(spl);
		VI_UNLOCK(vp);
	}
	return 0;
}
175
176
/*
 * Start one asynchronous raw read of up to 'len' bytes at 'offset'
 * directly into the user buffer 'udata', using the caller-supplied
 * pbuf 'bp' ('sa' is the pbuf's original kva, preserved in b_saveaddr).
 * The transfer length may be clipped by the pbuf kva size, by the
 * contiguous extent reported by VOP_BMAP(), or by the block size when
 * the range is a hole.  Holes are satisfied synchronously by zero-fill
 * and marked B_DONE; real I/O is dispatched via the device strategy
 * routine and completes through ffs_rawreadwakeup().
 * Returns 0 on successful dispatch, EINVAL on block-number overflow,
 * EFAULT if the user pages cannot be wired, or a VOP_BMAP() error.
 */
static int
ffs_rawread_readahead(struct vnode *vp,
		      caddr_t udata,
		      off_t offset,
		      size_t len,
		      struct thread *td,
		      struct buf *bp,
		      caddr_t sa)
{
	int error;
	u_int iolen;
	off_t blockno;
	int blockoff;
	int bsize;
	struct vnode *dp;
	int bforwards;

	GIANT_REQUIRED;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	/*
	 * Clip the request to the pbuf's kva window; a misaligned user
	 * address costs one extra page of mapping, hence the PAGE_SIZE
	 * reduction when udata is not page aligned.
	 */
	iolen = ((vm_offset_t) udata) & PAGE_MASK;
	bp->b_bcount = len;
	if (bp->b_bcount + iolen > bp->b_kvasize) {
		bp->b_bcount = bp->b_kvasize;
		if (iolen != 0)
			bp->b_bcount -= PAGE_SIZE;
	}
	bp->b_flags = B_PHYS;
	bp->b_iocmd = BIO_READ;
	bp->b_iodone = ffs_rawreadwakeup;
	bp->b_data = udata;
	bp->b_saveaddr = sa;
	bp->b_offset = offset;
	blockno = bp->b_offset / bsize;
	blockoff = (bp->b_offset % bsize) / DEV_BSIZE;
	if ((daddr_t) blockno != blockno) {
		return EINVAL; /* blockno overflow */
	}

	bp->b_lblkno = bp->b_blkno = blockno;

	/* Translate the logical block to a device block + readahead extent */
	error = VOP_BMAP(vp, bp->b_lblkno, &dp, &bp->b_blkno, &bforwards,
			 NULL);
	if (error != 0) {
		return error;
	}
	if (bp->b_blkno == -1) {

		/* Fill holes with NULs to preserve semantics */
		
		if (bp->b_bcount + blockoff * DEV_BSIZE > bsize)
			bp->b_bcount = bsize - blockoff * DEV_BSIZE;
		bp->b_bufsize = bp->b_bcount;
		
		if (vmapbuf(bp) < 0)
			return EFAULT;
		
		/* Yield if we have hogged the CPU before the long bzero */
		if (ticks - PCPU_GET(switchticks) >= hogticks)
			uio_yield();
		bzero(bp->b_data, bp->b_bufsize);

		/* Mark operation completed (similar to bufdone()) */

		bp->b_resid = 0;
		bp->b_flags |= B_DONE;
		return 0;
	}

	/* Clip to the contiguous run: this block plus bforwards more */
	if (bp->b_bcount + blockoff * DEV_BSIZE > bsize * (1 + bforwards))
		bp->b_bcount = bsize * (1 + bforwards) - blockoff * DEV_BSIZE;
	bp->b_bufsize = bp->b_bcount;
	bp->b_blkno += blockoff;
	bp->b_dev = dp->v_rdev;
	
	/* Wire the user pages for the device DMA */
	if (vmapbuf(bp) < 0)
		return EFAULT;
	
	if (dp->v_type == VCHR)
		(void) VOP_SPECSTRATEGY(dp, bp);
	else
		(void)  VOP_STRATEGY(dp, bp);
	return 0;
}
260
261
/*
 * Core raw-read loop: drains uio->uio_resid with direct-to-user I/O,
 * keeping up to two pbufs in flight (current buffer 'bp' plus optional
 * readahead 'nbp', enabled by the vfs.ffs.rawreadahead sysctl).
 * On return uio's iov_base/resid/offset reflect the bytes transferred.
 * A readahead error is deferred ('nerror') so already-issued data is
 * consumed first; it is reported only if no primary error occurred.
 */
static int
ffs_rawread_main(struct vnode *vp,
		 struct uio *uio)
{
	int error, nerror;
	struct buf *bp, *nbp, *tbp;
	caddr_t sa, nsa, tsa;
	u_int iolen;
	int spl;
	caddr_t udata;
	long resid;
	off_t offset;
	struct thread *td;

	GIANT_REQUIRED;
	td = uio->uio_td ? uio->uio_td : curthread;
	udata = uio->uio_iov->iov_base;
	resid = uio->uio_resid;
	offset = uio->uio_offset;

	/*
	 * keep the process from being swapped
	 */
	PHOLD(td->td_proc);

	error = 0;
	nerror = 0;

	bp = NULL;
	nbp = NULL;
	sa = NULL;
	nsa = NULL;

	while (resid > 0) {
		
		if (bp == NULL) { /* Setup first read */
			/* XXX: Leave some bufs for swap */
			bp = getpbuf(&ffsrawbufcnt);
			sa = bp->b_data;
			bp->b_vp = vp;
			error = ffs_rawread_readahead(vp, udata, offset,
						     resid, td, bp, sa);
			if (error != 0)
				break;
			
			if (resid > bp->b_bufsize) { /* Setup fist readahead */
				/* XXX: Leave bufs for swap */
				if (rawreadahead != 0) 
					nbp = trypbuf(&ffsrawbufcnt);
				else
					nbp = NULL;
				if (nbp != NULL) {
					nsa = nbp->b_data;
					nbp->b_vp = vp;
					
					nerror = ffs_rawread_readahead(vp, 
								       udata +
								       bp->b_bufsize,
								       offset +
								       bp->b_bufsize,
								       resid -
								       bp->b_bufsize,
								       td,
								       nbp,
								       nsa);
					if (nerror) {
						relpbuf(nbp, &ffsrawbufcnt);
						nbp = NULL;
					}
				}
			}
		}
		
		/* Wait for the current buffer's I/O to complete */
		spl = splbio();
		bwait(bp, PRIBIO, "rawrd");
		splx(spl);
		
		vunmapbuf(bp);
		
		iolen = bp->b_bcount - bp->b_resid;
		if (iolen == 0 && (bp->b_ioflags & BIO_ERROR) == 0) {
			nerror = 0;	/* Ignore possible beyond EOF error */
			break; /* EOF */
		}
		
		if ((bp->b_ioflags & BIO_ERROR) != 0) {
			error = bp->b_error;
			break;
		}
		resid -= iolen;
		udata += iolen;
		offset += iolen;
		if (iolen < bp->b_bufsize) {
			/* Incomplete read.  Try to read remaining part */
			error = ffs_rawread_readahead(vp,
						      udata,
						      offset,
						      bp->b_bufsize - iolen,
						      td,
						      bp,
						      sa);
			if (error != 0)
				break;
		} else if (nbp != NULL) { /* Complete read with readahead */
			
			/* Swap current and readahead buffers (and kvas) */
			tbp = bp;
			bp = nbp;
			nbp = tbp;
			
			tsa = sa;
			sa = nsa;
			nsa = tsa;
			
			if (resid <= bp->b_bufsize) { /* No more readaheads */
				relpbuf(nbp, &ffsrawbufcnt);
				nbp = NULL;
			} else { /* Setup next readahead */
				nerror = ffs_rawread_readahead(vp,
							       udata +
							       bp->b_bufsize,
							       offset +
							       bp->b_bufsize,
							       resid -
							       bp->b_bufsize,
							       td,
							       nbp,
							       nsa);
				if (nerror != 0) {
					relpbuf(nbp, &ffsrawbufcnt);
					nbp = NULL;
				}
			}
		} else if (nerror != 0) {/* Deferred Readahead error */
			break;		
		} else if (resid > 0) { /* More to read, no readahead */
			error = ffs_rawread_readahead(vp, udata, offset,
						      resid, td, bp, sa);
			if (error != 0)
				break;
		}
	}
	
	if (bp != NULL)
		relpbuf(bp, &ffsrawbufcnt);
	if (nbp != NULL) {			/* Run down readahead buffer */
		spl = splbio();
		bwait(nbp, PRIBIO, "rawrd");
		splx(spl);
		vunmapbuf(nbp);
		relpbuf(nbp, &ffsrawbufcnt);
	}
	
	if (error == 0)
		error = nerror;
	PRELE(td->td_proc);
	/* Publish transfer progress back to the caller's uio */
	uio->uio_iov->iov_base = udata;
	uio->uio_resid = resid;
	uio->uio_offset = offset;
	return error;
}
422
423
/*
 * Entry point: attempt an uncached ("raw") read on vp.  Eligible only
 * for single-iovec, user-space, sector-aligned reads with allowrawread
 * set and no deadlock-treatment flag on the thread.  Sets *workdone to 1
 * when the whole request was satisfied here; with *workdone == 0 the
 * caller must (re)do the remaining read through the buffer cache.
 * A read spanning EOF is only raw-read up to the last full filesystem
 * block; the partial tail is left for the buffered path.
 */
int
ffs_rawread(struct vnode *vp,
	    struct uio *uio,
	    int *workdone)
{
	if (allowrawread != 0 &&
	    uio->uio_iovcnt == 1 && 
	    uio->uio_segflg == UIO_USERSPACE &&
	    uio->uio_resid == uio->uio_iov->iov_len &&
	    (((uio->uio_td != NULL) ? uio->uio_td : curthread)->td_flags &
	     TDF_DEADLKTREAT) == 0) {
		int secsize;		/* Media sector size */
		off_t filebytes;	/* Bytes left of file */
		int blockbytes;		/* Bytes left of file in full blocks */
		int partialbytes;	/* Bytes in last partial block */
		int skipbytes;		/* Bytes not to read in ffs_rawread */
		struct inode *ip;
		int error;
		

		/* Only handle sector aligned reads */
		ip = VTOI(vp);
		secsize = ip->i_devvp->v_rdev->si_bsize_phys;
		if ((uio->uio_offset & (secsize - 1)) == 0 &&
		    (uio->uio_resid & (secsize - 1)) == 0) {
			
			/* Sync dirty pages and buffers if needed */
			error = ffs_rawread_sync(vp,
						 (uio->uio_td != NULL) ?
						 uio->uio_td : curthread);
			if (error != 0)
				return error;
			
			/* Check for end of file */
			if (ip->i_size > uio->uio_offset) {
				filebytes = ip->i_size - uio->uio_offset;

				/* No special eof handling needed ? */
				if (uio->uio_resid <= filebytes) {
					*workdone = 1;
					return ffs_rawread_main(vp, uio);
				}
				
				/*
				 * The unsigned truncation is safe here:
				 * fs_bsize is a power of two, so the modulo
				 * result matches that of the full 64-bit
				 * i_size.
				 */
				partialbytes = ((unsigned int) ip->i_size) %
					ip->i_fs->fs_bsize;
				blockbytes = (int) filebytes - partialbytes;
				if (blockbytes > 0) {
					skipbytes = uio->uio_resid -
						blockbytes;
					uio->uio_resid = blockbytes;
					error = ffs_rawread_main(vp, uio);
					uio->uio_resid += skipbytes;
					if (error != 0)
						return error;
					/* Read remaining part using buffer */
				}
			}
		}
	}
	*workdone = 0;
	return 0;
}
486
487
/*
 * b_iodone callback for raw-read buffers: signal completion so that
 * bwait() in ffs_rawread_main() wakes up.
 */
static void
ffs_rawreadwakeup(struct buf *bp)
{
	bdone(bp);
}
49#include <ufs/ffs/fs.h>
50
51#include <vm/vm.h>
52#include <vm/vm_extern.h>
53#include <vm/vm_object.h>
54#include <sys/kernel.h>
55#include <sys/sysctl.h>
56
57static int ffs_rawread_readahead(struct vnode *vp,
58 caddr_t udata,
59 off_t offset,
60 size_t len,
61 struct thread *td,
62 struct buf *bp,
63 caddr_t sa);
64static int ffs_rawread_main(struct vnode *vp,
65 struct uio *uio);
66
67static int ffs_rawread_sync(struct vnode *vp, struct thread *td);
68
69int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
70
71void ffs_rawread_setup(void);
72
73static void ffs_rawreadwakeup(struct buf *bp);
74
75
76SYSCTL_DECL(_vfs_ffs);
77
78static int ffsrawbufcnt = 4;
79SYSCTL_INT(_vfs_ffs, OID_AUTO, ffsrawbufcnt, CTLFLAG_RD, &ffsrawbufcnt, 0,
80 "Buffers available for raw reads");
81
82static int allowrawread = 1;
83SYSCTL_INT(_vfs_ffs, OID_AUTO, allowrawread, CTLFLAG_RW, &allowrawread, 0,
84 "Flag to enable raw reads");
85
86static int rawreadahead = 1;
87SYSCTL_INT(_vfs_ffs, OID_AUTO, rawreadahead, CTLFLAG_RW, &rawreadahead, 0,
88 "Flag to enable readahead for long raw reads");
89
90
91void
92ffs_rawread_setup(void)
93{
94 ffsrawbufcnt = (nswbuf > 100 ) ? (nswbuf - (nswbuf >> 4)) : nswbuf - 8;
95}
96
97
/*
 * Bring the vnode to a state where a raw (uncached) read is coherent:
 * msync dirty mmap()ed pages, wait for in-flight writes, and flush any
 * dirty buffers.  Returns 0 on success or an errno from msleep()/VOP_FSYNC().
 * May transiently upgrade the vnode lock to exclusive; the original shared
 * lock is restored (LK_DOWNGRADE) on every exit path that upgraded.
 */
static int
ffs_rawread_sync(struct vnode *vp, struct thread *td)
{
	int spl;
	int error;
	int upgraded;

	GIANT_REQUIRED;
	/* Check for dirty mmap, pending writes and dirty buffers */
	spl = splbio();
	VI_LOCK(vp);
	if (vp->v_numoutput > 0 ||
	    !TAILQ_EMPTY(&vp->v_dirtyblkhd) ||
	    (vp->v_iflag & VI_OBJDIRTY) != 0) {
		splx(spl);
		VI_UNLOCK(vp);

		if (VOP_ISLOCKED(vp, td) != LK_EXCLUSIVE) {
			upgraded = 1;
			/* Upgrade to exclusive lock, this might block */
			VOP_LOCK(vp, LK_UPGRADE | LK_NOPAUSE, td);
		} else
			upgraded = 0;


		/* Attempt to msync mmap() regions to clean dirty mmap */
		VI_LOCK(vp);
		if ((vp->v_iflag & VI_OBJDIRTY) != 0) {
			struct vm_object *obj;
			VI_UNLOCK(vp);
			if (VOP_GETVOBJECT(vp, &obj) == 0) {
				VM_OBJECT_LOCK(obj);
				vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
				VM_OBJECT_UNLOCK(obj);
			}
			VI_LOCK(vp);
		}

		/* Wait for pending writes to complete */
		spl = splbio();
		while (vp->v_numoutput) {
			/* VI_BWAIT asks the write-completion path to wake us */
			vp->v_iflag |= VI_BWAIT;
			error = msleep((caddr_t)&vp->v_numoutput,
				       VI_MTX(vp),
				       PRIBIO + 1,
				       "rawrdfls", 0);
			if (error != 0) {
				splx(spl);
				VI_UNLOCK(vp);
				if (upgraded != 0)
					VOP_LOCK(vp, LK_DOWNGRADE, td);
				return (error);
			}
		}
		/* Flush dirty buffers */
		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
			splx(spl);
			VI_UNLOCK(vp);
			if ((error = VOP_FSYNC(vp, NOCRED, MNT_WAIT, td)) != 0) {
				if (upgraded != 0)
					VOP_LOCK(vp, LK_DOWNGRADE, td);
				return (error);
			}
			VI_LOCK(vp);
			spl = splbio();
			/* After a MNT_WAIT fsync nothing may remain dirty */
			if (vp->v_numoutput > 0 ||
			    !TAILQ_EMPTY(&vp->v_dirtyblkhd))
				panic("ffs_rawread_sync: dirty bufs");
		}
		splx(spl);
		VI_UNLOCK(vp);
		if (upgraded != 0)
			VOP_LOCK(vp, LK_DOWNGRADE, td);
	} else {
		splx(spl);
		VI_UNLOCK(vp);
	}
	return 0;
}
177
178
/*
 * Start one asynchronous raw read of up to 'len' bytes at 'offset'
 * directly into the user buffer 'udata', using the caller-supplied
 * pbuf 'bp' ('sa' is the pbuf's original kva, preserved in b_saveaddr).
 * The transfer length may be clipped by the pbuf kva size, by the
 * contiguous extent reported by VOP_BMAP(), or by the block size when
 * the range is a hole.  Holes are satisfied synchronously by zero-fill
 * and marked B_DONE; real I/O is dispatched via the device strategy
 * routine and completes through ffs_rawreadwakeup().
 * Returns 0 on successful dispatch, EINVAL on block-number overflow,
 * EFAULT if the user pages cannot be wired, or a VOP_BMAP() error.
 */
static int
ffs_rawread_readahead(struct vnode *vp,
		      caddr_t udata,
		      off_t offset,
		      size_t len,
		      struct thread *td,
		      struct buf *bp,
		      caddr_t sa)
{
	int error;
	u_int iolen;
	off_t blockno;
	int blockoff;
	int bsize;
	struct vnode *dp;
	int bforwards;

	GIANT_REQUIRED;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	/*
	 * Clip the request to the pbuf's kva window; a misaligned user
	 * address costs one extra page of mapping, hence the PAGE_SIZE
	 * reduction when udata is not page aligned.
	 */
	iolen = ((vm_offset_t) udata) & PAGE_MASK;
	bp->b_bcount = len;
	if (bp->b_bcount + iolen > bp->b_kvasize) {
		bp->b_bcount = bp->b_kvasize;
		if (iolen != 0)
			bp->b_bcount -= PAGE_SIZE;
	}
	bp->b_flags = B_PHYS;
	bp->b_iocmd = BIO_READ;
	bp->b_iodone = ffs_rawreadwakeup;
	bp->b_data = udata;
	bp->b_saveaddr = sa;
	bp->b_offset = offset;
	blockno = bp->b_offset / bsize;
	blockoff = (bp->b_offset % bsize) / DEV_BSIZE;
	if ((daddr_t) blockno != blockno) {
		return EINVAL; /* blockno overflow */
	}

	bp->b_lblkno = bp->b_blkno = blockno;

	/* Translate the logical block to a device block + readahead extent */
	error = VOP_BMAP(vp, bp->b_lblkno, &dp, &bp->b_blkno, &bforwards,
			 NULL);
	if (error != 0) {
		return error;
	}
	if (bp->b_blkno == -1) {

		/* Fill holes with NULs to preserve semantics */
		
		if (bp->b_bcount + blockoff * DEV_BSIZE > bsize)
			bp->b_bcount = bsize - blockoff * DEV_BSIZE;
		bp->b_bufsize = bp->b_bcount;
		
		if (vmapbuf(bp) < 0)
			return EFAULT;
		
		/* Yield if we have hogged the CPU before the long bzero */
		if (ticks - PCPU_GET(switchticks) >= hogticks)
			uio_yield();
		bzero(bp->b_data, bp->b_bufsize);

		/* Mark operation completed (similar to bufdone()) */

		bp->b_resid = 0;
		bp->b_flags |= B_DONE;
		return 0;
	}

	/* Clip to the contiguous run: this block plus bforwards more */
	if (bp->b_bcount + blockoff * DEV_BSIZE > bsize * (1 + bforwards))
		bp->b_bcount = bsize * (1 + bforwards) - blockoff * DEV_BSIZE;
	bp->b_bufsize = bp->b_bcount;
	bp->b_blkno += blockoff;
	bp->b_dev = dp->v_rdev;
	
	/* Wire the user pages for the device DMA */
	if (vmapbuf(bp) < 0)
		return EFAULT;
	
	if (dp->v_type == VCHR)
		(void) VOP_SPECSTRATEGY(dp, bp);
	else
		(void)  VOP_STRATEGY(dp, bp);
	return 0;
}
262
263
/*
 * Core raw-read loop: drains uio->uio_resid with direct-to-user I/O,
 * keeping up to two pbufs in flight (current buffer 'bp' plus optional
 * readahead 'nbp', enabled by the vfs.ffs.rawreadahead sysctl).
 * On return uio's iov_base/resid/offset reflect the bytes transferred.
 * A readahead error is deferred ('nerror') so already-issued data is
 * consumed first; it is reported only if no primary error occurred.
 */
static int
ffs_rawread_main(struct vnode *vp,
		 struct uio *uio)
{
	int error, nerror;
	struct buf *bp, *nbp, *tbp;
	caddr_t sa, nsa, tsa;
	u_int iolen;
	int spl;
	caddr_t udata;
	long resid;
	off_t offset;
	struct thread *td;

	GIANT_REQUIRED;
	td = uio->uio_td ? uio->uio_td : curthread;
	udata = uio->uio_iov->iov_base;
	resid = uio->uio_resid;
	offset = uio->uio_offset;

	/*
	 * keep the process from being swapped
	 */
	PHOLD(td->td_proc);

	error = 0;
	nerror = 0;

	bp = NULL;
	nbp = NULL;
	sa = NULL;
	nsa = NULL;

	while (resid > 0) {
		
		if (bp == NULL) { /* Setup first read */
			/* XXX: Leave some bufs for swap */
			bp = getpbuf(&ffsrawbufcnt);
			sa = bp->b_data;
			bp->b_vp = vp;
			error = ffs_rawread_readahead(vp, udata, offset,
						     resid, td, bp, sa);
			if (error != 0)
				break;
			
			if (resid > bp->b_bufsize) { /* Setup fist readahead */
				/* XXX: Leave bufs for swap */
				if (rawreadahead != 0) 
					nbp = trypbuf(&ffsrawbufcnt);
				else
					nbp = NULL;
				if (nbp != NULL) {
					nsa = nbp->b_data;
					nbp->b_vp = vp;
					
					nerror = ffs_rawread_readahead(vp, 
								       udata +
								       bp->b_bufsize,
								       offset +
								       bp->b_bufsize,
								       resid -
								       bp->b_bufsize,
								       td,
								       nbp,
								       nsa);
					if (nerror) {
						relpbuf(nbp, &ffsrawbufcnt);
						nbp = NULL;
					}
				}
			}
		}
		
		/* Wait for the current buffer's I/O to complete */
		spl = splbio();
		bwait(bp, PRIBIO, "rawrd");
		splx(spl);
		
		vunmapbuf(bp);
		
		iolen = bp->b_bcount - bp->b_resid;
		if (iolen == 0 && (bp->b_ioflags & BIO_ERROR) == 0) {
			nerror = 0;	/* Ignore possible beyond EOF error */
			break; /* EOF */
		}
		
		if ((bp->b_ioflags & BIO_ERROR) != 0) {
			error = bp->b_error;
			break;
		}
		resid -= iolen;
		udata += iolen;
		offset += iolen;
		if (iolen < bp->b_bufsize) {
			/* Incomplete read.  Try to read remaining part */
			error = ffs_rawread_readahead(vp,
						      udata,
						      offset,
						      bp->b_bufsize - iolen,
						      td,
						      bp,
						      sa);
			if (error != 0)
				break;
		} else if (nbp != NULL) { /* Complete read with readahead */
			
			/* Swap current and readahead buffers (and kvas) */
			tbp = bp;
			bp = nbp;
			nbp = tbp;
			
			tsa = sa;
			sa = nsa;
			nsa = tsa;
			
			if (resid <= bp->b_bufsize) { /* No more readaheads */
				relpbuf(nbp, &ffsrawbufcnt);
				nbp = NULL;
			} else { /* Setup next readahead */
				nerror = ffs_rawread_readahead(vp,
							       udata +
							       bp->b_bufsize,
							       offset +
							       bp->b_bufsize,
							       resid -
							       bp->b_bufsize,
							       td,
							       nbp,
							       nsa);
				if (nerror != 0) {
					relpbuf(nbp, &ffsrawbufcnt);
					nbp = NULL;
				}
			}
		} else if (nerror != 0) {/* Deferred Readahead error */
			break;		
		} else if (resid > 0) { /* More to read, no readahead */
			error = ffs_rawread_readahead(vp, udata, offset,
						      resid, td, bp, sa);
			if (error != 0)
				break;
		}
	}
	
	if (bp != NULL)
		relpbuf(bp, &ffsrawbufcnt);
	if (nbp != NULL) {			/* Run down readahead buffer */
		spl = splbio();
		bwait(nbp, PRIBIO, "rawrd");
		splx(spl);
		vunmapbuf(nbp);
		relpbuf(nbp, &ffsrawbufcnt);
	}
	
	if (error == 0)
		error = nerror;
	PRELE(td->td_proc);
	/* Publish transfer progress back to the caller's uio */
	uio->uio_iov->iov_base = udata;
	uio->uio_resid = resid;
	uio->uio_offset = offset;
	return error;
}
424
425
/*
 * Entry point: attempt an uncached ("raw") read on vp.  Eligible only
 * for single-iovec, user-space, sector-aligned reads with allowrawread
 * set and no deadlock-treatment flag on the thread.  Sets *workdone to 1
 * when the whole request was satisfied here; with *workdone == 0 the
 * caller must (re)do the remaining read through the buffer cache.
 * A read spanning EOF is only raw-read up to the last full filesystem
 * block; the partial tail is left for the buffered path.
 */
int
ffs_rawread(struct vnode *vp,
	    struct uio *uio,
	    int *workdone)
{
	if (allowrawread != 0 &&
	    uio->uio_iovcnt == 1 && 
	    uio->uio_segflg == UIO_USERSPACE &&
	    uio->uio_resid == uio->uio_iov->iov_len &&
	    (((uio->uio_td != NULL) ? uio->uio_td : curthread)->td_flags &
	     TDF_DEADLKTREAT) == 0) {
		int secsize;		/* Media sector size */
		off_t filebytes;	/* Bytes left of file */
		int blockbytes;		/* Bytes left of file in full blocks */
		int partialbytes;	/* Bytes in last partial block */
		int skipbytes;		/* Bytes not to read in ffs_rawread */
		struct inode *ip;
		int error;
		

		/* Only handle sector aligned reads */
		ip = VTOI(vp);
		secsize = ip->i_devvp->v_rdev->si_bsize_phys;
		if ((uio->uio_offset & (secsize - 1)) == 0 &&
		    (uio->uio_resid & (secsize - 1)) == 0) {
			
			/* Sync dirty pages and buffers if needed */
			error = ffs_rawread_sync(vp,
						 (uio->uio_td != NULL) ?
						 uio->uio_td : curthread);
			if (error != 0)
				return error;
			
			/* Check for end of file */
			if (ip->i_size > uio->uio_offset) {
				filebytes = ip->i_size - uio->uio_offset;

				/* No special eof handling needed ? */
				if (uio->uio_resid <= filebytes) {
					*workdone = 1;
					return ffs_rawread_main(vp, uio);
				}
				
				/*
				 * The unsigned truncation is safe here:
				 * fs_bsize is a power of two, so the modulo
				 * result matches that of the full 64-bit
				 * i_size.
				 */
				partialbytes = ((unsigned int) ip->i_size) %
					ip->i_fs->fs_bsize;
				blockbytes = (int) filebytes - partialbytes;
				if (blockbytes > 0) {
					skipbytes = uio->uio_resid -
						blockbytes;
					uio->uio_resid = blockbytes;
					error = ffs_rawread_main(vp, uio);
					uio->uio_resid += skipbytes;
					if (error != 0)
						return error;
					/* Read remaining part using buffer */
				}
			}
		}
	}
	*workdone = 0;
	return 0;
}
488
489
/*
 * b_iodone callback for raw-read buffers: signal completion so that
 * bwait() in ffs_rawread_main() wakes up.
 */
static void
ffs_rawreadwakeup(struct buf *bp)
{
	bdone(bp);
}