1/*- 2 * Copyright (c) 2000-2003 Tor Egge 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h>
| 1/*- 2 * Copyright (c) 2000-2003 Tor Egge 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h>
|
#include <ufs/ffs/fs.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

static int ffs_rawread_readahead(struct vnode *vp,
				 caddr_t udata,
				 off_t offset,
				 size_t len,
				 struct thread *td,
				 struct buf *bp,
				 caddr_t sa);
static int ffs_rawread_main(struct vnode *vp,
			    struct uio *uio);

static int ffs_rawread_sync(struct vnode *vp, struct thread *td);

int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);

void ffs_rawread_setup(void);

static void ffs_rawreadwakeup(struct buf *bp);


SYSCTL_DECL(_vfs_ffs);

/*
 * Number of pbufs set aside for raw reads; recomputed from nswbuf in
 * ffs_rawread_setup().  Read-only via sysctl vfs.ffs.ffsrawbufcnt.
 */
static int ffsrawbufcnt = 4;
SYSCTL_INT(_vfs_ffs, OID_AUTO, ffsrawbufcnt, CTLFLAG_RD, &ffsrawbufcnt, 0,
	   "Buffers available for raw reads");

/* Global switch for the raw-read fast path (vfs.ffs.allowrawread). */
static int allowrawread = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, allowrawread, CTLFLAG_RW, &allowrawread, 0,
	   "Flag to enable raw reads");

/* Enables the second, overlapping readahead buffer in ffs_rawread_main(). */
static int rawreadahead = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, rawreadahead, CTLFLAG_RW, &rawreadahead, 0,
	   "Flag to enable readahead for long raw reads");


/*
 * Size the raw-read buffer pool from the system swap-buffer count, leaving
 * some buffers free for swap: keep 1/16th of nswbuf in reserve when nswbuf
 * is large, otherwise reserve a flat 8 buffers.
 */
void
ffs_rawread_setup(void)
{
	ffsrawbufcnt = (nswbuf > 100 ) ?
		(nswbuf - (nswbuf >> 4)) : nswbuf - 8;
}


/*
 * Bring the vnode into a state where reading the underlying device directly
 * is coherent with cached data: msync any dirty mmap()ed pages, wait for
 * in-flight writes, and fsync dirty buffers.  May temporarily upgrade the
 * vnode lock to exclusive (downgraded again before return).  Returns 0 or
 * an errno from msleep()/VOP_FSYNC().
 */
static int
ffs_rawread_sync(struct vnode *vp, struct thread *td)
{
	int spl;
	int error;
	int upgraded;

	GIANT_REQUIRED;
	/* Check for dirty mmap, pending writes and dirty buffers */
	spl = splbio();
	VI_LOCK(vp);
	if (vp->v_numoutput > 0 ||
	    !TAILQ_EMPTY(&vp->v_dirtyblkhd) ||
	    (vp->v_iflag & VI_OBJDIRTY) != 0) {
		splx(spl);
		VI_UNLOCK(vp);

		if (VOP_ISLOCKED(vp, td) != LK_EXCLUSIVE) {
			upgraded = 1;
			/* Upgrade to exclusive lock, this might block */
			VOP_LOCK(vp, LK_UPGRADE | LK_NOPAUSE, td);
		} else
			upgraded = 0;


		/* Attempt to msync mmap() regions to clean dirty mmap */
		VI_LOCK(vp);
		if ((vp->v_iflag & VI_OBJDIRTY) != 0) {
			struct vm_object *obj;
			VI_UNLOCK(vp);
			if (VOP_GETVOBJECT(vp, &obj) == 0) {
				VM_OBJECT_LOCK(obj);
				vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
				VM_OBJECT_UNLOCK(obj);
			}
			VI_LOCK(vp);
		}

		/*
		 * Wait for pending writes to complete.  VI_BWAIT asks the
		 * biodone path to wake us on &vp->v_numoutput; the interlock
		 * is dropped/reacquired inside msleep().
		 */
		spl = splbio();
		while (vp->v_numoutput) {
			vp->v_iflag |= VI_BWAIT;
			error = msleep((caddr_t)&vp->v_numoutput,
				       VI_MTX(vp),
				       PRIBIO + 1,
				       "rawrdfls", 0);
			if (error != 0) {
				splx(spl);
				VI_UNLOCK(vp);
				if (upgraded != 0)
					VOP_LOCK(vp, LK_DOWNGRADE, td);
				return (error);
			}
		}
		/* Flush dirty buffers */
		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
			splx(spl);
			VI_UNLOCK(vp);
			if ((error = VOP_FSYNC(vp, NOCRED, MNT_WAIT, td)) != 0) {
				if (upgraded != 0)
					VOP_LOCK(vp, LK_DOWNGRADE, td);
				return (error);
			}
			VI_LOCK(vp);
			spl = splbio();
			/* After a MNT_WAIT fsync nothing may remain dirty. */
			if (vp->v_numoutput > 0 ||
			    !TAILQ_EMPTY(&vp->v_dirtyblkhd))
				panic("ffs_rawread_sync: dirty bufs");
		}
		splx(spl);
		VI_UNLOCK(vp);
		if (upgraded != 0)
			VOP_LOCK(vp, LK_DOWNGRADE, td);
	} else {
		splx(spl);
		VI_UNLOCK(vp);
	}
	return 0;
}


/*
 * Prepare and launch one asynchronous raw read of up to 'len' bytes at
 * 'offset' straight into the user buffer 'udata', using pbuf 'bp' ('sa' is
 * the pbuf's original KVA, preserved in b_saveaddr for vunmapbuf()).
 *
 * b_bcount is clamped to the pbuf KVA window and, via VOP_BMAP(), to the
 * contiguous on-disk run (bforwards readahead blocks).  Holes (b_blkno ==
 * -1) are zero-filled here and completed synchronously.  Completion is
 * signalled through ffs_rawreadwakeup()/bdone(); callers bwait() on bp.
 * Returns 0 on success or EINVAL/EFAULT/errno.
 */
static int
ffs_rawread_readahead(struct vnode *vp,
		      caddr_t udata,
		      off_t offset,
		      size_t len,
		      struct thread *td,
		      struct buf *bp,
		      caddr_t sa)
{
	int error;
	u_int iolen;
	off_t blockno;
	int blockoff;
	int bsize;
	struct vnode *dp;
	int bforwards;

	GIANT_REQUIRED;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	/*
	 * iolen is here the page offset of the user address; if the request
	 * does not fit in the buffer's KVA window, clamp it (dropping one
	 * page when the mapping is misaligned).
	 */
	iolen = ((vm_offset_t) udata) & PAGE_MASK;
	bp->b_bcount = len;
	if (bp->b_bcount + iolen > bp->b_kvasize) {
		bp->b_bcount = bp->b_kvasize;
		if (iolen != 0)
			bp->b_bcount -= PAGE_SIZE;
	}
	bp->b_flags = B_PHYS;
	bp->b_iocmd = BIO_READ;
	bp->b_iodone = ffs_rawreadwakeup;
	bp->b_data = udata;
	bp->b_saveaddr = sa;
	bp->b_offset = offset;
	blockno = bp->b_offset / bsize;
	blockoff = (bp->b_offset % bsize) / DEV_BSIZE;
	if ((daddr_t) blockno != blockno) {
		return EINVAL; /* blockno overflow */
	}

	bp->b_lblkno = bp->b_blkno = blockno;

	error = VOP_BMAP(vp, bp->b_lblkno, &dp, &bp->b_blkno, &bforwards,
			 NULL);
	if (error != 0) {
		return error;
	}
	if (bp->b_blkno == -1) {

		/* Fill holes with NULs to preserve semantics */

		if (bp->b_bcount + blockoff * DEV_BSIZE > bsize)
			bp->b_bcount = bsize - blockoff * DEV_BSIZE;
		bp->b_bufsize = bp->b_bcount;

		if (vmapbuf(bp) < 0)
			return EFAULT;

		/* Yield if we have hogged the CPU before the long bzero(). */
		if (ticks - PCPU_GET(switchticks) >= hogticks)
			uio_yield();
		bzero(bp->b_data, bp->b_bufsize);

		/* Mark operation completed (similar to bufdone()) */

		bp->b_resid = 0;
		bp->b_flags |= B_DONE;
		return 0;
	}

	/* Limit the transfer to the contiguous run reported by VOP_BMAP(). */
	if (bp->b_bcount + blockoff * DEV_BSIZE > bsize * (1 + bforwards))
		bp->b_bcount = bsize * (1 + bforwards) - blockoff * DEV_BSIZE;
	bp->b_bufsize = bp->b_bcount;
	bp->b_blkno += blockoff;
	bp->b_dev = dp->v_rdev;

	/* Wire and map the user pages into the buffer. */
	if (vmapbuf(bp) < 0)
		return EFAULT;

	if (dp->v_type == VCHR)
		(void) VOP_SPECSTRATEGY(dp, bp);
	else
		(void) VOP_STRATEGY(dp, bp);
	return 0;
}


/*
 * Main raw-read loop: double-buffered direct I/O into the user's buffer.
 * 'bp' carries the current read; when more data remains and the
 * vfs.ffs.rawreadahead knob is set, 'nbp' is kept in flight one window
 * ahead.  On each completion the buffers are swapped.  Consumed iovec
 * state is written back into 'uio' before returning.  The uio must be a
 * single UIO_USERSPACE iovec (guaranteed by ffs_rawread()).
 */
static int
ffs_rawread_main(struct vnode *vp,
		 struct uio *uio)
{
	int error, nerror;
	struct buf *bp, *nbp, *tbp;
	caddr_t sa, nsa, tsa;
	u_int iolen;
	int spl;
	caddr_t udata;
	long resid;
	off_t offset;
	struct thread *td;

	GIANT_REQUIRED;
	td = uio->uio_td ? uio->uio_td : curthread;
	udata = uio->uio_iov->iov_base;
	resid = uio->uio_resid;
	offset = uio->uio_offset;

	/*
	 * keep the process from being swapped
	 */
	PHOLD(td->td_proc);

	error = 0;
	nerror = 0;

	bp = NULL;
	nbp = NULL;
	sa = NULL;
	nsa = NULL;

	while (resid > 0) {

		if (bp == NULL) { /* Setup first read */
			/* XXX: Leave some bufs for swap */
			bp = getpbuf(&ffsrawbufcnt);
			sa = bp->b_data;
			bp->b_vp = vp;
			error = ffs_rawread_readahead(vp, udata, offset,
						      resid, td, bp, sa);
			if (error != 0)
				break;

			if (resid > bp->b_bufsize) { /* Set up first readahead */
				/* XXX: Leave bufs for swap */
				if (rawreadahead != 0)
					nbp = trypbuf(&ffsrawbufcnt);
				else
					nbp = NULL;
				if (nbp != NULL) {
					nsa = nbp->b_data;
					nbp->b_vp = vp;

					nerror = ffs_rawread_readahead(vp,
								       udata +
								       bp->b_bufsize,
								       offset +
								       bp->b_bufsize,
								       resid -
								       bp->b_bufsize,
								       td,
								       nbp,
								       nsa);
					/*
					 * Readahead failure is deferred
					 * (nerror) — the primary read may
					 * still succeed.
					 */
					if (nerror) {
						relpbuf(nbp, &ffsrawbufcnt);
						nbp = NULL;
					}
				}
			}
		}

		spl = splbio();
		bwait(bp, PRIBIO, "rawrd");
		splx(spl);

		vunmapbuf(bp);

		iolen = bp->b_bcount - bp->b_resid;
		if (iolen == 0 && (bp->b_ioflags & BIO_ERROR) == 0) {
			nerror = 0;	/* Ignore possible beyond EOF error */
			break; /* EOF */
		}

		if ((bp->b_ioflags & BIO_ERROR) != 0) {
			error = bp->b_error;
			break;
		}
		resid -= iolen;
		udata += iolen;
		offset += iolen;
		if (iolen < bp->b_bufsize) {
			/* Incomplete read.  Try to read remaining part */
			error = ffs_rawread_readahead(vp,
						      udata,
						      offset,
						      bp->b_bufsize - iolen,
						      td,
						      bp,
						      sa);
			if (error != 0)
				break;
		} else if (nbp != NULL) { /* Complete read with readahead */

			/* Promote the readahead buffer to current. */
			tbp = bp;
			bp = nbp;
			nbp = tbp;

			tsa = sa;
			sa = nsa;
			nsa = tsa;

			if (resid <= bp->b_bufsize) { /* No more readaheads */
				relpbuf(nbp, &ffsrawbufcnt);
				nbp = NULL;
			} else { /* Setup next readahead */
				nerror = ffs_rawread_readahead(vp,
							       udata +
							       bp->b_bufsize,
							       offset +
							       bp->b_bufsize,
							       resid -
							       bp->b_bufsize,
							       td,
							       nbp,
							       nsa);
				if (nerror != 0) {
					relpbuf(nbp, &ffsrawbufcnt);
					nbp = NULL;
				}
			}
		} else if (nerror != 0) {/* Deferred Readahead error */
			break;
		} else if (resid > 0) { /* More to read, no readahead */
			error = ffs_rawread_readahead(vp, udata, offset,
						      resid, td, bp, sa);
			if (error != 0)
				break;
		}
	}

	if (bp != NULL)
		relpbuf(bp, &ffsrawbufcnt);
	if (nbp != NULL) {			/* Run down readahead buffer */
		spl = splbio();
		bwait(nbp, PRIBIO, "rawrd");
		splx(spl);
		vunmapbuf(nbp);
		relpbuf(nbp, &ffsrawbufcnt);
	}

	if (error == 0)
		error = nerror;
	PRELE(td->td_proc);
	/* Write consumed state back so the caller sees partial progress. */
	uio->uio_iov->iov_base = udata;
	uio->uio_resid = resid;
	uio->uio_offset = offset;
	return error;
}


/*
 * Entry point from the FFS read path.  Decides whether this request is
 * eligible for the raw-read fast path: knob enabled, single user-space
 * iovec, no deadlock-treatment flag, and sector-aligned offset/length.
 * After syncing caches, full-block portions are read raw; *workdone is
 * set to 1 when the whole request was handled here, 0 when the (possibly
 * remaining) request must go through the normal buffered path.
 */
int
ffs_rawread(struct vnode *vp,
	    struct uio *uio,
	    int *workdone)
{
	if (allowrawread != 0 &&
	    uio->uio_iovcnt == 1 &&
	    uio->uio_segflg == UIO_USERSPACE &&
	    uio->uio_resid == uio->uio_iov->iov_len &&
	    (((uio->uio_td != NULL) ? uio->uio_td : curthread)->td_flags &
	     TDF_DEADLKTREAT) == 0) {
		int secsize;		/* Media sector size */
		off_t filebytes;	/* Bytes left of file */
		int blockbytes;		/* Bytes left of file in full blocks */
		int partialbytes;	/* Bytes in last partial block */
		int skipbytes;		/* Bytes not to read in ffs_rawread */
		struct inode *ip;
		int error;


		/* Only handle sector aligned reads */
		ip = VTOI(vp);
		secsize = ip->i_devvp->v_rdev->si_bsize_phys;
		if ((uio->uio_offset & (secsize - 1)) == 0 &&
		    (uio->uio_resid & (secsize - 1)) == 0) {

			/* Sync dirty pages and buffers if needed */
			error = ffs_rawread_sync(vp,
						 (uio->uio_td != NULL) ?
						 uio->uio_td : curthread);
			if (error != 0)
				return error;

			/* Check for end of file */
			if (ip->i_size > uio->uio_offset) {
				filebytes = ip->i_size - uio->uio_offset;

				/* No special eof handling needed ? */
				if (uio->uio_resid <= filebytes) {
					*workdone = 1;
					return ffs_rawread_main(vp, uio);
				}

				/*
				 * Read crosses EOF: raw-read only the full
				 * blocks, then let the buffered path finish
				 * the trailing partial block.
				 */
				partialbytes = ((unsigned int) ip->i_size) %
					ip->i_fs->fs_bsize;
				blockbytes = (int) filebytes - partialbytes;
				if (blockbytes > 0) {
					skipbytes = uio->uio_resid -
						blockbytes;
					uio->uio_resid = blockbytes;
					error = ffs_rawread_main(vp, uio);
					uio->uio_resid += skipbytes;
					if (error != 0)
						return error;
					/* Read remaining part using buffer */
				}
			}
		}
	}
	*workdone = 0;
	return 0;
}


/* b_iodone callback: signal completion to the bwait()ing reader. */
static void
ffs_rawreadwakeup(struct buf *bp)
{
	bdone(bp);
}
| 49#include <ufs/ffs/fs.h> 50 51#include <vm/vm.h> 52#include <vm/vm_extern.h> 53#include <vm/vm_object.h> 54#include <sys/kernel.h> 55#include <sys/sysctl.h> 56 57static int ffs_rawread_readahead(struct vnode *vp, 58 caddr_t udata, 59 off_t offset, 60 size_t len, 61 struct thread *td, 62 struct buf *bp, 63 caddr_t sa); 64static int ffs_rawread_main(struct vnode *vp, 65 struct uio *uio); 66 67static int ffs_rawread_sync(struct vnode *vp, struct thread *td); 68 69int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone); 70 71void ffs_rawread_setup(void); 72 73static void ffs_rawreadwakeup(struct buf *bp); 74 75 76SYSCTL_DECL(_vfs_ffs); 77 78static int ffsrawbufcnt = 4; 79SYSCTL_INT(_vfs_ffs, OID_AUTO, ffsrawbufcnt, CTLFLAG_RD, &ffsrawbufcnt, 0, 80 "Buffers available for raw reads"); 81 82static int allowrawread = 1; 83SYSCTL_INT(_vfs_ffs, OID_AUTO, allowrawread, CTLFLAG_RW, &allowrawread, 0, 84 "Flag to enable raw reads"); 85 86static int rawreadahead = 1; 87SYSCTL_INT(_vfs_ffs, OID_AUTO, rawreadahead, CTLFLAG_RW, &rawreadahead, 0, 88 "Flag to enable readahead for long raw reads"); 89 90 91void 92ffs_rawread_setup(void) 93{ 94 ffsrawbufcnt = (nswbuf > 100 ) ? 
(nswbuf - (nswbuf >> 4)) : nswbuf - 8; 95} 96 97 98static int 99ffs_rawread_sync(struct vnode *vp, struct thread *td) 100{ 101 int spl; 102 int error; 103 int upgraded; 104 105 GIANT_REQUIRED; 106 /* Check for dirty mmap, pending writes and dirty buffers */ 107 spl = splbio(); 108 VI_LOCK(vp); 109 if (vp->v_numoutput > 0 || 110 !TAILQ_EMPTY(&vp->v_dirtyblkhd) || 111 (vp->v_iflag & VI_OBJDIRTY) != 0) { 112 splx(spl); 113 VI_UNLOCK(vp); 114 115 if (VOP_ISLOCKED(vp, td) != LK_EXCLUSIVE) { 116 upgraded = 1; 117 /* Upgrade to exclusive lock, this might block */ 118 VOP_LOCK(vp, LK_UPGRADE | LK_NOPAUSE, td); 119 } else 120 upgraded = 0; 121 122 123 /* Attempt to msync mmap() regions to clean dirty mmap */ 124 VI_LOCK(vp); 125 if ((vp->v_iflag & VI_OBJDIRTY) != 0) { 126 struct vm_object *obj; 127 VI_UNLOCK(vp); 128 if (VOP_GETVOBJECT(vp, &obj) == 0) { 129 VM_OBJECT_LOCK(obj); 130 vm_object_page_clean(obj, 0, 0, OBJPC_SYNC); 131 VM_OBJECT_UNLOCK(obj); 132 } 133 VI_LOCK(vp); 134 } 135 136 /* Wait for pending writes to complete */ 137 spl = splbio(); 138 while (vp->v_numoutput) { 139 vp->v_iflag |= VI_BWAIT; 140 error = msleep((caddr_t)&vp->v_numoutput, 141 VI_MTX(vp), 142 PRIBIO + 1, 143 "rawrdfls", 0); 144 if (error != 0) { 145 splx(spl); 146 VI_UNLOCK(vp); 147 if (upgraded != 0) 148 VOP_LOCK(vp, LK_DOWNGRADE, td); 149 return (error); 150 } 151 } 152 /* Flush dirty buffers */ 153 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { 154 splx(spl); 155 VI_UNLOCK(vp); 156 if ((error = VOP_FSYNC(vp, NOCRED, MNT_WAIT, td)) != 0) { 157 if (upgraded != 0) 158 VOP_LOCK(vp, LK_DOWNGRADE, td); 159 return (error); 160 } 161 VI_LOCK(vp); 162 spl = splbio(); 163 if (vp->v_numoutput > 0 || 164 !TAILQ_EMPTY(&vp->v_dirtyblkhd)) 165 panic("ffs_rawread_sync: dirty bufs"); 166 } 167 splx(spl); 168 VI_UNLOCK(vp); 169 if (upgraded != 0) 170 VOP_LOCK(vp, LK_DOWNGRADE, td); 171 } else { 172 splx(spl); 173 VI_UNLOCK(vp); 174 } 175 return 0; 176} 177 178 179static int 180ffs_rawread_readahead(struct vnode *vp, 
181 caddr_t udata, 182 off_t offset, 183 size_t len, 184 struct thread *td, 185 struct buf *bp, 186 caddr_t sa) 187{ 188 int error; 189 u_int iolen; 190 off_t blockno; 191 int blockoff; 192 int bsize; 193 struct vnode *dp; 194 int bforwards; 195 196 GIANT_REQUIRED; 197 bsize = vp->v_mount->mnt_stat.f_iosize; 198 199 iolen = ((vm_offset_t) udata) & PAGE_MASK; 200 bp->b_bcount = len; 201 if (bp->b_bcount + iolen > bp->b_kvasize) { 202 bp->b_bcount = bp->b_kvasize; 203 if (iolen != 0) 204 bp->b_bcount -= PAGE_SIZE; 205 } 206 bp->b_flags = B_PHYS; 207 bp->b_iocmd = BIO_READ; 208 bp->b_iodone = ffs_rawreadwakeup; 209 bp->b_data = udata; 210 bp->b_saveaddr = sa; 211 bp->b_offset = offset; 212 blockno = bp->b_offset / bsize; 213 blockoff = (bp->b_offset % bsize) / DEV_BSIZE; 214 if ((daddr_t) blockno != blockno) { 215 return EINVAL; /* blockno overflow */ 216 } 217 218 bp->b_lblkno = bp->b_blkno = blockno; 219 220 error = VOP_BMAP(vp, bp->b_lblkno, &dp, &bp->b_blkno, &bforwards, 221 NULL); 222 if (error != 0) { 223 return error; 224 } 225 if (bp->b_blkno == -1) { 226 227 /* Fill holes with NULs to preserve semantics */ 228 229 if (bp->b_bcount + blockoff * DEV_BSIZE > bsize) 230 bp->b_bcount = bsize - blockoff * DEV_BSIZE; 231 bp->b_bufsize = bp->b_bcount; 232 233 if (vmapbuf(bp) < 0) 234 return EFAULT; 235 236 if (ticks - PCPU_GET(switchticks) >= hogticks) 237 uio_yield(); 238 bzero(bp->b_data, bp->b_bufsize); 239 240 /* Mark operation completed (similar to bufdone()) */ 241 242 bp->b_resid = 0; 243 bp->b_flags |= B_DONE; 244 return 0; 245 } 246 247 if (bp->b_bcount + blockoff * DEV_BSIZE > bsize * (1 + bforwards)) 248 bp->b_bcount = bsize * (1 + bforwards) - blockoff * DEV_BSIZE; 249 bp->b_bufsize = bp->b_bcount; 250 bp->b_blkno += blockoff; 251 bp->b_dev = dp->v_rdev; 252 253 if (vmapbuf(bp) < 0) 254 return EFAULT; 255 256 if (dp->v_type == VCHR) 257 (void) VOP_SPECSTRATEGY(dp, bp); 258 else 259 (void) VOP_STRATEGY(dp, bp); 260 return 0; 261} 262 263 264static int 
265ffs_rawread_main(struct vnode *vp, 266 struct uio *uio) 267{ 268 int error, nerror; 269 struct buf *bp, *nbp, *tbp; 270 caddr_t sa, nsa, tsa; 271 u_int iolen; 272 int spl; 273 caddr_t udata; 274 long resid; 275 off_t offset; 276 struct thread *td; 277 278 GIANT_REQUIRED; 279 td = uio->uio_td ? uio->uio_td : curthread; 280 udata = uio->uio_iov->iov_base; 281 resid = uio->uio_resid; 282 offset = uio->uio_offset; 283 284 /* 285 * keep the process from being swapped 286 */ 287 PHOLD(td->td_proc); 288 289 error = 0; 290 nerror = 0; 291 292 bp = NULL; 293 nbp = NULL; 294 sa = NULL; 295 nsa = NULL; 296 297 while (resid > 0) { 298 299 if (bp == NULL) { /* Setup first read */ 300 /* XXX: Leave some bufs for swap */ 301 bp = getpbuf(&ffsrawbufcnt); 302 sa = bp->b_data; 303 bp->b_vp = vp; 304 error = ffs_rawread_readahead(vp, udata, offset, 305 resid, td, bp, sa); 306 if (error != 0) 307 break; 308 309 if (resid > bp->b_bufsize) { /* Setup fist readahead */ 310 /* XXX: Leave bufs for swap */ 311 if (rawreadahead != 0) 312 nbp = trypbuf(&ffsrawbufcnt); 313 else 314 nbp = NULL; 315 if (nbp != NULL) { 316 nsa = nbp->b_data; 317 nbp->b_vp = vp; 318 319 nerror = ffs_rawread_readahead(vp, 320 udata + 321 bp->b_bufsize, 322 offset + 323 bp->b_bufsize, 324 resid - 325 bp->b_bufsize, 326 td, 327 nbp, 328 nsa); 329 if (nerror) { 330 relpbuf(nbp, &ffsrawbufcnt); 331 nbp = NULL; 332 } 333 } 334 } 335 } 336 337 spl = splbio(); 338 bwait(bp, PRIBIO, "rawrd"); 339 splx(spl); 340 341 vunmapbuf(bp); 342 343 iolen = bp->b_bcount - bp->b_resid; 344 if (iolen == 0 && (bp->b_ioflags & BIO_ERROR) == 0) { 345 nerror = 0; /* Ignore possible beyond EOF error */ 346 break; /* EOF */ 347 } 348 349 if ((bp->b_ioflags & BIO_ERROR) != 0) { 350 error = bp->b_error; 351 break; 352 } 353 resid -= iolen; 354 udata += iolen; 355 offset += iolen; 356 if (iolen < bp->b_bufsize) { 357 /* Incomplete read. 
Try to read remaining part */ 358 error = ffs_rawread_readahead(vp, 359 udata, 360 offset, 361 bp->b_bufsize - iolen, 362 td, 363 bp, 364 sa); 365 if (error != 0) 366 break; 367 } else if (nbp != NULL) { /* Complete read with readahead */ 368 369 tbp = bp; 370 bp = nbp; 371 nbp = tbp; 372 373 tsa = sa; 374 sa = nsa; 375 nsa = tsa; 376 377 if (resid <= bp->b_bufsize) { /* No more readaheads */ 378 relpbuf(nbp, &ffsrawbufcnt); 379 nbp = NULL; 380 } else { /* Setup next readahead */ 381 nerror = ffs_rawread_readahead(vp, 382 udata + 383 bp->b_bufsize, 384 offset + 385 bp->b_bufsize, 386 resid - 387 bp->b_bufsize, 388 td, 389 nbp, 390 nsa); 391 if (nerror != 0) { 392 relpbuf(nbp, &ffsrawbufcnt); 393 nbp = NULL; 394 } 395 } 396 } else if (nerror != 0) {/* Deferred Readahead error */ 397 break; 398 } else if (resid > 0) { /* More to read, no readahead */ 399 error = ffs_rawread_readahead(vp, udata, offset, 400 resid, td, bp, sa); 401 if (error != 0) 402 break; 403 } 404 } 405 406 if (bp != NULL) 407 relpbuf(bp, &ffsrawbufcnt); 408 if (nbp != NULL) { /* Run down readahead buffer */ 409 spl = splbio(); 410 bwait(nbp, PRIBIO, "rawrd"); 411 splx(spl); 412 vunmapbuf(nbp); 413 relpbuf(nbp, &ffsrawbufcnt); 414 } 415 416 if (error == 0) 417 error = nerror; 418 PRELE(td->td_proc); 419 uio->uio_iov->iov_base = udata; 420 uio->uio_resid = resid; 421 uio->uio_offset = offset; 422 return error; 423} 424 425 426int 427ffs_rawread(struct vnode *vp, 428 struct uio *uio, 429 int *workdone) 430{ 431 if (allowrawread != 0 && 432 uio->uio_iovcnt == 1 && 433 uio->uio_segflg == UIO_USERSPACE && 434 uio->uio_resid == uio->uio_iov->iov_len && 435 (((uio->uio_td != NULL) ? 
uio->uio_td : curthread)->td_flags & 436 TDF_DEADLKTREAT) == 0) { 437 int secsize; /* Media sector size */ 438 off_t filebytes; /* Bytes left of file */ 439 int blockbytes; /* Bytes left of file in full blocks */ 440 int partialbytes; /* Bytes in last partial block */ 441 int skipbytes; /* Bytes not to read in ffs_rawread */ 442 struct inode *ip; 443 int error; 444 445 446 /* Only handle sector aligned reads */ 447 ip = VTOI(vp); 448 secsize = ip->i_devvp->v_rdev->si_bsize_phys; 449 if ((uio->uio_offset & (secsize - 1)) == 0 && 450 (uio->uio_resid & (secsize - 1)) == 0) { 451 452 /* Sync dirty pages and buffers if needed */ 453 error = ffs_rawread_sync(vp, 454 (uio->uio_td != NULL) ? 455 uio->uio_td : curthread); 456 if (error != 0) 457 return error; 458 459 /* Check for end of file */ 460 if (ip->i_size > uio->uio_offset) { 461 filebytes = ip->i_size - uio->uio_offset; 462 463 /* No special eof handling needed ? */ 464 if (uio->uio_resid <= filebytes) { 465 *workdone = 1; 466 return ffs_rawread_main(vp, uio); 467 } 468 469 partialbytes = ((unsigned int) ip->i_size) % 470 ip->i_fs->fs_bsize; 471 blockbytes = (int) filebytes - partialbytes; 472 if (blockbytes > 0) { 473 skipbytes = uio->uio_resid - 474 blockbytes; 475 uio->uio_resid = blockbytes; 476 error = ffs_rawread_main(vp, uio); 477 uio->uio_resid += skipbytes; 478 if (error != 0) 479 return error; 480 /* Read remaining part using buffer */ 481 } 482 } 483 } 484 } 485 *workdone = 0; 486 return 0; 487} 488 489 490static void 491ffs_rawreadwakeup(struct buf *bp) 492{ 493 bdone(bp); 494}
|