1/*- 2 * Copyright (c) 2000-2003 Tor Egge 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h>
| 1/*- 2 * Copyright (c) 2000-2003 Tor Egge 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h>
|
28__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_rawread.c 233438 2012-03-25 00:02:37Z mckusick $");
| 28__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_rawread.c 248084 2013-03-09 02:32:23Z attilio $");
|
29 30#include <sys/param.h> 31#include <sys/systm.h> 32#include <sys/fcntl.h> 33#include <sys/file.h> 34#include <sys/stat.h> 35#include <sys/proc.h> 36#include <sys/limits.h> 37#include <sys/mount.h> 38#include <sys/namei.h> 39#include <sys/vnode.h> 40#include <sys/conf.h> 41#include <sys/filio.h> 42#include <sys/ttycom.h> 43#include <sys/bio.h> 44#include <sys/buf.h>
| 29 30#include <sys/param.h> 31#include <sys/systm.h> 32#include <sys/fcntl.h> 33#include <sys/file.h> 34#include <sys/stat.h> 35#include <sys/proc.h> 36#include <sys/limits.h> 37#include <sys/mount.h> 38#include <sys/namei.h> 39#include <sys/vnode.h> 40#include <sys/conf.h> 41#include <sys/filio.h> 42#include <sys/ttycom.h> 43#include <sys/bio.h> 44#include <sys/buf.h>
|
| 45#include <sys/rwlock.h>
|
45#include <ufs/ufs/extattr.h> 46#include <ufs/ufs/quota.h> 47#include <ufs/ufs/inode.h> 48#include <ufs/ufs/ufsmount.h> 49#include <ufs/ufs/ufs_extern.h> 50#include <ufs/ffs/fs.h> 51#include <ufs/ffs/ffs_extern.h> 52 53#include <vm/vm.h> 54#include <vm/vm_extern.h> 55#include <vm/vm_object.h> 56#include <sys/kernel.h> 57#include <sys/sysctl.h> 58 59static int ffs_rawread_readahead(struct vnode *vp, 60 caddr_t udata, 61 off_t offset, 62 size_t len, 63 struct thread *td, 64 struct buf *bp, 65 caddr_t sa); 66static int ffs_rawread_main(struct vnode *vp, 67 struct uio *uio); 68 69static int ffs_rawread_sync(struct vnode *vp); 70 71int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone); 72 73void ffs_rawread_setup(void); 74 75SYSCTL_DECL(_vfs_ffs); 76 77static int ffsrawbufcnt = 4; 78SYSCTL_INT(_vfs_ffs, OID_AUTO, ffsrawbufcnt, CTLFLAG_RD, &ffsrawbufcnt, 0, 79 "Buffers available for raw reads"); 80 81static int allowrawread = 1; 82SYSCTL_INT(_vfs_ffs, OID_AUTO, allowrawread, CTLFLAG_RW, &allowrawread, 0, 83 "Flag to enable raw reads"); 84 85static int rawreadahead = 1; 86SYSCTL_INT(_vfs_ffs, OID_AUTO, rawreadahead, CTLFLAG_RW, &rawreadahead, 0, 87 "Flag to enable readahead for long raw reads"); 88 89 90void 91ffs_rawread_setup(void) 92{ 93 ffsrawbufcnt = (nswbuf > 100 ) ? (nswbuf - (nswbuf >> 4)) : nswbuf - 8; 94} 95 96 97static int 98ffs_rawread_sync(struct vnode *vp) 99{ 100 int error; 101 int upgraded; 102 struct bufobj *bo; 103 struct mount *mp; 104 vm_object_t obj; 105 106 /* Check for dirty mmap, pending writes and dirty buffers */ 107 bo = &vp->v_bufobj; 108 BO_LOCK(bo); 109 VI_LOCK(vp); 110 if (bo->bo_numoutput > 0 || 111 bo->bo_dirty.bv_cnt > 0 || 112 ((obj = vp->v_object) != NULL && 113 (obj->flags & OBJ_MIGHTBEDIRTY) != 0)) { 114 VI_UNLOCK(vp); 115 BO_UNLOCK(bo); 116 117 if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { 118 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) 119 upgraded = 1; 120 else 121 upgraded = 0; 122 VOP_UNLOCK(vp, 0); 123 (void) vn_start_write(vp, &mp, V_WAIT); 124 VOP_LOCK(vp, LK_EXCLUSIVE); 125 } else if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 126 upgraded = 1; 127 /* Upgrade to exclusive lock, this might block */ 128 VOP_LOCK(vp, LK_UPGRADE); 129 } else 130 upgraded = 0; 131 132 133 VI_LOCK(vp); 134 /* Check if vnode was reclaimed while unlocked. */ 135 if ((vp->v_iflag & VI_DOOMED) != 0) { 136 VI_UNLOCK(vp); 137 if (upgraded != 0) 138 VOP_LOCK(vp, LK_DOWNGRADE); 139 vn_finished_write(mp); 140 return (EIO); 141 } 142 /* Attempt to msync mmap() regions to clean dirty mmap */ 143 if ((obj = vp->v_object) != NULL && 144 (obj->flags & OBJ_MIGHTBEDIRTY) != 0) { 145 VI_UNLOCK(vp);
| 46#include <ufs/ufs/extattr.h> 47#include <ufs/ufs/quota.h> 48#include <ufs/ufs/inode.h> 49#include <ufs/ufs/ufsmount.h> 50#include <ufs/ufs/ufs_extern.h> 51#include <ufs/ffs/fs.h> 52#include <ufs/ffs/ffs_extern.h> 53 54#include <vm/vm.h> 55#include <vm/vm_extern.h> 56#include <vm/vm_object.h> 57#include <sys/kernel.h> 58#include <sys/sysctl.h> 59 60static int ffs_rawread_readahead(struct vnode *vp, 61 caddr_t udata, 62 off_t offset, 63 size_t len, 64 struct thread *td, 65 struct buf *bp, 66 caddr_t sa); 67static int ffs_rawread_main(struct vnode *vp, 68 struct uio *uio); 69 70static int ffs_rawread_sync(struct vnode *vp); 71 72int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone); 73 74void ffs_rawread_setup(void); 75 76SYSCTL_DECL(_vfs_ffs); 77 78static int ffsrawbufcnt = 4; 79SYSCTL_INT(_vfs_ffs, OID_AUTO, ffsrawbufcnt, CTLFLAG_RD, &ffsrawbufcnt, 0, 80 "Buffers available for raw reads"); 81 82static int allowrawread = 1; 83SYSCTL_INT(_vfs_ffs, OID_AUTO, allowrawread, CTLFLAG_RW, &allowrawread, 0, 84 "Flag to enable raw reads"); 85 86static int rawreadahead = 1; 87SYSCTL_INT(_vfs_ffs, OID_AUTO, rawreadahead, CTLFLAG_RW, &rawreadahead, 0, 88 "Flag to enable readahead for long raw reads"); 89 90 91void 92ffs_rawread_setup(void) 93{ 94 ffsrawbufcnt = (nswbuf > 100 ) ? (nswbuf - (nswbuf >> 4)) : nswbuf - 8; 95} 96 97 98static int 99ffs_rawread_sync(struct vnode *vp) 100{ 101 int error; 102 int upgraded; 103 struct bufobj *bo; 104 struct mount *mp; 105 vm_object_t obj; 106 107 /* Check for dirty mmap, pending writes and dirty buffers */ 108 bo = &vp->v_bufobj; 109 BO_LOCK(bo); 110 VI_LOCK(vp); 111 if (bo->bo_numoutput > 0 || 112 bo->bo_dirty.bv_cnt > 0 || 113 ((obj = vp->v_object) != NULL && 114 (obj->flags & OBJ_MIGHTBEDIRTY) != 0)) { 115 VI_UNLOCK(vp); 116 BO_UNLOCK(bo); 117 118 if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { 119 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) 120 upgraded = 1; 121 else 122 upgraded = 0; 123 VOP_UNLOCK(vp, 0); 124 (void) vn_start_write(vp, &mp, V_WAIT); 125 VOP_LOCK(vp, LK_EXCLUSIVE); 126 } else if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 127 upgraded = 1; 128 /* Upgrade to exclusive lock, this might block */ 129 VOP_LOCK(vp, LK_UPGRADE); 130 } else 131 upgraded = 0; 132 133 134 VI_LOCK(vp); 135 /* Check if vnode was reclaimed while unlocked. */ 136 if ((vp->v_iflag & VI_DOOMED) != 0) { 137 VI_UNLOCK(vp); 138 if (upgraded != 0) 139 VOP_LOCK(vp, LK_DOWNGRADE); 140 vn_finished_write(mp); 141 return (EIO); 142 } 143 /* Attempt to msync mmap() regions to clean dirty mmap */ 144 if ((obj = vp->v_object) != NULL && 145 (obj->flags & OBJ_MIGHTBEDIRTY) != 0) { 146 VI_UNLOCK(vp);
|
146 VM_OBJECT_LOCK(obj);
| 147 VM_OBJECT_WLOCK(obj);
|
147 vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
| 148 vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
|
148 VM_OBJECT_UNLOCK(obj);
| 149 VM_OBJECT_WUNLOCK(obj);
|
149 } else 150 VI_UNLOCK(vp); 151 152 /* Wait for pending writes to complete */ 153 BO_LOCK(bo); 154 error = bufobj_wwait(&vp->v_bufobj, 0, 0); 155 if (error != 0) { 156 /* XXX: can't happen with a zero timeout ??? */ 157 BO_UNLOCK(bo); 158 if (upgraded != 0) 159 VOP_LOCK(vp, LK_DOWNGRADE); 160 vn_finished_write(mp); 161 return (error); 162 } 163 /* Flush dirty buffers */ 164 if (bo->bo_dirty.bv_cnt > 0) { 165 BO_UNLOCK(bo); 166 if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) { 167 if (upgraded != 0) 168 VOP_LOCK(vp, LK_DOWNGRADE); 169 vn_finished_write(mp); 170 return (error); 171 } 172 BO_LOCK(bo); 173 if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) 174 panic("ffs_rawread_sync: dirty bufs"); 175 } 176 BO_UNLOCK(bo); 177 if (upgraded != 0) 178 VOP_LOCK(vp, LK_DOWNGRADE); 179 vn_finished_write(mp); 180 } else { 181 VI_UNLOCK(vp); 182 BO_UNLOCK(bo); 183 } 184 return 0; 185} 186 187 188static int 189ffs_rawread_readahead(struct vnode *vp, 190 caddr_t udata, 191 off_t offset, 192 size_t len, 193 struct thread *td, 194 struct buf *bp, 195 caddr_t sa) 196{ 197 int error; 198 u_int iolen; 199 off_t blockno; 200 int blockoff; 201 int bsize; 202 struct vnode *dp; 203 int bforwards; 204 struct inode *ip; 205 ufs2_daddr_t blkno; 206 207 bsize = vp->v_mount->mnt_stat.f_iosize; 208 209 ip = VTOI(vp); 210 dp = ip->i_devvp; 211 212 iolen = ((vm_offset_t) udata) & PAGE_MASK; 213 bp->b_bcount = len; 214 if (bp->b_bcount + iolen > bp->b_kvasize) { 215 bp->b_bcount = bp->b_kvasize; 216 if (iolen != 0) 217 bp->b_bcount -= PAGE_SIZE; 218 } 219 bp->b_flags = 0; /* XXX necessary ? */ 220 bp->b_iocmd = BIO_READ; 221 bp->b_iodone = bdone; 222 bp->b_data = udata; 223 bp->b_saveaddr = sa; 224 blockno = offset / bsize; 225 blockoff = (offset % bsize) / DEV_BSIZE; 226 if ((daddr_t) blockno != blockno) { 227 return EINVAL; /* blockno overflow */ 228 } 229 230 bp->b_lblkno = bp->b_blkno = blockno; 231 232 error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, NULL, &bforwards, NULL); 233 if (error != 0) 234 return error; 235 if (blkno == -1) { 236 237 /* Fill holes with NULs to preserve semantics */ 238 239 if (bp->b_bcount + blockoff * DEV_BSIZE > bsize) 240 bp->b_bcount = bsize - blockoff * DEV_BSIZE; 241 bp->b_bufsize = bp->b_bcount; 242 243 if (vmapbuf(bp) < 0) 244 return EFAULT; 245 246 maybe_yield(); 247 bzero(bp->b_data, bp->b_bufsize); 248 249 /* Mark operation completed (similar to bufdone()) */ 250 251 bp->b_resid = 0; 252 bp->b_flags |= B_DONE; 253 return 0; 254 } 255 bp->b_blkno = blkno + blockoff; 256 bp->b_offset = bp->b_iooffset = (blkno + blockoff) * DEV_BSIZE; 257 258 if (bp->b_bcount + blockoff * DEV_BSIZE > bsize * (1 + bforwards)) 259 bp->b_bcount = bsize * (1 + bforwards) - blockoff * DEV_BSIZE; 260 bp->b_bufsize = bp->b_bcount; 261 262 if (vmapbuf(bp) < 0) 263 return EFAULT; 264 265 BO_STRATEGY(&dp->v_bufobj, bp); 266 return 0; 267} 268 269 270static int 271ffs_rawread_main(struct vnode *vp, 272 struct uio *uio) 273{ 274 int error, nerror; 275 struct buf *bp, *nbp, *tbp; 276 caddr_t sa, nsa, tsa; 277 u_int iolen; 278 int spl; 279 caddr_t udata; 280 long resid; 281 off_t offset; 282 struct thread *td; 283 284 td = uio->uio_td ? uio->uio_td : curthread; 285 udata = uio->uio_iov->iov_base; 286 resid = uio->uio_resid; 287 offset = uio->uio_offset; 288 289 /* 290 * keep the process from being swapped 291 */ 292 PHOLD(td->td_proc); 293 294 error = 0; 295 nerror = 0; 296 297 bp = NULL; 298 nbp = NULL; 299 sa = NULL; 300 nsa = NULL; 301 302 while (resid > 0) { 303 304 if (bp == NULL) { /* Setup first read */ 305 /* XXX: Leave some bufs for swap */ 306 bp = getpbuf(&ffsrawbufcnt); 307 sa = bp->b_data; 308 pbgetvp(vp, bp); 309 error = ffs_rawread_readahead(vp, udata, offset, 310 resid, td, bp, sa); 311 if (error != 0) 312 break; 313 314 if (resid > bp->b_bufsize) { /* Setup fist readahead */ 315 /* XXX: Leave bufs for swap */ 316 if (rawreadahead != 0) 317 nbp = trypbuf(&ffsrawbufcnt); 318 else 319 nbp = NULL; 320 if (nbp != NULL) { 321 nsa = nbp->b_data; 322 pbgetvp(vp, nbp); 323 324 nerror = ffs_rawread_readahead(vp, 325 udata + 326 bp->b_bufsize, 327 offset + 328 bp->b_bufsize, 329 resid - 330 bp->b_bufsize, 331 td, 332 nbp, 333 nsa); 334 if (nerror) { 335 pbrelvp(nbp); 336 relpbuf(nbp, &ffsrawbufcnt); 337 nbp = NULL; 338 } 339 } 340 } 341 } 342 343 spl = splbio(); 344 bwait(bp, PRIBIO, "rawrd"); 345 splx(spl); 346 347 vunmapbuf(bp); 348 349 iolen = bp->b_bcount - bp->b_resid; 350 if (iolen == 0 && (bp->b_ioflags & BIO_ERROR) == 0) { 351 nerror = 0; /* Ignore possible beyond EOF error */ 352 break; /* EOF */ 353 } 354 355 if ((bp->b_ioflags & BIO_ERROR) != 0) { 356 error = bp->b_error; 357 break; 358 } 359 resid -= iolen; 360 udata += iolen; 361 offset += iolen; 362 if (iolen < bp->b_bufsize) { 363 /* Incomplete read. Try to read remaining part */ 364 error = ffs_rawread_readahead(vp, 365 udata, 366 offset, 367 bp->b_bufsize - iolen, 368 td, 369 bp, 370 sa); 371 if (error != 0) 372 break; 373 } else if (nbp != NULL) { /* Complete read with readahead */ 374 375 tbp = bp; 376 bp = nbp; 377 nbp = tbp; 378 379 tsa = sa; 380 sa = nsa; 381 nsa = tsa; 382 383 if (resid <= bp->b_bufsize) { /* No more readaheads */ 384 pbrelvp(nbp); 385 relpbuf(nbp, &ffsrawbufcnt); 386 nbp = NULL; 387 } else { /* Setup next readahead */ 388 nerror = ffs_rawread_readahead(vp, 389 udata + 390 bp->b_bufsize, 391 offset + 392 bp->b_bufsize, 393 resid - 394 bp->b_bufsize, 395 td, 396 nbp, 397 nsa); 398 if (nerror != 0) { 399 pbrelvp(nbp); 400 relpbuf(nbp, &ffsrawbufcnt); 401 nbp = NULL; 402 } 403 } 404 } else if (nerror != 0) {/* Deferred Readahead error */ 405 break; 406 } else if (resid > 0) { /* More to read, no readahead */ 407 error = ffs_rawread_readahead(vp, udata, offset, 408 resid, td, bp, sa); 409 if (error != 0) 410 break; 411 } 412 } 413 414 if (bp != NULL) { 415 pbrelvp(bp); 416 relpbuf(bp, &ffsrawbufcnt); 417 } 418 if (nbp != NULL) { /* Run down readahead buffer */ 419 spl = splbio(); 420 bwait(nbp, PRIBIO, "rawrd"); 421 splx(spl); 422 vunmapbuf(nbp); 423 pbrelvp(nbp); 424 relpbuf(nbp, &ffsrawbufcnt); 425 } 426 427 if (error == 0) 428 error = nerror; 429 PRELE(td->td_proc); 430 uio->uio_iov->iov_base = udata; 431 uio->uio_resid = resid; 432 uio->uio_offset = offset; 433 return error; 434} 435 436 437int 438ffs_rawread(struct vnode *vp, 439 struct uio *uio, 440 int *workdone) 441{ 442 if (allowrawread != 0 && 443 uio->uio_iovcnt == 1 && 444 uio->uio_segflg == UIO_USERSPACE && 445 uio->uio_resid == uio->uio_iov->iov_len && 446 (((uio->uio_td != NULL) ? uio->uio_td : curthread)->td_pflags & 447 TDP_DEADLKTREAT) == 0) { 448 int secsize; /* Media sector size */ 449 off_t filebytes; /* Bytes left of file */ 450 int blockbytes; /* Bytes left of file in full blocks */ 451 int partialbytes; /* Bytes in last partial block */ 452 int skipbytes; /* Bytes not to read in ffs_rawread */ 453 struct inode *ip; 454 int error; 455 456 457 /* Only handle sector aligned reads */ 458 ip = VTOI(vp); 459 secsize = ip->i_devvp->v_bufobj.bo_bsize; 460 if ((uio->uio_offset & (secsize - 1)) == 0 && 461 (uio->uio_resid & (secsize - 1)) == 0) { 462 463 /* Sync dirty pages and buffers if needed */ 464 error = ffs_rawread_sync(vp); 465 if (error != 0) 466 return error; 467 468 /* Check for end of file */ 469 if (ip->i_size > uio->uio_offset) { 470 filebytes = ip->i_size - uio->uio_offset; 471 472 /* No special eof handling needed ? */ 473 if (uio->uio_resid <= filebytes) { 474 *workdone = 1; 475 return ffs_rawread_main(vp, uio); 476 } 477 478 partialbytes = ((unsigned int) ip->i_size) % 479 ip->i_fs->fs_bsize; 480 blockbytes = (int) filebytes - partialbytes; 481 if (blockbytes > 0) { 482 skipbytes = uio->uio_resid - 483 blockbytes; 484 uio->uio_resid = blockbytes; 485 error = ffs_rawread_main(vp, uio); 486 uio->uio_resid += skipbytes; 487 if (error != 0) 488 return error; 489 /* Read remaining part using buffer */ 490 } 491 } 492 } 493 } 494 *workdone = 0; 495 return 0; 496}
| 150 } else 151 VI_UNLOCK(vp); 152 153 /* Wait for pending writes to complete */ 154 BO_LOCK(bo); 155 error = bufobj_wwait(&vp->v_bufobj, 0, 0); 156 if (error != 0) { 157 /* XXX: can't happen with a zero timeout ??? */ 158 BO_UNLOCK(bo); 159 if (upgraded != 0) 160 VOP_LOCK(vp, LK_DOWNGRADE); 161 vn_finished_write(mp); 162 return (error); 163 } 164 /* Flush dirty buffers */ 165 if (bo->bo_dirty.bv_cnt > 0) { 166 BO_UNLOCK(bo); 167 if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) { 168 if (upgraded != 0) 169 VOP_LOCK(vp, LK_DOWNGRADE); 170 vn_finished_write(mp); 171 return (error); 172 } 173 BO_LOCK(bo); 174 if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) 175 panic("ffs_rawread_sync: dirty bufs"); 176 } 177 BO_UNLOCK(bo); 178 if (upgraded != 0) 179 VOP_LOCK(vp, LK_DOWNGRADE); 180 vn_finished_write(mp); 181 } else { 182 VI_UNLOCK(vp); 183 BO_UNLOCK(bo); 184 } 185 return 0; 186} 187 188 189static int 190ffs_rawread_readahead(struct vnode *vp, 191 caddr_t udata, 192 off_t offset, 193 size_t len, 194 struct thread *td, 195 struct buf *bp, 196 caddr_t sa) 197{ 198 int error; 199 u_int iolen; 200 off_t blockno; 201 int blockoff; 202 int bsize; 203 struct vnode *dp; 204 int bforwards; 205 struct inode *ip; 206 ufs2_daddr_t blkno; 207 208 bsize = vp->v_mount->mnt_stat.f_iosize; 209 210 ip = VTOI(vp); 211 dp = ip->i_devvp; 212 213 iolen = ((vm_offset_t) udata) & PAGE_MASK; 214 bp->b_bcount = len; 215 if (bp->b_bcount + iolen > bp->b_kvasize) { 216 bp->b_bcount = bp->b_kvasize; 217 if (iolen != 0) 218 bp->b_bcount -= PAGE_SIZE; 219 } 220 bp->b_flags = 0; /* XXX necessary ? */ 221 bp->b_iocmd = BIO_READ; 222 bp->b_iodone = bdone; 223 bp->b_data = udata; 224 bp->b_saveaddr = sa; 225 blockno = offset / bsize; 226 blockoff = (offset % bsize) / DEV_BSIZE; 227 if ((daddr_t) blockno != blockno) { 228 return EINVAL; /* blockno overflow */ 229 } 230 231 bp->b_lblkno = bp->b_blkno = blockno; 232 233 error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, NULL, &bforwards, NULL); 234 if (error != 0) 235 return error; 236 if (blkno == -1) { 237 238 /* Fill holes with NULs to preserve semantics */ 239 240 if (bp->b_bcount + blockoff * DEV_BSIZE > bsize) 241 bp->b_bcount = bsize - blockoff * DEV_BSIZE; 242 bp->b_bufsize = bp->b_bcount; 243 244 if (vmapbuf(bp) < 0) 245 return EFAULT; 246 247 maybe_yield(); 248 bzero(bp->b_data, bp->b_bufsize); 249 250 /* Mark operation completed (similar to bufdone()) */ 251 252 bp->b_resid = 0; 253 bp->b_flags |= B_DONE; 254 return 0; 255 } 256 bp->b_blkno = blkno + blockoff; 257 bp->b_offset = bp->b_iooffset = (blkno + blockoff) * DEV_BSIZE; 258 259 if (bp->b_bcount + blockoff * DEV_BSIZE > bsize * (1 + bforwards)) 260 bp->b_bcount = bsize * (1 + bforwards) - blockoff * DEV_BSIZE; 261 bp->b_bufsize = bp->b_bcount; 262 263 if (vmapbuf(bp) < 0) 264 return EFAULT; 265 266 BO_STRATEGY(&dp->v_bufobj, bp); 267 return 0; 268} 269 270 271static int 272ffs_rawread_main(struct vnode *vp, 273 struct uio *uio) 274{ 275 int error, nerror; 276 struct buf *bp, *nbp, *tbp; 277 caddr_t sa, nsa, tsa; 278 u_int iolen; 279 int spl; 280 caddr_t udata; 281 long resid; 282 off_t offset; 283 struct thread *td; 284 285 td = uio->uio_td ? uio->uio_td : curthread; 286 udata = uio->uio_iov->iov_base; 287 resid = uio->uio_resid; 288 offset = uio->uio_offset; 289 290 /* 291 * keep the process from being swapped 292 */ 293 PHOLD(td->td_proc); 294 295 error = 0; 296 nerror = 0; 297 298 bp = NULL; 299 nbp = NULL; 300 sa = NULL; 301 nsa = NULL; 302 303 while (resid > 0) { 304 305 if (bp == NULL) { /* Setup first read */ 306 /* XXX: Leave some bufs for swap */ 307 bp = getpbuf(&ffsrawbufcnt); 308 sa = bp->b_data; 309 pbgetvp(vp, bp); 310 error = ffs_rawread_readahead(vp, udata, offset, 311 resid, td, bp, sa); 312 if (error != 0) 313 break; 314 315 if (resid > bp->b_bufsize) { /* Setup fist readahead */ 316 /* XXX: Leave bufs for swap */ 317 if (rawreadahead != 0) 318 nbp = trypbuf(&ffsrawbufcnt); 319 else 320 nbp = NULL; 321 if (nbp != NULL) { 322 nsa = nbp->b_data; 323 pbgetvp(vp, nbp); 324 325 nerror = ffs_rawread_readahead(vp, 326 udata + 327 bp->b_bufsize, 328 offset + 329 bp->b_bufsize, 330 resid - 331 bp->b_bufsize, 332 td, 333 nbp, 334 nsa); 335 if (nerror) { 336 pbrelvp(nbp); 337 relpbuf(nbp, &ffsrawbufcnt); 338 nbp = NULL; 339 } 340 } 341 } 342 } 343 344 spl = splbio(); 345 bwait(bp, PRIBIO, "rawrd"); 346 splx(spl); 347 348 vunmapbuf(bp); 349 350 iolen = bp->b_bcount - bp->b_resid; 351 if (iolen == 0 && (bp->b_ioflags & BIO_ERROR) == 0) { 352 nerror = 0; /* Ignore possible beyond EOF error */ 353 break; /* EOF */ 354 } 355 356 if ((bp->b_ioflags & BIO_ERROR) != 0) { 357 error = bp->b_error; 358 break; 359 } 360 resid -= iolen; 361 udata += iolen; 362 offset += iolen; 363 if (iolen < bp->b_bufsize) { 364 /* Incomplete read. Try to read remaining part */ 365 error = ffs_rawread_readahead(vp, 366 udata, 367 offset, 368 bp->b_bufsize - iolen, 369 td, 370 bp, 371 sa); 372 if (error != 0) 373 break; 374 } else if (nbp != NULL) { /* Complete read with readahead */ 375 376 tbp = bp; 377 bp = nbp; 378 nbp = tbp; 379 380 tsa = sa; 381 sa = nsa; 382 nsa = tsa; 383 384 if (resid <= bp->b_bufsize) { /* No more readaheads */ 385 pbrelvp(nbp); 386 relpbuf(nbp, &ffsrawbufcnt); 387 nbp = NULL; 388 } else { /* Setup next readahead */ 389 nerror = ffs_rawread_readahead(vp, 390 udata + 391 bp->b_bufsize, 392 offset + 393 bp->b_bufsize, 394 resid - 395 bp->b_bufsize, 396 td, 397 nbp, 398 nsa); 399 if (nerror != 0) { 400 pbrelvp(nbp); 401 relpbuf(nbp, &ffsrawbufcnt); 402 nbp = NULL; 403 } 404 } 405 } else if (nerror != 0) {/* Deferred Readahead error */ 406 break; 407 } else if (resid > 0) { /* More to read, no readahead */ 408 error = ffs_rawread_readahead(vp, udata, offset, 409 resid, td, bp, sa); 410 if (error != 0) 411 break; 412 } 413 } 414 415 if (bp != NULL) { 416 pbrelvp(bp); 417 relpbuf(bp, &ffsrawbufcnt); 418 } 419 if (nbp != NULL) { /* Run down readahead buffer */ 420 spl = splbio(); 421 bwait(nbp, PRIBIO, "rawrd"); 422 splx(spl); 423 vunmapbuf(nbp); 424 pbrelvp(nbp); 425 relpbuf(nbp, &ffsrawbufcnt); 426 } 427 428 if (error == 0) 429 error = nerror; 430 PRELE(td->td_proc); 431 uio->uio_iov->iov_base = udata; 432 uio->uio_resid = resid; 433 uio->uio_offset = offset; 434 return error; 435} 436 437 438int 439ffs_rawread(struct vnode *vp, 440 struct uio *uio, 441 int *workdone) 442{ 443 if (allowrawread != 0 && 444 uio->uio_iovcnt == 1 && 445 uio->uio_segflg == UIO_USERSPACE && 446 uio->uio_resid == uio->uio_iov->iov_len && 447 (((uio->uio_td != NULL) ? uio->uio_td : curthread)->td_pflags & 448 TDP_DEADLKTREAT) == 0) { 449 int secsize; /* Media sector size */ 450 off_t filebytes; /* Bytes left of file */ 451 int blockbytes; /* Bytes left of file in full blocks */ 452 int partialbytes; /* Bytes in last partial block */ 453 int skipbytes; /* Bytes not to read in ffs_rawread */ 454 struct inode *ip; 455 int error; 456 457 458 /* Only handle sector aligned reads */ 459 ip = VTOI(vp); 460 secsize = ip->i_devvp->v_bufobj.bo_bsize; 461 if ((uio->uio_offset & (secsize - 1)) == 0 && 462 (uio->uio_resid & (secsize - 1)) == 0) { 463 464 /* Sync dirty pages and buffers if needed */ 465 error = ffs_rawread_sync(vp); 466 if (error != 0) 467 return error; 468 469 /* Check for end of file */ 470 if (ip->i_size > uio->uio_offset) { 471 filebytes = ip->i_size - uio->uio_offset; 472 473 /* No special eof handling needed ? */ 474 if (uio->uio_resid <= filebytes) { 475 *workdone = 1; 476 return ffs_rawread_main(vp, uio); 477 } 478 479 partialbytes = ((unsigned int) ip->i_size) % 480 ip->i_fs->fs_bsize; 481 blockbytes = (int) filebytes - partialbytes; 482 if (blockbytes > 0) { 483 skipbytes = uio->uio_resid - 484 blockbytes; 485 uio->uio_resid = blockbytes; 486 error = ffs_rawread_main(vp, uio); 487 uio->uio_resid += skipbytes; 488 if (error != 0) 489 return error; 490 /* Read remaining part using buffer */ 491 } 492 } 493 } 494 } 495 *workdone = 0; 496 return 0; 497}
|