Deleted Added
full compact
vfs_vnops.c (71576) vfs_vnops.c (72200)
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94
39 * $FreeBSD: head/sys/kern/vfs_vnops.c 71576 2001-01-24 12:35:55Z jasone $
39 * $FreeBSD: head/sys/kern/vfs_vnops.c 72200 2001-02-09 06:11:45Z bmilekic $
40 */
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/fcntl.h>
45#include <sys/file.h>
46#include <sys/stat.h>
47#include <sys/proc.h>
48#include <sys/mount.h>
49#include <sys/mutex.h>
50#include <sys/namei.h>
51#include <sys/vnode.h>
52#include <sys/bio.h>
53#include <sys/buf.h>
54#include <sys/filio.h>
55#include <sys/ttycom.h>
56#include <sys/conf.h>
57
58#include <ufs/ufs/quota.h>
59#include <ufs/ufs/inode.h>
60
61static int vn_closefile __P((struct file *fp, struct proc *p));
62static int vn_ioctl __P((struct file *fp, u_long com, caddr_t data,
63 struct proc *p));
64static int vn_read __P((struct file *fp, struct uio *uio,
65 struct ucred *cred, int flags, struct proc *p));
66static int vn_poll __P((struct file *fp, int events, struct ucred *cred,
67 struct proc *p));
68static int vn_statfile __P((struct file *fp, struct stat *sb, struct proc *p));
69static int vn_write __P((struct file *fp, struct uio *uio,
70 struct ucred *cred, int flags, struct proc *p));
71
72struct fileops vnops =
73 { vn_read, vn_write, vn_ioctl, vn_poll, vn_statfile, vn_closefile };
74
75static int filt_nullattach(struct knote *kn);
76static int filt_vnattach(struct knote *kn);
77static void filt_vndetach(struct knote *kn);
78static int filt_vnode(struct knote *kn, long hint);
79static int filt_vnread(struct knote *kn, long hint);
80
81struct filterops vn_filtops =
82 { 1, filt_vnattach, filt_vndetach, filt_vnode };
83
84/*
85 * XXX
86 * filt_vnread is ufs-specific, so the attach routine should really
87 * switch out to different filterops based on the vn filetype
88 */
89struct filterops vn_rwfiltops[] = {
90 { 1, filt_vnattach, filt_vndetach, filt_vnread },
91 { 1, filt_nullattach, NULL, NULL },
92};
93
94/*
95 * Common code for vnode open operations.
96 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
97 *
98 * Note that this does NOT free nameidata for the successful case,
99 * due to the NDINIT being done elsewhere.
100 */
101int
102vn_open(ndp, flagp, cmode)
103 register struct nameidata *ndp;
104 int *flagp, cmode;
105{
106 struct vnode *vp;
107 struct mount *mp;
108 struct proc *p = ndp->ni_cnd.cn_proc;
109 struct ucred *cred = p->p_ucred;
110 struct vattr vat;
111 struct vattr *vap = &vat;
112 int mode, fmode, error;
113
114restart:
115 fmode = *flagp;
116 if (fmode & O_CREAT) {
117 ndp->ni_cnd.cn_nameiop = CREATE;
118 ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
119 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
120 ndp->ni_cnd.cn_flags |= FOLLOW;
121 bwillwrite();
122 if ((error = namei(ndp)) != 0)
123 return (error);
124 if (ndp->ni_vp == NULL) {
125 VATTR_NULL(vap);
126 vap->va_type = VREG;
127 vap->va_mode = cmode;
128 if (fmode & O_EXCL)
129 vap->va_vaflags |= VA_EXCLUSIVE;
130 if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) {
131 NDFREE(ndp, NDF_ONLY_PNBUF);
132 vput(ndp->ni_dvp);
133 if ((error = vn_start_write(NULL, &mp,
134 V_XSLEEP | PCATCH)) != 0)
135 return (error);
136 goto restart;
137 }
138 VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE);
139 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
140 &ndp->ni_cnd, vap);
141 vput(ndp->ni_dvp);
142 vn_finished_write(mp);
143 if (error) {
144 NDFREE(ndp, NDF_ONLY_PNBUF);
145 return (error);
146 }
147 ASSERT_VOP_UNLOCKED(ndp->ni_dvp, "create");
148 ASSERT_VOP_LOCKED(ndp->ni_vp, "create");
149 fmode &= ~O_TRUNC;
150 vp = ndp->ni_vp;
151 } else {
152 if (ndp->ni_dvp == ndp->ni_vp)
153 vrele(ndp->ni_dvp);
154 else
155 vput(ndp->ni_dvp);
156 ndp->ni_dvp = NULL;
157 vp = ndp->ni_vp;
158 if (fmode & O_EXCL) {
159 error = EEXIST;
160 goto bad;
161 }
162 fmode &= ~O_CREAT;
163 }
164 } else {
165 ndp->ni_cnd.cn_nameiop = LOOKUP;
166 ndp->ni_cnd.cn_flags =
167 ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF;
168 if ((error = namei(ndp)) != 0)
169 return (error);
170 vp = ndp->ni_vp;
171 }
172 if (vp->v_type == VLNK) {
173 error = EMLINK;
174 goto bad;
175 }
176 if (vp->v_type == VSOCK) {
177 error = EOPNOTSUPP;
178 goto bad;
179 }
180 if ((fmode & O_CREAT) == 0) {
181 mode = 0;
182 if (fmode & (FWRITE | O_TRUNC)) {
183 if (vp->v_type == VDIR) {
184 error = EISDIR;
185 goto bad;
186 }
187 error = vn_writechk(vp);
188 if (error)
189 goto bad;
190 mode |= VWRITE;
191 }
192 if (fmode & FREAD)
193 mode |= VREAD;
194 if (mode) {
195 error = VOP_ACCESS(vp, mode, cred, p);
196 if (error)
197 goto bad;
198 }
199 }
200 if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0)
201 goto bad;
202 /*
203 * Make sure that a VM object is created for VMIO support.
204 */
205 if (vn_canvmio(vp) == TRUE) {
206 if ((error = vfs_object_create(vp, p, cred)) != 0)
207 goto bad;
208 }
209
210 if (fmode & FWRITE)
211 vp->v_writecount++;
212 *flagp = fmode;
213 return (0);
214bad:
215 NDFREE(ndp, NDF_ONLY_PNBUF);
216 vput(vp);
217 *flagp = fmode;
218 return (error);
219}
220
221/*
222 * Check for write permissions on the specified vnode.
223 * Prototype text segments cannot be written.
224 */
225int
226vn_writechk(vp)
227 register struct vnode *vp;
228{
229
230 /*
231 * If there's shared text associated with
232 * the vnode, try to free it up once. If
233 * we fail, we can't allow writing.
234 */
235 if (vp->v_flag & VTEXT)
236 return (ETXTBSY);
237 return (0);
238}
239
240/*
241 * Vnode close call
242 */
243int
244vn_close(vp, flags, cred, p)
245 register struct vnode *vp;
246 int flags;
247 struct ucred *cred;
248 struct proc *p;
249{
250 int error;
251
252 if (flags & FWRITE)
253 vp->v_writecount--;
254 error = VOP_CLOSE(vp, flags, cred, p);
255 vrele(vp);
256 return (error);
257}
258
259static __inline
260int
261sequential_heuristic(struct uio *uio, struct file *fp)
262{
263 /*
264 * Sequential heuristic - detect sequential operation
265 */
266 if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
267 uio->uio_offset == fp->f_nextoff) {
268 /*
269 * XXX we assume that the filesystem block size is
270 * the default. Not true, but still gives us a pretty
271 * good indicator of how sequential the read operations
272 * are.
273 */
274 fp->f_seqcount += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
275 if (fp->f_seqcount >= 127)
276 fp->f_seqcount = 127;
277 return(fp->f_seqcount << 16);
278 }
279
280 /*
281 * Not sequential, quick draw-down of seqcount
282 */
283 if (fp->f_seqcount > 1)
284 fp->f_seqcount = 1;
285 else
286 fp->f_seqcount = 0;
287 return(0);
288}
289
290/*
291 * Package up an I/O request on a vnode into a uio and do it.
292 */
293int
294vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
295 enum uio_rw rw;
296 struct vnode *vp;
297 caddr_t base;
298 int len;
299 off_t offset;
300 enum uio_seg segflg;
301 int ioflg;
302 struct ucred *cred;
303 int *aresid;
304 struct proc *p;
305{
306 struct uio auio;
307 struct iovec aiov;
308 struct mount *mp;
309 int error;
310
311 if ((ioflg & IO_NODELOCKED) == 0) {
312 mp = NULL;
313 if (rw == UIO_WRITE &&
314 vp->v_type != VCHR &&
315 (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
316 return (error);
317 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
318 }
319 auio.uio_iov = &aiov;
320 auio.uio_iovcnt = 1;
321 aiov.iov_base = base;
322 aiov.iov_len = len;
323 auio.uio_resid = len;
324 auio.uio_offset = offset;
325 auio.uio_segflg = segflg;
326 auio.uio_rw = rw;
327 auio.uio_procp = p;
328 if (rw == UIO_READ) {
329 error = VOP_READ(vp, &auio, ioflg, cred);
330 } else {
331 error = VOP_WRITE(vp, &auio, ioflg, cred);
332 }
333 if (aresid)
334 *aresid = auio.uio_resid;
335 else
336 if (auio.uio_resid && error == 0)
337 error = EIO;
338 if ((ioflg & IO_NODELOCKED) == 0) {
339 vn_finished_write(mp);
340 VOP_UNLOCK(vp, 0, p);
341 }
342 return (error);
343}
344
345/*
346 * File table vnode read routine.
347 */
348static int
349vn_read(fp, uio, cred, flags, p)
350 struct file *fp;
351 struct uio *uio;
352 struct ucred *cred;
353 struct proc *p;
354 int flags;
355{
356 struct vnode *vp;
357 int error, ioflag;
358
359 KASSERT(uio->uio_procp == p, ("uio_procp %p is not p %p",
360 uio->uio_procp, p));
361 vp = (struct vnode *)fp->f_data;
362 ioflag = 0;
363 if (fp->f_flag & FNONBLOCK)
364 ioflag |= IO_NDELAY;
365 VOP_LEASE(vp, p, cred, LEASE_READ);
366 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p);
367 if ((flags & FOF_OFFSET) == 0)
368 uio->uio_offset = fp->f_offset;
369
370 ioflag |= sequential_heuristic(uio, fp);
371
372 error = VOP_READ(vp, uio, ioflag, cred);
373 if ((flags & FOF_OFFSET) == 0)
374 fp->f_offset = uio->uio_offset;
375 fp->f_nextoff = uio->uio_offset;
376 VOP_UNLOCK(vp, 0, p);
377 return (error);
378}
379
380/*
381 * File table vnode write routine.
382 */
383static int
384vn_write(fp, uio, cred, flags, p)
385 struct file *fp;
386 struct uio *uio;
387 struct ucred *cred;
388 struct proc *p;
389 int flags;
390{
391 struct vnode *vp;
392 struct mount *mp;
393 int error, ioflag;
394
395 KASSERT(uio->uio_procp == p, ("uio_procp %p is not p %p",
396 uio->uio_procp, p));
397 vp = (struct vnode *)fp->f_data;
398 if (vp->v_type == VREG)
399 bwillwrite();
400 vp = (struct vnode *)fp->f_data; /* XXX needed? */
401 ioflag = IO_UNIT;
402 if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
403 ioflag |= IO_APPEND;
404 if (fp->f_flag & FNONBLOCK)
405 ioflag |= IO_NDELAY;
406 if ((fp->f_flag & O_FSYNC) ||
407 (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
408 ioflag |= IO_SYNC;
409 mp = NULL;
410 if (vp->v_type != VCHR &&
411 (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
412 return (error);
413 VOP_LEASE(vp, p, cred, LEASE_WRITE);
414 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
415 if ((flags & FOF_OFFSET) == 0)
416 uio->uio_offset = fp->f_offset;
417 ioflag |= sequential_heuristic(uio, fp);
418 error = VOP_WRITE(vp, uio, ioflag, cred);
419 if ((flags & FOF_OFFSET) == 0)
420 fp->f_offset = uio->uio_offset;
421 fp->f_nextoff = uio->uio_offset;
422 VOP_UNLOCK(vp, 0, p);
423 vn_finished_write(mp);
424 return (error);
425}
426
427/*
428 * File table vnode stat routine.
429 */
430static int
431vn_statfile(fp, sb, p)
432 struct file *fp;
433 struct stat *sb;
434 struct proc *p;
435{
436 struct vnode *vp = (struct vnode *)fp->f_data;
437
438 return vn_stat(vp, sb, p);
439}
440
441int
442vn_stat(vp, sb, p)
443 struct vnode *vp;
444 register struct stat *sb;
445 struct proc *p;
446{
447 struct vattr vattr;
448 register struct vattr *vap;
449 int error;
450 u_short mode;
451
452 vap = &vattr;
453 error = VOP_GETATTR(vp, vap, p->p_ucred, p);
454 if (error)
455 return (error);
456
457 /*
458 * Zero the spare stat fields
459 */
460 sb->st_lspare = 0;
461 sb->st_qspare[0] = 0;
462 sb->st_qspare[1] = 0;
463
464 /*
465 * Copy from vattr table
466 */
467 if (vap->va_fsid != VNOVAL)
468 sb->st_dev = vap->va_fsid;
469 else
470 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
471 sb->st_ino = vap->va_fileid;
472 mode = vap->va_mode;
473 switch (vap->va_type) {
474 case VREG:
475 mode |= S_IFREG;
476 break;
477 case VDIR:
478 mode |= S_IFDIR;
479 break;
480 case VBLK:
481 mode |= S_IFBLK;
482 break;
483 case VCHR:
484 mode |= S_IFCHR;
485 break;
486 case VLNK:
487 mode |= S_IFLNK;
488 /* This is a cosmetic change, symlinks do not have a mode. */
489 if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW)
490 sb->st_mode &= ~ACCESSPERMS; /* 0000 */
491 else
492 sb->st_mode |= ACCESSPERMS; /* 0777 */
493 break;
494 case VSOCK:
495 mode |= S_IFSOCK;
496 break;
497 case VFIFO:
498 mode |= S_IFIFO;
499 break;
500 default:
501 return (EBADF);
502 };
503 sb->st_mode = mode;
504 sb->st_nlink = vap->va_nlink;
505 sb->st_uid = vap->va_uid;
506 sb->st_gid = vap->va_gid;
507 sb->st_rdev = vap->va_rdev;
508 sb->st_size = vap->va_size;
509 sb->st_atimespec = vap->va_atime;
510 sb->st_mtimespec = vap->va_mtime;
511 sb->st_ctimespec = vap->va_ctime;
512
513 /*
514 * According to www.opengroup.org, the meaning of st_blksize is
515 * "a filesystem-specific preferred I/O block size for this
516 * object. In some filesystem types, this may vary from file
517 * to file"
518 * Default to zero to catch bogus uses of this field.
519 */
520
521 if (vap->va_type == VREG) {
522 sb->st_blksize = vap->va_blocksize;
523 } else if (vn_isdisk(vp, NULL)) {
524 sb->st_blksize = vp->v_rdev->si_bsize_best;
525 if (sb->st_blksize < vp->v_rdev->si_bsize_phys)
526 sb->st_blksize = vp->v_rdev->si_bsize_phys;
527 if (sb->st_blksize < BLKDEV_IOSIZE)
528 sb->st_blksize = BLKDEV_IOSIZE;
529 } else {
530 sb->st_blksize = 0;
531 }
532
533 sb->st_flags = vap->va_flags;
534 if (suser_xxx(p->p_ucred, 0, 0))
535 sb->st_gen = 0;
536 else
537 sb->st_gen = vap->va_gen;
538
539#if (S_BLKSIZE == 512)
540 /* Optimize this case */
541 sb->st_blocks = vap->va_bytes >> 9;
542#else
543 sb->st_blocks = vap->va_bytes / S_BLKSIZE;
544#endif
545 return (0);
546}
547
548/*
549 * File table vnode ioctl routine.
550 */
551static int
552vn_ioctl(fp, com, data, p)
553 struct file *fp;
554 u_long com;
555 caddr_t data;
556 struct proc *p;
557{
558 register struct vnode *vp = ((struct vnode *)fp->f_data);
559 struct vattr vattr;
560 int error;
561
562 switch (vp->v_type) {
563
564 case VREG:
565 case VDIR:
566 if (com == FIONREAD) {
567 error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
568 if (error)
569 return (error);
570 *(int *)data = vattr.va_size - fp->f_offset;
571 return (0);
572 }
573 if (com == FIONBIO || com == FIOASYNC) /* XXX */
574 return (0); /* XXX */
575 /* fall into ... */
576
577 default:
578#if 0
579 return (ENOTTY);
580#endif
581 case VFIFO:
582 case VCHR:
583 case VBLK:
584 if (com == FIODTYPE) {
585 if (vp->v_type != VCHR && vp->v_type != VBLK)
586 return (ENOTTY);
587 *(int *)data = devsw(vp->v_rdev)->d_flags & D_TYPEMASK;
588 return (0);
589 }
590 error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
591 if (error == 0 && com == TIOCSCTTY) {
592
593 /* Do nothing if reassigning same control tty */
594 if (p->p_session->s_ttyvp == vp)
595 return (0);
596
597 /* Get rid of reference to old control tty */
598 if (p->p_session->s_ttyvp)
599 vrele(p->p_session->s_ttyvp);
600
601 p->p_session->s_ttyvp = vp;
602 VREF(vp);
603 }
604 return (error);
605 }
606}
607
608/*
609 * File table vnode poll routine.
610 */
611static int
612vn_poll(fp, events, cred, p)
613 struct file *fp;
614 int events;
615 struct ucred *cred;
616 struct proc *p;
617{
618
619 return (VOP_POLL(((struct vnode *)fp->f_data), events, cred, p));
620}
621
622/*
623 * Check that the vnode is still valid, and if so
624 * acquire requested lock.
625 */
626int
627#ifndef DEBUG_LOCKS
628vn_lock(vp, flags, p)
629#else
630debug_vn_lock(vp, flags, p, filename, line)
631#endif
632 struct vnode *vp;
633 int flags;
634 struct proc *p;
635#ifdef DEBUG_LOCKS
636 const char *filename;
637 int line;
638#endif
639{
640 int error;
641
642 do {
643 if ((flags & LK_INTERLOCK) == 0)
40 */
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/fcntl.h>
45#include <sys/file.h>
46#include <sys/stat.h>
47#include <sys/proc.h>
48#include <sys/mount.h>
49#include <sys/mutex.h>
50#include <sys/namei.h>
51#include <sys/vnode.h>
52#include <sys/bio.h>
53#include <sys/buf.h>
54#include <sys/filio.h>
55#include <sys/ttycom.h>
56#include <sys/conf.h>
57
58#include <ufs/ufs/quota.h>
59#include <ufs/ufs/inode.h>
60
61static int vn_closefile __P((struct file *fp, struct proc *p));
62static int vn_ioctl __P((struct file *fp, u_long com, caddr_t data,
63 struct proc *p));
64static int vn_read __P((struct file *fp, struct uio *uio,
65 struct ucred *cred, int flags, struct proc *p));
66static int vn_poll __P((struct file *fp, int events, struct ucred *cred,
67 struct proc *p));
68static int vn_statfile __P((struct file *fp, struct stat *sb, struct proc *p));
69static int vn_write __P((struct file *fp, struct uio *uio,
70 struct ucred *cred, int flags, struct proc *p));
71
72struct fileops vnops =
73 { vn_read, vn_write, vn_ioctl, vn_poll, vn_statfile, vn_closefile };
74
75static int filt_nullattach(struct knote *kn);
76static int filt_vnattach(struct knote *kn);
77static void filt_vndetach(struct knote *kn);
78static int filt_vnode(struct knote *kn, long hint);
79static int filt_vnread(struct knote *kn, long hint);
80
81struct filterops vn_filtops =
82 { 1, filt_vnattach, filt_vndetach, filt_vnode };
83
84/*
85 * XXX
86 * filt_vnread is ufs-specific, so the attach routine should really
87 * switch out to different filterops based on the vn filetype
88 */
89struct filterops vn_rwfiltops[] = {
90 { 1, filt_vnattach, filt_vndetach, filt_vnread },
91 { 1, filt_nullattach, NULL, NULL },
92};
93
94/*
95 * Common code for vnode open operations.
96 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
97 *
98 * Note that this does NOT free nameidata for the successful case,
99 * due to the NDINIT being done elsewhere.
100 */
101int
102vn_open(ndp, flagp, cmode)
103 register struct nameidata *ndp;
104 int *flagp, cmode;
105{
106 struct vnode *vp;
107 struct mount *mp;
108 struct proc *p = ndp->ni_cnd.cn_proc;
109 struct ucred *cred = p->p_ucred;
110 struct vattr vat;
111 struct vattr *vap = &vat;
112 int mode, fmode, error;
113
114restart:
115 fmode = *flagp;
116 if (fmode & O_CREAT) {
117 ndp->ni_cnd.cn_nameiop = CREATE;
118 ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
119 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
120 ndp->ni_cnd.cn_flags |= FOLLOW;
121 bwillwrite();
122 if ((error = namei(ndp)) != 0)
123 return (error);
124 if (ndp->ni_vp == NULL) {
125 VATTR_NULL(vap);
126 vap->va_type = VREG;
127 vap->va_mode = cmode;
128 if (fmode & O_EXCL)
129 vap->va_vaflags |= VA_EXCLUSIVE;
130 if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) {
131 NDFREE(ndp, NDF_ONLY_PNBUF);
132 vput(ndp->ni_dvp);
133 if ((error = vn_start_write(NULL, &mp,
134 V_XSLEEP | PCATCH)) != 0)
135 return (error);
136 goto restart;
137 }
138 VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE);
139 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
140 &ndp->ni_cnd, vap);
141 vput(ndp->ni_dvp);
142 vn_finished_write(mp);
143 if (error) {
144 NDFREE(ndp, NDF_ONLY_PNBUF);
145 return (error);
146 }
147 ASSERT_VOP_UNLOCKED(ndp->ni_dvp, "create");
148 ASSERT_VOP_LOCKED(ndp->ni_vp, "create");
149 fmode &= ~O_TRUNC;
150 vp = ndp->ni_vp;
151 } else {
152 if (ndp->ni_dvp == ndp->ni_vp)
153 vrele(ndp->ni_dvp);
154 else
155 vput(ndp->ni_dvp);
156 ndp->ni_dvp = NULL;
157 vp = ndp->ni_vp;
158 if (fmode & O_EXCL) {
159 error = EEXIST;
160 goto bad;
161 }
162 fmode &= ~O_CREAT;
163 }
164 } else {
165 ndp->ni_cnd.cn_nameiop = LOOKUP;
166 ndp->ni_cnd.cn_flags =
167 ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF;
168 if ((error = namei(ndp)) != 0)
169 return (error);
170 vp = ndp->ni_vp;
171 }
172 if (vp->v_type == VLNK) {
173 error = EMLINK;
174 goto bad;
175 }
176 if (vp->v_type == VSOCK) {
177 error = EOPNOTSUPP;
178 goto bad;
179 }
180 if ((fmode & O_CREAT) == 0) {
181 mode = 0;
182 if (fmode & (FWRITE | O_TRUNC)) {
183 if (vp->v_type == VDIR) {
184 error = EISDIR;
185 goto bad;
186 }
187 error = vn_writechk(vp);
188 if (error)
189 goto bad;
190 mode |= VWRITE;
191 }
192 if (fmode & FREAD)
193 mode |= VREAD;
194 if (mode) {
195 error = VOP_ACCESS(vp, mode, cred, p);
196 if (error)
197 goto bad;
198 }
199 }
200 if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0)
201 goto bad;
202 /*
203 * Make sure that a VM object is created for VMIO support.
204 */
205 if (vn_canvmio(vp) == TRUE) {
206 if ((error = vfs_object_create(vp, p, cred)) != 0)
207 goto bad;
208 }
209
210 if (fmode & FWRITE)
211 vp->v_writecount++;
212 *flagp = fmode;
213 return (0);
214bad:
215 NDFREE(ndp, NDF_ONLY_PNBUF);
216 vput(vp);
217 *flagp = fmode;
218 return (error);
219}
220
221/*
222 * Check for write permissions on the specified vnode.
223 * Prototype text segments cannot be written.
224 */
225int
226vn_writechk(vp)
227 register struct vnode *vp;
228{
229
230 /*
231 * If there's shared text associated with
232 * the vnode, try to free it up once. If
233 * we fail, we can't allow writing.
234 */
235 if (vp->v_flag & VTEXT)
236 return (ETXTBSY);
237 return (0);
238}
239
240/*
241 * Vnode close call
242 */
243int
244vn_close(vp, flags, cred, p)
245 register struct vnode *vp;
246 int flags;
247 struct ucred *cred;
248 struct proc *p;
249{
250 int error;
251
252 if (flags & FWRITE)
253 vp->v_writecount--;
254 error = VOP_CLOSE(vp, flags, cred, p);
255 vrele(vp);
256 return (error);
257}
258
259static __inline
260int
261sequential_heuristic(struct uio *uio, struct file *fp)
262{
263 /*
264 * Sequential heuristic - detect sequential operation
265 */
266 if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
267 uio->uio_offset == fp->f_nextoff) {
268 /*
269 * XXX we assume that the filesystem block size is
270 * the default. Not true, but still gives us a pretty
271 * good indicator of how sequential the read operations
272 * are.
273 */
274 fp->f_seqcount += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
275 if (fp->f_seqcount >= 127)
276 fp->f_seqcount = 127;
277 return(fp->f_seqcount << 16);
278 }
279
280 /*
281 * Not sequential, quick draw-down of seqcount
282 */
283 if (fp->f_seqcount > 1)
284 fp->f_seqcount = 1;
285 else
286 fp->f_seqcount = 0;
287 return(0);
288}
289
290/*
291 * Package up an I/O request on a vnode into a uio and do it.
292 */
293int
294vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
295 enum uio_rw rw;
296 struct vnode *vp;
297 caddr_t base;
298 int len;
299 off_t offset;
300 enum uio_seg segflg;
301 int ioflg;
302 struct ucred *cred;
303 int *aresid;
304 struct proc *p;
305{
306 struct uio auio;
307 struct iovec aiov;
308 struct mount *mp;
309 int error;
310
311 if ((ioflg & IO_NODELOCKED) == 0) {
312 mp = NULL;
313 if (rw == UIO_WRITE &&
314 vp->v_type != VCHR &&
315 (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
316 return (error);
317 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
318 }
319 auio.uio_iov = &aiov;
320 auio.uio_iovcnt = 1;
321 aiov.iov_base = base;
322 aiov.iov_len = len;
323 auio.uio_resid = len;
324 auio.uio_offset = offset;
325 auio.uio_segflg = segflg;
326 auio.uio_rw = rw;
327 auio.uio_procp = p;
328 if (rw == UIO_READ) {
329 error = VOP_READ(vp, &auio, ioflg, cred);
330 } else {
331 error = VOP_WRITE(vp, &auio, ioflg, cred);
332 }
333 if (aresid)
334 *aresid = auio.uio_resid;
335 else
336 if (auio.uio_resid && error == 0)
337 error = EIO;
338 if ((ioflg & IO_NODELOCKED) == 0) {
339 vn_finished_write(mp);
340 VOP_UNLOCK(vp, 0, p);
341 }
342 return (error);
343}
344
345/*
346 * File table vnode read routine.
347 */
348static int
349vn_read(fp, uio, cred, flags, p)
350 struct file *fp;
351 struct uio *uio;
352 struct ucred *cred;
353 struct proc *p;
354 int flags;
355{
356 struct vnode *vp;
357 int error, ioflag;
358
359 KASSERT(uio->uio_procp == p, ("uio_procp %p is not p %p",
360 uio->uio_procp, p));
361 vp = (struct vnode *)fp->f_data;
362 ioflag = 0;
363 if (fp->f_flag & FNONBLOCK)
364 ioflag |= IO_NDELAY;
365 VOP_LEASE(vp, p, cred, LEASE_READ);
366 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p);
367 if ((flags & FOF_OFFSET) == 0)
368 uio->uio_offset = fp->f_offset;
369
370 ioflag |= sequential_heuristic(uio, fp);
371
372 error = VOP_READ(vp, uio, ioflag, cred);
373 if ((flags & FOF_OFFSET) == 0)
374 fp->f_offset = uio->uio_offset;
375 fp->f_nextoff = uio->uio_offset;
376 VOP_UNLOCK(vp, 0, p);
377 return (error);
378}
379
380/*
381 * File table vnode write routine.
382 */
383static int
384vn_write(fp, uio, cred, flags, p)
385 struct file *fp;
386 struct uio *uio;
387 struct ucred *cred;
388 struct proc *p;
389 int flags;
390{
391 struct vnode *vp;
392 struct mount *mp;
393 int error, ioflag;
394
395 KASSERT(uio->uio_procp == p, ("uio_procp %p is not p %p",
396 uio->uio_procp, p));
397 vp = (struct vnode *)fp->f_data;
398 if (vp->v_type == VREG)
399 bwillwrite();
400 vp = (struct vnode *)fp->f_data; /* XXX needed? */
401 ioflag = IO_UNIT;
402 if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
403 ioflag |= IO_APPEND;
404 if (fp->f_flag & FNONBLOCK)
405 ioflag |= IO_NDELAY;
406 if ((fp->f_flag & O_FSYNC) ||
407 (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
408 ioflag |= IO_SYNC;
409 mp = NULL;
410 if (vp->v_type != VCHR &&
411 (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
412 return (error);
413 VOP_LEASE(vp, p, cred, LEASE_WRITE);
414 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
415 if ((flags & FOF_OFFSET) == 0)
416 uio->uio_offset = fp->f_offset;
417 ioflag |= sequential_heuristic(uio, fp);
418 error = VOP_WRITE(vp, uio, ioflag, cred);
419 if ((flags & FOF_OFFSET) == 0)
420 fp->f_offset = uio->uio_offset;
421 fp->f_nextoff = uio->uio_offset;
422 VOP_UNLOCK(vp, 0, p);
423 vn_finished_write(mp);
424 return (error);
425}
426
427/*
428 * File table vnode stat routine.
429 */
430static int
431vn_statfile(fp, sb, p)
432 struct file *fp;
433 struct stat *sb;
434 struct proc *p;
435{
436 struct vnode *vp = (struct vnode *)fp->f_data;
437
438 return vn_stat(vp, sb, p);
439}
440
441int
442vn_stat(vp, sb, p)
443 struct vnode *vp;
444 register struct stat *sb;
445 struct proc *p;
446{
447 struct vattr vattr;
448 register struct vattr *vap;
449 int error;
450 u_short mode;
451
452 vap = &vattr;
453 error = VOP_GETATTR(vp, vap, p->p_ucred, p);
454 if (error)
455 return (error);
456
457 /*
458 * Zero the spare stat fields
459 */
460 sb->st_lspare = 0;
461 sb->st_qspare[0] = 0;
462 sb->st_qspare[1] = 0;
463
464 /*
465 * Copy from vattr table
466 */
467 if (vap->va_fsid != VNOVAL)
468 sb->st_dev = vap->va_fsid;
469 else
470 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
471 sb->st_ino = vap->va_fileid;
472 mode = vap->va_mode;
473 switch (vap->va_type) {
474 case VREG:
475 mode |= S_IFREG;
476 break;
477 case VDIR:
478 mode |= S_IFDIR;
479 break;
480 case VBLK:
481 mode |= S_IFBLK;
482 break;
483 case VCHR:
484 mode |= S_IFCHR;
485 break;
486 case VLNK:
487 mode |= S_IFLNK;
488 /* This is a cosmetic change, symlinks do not have a mode. */
489 if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW)
490 sb->st_mode &= ~ACCESSPERMS; /* 0000 */
491 else
492 sb->st_mode |= ACCESSPERMS; /* 0777 */
493 break;
494 case VSOCK:
495 mode |= S_IFSOCK;
496 break;
497 case VFIFO:
498 mode |= S_IFIFO;
499 break;
500 default:
501 return (EBADF);
502 };
503 sb->st_mode = mode;
504 sb->st_nlink = vap->va_nlink;
505 sb->st_uid = vap->va_uid;
506 sb->st_gid = vap->va_gid;
507 sb->st_rdev = vap->va_rdev;
508 sb->st_size = vap->va_size;
509 sb->st_atimespec = vap->va_atime;
510 sb->st_mtimespec = vap->va_mtime;
511 sb->st_ctimespec = vap->va_ctime;
512
513 /*
514 * According to www.opengroup.org, the meaning of st_blksize is
515 * "a filesystem-specific preferred I/O block size for this
516 * object. In some filesystem types, this may vary from file
517 * to file"
518 * Default to zero to catch bogus uses of this field.
519 */
520
521 if (vap->va_type == VREG) {
522 sb->st_blksize = vap->va_blocksize;
523 } else if (vn_isdisk(vp, NULL)) {
524 sb->st_blksize = vp->v_rdev->si_bsize_best;
525 if (sb->st_blksize < vp->v_rdev->si_bsize_phys)
526 sb->st_blksize = vp->v_rdev->si_bsize_phys;
527 if (sb->st_blksize < BLKDEV_IOSIZE)
528 sb->st_blksize = BLKDEV_IOSIZE;
529 } else {
530 sb->st_blksize = 0;
531 }
532
533 sb->st_flags = vap->va_flags;
534 if (suser_xxx(p->p_ucred, 0, 0))
535 sb->st_gen = 0;
536 else
537 sb->st_gen = vap->va_gen;
538
539#if (S_BLKSIZE == 512)
540 /* Optimize this case */
541 sb->st_blocks = vap->va_bytes >> 9;
542#else
543 sb->st_blocks = vap->va_bytes / S_BLKSIZE;
544#endif
545 return (0);
546}
547
/*
 * File table vnode ioctl routine.
 *
 * Implements the fo_ioctl file operation for vnode-backed files.  A
 * few generic requests (FIONREAD, FIONBIO, FIOASYNC, FIODTYPE) are
 * handled here; everything else is forwarded via VOP_IOCTL().
 */
static int
vn_ioctl(fp, com, data, p)
	struct file *fp;	/* file table entry being operated on */
	u_long com;		/* ioctl command code */
	caddr_t data;		/* in/out argument buffer */
	struct proc *p;		/* calling process */
{
	register struct vnode *vp = ((struct vnode *)fp->f_data);
	struct vattr vattr;
	int error;

	switch (vp->v_type) {

	case VREG:
	case VDIR:
		if (com == FIONREAD) {
			/*
			 * Report bytes between the current file offset and
			 * EOF.  NOTE(review): va_size - f_offset is narrowed
			 * to int here, so the count can truncate or go
			 * negative on very large files -- confirm callers
			 * tolerate this.
			 */
			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
			if (error)
				return (error);
			*(int *)data = vattr.va_size - fp->f_offset;
			return (0);
		}
		/* Accepted but ignored for regular files/directories. */
		if (com == FIONBIO || com == FIOASYNC)	/* XXX */
			return (0);			/* XXX */
		/* fall into ... */

	default:
#if 0
		return (ENOTTY);
#endif
	case VFIFO:
	case VCHR:
	case VBLK:
		if (com == FIODTYPE) {
			/* Device type bits only make sense for devices. */
			if (vp->v_type != VCHR && vp->v_type != VBLK)
				return (ENOTTY);
			*(int *)data = devsw(vp->v_rdev)->d_flags & D_TYPEMASK;
			return (0);
		}
		error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
		if (error == 0 && com == TIOCSCTTY) {

			/* Do nothing if reassigning same control tty */
			if (p->p_session->s_ttyvp == vp)
				return (0);

			/* Get rid of reference to old control tty */
			if (p->p_session->s_ttyvp)
				vrele(p->p_session->s_ttyvp);

			/* Record the new controlling tty; hold a reference. */
			p->p_session->s_ttyvp = vp;
			VREF(vp);
		}
		return (error);
	}
}
607
608/*
609 * File table vnode poll routine.
610 */
611static int
612vn_poll(fp, events, cred, p)
613 struct file *fp;
614 int events;
615 struct ucred *cred;
616 struct proc *p;
617{
618
619 return (VOP_POLL(((struct vnode *)fp->f_data), events, cred, p));
620}
621
622/*
623 * Check that the vnode is still valid, and if so
624 * acquire requested lock.
625 */
626int
627#ifndef DEBUG_LOCKS
628vn_lock(vp, flags, p)
629#else
630debug_vn_lock(vp, flags, p, filename, line)
631#endif
632 struct vnode *vp;
633 int flags;
634 struct proc *p;
635#ifdef DEBUG_LOCKS
636 const char *filename;
637 int line;
638#endif
639{
640 int error;
641
642 do {
643 if ((flags & LK_INTERLOCK) == 0)
644 mtx_enter(&vp->v_interlock, MTX_DEF);
644 mtx_lock(&vp->v_interlock);
645 if ((vp->v_flag & VXLOCK) && vp->v_vxproc != curproc) {
646 vp->v_flag |= VXWANT;
645 if ((vp->v_flag & VXLOCK) && vp->v_vxproc != curproc) {
646 vp->v_flag |= VXWANT;
647 mtx_exit(&vp->v_interlock, MTX_DEF);
647 mtx_unlock(&vp->v_interlock);
648 tsleep((caddr_t)vp, PINOD, "vn_lock", 0);
649 error = ENOENT;
650 } else {
651 if (vp->v_vxproc != NULL)
652 printf("VXLOCK interlock avoided in vn_lock\n");
653#ifdef DEBUG_LOCKS
654 vp->filename = filename;
655 vp->line = line;
656#endif
657 error = VOP_LOCK(vp,
658 flags | LK_NOPAUSE | LK_INTERLOCK, p);
659 if (error == 0)
660 return (error);
661 }
662 flags &= ~LK_INTERLOCK;
663 } while (flags & LK_RETRY);
664 return (error);
665}
666
667/*
668 * File table vnode close routine.
669 */
670static int
671vn_closefile(fp, p)
672 struct file *fp;
673 struct proc *p;
674{
675
676 fp->f_ops = &badfileops;
677 return (vn_close(((struct vnode *)fp->f_data), fp->f_flag,
678 fp->f_cred, p));
679}
680
/*
 * Preparing to start a filesystem write operation. If the operation is
 * permitted, then we bump the count of operations in progress and
 * proceed. If a suspend request is in progress, we wait until the
 * suspension is over, and then proceed.
 *
 * On success *mpp holds the mount point charged with the operation
 * (possibly NULL); the caller must pair this with vn_finished_write().
 * Flags: V_NOWAIT returns EWOULDBLOCK instead of sleeping; V_XSLEEP
 * waits out any suspension but does not account the write; PCATCH
 * makes the sleep interruptible by signals.
 */
int
vn_start_write(vp, mpp, flags)
	struct vnode *vp;
	struct mount **mpp;
	int flags;
{
	struct mount *mp;
	int error;

	/*
	 * If a vnode is provided, get and return the mount point that
	 * to which it will write.
	 */
	if (vp != NULL) {
		if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
			*mpp = NULL;
			/*
			 * EOPNOTSUPP means this filesystem does not track
			 * write mounts; treat the write as unaccounted.
			 */
			if (error != EOPNOTSUPP)
				return (error);
			return (0);
		}
	}
	if ((mp = *mpp) == NULL)
		return (0);
	/*
	 * Check on status of suspension.
	 */
	while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
		if (flags & V_NOWAIT)
			return (EWOULDBLOCK);
		/* Sleep until vfs_write_resume() wakes mnt_flag waiters. */
		error = tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH),
		    "suspfs", 0);
		if (error)
			return (error);
	}
	if (flags & V_XSLEEP)
		return (0);
	/* Account this write; vn_finished_write() drops the count. */
	mp->mnt_writeopcount++;
	return (0);
}
726
/*
 * Secondary suspension. Used by operations such as vop_inactive
 * routines that are needed by the higher level functions. These
 * are allowed to proceed until all the higher level functions have
 * completed (indicated by mnt_writeopcount dropping to zero). At that
 * time, these operations are halted until the suspension is over.
 *
 * Returns 0 when the operation may proceed, EWOULDBLOCK if V_NOWAIT
 * is set and the filesystem is fully suspended, or a tsleep() error
 * (e.g. EINTR when PCATCH is passed in flags).
 */
int
vn_write_suspend_wait(vp, mp, flags)
	struct vnode *vp;
	struct mount *mp;
	int flags;
{
	int error;

	/* Prefer the vnode's write mount when a vnode was supplied. */
	if (vp != NULL) {
		if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) {
			/* EOPNOTSUPP: fs doesn't track write mounts. */
			if (error != EOPNOTSUPP)
				return (error);
			return (0);
		}
	}
	/*
	 * If we are not suspended or have not yet reached suspended
	 * mode, then let the operation proceed.
	 */
	if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPENDED) == 0)
		return (0);
	if (flags & V_NOWAIT)
		return (EWOULDBLOCK);
	/*
	 * Wait for the suspension to finish.
	 */
	return (tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH),
	    "suspfs", 0));
}
763
764/*
765 * Filesystem write operation has completed. If we are suspending and this
766 * operation is the last one, notify the suspender that the suspension is
767 * now in effect.
768 */
769void
770vn_finished_write(mp)
771 struct mount *mp;
772{
773
774 if (mp == NULL)
775 return;
776 mp->mnt_writeopcount--;
777 if (mp->mnt_writeopcount < 0)
778 panic("vn_finished_write: neg cnt");
779 if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
780 mp->mnt_writeopcount <= 0)
781 wakeup(&mp->mnt_writeopcount);
782}
783
/*
 * Request a filesystem to suspend write operations.
 *
 * Sets MNTK_SUSPEND to block new primary writes, waits for in-flight
 * writes to drain (vn_finished_write() wakes mnt_writeopcount when the
 * count reaches zero), syncs the filesystem, and finally sets
 * MNTK_SUSPENDED to halt secondary writes as well.
 */
void
vfs_write_suspend(mp)
	struct mount *mp;
{
	struct proc *p = curproc;

	/* Already suspending/suspended; nothing to do. */
	if (mp->mnt_kern_flag & MNTK_SUSPEND)
		return;
	mp->mnt_kern_flag |= MNTK_SUSPEND;
	/* Wait for outstanding primary writes to finish. */
	if (mp->mnt_writeopcount > 0)
		(void) tsleep(&mp->mnt_writeopcount, PUSER - 1, "suspwt", 0);
	VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p);
	mp->mnt_kern_flag |= MNTK_SUSPENDED;
}
801
802/*
803 * Request a filesystem to resume write operations.
804 */
805void
806vfs_write_resume(mp)
807 struct mount *mp;
808{
809
810 if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0)
811 return;
812 mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPENDED);
813 wakeup(&mp->mnt_writeopcount);
814 wakeup(&mp->mnt_flag);
815}
816
817static int
818filt_vnattach(struct knote *kn)
819{
820 struct vnode *vp;
821
822 if (kn->kn_fp->f_type != DTYPE_VNODE &&
823 kn->kn_fp->f_type != DTYPE_FIFO)
824 return (EBADF);
825
826 vp = (struct vnode *)kn->kn_fp->f_data;
827
828 /*
829 * XXX
830 * this is a hack simply to cause the filter attach to fail
831 * for non-ufs filesystems, until the support for them is done.
832 */
833 if ((vp)->v_tag != VT_UFS)
834 return (EOPNOTSUPP);
835
648 tsleep((caddr_t)vp, PINOD, "vn_lock", 0);
649 error = ENOENT;
650 } else {
651 if (vp->v_vxproc != NULL)
652 printf("VXLOCK interlock avoided in vn_lock\n");
653#ifdef DEBUG_LOCKS
654 vp->filename = filename;
655 vp->line = line;
656#endif
657 error = VOP_LOCK(vp,
658 flags | LK_NOPAUSE | LK_INTERLOCK, p);
659 if (error == 0)
660 return (error);
661 }
662 flags &= ~LK_INTERLOCK;
663 } while (flags & LK_RETRY);
664 return (error);
665}
666
667/*
668 * File table vnode close routine.
669 */
670static int
671vn_closefile(fp, p)
672 struct file *fp;
673 struct proc *p;
674{
675
676 fp->f_ops = &badfileops;
677 return (vn_close(((struct vnode *)fp->f_data), fp->f_flag,
678 fp->f_cred, p));
679}
680
681/*
682 * Preparing to start a filesystem write operation. If the operation is
683 * permitted, then we bump the count of operations in progress and
684 * proceed. If a suspend request is in progress, we wait until the
685 * suspension is over, and then proceed.
686 */
687int
688vn_start_write(vp, mpp, flags)
689 struct vnode *vp;
690 struct mount **mpp;
691 int flags;
692{
693 struct mount *mp;
694 int error;
695
696 /*
697 * If a vnode is provided, get and return the mount point that
698 * to which it will write.
699 */
700 if (vp != NULL) {
701 if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
702 *mpp = NULL;
703 if (error != EOPNOTSUPP)
704 return (error);
705 return (0);
706 }
707 }
708 if ((mp = *mpp) == NULL)
709 return (0);
710 /*
711 * Check on status of suspension.
712 */
713 while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
714 if (flags & V_NOWAIT)
715 return (EWOULDBLOCK);
716 error = tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH),
717 "suspfs", 0);
718 if (error)
719 return (error);
720 }
721 if (flags & V_XSLEEP)
722 return (0);
723 mp->mnt_writeopcount++;
724 return (0);
725}
726
727/*
728 * Secondary suspension. Used by operations such as vop_inactive
729 * routines that are needed by the higher level functions. These
730 * are allowed to proceed until all the higher level functions have
731 * completed (indicated by mnt_writeopcount dropping to zero). At that
732 * time, these operations are halted until the suspension is over.
733 */
734int
735vn_write_suspend_wait(vp, mp, flags)
736 struct vnode *vp;
737 struct mount *mp;
738 int flags;
739{
740 int error;
741
742 if (vp != NULL) {
743 if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) {
744 if (error != EOPNOTSUPP)
745 return (error);
746 return (0);
747 }
748 }
749 /*
750 * If we are not suspended or have not yet reached suspended
751 * mode, then let the operation proceed.
752 */
753 if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPENDED) == 0)
754 return (0);
755 if (flags & V_NOWAIT)
756 return (EWOULDBLOCK);
757 /*
758 * Wait for the suspension to finish.
759 */
760 return (tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH),
761 "suspfs", 0));
762}
763
764/*
765 * Filesystem write operation has completed. If we are suspending and this
766 * operation is the last one, notify the suspender that the suspension is
767 * now in effect.
768 */
769void
770vn_finished_write(mp)
771 struct mount *mp;
772{
773
774 if (mp == NULL)
775 return;
776 mp->mnt_writeopcount--;
777 if (mp->mnt_writeopcount < 0)
778 panic("vn_finished_write: neg cnt");
779 if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
780 mp->mnt_writeopcount <= 0)
781 wakeup(&mp->mnt_writeopcount);
782}
783
784/*
785 * Request a filesystem to suspend write operations.
786 */
787void
788vfs_write_suspend(mp)
789 struct mount *mp;
790{
791 struct proc *p = curproc;
792
793 if (mp->mnt_kern_flag & MNTK_SUSPEND)
794 return;
795 mp->mnt_kern_flag |= MNTK_SUSPEND;
796 if (mp->mnt_writeopcount > 0)
797 (void) tsleep(&mp->mnt_writeopcount, PUSER - 1, "suspwt", 0);
798 VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p);
799 mp->mnt_kern_flag |= MNTK_SUSPENDED;
800}
801
802/*
803 * Request a filesystem to resume write operations.
804 */
805void
806vfs_write_resume(mp)
807 struct mount *mp;
808{
809
810 if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0)
811 return;
812 mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPENDED);
813 wakeup(&mp->mnt_writeopcount);
814 wakeup(&mp->mnt_flag);
815}
816
817static int
818filt_vnattach(struct knote *kn)
819{
820 struct vnode *vp;
821
822 if (kn->kn_fp->f_type != DTYPE_VNODE &&
823 kn->kn_fp->f_type != DTYPE_FIFO)
824 return (EBADF);
825
826 vp = (struct vnode *)kn->kn_fp->f_data;
827
828 /*
829 * XXX
830 * this is a hack simply to cause the filter attach to fail
831 * for non-ufs filesystems, until the support for them is done.
832 */
833 if ((vp)->v_tag != VT_UFS)
834 return (EOPNOTSUPP);
835
836 mtx_enter(&vp->v_pollinfo.vpi_lock, MTX_DEF);
836 mtx_lock(&vp->v_pollinfo.vpi_lock);
837 SLIST_INSERT_HEAD(&vp->v_pollinfo.vpi_selinfo.si_note, kn, kn_selnext);
837 SLIST_INSERT_HEAD(&vp->v_pollinfo.vpi_selinfo.si_note, kn, kn_selnext);
838 mtx_exit(&vp->v_pollinfo.vpi_lock, MTX_DEF);
838 mtx_unlock(&vp->v_pollinfo.vpi_lock);
839
840 return (0);
841}
842
843static void
844filt_vndetach(struct knote *kn)
845{
846 struct vnode *vp = (struct vnode *)kn->kn_fp->f_data;
847
839
840 return (0);
841}
842
843static void
844filt_vndetach(struct knote *kn)
845{
846 struct vnode *vp = (struct vnode *)kn->kn_fp->f_data;
847
848 mtx_enter(&vp->v_pollinfo.vpi_lock, MTX_DEF);
848 mtx_lock(&vp->v_pollinfo.vpi_lock);
849 SLIST_REMOVE(&vp->v_pollinfo.vpi_selinfo.si_note,
850 kn, knote, kn_selnext);
849 SLIST_REMOVE(&vp->v_pollinfo.vpi_selinfo.si_note,
850 kn, knote, kn_selnext);
851 mtx_exit(&vp->v_pollinfo.vpi_lock, MTX_DEF);
851 mtx_unlock(&vp->v_pollinfo.vpi_lock);
852}
853
854static int
855filt_vnode(struct knote *kn, long hint)
856{
857
858 if (kn->kn_sfflags & hint)
859 kn->kn_fflags |= hint;
860 return (kn->kn_fflags != 0);
861}
862
/*
 * Refuse knote attachment; used for filter slots with no backing
 * implementation for this file type.
 */
static int
filt_nullattach(struct knote *kn)
{
	return (ENXIO);
}
868
/*ARGSUSED*/
/*
 * Read filter: report the number of bytes between the file's current
 * offset and end of file.
 *
 * NOTE(review): reaches directly into the UFS inode (VTOI/i_size),
 * so this only works for UFS vnodes -- consistent with the VT_UFS
 * restriction in filt_vnattach().
 */
static int
filt_vnread(struct knote *kn, long hint)
{
	struct vnode *vp = (struct vnode *)kn->kn_fp->f_data;
	struct inode *ip = VTOI(vp);

	kn->kn_data = ip->i_size - kn->kn_fp->f_offset;
	return (kn->kn_data != 0);
}
879
880/*
881 * Simplified in-kernel wrapper calls for extended attribute access.
882 * Both calls pass in a NULL credential, authorizing as "kernel" access.
883 * Set IO_NODELOCKED in ioflg if the vnode is already locked.
884 */
885int
886vn_extattr_get(struct vnode *vp, int ioflg, const char *attrname, int *buflen,
887 char *buf, struct proc *p)
888{
889 struct uio auio;
890 struct iovec iov;
891 int error;
892
893 iov.iov_len = *buflen;
894 iov.iov_base = buf;
895
896 auio.uio_iov = &iov;
897 auio.uio_iovcnt = 1;
898 auio.uio_rw = UIO_READ;
899 auio.uio_segflg = UIO_SYSSPACE;
900 auio.uio_procp = p;
901 auio.uio_offset = 0;
902 auio.uio_resid = *buflen;
903
904 if ((ioflg & IO_NODELOCKED) == 0)
905 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
906
907 /* authorize attribute retrieval as kernel */
908 error = VOP_GETEXTATTR(vp, attrname, &auio, NULL, p);
909
910 if ((ioflg & IO_NODELOCKED) == 0)
911 VOP_UNLOCK(vp, 0, p);
912
913 if (error == 0) {
914 *buflen = *buflen - auio.uio_resid;
915 }
916
917 return (error);
918}
919
920/*
921 * XXX failure mode if partially written?
922 */
923int
924vn_extattr_set(struct vnode *vp, int ioflg, const char *attrname, int buflen,
925 char *buf, struct proc *p)
926{
927 struct uio auio;
928 struct iovec iov;
929 struct mount *mp;
930 int error;
931
932 iov.iov_len = buflen;
933 iov.iov_base = buf;
934
935 auio.uio_iov = &iov;
936 auio.uio_iovcnt = 1;
937 auio.uio_rw = UIO_WRITE;
938 auio.uio_segflg = UIO_SYSSPACE;
939 auio.uio_procp = p;
940 auio.uio_offset = 0;
941 auio.uio_resid = buflen;
942
943 if ((ioflg & IO_NODELOCKED) == 0) {
944 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
945 return (error);
946 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
947 }
948
949 /* authorize attribute setting as kernel */
950 error = VOP_SETEXTATTR(vp, attrname, &auio, NULL, p);
951
952 if ((ioflg & IO_NODELOCKED) == 0) {
953 vn_finished_write(mp);
954 VOP_UNLOCK(vp, 0, p);
955 }
956
957 return (error);
958}
959
960int
961vn_extattr_rm(struct vnode *vp, int ioflg, const char *attrname, struct proc *p)
962{
963 struct mount *mp;
964 int error;
965
966 if ((ioflg & IO_NODELOCKED) == 0) {
967 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
968 return (error);
969 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
970 }
971
972 /* authorize attribute removal as kernel */
973 error = VOP_SETEXTATTR(vp, attrname, NULL, NULL, p);
974
975 if ((ioflg & IO_NODELOCKED) == 0) {
976 vn_finished_write(mp);
977 VOP_UNLOCK(vp, 0, p);
978 }
979
980 return (error);
981}
852}
853
854static int
855filt_vnode(struct knote *kn, long hint)
856{
857
858 if (kn->kn_sfflags & hint)
859 kn->kn_fflags |= hint;
860 return (kn->kn_fflags != 0);
861}
862
863static int
864filt_nullattach(struct knote *kn)
865{
866 return (ENXIO);
867}
868
869/*ARGSUSED*/
870static int
871filt_vnread(struct knote *kn, long hint)
872{
873 struct vnode *vp = (struct vnode *)kn->kn_fp->f_data;
874 struct inode *ip = VTOI(vp);
875
876 kn->kn_data = ip->i_size - kn->kn_fp->f_offset;
877 return (kn->kn_data != 0);
878}
879
880/*
881 * Simplified in-kernel wrapper calls for extended attribute access.
882 * Both calls pass in a NULL credential, authorizing as "kernel" access.
883 * Set IO_NODELOCKED in ioflg if the vnode is already locked.
884 */
885int
886vn_extattr_get(struct vnode *vp, int ioflg, const char *attrname, int *buflen,
887 char *buf, struct proc *p)
888{
889 struct uio auio;
890 struct iovec iov;
891 int error;
892
893 iov.iov_len = *buflen;
894 iov.iov_base = buf;
895
896 auio.uio_iov = &iov;
897 auio.uio_iovcnt = 1;
898 auio.uio_rw = UIO_READ;
899 auio.uio_segflg = UIO_SYSSPACE;
900 auio.uio_procp = p;
901 auio.uio_offset = 0;
902 auio.uio_resid = *buflen;
903
904 if ((ioflg & IO_NODELOCKED) == 0)
905 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
906
907 /* authorize attribute retrieval as kernel */
908 error = VOP_GETEXTATTR(vp, attrname, &auio, NULL, p);
909
910 if ((ioflg & IO_NODELOCKED) == 0)
911 VOP_UNLOCK(vp, 0, p);
912
913 if (error == 0) {
914 *buflen = *buflen - auio.uio_resid;
915 }
916
917 return (error);
918}
919
920/*
921 * XXX failure mode if partially written?
922 */
923int
924vn_extattr_set(struct vnode *vp, int ioflg, const char *attrname, int buflen,
925 char *buf, struct proc *p)
926{
927 struct uio auio;
928 struct iovec iov;
929 struct mount *mp;
930 int error;
931
932 iov.iov_len = buflen;
933 iov.iov_base = buf;
934
935 auio.uio_iov = &iov;
936 auio.uio_iovcnt = 1;
937 auio.uio_rw = UIO_WRITE;
938 auio.uio_segflg = UIO_SYSSPACE;
939 auio.uio_procp = p;
940 auio.uio_offset = 0;
941 auio.uio_resid = buflen;
942
943 if ((ioflg & IO_NODELOCKED) == 0) {
944 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
945 return (error);
946 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
947 }
948
949 /* authorize attribute setting as kernel */
950 error = VOP_SETEXTATTR(vp, attrname, &auio, NULL, p);
951
952 if ((ioflg & IO_NODELOCKED) == 0) {
953 vn_finished_write(mp);
954 VOP_UNLOCK(vp, 0, p);
955 }
956
957 return (error);
958}
959
960int
961vn_extattr_rm(struct vnode *vp, int ioflg, const char *attrname, struct proc *p)
962{
963 struct mount *mp;
964 int error;
965
966 if ((ioflg & IO_NODELOCKED) == 0) {
967 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
968 return (error);
969 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
970 }
971
972 /* authorize attribute removal as kernel */
973 error = VOP_SETEXTATTR(vp, attrname, NULL, NULL, p);
974
975 if ((ioflg & IO_NODELOCKED) == 0) {
976 vn_finished_write(mp);
977 VOP_UNLOCK(vp, 0, p);
978 }
979
980 return (error);
981}