vfs_vnops.c: revision 90946 vs. revision 91140
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94
39 * $FreeBSD: head/sys/kern/vfs_vnops.c 90946 2002-02-20 00:11:57Z rwatson $
39 * $FreeBSD: head/sys/kern/vfs_vnops.c 91140 2002-02-23 11:12:57Z tanimura $
40 */
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/fcntl.h>
45#include <sys/file.h>
46#include <sys/stat.h>
47#include <sys/proc.h>
48#include <sys/lock.h>
49#include <sys/mount.h>
50#include <sys/mutex.h>
51#include <sys/namei.h>
52#include <sys/vnode.h>
53#include <sys/bio.h>
54#include <sys/buf.h>
55#include <sys/filio.h>
56#include <sys/ttycom.h>
57#include <sys/conf.h>
58#include <sys/syslog.h>
59
60#include <machine/limits.h>
61
62static int vn_closefile __P((struct file *fp, struct thread *td));
63static int vn_ioctl __P((struct file *fp, u_long com, caddr_t data,
64 struct thread *td));
65static int vn_read __P((struct file *fp, struct uio *uio,
66 struct ucred *cred, int flags, struct thread *td));
67static int vn_poll __P((struct file *fp, int events, struct ucred *cred,
68 struct thread *td));
69static int vn_kqfilter __P((struct file *fp, struct knote *kn));
70static int vn_statfile __P((struct file *fp, struct stat *sb, struct thread *td));
71static int vn_write __P((struct file *fp, struct uio *uio,
72 struct ucred *cred, int flags, struct thread *td));
73
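/*
 * A positional initializer: the slots are, in declaration order,
 * fo_read, fo_write, fo_ioctl, fo_poll, fo_kqfilter, fo_stat and
 * fo_close (see struct fileops in sys/file.h).
 */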
74struct fileops vnops = {
75 vn_read, vn_write, vn_ioctl, vn_poll, vn_kqfilter,
76 vn_statfile, vn_closefile
77};
78
79int
80vn_open(ndp, flagp, cmode)
81 register struct nameidata *ndp;
82 int *flagp, cmode;
83{
84 struct thread *td = ndp->ni_cnd.cn_thread;
85
86 return (vn_open_cred(ndp, flagp, cmode, td->td_proc->p_ucred));
87}
88
89/*
90 * Common code for vnode open operations.
91 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
92 *
 93 * Note that this does NOT free the nameidata on success, since the
 94 * NDINIT is done by the caller.
95 */
96int
97vn_open_cred(ndp, flagp, cmode, cred)
98 register struct nameidata *ndp;
99 int *flagp, cmode;
100 struct ucred *cred;
101{
102 struct vnode *vp;
103 struct mount *mp;
104 struct thread *td = ndp->ni_cnd.cn_thread;
105 struct vattr vat;
106 struct vattr *vap = &vat;
107 int mode, fmode, error;
108
109restart:
110 fmode = *flagp;
111 if (fmode & O_CREAT) {
112 ndp->ni_cnd.cn_nameiop = CREATE;
113 ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
114 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
115 ndp->ni_cnd.cn_flags |= FOLLOW;
116 bwillwrite();
117 if ((error = namei(ndp)) != 0)
118 return (error);
119 if (ndp->ni_vp == NULL) {
120 VATTR_NULL(vap);
121 vap->va_type = VREG;
122 vap->va_mode = cmode;
123 if (fmode & O_EXCL)
124 vap->va_vaflags |= VA_EXCLUSIVE;
125 if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) {
126 NDFREE(ndp, NDF_ONLY_PNBUF);
127 vput(ndp->ni_dvp);
128 if ((error = vn_start_write(NULL, &mp,
129 V_XSLEEP | PCATCH)) != 0)
130 return (error);
131 goto restart;
132 }
133 VOP_LEASE(ndp->ni_dvp, td, cred, LEASE_WRITE);
134 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
135 &ndp->ni_cnd, vap);
136 vput(ndp->ni_dvp);
137 vn_finished_write(mp);
138 if (error) {
139 NDFREE(ndp, NDF_ONLY_PNBUF);
140 return (error);
141 }
142 ASSERT_VOP_UNLOCKED(ndp->ni_dvp, "create");
143 ASSERT_VOP_LOCKED(ndp->ni_vp, "create");
144 fmode &= ~O_TRUNC;
145 vp = ndp->ni_vp;
146 } else {
147 if (ndp->ni_dvp == ndp->ni_vp)
148 vrele(ndp->ni_dvp);
149 else
150 vput(ndp->ni_dvp);
151 ndp->ni_dvp = NULL;
152 vp = ndp->ni_vp;
153 if (fmode & O_EXCL) {
154 error = EEXIST;
155 goto bad;
156 }
157 fmode &= ~O_CREAT;
158 }
159 } else {
160 ndp->ni_cnd.cn_nameiop = LOOKUP;
161 ndp->ni_cnd.cn_flags =
162 ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF;
163 if ((error = namei(ndp)) != 0)
164 return (error);
165 vp = ndp->ni_vp;
166 }
167 if (vp->v_type == VLNK) {
168 error = EMLINK;
169 goto bad;
170 }
171 if (vp->v_type == VSOCK) {
172 error = EOPNOTSUPP;
173 goto bad;
174 }
175 if ((fmode & O_CREAT) == 0) {
176 mode = 0;
177 if (fmode & (FWRITE | O_TRUNC)) {
178 if (vp->v_type == VDIR) {
179 error = EISDIR;
180 goto bad;
181 }
182 error = vn_writechk(vp);
183 if (error)
184 goto bad;
185 mode |= VWRITE;
186 }
187 if (fmode & FREAD)
188 mode |= VREAD;
189 if (mode) {
190 error = VOP_ACCESS(vp, mode, cred, td);
191 if (error)
192 goto bad;
193 }
194 }
195 if ((error = VOP_OPEN(vp, fmode, cred, td)) != 0)
196 goto bad;
197 /*
198 * Make sure that a VM object is created for VMIO support.
199 */
200 if (vn_canvmio(vp) == TRUE) {
201 if ((error = vfs_object_create(vp, td, cred)) != 0) {
202 VOP_UNLOCK(vp, 0, td);
203 VOP_CLOSE(vp, fmode, cred, td);
204 NDFREE(ndp, NDF_ONLY_PNBUF);
205 vrele(vp);
206 *flagp = fmode;
207 return (error);
208 }
209 }
210
211 if (fmode & FWRITE)
212 vp->v_writecount++;
213 *flagp = fmode;
214 return (0);
215bad:
216 NDFREE(ndp, NDF_ONLY_PNBUF);
217 vput(vp);
218 *flagp = fmode;
219 return (error);
220}
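/*
 * Illustrative sketch of a typical in-kernel caller (hypothetical;
 * "nd", "flags" and "path" are placeholder names, not part of this
 * file).  The caller performs the NDINIT, so it also owns the
 * nameidata cleanup on success:
 */
#if 0
	struct nameidata nd;
	int flags, error;

	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, td);
	flags = FREAD;
	if ((error = vn_open(&nd, &flags, 0)) != 0)
		return (error);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	/* vn_open() returns with the vnode locked (LOCKLEAF). */
	VOP_UNLOCK(nd.ni_vp, 0, td);
	/* ... use nd.ni_vp ... */
	error = vn_close(nd.ni_vp, FREAD, td->td_proc->p_ucred, td);
#endif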
221
222/*
223 * Check for write permissions on the specified vnode.
224 * Prototype text segments cannot be written.
225 */
226int
227vn_writechk(vp)
228 register struct vnode *vp;
229{
230
231 /*
 232 * If there's shared text associated with the
 233 * vnode, we cannot allow writing: the mapped
 234 * text would change beneath running programs.
235 */
236 if (vp->v_flag & VTEXT)
237 return (ETXTBSY);
238 return (0);
239}
240
241/*
242 * Vnode close call
243 */
244int
245vn_close(vp, flags, cred, td)
246 register struct vnode *vp;
247 int flags;
248 struct ucred *cred;
249 struct thread *td;
250{
251 int error;
252
253 if (flags & FWRITE)
254 vp->v_writecount--;
255 error = VOP_CLOSE(vp, flags, cred, td);
256 /*
257 * XXX - In certain instances VOP_CLOSE has to do the vrele
258 * itself. If the vrele has been done, it will return EAGAIN
259 * to indicate that the vrele should not be done again. When
260 * this happens, we just return success. The correct thing to
261 * do would be to have all VOP_CLOSE instances do the vrele.
262 */
263 if (error == EAGAIN)
264 return (0);
265 vrele(vp);
266 return (error);
267}
268
269static __inline
270int
271sequential_heuristic(struct uio *uio, struct file *fp)
272{
273
274 /*
275 * Sequential heuristic - detect sequential operation
276 */
277 if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
278 uio->uio_offset == fp->f_nextoff) {
279 /*
280 * XXX we assume that the filesystem block size is
281 * the default. Not true, but still gives us a pretty
282 * good indicator of how sequential the read operations
283 * are.
284 */
285 fp->f_seqcount += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
286 if (fp->f_seqcount >= 127)
287 fp->f_seqcount = 127;
288 return(fp->f_seqcount << 16);
289 }
290
291 /*
292 * Not sequential, quick draw-down of seqcount
293 */
294 if (fp->f_seqcount > 1)
295 fp->f_seqcount = 1;
296 else
297 fp->f_seqcount = 0;
298 return(0);
299}
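/*
 * The return value above is OR'ed into the ioflag handed to
 * VOP_READ()/VOP_WRITE() (see vn_read()/vn_write() below): the
 * saturated sequential count (0..127) travels in bits 16 and up,
 * leaving the low bits free for the IO_* flags.
 */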
300
301/*
302 * Package up an I/O request on a vnode into a uio and do it.
303 */
304int
305vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, td)
306 enum uio_rw rw;
307 struct vnode *vp;
308 caddr_t base;
309 int len;
310 off_t offset;
311 enum uio_seg segflg;
312 int ioflg;
313 struct ucred *cred;
314 int *aresid;
315 struct thread *td;
316{
317 struct uio auio;
318 struct iovec aiov;
319 struct mount *mp;
320 int error;
321
322 if ((ioflg & IO_NODELOCKED) == 0) {
323 mp = NULL;
324 if (rw == UIO_WRITE &&
325 vp->v_type != VCHR &&
326 (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
327 return (error);
328 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
329 }
330 auio.uio_iov = &aiov;
331 auio.uio_iovcnt = 1;
332 aiov.iov_base = base;
333 aiov.iov_len = len;
334 auio.uio_resid = len;
335 auio.uio_offset = offset;
336 auio.uio_segflg = segflg;
337 auio.uio_rw = rw;
338 auio.uio_td = td;
339 if (rw == UIO_READ) {
340 error = VOP_READ(vp, &auio, ioflg, cred);
341 } else {
342 error = VOP_WRITE(vp, &auio, ioflg, cred);
343 }
344 if (aresid)
345 *aresid = auio.uio_resid;
346 else
347 if (auio.uio_resid && error == 0)
348 error = EIO;
349 if ((ioflg & IO_NODELOCKED) == 0) {
350 vn_finished_write(mp);
351 VOP_UNLOCK(vp, 0, td);
352 }
353 return (error);
354}
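/*
 * Illustrative sketch (hypothetical; "kbuf" and "resid" are
 * placeholder names): reading the first 512 bytes of an unlocked
 * vnode into a kernel buffer.
 */
#if 0
	char kbuf[512];
	int resid, error;

	error = vn_rdwr(UIO_READ, vp, (caddr_t)kbuf, sizeof(kbuf),
	    (off_t)0, UIO_SYSSPACE, 0, td->td_proc->p_ucred, &resid, td);
	/* on success, sizeof(kbuf) - resid bytes were actually read */
#endif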
355
356/*
357 * Package up an I/O request on a vnode into a uio and do it. The I/O
358 * request is split up into smaller chunks and we try to avoid saturating
359 * the buffer cache while potentially holding a vnode locked, so we
360 * check bwillwrite() before calling vn_rdwr(). We also call uio_yield()
361 * to give other processes a chance to lock the vnode (either other processes
362 * core'ing the same binary, or unrelated processes scanning the directory).
363 */
364int
365vn_rdwr_inchunks(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, td)
366 enum uio_rw rw;
367 struct vnode *vp;
368 caddr_t base;
369 int len;
370 off_t offset;
371 enum uio_seg segflg;
372 int ioflg;
373 struct ucred *cred;
374 int *aresid;
375 struct thread *td;
376{
377 int error = 0;
378
379 do {
380 int chunk = (len > MAXBSIZE) ? MAXBSIZE : len;
381
382 if (rw != UIO_READ && vp->v_type == VREG)
383 bwillwrite();
384 error = vn_rdwr(rw, vp, base, chunk, offset, segflg,
385 ioflg, cred, aresid, td);
386 len -= chunk; /* aresid calc already includes length */
387 if (error)
388 break;
389 offset += chunk;
390 base += chunk;
391 uio_yield();
392 } while (len);
393 if (aresid)
394 *aresid += len;
395 return (error);
396}
397
398/*
399 * File table vnode read routine.
400 */
401static int
402vn_read(fp, uio, cred, flags, td)
403 struct file *fp;
404 struct uio *uio;
405 struct ucred *cred;
406 struct thread *td;
407 int flags;
408{
409 struct vnode *vp;
410 int error, ioflag;
411
412 KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
413 uio->uio_td, td));
414 vp = (struct vnode *)fp->f_data;
415 ioflag = 0;
416 if (fp->f_flag & FNONBLOCK)
417 ioflag |= IO_NDELAY;
418 if (fp->f_flag & O_DIRECT)
419 ioflag |= IO_DIRECT;
420 VOP_LEASE(vp, td, cred, LEASE_READ);
421 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
422 if ((flags & FOF_OFFSET) == 0)
423 uio->uio_offset = fp->f_offset;
424
425 ioflag |= sequential_heuristic(uio, fp);
426
427 error = VOP_READ(vp, uio, ioflag, cred);
428 if ((flags & FOF_OFFSET) == 0)
429 fp->f_offset = uio->uio_offset;
430 fp->f_nextoff = uio->uio_offset;
431 VOP_UNLOCK(vp, 0, td);
432 return (error);
433}
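/*
 * In both vn_read() and vn_write(), FOF_OFFSET distinguishes the
 * pread(2)/pwrite(2) style of call, where the caller supplies
 * uio_offset, from the read(2)/write(2) style, where the descriptor's
 * f_offset is used and then updated.
 */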
434
435/*
436 * File table vnode write routine.
437 */
438static int
439vn_write(fp, uio, cred, flags, td)
440 struct file *fp;
441 struct uio *uio;
442 struct ucred *cred;
443 struct thread *td;
444 int flags;
445{
446 struct vnode *vp;
447 struct mount *mp;
448 int error, ioflag;
449
450 KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
451 uio->uio_td, td));
452 vp = (struct vnode *)fp->f_data;
453 if (vp->v_type == VREG)
454 bwillwrite();
455 ioflag = IO_UNIT;
456 if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
457 ioflag |= IO_APPEND;
458 if (fp->f_flag & FNONBLOCK)
459 ioflag |= IO_NDELAY;
460 if (fp->f_flag & O_DIRECT)
461 ioflag |= IO_DIRECT;
462 if ((fp->f_flag & O_FSYNC) ||
463 (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
464 ioflag |= IO_SYNC;
465 mp = NULL;
466 if (vp->v_type != VCHR &&
467 (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
468 return (error);
469 VOP_LEASE(vp, td, cred, LEASE_WRITE);
470 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
471 if ((flags & FOF_OFFSET) == 0)
472 uio->uio_offset = fp->f_offset;
473 ioflag |= sequential_heuristic(uio, fp);
474 error = VOP_WRITE(vp, uio, ioflag, cred);
475 if ((flags & FOF_OFFSET) == 0)
476 fp->f_offset = uio->uio_offset;
477 fp->f_nextoff = uio->uio_offset;
478 VOP_UNLOCK(vp, 0, td);
479 vn_finished_write(mp);
480 return (error);
481}
482
483/*
484 * File table vnode stat routine.
485 */
486static int
487vn_statfile(fp, sb, td)
488 struct file *fp;
489 struct stat *sb;
490 struct thread *td;
491{
492 struct vnode *vp = (struct vnode *)fp->f_data;
493 int error;
494
495 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
496 error = vn_stat(vp, sb, td);
497 VOP_UNLOCK(vp, 0, td);
498
499 return (error);
500}
501
502int
503vn_stat(vp, sb, td)
504 struct vnode *vp;
505 register struct stat *sb;
506 struct thread *td;
507{
508 struct vattr vattr;
509 register struct vattr *vap;
510 int error;
511 u_short mode;
512
513 vap = &vattr;
514 error = VOP_GETATTR(vp, vap, td->td_proc->p_ucred, td);
515 if (error)
516 return (error);
517
518 /*
519 * Zero the spare stat fields
520 */
521 sb->st_lspare = 0;
522 sb->st_qspare[0] = 0;
523 sb->st_qspare[1] = 0;
524
525 /*
526 * Copy from vattr table
527 */
528 if (vap->va_fsid != VNOVAL)
529 sb->st_dev = vap->va_fsid;
530 else
531 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
532 sb->st_ino = vap->va_fileid;
533 mode = vap->va_mode;
534 switch (vap->va_type) {
535 case VREG:
536 mode |= S_IFREG;
537 break;
538 case VDIR:
539 mode |= S_IFDIR;
540 break;
541 case VBLK:
542 mode |= S_IFBLK;
543 break;
544 case VCHR:
545 mode |= S_IFCHR;
546 break;
547 case VLNK:
548 mode |= S_IFLNK;
549 /* This is a cosmetic change, symlinks do not have a mode. */
550 if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW)
 551 mode &= ~ACCESSPERMS; /* 0000 */
 552 else
 553 mode |= ACCESSPERMS; /* 0777 */
554 break;
555 case VSOCK:
556 mode |= S_IFSOCK;
557 break;
558 case VFIFO:
559 mode |= S_IFIFO;
560 break;
561 default:
562 return (EBADF);
563 };
564 sb->st_mode = mode;
565 sb->st_nlink = vap->va_nlink;
566 sb->st_uid = vap->va_uid;
567 sb->st_gid = vap->va_gid;
568 sb->st_rdev = vap->va_rdev;
569 if (vap->va_size > OFF_MAX)
570 return (EOVERFLOW);
571 sb->st_size = vap->va_size;
572 sb->st_atimespec = vap->va_atime;
573 sb->st_mtimespec = vap->va_mtime;
574 sb->st_ctimespec = vap->va_ctime;
575
576 /*
577 * According to www.opengroup.org, the meaning of st_blksize is
578 * "a filesystem-specific preferred I/O block size for this
579 * object. In some filesystem types, this may vary from file
580 * to file"
581 * Default to PAGE_SIZE after much discussion.
582 */
583
584 if (vap->va_type == VREG) {
585 sb->st_blksize = vap->va_blocksize;
586 } else if (vn_isdisk(vp, NULL)) {
587 sb->st_blksize = vp->v_rdev->si_bsize_best;
588 if (sb->st_blksize < vp->v_rdev->si_bsize_phys)
589 sb->st_blksize = vp->v_rdev->si_bsize_phys;
590 if (sb->st_blksize < BLKDEV_IOSIZE)
591 sb->st_blksize = BLKDEV_IOSIZE;
592 } else {
593 sb->st_blksize = PAGE_SIZE;
594 }
595
596 sb->st_flags = vap->va_flags;
597 if (suser_xxx(td->td_proc->p_ucred, 0, 0))
598 sb->st_gen = 0;
599 else
600 sb->st_gen = vap->va_gen;
601
602#if (S_BLKSIZE == 512)
603 /* Optimize this case */
604 sb->st_blocks = vap->va_bytes >> 9;
605#else
606 sb->st_blocks = vap->va_bytes / S_BLKSIZE;
607#endif
608 return (0);
609}
610
611/*
612 * File table vnode ioctl routine.
613 */
614static int
615vn_ioctl(fp, com, data, td)
616 struct file *fp;
617 u_long com;
618 caddr_t data;
619 struct thread *td;
620{
621 register struct vnode *vp = ((struct vnode *)fp->f_data);
vfs_vnops.c (91140), lines 622-659 (line 622 is new; lines 623-659 match 90946 lines 622-658):

 622 struct vnode *vpold;
 623 struct vattr vattr;
 624 int error;
 625
 626 switch (vp->v_type) {
 627
 628 case VREG:
 629 case VDIR:
 630 if (com == FIONREAD) {
 631 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 632 error = VOP_GETATTR(vp, &vattr, td->td_proc->p_ucred, td);
 633 VOP_UNLOCK(vp, 0, td);
 634 if (error)
 635 return (error);
 636 *(int *)data = vattr.va_size - fp->f_offset;
 637 return (0);
 638 }
 639 if (com == FIONBIO || com == FIOASYNC) /* XXX */
 640 return (0); /* XXX */
 641 /* fall into ... */
 642
 643 default:
 644#if 0
 645 return (ENOTTY);
 646#endif
 647 case VFIFO:
 648 case VCHR:
 649 case VBLK:
 650 if (com == FIODTYPE) {
 651 if (vp->v_type != VCHR && vp->v_type != VBLK)
 652 return (ENOTTY);
 653 *(int *)data = devsw(vp->v_rdev)->d_flags & D_TYPEMASK;
 654 return (0);
 655 }
 656 error = VOP_IOCTL(vp, com, data, fp->f_flag, td->td_proc->p_ucred, td);
 657 if (error == 0 && com == TIOCSCTTY) {
 658
 659 /* Do nothing if reassigning same control tty */

vfs_vnops.c (90946), lines 659-667 (deleted):

 659 if (td->td_proc->p_session->s_ttyvp == vp)
 660 return (0);
 661
 662 /* Get rid of reference to old control tty */
 663 if (td->td_proc->p_session->s_ttyvp)
 664 vrele(td->td_proc->p_session->s_ttyvp);
 665
 666 td->td_proc->p_session->s_ttyvp = vp;
 667 VREF(vp);

vfs_vnops.c (91140), lines 660-676 (added):

 660 PGRPSESS_XLOCK();
 661 if (td->td_proc->p_session->s_ttyvp == vp) {
 662 PGRPSESS_XUNLOCK();
 663 return (0);
 664 }
 665
 666 vpold = td->td_proc->p_session->s_ttyvp;
 667 VREF(vp);
 668 SESS_LOCK(td->td_proc->p_session);
 669 td->td_proc->p_session->s_ttyvp = vp;
 670 SESS_UNLOCK(td->td_proc->p_session);
 671
 672 PGRPSESS_XUNLOCK();
 673
 674 /* Get rid of reference to old control tty */
 675 if (vpold)
 676 vrele(vpold);
677 }
678 return (error);
679 }
680}
681
682/*
683 * File table vnode poll routine.
684 */
685static int
686vn_poll(fp, events, cred, td)
687 struct file *fp;
688 int events;
689 struct ucred *cred;
690 struct thread *td;
691{
692
693 return (VOP_POLL(((struct vnode *)fp->f_data), events, cred, td));
694}
695
696/*
697 * Check that the vnode is still valid, and if so
698 * acquire requested lock.
699 */
700int
701#ifndef DEBUG_LOCKS
702vn_lock(vp, flags, td)
703#else
704debug_vn_lock(vp, flags, td, filename, line)
705#endif
706 struct vnode *vp;
707 int flags;
708 struct thread *td;
709#ifdef DEBUG_LOCKS
710 const char *filename;
711 int line;
712#endif
713{
714 int error;
715
716 do {
717 if ((flags & LK_INTERLOCK) == 0)
718 mtx_lock(&vp->v_interlock);
719 if ((vp->v_flag & VXLOCK) && vp->v_vxproc != curthread) {
720 vp->v_flag |= VXWANT;
721 msleep(vp, &vp->v_interlock, PINOD | PDROP,
722 "vn_lock", 0);
723 error = ENOENT;
724 } else {
725#if 0
726 /* this can now occur in normal operation */
727 if (vp->v_vxproc != NULL)
728 log(LOG_INFO, "VXLOCK interlock avoided in vn_lock\n");
729#endif
730#ifdef DEBUG_LOCKS
731 vp->filename = filename;
732 vp->line = line;
733#endif
734 error = VOP_LOCK(vp,
735 flags | LK_NOPAUSE | LK_INTERLOCK, td);
736 if (error == 0)
737 return (error);
738 }
739 flags &= ~LK_INTERLOCK;
740 } while (flags & LK_RETRY);
741 return (error);
742}
743
744/*
745 * File table vnode close routine.
746 */
747static int
748vn_closefile(fp, td)
749 struct file *fp;
750 struct thread *td;
751{
752
753 fp->f_ops = &badfileops;
754 return (vn_close(((struct vnode *)fp->f_data), fp->f_flag,
755 fp->f_cred, td));
756}
757
758/*
 759 * Prepare to start a filesystem write operation. If the operation is
760 * permitted, then we bump the count of operations in progress and
761 * proceed. If a suspend request is in progress, we wait until the
762 * suspension is over, and then proceed.
763 */
764int
765vn_start_write(vp, mpp, flags)
766 struct vnode *vp;
767 struct mount **mpp;
768 int flags;
769{
770 struct mount *mp;
771 int error;
772
773 /*
 774 * If a vnode is provided, get and return the mount point to
 775 * which it will write.
776 */
777 if (vp != NULL) {
778 if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
779 *mpp = NULL;
780 if (error != EOPNOTSUPP)
781 return (error);
782 return (0);
783 }
784 }
785 if ((mp = *mpp) == NULL)
786 return (0);
787 /*
788 * Check on status of suspension.
789 */
790 while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
791 if (flags & V_NOWAIT)
792 return (EWOULDBLOCK);
793 error = tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH),
794 "suspfs", 0);
795 if (error)
796 return (error);
797 }
798 if (flags & V_XSLEEP)
799 return (0);
800 mp->mnt_writeopcount++;
801 return (0);
802}
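/*
 * Illustrative sketch (hypothetical) of the bracket this function
 * forms with vn_finished_write(), in the style used by vn_rdwr()
 * above and the extended attribute helpers below:
 */
#if 0
	struct mount *mp;

	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	error = VOP_SETATTR(vp, &vattr, td->td_proc->p_ucred, td);
	VOP_UNLOCK(vp, 0, td);
	vn_finished_write(mp);
#endif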
803
804/*
805 * Secondary suspension. Used by operations such as vop_inactive
806 * routines that are needed by the higher level functions. These
807 * are allowed to proceed until all the higher level functions have
808 * completed (indicated by mnt_writeopcount dropping to zero). At that
809 * time, these operations are halted until the suspension is over.
810 */
811int
812vn_write_suspend_wait(vp, mp, flags)
813 struct vnode *vp;
814 struct mount *mp;
815 int flags;
816{
817 int error;
818
819 if (vp != NULL) {
820 if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) {
821 if (error != EOPNOTSUPP)
822 return (error);
823 return (0);
824 }
825 }
826 /*
827 * If we are not suspended or have not yet reached suspended
828 * mode, then let the operation proceed.
829 */
830 if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPENDED) == 0)
831 return (0);
832 if (flags & V_NOWAIT)
833 return (EWOULDBLOCK);
834 /*
835 * Wait for the suspension to finish.
836 */
837 return (tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH),
838 "suspfs", 0));
839}
840
841/*
842 * Filesystem write operation has completed. If we are suspending and this
843 * operation is the last one, notify the suspender that the suspension is
844 * now in effect.
845 */
846void
847vn_finished_write(mp)
848 struct mount *mp;
849{
850
851 if (mp == NULL)
852 return;
853 mp->mnt_writeopcount--;
854 if (mp->mnt_writeopcount < 0)
855 panic("vn_finished_write: neg cnt");
856 if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
857 mp->mnt_writeopcount <= 0)
858 wakeup(&mp->mnt_writeopcount);
859}
860
861/*
862 * Request a filesystem to suspend write operations.
863 */
864void
865vfs_write_suspend(mp)
866 struct mount *mp;
867{
868 struct thread *td = curthread;
869
870 if (mp->mnt_kern_flag & MNTK_SUSPEND)
871 return;
872 mp->mnt_kern_flag |= MNTK_SUSPEND;
873 if (mp->mnt_writeopcount > 0)
874 (void) tsleep(&mp->mnt_writeopcount, PUSER - 1, "suspwt", 0);
875 VFS_SYNC(mp, MNT_WAIT, td->td_proc->p_ucred, td);
876 mp->mnt_kern_flag |= MNTK_SUSPENDED;
877}
878
879/*
880 * Request a filesystem to resume write operations.
881 */
882void
883vfs_write_resume(mp)
884 struct mount *mp;
885{
886
887 if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0)
888 return;
889 mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPENDED);
890 wakeup(&mp->mnt_writeopcount);
891 wakeup(&mp->mnt_flag);
892}
893
894static int
895vn_kqfilter(struct file *fp, struct knote *kn)
896{
897
898 return (VOP_KQFILTER(((struct vnode *)fp->f_data), kn));
899}
900
901/*
902 * Simplified in-kernel wrapper calls for extended attribute access.
903 * Both calls pass in a NULL credential, authorizing as "kernel" access.
904 * Set IO_NODELOCKED in ioflg if the vnode is already locked.
905 */
906int
907vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace,
908 const char *attrname, int *buflen, char *buf, struct thread *td)
909{
910 struct uio auio;
911 struct iovec iov;
912 int error;
913
914 iov.iov_len = *buflen;
915 iov.iov_base = buf;
916
917 auio.uio_iov = &iov;
918 auio.uio_iovcnt = 1;
919 auio.uio_rw = UIO_READ;
920 auio.uio_segflg = UIO_SYSSPACE;
921 auio.uio_td = td;
922 auio.uio_offset = 0;
923 auio.uio_resid = *buflen;
924
925 if ((ioflg & IO_NODELOCKED) == 0)
926 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
927
928 /* authorize attribute retrieval as kernel */
929 error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, NULL,
930 td);
931
932 if ((ioflg & IO_NODELOCKED) == 0)
933 VOP_UNLOCK(vp, 0, td);
934
935 if (error == 0) {
936 *buflen = *buflen - auio.uio_resid;
937 }
938
939 return (error);
940}
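/*
 * Illustrative sketch (hypothetical; "myattr" and "abuf" are
 * placeholder names): fetching a small system-namespace attribute
 * with the vnode already locked.  buflen is value/result: on return
 * it holds the number of bytes actually read.
 */
#if 0
	char abuf[64];
	int ablen, error;

	ablen = sizeof(abuf);
	error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
	    "myattr", &ablen, abuf, td);
#endif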
941
942/*
943 * XXX failure mode if partially written?
944 */
945int
946vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace,
947 const char *attrname, int buflen, char *buf, struct thread *td)
948{
949 struct uio auio;
950 struct iovec iov;
951 struct mount *mp;
952 int error;
953
954 iov.iov_len = buflen;
955 iov.iov_base = buf;
956
957 auio.uio_iov = &iov;
958 auio.uio_iovcnt = 1;
959 auio.uio_rw = UIO_WRITE;
960 auio.uio_segflg = UIO_SYSSPACE;
961 auio.uio_td = td;
962 auio.uio_offset = 0;
963 auio.uio_resid = buflen;
964
965 if ((ioflg & IO_NODELOCKED) == 0) {
966 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
967 return (error);
968 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
969 }
970
971 /* authorize attribute setting as kernel */
972 error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, td);
973
974 if ((ioflg & IO_NODELOCKED) == 0) {
975 vn_finished_write(mp);
976 VOP_UNLOCK(vp, 0, td);
977 }
978
979 return (error);
980}
981
982int
983vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
984 const char *attrname, struct thread *td)
985{
986 struct mount *mp;
987 int error;
988
989 if ((ioflg & IO_NODELOCKED) == 0) {
990 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
991 return (error);
992 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
993 }
994
995 /* authorize attribute removal as kernel */
996 error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, NULL, td);
997
998 if ((ioflg & IO_NODELOCKED) == 0) {
999 vn_finished_write(mp);
1000 VOP_UNLOCK(vp, 0, td);
1001 }
1002
1003 return (error);
1004}