40 */ 41 42#include <sys/param.h> 43#include <sys/systm.h> 44#include <sys/fcntl.h> 45#include <sys/file.h> 46#include <sys/stat.h> 47#include <sys/proc.h> 48#include <sys/mount.h> 49#include <sys/mutex.h> 50#include <sys/namei.h> 51#include <sys/vnode.h> 52#include <sys/bio.h> 53#include <sys/buf.h> 54#include <sys/filio.h> 55#include <sys/ttycom.h> 56#include <sys/conf.h> 57 58#include <ufs/ufs/quota.h> 59#include <ufs/ufs/inode.h> 60 61static int vn_closefile __P((struct file *fp, struct proc *p)); 62static int vn_ioctl __P((struct file *fp, u_long com, caddr_t data, 63 struct proc *p)); 64static int vn_read __P((struct file *fp, struct uio *uio, 65 struct ucred *cred, int flags, struct proc *p)); 66static int vn_poll __P((struct file *fp, int events, struct ucred *cred, 67 struct proc *p)); 68static int vn_statfile __P((struct file *fp, struct stat *sb, struct proc *p)); 69static int vn_write __P((struct file *fp, struct uio *uio, 70 struct ucred *cred, int flags, struct proc *p)); 71 72struct fileops vnops = 73 { vn_read, vn_write, vn_ioctl, vn_poll, vn_statfile, vn_closefile }; 74 75static int filt_nullattach(struct knote *kn); 76static int filt_vnattach(struct knote *kn); 77static void filt_vndetach(struct knote *kn); 78static int filt_vnode(struct knote *kn, long hint); 79static int filt_vnread(struct knote *kn, long hint); 80 81struct filterops vn_filtops = 82 { 1, filt_vnattach, filt_vndetach, filt_vnode }; 83 84/* 85 * XXX 86 * filt_vnread is ufs-specific, so the attach routine should really 87 * switch out to different filterops based on the vn filetype 88 */ 89struct filterops vn_rwfiltops[] = { 90 { 1, filt_vnattach, filt_vndetach, filt_vnread }, 91 { 1, filt_nullattach, NULL, NULL }, 92}; 93 94/* 95 * Common code for vnode open operations. 96 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine. 97 * 98 * Note that this does NOT free nameidata for the successful case, 99 * due to the NDINIT being done elsewhere. 
100 */ 101int 102vn_open(ndp, flagp, cmode) 103 register struct nameidata *ndp; 104 int *flagp, cmode; 105{ 106 struct vnode *vp; 107 struct mount *mp; 108 struct proc *p = ndp->ni_cnd.cn_proc; 109 struct ucred *cred = p->p_ucred; 110 struct vattr vat; 111 struct vattr *vap = &vat; 112 int mode, fmode, error; 113 114restart: 115 fmode = *flagp; 116 if (fmode & O_CREAT) { 117 ndp->ni_cnd.cn_nameiop = CREATE; 118 ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; 119 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0) 120 ndp->ni_cnd.cn_flags |= FOLLOW; 121 bwillwrite(); 122 if ((error = namei(ndp)) != 0) 123 return (error); 124 if (ndp->ni_vp == NULL) { 125 VATTR_NULL(vap); 126 vap->va_type = VREG; 127 vap->va_mode = cmode; 128 if (fmode & O_EXCL) 129 vap->va_vaflags |= VA_EXCLUSIVE; 130 if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) { 131 NDFREE(ndp, NDF_ONLY_PNBUF); 132 vput(ndp->ni_dvp); 133 if ((error = vn_start_write(NULL, &mp, 134 V_XSLEEP | PCATCH)) != 0) 135 return (error); 136 goto restart; 137 } 138 VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE); 139 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, 140 &ndp->ni_cnd, vap); 141 vput(ndp->ni_dvp); 142 vn_finished_write(mp); 143 if (error) { 144 NDFREE(ndp, NDF_ONLY_PNBUF); 145 return (error); 146 } 147 ASSERT_VOP_UNLOCKED(ndp->ni_dvp, "create"); 148 ASSERT_VOP_LOCKED(ndp->ni_vp, "create"); 149 fmode &= ~O_TRUNC; 150 vp = ndp->ni_vp; 151 } else { 152 if (ndp->ni_dvp == ndp->ni_vp) 153 vrele(ndp->ni_dvp); 154 else 155 vput(ndp->ni_dvp); 156 ndp->ni_dvp = NULL; 157 vp = ndp->ni_vp; 158 if (fmode & O_EXCL) { 159 error = EEXIST; 160 goto bad; 161 } 162 fmode &= ~O_CREAT; 163 } 164 } else { 165 ndp->ni_cnd.cn_nameiop = LOOKUP; 166 ndp->ni_cnd.cn_flags = 167 ((fmode & O_NOFOLLOW) ? 
NOFOLLOW : FOLLOW) | LOCKLEAF; 168 if ((error = namei(ndp)) != 0) 169 return (error); 170 vp = ndp->ni_vp; 171 } 172 if (vp->v_type == VLNK) { 173 error = EMLINK; 174 goto bad; 175 } 176 if (vp->v_type == VSOCK) { 177 error = EOPNOTSUPP; 178 goto bad; 179 } 180 if ((fmode & O_CREAT) == 0) { 181 mode = 0; 182 if (fmode & (FWRITE | O_TRUNC)) { 183 if (vp->v_type == VDIR) { 184 error = EISDIR; 185 goto bad; 186 } 187 error = vn_writechk(vp); 188 if (error) 189 goto bad; 190 mode |= VWRITE; 191 } 192 if (fmode & FREAD) 193 mode |= VREAD; 194 if (mode) { 195 error = VOP_ACCESS(vp, mode, cred, p); 196 if (error) 197 goto bad; 198 } 199 } 200 if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0) 201 goto bad; 202 /* 203 * Make sure that a VM object is created for VMIO support. 204 */ 205 if (vn_canvmio(vp) == TRUE) { 206 if ((error = vfs_object_create(vp, p, cred)) != 0) 207 goto bad; 208 } 209 210 if (fmode & FWRITE) 211 vp->v_writecount++; 212 *flagp = fmode; 213 return (0); 214bad: 215 NDFREE(ndp, NDF_ONLY_PNBUF); 216 vput(vp); 217 *flagp = fmode; 218 return (error); 219} 220 221/* 222 * Check for write permissions on the specified vnode. 223 * Prototype text segments cannot be written. 224 */ 225int 226vn_writechk(vp) 227 register struct vnode *vp; 228{ 229 230 /* 231 * If there's shared text associated with 232 * the vnode, try to free it up once. If 233 * we fail, we can't allow writing. 
234 */ 235 if (vp->v_flag & VTEXT) 236 return (ETXTBSY); 237 return (0); 238} 239 240/* 241 * Vnode close call 242 */ 243int 244vn_close(vp, flags, cred, p) 245 register struct vnode *vp; 246 int flags; 247 struct ucred *cred; 248 struct proc *p; 249{ 250 int error; 251 252 if (flags & FWRITE) 253 vp->v_writecount--; 254 error = VOP_CLOSE(vp, flags, cred, p); 255 vrele(vp); 256 return (error); 257} 258 259static __inline 260int 261sequential_heuristic(struct uio *uio, struct file *fp) 262{ 263 /* 264 * Sequential heuristic - detect sequential operation 265 */ 266 if ((uio->uio_offset == 0 && fp->f_seqcount > 0) || 267 uio->uio_offset == fp->f_nextoff) { 268 /* 269 * XXX we assume that the filesystem block size is 270 * the default. Not true, but still gives us a pretty 271 * good indicator of how sequential the read operations 272 * are. 273 */ 274 fp->f_seqcount += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE; 275 if (fp->f_seqcount >= 127) 276 fp->f_seqcount = 127; 277 return(fp->f_seqcount << 16); 278 } 279 280 /* 281 * Not sequential, quick draw-down of seqcount 282 */ 283 if (fp->f_seqcount > 1) 284 fp->f_seqcount = 1; 285 else 286 fp->f_seqcount = 0; 287 return(0); 288} 289 290/* 291 * Package up an I/O request on a vnode into a uio and do it. 
292 */ 293int 294vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p) 295 enum uio_rw rw; 296 struct vnode *vp; 297 caddr_t base; 298 int len; 299 off_t offset; 300 enum uio_seg segflg; 301 int ioflg; 302 struct ucred *cred; 303 int *aresid; 304 struct proc *p; 305{ 306 struct uio auio; 307 struct iovec aiov; 308 struct mount *mp; 309 int error; 310 311 if ((ioflg & IO_NODELOCKED) == 0) { 312 mp = NULL; 313 if (rw == UIO_WRITE && 314 vp->v_type != VCHR && 315 (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 316 return (error); 317 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 318 } 319 auio.uio_iov = &aiov; 320 auio.uio_iovcnt = 1; 321 aiov.iov_base = base; 322 aiov.iov_len = len; 323 auio.uio_resid = len; 324 auio.uio_offset = offset; 325 auio.uio_segflg = segflg; 326 auio.uio_rw = rw; 327 auio.uio_procp = p; 328 if (rw == UIO_READ) { 329 error = VOP_READ(vp, &auio, ioflg, cred); 330 } else { 331 error = VOP_WRITE(vp, &auio, ioflg, cred); 332 } 333 if (aresid) 334 *aresid = auio.uio_resid; 335 else 336 if (auio.uio_resid && error == 0) 337 error = EIO; 338 if ((ioflg & IO_NODELOCKED) == 0) { 339 vn_finished_write(mp); 340 VOP_UNLOCK(vp, 0, p); 341 } 342 return (error); 343} 344 345/* 346 * File table vnode read routine. 
347 */ 348static int 349vn_read(fp, uio, cred, flags, p) 350 struct file *fp; 351 struct uio *uio; 352 struct ucred *cred; 353 struct proc *p; 354 int flags; 355{ 356 struct vnode *vp; 357 int error, ioflag; 358 359 KASSERT(uio->uio_procp == p, ("uio_procp %p is not p %p", 360 uio->uio_procp, p)); 361 vp = (struct vnode *)fp->f_data; 362 ioflag = 0; 363 if (fp->f_flag & FNONBLOCK) 364 ioflag |= IO_NDELAY; 365 VOP_LEASE(vp, p, cred, LEASE_READ); 366 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p); 367 if ((flags & FOF_OFFSET) == 0) 368 uio->uio_offset = fp->f_offset; 369 370 ioflag |= sequential_heuristic(uio, fp); 371 372 error = VOP_READ(vp, uio, ioflag, cred); 373 if ((flags & FOF_OFFSET) == 0) 374 fp->f_offset = uio->uio_offset; 375 fp->f_nextoff = uio->uio_offset; 376 VOP_UNLOCK(vp, 0, p); 377 return (error); 378} 379 380/* 381 * File table vnode write routine. 382 */ 383static int 384vn_write(fp, uio, cred, flags, p) 385 struct file *fp; 386 struct uio *uio; 387 struct ucred *cred; 388 struct proc *p; 389 int flags; 390{ 391 struct vnode *vp; 392 struct mount *mp; 393 int error, ioflag; 394 395 KASSERT(uio->uio_procp == p, ("uio_procp %p is not p %p", 396 uio->uio_procp, p)); 397 vp = (struct vnode *)fp->f_data; 398 if (vp->v_type == VREG) 399 bwillwrite(); 400 vp = (struct vnode *)fp->f_data; /* XXX needed? 
*/ 401 ioflag = IO_UNIT; 402 if (vp->v_type == VREG && (fp->f_flag & O_APPEND)) 403 ioflag |= IO_APPEND; 404 if (fp->f_flag & FNONBLOCK) 405 ioflag |= IO_NDELAY; 406 if ((fp->f_flag & O_FSYNC) || 407 (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) 408 ioflag |= IO_SYNC; 409 mp = NULL; 410 if (vp->v_type != VCHR && 411 (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 412 return (error); 413 VOP_LEASE(vp, p, cred, LEASE_WRITE); 414 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 415 if ((flags & FOF_OFFSET) == 0) 416 uio->uio_offset = fp->f_offset; 417 ioflag |= sequential_heuristic(uio, fp); 418 error = VOP_WRITE(vp, uio, ioflag, cred); 419 if ((flags & FOF_OFFSET) == 0) 420 fp->f_offset = uio->uio_offset; 421 fp->f_nextoff = uio->uio_offset; 422 VOP_UNLOCK(vp, 0, p); 423 vn_finished_write(mp); 424 return (error); 425} 426 427/* 428 * File table vnode stat routine. 429 */ 430static int 431vn_statfile(fp, sb, p) 432 struct file *fp; 433 struct stat *sb; 434 struct proc *p; 435{ 436 struct vnode *vp = (struct vnode *)fp->f_data; 437 438 return vn_stat(vp, sb, p); 439} 440 441int 442vn_stat(vp, sb, p) 443 struct vnode *vp; 444 register struct stat *sb; 445 struct proc *p; 446{ 447 struct vattr vattr; 448 register struct vattr *vap; 449 int error; 450 u_short mode; 451 452 vap = &vattr; 453 error = VOP_GETATTR(vp, vap, p->p_ucred, p); 454 if (error) 455 return (error); 456 457 /* 458 * Zero the spare stat fields 459 */ 460 sb->st_lspare = 0; 461 sb->st_qspare[0] = 0; 462 sb->st_qspare[1] = 0; 463 464 /* 465 * Copy from vattr table 466 */ 467 if (vap->va_fsid != VNOVAL) 468 sb->st_dev = vap->va_fsid; 469 else 470 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0]; 471 sb->st_ino = vap->va_fileid; 472 mode = vap->va_mode; 473 switch (vap->va_type) { 474 case VREG: 475 mode |= S_IFREG; 476 break; 477 case VDIR: 478 mode |= S_IFDIR; 479 break; 480 case VBLK: 481 mode |= S_IFBLK; 482 break; 483 case VCHR: 484 mode |= S_IFCHR; 485 break; 486 case VLNK: 487 mode |= 
S_IFLNK; 488 /* This is a cosmetic change, symlinks do not have a mode. */ 489 if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) 490 sb->st_mode &= ~ACCESSPERMS; /* 0000 */ 491 else 492 sb->st_mode |= ACCESSPERMS; /* 0777 */ 493 break; 494 case VSOCK: 495 mode |= S_IFSOCK; 496 break; 497 case VFIFO: 498 mode |= S_IFIFO; 499 break; 500 default: 501 return (EBADF); 502 }; 503 sb->st_mode = mode; 504 sb->st_nlink = vap->va_nlink; 505 sb->st_uid = vap->va_uid; 506 sb->st_gid = vap->va_gid; 507 sb->st_rdev = vap->va_rdev; 508 sb->st_size = vap->va_size; 509 sb->st_atimespec = vap->va_atime; 510 sb->st_mtimespec = vap->va_mtime; 511 sb->st_ctimespec = vap->va_ctime; 512 513 /* 514 * According to www.opengroup.org, the meaning of st_blksize is 515 * "a filesystem-specific preferred I/O block size for this 516 * object. In some filesystem types, this may vary from file 517 * to file" 518 * Default to zero to catch bogus uses of this field. 519 */ 520 521 if (vap->va_type == VREG) { 522 sb->st_blksize = vap->va_blocksize; 523 } else if (vn_isdisk(vp, NULL)) { 524 sb->st_blksize = vp->v_rdev->si_bsize_best; 525 if (sb->st_blksize < vp->v_rdev->si_bsize_phys) 526 sb->st_blksize = vp->v_rdev->si_bsize_phys; 527 if (sb->st_blksize < BLKDEV_IOSIZE) 528 sb->st_blksize = BLKDEV_IOSIZE; 529 } else { 530 sb->st_blksize = 0; 531 } 532 533 sb->st_flags = vap->va_flags; 534 if (suser_xxx(p->p_ucred, 0, 0)) 535 sb->st_gen = 0; 536 else 537 sb->st_gen = vap->va_gen; 538 539#if (S_BLKSIZE == 512) 540 /* Optimize this case */ 541 sb->st_blocks = vap->va_bytes >> 9; 542#else 543 sb->st_blocks = vap->va_bytes / S_BLKSIZE; 544#endif 545 return (0); 546} 547 548/* 549 * File table vnode ioctl routine. 
550 */ 551static int 552vn_ioctl(fp, com, data, p) 553 struct file *fp; 554 u_long com; 555 caddr_t data; 556 struct proc *p; 557{ 558 register struct vnode *vp = ((struct vnode *)fp->f_data); 559 struct vattr vattr; 560 int error; 561 562 switch (vp->v_type) { 563 564 case VREG: 565 case VDIR: 566 if (com == FIONREAD) { 567 error = VOP_GETATTR(vp, &vattr, p->p_ucred, p); 568 if (error) 569 return (error); 570 *(int *)data = vattr.va_size - fp->f_offset; 571 return (0); 572 } 573 if (com == FIONBIO || com == FIOASYNC) /* XXX */ 574 return (0); /* XXX */ 575 /* fall into ... */ 576 577 default: 578#if 0 579 return (ENOTTY); 580#endif 581 case VFIFO: 582 case VCHR: 583 case VBLK: 584 if (com == FIODTYPE) { 585 if (vp->v_type != VCHR && vp->v_type != VBLK) 586 return (ENOTTY); 587 *(int *)data = devsw(vp->v_rdev)->d_flags & D_TYPEMASK; 588 return (0); 589 } 590 error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p); 591 if (error == 0 && com == TIOCSCTTY) { 592 593 /* Do nothing if reassigning same control tty */ 594 if (p->p_session->s_ttyvp == vp) 595 return (0); 596 597 /* Get rid of reference to old control tty */ 598 if (p->p_session->s_ttyvp) 599 vrele(p->p_session->s_ttyvp); 600 601 p->p_session->s_ttyvp = vp; 602 VREF(vp); 603 } 604 return (error); 605 } 606} 607 608/* 609 * File table vnode poll routine. 610 */ 611static int 612vn_poll(fp, events, cred, p) 613 struct file *fp; 614 int events; 615 struct ucred *cred; 616 struct proc *p; 617{ 618 619 return (VOP_POLL(((struct vnode *)fp->f_data), events, cred, p)); 620} 621 622/* 623 * Check that the vnode is still valid, and if so 624 * acquire requested lock. 625 */ 626int 627#ifndef DEBUG_LOCKS 628vn_lock(vp, flags, p) 629#else 630debug_vn_lock(vp, flags, p, filename, line) 631#endif 632 struct vnode *vp; 633 int flags; 634 struct proc *p; 635#ifdef DEBUG_LOCKS 636 const char *filename; 637 int line; 638#endif 639{ 640 int error; 641 642 do { 643 if ((flags & LK_INTERLOCK) == 0)
| 40 */ 41 42#include <sys/param.h> 43#include <sys/systm.h> 44#include <sys/fcntl.h> 45#include <sys/file.h> 46#include <sys/stat.h> 47#include <sys/proc.h> 48#include <sys/mount.h> 49#include <sys/mutex.h> 50#include <sys/namei.h> 51#include <sys/vnode.h> 52#include <sys/bio.h> 53#include <sys/buf.h> 54#include <sys/filio.h> 55#include <sys/ttycom.h> 56#include <sys/conf.h> 57 58#include <ufs/ufs/quota.h> 59#include <ufs/ufs/inode.h> 60 61static int vn_closefile __P((struct file *fp, struct proc *p)); 62static int vn_ioctl __P((struct file *fp, u_long com, caddr_t data, 63 struct proc *p)); 64static int vn_read __P((struct file *fp, struct uio *uio, 65 struct ucred *cred, int flags, struct proc *p)); 66static int vn_poll __P((struct file *fp, int events, struct ucred *cred, 67 struct proc *p)); 68static int vn_statfile __P((struct file *fp, struct stat *sb, struct proc *p)); 69static int vn_write __P((struct file *fp, struct uio *uio, 70 struct ucred *cred, int flags, struct proc *p)); 71 72struct fileops vnops = 73 { vn_read, vn_write, vn_ioctl, vn_poll, vn_statfile, vn_closefile }; 74 75static int filt_nullattach(struct knote *kn); 76static int filt_vnattach(struct knote *kn); 77static void filt_vndetach(struct knote *kn); 78static int filt_vnode(struct knote *kn, long hint); 79static int filt_vnread(struct knote *kn, long hint); 80 81struct filterops vn_filtops = 82 { 1, filt_vnattach, filt_vndetach, filt_vnode }; 83 84/* 85 * XXX 86 * filt_vnread is ufs-specific, so the attach routine should really 87 * switch out to different filterops based on the vn filetype 88 */ 89struct filterops vn_rwfiltops[] = { 90 { 1, filt_vnattach, filt_vndetach, filt_vnread }, 91 { 1, filt_nullattach, NULL, NULL }, 92}; 93 94/* 95 * Common code for vnode open operations. 96 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine. 97 * 98 * Note that this does NOT free nameidata for the successful case, 99 * due to the NDINIT being done elsewhere. 
100 */ 101int 102vn_open(ndp, flagp, cmode) 103 register struct nameidata *ndp; 104 int *flagp, cmode; 105{ 106 struct vnode *vp; 107 struct mount *mp; 108 struct proc *p = ndp->ni_cnd.cn_proc; 109 struct ucred *cred = p->p_ucred; 110 struct vattr vat; 111 struct vattr *vap = &vat; 112 int mode, fmode, error; 113 114restart: 115 fmode = *flagp; 116 if (fmode & O_CREAT) { 117 ndp->ni_cnd.cn_nameiop = CREATE; 118 ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; 119 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0) 120 ndp->ni_cnd.cn_flags |= FOLLOW; 121 bwillwrite(); 122 if ((error = namei(ndp)) != 0) 123 return (error); 124 if (ndp->ni_vp == NULL) { 125 VATTR_NULL(vap); 126 vap->va_type = VREG; 127 vap->va_mode = cmode; 128 if (fmode & O_EXCL) 129 vap->va_vaflags |= VA_EXCLUSIVE; 130 if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) { 131 NDFREE(ndp, NDF_ONLY_PNBUF); 132 vput(ndp->ni_dvp); 133 if ((error = vn_start_write(NULL, &mp, 134 V_XSLEEP | PCATCH)) != 0) 135 return (error); 136 goto restart; 137 } 138 VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE); 139 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, 140 &ndp->ni_cnd, vap); 141 vput(ndp->ni_dvp); 142 vn_finished_write(mp); 143 if (error) { 144 NDFREE(ndp, NDF_ONLY_PNBUF); 145 return (error); 146 } 147 ASSERT_VOP_UNLOCKED(ndp->ni_dvp, "create"); 148 ASSERT_VOP_LOCKED(ndp->ni_vp, "create"); 149 fmode &= ~O_TRUNC; 150 vp = ndp->ni_vp; 151 } else { 152 if (ndp->ni_dvp == ndp->ni_vp) 153 vrele(ndp->ni_dvp); 154 else 155 vput(ndp->ni_dvp); 156 ndp->ni_dvp = NULL; 157 vp = ndp->ni_vp; 158 if (fmode & O_EXCL) { 159 error = EEXIST; 160 goto bad; 161 } 162 fmode &= ~O_CREAT; 163 } 164 } else { 165 ndp->ni_cnd.cn_nameiop = LOOKUP; 166 ndp->ni_cnd.cn_flags = 167 ((fmode & O_NOFOLLOW) ? 
NOFOLLOW : FOLLOW) | LOCKLEAF; 168 if ((error = namei(ndp)) != 0) 169 return (error); 170 vp = ndp->ni_vp; 171 } 172 if (vp->v_type == VLNK) { 173 error = EMLINK; 174 goto bad; 175 } 176 if (vp->v_type == VSOCK) { 177 error = EOPNOTSUPP; 178 goto bad; 179 } 180 if ((fmode & O_CREAT) == 0) { 181 mode = 0; 182 if (fmode & (FWRITE | O_TRUNC)) { 183 if (vp->v_type == VDIR) { 184 error = EISDIR; 185 goto bad; 186 } 187 error = vn_writechk(vp); 188 if (error) 189 goto bad; 190 mode |= VWRITE; 191 } 192 if (fmode & FREAD) 193 mode |= VREAD; 194 if (mode) { 195 error = VOP_ACCESS(vp, mode, cred, p); 196 if (error) 197 goto bad; 198 } 199 } 200 if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0) 201 goto bad; 202 /* 203 * Make sure that a VM object is created for VMIO support. 204 */ 205 if (vn_canvmio(vp) == TRUE) { 206 if ((error = vfs_object_create(vp, p, cred)) != 0) 207 goto bad; 208 } 209 210 if (fmode & FWRITE) 211 vp->v_writecount++; 212 *flagp = fmode; 213 return (0); 214bad: 215 NDFREE(ndp, NDF_ONLY_PNBUF); 216 vput(vp); 217 *flagp = fmode; 218 return (error); 219} 220 221/* 222 * Check for write permissions on the specified vnode. 223 * Prototype text segments cannot be written. 224 */ 225int 226vn_writechk(vp) 227 register struct vnode *vp; 228{ 229 230 /* 231 * If there's shared text associated with 232 * the vnode, try to free it up once. If 233 * we fail, we can't allow writing. 
234 */ 235 if (vp->v_flag & VTEXT) 236 return (ETXTBSY); 237 return (0); 238} 239 240/* 241 * Vnode close call 242 */ 243int 244vn_close(vp, flags, cred, p) 245 register struct vnode *vp; 246 int flags; 247 struct ucred *cred; 248 struct proc *p; 249{ 250 int error; 251 252 if (flags & FWRITE) 253 vp->v_writecount--; 254 error = VOP_CLOSE(vp, flags, cred, p); 255 vrele(vp); 256 return (error); 257} 258 259static __inline 260int 261sequential_heuristic(struct uio *uio, struct file *fp) 262{ 263 /* 264 * Sequential heuristic - detect sequential operation 265 */ 266 if ((uio->uio_offset == 0 && fp->f_seqcount > 0) || 267 uio->uio_offset == fp->f_nextoff) { 268 /* 269 * XXX we assume that the filesystem block size is 270 * the default. Not true, but still gives us a pretty 271 * good indicator of how sequential the read operations 272 * are. 273 */ 274 fp->f_seqcount += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE; 275 if (fp->f_seqcount >= 127) 276 fp->f_seqcount = 127; 277 return(fp->f_seqcount << 16); 278 } 279 280 /* 281 * Not sequential, quick draw-down of seqcount 282 */ 283 if (fp->f_seqcount > 1) 284 fp->f_seqcount = 1; 285 else 286 fp->f_seqcount = 0; 287 return(0); 288} 289 290/* 291 * Package up an I/O request on a vnode into a uio and do it. 
292 */ 293int 294vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p) 295 enum uio_rw rw; 296 struct vnode *vp; 297 caddr_t base; 298 int len; 299 off_t offset; 300 enum uio_seg segflg; 301 int ioflg; 302 struct ucred *cred; 303 int *aresid; 304 struct proc *p; 305{ 306 struct uio auio; 307 struct iovec aiov; 308 struct mount *mp; 309 int error; 310 311 if ((ioflg & IO_NODELOCKED) == 0) { 312 mp = NULL; 313 if (rw == UIO_WRITE && 314 vp->v_type != VCHR && 315 (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 316 return (error); 317 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 318 } 319 auio.uio_iov = &aiov; 320 auio.uio_iovcnt = 1; 321 aiov.iov_base = base; 322 aiov.iov_len = len; 323 auio.uio_resid = len; 324 auio.uio_offset = offset; 325 auio.uio_segflg = segflg; 326 auio.uio_rw = rw; 327 auio.uio_procp = p; 328 if (rw == UIO_READ) { 329 error = VOP_READ(vp, &auio, ioflg, cred); 330 } else { 331 error = VOP_WRITE(vp, &auio, ioflg, cred); 332 } 333 if (aresid) 334 *aresid = auio.uio_resid; 335 else 336 if (auio.uio_resid && error == 0) 337 error = EIO; 338 if ((ioflg & IO_NODELOCKED) == 0) { 339 vn_finished_write(mp); 340 VOP_UNLOCK(vp, 0, p); 341 } 342 return (error); 343} 344 345/* 346 * File table vnode read routine. 
347 */ 348static int 349vn_read(fp, uio, cred, flags, p) 350 struct file *fp; 351 struct uio *uio; 352 struct ucred *cred; 353 struct proc *p; 354 int flags; 355{ 356 struct vnode *vp; 357 int error, ioflag; 358 359 KASSERT(uio->uio_procp == p, ("uio_procp %p is not p %p", 360 uio->uio_procp, p)); 361 vp = (struct vnode *)fp->f_data; 362 ioflag = 0; 363 if (fp->f_flag & FNONBLOCK) 364 ioflag |= IO_NDELAY; 365 VOP_LEASE(vp, p, cred, LEASE_READ); 366 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p); 367 if ((flags & FOF_OFFSET) == 0) 368 uio->uio_offset = fp->f_offset; 369 370 ioflag |= sequential_heuristic(uio, fp); 371 372 error = VOP_READ(vp, uio, ioflag, cred); 373 if ((flags & FOF_OFFSET) == 0) 374 fp->f_offset = uio->uio_offset; 375 fp->f_nextoff = uio->uio_offset; 376 VOP_UNLOCK(vp, 0, p); 377 return (error); 378} 379 380/* 381 * File table vnode write routine. 382 */ 383static int 384vn_write(fp, uio, cred, flags, p) 385 struct file *fp; 386 struct uio *uio; 387 struct ucred *cred; 388 struct proc *p; 389 int flags; 390{ 391 struct vnode *vp; 392 struct mount *mp; 393 int error, ioflag; 394 395 KASSERT(uio->uio_procp == p, ("uio_procp %p is not p %p", 396 uio->uio_procp, p)); 397 vp = (struct vnode *)fp->f_data; 398 if (vp->v_type == VREG) 399 bwillwrite(); 400 vp = (struct vnode *)fp->f_data; /* XXX needed? 
*/ 401 ioflag = IO_UNIT; 402 if (vp->v_type == VREG && (fp->f_flag & O_APPEND)) 403 ioflag |= IO_APPEND; 404 if (fp->f_flag & FNONBLOCK) 405 ioflag |= IO_NDELAY; 406 if ((fp->f_flag & O_FSYNC) || 407 (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) 408 ioflag |= IO_SYNC; 409 mp = NULL; 410 if (vp->v_type != VCHR && 411 (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 412 return (error); 413 VOP_LEASE(vp, p, cred, LEASE_WRITE); 414 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 415 if ((flags & FOF_OFFSET) == 0) 416 uio->uio_offset = fp->f_offset; 417 ioflag |= sequential_heuristic(uio, fp); 418 error = VOP_WRITE(vp, uio, ioflag, cred); 419 if ((flags & FOF_OFFSET) == 0) 420 fp->f_offset = uio->uio_offset; 421 fp->f_nextoff = uio->uio_offset; 422 VOP_UNLOCK(vp, 0, p); 423 vn_finished_write(mp); 424 return (error); 425} 426 427/* 428 * File table vnode stat routine. 429 */ 430static int 431vn_statfile(fp, sb, p) 432 struct file *fp; 433 struct stat *sb; 434 struct proc *p; 435{ 436 struct vnode *vp = (struct vnode *)fp->f_data; 437 438 return vn_stat(vp, sb, p); 439} 440 441int 442vn_stat(vp, sb, p) 443 struct vnode *vp; 444 register struct stat *sb; 445 struct proc *p; 446{ 447 struct vattr vattr; 448 register struct vattr *vap; 449 int error; 450 u_short mode; 451 452 vap = &vattr; 453 error = VOP_GETATTR(vp, vap, p->p_ucred, p); 454 if (error) 455 return (error); 456 457 /* 458 * Zero the spare stat fields 459 */ 460 sb->st_lspare = 0; 461 sb->st_qspare[0] = 0; 462 sb->st_qspare[1] = 0; 463 464 /* 465 * Copy from vattr table 466 */ 467 if (vap->va_fsid != VNOVAL) 468 sb->st_dev = vap->va_fsid; 469 else 470 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0]; 471 sb->st_ino = vap->va_fileid; 472 mode = vap->va_mode; 473 switch (vap->va_type) { 474 case VREG: 475 mode |= S_IFREG; 476 break; 477 case VDIR: 478 mode |= S_IFDIR; 479 break; 480 case VBLK: 481 mode |= S_IFBLK; 482 break; 483 case VCHR: 484 mode |= S_IFCHR; 485 break; 486 case VLNK: 487 mode |= 
S_IFLNK; 488 /* This is a cosmetic change, symlinks do not have a mode. */ 489 if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) 490 sb->st_mode &= ~ACCESSPERMS; /* 0000 */ 491 else 492 sb->st_mode |= ACCESSPERMS; /* 0777 */ 493 break; 494 case VSOCK: 495 mode |= S_IFSOCK; 496 break; 497 case VFIFO: 498 mode |= S_IFIFO; 499 break; 500 default: 501 return (EBADF); 502 }; 503 sb->st_mode = mode; 504 sb->st_nlink = vap->va_nlink; 505 sb->st_uid = vap->va_uid; 506 sb->st_gid = vap->va_gid; 507 sb->st_rdev = vap->va_rdev; 508 sb->st_size = vap->va_size; 509 sb->st_atimespec = vap->va_atime; 510 sb->st_mtimespec = vap->va_mtime; 511 sb->st_ctimespec = vap->va_ctime; 512 513 /* 514 * According to www.opengroup.org, the meaning of st_blksize is 515 * "a filesystem-specific preferred I/O block size for this 516 * object. In some filesystem types, this may vary from file 517 * to file" 518 * Default to zero to catch bogus uses of this field. 519 */ 520 521 if (vap->va_type == VREG) { 522 sb->st_blksize = vap->va_blocksize; 523 } else if (vn_isdisk(vp, NULL)) { 524 sb->st_blksize = vp->v_rdev->si_bsize_best; 525 if (sb->st_blksize < vp->v_rdev->si_bsize_phys) 526 sb->st_blksize = vp->v_rdev->si_bsize_phys; 527 if (sb->st_blksize < BLKDEV_IOSIZE) 528 sb->st_blksize = BLKDEV_IOSIZE; 529 } else { 530 sb->st_blksize = 0; 531 } 532 533 sb->st_flags = vap->va_flags; 534 if (suser_xxx(p->p_ucred, 0, 0)) 535 sb->st_gen = 0; 536 else 537 sb->st_gen = vap->va_gen; 538 539#if (S_BLKSIZE == 512) 540 /* Optimize this case */ 541 sb->st_blocks = vap->va_bytes >> 9; 542#else 543 sb->st_blocks = vap->va_bytes / S_BLKSIZE; 544#endif 545 return (0); 546} 547 548/* 549 * File table vnode ioctl routine. 
550 */ 551static int 552vn_ioctl(fp, com, data, p) 553 struct file *fp; 554 u_long com; 555 caddr_t data; 556 struct proc *p; 557{ 558 register struct vnode *vp = ((struct vnode *)fp->f_data); 559 struct vattr vattr; 560 int error; 561 562 switch (vp->v_type) { 563 564 case VREG: 565 case VDIR: 566 if (com == FIONREAD) { 567 error = VOP_GETATTR(vp, &vattr, p->p_ucred, p); 568 if (error) 569 return (error); 570 *(int *)data = vattr.va_size - fp->f_offset; 571 return (0); 572 } 573 if (com == FIONBIO || com == FIOASYNC) /* XXX */ 574 return (0); /* XXX */ 575 /* fall into ... */ 576 577 default: 578#if 0 579 return (ENOTTY); 580#endif 581 case VFIFO: 582 case VCHR: 583 case VBLK: 584 if (com == FIODTYPE) { 585 if (vp->v_type != VCHR && vp->v_type != VBLK) 586 return (ENOTTY); 587 *(int *)data = devsw(vp->v_rdev)->d_flags & D_TYPEMASK; 588 return (0); 589 } 590 error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p); 591 if (error == 0 && com == TIOCSCTTY) { 592 593 /* Do nothing if reassigning same control tty */ 594 if (p->p_session->s_ttyvp == vp) 595 return (0); 596 597 /* Get rid of reference to old control tty */ 598 if (p->p_session->s_ttyvp) 599 vrele(p->p_session->s_ttyvp); 600 601 p->p_session->s_ttyvp = vp; 602 VREF(vp); 603 } 604 return (error); 605 } 606} 607 608/* 609 * File table vnode poll routine. 610 */ 611static int 612vn_poll(fp, events, cred, p) 613 struct file *fp; 614 int events; 615 struct ucred *cred; 616 struct proc *p; 617{ 618 619 return (VOP_POLL(((struct vnode *)fp->f_data), events, cred, p)); 620} 621 622/* 623 * Check that the vnode is still valid, and if so 624 * acquire requested lock. 625 */ 626int 627#ifndef DEBUG_LOCKS 628vn_lock(vp, flags, p) 629#else 630debug_vn_lock(vp, flags, p, filename, line) 631#endif 632 struct vnode *vp; 633 int flags; 634 struct proc *p; 635#ifdef DEBUG_LOCKS 636 const char *filename; 637 int line; 638#endif 639{ 640 int error; 641 642 do { 643 if ((flags & LK_INTERLOCK) == 0)
|
648 tsleep((caddr_t)vp, PINOD, "vn_lock", 0); 649 error = ENOENT; 650 } else { 651 if (vp->v_vxproc != NULL) 652 printf("VXLOCK interlock avoided in vn_lock\n"); 653#ifdef DEBUG_LOCKS 654 vp->filename = filename; 655 vp->line = line; 656#endif 657 error = VOP_LOCK(vp, 658 flags | LK_NOPAUSE | LK_INTERLOCK, p); 659 if (error == 0) 660 return (error); 661 } 662 flags &= ~LK_INTERLOCK; 663 } while (flags & LK_RETRY); 664 return (error); 665} 666 667/* 668 * File table vnode close routine. 669 */ 670static int 671vn_closefile(fp, p) 672 struct file *fp; 673 struct proc *p; 674{ 675 676 fp->f_ops = &badfileops; 677 return (vn_close(((struct vnode *)fp->f_data), fp->f_flag, 678 fp->f_cred, p)); 679} 680 681/* 682 * Preparing to start a filesystem write operation. If the operation is 683 * permitted, then we bump the count of operations in progress and 684 * proceed. If a suspend request is in progress, we wait until the 685 * suspension is over, and then proceed. 686 */ 687int 688vn_start_write(vp, mpp, flags) 689 struct vnode *vp; 690 struct mount **mpp; 691 int flags; 692{ 693 struct mount *mp; 694 int error; 695 696 /* 697 * If a vnode is provided, get and return the mount point that 698 * to which it will write. 699 */ 700 if (vp != NULL) { 701 if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) { 702 *mpp = NULL; 703 if (error != EOPNOTSUPP) 704 return (error); 705 return (0); 706 } 707 } 708 if ((mp = *mpp) == NULL) 709 return (0); 710 /* 711 * Check on status of suspension. 712 */ 713 while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) { 714 if (flags & V_NOWAIT) 715 return (EWOULDBLOCK); 716 error = tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH), 717 "suspfs", 0); 718 if (error) 719 return (error); 720 } 721 if (flags & V_XSLEEP) 722 return (0); 723 mp->mnt_writeopcount++; 724 return (0); 725} 726 727/* 728 * Secondary suspension. Used by operations such as vop_inactive 729 * routines that are needed by the higher level functions. 
These 730 * are allowed to proceed until all the higher level functions have 731 * completed (indicated by mnt_writeopcount dropping to zero). At that 732 * time, these operations are halted until the suspension is over. 733 */ 734int 735vn_write_suspend_wait(vp, mp, flags) 736 struct vnode *vp; 737 struct mount *mp; 738 int flags; 739{ 740 int error; 741 742 if (vp != NULL) { 743 if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) { 744 if (error != EOPNOTSUPP) 745 return (error); 746 return (0); 747 } 748 } 749 /* 750 * If we are not suspended or have not yet reached suspended 751 * mode, then let the operation proceed. 752 */ 753 if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPENDED) == 0) 754 return (0); 755 if (flags & V_NOWAIT) 756 return (EWOULDBLOCK); 757 /* 758 * Wait for the suspension to finish. 759 */ 760 return (tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH), 761 "suspfs", 0)); 762} 763 764/* 765 * Filesystem write operation has completed. If we are suspending and this 766 * operation is the last one, notify the suspender that the suspension is 767 * now in effect. 768 */ 769void 770vn_finished_write(mp) 771 struct mount *mp; 772{ 773 774 if (mp == NULL) 775 return; 776 mp->mnt_writeopcount--; 777 if (mp->mnt_writeopcount < 0) 778 panic("vn_finished_write: neg cnt"); 779 if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 && 780 mp->mnt_writeopcount <= 0) 781 wakeup(&mp->mnt_writeopcount); 782} 783 784/* 785 * Request a filesystem to suspend write operations. 786 */ 787void 788vfs_write_suspend(mp) 789 struct mount *mp; 790{ 791 struct proc *p = curproc; 792 793 if (mp->mnt_kern_flag & MNTK_SUSPEND) 794 return; 795 mp->mnt_kern_flag |= MNTK_SUSPEND; 796 if (mp->mnt_writeopcount > 0) 797 (void) tsleep(&mp->mnt_writeopcount, PUSER - 1, "suspwt", 0); 798 VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p); 799 mp->mnt_kern_flag |= MNTK_SUSPENDED; 800} 801 802/* 803 * Request a filesystem to resume write operations. 
 */
void
vfs_write_resume(mp)
	struct mount *mp;
{

	if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0)
		return;
	/*
	 * Clear both suspension flags, then wake the suspender (which
	 * sleeps on mnt_writeopcount in vfs_write_suspend()) and any
	 * writers waiting out the suspension (which sleep on mnt_flag
	 * in vn_start_write()/vn_write_suspend_wait()).
	 */
	mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPENDED);
	wakeup(&mp->mnt_writeopcount);
	wakeup(&mp->mnt_flag);
}

/*
 * Attach an EVFILT_VNODE knote: only vnode-backed and FIFO files are
 * accepted.
 */
static int
filt_vnattach(struct knote *kn)
{
	struct vnode *vp;

	if (kn->kn_fp->f_type != DTYPE_VNODE &&
	    kn->kn_fp->f_type != DTYPE_FIFO)
		return (EBADF);

	vp = (struct vnode *)kn->kn_fp->f_data;

	/*
	 * XXX
	 * this is a hack simply to cause the filter attach to fail
	 * for non-ufs filesystems, until the support for them is done.
	 */
	if ((vp)->v_tag != VT_UFS)
		return (EOPNOTSUPP);
| 648 tsleep((caddr_t)vp, PINOD, "vn_lock", 0); 649 error = ENOENT; 650 } else { 651 if (vp->v_vxproc != NULL) 652 printf("VXLOCK interlock avoided in vn_lock\n"); 653#ifdef DEBUG_LOCKS 654 vp->filename = filename; 655 vp->line = line; 656#endif 657 error = VOP_LOCK(vp, 658 flags | LK_NOPAUSE | LK_INTERLOCK, p); 659 if (error == 0) 660 return (error); 661 } 662 flags &= ~LK_INTERLOCK; 663 } while (flags & LK_RETRY); 664 return (error); 665} 666 667/* 668 * File table vnode close routine. 669 */ 670static int 671vn_closefile(fp, p) 672 struct file *fp; 673 struct proc *p; 674{ 675 676 fp->f_ops = &badfileops; 677 return (vn_close(((struct vnode *)fp->f_data), fp->f_flag, 678 fp->f_cred, p)); 679} 680 681/* 682 * Preparing to start a filesystem write operation. If the operation is 683 * permitted, then we bump the count of operations in progress and 684 * proceed. If a suspend request is in progress, we wait until the 685 * suspension is over, and then proceed. 686 */ 687int 688vn_start_write(vp, mpp, flags) 689 struct vnode *vp; 690 struct mount **mpp; 691 int flags; 692{ 693 struct mount *mp; 694 int error; 695 696 /* 697 * If a vnode is provided, get and return the mount point that 698 * to which it will write. 699 */ 700 if (vp != NULL) { 701 if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) { 702 *mpp = NULL; 703 if (error != EOPNOTSUPP) 704 return (error); 705 return (0); 706 } 707 } 708 if ((mp = *mpp) == NULL) 709 return (0); 710 /* 711 * Check on status of suspension. 712 */ 713 while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) { 714 if (flags & V_NOWAIT) 715 return (EWOULDBLOCK); 716 error = tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH), 717 "suspfs", 0); 718 if (error) 719 return (error); 720 } 721 if (flags & V_XSLEEP) 722 return (0); 723 mp->mnt_writeopcount++; 724 return (0); 725} 726 727/* 728 * Secondary suspension. Used by operations such as vop_inactive 729 * routines that are needed by the higher level functions. 
These 730 * are allowed to proceed until all the higher level functions have 731 * completed (indicated by mnt_writeopcount dropping to zero). At that 732 * time, these operations are halted until the suspension is over. 733 */ 734int 735vn_write_suspend_wait(vp, mp, flags) 736 struct vnode *vp; 737 struct mount *mp; 738 int flags; 739{ 740 int error; 741 742 if (vp != NULL) { 743 if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) { 744 if (error != EOPNOTSUPP) 745 return (error); 746 return (0); 747 } 748 } 749 /* 750 * If we are not suspended or have not yet reached suspended 751 * mode, then let the operation proceed. 752 */ 753 if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPENDED) == 0) 754 return (0); 755 if (flags & V_NOWAIT) 756 return (EWOULDBLOCK); 757 /* 758 * Wait for the suspension to finish. 759 */ 760 return (tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH), 761 "suspfs", 0)); 762} 763 764/* 765 * Filesystem write operation has completed. If we are suspending and this 766 * operation is the last one, notify the suspender that the suspension is 767 * now in effect. 768 */ 769void 770vn_finished_write(mp) 771 struct mount *mp; 772{ 773 774 if (mp == NULL) 775 return; 776 mp->mnt_writeopcount--; 777 if (mp->mnt_writeopcount < 0) 778 panic("vn_finished_write: neg cnt"); 779 if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 && 780 mp->mnt_writeopcount <= 0) 781 wakeup(&mp->mnt_writeopcount); 782} 783 784/* 785 * Request a filesystem to suspend write operations. 786 */ 787void 788vfs_write_suspend(mp) 789 struct mount *mp; 790{ 791 struct proc *p = curproc; 792 793 if (mp->mnt_kern_flag & MNTK_SUSPEND) 794 return; 795 mp->mnt_kern_flag |= MNTK_SUSPEND; 796 if (mp->mnt_writeopcount > 0) 797 (void) tsleep(&mp->mnt_writeopcount, PUSER - 1, "suspwt", 0); 798 VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p); 799 mp->mnt_kern_flag |= MNTK_SUSPENDED; 800} 801 802/* 803 * Request a filesystem to resume write operations. 
804 */ 805void 806vfs_write_resume(mp) 807 struct mount *mp; 808{ 809 810 if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0) 811 return; 812 mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPENDED); 813 wakeup(&mp->mnt_writeopcount); 814 wakeup(&mp->mnt_flag); 815} 816 817static int 818filt_vnattach(struct knote *kn) 819{ 820 struct vnode *vp; 821 822 if (kn->kn_fp->f_type != DTYPE_VNODE && 823 kn->kn_fp->f_type != DTYPE_FIFO) 824 return (EBADF); 825 826 vp = (struct vnode *)kn->kn_fp->f_data; 827 828 /* 829 * XXX 830 * this is a hack simply to cause the filter attach to fail 831 * for non-ufs filesystems, until the support for them is done. 832 */ 833 if ((vp)->v_tag != VT_UFS) 834 return (EOPNOTSUPP); 835
|
}

/*
 * EVFILT_VNODE filter routine: latch any hint bits the caller
 * subscribed to (kn_sfflags) into kn_fflags, and report the knote
 * active once any event has been recorded.
 */
static int
filt_vnode(struct knote *kn, long hint)
{

	if (kn->kn_sfflags & hint)
		kn->kn_fflags |= hint;
	return (kn->kn_fflags != 0);
}

/*
 * Attach routine for file types that do not support kevent filters.
 */
static int
filt_nullattach(struct knote *kn)
{
	return (ENXIO);
}

/*
 * Read filter: report the number of bytes between the file's current
 * offset and the end of the file.  UFS-specific (uses VTOI); see the
 * XXX note on vn_rwfiltops at the top of the file.
 */
/*ARGSUSED*/
static int
filt_vnread(struct knote *kn, long hint)
{
	struct vnode *vp = (struct vnode *)kn->kn_fp->f_data;
	struct inode *ip = VTOI(vp);

	kn->kn_data = ip->i_size - kn->kn_fp->f_offset;
	return (kn->kn_data != 0);
}

/*
 * Simplified in-kernel wrapper calls for extended attribute access.
 * Both calls pass in a NULL credential, authorizing as "kernel" access.
 * Set IO_NODELOCKED in ioflg if the vnode is already locked.
 */

/*
 * Read extended attribute "attrname" of vp into buf.  On entry *buflen
 * is the buffer capacity; on successful return it is updated to the
 * number of bytes actually read.
 */
int
vn_extattr_get(struct vnode *vp, int ioflg, const char *attrname, int *buflen,
    char *buf, struct proc *p)
{
	struct uio auio;
	struct iovec iov;
	int error;

	iov.iov_len = *buflen;
	iov.iov_base = buf;

	auio.uio_iov = &iov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_procp = p;
	auio.uio_offset = 0;
	auio.uio_resid = *buflen;

	if ((ioflg & IO_NODELOCKED) == 0)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);

	/* authorize attribute retrieval as kernel */
	error = VOP_GETEXTATTR(vp, attrname, &auio, NULL, p);

	if ((ioflg & IO_NODELOCKED) == 0)
		VOP_UNLOCK(vp, 0, p);

	if (error == 0) {
		/* uio_resid is what was left unread; report bytes consumed. */
		*buflen = *buflen - auio.uio_resid;
	}

	return (error);
}

/*
 * XXX failure mode if partially written?
 */
int
vn_extattr_set(struct vnode *vp, int ioflg, const char *attrname, int buflen,
    char *buf, struct proc *p)
{
	struct uio auio;
	struct iovec iov;
	struct mount *mp;
	int error;

	iov.iov_len = buflen;
	iov.iov_base = buf;

	auio.uio_iov = &iov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_procp = p;
	auio.uio_offset = 0;
	auio.uio_resid = buflen;

	/*
	 * Unless the caller already holds the vnode lock, account the
	 * write with the mount (write-suspension bookkeeping) and lock
	 * the vnode around the VOP call.
	 */
	if ((ioflg & IO_NODELOCKED) == 0) {
		if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
			return (error);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	}

	/* authorize attribute setting as kernel */
	error = VOP_SETEXTATTR(vp, attrname, &auio, NULL, p);

	if ((ioflg & IO_NODELOCKED) == 0) {
		vn_finished_write(mp);
		VOP_UNLOCK(vp, 0, p);
	}

	return (error);
}

/*
 * Remove extended attribute "attrname" from vp.  Removal is expressed
 * as a VOP_SETEXTATTR with a NULL uio.
 */
int
vn_extattr_rm(struct vnode *vp, int ioflg, const char *attrname, struct proc *p)
{
	struct mount *mp;
	int error;

	if ((ioflg & IO_NODELOCKED) == 0) {
		if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0)
			return (error);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	}

	/* authorize attribute removal as kernel */
	error = VOP_SETEXTATTR(vp, attrname, NULL, NULL, p);

	if ((ioflg & IO_NODELOCKED) == 0) {
		vn_finished_write(mp);
		VOP_UNLOCK(vp, 0, p);
	}

	return (error);
}
| 852} 853 854static int 855filt_vnode(struct knote *kn, long hint) 856{ 857 858 if (kn->kn_sfflags & hint) 859 kn->kn_fflags |= hint; 860 return (kn->kn_fflags != 0); 861} 862 863static int 864filt_nullattach(struct knote *kn) 865{ 866 return (ENXIO); 867} 868 869/*ARGSUSED*/ 870static int 871filt_vnread(struct knote *kn, long hint) 872{ 873 struct vnode *vp = (struct vnode *)kn->kn_fp->f_data; 874 struct inode *ip = VTOI(vp); 875 876 kn->kn_data = ip->i_size - kn->kn_fp->f_offset; 877 return (kn->kn_data != 0); 878} 879 880/* 881 * Simplified in-kernel wrapper calls for extended attribute access. 882 * Both calls pass in a NULL credential, authorizing as "kernel" access. 883 * Set IO_NODELOCKED in ioflg if the vnode is already locked. 884 */ 885int 886vn_extattr_get(struct vnode *vp, int ioflg, const char *attrname, int *buflen, 887 char *buf, struct proc *p) 888{ 889 struct uio auio; 890 struct iovec iov; 891 int error; 892 893 iov.iov_len = *buflen; 894 iov.iov_base = buf; 895 896 auio.uio_iov = &iov; 897 auio.uio_iovcnt = 1; 898 auio.uio_rw = UIO_READ; 899 auio.uio_segflg = UIO_SYSSPACE; 900 auio.uio_procp = p; 901 auio.uio_offset = 0; 902 auio.uio_resid = *buflen; 903 904 if ((ioflg & IO_NODELOCKED) == 0) 905 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 906 907 /* authorize attribute retrieval as kernel */ 908 error = VOP_GETEXTATTR(vp, attrname, &auio, NULL, p); 909 910 if ((ioflg & IO_NODELOCKED) == 0) 911 VOP_UNLOCK(vp, 0, p); 912 913 if (error == 0) { 914 *buflen = *buflen - auio.uio_resid; 915 } 916 917 return (error); 918} 919 920/* 921 * XXX failure mode if partially written? 
922 */ 923int 924vn_extattr_set(struct vnode *vp, int ioflg, const char *attrname, int buflen, 925 char *buf, struct proc *p) 926{ 927 struct uio auio; 928 struct iovec iov; 929 struct mount *mp; 930 int error; 931 932 iov.iov_len = buflen; 933 iov.iov_base = buf; 934 935 auio.uio_iov = &iov; 936 auio.uio_iovcnt = 1; 937 auio.uio_rw = UIO_WRITE; 938 auio.uio_segflg = UIO_SYSSPACE; 939 auio.uio_procp = p; 940 auio.uio_offset = 0; 941 auio.uio_resid = buflen; 942 943 if ((ioflg & IO_NODELOCKED) == 0) { 944 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0) 945 return (error); 946 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 947 } 948 949 /* authorize attribute setting as kernel */ 950 error = VOP_SETEXTATTR(vp, attrname, &auio, NULL, p); 951 952 if ((ioflg & IO_NODELOCKED) == 0) { 953 vn_finished_write(mp); 954 VOP_UNLOCK(vp, 0, p); 955 } 956 957 return (error); 958} 959 960int 961vn_extattr_rm(struct vnode *vp, int ioflg, const char *attrname, struct proc *p) 962{ 963 struct mount *mp; 964 int error; 965 966 if ((ioflg & IO_NODELOCKED) == 0) { 967 if ((error = vn_start_write(vp, &mp, V_WAIT)) != 0) 968 return (error); 969 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 970 } 971 972 /* authorize attribute removal as kernel */ 973 error = VOP_SETEXTATTR(vp, attrname, NULL, NULL, p); 974 975 if ((ioflg & IO_NODELOCKED) == 0) { 976 vn_finished_write(mp); 977 VOP_UNLOCK(vp, 0, p); 978 } 979 980 return (error); 981}
|