nfs_clvnops.c revision 210786
1279377Simp/*- 2279377Simp * Copyright (c) 1989, 1993 3279377Simp * The Regents of the University of California. All rights reserved. 4279377Simp * 5279377Simp * This code is derived from software contributed to Berkeley by 6279377Simp * Rick Macklem at The University of Guelph. 7279377Simp * 8279377Simp * Redistribution and use in source and binary forms, with or without 9279377Simp * modification, are permitted provided that the following conditions 10279377Simp * are met: 11279377Simp * 1. Redistributions of source code must retain the above copyright 12279377Simp * notice, this list of conditions and the following disclaimer. 13279377Simp * 2. Redistributions in binary form must reproduce the above copyright 14279377Simp * notice, this list of conditions and the following disclaimer in the 15279377Simp * documentation and/or other materials provided with the distribution. 16279377Simp * 4. Neither the name of the University nor the names of its contributors 17279377Simp * may be used to endorse or promote products derived from this software 18279377Simp * without specific prior written permission. 19279377Simp * 20279377Simp * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21279377Simp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22279377Simp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23279377Simp * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24279377Simp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25279377Simp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26279377Simp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27279377Simp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28279377Simp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29279377Simp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30279377Simp * SUCH DAMAGE. 31279377Simp * 32279377Simp * from nfs_vnops.c 8.16 (Berkeley) 5/27/95 33279377Simp */ 34279377Simp 35279377Simp#include <sys/cdefs.h> 36279377Simp__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvnops.c 210786 2010-08-03 01:49:28Z rmacklem $"); 37279377Simp 38279377Simp/* 39279377Simp * vnode op calls for Sun NFS version 2, 3 and 4 40279377Simp */ 41279377Simp 42279377Simp#include "opt_inet.h" 43279377Simp 44279377Simp#include <sys/param.h> 45279377Simp#include <sys/kernel.h> 46279377Simp#include <sys/systm.h> 47279377Simp#include <sys/resourcevar.h> 48279377Simp#include <sys/proc.h> 49279377Simp#include <sys/mount.h> 50279377Simp#include <sys/bio.h> 51279377Simp#include <sys/buf.h> 52279377Simp#include <sys/jail.h> 53279377Simp#include <sys/malloc.h> 54279377Simp#include <sys/mbuf.h> 55279377Simp#include <sys/namei.h> 56279377Simp#include <sys/socket.h> 57279377Simp#include <sys/vnode.h> 58279377Simp#include <sys/dirent.h> 59279377Simp#include <sys/fcntl.h> 60279377Simp#include <sys/lockf.h> 61279377Simp#include <sys/stat.h> 62279377Simp#include <sys/sysctl.h> 63279377Simp#include <sys/signalvar.h> 64279377Simp 65279377Simp#include <vm/vm.h> 66279377Simp#include <vm/vm_object.h> 67279377Simp#include <vm/vm_extern.h> 68279377Simp#include <vm/vm_object.h> 69279377Simp 70279377Simp#include <fs/nfs/nfsport.h> 71279377Simp#include <fs/nfsclient/nfsnode.h> 
72279377Simp#include <fs/nfsclient/nfsmount.h> 73279377Simp#include <fs/nfsclient/nfs.h> 74279377Simp 75279377Simp#include <net/if.h> 76279377Simp#include <netinet/in.h> 77279377Simp#include <netinet/in_var.h> 78279377Simp 79279377Simp#include <nfs/nfs_lock.h> 80279377Simp 81279377Simp/* Defs */ 82279377Simp#define TRUE 1 83279377Simp#define FALSE 0 84279377Simp 85279377Simpextern struct nfsstats newnfsstats; 86279377SimpMALLOC_DECLARE(M_NEWNFSREQ); 87279377Simpvop_advlock_t *ncl_advlock_p = nfs_dolock; 88279377Simp 89279377Simp/* 90279377Simp * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these 91279377Simp * calls are not in getblk() and brelse() so that they would not be necessary 92279377Simp * here. 93279377Simp */ 94279377Simp#ifndef B_VMIO 95279377Simp#define vfs_busy_pages(bp, f) 96#endif 97 98static vop_read_t nfsfifo_read; 99static vop_write_t nfsfifo_write; 100static vop_close_t nfsfifo_close; 101static int nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *, 102 struct thread *); 103static vop_lookup_t nfs_lookup; 104static vop_create_t nfs_create; 105static vop_mknod_t nfs_mknod; 106static vop_open_t nfs_open; 107static vop_close_t nfs_close; 108static vop_access_t nfs_access; 109static vop_getattr_t nfs_getattr; 110static vop_setattr_t nfs_setattr; 111static vop_read_t nfs_read; 112static vop_fsync_t nfs_fsync; 113static vop_remove_t nfs_remove; 114static vop_link_t nfs_link; 115static vop_rename_t nfs_rename; 116static vop_mkdir_t nfs_mkdir; 117static vop_rmdir_t nfs_rmdir; 118static vop_symlink_t nfs_symlink; 119static vop_readdir_t nfs_readdir; 120static vop_strategy_t nfs_strategy; 121static vop_lock1_t nfs_lock1; 122static int nfs_lookitup(struct vnode *, char *, int, 123 struct ucred *, struct thread *, struct nfsnode **); 124static int nfs_sillyrename(struct vnode *, struct vnode *, 125 struct componentname *); 126static vop_access_t nfsspec_access; 127static vop_readlink_t nfs_readlink; 128static vop_print_t 
nfs_print; 129static vop_advlock_t nfs_advlock; 130static vop_advlockasync_t nfs_advlockasync; 131static vop_getacl_t nfs_getacl; 132static vop_setacl_t nfs_setacl; 133 134/* 135 * Global vfs data structures for nfs 136 */ 137struct vop_vector newnfs_vnodeops = { 138 .vop_default = &default_vnodeops, 139 .vop_access = nfs_access, 140 .vop_advlock = nfs_advlock, 141 .vop_advlockasync = nfs_advlockasync, 142 .vop_close = nfs_close, 143 .vop_create = nfs_create, 144 .vop_fsync = nfs_fsync, 145 .vop_getattr = nfs_getattr, 146 .vop_getpages = ncl_getpages, 147 .vop_putpages = ncl_putpages, 148 .vop_inactive = ncl_inactive, 149 .vop_link = nfs_link, 150 .vop_lock1 = nfs_lock1, 151 .vop_lookup = nfs_lookup, 152 .vop_mkdir = nfs_mkdir, 153 .vop_mknod = nfs_mknod, 154 .vop_open = nfs_open, 155 .vop_print = nfs_print, 156 .vop_read = nfs_read, 157 .vop_readdir = nfs_readdir, 158 .vop_readlink = nfs_readlink, 159 .vop_reclaim = ncl_reclaim, 160 .vop_remove = nfs_remove, 161 .vop_rename = nfs_rename, 162 .vop_rmdir = nfs_rmdir, 163 .vop_setattr = nfs_setattr, 164 .vop_strategy = nfs_strategy, 165 .vop_symlink = nfs_symlink, 166 .vop_write = ncl_write, 167 .vop_getacl = nfs_getacl, 168 .vop_setacl = nfs_setacl, 169}; 170 171struct vop_vector newnfs_fifoops = { 172 .vop_default = &fifo_specops, 173 .vop_access = nfsspec_access, 174 .vop_close = nfsfifo_close, 175 .vop_fsync = nfs_fsync, 176 .vop_getattr = nfs_getattr, 177 .vop_inactive = ncl_inactive, 178 .vop_print = nfs_print, 179 .vop_read = nfsfifo_read, 180 .vop_reclaim = ncl_reclaim, 181 .vop_setattr = nfs_setattr, 182 .vop_write = nfsfifo_write, 183}; 184 185static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, 186 struct componentname *cnp, struct vattr *vap); 187static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name, 188 int namelen, struct ucred *cred, struct thread *td); 189static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, 190 char *fnameptr, int fnamelen, struct vnode *tdvp, 
struct vnode *tvp, 191 char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td); 192static int nfs_renameit(struct vnode *sdvp, struct vnode *svp, 193 struct componentname *scnp, struct sillyrename *sp); 194 195/* 196 * Global variables 197 */ 198#define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) 199 200SYSCTL_DECL(_vfs_newnfs); 201 202static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO; 203SYSCTL_INT(_vfs_newnfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, 204 &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout"); 205 206static int nfs_prime_access_cache = 0; 207SYSCTL_INT(_vfs_newnfs, OID_AUTO, prime_access_cache, CTLFLAG_RW, 208 &nfs_prime_access_cache, 0, 209 "Prime NFS ACCESS cache when fetching attributes"); 210 211static int newnfs_commit_on_close = 0; 212SYSCTL_INT(_vfs_newnfs, OID_AUTO, commit_on_close, CTLFLAG_RW, 213 &newnfs_commit_on_close, 0, "write+commit on close, else only write"); 214 215static int nfs_clean_pages_on_close = 1; 216SYSCTL_INT(_vfs_newnfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW, 217 &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close"); 218 219int newnfs_directio_enable = 0; 220SYSCTL_INT(_vfs_newnfs, OID_AUTO, directio_enable, CTLFLAG_RW, 221 &newnfs_directio_enable, 0, "Enable NFS directio"); 222 223/* 224 * This sysctl allows other processes to mmap a file that has been opened 225 * O_DIRECT by a process. In general, having processes mmap the file while 226 * Direct IO is in progress can lead to Data Inconsistencies. But, we allow 227 * this by default to prevent DoS attacks - to prevent a malicious user from 228 * opening up files O_DIRECT preventing other users from mmap'ing these 229 * files. "Protected" environments where stricter consistency guarantees are 230 * required can disable this knob. The process that opened the file O_DIRECT 231 * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not 232 * meaningful. 
233 */ 234int newnfs_directio_allow_mmap = 1; 235SYSCTL_INT(_vfs_newnfs, OID_AUTO, directio_allow_mmap, CTLFLAG_RW, 236 &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens"); 237 238#if 0 239SYSCTL_INT(_vfs_newnfs, OID_AUTO, access_cache_hits, CTLFLAG_RD, 240 &newnfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count"); 241 242SYSCTL_INT(_vfs_newnfs, OID_AUTO, access_cache_misses, CTLFLAG_RD, 243 &newnfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count"); 244#endif 245 246#define NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY \ 247 | NFSACCESS_EXTEND | NFSACCESS_EXECUTE \ 248 | NFSACCESS_DELETE | NFSACCESS_LOOKUP) 249 250/* 251 * SMP Locking Note : 252 * The list of locks after the description of the lock is the ordering 253 * of other locks acquired with the lock held. 254 * np->n_mtx : Protects the fields in the nfsnode. 255 VM Object Lock 256 VI_MTX (acquired indirectly) 257 * nmp->nm_mtx : Protects the fields in the nfsmount. 258 rep->r_mtx 259 * ncl_iod_mutex : Global lock, protects shared nfsiod state. 260 * nfs_reqq_mtx : Global lock, protects the nfs_reqq list. 261 nmp->nm_mtx 262 rep->r_mtx 263 * rep->r_mtx : Protects the fields in an nfsreq. 
264 */ 265 266static int 267nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td, 268 struct ucred *cred, u_int32_t *retmode) 269{ 270 int error = 0, attrflag, i, lrupos; 271 u_int32_t rmode; 272 struct nfsnode *np = VTONFS(vp); 273 struct nfsvattr nfsva; 274 275 error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag, 276 &rmode, NULL); 277 if (attrflag) 278 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 279 if (!error) { 280 lrupos = 0; 281 mtx_lock(&np->n_mtx); 282 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 283 if (np->n_accesscache[i].uid == cred->cr_uid) { 284 np->n_accesscache[i].mode = rmode; 285 np->n_accesscache[i].stamp = time_second; 286 break; 287 } 288 if (i > 0 && np->n_accesscache[i].stamp < 289 np->n_accesscache[lrupos].stamp) 290 lrupos = i; 291 } 292 if (i == NFS_ACCESSCACHESIZE) { 293 np->n_accesscache[lrupos].uid = cred->cr_uid; 294 np->n_accesscache[lrupos].mode = rmode; 295 np->n_accesscache[lrupos].stamp = time_second; 296 } 297 mtx_unlock(&np->n_mtx); 298 if (retmode != NULL) 299 *retmode = rmode; 300 } else if (NFS_ISV4(vp)) { 301 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 302 } 303 return (error); 304} 305 306/* 307 * nfs access vnode op. 308 * For nfs version 2, just return ok. File accesses may fail later. 309 * For nfs version 3, use the access rpc to check accessibility. If file modes 310 * are changed on the server, accesses might still fail later. 311 */ 312static int 313nfs_access(struct vop_access_args *ap) 314{ 315 struct vnode *vp = ap->a_vp; 316 int error = 0, i, gotahit; 317 u_int32_t mode, wmode, rmode; 318 int v34 = NFS_ISV34(vp); 319 struct nfsnode *np = VTONFS(vp); 320 321 /* 322 * Disallow write attempts on filesystems mounted read-only; 323 * unless the file is a socket, fifo, or a block or character 324 * device resident on the filesystem. 
325 */ 326 if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS | 327 VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL | 328 VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) { 329 switch (vp->v_type) { 330 case VREG: 331 case VDIR: 332 case VLNK: 333 return (EROFS); 334 default: 335 break; 336 } 337 } 338 /* 339 * For nfs v3 or v4, check to see if we have done this recently, and if 340 * so return our cached result instead of making an ACCESS call. 341 * If not, do an access rpc, otherwise you are stuck emulating 342 * ufs_access() locally using the vattr. This may not be correct, 343 * since the server may apply other access criteria such as 344 * client uid-->server uid mapping that we do not know about. 345 */ 346 if (v34) { 347 if (ap->a_accmode & VREAD) 348 mode = NFSACCESS_READ; 349 else 350 mode = 0; 351 if (vp->v_type != VDIR) { 352 if (ap->a_accmode & VWRITE) 353 mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); 354 if (ap->a_accmode & VAPPEND) 355 mode |= NFSACCESS_EXTEND; 356 if (ap->a_accmode & VEXEC) 357 mode |= NFSACCESS_EXECUTE; 358 if (ap->a_accmode & VDELETE) 359 mode |= NFSACCESS_DELETE; 360 } else { 361 if (ap->a_accmode & VWRITE) 362 mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); 363 if (ap->a_accmode & VAPPEND) 364 mode |= NFSACCESS_EXTEND; 365 if (ap->a_accmode & VEXEC) 366 mode |= NFSACCESS_LOOKUP; 367 if (ap->a_accmode & VDELETE) 368 mode |= NFSACCESS_DELETE; 369 if (ap->a_accmode & VDELETE_CHILD) 370 mode |= NFSACCESS_MODIFY; 371 } 372 /* XXX safety belt, only make blanket request if caching */ 373 if (nfsaccess_cache_timeout > 0) { 374 wmode = NFSACCESS_READ | NFSACCESS_MODIFY | 375 NFSACCESS_EXTEND | NFSACCESS_EXECUTE | 376 NFSACCESS_DELETE | NFSACCESS_LOOKUP; 377 } else { 378 wmode = mode; 379 } 380 381 /* 382 * Does our cached result allow us to give a definite yes to 383 * this request? 
384 */ 385 gotahit = 0; 386 mtx_lock(&np->n_mtx); 387 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 388 if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) { 389 if (time_second < (np->n_accesscache[i].stamp 390 + nfsaccess_cache_timeout) && 391 (np->n_accesscache[i].mode & mode) == mode) { 392 NFSINCRGLOBAL(newnfsstats.accesscache_hits); 393 gotahit = 1; 394 } 395 break; 396 } 397 } 398 mtx_unlock(&np->n_mtx); 399 if (gotahit == 0) { 400 /* 401 * Either a no, or a don't know. Go to the wire. 402 */ 403 NFSINCRGLOBAL(newnfsstats.accesscache_misses); 404 error = nfs34_access_otw(vp, wmode, ap->a_td, 405 ap->a_cred, &rmode); 406 if (!error && 407 (rmode & mode) != mode) 408 error = EACCES; 409 } 410 return (error); 411 } else { 412 if ((error = nfsspec_access(ap)) != 0) { 413 return (error); 414 } 415 /* 416 * Attempt to prevent a mapped root from accessing a file 417 * which it shouldn't. We try to read a byte from the file 418 * if the user is root and the file is not zero length. 419 * After calling nfsspec_access, we should have the correct 420 * file size cached. 
421 */ 422 mtx_lock(&np->n_mtx); 423 if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD) 424 && VTONFS(vp)->n_size > 0) { 425 struct iovec aiov; 426 struct uio auio; 427 char buf[1]; 428 429 mtx_unlock(&np->n_mtx); 430 aiov.iov_base = buf; 431 aiov.iov_len = 1; 432 auio.uio_iov = &aiov; 433 auio.uio_iovcnt = 1; 434 auio.uio_offset = 0; 435 auio.uio_resid = 1; 436 auio.uio_segflg = UIO_SYSSPACE; 437 auio.uio_rw = UIO_READ; 438 auio.uio_td = ap->a_td; 439 440 if (vp->v_type == VREG) 441 error = ncl_readrpc(vp, &auio, ap->a_cred); 442 else if (vp->v_type == VDIR) { 443 char* bp; 444 bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); 445 aiov.iov_base = bp; 446 aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; 447 error = ncl_readdirrpc(vp, &auio, ap->a_cred, 448 ap->a_td); 449 free(bp, M_TEMP); 450 } else if (vp->v_type == VLNK) 451 error = ncl_readlinkrpc(vp, &auio, ap->a_cred); 452 else 453 error = EACCES; 454 } else 455 mtx_unlock(&np->n_mtx); 456 return (error); 457 } 458} 459 460 461/* 462 * nfs open vnode op 463 * Check to see if the type is ok 464 * and that deletion is not in progress. 465 * For paged in text files, you will need to flush the page cache 466 * if consistency is lost. 467 */ 468/* ARGSUSED */ 469static int 470nfs_open(struct vop_open_args *ap) 471{ 472 struct vnode *vp = ap->a_vp; 473 struct nfsnode *np = VTONFS(vp); 474 struct vattr vattr; 475 int error; 476 int fmode = ap->a_mode; 477 478 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) 479 return (EOPNOTSUPP); 480 481 /* 482 * For NFSv4, we need to do the Open Op before cache validation, 483 * so that we conform to RFC3530 Sec. 9.3.1. 484 */ 485 if (NFS_ISV4(vp)) { 486 error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td); 487 if (error) { 488 error = nfscl_maperr(ap->a_td, error, (uid_t)0, 489 (gid_t)0); 490 return (error); 491 } 492 } 493 494 /* 495 * Now, if this Open will be doing reading, re-validate/flush the 496 * cache, so that Close/Open coherency is maintained. 
497 */ 498 if ((fmode & FREAD) != 0 && 499 (!NFS_ISV4(vp) || nfscl_mustflush(vp) != 0)) { 500 mtx_lock(&np->n_mtx); 501 if (np->n_flag & NMODIFIED) { 502 mtx_unlock(&np->n_mtx); 503 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 504 if (error == EINTR || error == EIO) { 505 if (NFS_ISV4(vp)) 506 (void) nfsrpc_close(vp, 0, ap->a_td); 507 return (error); 508 } 509 mtx_lock(&np->n_mtx); 510 np->n_attrstamp = 0; 511 if (vp->v_type == VDIR) 512 np->n_direofoffset = 0; 513 mtx_unlock(&np->n_mtx); 514 error = VOP_GETATTR(vp, &vattr, ap->a_cred); 515 if (error) { 516 if (NFS_ISV4(vp)) 517 (void) nfsrpc_close(vp, 0, ap->a_td); 518 return (error); 519 } 520 mtx_lock(&np->n_mtx); 521 np->n_mtime = vattr.va_mtime; 522 if (NFS_ISV4(vp)) 523 np->n_change = vattr.va_filerev; 524 mtx_unlock(&np->n_mtx); 525 } else { 526 mtx_unlock(&np->n_mtx); 527 error = VOP_GETATTR(vp, &vattr, ap->a_cred); 528 if (error) { 529 if (NFS_ISV4(vp)) 530 (void) nfsrpc_close(vp, 0, ap->a_td); 531 return (error); 532 } 533 mtx_lock(&np->n_mtx); 534 if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) || 535 NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { 536 if (vp->v_type == VDIR) 537 np->n_direofoffset = 0; 538 mtx_unlock(&np->n_mtx); 539 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 540 if (error == EINTR || error == EIO) { 541 if (NFS_ISV4(vp)) 542 (void) nfsrpc_close(vp, 0, 543 ap->a_td); 544 return (error); 545 } 546 mtx_lock(&np->n_mtx); 547 np->n_mtime = vattr.va_mtime; 548 if (NFS_ISV4(vp)) 549 np->n_change = vattr.va_filerev; 550 } 551 mtx_unlock(&np->n_mtx); 552 } 553 } 554 555 /* 556 * If the object has >= 1 O_DIRECT active opens, we disable caching. 
557 */ 558 if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { 559 if (np->n_directio_opens == 0) { 560 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 561 if (error) { 562 if (NFS_ISV4(vp)) 563 (void) nfsrpc_close(vp, 0, ap->a_td); 564 return (error); 565 } 566 mtx_lock(&np->n_mtx); 567 np->n_flag |= NNONCACHE; 568 } else { 569 mtx_lock(&np->n_mtx); 570 } 571 np->n_directio_opens++; 572 mtx_unlock(&np->n_mtx); 573 } 574 vnode_create_vobject(vp, vattr.va_size, ap->a_td); 575 return (0); 576} 577 578/* 579 * nfs close vnode op 580 * What an NFS client should do upon close after writing is a debatable issue. 581 * Most NFS clients push delayed writes to the server upon close, basically for 582 * two reasons: 583 * 1 - So that any write errors may be reported back to the client process 584 * doing the close system call. By far the two most likely errors are 585 * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. 586 * 2 - To put a worst case upper bound on cache inconsistency between 587 * multiple clients for the file. 588 * There is also a consistency problem for Version 2 of the protocol w.r.t. 589 * not being able to tell if other clients are writing a file concurrently, 590 * since there is no way of knowing if the changed modify time in the reply 591 * is only due to the write for this client. 592 * (NFS Version 3 provides weak cache consistency data in the reply that 593 * should be sufficient to detect and handle this case.) 594 * 595 * The current code does the following: 596 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers 597 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate 598 * or commit them (this satisfies 1 and 2 except for the 599 * case where the server crashes after this close but 600 * before the commit RPC, which is felt to be "good 601 * enough". 
Changing the last argument to ncl_flush() to 602 * a 1 would force a commit operation, if it is felt a 603 * commit is necessary now. 604 * for NFS Version 4 - flush the dirty buffers and commit them, if 605 * nfscl_mustflush() says this is necessary. 606 * It is necessary if there is no write delegation held, 607 * in order to satisfy open/close coherency. 608 * If the file isn't cached on local stable storage, 609 * it may be necessary in order to detect "out of space" 610 * errors from the server, if the write delegation 611 * issued by the server doesn't allow the file to grow. 612 */ 613/* ARGSUSED */ 614static int 615nfs_close(struct vop_close_args *ap) 616{ 617 struct vnode *vp = ap->a_vp; 618 struct nfsnode *np = VTONFS(vp); 619 struct nfsvattr nfsva; 620 struct ucred *cred; 621 int error = 0, ret, localcred = 0; 622 int fmode = ap->a_fflag; 623 624 if ((vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF)) 625 return (0); 626 /* 627 * During shutdown, a_cred isn't valid, so just use root. 628 */ 629 if (ap->a_cred == NOCRED) { 630 cred = newnfs_getcred(); 631 localcred = 1; 632 } else { 633 cred = ap->a_cred; 634 } 635 if (vp->v_type == VREG) { 636 /* 637 * Examine and clean dirty pages, regardless of NMODIFIED. 638 * This closes a major hole in close-to-open consistency. 639 * We want to push out all dirty pages (and buffers) on 640 * close, regardless of whether they were dirtied by 641 * mmap'ed writes or via write(). 642 */ 643 if (nfs_clean_pages_on_close && vp->v_object) { 644 VM_OBJECT_LOCK(vp->v_object); 645 vm_object_page_clean(vp->v_object, 0, 0, 0); 646 VM_OBJECT_UNLOCK(vp->v_object); 647 } 648 mtx_lock(&np->n_mtx); 649 if (np->n_flag & NMODIFIED) { 650 mtx_unlock(&np->n_mtx); 651 if (NFS_ISV3(vp)) { 652 /* 653 * Under NFSv3 we have dirty buffers to dispose of. We 654 * must flush them to the NFS server. We have the option 655 * of waiting all the way through the commit rpc or just 656 * waiting for the initial write. 
The default is to only 657 * wait through the initial write so the data is in the 658 * server's cache, which is roughly similar to the state 659 * a standard disk subsystem leaves the file in on close(). 660 * 661 * We cannot clear the NMODIFIED bit in np->n_flag due to 662 * potential races with other processes, and certainly 663 * cannot clear it if we don't commit. 664 * These races occur when there is no longer the old 665 * traditional vnode locking implemented for Vnode Ops. 666 */ 667 int cm = newnfs_commit_on_close ? 1 : 0; 668 error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, cm, 0); 669 /* np->n_flag &= ~NMODIFIED; */ 670 } else if (NFS_ISV4(vp)) { 671 if (nfscl_mustflush(vp) != 0) { 672 int cm = newnfs_commit_on_close ? 1 : 0; 673 error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, 674 cm, 0); 675 /* 676 * as above w.r.t races when clearing 677 * NMODIFIED. 678 * np->n_flag &= ~NMODIFIED; 679 */ 680 } 681 } else 682 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 683 mtx_lock(&np->n_mtx); 684 } 685 /* 686 * Invalidate the attribute cache in all cases. 687 * An open is going to fetch fresh attrs any way, other procs 688 * on this node that have file open will be forced to do an 689 * otw attr fetch, but this is safe. 690 * --> A user found that their RPC count dropped by 20% when 691 * this was commented out and I can't see any requirement 692 * for it, so I've disabled it when negative lookups are 693 * enabled. (What does this have to do with negative lookup 694 * caching? Well nothing, except it was reported by the 695 * same user that needed negative lookup caching and I wanted 696 * there to be a way to disable it to see if it 697 * is the cause of some caching/coherency issue that might 698 * crop up.) 
699 */ 700 if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) 701 np->n_attrstamp = 0; 702 if (np->n_flag & NWRITEERR) { 703 np->n_flag &= ~NWRITEERR; 704 error = np->n_error; 705 } 706 mtx_unlock(&np->n_mtx); 707 } 708 709 if (NFS_ISV4(vp)) { 710 /* 711 * Get attributes so "change" is up to date. 712 */ 713 if (error == 0 && nfscl_mustflush(vp) != 0) { 714 ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva, 715 NULL); 716 if (!ret) { 717 np->n_change = nfsva.na_filerev; 718 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 719 NULL, 0, 0); 720 } 721 } 722 723 /* 724 * and do the close. 725 */ 726 ret = nfsrpc_close(vp, 0, ap->a_td); 727 if (!error && ret) 728 error = ret; 729 if (error) 730 error = nfscl_maperr(ap->a_td, error, (uid_t)0, 731 (gid_t)0); 732 } 733 if (newnfs_directio_enable) 734 KASSERT((np->n_directio_asyncwr == 0), 735 ("nfs_close: dirty unflushed (%d) directio buffers\n", 736 np->n_directio_asyncwr)); 737 if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { 738 mtx_lock(&np->n_mtx); 739 KASSERT((np->n_directio_opens > 0), 740 ("nfs_close: unexpectedly value (0) of n_directio_opens\n")); 741 np->n_directio_opens--; 742 if (np->n_directio_opens == 0) 743 np->n_flag &= ~NNONCACHE; 744 mtx_unlock(&np->n_mtx); 745 } 746 if (localcred) 747 NFSFREECRED(cred); 748 return (error); 749} 750 751/* 752 * nfs getattr call from vfs. 753 */ 754static int 755nfs_getattr(struct vop_getattr_args *ap) 756{ 757 struct vnode *vp = ap->a_vp; 758 struct thread *td = curthread; /* XXX */ 759 struct nfsnode *np = VTONFS(vp); 760 int error = 0; 761 struct nfsvattr nfsva; 762 struct vattr *vap = ap->a_vap; 763 struct vattr vattr; 764 765 /* 766 * Update local times for special files. 767 */ 768 mtx_lock(&np->n_mtx); 769 if (np->n_flag & (NACC | NUPD)) 770 np->n_flag |= NCHG; 771 mtx_unlock(&np->n_mtx); 772 /* 773 * First look in the cache. 
774 */ 775 if (ncl_getattrcache(vp, &vattr) == 0) { 776 vap->va_type = vattr.va_type; 777 vap->va_mode = vattr.va_mode; 778 vap->va_nlink = vattr.va_nlink; 779 vap->va_uid = vattr.va_uid; 780 vap->va_gid = vattr.va_gid; 781 vap->va_fsid = vattr.va_fsid; 782 vap->va_fileid = vattr.va_fileid; 783 vap->va_size = vattr.va_size; 784 vap->va_blocksize = vattr.va_blocksize; 785 vap->va_atime = vattr.va_atime; 786 vap->va_mtime = vattr.va_mtime; 787 vap->va_ctime = vattr.va_ctime; 788 vap->va_gen = vattr.va_gen; 789 vap->va_flags = vattr.va_flags; 790 vap->va_rdev = vattr.va_rdev; 791 vap->va_bytes = vattr.va_bytes; 792 vap->va_filerev = vattr.va_filerev; 793 /* 794 * Get the local modify time for the case of a write 795 * delegation. 796 */ 797 nfscl_deleggetmodtime(vp, &vap->va_mtime); 798 return (0); 799 } 800 801 if (NFS_ISV34(vp) && nfs_prime_access_cache && 802 nfsaccess_cache_timeout > 0) { 803 NFSINCRGLOBAL(newnfsstats.accesscache_misses); 804 nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL); 805 if (ncl_getattrcache(vp, ap->a_vap) == 0) { 806 nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime); 807 return (0); 808 } 809 } 810 error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva, NULL); 811 if (!error) 812 error = nfscl_loadattrcache(&vp, &nfsva, vap, NULL, 0, 0); 813 if (!error) { 814 /* 815 * Get the local modify time for the case of a write 816 * delegation. 817 */ 818 nfscl_deleggetmodtime(vp, &vap->va_mtime); 819 } else if (NFS_ISV4(vp)) { 820 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 821 } 822 return (error); 823} 824 825/* 826 * nfs setattr call. 827 */ 828static int 829nfs_setattr(struct vop_setattr_args *ap) 830{ 831 struct vnode *vp = ap->a_vp; 832 struct nfsnode *np = VTONFS(vp); 833 struct thread *td = curthread; /* XXX */ 834 struct vattr *vap = ap->a_vap; 835 int error = 0; 836 u_quad_t tsize; 837 838#ifndef nolint 839 tsize = (u_quad_t)0; 840#endif 841 842 /* 843 * Setting of flags and marking of atimes are not supported. 
844 */ 845 if (vap->va_flags != VNOVAL) 846 return (EOPNOTSUPP); 847 848 /* 849 * Disallow write attempts if the filesystem is mounted read-only. 850 */ 851 if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || 852 vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || 853 vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && 854 (vp->v_mount->mnt_flag & MNT_RDONLY)) 855 return (EROFS); 856 if (vap->va_size != VNOVAL) { 857 switch (vp->v_type) { 858 case VDIR: 859 return (EISDIR); 860 case VCHR: 861 case VBLK: 862 case VSOCK: 863 case VFIFO: 864 if (vap->va_mtime.tv_sec == VNOVAL && 865 vap->va_atime.tv_sec == VNOVAL && 866 vap->va_mode == (mode_t)VNOVAL && 867 vap->va_uid == (uid_t)VNOVAL && 868 vap->va_gid == (gid_t)VNOVAL) 869 return (0); 870 vap->va_size = VNOVAL; 871 break; 872 default: 873 /* 874 * Disallow write attempts if the filesystem is 875 * mounted read-only. 876 */ 877 if (vp->v_mount->mnt_flag & MNT_RDONLY) 878 return (EROFS); 879 /* 880 * We run vnode_pager_setsize() early (why?), 881 * we must set np->n_size now to avoid vinvalbuf 882 * V_SAVE races that might setsize a lower 883 * value. 884 */ 885 mtx_lock(&np->n_mtx); 886 tsize = np->n_size; 887 mtx_unlock(&np->n_mtx); 888 error = ncl_meta_setsize(vp, ap->a_cred, td, 889 vap->va_size); 890 mtx_lock(&np->n_mtx); 891 if (np->n_flag & NMODIFIED) { 892 tsize = np->n_size; 893 mtx_unlock(&np->n_mtx); 894 if (vap->va_size == 0) 895 error = ncl_vinvalbuf(vp, 0, td, 1); 896 else 897 error = ncl_vinvalbuf(vp, V_SAVE, td, 1); 898 if (error) { 899 vnode_pager_setsize(vp, tsize); 900 return (error); 901 } 902 /* 903 * Call nfscl_delegmodtime() to set the modify time 904 * locally, as required. 905 */ 906 nfscl_delegmodtime(vp); 907 } else 908 mtx_unlock(&np->n_mtx); 909 /* 910 * np->n_size has already been set to vap->va_size 911 * in ncl_meta_setsize(). 
We must set it again since 912 * nfs_loadattrcache() could be called through 913 * ncl_meta_setsize() and could modify np->n_size. 914 */ 915 mtx_lock(&np->n_mtx); 916 np->n_vattr.na_size = np->n_size = vap->va_size; 917 mtx_unlock(&np->n_mtx); 918 }; 919 } else { 920 mtx_lock(&np->n_mtx); 921 if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && 922 (np->n_flag & NMODIFIED) && vp->v_type == VREG) { 923 mtx_unlock(&np->n_mtx); 924 if ((error = ncl_vinvalbuf(vp, V_SAVE, td, 1)) != 0 && 925 (error == EINTR || error == EIO)) 926 return (error); 927 } else 928 mtx_unlock(&np->n_mtx); 929 } 930 error = nfs_setattrrpc(vp, vap, ap->a_cred, td); 931 if (error && vap->va_size != VNOVAL) { 932 mtx_lock(&np->n_mtx); 933 np->n_size = np->n_vattr.na_size = tsize; 934 vnode_pager_setsize(vp, tsize); 935 mtx_unlock(&np->n_mtx); 936 } 937 return (error); 938} 939 940/* 941 * Do an nfs setattr rpc. 942 */ 943static int 944nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred, 945 struct thread *td) 946{ 947 struct nfsnode *np = VTONFS(vp); 948 int error, ret, attrflag, i; 949 struct nfsvattr nfsva; 950 951 if (NFS_ISV34(vp)) { 952 mtx_lock(&np->n_mtx); 953 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) 954 np->n_accesscache[i].stamp = 0; 955 np->n_flag |= NDELEGMOD; 956 mtx_unlock(&np->n_mtx); 957 } 958 error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag, 959 NULL); 960 if (attrflag) { 961 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 962 if (ret && !error) 963 error = ret; 964 } 965 if (error && NFS_ISV4(vp)) 966 error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid); 967 return (error); 968} 969 970/* 971 * nfs lookup call, one step at a time... 
 * First look in cache
 * If not found, unlock the directory nfsnode and do the rpc
 */
static int
nfs_lookup(struct vop_lookup_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct mount *mp = dvp->v_mount;
	int flags = cnp->cn_flags;
	struct vnode *newvp;
	struct nfsmount *nmp;
	struct nfsnode *np, *newnp;
	int error = 0, attrflag, dattrflag, ltype;
	struct thread *td = cnp->cn_thread;
	struct nfsfh *nfhp;
	struct nfsvattr dnfsva, nfsva;
	struct vattr vattr;
	time_t dmtime;

	*vpp = NULLVP;
	/* Refuse DELETE/RENAME of the last component on a read-only mount. */
	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);
	if (dvp->v_type != VDIR)
		return (ENOTDIR);
	nmp = VFSTONFS(mp);
	np = VTONFS(dvp);

	/* For NFSv4, wait until any remove is done. */
	mtx_lock(&np->n_mtx);
	while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) {
		np->n_flag |= NREMOVEWANT;
		(void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0);
	}
	mtx_unlock(&np->n_mtx);

	if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0)
		return (error);
	/*
	 * cache_lookup() returns -1 for a positive hit, ENOENT for a
	 * negative hit, 0 for a miss.
	 */
	error = cache_lookup(dvp, vpp, cnp);
	if (error > 0 && error != ENOENT)
		return (error);
	if (error == -1) {
		/*
		 * We only accept a positive hit in the cache if the
		 * change time of the file matches our cached copy.
		 * Otherwise, we discard the cache entry and fallback
		 * to doing a lookup RPC.
		 *
		 * To better handle stale file handles and attributes,
		 * clear the attribute cache of this node if it is a
		 * leaf component, part of an open() call, and not
		 * locally modified before fetching the attributes.
		 * This should allow stale file handles to be detected
		 * here where we can fall back to a LOOKUP RPC to
		 * recover rather than having nfs_open() detect the
		 * stale file handle and failing open(2) with ESTALE.
		 */
		newvp = *vpp;
		newnp = VTONFS(newvp);
		if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
		    !(newnp->n_flag & NMODIFIED)) {
			mtx_lock(&newnp->n_mtx);
			newnp->n_attrstamp = 0;
			mtx_unlock(&newnp->n_mtx);
		}
		/*
		 * A held delegation (nfscl_nodeleg() == 0) means the cached
		 * entry is known valid without consulting the server.
		 */
		if (nfscl_nodeleg(newvp, 0) == 0 ||
		    (VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 &&
		    vattr.va_ctime.tv_sec == newnp->n_ctime)) {
			NFSINCRGLOBAL(newnfsstats.lookupcache_hits);
			if (cnp->cn_nameiop != LOOKUP &&
			    (flags & ISLASTCN))
				cnp->cn_flags |= SAVENAME;
			return (0);
		}
		/* Stale positive entry: drop it and fall through to RPC. */
		cache_purge(newvp);
		if (dvp != newvp)
			vput(newvp);
		else
			vrele(newvp);
		*vpp = NULLVP;
	} else if (error == ENOENT) {
		if (dvp->v_iflag & VI_DOOMED)
			return (ENOENT);
		/*
		 * We only accept a negative hit in the cache if the
		 * modification time of the parent directory matches
		 * our cached copy.  Otherwise, we discard all of the
		 * negative cache entries for this directory. We also
		 * only trust -ve cache entries for less than
		 * nm_negative_namecache_timeout seconds.
		 */
		if ((u_int)(ticks - np->n_dmtime_ticks) <
		    (nmp->nm_negnametimeo * hz) &&
		    VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
		    vattr.va_mtime.tv_sec == np->n_dmtime) {
			NFSINCRGLOBAL(newnfsstats.lookupcache_hits);
			return (ENOENT);
		}
		cache_purge_negative(dvp);
		mtx_lock(&np->n_mtx);
		np->n_dmtime = 0;
		mtx_unlock(&np->n_mtx);
	}

	/*
	 * Cache the modification time of the parent directory in case
	 * the lookup fails and results in adding the first negative
	 * name cache entry for the directory.  Since this is reading
	 * a single time_t, don't bother with locking.  The
	 * modification time may be a bit stale, but it must be read
	 * before performing the lookup RPC to prevent a race where
	 * another lookup updates the timestamp on the directory after
	 * the lookup RPC has been performed on the server but before
	 * n_dmtime is set at the end of this function.
	 */
	dmtime = np->n_vattr.na_mtime.tv_sec;
	error = 0;
	newvp = NULLVP;
	NFSINCRGLOBAL(newnfsstats.lookupcache_misses);
	error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
	    NULL);
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	if (error) {
		if (newvp != NULLVP) {
			vput(newvp);
			*vpp = NULLVP;
		}

		if (error != ENOENT) {
			if (NFS_ISV4(dvp))
				error = nfscl_maperr(td, error, (uid_t)0,
				    (gid_t)0);
			return (error);
		}

		/* The requested file was not found. */
		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
		    (flags & ISLASTCN)) {
			/*
			 * XXX: UFS does a full VOP_ACCESS(dvp,
			 * VWRITE) here instead of just checking
			 * MNT_RDONLY.
			 */
			if (mp->mnt_flag & MNT_RDONLY)
				return (EROFS);
			cnp->cn_flags |= SAVENAME;
			return (EJUSTRETURN);
		}

		if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) {
			/*
			 * Maintain n_dmtime as the modification time
			 * of the parent directory when the oldest -ve
			 * name cache entry for this directory was
			 * added.  If a -ve cache entry has already
			 * been added with a newer modification time
			 * by a concurrent lookup, then don't bother
			 * adding a cache entry.  The modification
			 * time of the directory might have changed
			 * due to the file this lookup failed to find
			 * being created.  In that case a subsequent
			 * lookup would incorrectly use the entry
			 * added here instead of doing an extra
			 * lookup.
			 */
			mtx_lock(&np->n_mtx);
			if (np->n_dmtime <= dmtime) {
				if (np->n_dmtime == 0) {
					np->n_dmtime = dmtime;
					np->n_dmtime_ticks = ticks;
				}
				mtx_unlock(&np->n_mtx);
				cache_enter(dvp, NULL, cnp);
			} else
				mtx_unlock(&np->n_mtx);
		}
		return (ENOENT);
	}

	/*
	 * Handle RENAME case...
	 */
	if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) {
		/* Renaming a directory onto itself is not allowed. */
		if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
			FREE((caddr_t)nfhp, M_NFSFH);
			return (EISDIR);
		}
		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL);
		if (error)
			return (error);
		newvp = NFSTOV(np);
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
		*vpp = newvp;
		cnp->cn_flags |= SAVENAME;
		return (0);
	}

	if (flags & ISDOTDOT) {
		/*
		 * ".." lookup: the parent must be acquired without holding
		 * the child's lock, so busy the mount, drop the lock, get
		 * the parent and then re-take the lock, checking that the
		 * directory vnode was not doomed while it was unlocked.
		 */
		ltype = VOP_ISLOCKED(dvp);
		error = vfs_busy(mp, MBF_NOWAIT);
		if (error != 0) {
			vfs_ref(mp);
			VOP_UNLOCK(dvp, 0);
			error = vfs_busy(mp, 0);
			vn_lock(dvp, ltype | LK_RETRY);
			vfs_rel(mp);
			if (error == 0 && (dvp->v_iflag & VI_DOOMED)) {
				vfs_unbusy(mp);
				error = ENOENT;
			}
			if (error != 0)
				return (error);
		}
		VOP_UNLOCK(dvp, 0);
		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL);
		if (error == 0)
			newvp = NFSTOV(np);
		vfs_unbusy(mp);
		if (newvp != dvp)
			vn_lock(dvp, ltype | LK_RETRY);
		if (dvp->v_iflag & VI_DOOMED) {
			if (error == 0) {
				if (newvp == dvp)
					vrele(newvp);
				else
					vput(newvp);
			}
			error = ENOENT;
		}
		if (error != 0)
			return (error);
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	} else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
		/* Lookup of "." — just reference the directory itself. */
		FREE((caddr_t)nfhp, M_NFSFH);
		VREF(dvp);
		newvp = dvp;
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	} else {
		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL);
		if (error)
			return (error);
		newvp = NFSTOV(np);
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
		else if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
		    !(np->n_flag & NMODIFIED)) {
			/*
			 * Flush the attribute cache when opening a
			 * leaf node to ensure that fresh attributes
			 * are fetched in nfs_open() since we did not
			 * fetch attributes from the LOOKUP reply.
			 */
			mtx_lock(&np->n_mtx);
			np->n_attrstamp = 0;
			mtx_unlock(&np->n_mtx);
		}
	}
	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
		cnp->cn_flags |= SAVENAME;
	if ((cnp->cn_flags & MAKEENTRY) &&
	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
		/* Record ctime so the positive cache hit test above works. */
		np->n_ctime = np->n_vattr.na_vattr.va_ctime.tv_sec;
		cache_enter(dvp, newvp, cnp);
	}
	*vpp = newvp;
	return (0);
}

/*
 * nfs read call.
 * Just call ncl_bioread() to do the work.
 */
static int
nfs_read(struct vop_read_args *ap)
{
	struct vnode *vp = ap->a_vp;

	switch (vp->v_type) {
	case VREG:
		return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
	case VDIR:
		return (EISDIR);
	default:
		return (EOPNOTSUPP);
	}
}

/*
 * nfs readlink call
 */
static int
nfs_readlink(struct vop_readlink_args *ap)
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_type != VLNK)
		return (EINVAL);
	return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred));
}

/*
 * Do a readlink rpc.
 * Called by ncl_doio() from below the buffer cache.
 */
int
ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	int error, ret, attrflag;
	struct nfsvattr nfsva;

	error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva,
	    &attrflag, NULL);
	if (attrflag) {
		/* Cache any post-op attributes returned by the server. */
		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
		if (ret && !error)
			error = ret;
	}
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs read rpc call
 * Ditto above
 */
int
ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	int error, ret, attrflag;
	struct nfsvattr nfsva;

	error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva, &attrflag,
	    NULL);
	if (attrflag) {
		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
		if (ret && !error)
			error = ret;
	}
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs write call
 * *iomode is the commitment level requested/granted and *must_commit is
 * set when the server's write verifier changed, meaning previously
 * unstable-written data must be committed (rewritten) by the caller.
 */
int
ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
    int *iomode, int *must_commit, int called_from_strategy)
{
	struct nfsvattr nfsva;
	int error = 0, attrflag, ret;
	u_char verf[NFSX_VERF];
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

	*must_commit = 0;
	error = nfsrpc_write(vp, uiop, iomode, verf, cred,
	    uiop->uio_td, &nfsva, &attrflag, NULL, called_from_strategy);
	NFSLOCKMNT(nmp);
	if (!error && NFSHASWRITEVERF(nmp) &&
	    NFSBCMP(verf, nmp->nm_verf, NFSX_VERF)) {
		/* Server verifier changed (e.g. reboot): force a commit. */
		*must_commit = 1;
		NFSBCOPY(verf, nmp->nm_verf, NFSX_VERF);
	}
	NFSUNLOCKMNT(nmp);
	if (attrflag) {
		if (VTONFS(vp)->n_flag & ND_NFSV4)
			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1,
			    1);
		else
			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
			    1);
		if (ret && !error)
			error = ret;
	}
	if (vp->v_mount->mnt_kern_flag & MNTK_ASYNC)
		*iomode = NFSWRITE_FILESYNC;
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs mknod rpc
 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
 * mode set to specify the file type and the size field for rdev.
 */
static int
nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
    struct vattr *vap)
{
	struct nfsvattr nfsva, dnfsva;
	struct vnode *newvp = NULL;
	struct nfsnode *np = NULL, *dnp;
	struct nfsfh *nfhp;
	struct vattr vattr;
	int error = 0, attrflag, dattrflag;
	u_int32_t rdev;

	if (vap->va_type == VCHR || vap->va_type == VBLK)
		rdev = vap->va_rdev;
	else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
		rdev = 0xffffffff;
	else
		return (EOPNOTSUPP);
	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
		return (error);
	error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap,
	    rdev, vap->va_type, cnp->cn_cred, cnp->cn_thread, &dnfsva,
	    &nfsva, &nfhp, &attrflag, &dattrflag, NULL);
	if (!error) {
		/*
		 * If the reply did not include a file handle (NFSv2),
		 * fetch one with an extra LOOKUP.
		 */
		if (!nfhp)
			(void) nfsrpc_lookup(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread,
			    &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
			    NULL);
		if (nfhp)
			error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp,
			    cnp->cn_thread, &np, NULL);
	}
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	if (!error) {
		newvp = NFSTOV(np);
		if (attrflag)
			error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	}
	if (!error) {
		if ((cnp->cn_flags & MAKEENTRY))
			cache_enter(dvp, newvp, cnp);
		*vpp = newvp;
	} else if (NFS_ISV4(dvp)) {
		error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid,
vap->va_gid); 1419 } 1420 dnp = VTONFS(dvp); 1421 mtx_lock(&dnp->n_mtx); 1422 dnp->n_flag |= NMODIFIED; 1423 if (!dattrflag) 1424 dnp->n_attrstamp = 0; 1425 mtx_unlock(&dnp->n_mtx); 1426 return (error); 1427} 1428 1429/* 1430 * nfs mknod vop 1431 * just call nfs_mknodrpc() to do the work. 1432 */ 1433/* ARGSUSED */ 1434static int 1435nfs_mknod(struct vop_mknod_args *ap) 1436{ 1437 return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap)); 1438} 1439 1440static struct mtx nfs_cverf_mtx; 1441MTX_SYSINIT(nfs_cverf_mtx, &nfs_cverf_mtx, "NFS create verifier mutex", 1442 MTX_DEF); 1443 1444static nfsquad_t 1445nfs_get_cverf(void) 1446{ 1447 static nfsquad_t cverf; 1448 nfsquad_t ret; 1449 static int cverf_initialized = 0; 1450 1451 mtx_lock(&nfs_cverf_mtx); 1452 if (cverf_initialized == 0) { 1453 cverf.lval[0] = arc4random(); 1454 cverf.lval[1] = arc4random(); 1455 cverf_initialized = 1; 1456 } else 1457 cverf.qval++; 1458 ret = cverf; 1459 mtx_unlock(&nfs_cverf_mtx); 1460 1461 return (ret); 1462} 1463 1464/* 1465 * nfs file create call 1466 */ 1467static int 1468nfs_create(struct vop_create_args *ap) 1469{ 1470 struct vnode *dvp = ap->a_dvp; 1471 struct vattr *vap = ap->a_vap; 1472 struct componentname *cnp = ap->a_cnp; 1473 struct nfsnode *np = NULL, *dnp; 1474 struct vnode *newvp = NULL; 1475 struct nfsmount *nmp; 1476 struct nfsvattr dnfsva, nfsva; 1477 struct nfsfh *nfhp; 1478 nfsquad_t cverf; 1479 int error = 0, attrflag, dattrflag, fmode = 0; 1480 struct vattr vattr; 1481 1482 /* 1483 * Oops, not for me.. 1484 */ 1485 if (vap->va_type == VSOCK) 1486 return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); 1487 1488 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) 1489 return (error); 1490 if (vap->va_vaflags & VA_EXCLUSIVE) 1491 fmode |= O_EXCL; 1492 dnp = VTONFS(dvp); 1493 nmp = VFSTONFS(vnode_mount(dvp)); 1494again: 1495 /* For NFSv4, wait until any remove is done. 
*/ 1496 mtx_lock(&dnp->n_mtx); 1497 while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) { 1498 dnp->n_flag |= NREMOVEWANT; 1499 (void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0); 1500 } 1501 mtx_unlock(&dnp->n_mtx); 1502 1503 cverf = nfs_get_cverf(); 1504 error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen, 1505 vap, cverf, fmode, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, 1506 &nfhp, &attrflag, &dattrflag, NULL); 1507 if (!error) { 1508 if (nfhp == NULL) 1509 (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, 1510 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, 1511 &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 1512 NULL); 1513 if (nfhp != NULL) 1514 error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, 1515 cnp->cn_thread, &np, NULL); 1516 } 1517 if (dattrflag) 1518 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 1519 if (!error) { 1520 newvp = NFSTOV(np); 1521 if (attrflag) 1522 error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1523 0, 1); 1524 } 1525 if (error) { 1526 if (newvp != NULL) { 1527 vrele(newvp); 1528 newvp = NULL; 1529 } 1530 if (NFS_ISV34(dvp) && (fmode & O_EXCL) && 1531 error == NFSERR_NOTSUPP) { 1532 fmode &= ~O_EXCL; 1533 goto again; 1534 } 1535 } else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) { 1536 if (nfscl_checksattr(vap, &nfsva)) { 1537 error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred, 1538 cnp->cn_thread, &nfsva, &attrflag, NULL); 1539 if (error && (vap->va_uid != (uid_t)VNOVAL || 1540 vap->va_gid != (gid_t)VNOVAL)) { 1541 /* try again without setting uid/gid */ 1542 vap->va_uid = (uid_t)VNOVAL; 1543 vap->va_gid = (uid_t)VNOVAL; 1544 error = nfsrpc_setattr(newvp, vap, NULL, 1545 cnp->cn_cred, cnp->cn_thread, &nfsva, 1546 &attrflag, NULL); 1547 } 1548 if (attrflag) 1549 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 1550 NULL, 0, 1); 1551 } 1552 } 1553 if (!error) { 1554 if (cnp->cn_flags & MAKEENTRY) 1555 cache_enter(dvp, newvp, cnp); 1556 *ap->a_vpp = newvp; 1557 } else if (NFS_ISV4(dvp)) { 1558 
		error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid,
		    vap->va_gid);
	}
	mtx_lock(&dnp->n_mtx);
	dnp->n_flag |= NMODIFIED;
	if (!dattrflag)
		dnp->n_attrstamp = 0;
	mtx_unlock(&dnp->n_mtx);
	return (error);
}

/*
 * nfs file remove call
 * To try and make nfs semantics closer to ufs semantics, a file that has
 * other processes using the vnode is renamed instead of removed and then
 * removed later on the last close.
 * - If v_usecount > 1
 *	  If a rename is not already in the works
 *	    call nfs_sillyrename() to set it up
 *     else
 *	  do the remove rpc
 */
static int
nfs_remove(struct vop_remove_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *np = VTONFS(vp);
	int error = 0;
	struct vattr vattr;

	KASSERT((cnp->cn_flags & HASBUF) != 0, ("nfs_remove: no name"));
	KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount"));
	if (vp->v_type == VDIR)
		error = EPERM;
	else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
	    VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 &&
	    vattr.va_nlink > 1)) {
		/*
		 * Purge the name cache so that the chance of a lookup for
		 * the name succeeding while the remove is in progress is
		 * minimized. Without node locking it can still happen, such
		 * that an I/O op returns ESTALE, but since you get this if
		 * another host removes the file..
		 */
		cache_purge(vp);
		/*
		 * throw away biocache buffers, mainly to avoid
		 * unnecessary delayed writes later.
		 */
		error = ncl_vinvalbuf(vp, 0, cnp->cn_thread, 1);
		/* Do the rpc */
		if (error != EINTR && error != EIO)
			error = nfs_removerpc(dvp, vp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
		/*
		 * Kludge City: If the first reply to the remove rpc is lost..
		 *   the reply to the retransmitted request will be ENOENT
		 *   since the file was in fact removed
		 *   Therefore, we cheat and return success.
		 */
		if (error == ENOENT)
			error = 0;
	} else if (!np->n_sillyrename)
		error = nfs_sillyrename(dvp, vp, cnp);
	/* Attributes (e.g. va_nlink) are now stale; force a refetch. */
	np->n_attrstamp = 0;
	return (error);
}

/*
 * nfs file remove rpc called from nfs_inactive
 */
int
ncl_removeit(struct sillyrename *sp, struct vnode *vp)
{
	/*
	 * Make sure that the directory vnode is still valid.
	 * XXX we should lock sp->s_dvp here.
	 */
	if (sp->s_dvp->v_type == VBAD)
		return (0);
	return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen,
	    sp->s_cred, NULL));
}

/*
 * Nfs remove rpc, called from nfs_remove() and ncl_removeit().
 * Sets NREMOVEINPROG around the RPC so NFSv4 lookups/creates in the same
 * directory wait for the remove to finish (see nfs_lookup()/nfs_create()),
 * and wakes any such waiter afterwards.
 */
static int
nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
    int namelen, struct ucred *cred, struct thread *td)
{
	struct nfsvattr dnfsva;
	struct nfsnode *dnp = VTONFS(dvp);
	int error = 0, dattrflag;

	mtx_lock(&dnp->n_mtx);
	dnp->n_flag |= NREMOVEINPROG;
	mtx_unlock(&dnp->n_mtx);
	error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva,
	    &dattrflag, NULL);
	mtx_lock(&dnp->n_mtx);
	if ((dnp->n_flag & NREMOVEWANT)) {
		dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG);
		mtx_unlock(&dnp->n_mtx);
		wakeup((caddr_t)dnp);
	} else {
		dnp->n_flag &= ~NREMOVEINPROG;
		mtx_unlock(&dnp->n_mtx);
	}
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	mtx_lock(&dnp->n_mtx);
	dnp->n_flag |= NMODIFIED;
	if (!dattrflag)
		dnp->n_attrstamp = 0;
	mtx_unlock(&dnp->n_mtx);
	if (error && NFS_ISV4(dvp))
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs file rename call
 */
static int
nfs_rename(struct vop_rename_args *ap)
{
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tvp = ap->a_tvp;
	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *tcnp = ap->a_tcnp;
	struct componentname *fcnp = ap->a_fcnp;
	struct nfsnode *fnp = VTONFS(ap->a_fvp);
	struct nfsnode *tdnp = VTONFS(ap->a_tdvp);
	struct nfsv4node *newv4 = NULL;
	int error;

	KASSERT((tcnp->cn_flags & HASBUF) != 0 &&
	    (fcnp->cn_flags & HASBUF) != 0, ("nfs_rename: no name"));
	/* Check for cross-device rename */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
		goto out;
	}

	if (fvp == tvp) {
		ncl_printf("nfs_rename: fvp == tvp (can't happen)\n");
		error = 0;
		goto out;
	}
	if ((error = vn_lock(fvp, LK_EXCLUSIVE)))
		goto out;

	/*
	 * We have to flush B_DELWRI data prior to renaming
	 * the file. If we don't, the delayed-write buffers
	 * can be flushed out later after the file has gone stale
	 * under NFSV3.  NFSV2 does not have this problem because
	 * ( as far as I can tell ) it flushes dirty buffers more
	 * often.
	 *
	 * Skip the rename operation if the fsync fails, this can happen
	 * due to the server's volume being full, when we pushed out data
	 * that was written back to our cache earlier. Not checking for
	 * this condition can result in potential (silent) data loss.
	 */
	error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread);
	VOP_UNLOCK(fvp, 0);
	if (!error && tvp)
		error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread);
	if (error)
		goto out;

	/*
	 * If the tvp exists and is in use, sillyrename it before doing the
	 * rename of the new file over it.
	 * XXX Can't sillyrename a directory.
	 */
	if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
	    tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
		vput(tvp);
		tvp = NULL;
	}

	error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen,
	    tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
	    tcnp->cn_thread);

	if (!error) {
		/*
		 * For NFSv4, check to see if it is the same name and
		 * replace the name, if it is different.
		 * The replacement nfsv4node is allocated up front (before
		 * taking the mutexes) with M_WAITOK; if it turns out not
		 * to be needed it is freed below.
		 */
		MALLOC(newv4, struct nfsv4node *,
		    sizeof (struct nfsv4node) +
		    tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1,
		    M_NFSV4NODE, M_WAITOK);
		mtx_lock(&tdnp->n_mtx);
		mtx_lock(&fnp->n_mtx);
		if (fnp->n_v4 != NULL && fvp->v_type == VREG &&
		    (fnp->n_v4->n4_namelen != tcnp->cn_namelen ||
		     NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4),
		       tcnp->cn_namelen) ||
		     tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen ||
		     NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
		       tdnp->n_fhp->nfh_len))) {
#ifdef notdef
{ char nnn[100]; int nnnl;
nnnl = (tcnp->cn_namelen < 100) ? tcnp->cn_namelen : 99;
bcopy(tcnp->cn_nameptr, nnn, nnnl);
nnn[nnnl] = '\0';
printf("ren replace=%s\n",nnn);
}
#endif
			FREE((caddr_t)fnp->n_v4, M_NFSV4NODE);
			fnp->n_v4 = newv4;
			newv4 = NULL;
			fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len;
			fnp->n_v4->n4_namelen = tcnp->cn_namelen;
			NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
			    tdnp->n_fhp->nfh_len);
			NFSBCOPY(tcnp->cn_nameptr,
			    NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen);
		}
		mtx_unlock(&tdnp->n_mtx);
		mtx_unlock(&fnp->n_mtx);
		if (newv4 != NULL)
			FREE((caddr_t)newv4, M_NFSV4NODE);
	}

	if (fvp->v_type == VDIR) {
		if (tvp != NULL && tvp->v_type == VDIR)
			cache_purge(tdvp);
		cache_purge(fdvp);
	}

out:
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);
	/*
	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	return (error);
}

/*
 * nfs file rename rpc called from nfs_remove() above
 */
static int
nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp,
    struct sillyrename *sp)
{

	return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen,
	    sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred,
	    scnp->cn_thread));
}

/*
 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
 */
static int
nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr,
    int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr,
    int tnamelen, struct ucred *cred, struct thread *td)
{
	struct nfsvattr fnfsva, tnfsva;
	struct nfsnode *fdnp = VTONFS(fdvp);
	struct nfsnode *tdnp = VTONFS(tdvp);
	int error = 0, fattrflag, tattrflag;

	error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp,
	    tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag,
	    &tattrflag, NULL, NULL);
	/* Both directories were modified by the rename. */
	mtx_lock(&fdnp->n_mtx);
	fdnp->n_flag |= NMODIFIED;
	mtx_unlock(&fdnp->n_mtx);
	mtx_lock(&tdnp->n_mtx);
	tdnp->n_flag |= NMODIFIED;
	mtx_unlock(&tdnp->n_mtx);
	if (fattrflag)
		(void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, NULL, 0, 1);
	else
		fdnp->n_attrstamp = 0;
	if (tattrflag)
		(void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, NULL, 0, 1);
	else
		tdnp->n_attrstamp = 0;
	if (error && NFS_ISV4(fdvp))
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs hard link create call
 */
static int
nfs_link(struct vop_link_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *tdnp;
	struct nfsvattr nfsva, dnfsva;
	int error = 0, attrflag, dattrflag;

	if (vp->v_mount != tdvp->v_mount) {
		return (EXDEV);
	}

	/*
	 * Push all writes to the server, so that the attribute cache
	 * doesn't get "out of sync" with the server.
	 * XXX There should be a better way!
	 */
	VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread);

	error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen,
	    cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &attrflag,
	    &dattrflag, NULL);
	tdnp = VTONFS(tdvp);
	mtx_lock(&tdnp->n_mtx);
	tdnp->n_flag |= NMODIFIED;
	mtx_unlock(&tdnp->n_mtx);
	if (attrflag)
		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
	else
		VTONFS(vp)->n_attrstamp = 0;
	if (dattrflag)
		(void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, NULL, 0, 1);
	else
		tdnp->n_attrstamp = 0;
	/*
	 * If negative lookup caching is enabled, I might as well
	 * add an entry for this node. Not necessary for correctness,
	 * but if negative caching is enabled, then the system
	 * must care about lookup caching hit rate, so...
	 */
	if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 &&
	    (cnp->cn_flags & MAKEENTRY))
		cache_enter(tdvp, vp, cnp);
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0,
		    (gid_t)0);
	return (error);
}

/*
 * nfs symbolic link create call
 */
static int
nfs_symlink(struct vop_symlink_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsvattr nfsva, dnfsva;
	struct nfsfh *nfhp;
	struct nfsnode *np = NULL, *dnp;
	struct vnode *newvp = NULL;
	int error = 0, attrflag, dattrflag, ret;

	vap->va_type = VLNK;
	error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    ap->a_target, vap, cnp->cn_cred, cnp->cn_thread, &dnfsva,
	    &nfsva, &nfhp, &attrflag, &dattrflag, NULL);
	if (nfhp) {
		ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread,
		    &np, NULL);
		if (!ret)
			newvp = NFSTOV(np);
		else if (!error)
			error = ret;
	}
	if (newvp != NULL) {
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	} else if (!error) {
		/*
		 * If we do not have an error and we could not extract the
		 * newvp from the response due to the request being NFSv2, we
		 * have to do a lookup in order to obtain a newvp to return.
		 */
		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
		    cnp->cn_cred, cnp->cn_thread, &np);
		if (!error)
			newvp = NFSTOV(np);
	}
	if (error) {
		if (newvp)
			vput(newvp);
		if (NFS_ISV4(dvp))
			error = nfscl_maperr(cnp->cn_thread, error,
			    vap->va_uid, vap->va_gid);
	} else {
		/*
		 * If negative lookup caching is enabled, I might as well
		 * add an entry for this node. Not necessary for correctness,
		 * but if negative caching is enabled, then the system
		 * must care about lookup caching hit rate, so...
		 */
		if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 &&
		    (cnp->cn_flags & MAKEENTRY))
			cache_enter(dvp, newvp, cnp);
		*ap->a_vpp = newvp;
	}

	dnp = VTONFS(dvp);
	mtx_lock(&dnp->n_mtx);
	dnp->n_flag |= NMODIFIED;
	mtx_unlock(&dnp->n_mtx);
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	else
		dnp->n_attrstamp = 0;
	return (error);
}

/*
 * nfs make dir call
 */
static int
nfs_mkdir(struct vop_mkdir_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *np = NULL, *dnp;
	struct vnode *newvp = NULL;
	struct vattr vattr;
	struct nfsfh *nfhp;
	struct nfsvattr nfsva, dnfsva;
	int error = 0, attrflag, dattrflag, ret;

	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
		return (error);
	vap->va_type = VDIR;
	error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp,
	    &attrflag, &dattrflag, NULL);
	dnp = VTONFS(dvp);
	mtx_lock(&dnp->n_mtx);
	dnp->n_flag |= NMODIFIED;
	mtx_unlock(&dnp->n_mtx);
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	else
		dnp->n_attrstamp = 0;
	if (nfhp) {
		ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread,
		    &np, NULL);
		if (!ret) {
			newvp = NFSTOV(np);
			if (attrflag)
				(void) nfscl_loadattrcache(&newvp, &nfsva,
				    NULL, NULL, 0, 1);
		} else if (!error)
			error = ret;
	}
	if (!error && newvp == NULL) {
		/* No handle in the reply (NFSv2): look the new dir up. */
		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
		    cnp->cn_cred, cnp->cn_thread, &np);
		if (!error) {
			newvp = NFSTOV(np);
			if (newvp->v_type != VDIR)
				error = EEXIST;
		}
	}
	if (error) {
		if (newvp)
			vput(newvp);
		if (NFS_ISV4(dvp))
			error = nfscl_maperr(cnp->cn_thread, error,
			    vap->va_uid, vap->va_gid);
	} else {
		/*
		 * If negative lookup caching is enabled, I might as well
		 * add an entry for this node. Not necessary for correctness,
		 * but if negative caching is enabled, then the system
		 * must care about lookup caching hit rate, so...
		 */
		if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 &&
		    (cnp->cn_flags & MAKEENTRY))
			cache_enter(dvp, newvp, cnp);
		*ap->a_vpp = newvp;
	}
	return (error);
}

/*
 * nfs remove directory call
 */
static int
nfs_rmdir(struct vop_rmdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *dnp;
	struct nfsvattr dnfsva;
	int error, dattrflag;

	if (dvp == vp)
		return (EINVAL);
	error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    cnp->cn_cred, cnp->cn_thread, &dnfsva, &dattrflag, NULL);
	dnp = VTONFS(dvp);
	mtx_lock(&dnp->n_mtx);
	dnp->n_flag |= NMODIFIED;
	mtx_unlock(&dnp->n_mtx);
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	else
		dnp->n_attrstamp = 0;

	cache_purge(dvp);
	cache_purge(vp);
	if (error && NFS_ISV4(dvp))
		error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0,
		    (gid_t)0);
	/*
	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
2093 */ 2094 if (error == ENOENT) 2095 error = 0; 2096 return (error); 2097} 2098 2099/* 2100 * nfs readdir call 2101 */ 2102static int 2103nfs_readdir(struct vop_readdir_args *ap) 2104{ 2105 struct vnode *vp = ap->a_vp; 2106 struct nfsnode *np = VTONFS(vp); 2107 struct uio *uio = ap->a_uio; 2108 int tresid, error = 0; 2109 struct vattr vattr; 2110 2111 if (vp->v_type != VDIR) 2112 return(EPERM); 2113 2114 /* 2115 * First, check for hit on the EOF offset cache 2116 */ 2117 if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && 2118 (np->n_flag & NMODIFIED) == 0) { 2119 if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) { 2120 mtx_lock(&np->n_mtx); 2121 if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) || 2122 !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { 2123 mtx_unlock(&np->n_mtx); 2124 NFSINCRGLOBAL(newnfsstats.direofcache_hits); 2125 return (0); 2126 } else 2127 mtx_unlock(&np->n_mtx); 2128 } 2129 } 2130 2131 /* 2132 * Call ncl_bioread() to do the real work. 2133 */ 2134 tresid = uio->uio_resid; 2135 error = ncl_bioread(vp, uio, 0, ap->a_cred); 2136 2137 if (!error && uio->uio_resid == tresid) 2138 NFSINCRGLOBAL(newnfsstats.direofcache_misses); 2139 return (error); 2140} 2141 2142/* 2143 * Readdir rpc call. 2144 * Called from below the buffer cache by ncl_doio(). 2145 */ 2146int 2147ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 2148 struct thread *td) 2149{ 2150 struct nfsvattr nfsva; 2151 nfsuint64 *cookiep, cookie; 2152 struct nfsnode *dnp = VTONFS(vp); 2153 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2154 int error = 0, eof, attrflag; 2155 2156 KASSERT(uiop->uio_iovcnt == 1 && 2157 (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && 2158 (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, 2159 ("nfs readdirrpc bad uio")); 2160 2161 /* 2162 * If there is no cookie, assume directory was stale. 
2163 */ 2164 ncl_dircookie_lock(dnp); 2165 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); 2166 if (cookiep) { 2167 cookie = *cookiep; 2168 ncl_dircookie_unlock(dnp); 2169 } else { 2170 ncl_dircookie_unlock(dnp); 2171 return (NFSERR_BAD_COOKIE); 2172 } 2173 2174 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) 2175 (void)ncl_fsinfo(nmp, vp, cred, td); 2176 2177 error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva, 2178 &attrflag, &eof, NULL); 2179 if (attrflag) 2180 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 2181 2182 if (!error) { 2183 /* 2184 * We are now either at the end of the directory or have filled 2185 * the block. 2186 */ 2187 if (eof) 2188 dnp->n_direofoffset = uiop->uio_offset; 2189 else { 2190 if (uiop->uio_resid > 0) 2191 ncl_printf("EEK! readdirrpc resid > 0\n"); 2192 ncl_dircookie_lock(dnp); 2193 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); 2194 *cookiep = cookie; 2195 ncl_dircookie_unlock(dnp); 2196 } 2197 } else if (NFS_ISV4(vp)) { 2198 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2199 } 2200 return (error); 2201} 2202 2203/* 2204 * NFS V3 readdir plus RPC. Used in place of ncl_readdirrpc(). 2205 */ 2206int 2207ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 2208 struct thread *td) 2209{ 2210 struct nfsvattr nfsva; 2211 nfsuint64 *cookiep, cookie; 2212 struct nfsnode *dnp = VTONFS(vp); 2213 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2214 int error = 0, attrflag, eof; 2215 2216 KASSERT(uiop->uio_iovcnt == 1 && 2217 (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && 2218 (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, 2219 ("nfs readdirplusrpc bad uio")); 2220 2221 /* 2222 * If there is no cookie, assume directory was stale. 
2223 */ 2224 ncl_dircookie_lock(dnp); 2225 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); 2226 if (cookiep) { 2227 cookie = *cookiep; 2228 ncl_dircookie_unlock(dnp); 2229 } else { 2230 ncl_dircookie_unlock(dnp); 2231 return (NFSERR_BAD_COOKIE); 2232 } 2233 2234 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) 2235 (void)ncl_fsinfo(nmp, vp, cred, td); 2236 error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva, 2237 &attrflag, &eof, NULL); 2238 if (attrflag) 2239 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 2240 2241 if (!error) { 2242 /* 2243 * We are now either at end of the directory or have filled the 2244 * the block. 2245 */ 2246 if (eof) 2247 dnp->n_direofoffset = uiop->uio_offset; 2248 else { 2249 if (uiop->uio_resid > 0) 2250 ncl_printf("EEK! readdirplusrpc resid > 0\n"); 2251 ncl_dircookie_lock(dnp); 2252 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); 2253 *cookiep = cookie; 2254 ncl_dircookie_unlock(dnp); 2255 } 2256 } else if (NFS_ISV4(vp)) { 2257 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2258 } 2259 return (error); 2260} 2261 2262/* 2263 * Silly rename. To make the NFS filesystem that is stateless look a little 2264 * more like the "ufs" a remove of an active vnode is translated to a rename 2265 * to a funny looking filename that is removed by nfs_inactive on the 2266 * nfsnode. There is the potential for another process on a different client 2267 * to create the same funny name between the nfs_lookitup() fails and the 2268 * nfs_rename() completes, but... 
2269 */ 2270static int 2271nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) 2272{ 2273 struct sillyrename *sp; 2274 struct nfsnode *np; 2275 int error; 2276 short pid; 2277 unsigned int lticks; 2278 2279 cache_purge(dvp); 2280 np = VTONFS(vp); 2281 KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir")); 2282 MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename), 2283 M_NEWNFSREQ, M_WAITOK); 2284 sp->s_cred = crhold(cnp->cn_cred); 2285 sp->s_dvp = dvp; 2286 VREF(dvp); 2287 2288 /* 2289 * Fudge together a funny name. 2290 * Changing the format of the funny name to accomodate more 2291 * sillynames per directory. 2292 * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is 2293 * CPU ticks since boot. 2294 */ 2295 pid = cnp->cn_thread->td_proc->p_pid; 2296 lticks = (unsigned int)ticks; 2297 for ( ; ; ) { 2298 sp->s_namlen = sprintf(sp->s_name, 2299 ".nfs.%08x.%04x4.4", lticks, 2300 pid); 2301 if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2302 cnp->cn_thread, NULL)) 2303 break; 2304 lticks++; 2305 } 2306 error = nfs_renameit(dvp, vp, cnp, sp); 2307 if (error) 2308 goto bad; 2309 error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2310 cnp->cn_thread, &np); 2311 np->n_sillyrename = sp; 2312 return (0); 2313bad: 2314 vrele(sp->s_dvp); 2315 crfree(sp->s_cred); 2316 free((caddr_t)sp, M_NEWNFSREQ); 2317 return (error); 2318} 2319 2320/* 2321 * Look up a file name and optionally either update the file handle or 2322 * allocate an nfsnode, depending on the value of npp. 
 * npp == NULL --> just do the lookup
 * *npp == NULL --> allocate a new nfsnode and make sure attributes are
 *	handled too
 * *npp != NULL --> update the file handle in the vnode
 * Returns 0 on success; on failure any newly acquired vnode reference is
 * dropped and, for NFSv4, the error is mapped via nfscl_maperr().
 */
static int
nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred,
    struct thread *td, struct nfsnode **npp)
{
	struct vnode *newvp = NULL, *vp;
	struct nfsnode *np, *dnp = VTONFS(dvp);
	struct nfsfh *nfhp, *onfhp;
	struct nfsvattr nfsva, dnfsva;
	struct componentname cn;
	int error = 0, attrflag, dattrflag;
	u_int hash;

	error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva,
	    &nfhp, &attrflag, &dattrflag, NULL);
	/* Cache the directory's post-op attributes when the server sent them. */
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	if (npp && !error) {
		if (*npp != NULL) {
			/* Caller supplied a node: update its file handle. */
			np = *npp;
			vp = NFSTOV(np);
			/*
			 * For NFSv4, check to see if it is the same name and
			 * replace the name, if it is different.
			 */
			if (np->n_v4 != NULL && nfsva.na_type == VREG &&
			    (np->n_v4->n4_namelen != len ||
			     NFSBCMP(name, NFS4NODENAME(np->n_v4), len) ||
			     dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen ||
			     NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
			     dnp->n_fhp->nfh_len))) {
#ifdef notdef
{ char nnn[100]; int nnnl;
nnnl = (len < 100) ? len : 99;
bcopy(name, nnn, nnnl);
nnn[nnnl] = '\0';
printf("replace=%s\n",nnn);
}
#endif
			    FREE((caddr_t)np->n_v4, M_NFSV4NODE);
			    MALLOC(np->n_v4, struct nfsv4node *,
				sizeof (struct nfsv4node) +
				dnp->n_fhp->nfh_len + len - 1,
				M_NFSV4NODE, M_WAITOK);
			    np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len;
			    np->n_v4->n4_namelen = len;
			    NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
				dnp->n_fhp->nfh_len);
			    NFSBCOPY(name, NFS4NODENAME(np->n_v4), len);
			}
			hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len,
			    FNV1_32_INIT);
			onfhp = np->n_fhp;
			/*
			 * Rehash node for new file handle.
			 */
			vfs_hash_rehash(vp, hash);
			np->n_fhp = nfhp;
			if (onfhp != NULL)
				FREE((caddr_t)onfhp, M_NFSFH);
			newvp = NFSTOV(np);
		} else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) {
			/* Lookup resolved to the directory itself (e.g. "."). */
			FREE((caddr_t)nfhp, M_NFSFH);
			VREF(dvp);
			newvp = dvp;
		} else {
			/* Get (or create) an nfsnode for the new handle. */
			cn.cn_nameptr = name;
			cn.cn_namelen = len;
			error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td,
			    &np, NULL);
			if (error)
				return (error);
			newvp = NFSTOV(np);
		}
		/*
		 * A freshly allocated node with no attributes is useless to
		 * the caller; treat it as a failed lookup.
		 */
		if (!attrflag && *npp == NULL) {
			vrele(newvp);
			return (ENOENT);
		}
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	}
	if (npp && *npp == NULL) {
		if (error) {
			if (newvp) {
				/* dvp was vrele-referenced, others vput. */
				if (newvp == dvp)
					vrele(newvp);
				else
					vput(newvp);
			}
		} else
			*npp = np;
	}
	if (error && NFS_ISV4(dvp))
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * Nfs Version 3 and 4 commit rpc.
 * Asks the server to commit the byte range [offset, offset+cnt) to stable
 * storage.  Returns 0 immediately if no write verifier has been seen yet;
 * returns NFSERR_STALEWRITEVERF if the server's verifier changed (meaning
 * previously "committed" writes may have been lost and must be redone).
 */
int
ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
    struct thread *td)
{
	struct nfsvattr nfsva;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error, attrflag;
	u_char verf[NFSX_VERF];

	mtx_lock(&nmp->nm_mtx);
	if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
		mtx_unlock(&nmp->nm_mtx);
		return (0);
	}
	mtx_unlock(&nmp->nm_mtx);
	error = nfsrpc_commit(vp, offset, cnt, cred, td, verf, &nfsva,
	    &attrflag, NULL);
	if (!error) {
		/* Verifier mismatch --> server rebooted since the writes. */
		if (NFSBCMP((caddr_t)nmp->nm_verf, verf, NFSX_VERF)) {
			NFSBCOPY(verf, (caddr_t)nmp->nm_verf, NFSX_VERF);
			error = NFSERR_STALEWRITEVERF;
		}
		if (!error && attrflag)
			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL,
			    0, 1);
	} else if (NFS_ISV4(vp)) {
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	}
	return (error);
}

/*
 * Strategy routine.
 * For async requests when nfsiod(s) are running, queue the request by
 * calling ncl_asyncio(), otherwise just all ncl_doio() to do the
 * request.
 */
static int
nfs_strategy(struct vop_strategy_args *ap)
{
	struct buf *bp = ap->a_bp;
	struct ucred *cr;

	KASSERT(!(bp->b_flags & B_DONE),
	    ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
	BUF_ASSERT_HELD(bp);

	if (bp->b_iocmd == BIO_READ)
		cr = bp->b_rcred;
	else
		cr = bp->b_wcred;

	/*
	 * If the op is asynchronous and an i/o daemon is waiting
	 * queue the request, wake it up and wait for completion
	 * otherwise just do it ourselves.
	 */
	if ((bp->b_flags & B_ASYNC) == 0 ||
	    ncl_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread))
		(void) ncl_doio(ap->a_vp, bp, cr, curthread, 1);
	return (0);
}

/*
 * fsync vnode op. Just call ncl_flush() with commit == 1.
 */
/* ARGSUSED */
static int
nfs_fsync(struct vop_fsync_args *ap)
{
	return (ncl_flush(ap->a_vp, ap->a_waitfor, NULL, ap->a_td, 1, 0));
}

/*
 * Flush all the blocks associated with a vnode.
 * Walk through the buffer pool and push any dirty pages
 * associated with the vnode.
 * If the called_from_renewthread argument is TRUE, it has been called
 * from the NFSv4 renew thread and, as such, cannot block indefinitely
 * waiting for a buffer write to complete.
 */
int
ncl_flush(struct vnode *vp, int waitfor, struct ucred *cred, struct thread *td,
    int commit, int called_from_renewthread)
{
	struct nfsnode *np = VTONFS(vp);
	struct buf *bp;
	int i;
	struct buf *nbp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
	int passone = 1, trycnt = 0;
	u_quad_t off, endoff, toff;
	struct ucred* wcred = NULL;
	struct buf **bvec = NULL;
	struct bufobj *bo;
#ifndef NFS_COMMITBVECSIZ
#define	NFS_COMMITBVECSIZ	20
#endif
	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
	int bvecsize = 0, bveccount;

	/* Bound the sleeps so the renew thread cannot block forever. */
	if (called_from_renewthread != 0)
		slptimeo = hz;
	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = NFS_PCATCH;
	if (!commit)
		passone = 0;
	bo = &vp->v_bufobj;
	/*
	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
	 * server, but has not been committed to stable storage on the server
	 * yet. On the first pass, the byte range is worked out and the commit
	 * rpc is done. On the second pass, ncl_writebp() is called to do the
	 * job.
	 */
again:
	off = (u_quad_t)-1;
	endoff = 0;
	bvecpos = 0;
	if (NFS_ISV34(vp) && commit) {
		if (bvec != NULL && bvec != bvec_on_stack)
			free(bvec, M_TEMP);
		/*
		 * Count up how many buffers waiting for a commit.
		 */
		bveccount = 0;
		BO_LOCK(bo);
		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
			if (!BUF_ISLOCKED(bp) &&
			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
				== (B_DELWRI | B_NEEDCOMMIT))
				bveccount++;
		}
		/*
		 * Allocate space to remember the list of bufs to commit. It is
		 * important to use M_NOWAIT here to avoid a race with nfs_write.
		 * If we can't get memory (for whatever reason), we will end up
		 * committing the buffers one-by-one in the loop below.
		 */
		if (bveccount > NFS_COMMITBVECSIZ) {
			/*
			 * Release the vnode interlock to avoid a lock
			 * order reversal.
			 */
			BO_UNLOCK(bo);
			bvec = (struct buf **)
				malloc(bveccount * sizeof(struct buf *),
				       M_TEMP, M_NOWAIT);
			BO_LOCK(bo);
			if (bvec == NULL) {
				bvec = bvec_on_stack;
				bvecsize = NFS_COMMITBVECSIZ;
			} else
				bvecsize = bveccount;
		} else {
			bvec = bvec_on_stack;
			bvecsize = NFS_COMMITBVECSIZ;
		}
		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
			if (bvecpos >= bvecsize)
				break;
			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
				nbp = TAILQ_NEXT(bp, b_bobufs);
				continue;
			}
			if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
			    (B_DELWRI | B_NEEDCOMMIT)) {
				BUF_UNLOCK(bp);
				nbp = TAILQ_NEXT(bp, b_bobufs);
				continue;
			}
			BO_UNLOCK(bo);
			bremfree(bp);
			/*
			 * Work out if all buffers are using the same cred
			 * so we can deal with them all with one commit.
			 *
			 * NOTE: we are not clearing B_DONE here, so we have
			 * to do it later on in this routine if we intend to
			 * initiate I/O on the bp.
			 *
			 * Note: to avoid loopback deadlocks, we do not
			 * assign b_runningbufspace.
			 */
			if (wcred == NULL)
				wcred = bp->b_wcred;
			else if (wcred != bp->b_wcred)
				wcred = NOCRED;
			vfs_busy_pages(bp, 1);

			BO_LOCK(bo);
			/*
			 * bp is protected by being locked, but nbp is not
			 * and vfs_busy_pages() may sleep.  We have to
			 * recalculate nbp.
			 */
			nbp = TAILQ_NEXT(bp, b_bobufs);

			/*
			 * A list of these buffers is kept so that the
			 * second loop knows which buffers have actually
			 * been committed. This is necessary, since there
			 * may be a race between the commit rpc and new
			 * uncommitted writes on the file.
			 */
			bvec[bvecpos++] = bp;
			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
				bp->b_dirtyoff;
			if (toff < off)
				off = toff;
			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
			if (toff > endoff)
				endoff = toff;
		}
		BO_UNLOCK(bo);
	}
	if (bvecpos > 0) {
		/*
		 * Commit data on the server, as required.
		 * If all bufs are using the same wcred, then use that with
		 * one call for all of them, otherwise commit each one
		 * separately.
		 */
		if (wcred != NOCRED)
			retv = ncl_commit(vp, off, (int)(endoff - off),
			    wcred, td);
		else {
			retv = 0;
			for (i = 0; i < bvecpos; i++) {
				off_t off, size;
				bp = bvec[i];
				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
					bp->b_dirtyoff;
				size = (u_quad_t)(bp->b_dirtyend
						  - bp->b_dirtyoff);
				retv = ncl_commit(vp, off, (int)size,
				    bp->b_wcred, td);
				if (retv) break;
			}
		}

		if (retv == NFSERR_STALEWRITEVERF)
			ncl_clearcommit(vp->v_mount);

		/*
		 * Now, either mark the blocks I/O done or mark the
		 * blocks dirty, depending on whether the commit
		 * succeeded.
		 */
		for (i = 0; i < bvecpos; i++) {
			bp = bvec[i];
			bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
			if (retv) {
				/*
				 * Error, leave B_DELWRI intact
				 */
				vfs_unbusy_pages(bp);
				brelse(bp);
			} else {
				/*
				 * Success, remove B_DELWRI ( bundirty() ).
				 *
				 * b_dirtyoff/b_dirtyend seem to be NFS
				 * specific.  We should probably move that
				 * into bundirty(). XXX
				 */
				bufobj_wref(bo);
				bp->b_flags |= B_ASYNC;
				bundirty(bp);
				bp->b_flags &= ~B_DONE;
				bp->b_ioflags &= ~BIO_ERROR;
				bp->b_dirtyoff = bp->b_dirtyend = 0;
				bufdone(bp);
			}
		}
	}

	/*
	 * Start/do any write(s) that are required.
	 */
loop:
	BO_LOCK(bo);
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
			if (waitfor != MNT_WAIT || passone)
				continue;

			error = BUF_TIMELOCK(bp,
			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
			    BO_MTX(bo), "nfsfsync", slpflag, slptimeo);
			if (error == 0) {
				BUF_UNLOCK(bp);
				goto loop;
			}
			if (error == ENOLCK) {
				error = 0;
				goto loop;
			}
			if (called_from_renewthread != 0) {
				/*
				 * Return EIO so the flush will be retried
				 * later.
				 */
				error = EIO;
				goto done;
			}
			if (newnfs_sigintr(nmp, td)) {
				error = EINTR;
				goto done;
			}
			if (slpflag & PCATCH) {
				slpflag = 0;
				slptimeo = 2 * hz;
			}
			goto loop;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("nfs_fsync: not dirty");
		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
			BUF_UNLOCK(bp);
			continue;
		}
		BO_UNLOCK(bo);
		bremfree(bp);
		/*
		 * NOTE(review): both branches of this if/else are identical;
		 * presumably a leftover from an older version where the else
		 * branch set additional flags.  Left as-is to preserve
		 * behavior.
		 */
		if (passone || !commit)
		    bp->b_flags |= B_ASYNC;
		else
		    bp->b_flags |= B_ASYNC;
		bwrite(bp);
		if (newnfs_sigintr(nmp, td)) {
			error = EINTR;
			goto done;
		}
		goto loop;
	}
	if (passone) {
		passone = 0;
		BO_UNLOCK(bo);
		goto again;
	}
	if (waitfor == MNT_WAIT) {
		while (bo->bo_numoutput) {
			error = bufobj_wwait(bo, slpflag, slptimeo);
			if (error) {
			  	BO_UNLOCK(bo);
				if (called_from_renewthread != 0) {
					/*
					 * Return EIO so that the flush will be
					 * retried later.
					 */
					error = EIO;
					goto done;
				}
				error = newnfs_sigintr(nmp, td);
				if (error)
					goto done;
				if (slpflag & PCATCH) {
					slpflag = 0;
					slptimeo = 2 * hz;
				}
				BO_LOCK(bo);
			}
		}
		if (bo->bo_dirty.bv_cnt != 0 && commit) {
			BO_UNLOCK(bo);
			goto loop;
		}
		/*
		 * Wait for all the async IO requests to drain
		 */
		BO_UNLOCK(bo);
		mtx_lock(&np->n_mtx);
		while (np->n_directio_asyncwr > 0) {
			np->n_flag |= NFSYNCWAIT;
			error = newnfs_msleep(td, &np->n_directio_asyncwr,
			    &np->n_mtx, slpflag | (PRIBIO + 1),
			    "nfsfsync", 0);
			if (error) {
				if (newnfs_sigintr(nmp, td)) {
					mtx_unlock(&np->n_mtx);
					error = EINTR;
					goto done;
				}
			}
		}
		mtx_unlock(&np->n_mtx);
	} else
		BO_UNLOCK(bo);
	mtx_lock(&np->n_mtx);
	if (np->n_flag & NWRITEERR) {
		/* Report a deferred write error back to the caller once. */
		error = np->n_error;
		np->n_flag &= ~NWRITEERR;
	}
  	if (commit && bo->bo_dirty.bv_cnt == 0 &&
	    bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
  		np->n_flag &= ~NMODIFIED;
	mtx_unlock(&np->n_mtx);
done:
	if (bvec != NULL && bvec != bvec_on_stack)
		free(bvec, M_TEMP);
	if (error == 0 && commit != 0 && waitfor == MNT_WAIT &&
	    (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 ||
	     np->n_directio_asyncwr != 0) && trycnt++ < 5) {
		/* try, try again... */
		passone = 1;
		wcred = NULL;
		bvec = NULL;
		bvecsize = 0;
/* NOTE(review): leftover debug output; consider removing or rate-limiting. */
printf("try%d\n", trycnt);
		goto again;
	}
	return (error);
}

/*
 * NFS advisory byte-level locks.
 */
static int
nfs_advlock(struct vop_advlock_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct ucred *cred;
	struct nfsnode *np = VTONFS(ap->a_vp);
	struct proc *p = (struct proc *)ap->a_id;
	struct thread *td = curthread;	/* XXX */
	struct vattr va;
	int ret, error = EOPNOTSUPP;
	u_quad_t size;

	if (NFS_ISV4(vp) && (ap->a_flags & F_POSIX)) {
		/* NFSv4 POSIX locks are done over the wire. */
		cred = p->p_ucred;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if (vp->v_iflag & VI_DOOMED) {
			/* Vnode was recycled while we slept on the lock. */
			VOP_UNLOCK(vp, 0);
			return (EBADF);
		}

		/*
		 * If this is unlocking a write locked region, flush and
		 * commit them before unlocking. This is required by
		 * RFC3530 Sec. 9.3.2.
		 */
		if (ap->a_op == F_UNLCK &&
		    nfscl_checkwritelocked(vp, ap->a_fl, cred, td))
			(void) ncl_flush(vp, MNT_WAIT, cred, td, 1, 0);

		/*
		 * Loop around doing the lock op, while a blocking lock
		 * must wait for the lock op to succeed.
		 */
		do {
			ret = nfsrpc_advlock(vp, np->n_size, ap->a_op,
			    ap->a_fl, 0, cred, td);
			if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
			    ap->a_op == F_SETLK) {
				/* Drop the vnode lock while we nap. */
				VOP_UNLOCK(vp, 0);
				error = nfs_catnap(PZERO | PCATCH, ret,
				    "ncladvl");
				if (error)
					return (EINTR);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				if (vp->v_iflag & VI_DOOMED) {
					VOP_UNLOCK(vp, 0);
					return (EBADF);
				}
			}
		} while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
		    ap->a_op == F_SETLK);
		if (ret == NFSERR_DENIED) {
			VOP_UNLOCK(vp, 0);
			return (EAGAIN);
		} else if (ret == EINVAL || ret == EBADF || ret == EINTR) {
			VOP_UNLOCK(vp, 0);
			return (ret);
		} else if (ret != 0) {
			VOP_UNLOCK(vp, 0);
			return (EACCES);
		}

		/*
		 * Now, if we just got a lock, invalidate data in the buffer
		 * cache, as required, so that the coherency conforms with
		 * RFC3530 Sec. 9.3.2.
		 */
		if (ap->a_op == F_SETLK) {
			if ((np->n_flag & NMODIFIED) == 0) {
				np->n_attrstamp = 0;
				ret = VOP_GETATTR(vp, &va, cred);
			}
			if ((np->n_flag & NMODIFIED) || ret ||
			    np->n_change != va.va_filerev) {
				(void) ncl_vinvalbuf(vp, V_SAVE, td, 1);
				np->n_attrstamp = 0;
				ret = VOP_GETATTR(vp, &va, cred);
				if (!ret) {
					np->n_mtime = va.va_mtime;
					np->n_change = va.va_filerev;
				}
			}
		}
		VOP_UNLOCK(vp, 0);
		return (0);
	} else if (!NFS_ISV4(vp)) {
		error = vn_lock(vp, LK_SHARED);
		if (error)
			return (error);
		if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
			/* No lock daemon: handle locks locally. */
			size = VTONFS(vp)->n_size;
			VOP_UNLOCK(vp, 0);
			error = lf_advlock(ap, &(vp->v_lockf), size);
		} else {
			/* Hand the request to the NLM client, if loaded. */
			if (ncl_advlock_p)
				error = ncl_advlock_p(ap);
			else
				error = ENOLCK;
		}
	}
	return (error);
}

/*
 * NFS advisory byte-level locks, asynchronous variant.
 * Only supported for NFSv2/3 with local locking (NFSMNT_NOLOCKD).
 */
static int
nfs_advlockasync(struct vop_advlockasync_args *ap)
{
	struct vnode *vp = ap->a_vp;
	u_quad_t size;
	int error;

	if (NFS_ISV4(vp))
		return (EOPNOTSUPP);
	error = vn_lock(vp, LK_SHARED);
	if (error)
		return (error);
	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
		size = VTONFS(vp)->n_size;
		VOP_UNLOCK(vp, 0);
		error = lf_advlockasync(ap, &(vp->v_lockf), size);
	} else {
		VOP_UNLOCK(vp, 0);
		error = EOPNOTSUPP;
	}
	return (error);
}

/*
 * Print out the contents of an nfsnode.
 */
static int
nfs_print(struct vop_print_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);

	/*
	 * NOTE(review): %ld/%x assume na_fileid is long and na_fsid fits in
	 * an int — confirm against the nfsvattr declaration in nfsport.h.
	 */
	ncl_printf("\tfileid %ld fsid 0x%x",
	    np->n_vattr.na_fileid, np->n_vattr.na_fsid);
	if (vp->v_type == VFIFO)
		fifo_printinfo(vp);
	printf("\n");
	return (0);
}

/*
 * This is the "real" nfs::bwrite(struct buf*).
 * We set B_CACHE if this is a VMIO buffer.
 */
int
ncl_writebp(struct buf *bp, int force __unused, struct thread *td)
{
	int s;
	/* Snapshot flags: B_ASYNC/B_DELWRI decide the wait path below. */
	int oldflags = bp->b_flags;
#if 0
	int retv = 1;
	off_t off;
#endif

	BUF_ASSERT_HELD(bp);

	if (bp->b_flags & B_INVAL) {
		/* Buffer already invalidated; nothing to write. */
		brelse(bp);
		return(0);
	}

	bp->b_flags |= B_CACHE;

	/*
	 * Undirty the bp.  We will redirty it later if the I/O fails.
	 */

	s = splbio();
	bundirty(bp);
	bp->b_flags &= ~B_DONE;
	bp->b_ioflags &= ~BIO_ERROR;
	bp->b_iocmd = BIO_WRITE;

	bufobj_wref(bp->b_bufobj);
	curthread->td_ru.ru_oublock++;
	splx(s);

	/*
	 * Note: to avoid loopback deadlocks, we do not
	 * assign b_runningbufspace.
	 */
	vfs_busy_pages(bp, 1);

	BUF_KERNPROC(bp);
	bp->b_iooffset = dbtob(bp->b_blkno);
	bstrategy(bp);

	if( (oldflags & B_ASYNC) == 0) {
		/* Synchronous write: wait for the I/O to finish. */
		int rtval = bufwait(bp);

		if (oldflags & B_DELWRI) {
			s = splbio();
			reassignbuf(bp);
			splx(s);
		}
		brelse(bp);
		return (rtval);
	}

	return (0);
}

/*
 * nfs special file access vnode op.
 * Essentially just get vattr and then imitate iaccess() since the device is
 * local to the client.
 */
static int
nfsspec_access(struct vop_access_args *ap)
{
	struct vattr *vap;
	struct ucred *cred = ap->a_cred;
	struct vnode *vp = ap->a_vp;
	accmode_t accmode = ap->a_accmode;
	struct vattr vattr;
	int error;

	/*
	 * Disallow write attempts on filesystems mounted read-only;
	 * unless the file is a socket, fifo, or a block or character
	 * device resident on the filesystem.
	 */
	if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
		switch (vp->v_type) {
		case VREG:
		case VDIR:
		case VLNK:
			return (EROFS);
		default:
			break;
		}
	}
	vap = &vattr;
	error = VOP_GETATTR(vp, vap, cred);
	if (error)
		goto out;
	/* Defer the actual permission check to the generic vaccess(). */
	error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
	    accmode, cred, NULL);
out:
	return error;
}

/*
 * Read wrapper for fifos.
 * Records the access time on the nfsnode, then hands off to the generic
 * fifo read routine.
 */
static int
nfsfifo_read(struct vop_read_args *ap)
{
	struct nfsnode *np = VTONFS(ap->a_vp);
	int error;

	/*
	 * Set access flag.
	 */
	mtx_lock(&np->n_mtx);
	np->n_flag |= NACC;
	getnanotime(&np->n_atim);
	mtx_unlock(&np->n_mtx);
	error = fifo_specops.vop_read(ap);
	return error;
}

/*
 * Write wrapper for fifos.
 * Records the modification time on the nfsnode, then hands off to the
 * generic fifo write routine.
 */
static int
nfsfifo_write(struct vop_write_args *ap)
{
	struct nfsnode *np = VTONFS(ap->a_vp);

	/*
	 * Set update flag.
	 */
	mtx_lock(&np->n_mtx);
	np->n_flag |= NUPD;
	getnanotime(&np->n_mtim);
	mtx_unlock(&np->n_mtx);
	return(fifo_specops.vop_write(ap));
}

/*
 * Close wrapper for fifos.
 *
 * Update the times on the nfsnode then do fifo close.
 */
static int
nfsfifo_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct vattr vattr;
	struct timespec ts;

	mtx_lock(&np->n_mtx);
	if (np->n_flag & (NACC | NUPD)) {
		/* Fold pending access/update stamps into the nfsnode. */
		getnanotime(&ts);
		if (np->n_flag & NACC)
			np->n_atim = ts;
		if (np->n_flag & NUPD)
			np->n_mtim = ts;
		np->n_flag |= NCHG;
		/*
		 * Push the times to the server only on last close of a
		 * writable mount; otherwise just remember them locally.
		 */
		if (vrefcnt(vp) == 1 &&
		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
			VATTR_NULL(&vattr);
			if (np->n_flag & NACC)
				vattr.va_atime = np->n_atim;
			if (np->n_flag & NUPD)
				vattr.va_mtime = np->n_mtim;
			mtx_unlock(&np->n_mtx);
			(void)VOP_SETATTR(vp, &vattr, ap->a_cred);
			goto out;
		}
	}
	mtx_unlock(&np->n_mtx);
out:
	return (fifo_specops.vop_close(ap));
}

/*
 * Just call ncl_writebp() with the force argument set to 1.
 *
 * NOTE: B_DONE may or may not be set in a_bp on call.
 */
static int
nfs_bwrite(struct buf *bp)
{

	return (ncl_writebp(bp, 1, curthread));
}

/* Buffer operations vector installed on NFS client vnodes. */
struct buf_ops buf_ops_newnfs = {
	.bop_name	=	"buf_ops_nfs",
	.bop_write	=	nfs_bwrite,
	.bop_strategy	=	bufstrategy,
	.bop_sync	=	bufsync,
	.bop_bdflush	=	bufbdflush,
};

/*
 * Cloned from vop_stdlock(), and then the ugly hack added.
 */
static int
nfs_lock1(struct vop_lock1_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int error = 0;

	/*
	 * Since vfs_hash_get() calls vget() and it will no longer work
	 * for FreeBSD8 with flags == 0, I can only think of this horrible
	 * hack to work around it. I call vfs_hash_get() with LK_EXCLOTHER
	 * and then handle it here. All I want for this case is a v_usecount
	 * on the vnode to use for recovery, while another thread might
	 * hold a lock on the vnode. I have the other threads blocked, so
	 * there isn't any race problem.
	 */
	if ((ap->a_flags & LK_TYPE_MASK) == LK_EXCLOTHER) {
		if ((ap->a_flags & LK_INTERLOCK) == 0)
			panic("ncllock1");
		if ((vp->v_iflag & VI_DOOMED))
			error = ENOENT;
		VI_UNLOCK(vp);
		return (error);
	}
	/* Normal case: behave like vop_stdlock(). */
	return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
	    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file,
	    ap->a_line));
}

/*
 * Get an NFSv4 ACL for a vnode.  Errors worse than NFSERR_STALE are
 * collapsed to EPERM after logging via nfscl_maperr().
 */
static int
nfs_getacl(struct vop_getacl_args *ap)
{
	int error;

	if (ap->a_type != ACL_TYPE_NFS4)
		return (EOPNOTSUPP);
	error = nfsrpc_getacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp,
	    NULL);
	if (error > NFSERR_STALE) {
		(void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0);
		error = EPERM;
	}
	return (error);
}

/*
 * Set an NFSv4 ACL on a vnode.  Mirrors nfs_getacl() error handling.
 */
static int
nfs_setacl(struct vop_setacl_args *ap)
{
	int error;

	if (ap->a_type != ACL_TYPE_NFS4)
		return (EOPNOTSUPP);
	error = nfsrpc_setacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp,
	    NULL);
	if (error > NFSERR_STALE) {
		(void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0);
		error = EPERM;
	}
	return (error);
}